From d4f132b716a6cdbc3a6a521a21fd2811c9da3480 Mon Sep 17 00:00:00 2001 From: Craig Jennings Date: Thu, 2 Jul 2026 01:38:24 -0400 Subject: feat(flush): add auto mode with self-injected /clear for unattended runs Long autonomous sessions bloat or hit auto-compaction because /clear is a prompt keystroke no tool call can execute. Auto mode closes that gap: after the write-verified checkpoint, the agent derives its own tmux pane, arms self-inject.sh through tmux run-shell -b, and ends the turn so /clear and a resume line land at an idle prompt. The server-owned arm is load-bearing: a detached child of a tool call dies at the turn boundary. The pane must be derived before arming because ancestry detection can't work under the tmux server. self-inject.sh joins the synced scripts with a six-test bats suite, tmux stubbed at the boundary. work-the-backlog now auto-flushes between tasks when context grows heavy, and its speedrun preset gained the per-item disposition rule: feature-level work gets a spec, unguessable decisions get a VERIFY, well-defined tasks get implemented. The mechanism was proven live in another project's session and its design note is preserved under docs/design/. 
--- .ai/scripts/self-inject.sh | 68 +++++++++++++++++++ .ai/scripts/tests/self-inject.bats | 78 ++++++++++++++++++++++ .ai/workflows/work-the-backlog.org | 16 ++++- claude-templates/.ai/scripts/self-inject.sh | 68 +++++++++++++++++++ .../.ai/scripts/tests/self-inject.bats | 78 ++++++++++++++++++++++ .../.ai/workflows/work-the-backlog.org | 16 ++++- .../2026-07-02-auto-flush-mechanism-note.org | 20 ++++++ flush/SKILL.md | 32 ++++++++- 8 files changed, 371 insertions(+), 5 deletions(-) create mode 100755 .ai/scripts/self-inject.sh create mode 100644 .ai/scripts/tests/self-inject.bats create mode 100755 claude-templates/.ai/scripts/self-inject.sh create mode 100644 claude-templates/.ai/scripts/tests/self-inject.bats create mode 100644 docs/design/2026-07-02-auto-flush-mechanism-note.org diff --git a/.ai/scripts/self-inject.sh b/.ai/scripts/self-inject.sh new file mode 100755 index 0000000..e7340c1 --- /dev/null +++ b/.ai/scripts/self-inject.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# self-inject.sh — type text into the tmux pane running this agent session. +# +# The building block for AUTO-FLUSH: an agent checkpoints its session-context, +# then has tmux type "/clear" and a resume prompt at its own idle prompt, so a +# session flushes with no human at the keyboard. +# +# Usage: +# self-inject.sh -t %PANE <delay> <text> [<delay> <text> ...] +# self-inject.sh <delay> <text> [...] # derive pane from ancestry +# self-inject.sh [-t %PANE] # no pairs: report the pane +# +# Each pair: sleep <delay> seconds, then type <text> literally and press Enter. +# +# TWO HARD-WON GOTCHAS (2026-07-02, archsetup session): +# 1. A detached child (setsid/nohup/&) of an agent tool call DIES when the +# tool call ends — the harness cleans up the process group. The arm step +# must run under the tmux SERVER instead: +# tmux run-shell -b "self-inject.sh -t %1 25 '/clear' 15 'go — resume...'" +# 2. Under tmux run-shell the process is a child of the tmux server, so +# ancestry-based pane detection CANNOT work there. 
Derive the pane FIRST, +# synchronously from the agent's own shell (no -t), then pass it +# explicitly with -t when arming. +# +# Collision hazard: if the user happens to be typing when the send fires, the +# injected text merges into their input line (a real /clear became "/clearto" +# mid-word). Auto-flush is for sessions running unattended; warn the user to +# keep hands off for the armed window if they're present. + +PANE="" +if [ "$1" = "-t" ]; then + PANE=$2; shift 2 +fi + +ppid_of() { + # /proc/<pid>/stat: pid (comm) state ppid ... — comm may contain spaces, + # so take the 2nd field after the LAST ')'. + stat=$(cat "/proc/$1/stat" 2>/dev/null) || return 1 + # shellcheck disable=SC2086 # word-splitting the stat tail is the point + set -- ${stat##*) } + echo "$2" +} + +find_pane() { + anc=" " + pid=$$ + while [ -n "$pid" ] && [ "$pid" -gt 1 ] 2>/dev/null; do + anc="$anc$pid " + pid=$(ppid_of "$pid") || break + done + tmux list-panes -a -F "#{pane_pid} #{pane_id}" 2>/dev/null | \ + while read -r ppid pane; do + case "$anc" in *" $ppid "*) echo "$pane"; break;; esac + done +} + +[ -n "$PANE" ] || PANE=$(find_pane) +[ -n "$PANE" ] || { echo "self-inject: no owning pane found (pass -t %PANE)" >&2; exit 1; } + +# With no delay/text pairs, just report the pane (the derive-first step). +[ $# -ge 2 ] || { echo "$PANE"; exit 0; } + +while [ $# -ge 2 ]; do + sleep "$1" + tmux send-keys -t "$PANE" -l "$2" + tmux send-keys -t "$PANE" Enter + shift 2 +done diff --git a/.ai/scripts/tests/self-inject.bats b/.ai/scripts/tests/self-inject.bats new file mode 100644 index 0000000..482f61d --- /dev/null +++ b/.ai/scripts/tests/self-inject.bats @@ -0,0 +1,78 @@ +#!/usr/bin/env bats +# Tests for self-inject.sh — tmux is the external boundary, stubbed with a +# recording fake so no real server is needed. 
+ +setup() { + SCRIPT="$BATS_TEST_DIRNAME/../self-inject.sh" + STUB_DIR="$BATS_TEST_TMPDIR/bin" + LOG="$BATS_TEST_TMPDIR/tmux.log" + mkdir -p "$STUB_DIR" +} + +# A tmux stub that records every invocation and answers list-panes from +# $STUB_PANES (empty by default, so pane derivation fails unless a test +# provides ancestry-matching output). +make_stub() { + cat > "$STUB_DIR/tmux" <<'EOF' +#!/bin/sh +echo "$@" >> "$LOG" +case "$1" in + list-panes) printf '%s\n' "$STUB_PANES" ;; +esac +EOF + chmod +x "$STUB_DIR/tmux" +} + +@test "self-inject: -t pane with no pairs echoes the pane and exits 0" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %42 + [ "$status" -eq 0 ] + [ "$output" = "%42" ] + # Pane was supplied, nothing sent: tmux must not have been called. + [ ! -e "$LOG" ] +} + +@test "self-inject: no pane derivable and no -t exits 1 with an error" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" 0 "hello" + [ "$status" -eq 1 ] + case "$output" in *"no owning pane"*) : ;; *) false ;; esac +} + +@test "self-inject: derives the pane from process ancestry via list-panes" { + make_stub + # The stub reports the bats test process itself as a pane's pane_pid; + # the script runs as our child, so that pid is in its ancestry. 
+ run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="$$ %7" sh "$SCRIPT" + [ "$status" -eq 0 ] + [ "$output" = "%7" ] +} + +@test "self-inject: one delay/text pair sends literal text then Enter" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %3 0 "/clear" + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${lines[0]}" = "send-keys -t %3 -l /clear" ] + [ "${lines[1]}" = "send-keys -t %3 Enter" ] +} + +@test "self-inject: multiple pairs send in order" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" \ + sh "$SCRIPT" -t %3 0 "/clear" 0 "go — resume" + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${lines[0]}" = "send-keys -t %3 -l /clear" ] + [ "${lines[1]}" = "send-keys -t %3 Enter" ] + [ "${lines[2]}" = "send-keys -t %3 -l go — resume" ] + [ "${lines[3]}" = "send-keys -t %3 Enter" ] +} + +@test "self-inject: dangling odd argument after pairs is ignored" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %3 0 "one" 99 + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${#lines[@]}" -eq 2 ] +} diff --git a/.ai/workflows/work-the-backlog.org b/.ai/workflows/work-the-backlog.org index 284935b..642162d 100644 --- a/.ai/workflows/work-the-backlog.org +++ b/.ai/workflows/work-the-backlog.org @@ -140,6 +140,10 @@ The cap is a hard per-run task ceiling passed by the caller — the kill switch Even the speedrun stops at the cap and surfaces (and, with paging on, pages) the remainder. The cap bounds task *count*, not cost; a token budget is logged as vNext. +* Context hygiene — auto-flush between tasks + +Task boundaries are clean boundaries by construction: the previous task is closed and committed (or filed), nothing is half-edited. 
When the context window grows heavy mid-run, run the flush skill's *auto mode* between tasks: checkpoint the session anchor with the remaining task set, session mode, and cap in Next Steps (so the resumed context continues the run blind), arm the self-injection (=.ai/scripts/self-inject.sh= via =tmux run-shell -b=), and end the turn. The fresh context resumes from the anchor and works on. Unattended runs only — the keystroke-collision hazard and the full mechanism live in the flush skill. + * End-of-set page With paging on, fire one page when the set is done or the cap is hit — end-of-set only, never per-task: @@ -207,11 +211,21 @@ When Craig names a task set and says "speedrun": 3. *Order* the list — priority, then the author's ordering / =:next:=. 4. *Intro the work* — present the ordered plan: what will run, what was dropped and why, and the batched questions for the needs-quick-decisions tasks. 5. *Craig answers each question or says "skip this"* — a skip removes the task (recorded =dropped-by-craig=; the task itself stays =TODO=); an answer is recorded so implementation works from the decision, not a guess. -6. *Run the finalized list autonomously* — no further approvals until done. Cap = the list length (the human bounded the set by naming it), still one commit per logical change, always-push per the project's flow. +6. *Run the finalized list autonomously* — no further approvals until done. Cap = the list length (the human bounded the set by naming it), still one commit per logical change, always-push per the project's flow, auto-flushing between tasks when the context grows heavy (see Context hygiene above). 7. *End-of-set page* with completed + remaining + skipped. The batch-ask (step 4-5) is one message: each question names its task, puts the recommended answer at item 1 when there is one (per =interaction.md= — inline numbered, no popup), and offers "skip this" as the last option. 
Before the run starts, write each answer into its task's body in =todo.org= as a dated line — the implementation works from the recorded decision, and the record survives the session. The Q&A fires only under this preset; the loop caller never asks (its decision-needing tasks defer). +*** Per-item disposition rule + +For every item the run picks up (this holds for any executing caller, including an auto-inbox-zero run given a standing yes): + +- *Feature-level task* → write a spec first (=spec-create=), don't implement directly. The spec is the run's deliverable for that item. +- *Needs decisions you can't confidently guess* → file it as a =VERIFY= carrying the question (under this preset, one or two quick questions route to the pre-flight Q&A instead). +- *Well-defined* → implement it, taking the time it needs. + +This extends the defer checklist: the checklist decides *act vs file*; this rule decides the *shape* of the act. + * Synthesis: metrics → org-roam KB Trigger: "synthesize backlog metrics" (optionally a weekly scheduled run). This is the read side of the metrics log — Craig's ask was "gather data and create org-roam articles we can look at later," and this step is the second half. It is read-only over the logs plus exactly one KB write. diff --git a/claude-templates/.ai/scripts/self-inject.sh b/claude-templates/.ai/scripts/self-inject.sh new file mode 100755 index 0000000..e7340c1 --- /dev/null +++ b/claude-templates/.ai/scripts/self-inject.sh @@ -0,0 +1,68 @@ +#!/bin/sh +# self-inject.sh — type text into the tmux pane running this agent session. +# +# The building block for AUTO-FLUSH: an agent checkpoints its session-context, +# then has tmux type "/clear" and a resume prompt at its own idle prompt, so a +# session flushes with no human at the keyboard. +# +# Usage: +# self-inject.sh -t %PANE <delay> <text> [<delay> <text> ...] +# self-inject.sh <delay> <text> [...] 
# derive pane from ancestry +# self-inject.sh [-t %PANE] # no pairs: report the pane +# +# Each pair: sleep <delay> seconds, then type <text> literally and press Enter. +# +# TWO HARD-WON GOTCHAS (2026-07-02, archsetup session): +# 1. A detached child (setsid/nohup/&) of an agent tool call DIES when the +# tool call ends — the harness cleans up the process group. The arm step +# must run under the tmux SERVER instead: +# tmux run-shell -b "self-inject.sh -t %1 25 '/clear' 15 'go — resume...'" +# 2. Under tmux run-shell the process is a child of the tmux server, so +# ancestry-based pane detection CANNOT work there. Derive the pane FIRST, +# synchronously from the agent's own shell (no -t), then pass it +# explicitly with -t when arming. +# +# Collision hazard: if the user happens to be typing when the send fires, the +# injected text merges into their input line (a real /clear became "/clearto" +# mid-word). Auto-flush is for sessions running unattended; warn the user to +# keep hands off for the armed window if they're present. + +PANE="" +if [ "$1" = "-t" ]; then + PANE=$2; shift 2 +fi + +ppid_of() { + # /proc/<pid>/stat: pid (comm) state ppid ... — comm may contain spaces, + # so take the 2nd field after the LAST ')'. + stat=$(cat "/proc/$1/stat" 2>/dev/null) || return 1 + # shellcheck disable=SC2086 # word-splitting the stat tail is the point + set -- ${stat##*) } + echo "$2" +} + +find_pane() { + anc=" " + pid=$$ + while [ -n "$pid" ] && [ "$pid" -gt 1 ] 2>/dev/null; do + anc="$anc$pid " + pid=$(ppid_of "$pid") || break + done + tmux list-panes -a -F "#{pane_pid} #{pane_id}" 2>/dev/null | \ + while read -r ppid pane; do + case "$anc" in *" $ppid "*) echo "$pane"; break;; esac + done +} + +[ -n "$PANE" ] || PANE=$(find_pane) +[ -n "$PANE" ] || { echo "self-inject: no owning pane found (pass -t %PANE)" >&2; exit 1; } + +# With no delay/text pairs, just report the pane (the derive-first step). 
+[ $# -ge 2 ] || { echo "$PANE"; exit 0; } + +while [ $# -ge 2 ]; do + sleep "$1" + tmux send-keys -t "$PANE" -l "$2" + tmux send-keys -t "$PANE" Enter + shift 2 +done diff --git a/claude-templates/.ai/scripts/tests/self-inject.bats b/claude-templates/.ai/scripts/tests/self-inject.bats new file mode 100644 index 0000000..482f61d --- /dev/null +++ b/claude-templates/.ai/scripts/tests/self-inject.bats @@ -0,0 +1,78 @@ +#!/usr/bin/env bats +# Tests for self-inject.sh — tmux is the external boundary, stubbed with a +# recording fake so no real server is needed. + +setup() { + SCRIPT="$BATS_TEST_DIRNAME/../self-inject.sh" + STUB_DIR="$BATS_TEST_TMPDIR/bin" + LOG="$BATS_TEST_TMPDIR/tmux.log" + mkdir -p "$STUB_DIR" +} + +# A tmux stub that records every invocation and answers list-panes from +# $STUB_PANES (empty by default, so pane derivation fails unless a test +# provides ancestry-matching output). +make_stub() { + cat > "$STUB_DIR/tmux" <<'EOF' +#!/bin/sh +echo "$@" >> "$LOG" +case "$1" in + list-panes) printf '%s\n' "$STUB_PANES" ;; +esac +EOF + chmod +x "$STUB_DIR/tmux" +} + +@test "self-inject: -t pane with no pairs echoes the pane and exits 0" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %42 + [ "$status" -eq 0 ] + [ "$output" = "%42" ] + # Pane was supplied, nothing sent: tmux must not have been called. + [ ! -e "$LOG" ] +} + +@test "self-inject: no pane derivable and no -t exits 1 with an error" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" 0 "hello" + [ "$status" -eq 1 ] + case "$output" in *"no owning pane"*) : ;; *) false ;; esac +} + +@test "self-inject: derives the pane from process ancestry via list-panes" { + make_stub + # The stub reports the bats test process itself as a pane's pane_pid; + # the script runs as our child, so that pid is in its ancestry. 
+ run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="$$ %7" sh "$SCRIPT" + [ "$status" -eq 0 ] + [ "$output" = "%7" ] +} + +@test "self-inject: one delay/text pair sends literal text then Enter" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %3 0 "/clear" + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${lines[0]}" = "send-keys -t %3 -l /clear" ] + [ "${lines[1]}" = "send-keys -t %3 Enter" ] +} + +@test "self-inject: multiple pairs send in order" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" \ + sh "$SCRIPT" -t %3 0 "/clear" 0 "go — resume" + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${lines[0]}" = "send-keys -t %3 -l /clear" ] + [ "${lines[1]}" = "send-keys -t %3 Enter" ] + [ "${lines[2]}" = "send-keys -t %3 -l go — resume" ] + [ "${lines[3]}" = "send-keys -t %3 Enter" ] +} + +@test "self-inject: dangling odd argument after pairs is ignored" { + make_stub + run env PATH="$STUB_DIR:$PATH" LOG="$LOG" STUB_PANES="" sh "$SCRIPT" -t %3 0 "one" 99 + [ "$status" -eq 0 ] + run cat "$LOG" + [ "${#lines[@]}" -eq 2 ] +} diff --git a/claude-templates/.ai/workflows/work-the-backlog.org b/claude-templates/.ai/workflows/work-the-backlog.org index 284935b..642162d 100644 --- a/claude-templates/.ai/workflows/work-the-backlog.org +++ b/claude-templates/.ai/workflows/work-the-backlog.org @@ -140,6 +140,10 @@ The cap is a hard per-run task ceiling passed by the caller — the kill switch Even the speedrun stops at the cap and surfaces (and, with paging on, pages) the remainder. The cap bounds task *count*, not cost; a token budget is logged as vNext. +* Context hygiene — auto-flush between tasks + +Task boundaries are clean boundaries by construction: the previous task is closed and committed (or filed), nothing is half-edited. 
When the context window grows heavy mid-run, run the flush skill's *auto mode* between tasks: checkpoint the session anchor with the remaining task set, session mode, and cap in Next Steps (so the resumed context continues the run blind), arm the self-injection (=.ai/scripts/self-inject.sh= via =tmux run-shell -b=), and end the turn. The fresh context resumes from the anchor and works on. Unattended runs only — the keystroke-collision hazard and the full mechanism live in the flush skill. + * End-of-set page With paging on, fire one page when the set is done or the cap is hit — end-of-set only, never per-task: @@ -207,11 +211,21 @@ When Craig names a task set and says "speedrun": 3. *Order* the list — priority, then the author's ordering / =:next:=. 4. *Intro the work* — present the ordered plan: what will run, what was dropped and why, and the batched questions for the needs-quick-decisions tasks. 5. *Craig answers each question or says "skip this"* — a skip removes the task (recorded =dropped-by-craig=; the task itself stays =TODO=); an answer is recorded so implementation works from the decision, not a guess. -6. *Run the finalized list autonomously* — no further approvals until done. Cap = the list length (the human bounded the set by naming it), still one commit per logical change, always-push per the project's flow. +6. *Run the finalized list autonomously* — no further approvals until done. Cap = the list length (the human bounded the set by naming it), still one commit per logical change, always-push per the project's flow, auto-flushing between tasks when the context grows heavy (see Context hygiene above). 7. *End-of-set page* with completed + remaining + skipped. The batch-ask (step 4-5) is one message: each question names its task, puts the recommended answer at item 1 when there is one (per =interaction.md= — inline numbered, no popup), and offers "skip this" as the last option. 
Before the run starts, write each answer into its task's body in =todo.org= as a dated line — the implementation works from the recorded decision, and the record survives the session. The Q&A fires only under this preset; the loop caller never asks (its decision-needing tasks defer). +*** Per-item disposition rule + +For every item the run picks up (this holds for any executing caller, including an auto-inbox-zero run given a standing yes): + +- *Feature-level task* → write a spec first (=spec-create=), don't implement directly. The spec is the run's deliverable for that item. +- *Needs decisions you can't confidently guess* → file it as a =VERIFY= carrying the question (under this preset, one or two quick questions route to the pre-flight Q&A instead). +- *Well-defined* → implement it, taking the time it needs. + +This extends the defer checklist: the checklist decides *act vs file*; this rule decides the *shape* of the act. + * Synthesis: metrics → org-roam KB Trigger: "synthesize backlog metrics" (optionally a weekly scheduled run). This is the read side of the metrics log — Craig's ask was "gather data and create org-roam articles we can look at later," and this step is the second half. It is read-only over the logs plus exactly one KB write. diff --git a/docs/design/2026-07-02-auto-flush-mechanism-note.org b/docs/design/2026-07-02-auto-flush-mechanism-note.org new file mode 100644 index 0000000..fbe06ae --- /dev/null +++ b/docs/design/2026-07-02-auto-flush-mechanism-note.org @@ -0,0 +1,20 @@ +#+TITLE: AUTO-FLUSH capability — proven live in the archsetup session +#+SOURCE: from archsetup +#+DATE: 2026-07-02 01:26:20 -0400 + +AUTO-FLUSH capability — proven live in the archsetup session 2026-07-02, Craig asks that it be promoted to all projects and recommended as part of the no-approvals speedrun to keep sessions sharp. + +Problem: /clear is a user-only keystroke, so long autonomous sessions either bloat or hit arbitrary auto-compaction. 
Craig can't always be around to type it. + +Mechanism (companion script: self-inject.sh, sent separately to this inbox): +1. At a clean task boundary, the agent refreshes .ai/session-context.org exactly as the flush skill does (checkpoint with Active Goal / Decisions / Next Steps). +2. It derives its own tmux pane: match pane_pid from 'tmux list-panes -a' against its process ancestry (the ai launcher runs every agent session inside tmux, so this holds everywhere). +3. It arms the injection VIA THE TMUX SERVER — tmux run-shell -b "sleep 25; tmux send-keys -t %N -l '/clear'; tmux send-keys -t %N Enter; sleep 15; tmux send-keys -t %N -l 'go — auto-flush resume: read .ai/session-context.org and continue per Next Steps'; tmux send-keys -t %N Enter" — and immediately ends its turn so the prompt is idle when the keys land. +4. /clear fires the SessionStart hook (which already points a fresh context at notes.org + session-context.org), and the injected resume line starts the next turn. Zero human keystrokes. + +Gotchas learned the hard way: +- A detached child (setsid/nohup/&) of a tool call DIES when the tool call ends; only tmux run-shell -b (server-owned) survives the turn boundary. +- Under run-shell the process is a child of the tmux server, so ancestry-based pane detection can't run there — derive the pane first from the agent's shell, pass it explicitly. +- Collision: if the user is typing when the keys fire, the injection merges into their input (a real /clear became '/clearto' mid-word). Fine for unattended sessions; warn the user to keep hands off the armed window if present. + +Suggested integration: an 'auto' mode on the flush skill (checkpoint, then self-inject instead of prompting the user), plus a line in the no-approvals speedrun workflow to auto-flush at clean boundaries when context grows heavy. The script could live in claude-templates' .ai/scripts/ so every project gets it on sync. 
diff --git a/flush/SKILL.md b/flush/SKILL.md index 4c2709a..ca139c1 100644 --- a/flush/SKILL.md +++ b/flush/SKILL.md @@ -1,6 +1,6 @@ --- name: flush -description: Mid-session context flush — the checkpoint half of the wrap/restart rhythm. Refresh the session-context anchor in place, prompt the user to /clear, then resume the same logical session from the anchor without re-running startup. Cheaper tokens and a sharper context window without fragmenting the session into archive files. Agent-callable and agent-initiated: the agent may run the pre-clear checkpoint on its own judgment at a clean task boundary, but /clear is user-only — the agent does all the work, then prompts for the single /clear keystroke. Use when the current task has a clean boundary and the context window is large enough that a reset would sharpen the work. Do NOT use for end-of-day or done-for-now (use wrap-it-up, which archives to .ai/sessions/ and commits), or for a genuine fresh start after being away or on another machine (use startup, which pulls + syncs + surfaces inbox). +description: Mid-session context flush — the checkpoint half of the wrap/restart rhythm. Refresh the session-context anchor in place, prompt the user to /clear (or in auto mode self-inject it via tmux), then resume the same logical session from the anchor without re-running startup. Cheaper tokens and a sharper context window without fragmenting the session into archive files. Agent-callable and agent-initiated: the agent may run the pre-clear checkpoint on its own judgment at a clean task boundary; interactively /clear stays the user's keystroke, while auto mode ("/flush auto", for unattended runs like the no-approvals speedrun or a recurring loop) arms .ai/scripts/self-inject.sh so tmux types /clear and a resume line at the agent's own idle prompt — zero human keystrokes. Use when the current task has a clean boundary and the context window is large enough that a reset would sharpen the work. 
Do NOT use for end-of-day or done-for-now (use wrap-it-up, which archives to .ai/sessions/ and commits), or for a genuine fresh start after being away or on another machine (use startup, which pulls + syncs + surfaces inbox). --- # /flush — Mid-Session Context Checkpoint @@ -20,9 +20,11 @@ This is the checkpoint half of the wrap/restart rhythm. It is distinct from two The skill is agent-callable. The agent may also **initiate** a flush on its own judgment when the rhythm calls for it (see below) — it runs the pre-clear checkpoint, then prompts the user to type `/clear`. -## The hard constraint +## The constraint, and the auto-mode exception -`/clear` is a user-only command. The agent **cannot** execute it. "Agent-initiated" means the agent runs the pre-clear checkpoint (refresh the anchor + verify the write landed) on its own, then **prompts** the user: "checkpoint saved, type /clear to reset." The agent proposes and does all the work; the user supplies the single `/clear` keystroke. Never design or imply a flow where the agent self-triggers `/clear`. +`/clear` is a prompt command — the agent cannot execute it as a tool call. Interactively, "agent-initiated" means the agent runs the pre-clear checkpoint (refresh the anchor + verify the write landed) on its own, then **prompts** the user: "checkpoint saved, type /clear to reset." The user supplies the single `/clear` keystroke. + +**Auto mode** (`/flush auto`) is the sanctioned exception for unattended sessions: after the checkpoint, the agent arms the tmux server to type `/clear` and a resume line at its own idle prompt (see Auto mode below). The constraint that never bends is the **gate order**: the anchor write is verified on disk *before* anything arms or prompts a clear. There is no recovering the conversation afterward. ## When the agent should initiate @@ -68,6 +70,30 @@ That is the wrap/restart rhythm. When both conditions hold, run Phase 1 and end 6. 
**Hand off the clear.** Tell the user the checkpoint is saved, name the anchor path, and prompt: type `/clear` now, then send any message to resume. +## Auto mode — self-injected clear for unattended sessions + +`/flush auto` runs Phase 1 in full (steps 1-5, including the write-verified gate), then replaces step 6's user prompt with a self-injection. Proven live in the archsetup session 2026-07-02; the mechanism and its gotchas live in `.ai/scripts/self-inject.sh` (synced into every project). + +1. **Derive the pane first, synchronously, from this shell:** + + ```bash + pane=$(.ai/scripts/self-inject.sh) + ``` + + This must happen before arming: the armed step runs under the tmux *server*, where ancestry-based pane detection cannot work. + +2. **Arm the injection via the tmux server, then end the turn immediately:** + + ```bash + tmux run-shell -b ".ai/scripts/self-inject.sh -t $pane 25 '/clear' 15 'go (auto-flush resume: continue per Next Steps)'" + ``` + + `run-shell -b` is load-bearing — a detached child of a tool call (`setsid`/`nohup`/`&`) dies when the tool call ends; only a server-owned process survives the turn boundary. The delays let the turn fully end before `/clear` lands (25s) and let the `SessionStart` hook finish before the resume line lands (15s). + +3. **End the turn.** The prompt must be idle when the keys arrive. The injected `/clear` fires the same `SessionStart(clear)` hook as a hand-typed one; the injected resume line starts the next turn. Zero human keystrokes. + +**When to use it:** unattended runs only — the no-approvals speedrun, a recurring loop, any session where nobody is at the keyboard. The collision hazard is real: keys injected while a human is mid-keystroke merge into their input (`/clear` has become `/clearto`). If a user may be present, say the window is armed and to keep hands off, or use the interactive prompt instead. 
+ ## Phase 2 — Post-clear resume (hook-driven) This half is driven by the `SessionStart(clear)` hook, not by this skill — but it is documented here so the loop is legible. -- cgit v1.2.3