Merge pull request #1606 from affaan-m/fix/1598-observer-sigusr1-wait

fix: retry observer wait after SIGUSR1
This commit is contained in:
Affaan Mustafa
2026-04-28 22:12:04 -04:00
committed by GitHub
2 changed files with 80 additions and 1 deletions

View File

@@ -83,6 +83,28 @@ exit_if_idle_without_sessions() {
fi
}
# Wait for the background Claude analysis child and report its own exit status.
#
# Arguments:
#   $1 - PID of the child process to wait for.
# Returns:
#   0 when the child exited successfully, otherwise the child's nonzero status.
#   A wait that was merely interrupted by a trapped signal is not reported as
#   a child failure.
wait_for_claude_analysis() {
  local child_pid="$1"
  local wait_status=0
  local retry_status=0

  while true; do
    wait "$child_pid"
    wait_status=$?
    if [ "$wait_status" -eq 0 ]; then
      return 0
    fi

    # SIGUSR1 can interrupt wait while the Claude child is still running.
    # Re-wait in that case so a signal is not logged as a false child failure.
    if kill -0 "$child_pid" 2>/dev/null; then
      continue
    fi

    # The child is gone. A status above 128 can still be the interrupted-wait
    # status rather than the child's own if the child exited between the
    # interrupted wait and the liveness probe above, so ask for the status
    # once more before trusting it.
    if [ "$wait_status" -gt 128 ]; then
      wait "$child_pid" 2>/dev/null
      retry_status=$?
      # 127 means the shell no longer tracks this pid, i.e. the first wait
      # already delivered the child's real status; keep that one.
      if [ "$retry_status" -ne 127 ]; then
        return "$retry_status"
      fi
    fi

    return "$wait_status"
  done
}
analyze_observations() {
if [ ! -f "$OBSERVATIONS_FILE" ]; then
return
@@ -217,7 +239,7 @@ PROMPT
) &
watchdog_pid=$!
wait "$claude_pid"
wait_for_claude_analysis "$claude_pid"
exit_code=$?
kill "$watchdog_pid" 2>/dev/null || true
rm -f "$analysis_file"

View File

@@ -205,6 +205,63 @@ test('prompt references analysis_file not full OBSERVATIONS_FILE', () => {
assert.ok(promptSection.includes('${analysis_relpath}'), 'Prompt should point Claude at the sampled analysis file (via relative path), not the full observations file');
});
// Regression test: a SIGUSR1 delivered while the shell is blocked in `wait`
// must not be reported as a failure of the still-running child process.
test('observer-loop wait helper retries SIGUSR1-interrupted waits while claude child is alive', () => {
  // The harness needs bash and POSIX signals, so it is skipped on Windows.
  if (process.platform === 'win32') {
    return;
  }

  const content = fs.readFileSync(observerLoopPath, 'utf8');
  // Extract only the helper definition so it can run without the rest of the script.
  const helperMatch = content.match(/wait_for_claude_analysis\(\) \{[\s\S]*?\n\}/);
  assert.ok(helperMatch, 'observer-loop.sh should define wait_for_claude_analysis helper');

  const script = [
    'set +e',
    helperMatch[0],
    // A no-op trap lets SIGUSR1 interrupt `wait` instead of terminating the shell.
    'trap ":" USR1',
    // Stand-in for the Claude child: outlives the signal and exits cleanly.
    '( sleep 0.35; exit 0 ) &',
    'claude_child=$!',
    // Signal the harness shell itself while the stand-in child is still sleeping.
    '( sleep 0.05; kill -USR1 $$ ) &',
    'signaler=$!',
    'wait_for_claude_analysis "$claude_child"',
    'status=$?',
    'wait "$signaler" 2>/dev/null || true',
    'exit "$status"'
  ].join('\n');

  const result = spawnSync('bash', ['-c', script], {
    encoding: 'utf8',
    timeout: 5000
  });

  // Surface spawn failures and timeouts explicitly; otherwise they only show up as "got null".
  assert.ok(
    !result.error,
    `bash harness should run to completion, got error: ${result.error && result.error.message}; signal: ${result.signal}`
  );
  assert.strictEqual(result.status, 0, `interrupted wait should return child exit 0, got ${result.status}; stderr: ${result.stderr}`);
});
// Companion to the SIGUSR1 retry test: the retry logic must not hide a child
// that really failed, so a nonzero exit has to come back unchanged.
test('observer-loop wait helper preserves real nonzero claude exits', () => {
  // The harness needs bash, so it is skipped on Windows.
  if (process.platform === 'win32') {
    return;
  }

  const content = fs.readFileSync(observerLoopPath, 'utf8');
  // Extract only the helper definition so it can run without the rest of the script.
  const helperMatch = content.match(/wait_for_claude_analysis\(\) \{[\s\S]*?\n\}/);
  assert.ok(helperMatch, 'observer-loop.sh should define wait_for_claude_analysis helper');

  const script = [
    'set +e',
    helperMatch[0],
    // Stand-in for a Claude child that fails with a recognizable status.
    '( sleep 0.05; exit 7 ) &',
    'claude_child=$!',
    'wait_for_claude_analysis "$claude_child"',
    'exit "$?"'
  ].join('\n');

  const result = spawnSync('bash', ['-c', script], {
    encoding: 'utf8',
    timeout: 5000
  });

  // Surface spawn failures and timeouts explicitly; otherwise they only show up as "got null".
  assert.ok(
    !result.error,
    `bash harness should run to completion, got error: ${result.error && result.error.message}; signal: ${result.signal}`
  );
  assert.strictEqual(result.status, 7, `real child failure should be preserved, got ${result.status}; stderr: ${result.stderr}`);
});
// ──────────────────────────────────────────────────────
// Test group 5: Signal counter file simulation
// ──────────────────────────────────────────────────────