mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-04-07 17:53:32 +08:00
fix: observer memory explosion with throttling, re-entrancy guard, and tail sampling (#536)
Three fixes for the positive feedback loop causing runaway memory usage:
1. SIGUSR1 throttling in observe.sh: Signal the observer only once every 20
observations (configurable via ECC_OBSERVER_SIGNAL_EVERY_N) instead
of on every tool call. A counter file tracks invocations between signals.
2. Re-entrancy guard in observer-loop.sh on_usr1(): an ANALYZING flag
prevents parallel Claude analysis processes from spawning when
signals arrive while an analysis is already running.
3. Cooldown + tail-based sampling in observer-loop.sh:
- 60s cooldown between analyses (ECC_OBSERVER_ANALYSIS_COOLDOWN)
- Only the last 500 lines are sent to the LLM (ECC_OBSERVER_MAX_ANALYSIS_LINES)
instead of the entire observations file
Closes #521
This commit is contained in:
@@ -365,24 +365,45 @@ if [ "$OBSERVER_ENABLED" = "true" ]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Signal observer if running (check both project-scoped and global observer, deduplicate)
|
||||
signaled_pids=" "
|
||||
for pid_file in "${PROJECT_DIR}/.observer.pid" "${CONFIG_DIR}/.observer.pid"; do
|
||||
if [ -f "$pid_file" ]; then
|
||||
observer_pid=$(cat "$pid_file" 2>/dev/null || true)
|
||||
# Validate PID is a positive integer (>1)
|
||||
case "$observer_pid" in
|
||||
''|*[!0-9]*|0|1) rm -f "$pid_file" 2>/dev/null || true; continue ;;
|
||||
esac
|
||||
# Deduplicate: skip if already signaled this pass
|
||||
case "$signaled_pids" in
|
||||
*" $observer_pid "*) continue ;;
|
||||
esac
|
||||
if kill -0 "$observer_pid" 2>/dev/null; then
|
||||
kill -USR1 "$observer_pid" 2>/dev/null || true
|
||||
signaled_pids="${signaled_pids}${observer_pid} "
|
||||
fi
|
||||
# Throttle SIGUSR1: only signal observer every N observations (#521)
|
||||
# This prevents rapid signaling when tool calls fire every second,
|
||||
# which caused runaway parallel Claude analysis processes.
|
||||
SIGNAL_EVERY_N="${ECC_OBSERVER_SIGNAL_EVERY_N:-20}"
|
||||
SIGNAL_COUNTER_FILE="${PROJECT_DIR}/.observer-signal-counter"
|
||||
|
||||
should_signal=0
|
||||
if [ -f "$SIGNAL_COUNTER_FILE" ]; then
|
||||
counter=$(cat "$SIGNAL_COUNTER_FILE" 2>/dev/null || echo 0)
|
||||
counter=$((counter + 1))
|
||||
if [ "$counter" -ge "$SIGNAL_EVERY_N" ]; then
|
||||
should_signal=1
|
||||
counter=0
|
||||
fi
|
||||
done
|
||||
echo "$counter" > "$SIGNAL_COUNTER_FILE"
|
||||
else
|
||||
echo "1" > "$SIGNAL_COUNTER_FILE"
|
||||
fi
|
||||
|
||||
# Signal observer if running and throttle allows (check both project-scoped and global observer, deduplicate)
|
||||
if [ "$should_signal" -eq 1 ]; then
|
||||
signaled_pids=" "
|
||||
for pid_file in "${PROJECT_DIR}/.observer.pid" "${CONFIG_DIR}/.observer.pid"; do
|
||||
if [ -f "$pid_file" ]; then
|
||||
observer_pid=$(cat "$pid_file" 2>/dev/null || true)
|
||||
# Validate PID is a positive integer (>1)
|
||||
case "$observer_pid" in
|
||||
''|*[!0-9]*|0|1) rm -f "$pid_file" 2>/dev/null || true; continue ;;
|
||||
esac
|
||||
# Deduplicate: skip if already signaled this pass
|
||||
case "$signaled_pids" in
|
||||
*" $observer_pid "*) continue ;;
|
||||
esac
|
||||
if kill -0 "$observer_pid" 2>/dev/null; then
|
||||
kill -USR1 "$observer_pid" 2>/dev/null || true
|
||||
signaled_pids="${signaled_pids}${observer_pid} "
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
exit 0
|
||||
|
||||
Reference in New Issue
Block a user