fix(clv2): archive observations only after successful analysis in observer-loop (#2386)

analyze_observations moved observations.jsonl into observations.archive/ unconditionally, even when the Claude analysis failed (timeout, non-zero exit, rate limit). Because the analyzer only reads the live file, a failed batch was archived and never re-analyzed, silently dropping the instincts it would have produced. Return early on a non-zero analysis exit so the archive mv runs only on success, retaining observations for the next cycle to retry. Resolve the script's own directory from ${BASH_SOURCE[0]} (SCRIPT_DIR) so sibling scripts (session-guardian.sh) and relative helpers resolve correctly under both execution and sourcing, and add a source-guard so observer-loop.sh can be sourced without starting the loop. Add a regression test covering both the failure (retain) and success (archive) paths. Fixes #2370
2026-07-02 04:51:26 +08:00 · 2026-06-30 07:13:37 +05:30
parent a36148fff9
commit f720885cea
2 changed files with 226 additions and 4 deletions
@@ -19,6 +19,12 @@ IDLE_TIMEOUT_SECONDS="${ECC_OBSERVER_IDLE_TIMEOUT_SECONDS:-1800}"
 SESSION_LEASE_DIR="${PROJECT_DIR}/.observer-sessions"
 ACTIVITY_FILE="${PROJECT_DIR}/.observer-last-activity"

+# Resolve this script's own directory so sibling scripts (session-guardian.sh)
+# and relative helpers (../scripts/instinct-cli.py) resolve correctly whether
+# this file is executed or sourced. $0 is the *caller* when sourced, so prefer
+# ${BASH_SOURCE[0]}, which always points at this file (#2370).
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
 cleanup() {
  [ -n "$SLEEP_PID" ] && kill "$SLEEP_PID" 2>/dev/null
  if [ -f "$PID_FILE" ] && [ "$(cat "$PID_FILE" 2>/dev/null)" = "$$" ]; then
@@ -129,7 +135,7 @@ analyze_observations() {
  fi

  # session-guardian: gate observer cycle (active hours, cooldown, idle detection)
-  if ! bash "$(dirname "$0")/session-guardian.sh"; then
+  if ! bash "${SCRIPT_DIR}/session-guardian.sh"; then
    echo "[$(date)] Observer cycle skipped by session-guardian" >> "$LOG_FILE"
    return
  fi
@@ -259,9 +265,14 @@ PROMPT
  rm -f "$analysis_file"

  if [ "$exit_code" -ne 0 ]; then
-    echo "[$(date)] Claude analysis failed (exit $exit_code)" >> "$LOG_FILE"
+    echo "[$(date)] Claude analysis failed (exit $exit_code); retaining observations for retry" >> "$LOG_FILE"
+    return
  fi

+  # Archive observations only after a successful analysis. A transient
+  # failure (timeout, non-zero exit, rate limit) must not discard the batch
+  # before it has been turned into instincts, since the analyzer only ever
+  # reads the live observations file (#2370).
  if [ -f "$OBSERVATIONS_FILE" ]; then
    archive_dir="${PROJECT_DIR}/observations.archive"
    mkdir -p "$archive_dir"
@@ -298,11 +309,20 @@ on_usr1() {
 }
 trap on_usr1 USR1

+# When this file is sourced (e.g. by tests/hooks/observer-loop-archive.test.js)
+# rather than executed, stop here so callers can invoke individual functions
+# such as analyze_observations without starting the observer loop. Everything
+# above this guard (including the USR1 trap) still runs in the sourcing shell.
+# The only production caller (start-observer.sh) executes the script, so $0
+# equals BASH_SOURCE[0] there and this guard is a no-op (#2370).
+if [ "${BASH_SOURCE[0]}" != "${0}" ]; then
+  return 0 2>/dev/null || true
+fi
+
 echo "$$" > "$PID_FILE"
 echo "[$(date)] Observer started for ${PROJECT_NAME} (PID: $$)" >> "$LOG_FILE"

-# Prune expired pending instincts before analysis
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# Prune expired pending instincts before analysis (SCRIPT_DIR resolved at top
+# via ${BASH_SOURCE[0]} so it is correct under both execution and sourcing).
 "${CLV2_PYTHON_CMD:-python3}" "${SCRIPT_DIR}/../scripts/instinct-cli.py" prune --quiet >> "$LOG_FILE" 2>&1 || echo "[$(date)] Warning: instinct prune failed (non-fatal)" >> "$LOG_FILE"

 while true; do