Merge pull request #1367 from ozoz5/feat/gateguard

feat(hooks,skills): add gateguard fact-forcing pre-action gate
2026-06-14 12:11:27 +08:00 · 2026-04-13 01:05:20 -07:00
parent 85e331e49a 6c67566767
commit e0ddb331f6
4 changed files with 846 additions and 0 deletions
@@ -126,6 +126,30 @@
        ],
        "description": "Check MCP server health before MCP tool execution and block unhealthy MCP calls",
        "id": "pre:mcp-health-check"
      },
      {
        "matcher": "Edit|Write|MultiEdit",
        "hooks": [
          {
            "type": "command",
            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:edit-write:gateguard-fact-force\" \"scripts/hooks/gateguard-fact-force.js\" \"standard,strict\"",
            "timeout": 5
          }
        ],
        "description": "Fact-forcing gate: block first Edit/Write/MultiEdit per file and demand investigation (importers, data schemas, user instruction) before allowing",
        "id": "pre:edit-write:gateguard-fact-force"
      },
      {
        "matcher": "Bash",
        "hooks": [
          {
            "type": "command",
            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:bash:gateguard-fact-force\" \"scripts/hooks/gateguard-fact-force.js\" \"standard,strict\"",
            "timeout": 5
          }
        ],
        "description": "Fact-forcing gate: block destructive Bash commands and demand rollback plan; quote user instruction on first Bash per session",
        "id": "pre:bash:gateguard-fact-force"
      }
    ],
    "PreCompact": [
@@ -0,0 +1,265 @@
 #!/usr/bin/env node
 /**
 * PreToolUse Hook: GateGuard Fact-Forcing Gate
 *
 * Forces Claude to investigate before editing files or running commands.
 * Instead of asking "are you sure?" (which LLMs always answer "yes"),
 * this hook demands concrete facts: importers, public API, data schemas.
 *
 * The act of investigation creates awareness that self-evaluation never did.
 *
 * Gates:
 *   - Edit/Write: list importers, affected API, verify data schemas, quote instruction
 *   - Bash (destructive): list targets, rollback plan, quote instruction
 *   - Bash (routine): quote current instruction (once per session)
 *
 * Compatible with run-with-flags.js via module.exports.run().
 * Cross-platform (Windows, macOS, Linux).
 *
 * Full package with config support: pip install gateguard-ai
 * Repo: https://github.com/zunoworks/gateguard
 */
 'use strict';
 const crypto = require('crypto');
 const fs = require('fs');
 const path = require('path');
 // Session state — scoped per session to avoid cross-session races.
 // The session id is taken from CLAUDE_SESSION_ID, then ECC_SESSION_ID; when neither is
 // set it falls back to the parent PID (or this process's PID) so sessions stay isolated.
 const STATE_DIR = process.env.GATEGUARD_STATE_DIR || path.join(process.env.HOME || process.env.USERPROFILE || '/tmp', '.gateguard');
 const SESSION_ID = process.env.CLAUDE_SESSION_ID || process.env.ECC_SESSION_ID || `pid-${process.ppid || process.pid}`;
 // Anything outside [a-zA-Z0-9_-] is replaced so the id is always a safe file-name component.
 const STATE_FILE = path.join(STATE_DIR, `state-${SESSION_ID.replace(/[^a-zA-Z0-9_-]/g, '_')}.json`);
 // State expires after 30 minutes of inactivity
 const SESSION_TIMEOUT_MS = 30 * 60 * 1000;
 // Maximum checked entries to prevent unbounded growth
 const MAX_CHECKED_ENTRIES = 500;
 // Budget for "__"-prefixed keys (including the routine-bash marker) kept when pruning
 const MAX_SESSION_KEYS = 50;
 const ROUTINE_BASH_SESSION_KEY = '__bash_session__';
 // Destructive command detector. Word boundaries are applied per alternative: a trailing \b
 // can never match after "=" or "--" when the next character is "/" or a space, which is
 // why `dd if=/dev/zero` and `git checkout -- file` need boundary-free alternatives.
 // `git clean` accepts combined short flags such as -fd or -xdf.
 const DESTRUCTIVE_BASH = /\b(?:rm\s+-rf\b|git\s+reset\s+--hard\b|git\s+checkout\s+--|git\s+clean\s+-[a-z]*f|drop\s+table\b|delete\s+from\b|truncate\b|git\s+push\s+--force\b|dd\s+if=)/i;
 // --- State management (per-session, atomic writes, bounded) ---
 /**
  * Load this session's gate state from STATE_FILE.
  *
  * A fresh state ({ checked: [], last_active: now }) is returned when the file is
  * missing, unreadable, not valid JSON, not shaped like a state object, or older than
  * SESSION_TIMEOUT_MS. An expired file is also deleted (best effort).
  *
  * @returns {{checked: string[], last_active: number}} state whose `checked` is always an array
  */
 function loadState() {
  const freshState = () => ({ checked: [], last_active: Date.now() });
  try {
    if (!fs.existsSync(STATE_FILE)) {
      return freshState();
    }
    const state = JSON.parse(fs.readFileSync(STATE_FILE, 'utf8'));
    // Callers invoke state.checked.includes()/push(); a hand-edited or truncated file
    // without a `checked` array must not reach them.
    if (state === null || typeof state !== 'object' || !Array.isArray(state.checked)) {
      return freshState();
    }
    const lastActive = state.last_active || 0;
    if (Date.now() - lastActive > SESSION_TIMEOUT_MS) {
      try { fs.unlinkSync(STATE_FILE); } catch (_) { /* ignore */ }
      return freshState();
    }
    return state;
  } catch (_) { /* unreadable or corrupt state: fall through to a fresh one */ }
  return freshState();
 }
 /**
  * Cap the list of checked keys at MAX_CHECKED_ENTRIES.
  *
  * Lists at or under the cap are returned untouched (same array). Otherwise the result
  * is rebuilt as: the routine-bash marker (if present), then the most recent other
  * "__"-prefixed keys, then the most recent file keys, each group limited to its
  * remaining slot budget.
  *
  * @param {string[]} checked keys in insertion order (oldest first)
  * @returns {string[]} the original array, or a pruned copy
  */
 function pruneCheckedEntries(checked) {
  if (checked.length <= MAX_CHECKED_ENTRIES) {
    return checked;
  }
  // slice(-0) is slice(0) and would return the WHOLE list, so a zero budget needs
  // an explicit empty result.
  const takeLast = (list, count) => (count > 0 ? list.slice(-count) : []);
  const preserved = checked.includes(ROUTINE_BASH_SESSION_KEY) ? [ROUTINE_BASH_SESSION_KEY] : [];
  const sessionKeys = checked.filter(k => k.startsWith('__') && k !== ROUTINE_BASH_SESSION_KEY);
  const fileKeys = checked.filter(k => !k.startsWith('__'));
  const remainingSessionSlots = Math.max(MAX_SESSION_KEYS - preserved.length, 0);
  const cappedSession = takeLast(sessionKeys, remainingSessionSlots);
  const remainingFileSlots = Math.max(MAX_CHECKED_ENTRIES - preserved.length - cappedSession.length, 0);
  const cappedFiles = takeLast(fileKeys, remainingFileSlots);
  return [...preserved, ...cappedSession, ...cappedFiles];
 }
 /**
  * Persist the session state to STATE_FILE (best effort — failures never throw).
  *
  * Side effects on the argument: `last_active` is set to now and `checked` is replaced
  * by its pruned form before writing.
  *
  * @param {{checked: string[], last_active: number}} state state object to persist
  * @returns {void}
  */
 function saveState(state) {
  let tmpFile = null;
  try {
    state.last_active = Date.now();
    state.checked = pruneCheckedEntries(state.checked);
    fs.mkdirSync(STATE_DIR, { recursive: true });
    // Atomic write: temp file + rename prevents partial reads
    tmpFile = `${STATE_FILE}.tmp.${process.pid}`;
    fs.writeFileSync(tmpFile, JSON.stringify(state, null, 2), 'utf8');
    fs.renameSync(tmpFile, STATE_FILE);
  } catch (_) {
    // Persisting is best effort, but do not leave an orphaned temp file behind when
    // the write or rename fails.
    if (tmpFile !== null) {
      try { fs.unlinkSync(tmpFile); } catch (_cleanupError) { /* ignore */ }
    }
  }
 }
 /**
  * Record `key` as gated for this session.
  * The state is written back only when the key was not already recorded.
  *
  * @param {string} key file path or "__"-prefixed session key
  * @returns {void}
  */
 function markChecked(key) {
  const session = loadState();
  const alreadyRecorded = session.checked.includes(key);
  if (alreadyRecorded) {
    return;
  }
  session.checked.push(key);
  saveState(session);
 }
 /**
  * Report whether `key` has already been gated in this session.
  * The state is saved on every call, hit or miss, which refreshes `last_active`.
  *
  * @param {string} key file path or "__"-prefixed session key
  * @returns {boolean} true when the key is already recorded
  */
 function isChecked(key) {
  const session = loadState();
  const { checked } = session;
  const wasSeen = checked.includes(key);
  // Lookup happens before the save, matching the load → test → save order.
  saveState(session);
  return wasSeen;
 }
 // Prune stale session files older than 1 hour
 /**
  * Startup sweep, run once when the module loads: deletes `state-*.json` files in
  * STATE_DIR whose mtime is older than twice SESSION_TIMEOUT_MS. Any error (missing
  * directory, a failing stat/unlink) silently ends the sweep.
  */
 (function pruneStaleFiles() {
  try {
    const entries = fs.readdirSync(STATE_DIR);
    const sweepStartedAt = Date.now();
    const maxAgeMs = SESSION_TIMEOUT_MS * 2;
    const looksLikeStateFile = name => name.startsWith('state-') && name.endsWith('.json');
    entries.filter(looksLikeStateFile).forEach(name => {
      const fullPath = path.join(STATE_DIR, name);
      const ageMs = sweepStartedAt - fs.statSync(fullPath).mtimeMs;
      if (ageMs > maxAgeMs) {
        fs.unlinkSync(fullPath);
      }
    });
  } catch (_) { /* ignore */ }
 })();
 // --- Sanitize file path against injection ---
 /**
  * Make a file path safe to embed in a single line of a gate message.
  *
  * C0/C1 control characters (including NUL, CR, LF and NEL), Unicode line/paragraph
  * separators and bidi marks/overrides/isolates are each replaced with a space; the
  * result is trimmed and truncated to 500 characters. Non-string input is stringified
  * rather than throwing, and null/undefined become ''.
  *
  * @param {*} filePath path taken from the hook input
  * @returns {string} single-line path of at most 500 characters
  */
 function sanitizePath(filePath) {
  const text = filePath == null ? '' : String(filePath);
  return text
    .replace(/[\x00-\x1f\x7f-\x9f\u200e\u200f\u2028\u2029\u202a-\u202e\u2066-\u2069]/g, ' ')
    .trim()
    .slice(0, 500);
 }
 // --- Gate messages ---
 /**
  * Build the denial message shown on the first Edit/MultiEdit of a file.
  *
  * @param {string} filePath path being edited (sanitized before interpolation)
  * @returns {string} multi-line gate message
  */
 function editGateMsg(filePath) {
  const target = sanitizePath(filePath);
  const heading = `Before editing ${target}, present these facts:`;
  const facts = [
    '1. List ALL files that import/require this file (use Grep)',
    '2. List the public functions/classes affected by this change',
    '3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
    '4. Quote the user\'s current instruction verbatim'
  ];
  const closing = 'Present the facts, then retry the same operation.';
  return `[Fact-Forcing Gate]\n\n${heading}\n\n${facts.join('\n')}\n\n${closing}`;
 }
 /**
  * Build the denial message shown on the first Write of a file.
  *
  * @param {string} filePath path being written (sanitized before interpolation)
  * @returns {string} multi-line gate message
  */
 function writeGateMsg(filePath) {
  const target = sanitizePath(filePath);
  const heading = `Before creating ${target}, present these facts:`;
  const facts = [
    '1. Name the file(s) and line(s) that will call this new file',
    '2. Confirm no existing file serves the same purpose (use Glob)',
    '3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
    '4. Quote the user\'s current instruction verbatim'
  ];
  const closing = 'Present the facts, then retry the same operation.';
  return `[Fact-Forcing Gate]\n\n${heading}\n\n${facts.join('\n')}\n\n${closing}`;
 }
 /**
  * Build the denial message shown when a destructive Bash command is first attempted.
  *
  * @returns {string} multi-line gate message
  */
 function destructiveBashMsg() {
  const heading = 'Destructive command detected. Before running, present:';
  const facts = [
    '1. List all files/data this command will modify or delete',
    '2. Write a one-line rollback procedure',
    '3. Quote the user\'s current instruction verbatim'
  ];
  const closing = 'Present the facts, then retry the same operation.';
  return `[Fact-Forcing Gate]\n\n${heading}\n\n${facts.join('\n')}\n\n${closing}`;
 }
 /**
  * Build the denial message shown on the first routine Bash command of a session.
  *
  * @returns {string} multi-line gate message
  */
 function routineBashMsg() {
  const request = 'Quote the user\'s current instruction verbatim.';
  const followUp = 'Then retry the same operation.';
  return `[Fact-Forcing Gate]\n\n${request}\n${followUp}`;
 }
 // --- Deny helper ---
 /**
  * Wrap a reason in a PreToolUse "deny" decision.
  *
  * @param {string} reason text placed in permissionDecisionReason
  * @returns {{stdout: string, exitCode: number}} JSON-encoded decision plus exit code 0
  */
 function denyResult(reason) {
  const hookSpecificOutput = {
    hookEventName: 'PreToolUse',
    permissionDecision: 'deny',
    permissionDecisionReason: reason
  };
  const stdout = JSON.stringify({ hookSpecificOutput });
  return { stdout, exitCode: 0 };
 }
 // --- Core logic (exported for run-with-flags.js) ---
 /**
  * List the file paths a MultiEdit call targets, in edit order.
  * Supports a single top-level `tool_input.file_path` shared by every edit as well as
  * a `file_path` on each individual edit (the per-edit value wins when both exist).
  *
  * @param {object} toolInput the hook payload's tool_input
  * @returns {string[]} non-empty paths, one per edit that resolves to a path
  */
 function collectMultiEditPaths(toolInput) {
  const sharedPath = typeof toolInput.file_path === 'string' ? toolInput.file_path : '';
  const edits = Array.isArray(toolInput.edits) ? toolInput.edits : [];
  return edits
    .map(edit => {
      const ownPath = edit && typeof edit.file_path === 'string' ? edit.file_path : '';
      return ownPath || sharedPath;
    })
    .filter(Boolean);
 }
 /**
  * Decide whether a tool call passes the fact-forcing gate.
  *
  * - Edit / Write / MultiEdit: the first attempt per file path is denied; later ones pass.
  * - Bash matching DESTRUCTIVE_BASH: the first attempt of each distinct command is denied.
  * - Other Bash: the first command of the session is denied.
  * - Anything else, and any malformed input, is allowed.
  *
  * @param {string|object} rawInput hook payload, as a JSON string or an already-parsed object
  * @returns {string|object} `rawInput` unchanged to allow, or a denyResult() object to block
  */
 function run(rawInput) {
  let data;
  try {
    data = typeof rawInput === 'string' ? JSON.parse(rawInput) : rawInput;
  } catch (_) {
    return rawInput; // allow on parse error
  }
  // JSON such as `null` or `42` parses fine but carries no tool call: allow it.
  if (data === null || typeof data !== 'object') {
    return rawInput;
  }
  const rawToolName = typeof data.tool_name === 'string' ? data.tool_name : '';
  const toolInput = data.tool_input !== null && typeof data.tool_input === 'object' ? data.tool_input : {};
  // Normalize: case-insensitive matching via lookup map
  const TOOL_MAP = { 'edit': 'Edit', 'write': 'Write', 'multiedit': 'MultiEdit', 'bash': 'Bash' };
  const toolName = TOOL_MAP[rawToolName.toLowerCase()] || rawToolName;
  if (toolName === 'Edit' || toolName === 'Write') {
    const filePath = typeof toolInput.file_path === 'string' ? toolInput.file_path : '';
    if (!filePath) {
      return rawInput; // allow
    }
    if (!isChecked(filePath)) {
      markChecked(filePath);
      return denyResult(toolName === 'Edit' ? editGateMsg(filePath) : writeGateMsg(filePath));
    }
    return rawInput; // allow
  }
  if (toolName === 'MultiEdit') {
    // Deny on the first ungated path only; the retry then moves on to the next one.
    for (const filePath of collectMultiEditPaths(toolInput)) {
      if (!isChecked(filePath)) {
        markChecked(filePath);
        return denyResult(editGateMsg(filePath));
      }
    }
    return rawInput; // allow
  }
  if (toolName === 'Bash') {
    const command = typeof toolInput.command === 'string' ? toolInput.command : '';
    if (DESTRUCTIVE_BASH.test(command)) {
      // Gate destructive commands on first attempt; allow retry after facts presented.
      // The key is a hash of the exact command text, so any change to it gates again.
      const key = '__destructive__' + crypto.createHash('sha256').update(command).digest('hex').slice(0, 16);
      if (!isChecked(key)) {
        markChecked(key);
        return denyResult(destructiveBashMsg());
      }
      return rawInput; // allow retry after facts presented
    }
    if (!isChecked(ROUTINE_BASH_SESSION_KEY)) {
      markChecked(ROUTINE_BASH_SESSION_KEY);
      return denyResult(routineBashMsg());
    }
    return rawInput; // allow
  }
  return rawInput; // allow
 }
 // Public API: `run` is the only export; the file header notes that run-with-flags.js
 // consumes it via module.exports.run().
 module.exports = { run };
@@ -0,0 +1,120 @@
 ---
 name: gateguard
 description: Fact-forcing gate that blocks Edit/Write/Bash (including MultiEdit) and demands concrete investigation (importers, data schemas, user instruction) before allowing the action. Measurably improves output quality by +2.25 points vs ungated agents.
 origin: community
 ---
 # GateGuard — Fact-Forcing Pre-Action Gate
 A PreToolUse hook that forces Claude to investigate before editing. Instead of self-evaluation ("are you sure?"), it demands concrete facts. The act of investigation creates awareness that self-evaluation never did.
 ## When to Activate
 - Working on any codebase where file edits affect multiple modules
 - Projects with data files that have specific schemas or date formats
 - Teams where AI-generated code must match existing patterns
 - Any workflow where Claude tends to guess instead of investigating
 ## Core Concept
 LLM self-evaluation doesn't work. Ask "did you violate any policies?" and the answer is always "no." This is verified experimentally.
 But asking "list every file that imports this module" forces the LLM to run Grep and Read. The investigation itself creates context that changes the output.
 **Three-stage gate:**
 ```
 1. DENY  — block the first Edit/Write/Bash attempt
 2. FORCE — tell the model exactly which facts to gather
 3. ALLOW — permit retry after facts are presented
 ```
 No competitor does all three. Most stop at deny.
 ## Evidence
 Two independent A/B tests, identical agents, same task:
 | Task | Gated | Ungated | Gap |
 | --- | --- | --- | --- |
 | Analytics module | 8.0/10 | 6.5/10 | +1.5 |
 | Webhook validator | 10.0/10 | 7.0/10 | +3.0 |
 | **Average** | **9.0** | **6.75** | **+2.25** |
 Both agents produce code that runs and passes tests. The difference is design depth.
 ## Gate Types
 ### Edit / MultiEdit Gate (first edit per file)
 MultiEdit is handled identically — each file in the batch is gated individually.
 ```
 Before editing {file_path}, present these facts:
 1. List ALL files that import/require this file (use Grep)
 2. List the public functions/classes affected by this change
 3. If this file reads/writes data files, show field names, structure,
   and date format (use redacted or synthetic values, not raw production data)
 4. Quote the user's current instruction verbatim
 ```
 ### Write Gate (first new file creation)
 ```
 Before creating {file_path}, present these facts:
 1. Name the file(s) and line(s) that will call this new file
 2. Confirm no existing file serves the same purpose (use Glob)
 3. If this file reads/writes data files, show field names, structure,
   and date format (use redacted or synthetic values, not raw production data)
 4. Quote the user's current instruction verbatim
 ```
 ### Destructive Bash Gate (first attempt of each distinct destructive command)
 Triggers on: `rm -rf`, `git reset --hard`, `git push --force`, `drop table`, etc.
 ```
 1. List all files/data this command will modify or delete
 2. Write a one-line rollback procedure
 3. Quote the user's current instruction verbatim
 ```
 ### Routine Bash Gate (once per session)
 ```
 Quote the user's current instruction verbatim.
 ```
 ## Quick Start
 ### Option A: Use the ECC hook (zero install)
 The hook at `scripts/hooks/gateguard-fact-force.js` is included in this plugin. Enable it via hooks.json.
 ### Option B: Full package with config
 ```bash
 pip install gateguard-ai
 gateguard init
 ```
 This adds `.gateguard.yml` for per-project configuration (custom messages, ignore paths, gate toggles).
 ## Anti-Patterns
 - **Don't use self-evaluation instead.** "Are you sure?" always gets "yes." This is experimentally verified.
 - **Don't skip the data schema check.** Both A/B test agents assumed ISO-8601 dates when real data used `%Y/%m/%d %H:%M`. Checking data structure (with redacted values) prevents this entire class of bugs.
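 - **Don't gate every single Bash command.** Routine bash gates once per session. Destructive bash gates the first attempt of each distinct command; retrying the identical command after presenting the facts is allowed. This balance avoids slowdown while catching real risks.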
 ## Best Practices
 - Let the gate fire naturally. Don't try to pre-answer the gate questions — the investigation itself is what improves quality.
 - Customize gate messages for your domain. If your project has specific conventions, add them to the gate prompts.
 - Use `.gateguard.yml` to ignore paths like `.venv/`, `node_modules/`, `.git/`.
 ## Related Skills
 - `safety-guard` — Runtime safety checks (complementary, not overlapping)
 - `code-reviewer` — Post-edit review (GateGuard is pre-edit investigation)
@@ -0,0 +1,437 @@
 /**
 * Tests for scripts/hooks/gateguard-fact-force.js via run-with-flags.js
 */
 const assert = require('assert');
 const fs = require('fs');
 const path = require('path');
 const { spawnSync } = require('child_process');
 // Hook runner script, resolved two directories above this test file.
 const runner = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'run-with-flags.js');
 // A caller-provided GATEGUARD_STATE_DIR is reused as-is (and is not deleted by the
 // cleanup at the end of runTests); otherwise a fresh temp directory is created.
 const externalStateDir = process.env.GATEGUARD_STATE_DIR;
 const tmpRoot = process.env.TMPDIR || process.env.TEMP || process.env.TMP || '/tmp';
 const stateDir = externalStateDir || fs.mkdtempSync(path.join(tmpRoot, 'gateguard-test-'));
 // Use a fixed session ID so test process and spawned hook process share the same state file
 const TEST_SESSION_ID = 'gateguard-test-session';
 // Must mirror the hook's `state-<session id>.json` naming inside the state directory.
 const stateFile = path.join(stateDir, `state-${TEST_SESSION_ID}.json`);
 /**
  * Run one named test case and print its outcome.
  *
  * @param {string} name label printed next to the pass/fail mark
  * @param {Function} fn test body; any throw counts as a failure
  * @returns {boolean} true when fn completed without throwing
  */
 function test(name, fn) {
  try {
    fn();
  } catch (error) {
    console.log(`  ✗ ${name}`);
    console.log(`    Error: ${error.message}`);
    return false;
  }
  console.log(`  ✓ ${name}`);
  return true;
 }
 /**
  * Delete the shared state file so the next test starts ungated.
  * A missing file is fine; a failed delete is reported on stderr, not thrown.
  */
 function clearState() {
  try {
    const present = fs.existsSync(stateFile);
    if (!present) {
      return;
    }
    fs.unlinkSync(stateFile);
  } catch (err) {
    console.error(`  [clearState] failed to remove ${stateFile}: ${err.message}`);
  }
 }
 /**
  * Seed the state file with entries last active 31 minutes ago, i.e. already past the
  * 30-minute expiry the timeout test exercises. Failures are ignored.
  */
 function writeExpiredState() {
  try {
    fs.mkdirSync(stateDir, { recursive: true });
    const thirtyOneMinutesMs = 31 * 60 * 1000;
    const snapshot = {
      checked: ['some_file.js', '__bash_session__'],
      last_active: Date.now() - thirtyOneMinutesMs // 31 minutes ago
    };
    const payload = JSON.stringify(snapshot);
    fs.writeFileSync(stateFile, payload, 'utf8');
  } catch (_) { /* ignore */ }
 }
 /**
  * Overwrite the shared state file with the given state (compact JSON).
  * Unlike writeExpiredState, errors propagate to the caller.
  *
  * @param {object} state value to serialize
  */
 function writeState(state) {
  fs.mkdirSync(stateDir, { recursive: true });
  const payload = JSON.stringify(state);
  fs.writeFileSync(stateFile, payload, 'utf8');
 }
 /**
  * Spawn the hook runner for one hook id and feed it `input` on stdin.
  *
  * The child runs under the same Node binary as this test process (process.execPath),
  * so the result does not depend on which `node`, if any, is on PATH. The environment
  * is the parent's, with the profile, state directory and session id pinned for the
  * test, then `env` applied last so callers can override any of them.
  *
  * @param {string} hookId hook identifier passed as the runner's first argument
  * @param {string|object} input raw stdin text, or a value to JSON-encode
  * @param {object} env extra environment variables for the child
  * @returns {{code: number, stdout: string, stderr: string}} code is 1 when the child
  *   produced no integer exit status (for example on timeout or signal)
  */
 function spawnGateguardHook(hookId, input, env) {
  const rawInput = typeof input === 'string' ? input : JSON.stringify(input);
  const result = spawnSync(process.execPath, [
    runner,
    hookId,
    'scripts/hooks/gateguard-fact-force.js',
    'standard,strict'
  ], {
    input: rawInput,
    encoding: 'utf8',
    env: {
      ...process.env,
      ECC_HOOK_PROFILE: 'standard',
      GATEGUARD_STATE_DIR: stateDir,
      CLAUDE_SESSION_ID: TEST_SESSION_ID,
      ...env
    },
    timeout: 15000,
    stdio: ['pipe', 'pipe', 'pipe']
  });
  return {
    code: Number.isInteger(result.status) ? result.status : 1,
    stdout: result.stdout || '',
    stderr: result.stderr || ''
  };
 }
 /**
  * Run the gate through the Edit/Write/MultiEdit hook id.
  *
  * @param {string|object} input hook payload
  * @param {object} [env] extra environment variables for the child
  * @returns {{code: number, stdout: string, stderr: string}}
  */
 function runHook(input, env = {}) {
  return spawnGateguardHook('pre:edit-write:gateguard-fact-force', input, env);
 }
 /**
  * Run the gate through the Bash hook id.
  *
  * @param {string|object} input hook payload
  * @param {object} [env] extra environment variables for the child
  * @returns {{code: number, stdout: string, stderr: string}}
  */
 function runBashHook(input, env = {}) {
  return spawnGateguardHook('pre:bash:gateguard-fact-force', input, env);
 }
 /**
  * Parse a hook's stdout as JSON.
  *
  * @param {string} stdout text captured from the hook process
  * @returns {*} the parsed value, or null when stdout is not valid JSON
  */
 function parseOutput(stdout) {
  let parsed = null;
  try {
    parsed = JSON.parse(stdout);
  } catch (_) {
    parsed = null;
  }
  return parsed;
 }
 /**
  * Run every gateguard-fact-force test case in order, print a pass/fail summary and
  * exit the process with status 1 if any case failed (0 otherwise).
  *
  * The cases share one state file (stateFile). Most start with clearState(), but a few
  * deliberately rely on state left behind by the preceding case, so the order matters.
  * Each case spawns the hook through runHook()/runBashHook().
  */
 function runTests() {
  console.log('\n=== Testing gateguard-fact-force ===\n');
  let passed = 0;
  let failed = 0;
  // --- Test 1: denies first Edit per file ---
  clearState();
  if (test('denies first Edit per file with fact-forcing message', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js', old_string: 'foo', new_string: 'bar' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('import/require'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/app.js'));
  })) passed++; else failed++;
  // --- Test 2: allows second Edit on same file ---
  // No clearState() here: this case depends on /src/app.js having been gated by Test 1.
  if (test('allows second Edit on same file (gate already passed)', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js', old_string: 'foo', new_string: 'bar' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    // When allowed, the hook passes through the raw input (no hookSpecificOutput)
    // OR if hookSpecificOutput exists, it must not be deny
    if (output.hookSpecificOutput) {
      assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
        'should not deny second edit on same file');
    } else {
      // Pass-through: output matches original input (allow)
      assert.strictEqual(output.tool_name, 'Edit', 'pass-through should preserve input');
    }
  })) passed++; else failed++;
  // --- Test 3: denies first Write per file ---
  clearState();
  if (test('denies first Write per file with fact-forcing message', () => {
    const input = {
      tool_name: 'Write',
      tool_input: { file_path: '/src/new-file.js', content: 'console.log("hello")' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('creating'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('call this new file'));
  })) passed++; else failed++;
  // --- Test 4: denies destructive Bash, allows retry ---
  clearState();
  if (test('denies destructive Bash commands, allows retry after facts presented', () => {
    const input = {
      tool_name: 'Bash',
      tool_input: { command: 'rm -rf /important/data' }
    };
    // First call: should deny
    const result1 = runBashHook(input);
    assert.strictEqual(result1.code, 0, 'first call exit code should be 0');
    const output1 = parseOutput(result1.stdout);
    assert.ok(output1, 'first call should produce JSON output');
    assert.strictEqual(output1.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output1.hookSpecificOutput.permissionDecisionReason.includes('Destructive'));
    assert.ok(output1.hookSpecificOutput.permissionDecisionReason.includes('rollback'));
    // Second call (retry after facts presented): should allow
    const result2 = runBashHook(input);
    assert.strictEqual(result2.code, 0, 'second call exit code should be 0');
    const output2 = parseOutput(result2.stdout);
    assert.ok(output2, 'second call should produce valid JSON output');
    if (output2.hookSpecificOutput) {
      assert.notStrictEqual(output2.hookSpecificOutput.permissionDecision, 'deny',
        'should not deny destructive bash retry after facts presented');
    } else {
      assert.strictEqual(output2.tool_name, 'Bash', 'pass-through should preserve input');
    }
  })) passed++; else failed++;
  // --- Test 5: denies first routine Bash, allows second ---
  clearState();
  if (test('denies first routine Bash, allows second', () => {
    const input = {
      tool_name: 'Bash',
      tool_input: { command: 'ls -la' }
    };
    // First call: should deny
    const result1 = runBashHook(input);
    assert.strictEqual(result1.code, 0, 'first call exit code should be 0');
    const output1 = parseOutput(result1.stdout);
    assert.ok(output1, 'first call should produce JSON output');
    assert.strictEqual(output1.hookSpecificOutput.permissionDecision, 'deny');
    // Second call: should allow
    const result2 = runBashHook(input);
    assert.strictEqual(result2.code, 0, 'second call exit code should be 0');
    const output2 = parseOutput(result2.stdout);
    assert.ok(output2, 'second call should produce valid JSON output');
    if (output2.hookSpecificOutput) {
      assert.notStrictEqual(output2.hookSpecificOutput.permissionDecision, 'deny',
        'should not deny second routine bash');
    } else {
      assert.strictEqual(output2.tool_name, 'Bash', 'pass-through should preserve input');
    }
  })) passed++; else failed++;
  // --- Test 6: session state resets after timeout ---
  // No clearState() needed: writeExpiredState() overwrites the state file, and the
  // seeded (expired) state already lists some_file.js as checked.
  if (test('session state resets after 30-minute timeout', () => {
    writeExpiredState();
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: 'some_file.js', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output after expired state');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
      'should deny again after session timeout (state was reset)');
  })) passed++; else failed++;
  // --- Test 7: allows unknown tool names ---
  clearState();
  if (test('allows unknown tool names through', () => {
    const input = {
      tool_name: 'Read',
      tool_input: { file_path: '/src/app.js' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    if (output.hookSpecificOutput) {
      assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
        'should not deny unknown tool');
    } else {
      assert.strictEqual(output.tool_name, 'Read', 'pass-through should preserve input');
    }
  })) passed++; else failed++;
  // --- Test 8: sanitizes file paths with newlines ---
  clearState();
  if (test('sanitizes file paths containing newlines', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/app.js\ninjected content', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    const reason = output.hookSpecificOutput.permissionDecisionReason;
    // The file path portion of the reason must not contain any raw newlines
    // (sanitizePath replaces \n and \r with spaces)
    const pathLine = reason.split('\n').find(l => l.includes('/src/app.js'));
    assert.ok(pathLine, 'reason should mention the file path');
    assert.ok(!pathLine.includes('\n'), 'file path line must not contain raw newlines');
    assert.ok(!reason.includes('/src/app.js\n'), 'newline after file path should be sanitized');
    assert.ok(!reason.includes('\ninjected'), 'injected content must not appear on its own line');
  })) passed++; else failed++;
  // --- Test 9: respects ECC_DISABLED_HOOKS ---
  clearState();
  if (test('respects ECC_DISABLED_HOOKS (skips when disabled)', () => {
    const input = {
      tool_name: 'Edit',
      tool_input: { file_path: '/src/disabled.js', old_string: 'a', new_string: 'b' }
    };
    const result = runHook(input, {
      ECC_DISABLED_HOOKS: 'pre:edit-write:gateguard-fact-force'
    });
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    if (output.hookSpecificOutput) {
      assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
        'should not deny when hook is disabled');
    } else {
      // When disabled, hook passes through raw input
      assert.strictEqual(output.tool_name, 'Edit', 'pass-through should preserve input');
    }
  })) passed++; else failed++;
  // --- Test 10: MultiEdit gates first unchecked file ---
  clearState();
  if (test('denies first MultiEdit with unchecked file', () => {
    const input = {
      tool_name: 'MultiEdit',
      tool_input: {
        edits: [
          { file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
          { file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
        ]
      }
    };
    const result = runHook(input);
    assert.strictEqual(result.code, 0, 'exit code should be 0');
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce JSON output');
    assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
    assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/multi-a.js'));
  })) passed++; else failed++;
  // --- Test 11: MultiEdit allows after all files gated ---
  // No clearState() here: this case continues from the state Test 10 left behind.
  if (test('allows MultiEdit after all files gated', () => {
    // multi-a.js was gated in test 10; gate multi-b.js
    const input2 = {
      tool_name: 'MultiEdit',
      tool_input: { edits: [{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }] }
    };
    runHook(input2); // gates multi-b.js
    // Now both files are gated — retry should allow
    const input3 = {
      tool_name: 'MultiEdit',
      tool_input: {
        edits: [
          { file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
          { file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
        ]
      }
    };
    const result3 = runHook(input3);
    const output3 = parseOutput(result3.stdout);
    assert.ok(output3, 'should produce valid JSON');
    if (output3.hookSpecificOutput) {
      assert.notStrictEqual(output3.hookSpecificOutput.permissionDecision, 'deny',
        'should allow MultiEdit after all files gated');
    }
  })) passed++; else failed++;
  // --- Test 12: reads refresh active session state ---
  clearState();
  if (test('touches last_active on read so active sessions do not age out', () => {
    // 29 minutes old: just inside the 30-minute expiry, so the state must be kept.
    const staleButActive = Date.now() - (29 * 60 * 1000);
    writeState({
      checked: ['/src/keep-alive.js'],
      last_active: staleButActive
    });
    const before = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.strictEqual(before.last_active, staleButActive, 'seed state should use the expected timestamp');
    const result = runHook({
      tool_name: 'Edit',
      tool_input: { file_path: '/src/keep-alive.js', old_string: 'a', new_string: 'b' }
    });
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    if (output.hookSpecificOutput) {
      assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
        'already-checked file should still be allowed');
    }
    const after = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.ok(after.last_active > staleButActive, 'successful reads should refresh last_active');
  })) passed++; else failed++;
  // --- Test 13: pruning preserves routine bash gate marker ---
  clearState();
  if (test('preserves __bash_session__ when pruning oversized state', () => {
    // Seed 781 entries (1 marker + 80 destructive keys + 700 files), well over the
    // 500-entry cap asserted below, so the next save has to prune.
    const checked = ['__bash_session__'];
    for (let i = 0; i < 80; i++) checked.push(`__destructive__${i}`);
    for (let i = 0; i < 700; i++) checked.push(`/src/file-${i}.js`);
    writeState({ checked, last_active: Date.now() });
    runHook({
      tool_name: 'Edit',
      tool_input: { file_path: '/src/newly-gated.js', old_string: 'a', new_string: 'b' }
    });
    const result = runBashHook({
      tool_name: 'Bash',
      tool_input: { command: 'pwd' }
    });
    const output = parseOutput(result.stdout);
    assert.ok(output, 'should produce valid JSON output');
    if (output.hookSpecificOutput) {
      assert.notStrictEqual(output.hookSpecificOutput.permissionDecision, 'deny',
        'routine bash marker should survive pruning');
    }
    const persisted = JSON.parse(fs.readFileSync(stateFile, 'utf8'));
    assert.ok(persisted.checked.includes('__bash_session__'), 'pruned state should retain __bash_session__');
    assert.ok(persisted.checked.length <= 500, 'pruned state should still honor the checked-entry cap');
  })) passed++; else failed++;
  // Cleanup only the temp directory created by this test file.
  if (!externalStateDir) {
    try {
      if (fs.existsSync(stateDir)) {
        fs.rmSync(stateDir, { recursive: true, force: true });
      }
    } catch (err) {
      console.error(`  [cleanup] failed to remove ${stateDir}: ${err.message}`);
    }
  }
  console.log(`\n  ${passed} passed, ${failed} failed\n`);
  process.exit(failed > 0 ? 1 : 0);
 }
 runTests();