diff --git a/scripts/codex/check-codex-global-state.sh b/scripts/codex/check-codex-global-state.sh index eb980199..c9dac74b 100755 --- a/scripts/codex/check-codex-global-state.sh +++ b/scripts/codex/check-codex-global-state.sh @@ -107,11 +107,11 @@ if [[ -f "$CONFIG_FILE" ]]; then check_config_pattern '^\[profiles\.strict\]' "profiles.strict exists" check_config_pattern '^\[profiles\.yolo\]' "profiles.yolo exists" + # Current default connector set (docs/MCP-CONNECTOR-POLICY.md): exactly + # one connector. Former defaults (github, memory, sequential-thinking, + # context7, exa, ...) are opt-in user choices, so they are not required. for section in \ - 'mcp_servers.github' \ - 'mcp_servers.memory' \ - 'mcp_servers.sequential-thinking' \ - 'mcp_servers.context7' + 'mcp_servers.chrome-devtools' do if search_file "^\[$section\]" "$CONFIG_FILE"; then ok "MCP section [$section] exists" @@ -120,25 +120,17 @@ if [[ -f "$CONFIG_FILE" ]]; then fi done - has_context7_legacy=0 - has_context7_current=0 - - if search_file '^\[mcp_servers\.context7\]' "$CONFIG_FILE"; then - has_context7_legacy=1 - fi - - if search_file '^\[mcp_servers\.context7-mcp\]' "$CONFIG_FILE"; then - has_context7_current=1 - fi - - if [[ "$has_context7_legacy" -eq 1 || "$has_context7_current" -eq 1 ]]; then - ok "MCP section [mcp_servers.context7] or [mcp_servers.context7-mcp] exists" - else - fail "MCP section [mcp_servers.context7] or [mcp_servers.context7-mcp] missing" - fi - - if [[ "$has_context7_legacy" -eq 1 && "$has_context7_current" -eq 1 ]]; then - warn "Both [mcp_servers.context7] and [mcp_servers.context7-mcp] exist; prefer one name" + # ECC <= 2.0.0 emitted a url-only exa entry that Codex's stdio-only + # schema rejects, breaking the whole config (#2224). Flag it so users + # re-run the sync (which repairs it) or remove it manually. + if search_file '^\[mcp_servers\.exa\]' "$CONFIG_FILE"; then + exa_block="$(awk '/^\[mcp_servers\.exa\]/{flag=1;next}/^\[/{flag=0}flag' "$CONFIG_FILE")" + if printf '%s\n' "$exa_block" | grep -Eq '^[[:space:]]*url[[:space:]]*=' \ + && ! printf '%s\n' "$exa_block" | grep -Eq '^[[:space:]]*command[[:space:]]*='; then + fail "MCP section [mcp_servers.exa] uses a url key, which Codex rejects for stdio servers — re-run ecc-sync-codex to repair (#2224)" + else + ok "MCP section [mcp_servers.exa] uses the stdio form" + fi fi fi diff --git a/scripts/codex/merge-mcp-config.js b/scripts/codex/merge-mcp-config.js index 0acb0085..721e3c29 100644 --- a/scripts/codex/merge-mcp-config.js +++ b/scripts/codex/merge-mcp-config.js @@ -65,14 +65,14 @@ const PM_EXEC_PARTS = PM_EXEC.split(/\s+/); // ["pnpm", "dlx"] or ["npx"] or ["b // ECC-recommended MCP servers // --------------------------------------------------------------------------- -// GitHub bootstrap uses bash for token forwarding — this is intentionally -// shell-based regardless of package manager, since Codex runs on macOS/Linux. -const GH_BOOTSTRAP = `token=$(gh auth token 2>/dev/null || true); if [ -n "$token" ]; then export GITHUB_PERSONAL_ACCESS_TOKEN="$token"; fi; exec ${PM_EXEC} @modelcontextprotocol/server-github`; - /** * Build a server spec with the detected package manager. * Returns { fields, toml } where fields is for drift detection and * toml is the raw text appended to the file. + * + * Codex's [mcp_servers.*] TOML schema is stdio-only (command/args) — + * never emit a `url` key here. The http/url form is valid only for + * Claude Code's .mcp.json (#2224). */ function dlxServer(name, pkg, extraFields, extraToml) { const args = [...PM_EXEC_PARTS.slice(1), pkg]; @@ -87,31 +87,29 @@ function dlxServer(name, pkg, extraFields, extraToml) { const DEFAULT_MCP_STARTUP_TIMEOUT_SEC = 30; const DEFAULT_MCP_STARTUP_TIMEOUT_TOML = `startup_timeout_sec = ${DEFAULT_MCP_STARTUP_TIMEOUT_SEC}`; +// Current default connector set (docs/MCP-CONNECTOR-POLICY.md): exactly one +// connector. The former defaults (supabase, playwright, context7, exa, +// github, memory, sequential-thinking) were retired in the June 2026 audit +// and must not be re-emitted; they remain opt-in via +// mcp-configs/mcp-servers.json. Existing user-managed entries are never +// touched by the merge (add-only), except the known-invalid repair below. const ECC_SERVERS = { - supabase: dlxServer('supabase', '@supabase/mcp-server-supabase@latest', { startup_timeout_sec: 20.0, tool_timeout_sec: 120.0 }, 'startup_timeout_sec = 20.0\ntool_timeout_sec = 120.0'), - playwright: dlxServer('playwright', '@playwright/mcp@latest', { startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, DEFAULT_MCP_STARTUP_TIMEOUT_TOML), - context7: dlxServer('context7', '@upstash/context7-mcp@latest', { startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, DEFAULT_MCP_STARTUP_TIMEOUT_TOML), - exa: { - fields: { url: 'https://mcp.exa.ai/mcp' }, - toml: `[mcp_servers.exa]\nurl = "https://mcp.exa.ai/mcp"` - }, - github: { - fields: { command: 'bash', args: ['-lc', GH_BOOTSTRAP], startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, - toml: `[mcp_servers.github]\ncommand = "bash"\nargs = ["-lc", ${JSON.stringify(GH_BOOTSTRAP)}]\n${DEFAULT_MCP_STARTUP_TIMEOUT_TOML}` - }, - memory: dlxServer('memory', '@modelcontextprotocol/server-memory', { startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, DEFAULT_MCP_STARTUP_TIMEOUT_TOML), - 'sequential-thinking': dlxServer('sequential-thinking', '@modelcontextprotocol/server-sequential-thinking', { startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, DEFAULT_MCP_STARTUP_TIMEOUT_TOML) + 'chrome-devtools': dlxServer('chrome-devtools', 'chrome-devtools-mcp@latest', { startup_timeout_sec: DEFAULT_MCP_STARTUP_TIMEOUT_SEC }, DEFAULT_MCP_STARTUP_TIMEOUT_TOML) }; -// Append --features arg for supabase after dlxServer builds the base -ECC_SERVERS.supabase.fields.args.push('--features=account,docs,database,debugging,development,functions,storage,branching'); -ECC_SERVERS.supabase.toml = ECC_SERVERS.supabase.toml.replace(/^(args = \[.*)\]$/m, '$1, "--features=account,docs,database,debugging,development,functions,storage,branching"]'); +// ECC <= 2.0.0 emitted [mcp_servers.exa] with a `url` key. Codex rejects +// `url` for stdio servers, which makes the *entire* config.toml fail to +// load (#2224). Repair exactly that ECC-emitted form on every merge so +// re-running the installer fixes broken configs instead of preserving +// them. A user-managed stdio exa entry (command/args) is left untouched. +const RETIRED_INVALID_URL_SERVERS = { + exa: 'https://mcp.exa.ai/mcp' +}; // Legacy section names that should be treated as an existing ECC server. -// e.g. older configs shipped [mcp_servers.context7-mcp] instead of [mcp_servers.context7]. -const LEGACY_ALIASES = { - context7: ['context7-mcp'] -}; +// e.g. older configs shipped [mcp_servers.context7-mcp] instead of +// [mcp_servers.context7]. Empty since the June 2026 default-set reduction. +const LEGACY_ALIASES = {}; // --------------------------------------------------------------------------- // Helpers @@ -241,6 +239,21 @@ function main() { const toAppend = []; const toRemoveLog = []; + // Repair schema-invalid entries emitted by earlier ECC versions (#2224). + for (const [name, invalidUrl] of Object.entries(RETIRED_INVALID_URL_SERVERS)) { + const entry = existing[name]; + const isBrokenEccForm = + entry && + typeof entry.url === 'string' && + entry.url === invalidUrl && + typeof entry.command !== 'string'; + if (isBrokenEccForm) { + toRemoveLog.push(`mcp_servers.${name} (invalid url entry from earlier ECC versions)`); + raw = removeServerFromText(raw, name, existing); + log(` [repair] mcp_servers.${name} — url is not valid for Codex stdio servers, removing`); + } + } + for (const [name, spec] of Object.entries(ECC_SERVERS)) { const entry = existing[name]; const aliases = LEGACY_ALIASES[name] || []; @@ -249,7 +262,9 @@ function main() { // Prefer canonical entry over legacy alias const hasCanonical = entry && typeof entry.command === 'string'; const resolvedEntry = hasCanonical ? entry : legacyName ? existing[legacyName] : null; - // For URL-based servers (exa), check for url field instead of command + // Recognize url-form entries as existing so they are never duplicated. + // (Codex itself rejects url-form stdio servers; ECC only ever emits + // command/args, but a user-managed entry must still count as present.) const urlEntry = !resolvedEntry && entry && typeof entry.url === 'string' ? entry : null; const finalEntry = resolvedEntry || urlEntry; const resolvedLabel = hasCanonical ? name : legacyName || name; @@ -306,11 +321,13 @@ function main() { if (dryRun) { if (toRemoveLog.length > 0) { - log('Dry run — would remove and re-add:'); + log('Dry run — would remove:'); for (const label of toRemoveLog) log(` [remove] ${label}`); } - log('Dry run — would append:'); - console.log(appendText); + if (toAppend.length > 0) { + log('Dry run — would append:'); + console.log(appendText); + } return; } @@ -325,7 +342,7 @@ function main() { } if (hasRemovals && toAppend.length === 0) { - log(`Done. Removed ${toRemoveLog.length} disabled server(s).`); + log(`Done. Removed ${toRemoveLog.length} server section(s).`); return; } diff --git a/scripts/hooks/check-console-log.js b/scripts/hooks/check-console-log.js index f55a5ed1..94e60a15 100755 --- a/scripts/hooks/check-console-log.js +++ b/scripts/hooks/check-console-log.js @@ -28,20 +28,40 @@ const EXCLUDED_PATTERNS = [ const MAX_STDIN = 1024 * 1024; // 1MB limit let data = ''; +let truncated = false; process.stdin.setEncoding('utf8'); process.stdin.on('data', chunk => { if (data.length < MAX_STDIN) { const remaining = MAX_STDIN - data.length; data += chunk.substring(0, remaining); + if (chunk.length > remaining) truncated = true; + } else { + truncated = true; } }); +/** + * Echo stdin back (ECC pass-through convention), then exit once the pipe has + * flushed. Truncated stdin is never echoed: a JSON document cut mid-stream is + * reported by the harness as a Stop hook JSON validation failure (#2090). + */ +function passThroughAndExit() { + if (truncated) { + log('[Hook] check-console-log: stdin exceeded 1MB; suppressing pass-through (fail-open)'); + process.exit(0); + } + if (!data) { + process.exit(0); + } + process.stdout.write(data, () => process.exit(0)); +} + process.stdin.on('end', () => { try { if (!isGitRepo()) { - process.stdout.write(data); - process.exit(0); + passThroughAndExit(); + return; } const files = getGitModifiedFiles(['\\.tsx?$', '\\.jsx?$']) @@ -65,7 +85,6 @@ process.stdin.on('end', () => { log(`[Hook] check-console-log error: ${err.message}`); } - // Always output the original data - process.stdout.write(data); - process.exit(0); + // Always output the original data (unless truncated) + passThroughAndExit(); }); diff --git a/scripts/hooks/cost-tracker.js b/scripts/hooks/cost-tracker.js index 03937578..8eb60527 100755 --- a/scripts/hooks/cost-tracker.js +++ b/scripts/hooks/cost-tracker.js @@ -128,12 +128,22 @@ function sumUsageFromTranscript(transcriptPath) { return { inputTokens, outputTokens, cacheWriteTokens, cacheReadTokens, model }; } -const MAX_STDIN = 64 * 1024; +// 1MB, matching the other Stop hooks. The Stop payload carries +// last_assistant_message, which routinely exceeded the old 64KB cap and +// made this hook echo a JSON document cut mid-stream (#2090). +const MAX_STDIN = 1024 * 1024; let raw = ''; +let truncated = false; process.stdin.setEncoding('utf8'); process.stdin.on('data', chunk => { - if (raw.length < MAX_STDIN) raw += chunk.substring(0, MAX_STDIN - raw.length); + if (raw.length < MAX_STDIN) { + const remaining = MAX_STDIN - raw.length; + raw += chunk.substring(0, remaining); + if (chunk.length > remaining) truncated = true; + } else { + truncated = true; + } }); process.stdin.on('end', () => { @@ -201,6 +211,11 @@ process.stdin.on('end', () => { // Non-blocking — never fail the Stop hook. } - // Pass stdin through (required by ECC hook convention). + // Pass stdin through (ECC hook convention) — but never echo truncated + // stdin: invalid JSON on stdout is reported as a Stop hook failure (#2090). + if (truncated) { + process.stderr.write('[Hook] cost-tracker: stdin exceeded 1MB; suppressing pass-through (fail-open)\n'); + return; + } process.stdout.write(raw); }); diff --git a/scripts/hooks/desktop-notify.js b/scripts/hooks/desktop-notify.js index 469648c7..a9458711 100644 --- a/scripts/hooks/desktop-notify.js +++ b/scripts/hooks/desktop-notify.js @@ -236,15 +236,26 @@ module.exports = { run }; if (require.main === module) { const MAX_STDIN = 1024 * 1024; let data = ''; + let truncated = false; process.stdin.setEncoding('utf8'); process.stdin.on('data', chunk => { if (data.length < MAX_STDIN) { - data += chunk.substring(0, MAX_STDIN - data.length); + const remaining = MAX_STDIN - data.length; + data += chunk.substring(0, remaining); + if (chunk.length > remaining) truncated = true; + } else { + truncated = true; } }); process.stdin.on('end', () => { const output = run(data); + // Never echo truncated stdin — invalid JSON on stdout is reported as a + // Stop hook failure (#2090). + if (truncated) { + log('[DesktopNotify] stdin exceeded 1MB; suppressing pass-through (fail-open)'); + return; + } if (output) process.stdout.write(output); }); } diff --git a/scripts/hooks/gateguard-fact-force.js b/scripts/hooks/gateguard-fact-force.js index 33a4ac0b..d28d818c 100644 --- a/scripts/hooks/gateguard-fact-force.js +++ b/scripts/hooks/gateguard-fact-force.js @@ -592,6 +592,7 @@ function saveState(state) { let mergedChecked = Array.isArray(state.checked) ? state.checked : []; let mergedLastActive = typeof state.last_active === 'number' ? state.last_active : 0; + let mergedDenials = getDenialCount(state); try { if (fs.existsSync(stateFile)) { @@ -602,6 +603,7 @@ function saveState(state) { if (typeof diskState.last_active === 'number') { mergedLastActive = Math.max(mergedLastActive, diskState.last_active); } + mergedDenials = Math.max(mergedDenials, getDenialCount(diskState)); } } catch (_) { /* ignore malformed or transient disk state */ @@ -609,7 +611,8 @@ function saveState(state) { const finalState = { checked: pruneCheckedEntries(mergedChecked), - last_active: Math.max(mergedLastActive, Date.now()) + last_active: Math.max(mergedLastActive, Date.now()), + fact_force_denials: mergedDenials }; // Atomic write: temp file + rename prevents partial reads @@ -652,6 +655,48 @@ function markChecked(key) { return true; } +// --- Fact-force denial dampening (#2142) --- +// +// In long sessions the near-identical four-fact deny blocks accumulate in +// the context window and measurably raise the odds of the model dropping +// into a degenerate repetition loop. Emit the full four-fact block only for +// the first GATEGUARD_FACT_FORCE_FULL_DENIALS denials per session (default +// 3); afterwards emit a condensed single-line denial that carries the +// denial ordinal, so consecutive denials are structurally different and +// never textually identical. True retries of an already-gated target are +// unaffected (they were always allowed). Destructive-Bash and routine-Bash +// gates are unchanged. + +const DEFAULT_FULL_DENIALS = 3; + +function getFullDenialBudget() { + const raw = Number.parseInt(process.env.GATEGUARD_FACT_FORCE_FULL_DENIALS || '', 10); + if (Number.isInteger(raw) && raw >= 0) { + return raw; + } + return DEFAULT_FULL_DENIALS; +} + +function getDenialCount(state) { + const n = Number(state && state.fact_force_denials); + return Number.isFinite(n) && n >= 0 ? Math.floor(n) : 0; +} + +/** + * Record a first-touch target AND count the fact-force denial in the same + * state write. Returns the new denial ordinal (1-based) plus whether the + * write persisted. + */ +function markCheckedAndCountDenial(key) { + const state = loadState(); + if (!state.checked.includes(key)) { + state.checked.push(key); + } + const denials = getDenialCount(state) + 1; + state.fact_force_denials = denials; + return { ok: saveState(state), denials }; +} + function isChecked(key) { const state = loadState(); const found = state.checked.includes(key); @@ -792,6 +837,20 @@ function writeGateMsg(filePath) { ].join('\n'); } +/** + * Condensed single-line denial used after the full-block budget is spent + * (#2142). Carries the denial ordinal so consecutive denials differ + * textually, and a one-line recovery hint instead of the multi-line block. + */ +function condensedGateMsg(action, filePath, ordinal) { + const safe = sanitizePath(filePath); + return ( + `[Fact-Forcing Gate] (denial #${ordinal} this session) First ${action} of ${safe}: ` + + "briefly state importers/callers, affected API, data schemas if any, and the user's verbatim instruction, then retry. " + + '(ECC_GATEGUARD=off disables this gate.)' + ); +} + function destructiveBashMsg() { return [ '[Fact-Forcing Gate]', @@ -902,9 +961,14 @@ function run(rawInput) { } if (!isChecked(filePath)) { - if (!markChecked(filePath)) { + const { ok, denials } = markCheckedAndCountDenial(filePath); + if (!ok) { return allowWithStateWarning(); } + if (denials > getFullDenialBudget()) { + const action = toolName === 'Edit' ? 'edit' : 'creation'; + return denyResult(condensedGateMsg(action, filePath, denials), { includeRecoveryHint: false }); + } return denyResult(toolName === 'Edit' ? editGateMsg(filePath) : writeGateMsg(filePath)); } @@ -920,9 +984,13 @@ function run(rawInput) { for (const edit of edits) { const filePath = edit.file_path || ''; if (filePath && !isClaudeSettingsPath(filePath) && !isChecked(filePath)) { - if (!markChecked(filePath)) { + const { ok, denials } = markCheckedAndCountDenial(filePath); + if (!ok) { return allowWithStateWarning(); } + if (denials > getFullDenialBudget()) { + return denyResult(condensedGateMsg('edit', filePath, denials), { includeRecoveryHint: false }); + } return denyResult(editGateMsg(filePath)); } } diff --git a/scripts/hooks/run-with-flags.js b/scripts/hooks/run-with-flags.js index a0b43fba..c1c3b904 100755 --- a/scripts/hooks/run-with-flags.js +++ b/scripts/hooks/run-with-flags.js @@ -45,40 +45,52 @@ function writeStderr(stderr) { process.stderr.write(stderr.endsWith('\n') ? stderr : `${stderr}\n`); } -function emitHookResult(raw, output) { +/** + * Write stdout fully, then exit. `process.exit()` immediately after + * `process.stdout.write()` drops anything beyond the ~64KB pipe buffer, + * which cut large pass-through payloads mid-JSON and made the harness + * treat the hook as failed (#2222). The write callback fires only after + * the chunk is flushed to the pipe. + */ +function exitWithStdout(text, exitCode) { + if (typeof text !== 'string' || text.length === 0) { + process.exit(exitCode); + } + process.stdout.write(text, () => process.exit(exitCode)); +} + +function resolveHookResult(raw, output) { if (typeof output === 'string' || Buffer.isBuffer(output)) { - process.stdout.write(String(output)); - return 0; + return { stdout: String(output), exitCode: 0 }; } if (output && typeof output === 'object') { writeStderr(output.stderr); + const exitCode = Number.isInteger(output.exitCode) ? output.exitCode : 0; if (Object.prototype.hasOwnProperty.call(output, 'additionalContext')) { - process.stdout.write(buildPreToolUseAdditionalContext(output.additionalContext)); - } else if (Object.prototype.hasOwnProperty.call(output, 'stdout')) { - process.stdout.write(String(output.stdout ?? '')); - } else if (!Number.isInteger(output.exitCode) || output.exitCode === 0) { - process.stdout.write(raw); + return { stdout: buildPreToolUseAdditionalContext(output.additionalContext), exitCode }; } - - return Number.isInteger(output.exitCode) ? output.exitCode : 0; + if (Object.prototype.hasOwnProperty.call(output, 'stdout')) { + return { stdout: String(output.stdout ?? ''), exitCode }; + } + return { stdout: exitCode === 0 ? raw : '', exitCode }; } - process.stdout.write(raw); - return 0; + return { stdout: raw, exitCode: 0 }; } -function writeLegacySpawnOutput(raw, result) { +function resolveLegacySpawnStdout(raw, result) { const stdout = typeof result.stdout === 'string' ? result.stdout : ''; if (stdout) { - process.stdout.write(stdout); - return; + return stdout; } if (Number.isInteger(result.status) && result.status === 0) { - process.stdout.write(raw); + return raw; } + + return ''; } function getPluginRoot() { @@ -92,14 +104,25 @@ async function main() { const [, , hookId, relScriptPath, profilesCsv] = process.argv; const { raw, truncated } = await readStdinRaw(); + // Oversized payloads: never echo the truncated string — a JSON document + // cut mid-stream is treated by the harness as a hook failure, blocking the + // tool call (#2222). Empty stdout + exit 0 means "no opinion", so + // pass-through paths fail open. The hook itself still runs and receives + // the truncated flag (run() context / ECC_HOOK_INPUT_TRUNCATED), so + // security hooks like config-protection can still choose to block. + const sanitizeEcho = text => (truncated && text === raw ? '' : text); + if (truncated) { + process.stderr.write(`[Hook] stdin exceeded ${MAX_STDIN} bytes for ${hookId || 'unknown'}; suppressing pass-through (fail-open unless the hook blocks)\n`); + } + if (!hookId || !relScriptPath) { - process.stdout.write(raw); - process.exit(0); + exitWithStdout(sanitizeEcho(raw), 0); + return; } if (!isHookEnabled(hookId, { profiles: profilesCsv })) { - process.stdout.write(raw); - process.exit(0); + exitWithStdout(sanitizeEcho(raw), 0); + return; } const pluginRoot = getPluginRoot(); @@ -109,14 +132,14 @@ async function main() { // Prevent path traversal outside the plugin root if (!scriptPath.startsWith(resolvedRoot + path.sep)) { process.stderr.write(`[Hook] Path traversal rejected for ${hookId}: ${scriptPath}\n`); - process.stdout.write(raw); - process.exit(0); + exitWithStdout(sanitizeEcho(raw), 0); + return; } if (!fs.existsSync(scriptPath)) { process.stderr.write(`[Hook] Script not found for ${hookId}: ${scriptPath}\n`); - process.stdout.write(raw); - process.exit(0); + exitWithStdout(sanitizeEcho(raw), 0); + return; } // Prefer direct require() when the hook exports a run(rawInput) function. @@ -147,12 +170,13 @@ async function main() { truncated, maxStdin: MAX_STDIN }); - process.exit(emitHookResult(raw, output)); + const result = resolveHookResult(raw, output); + exitWithStdout(sanitizeEcho(result.stdout), result.exitCode); } catch (runErr) { process.stderr.write(`[Hook] run() error for ${hookId}: ${runErr.message}\n`); - process.stdout.write(raw); + exitWithStdout(sanitizeEcho(raw), 0); } - process.exit(0); + return; } // Legacy path: spawn a child Node process for hooks without run() export @@ -171,20 +195,17 @@ async function main() { timeout: 30000 }); - writeLegacySpawnOutput(raw, result); + const legacyStdout = sanitizeEcho(resolveLegacySpawnStdout(raw, result)); if (result.stderr) process.stderr.write(result.stderr); if (result.error || result.signal || result.status === null) { - const failureDetail = result.error - ? result.error.message - : result.signal - ? `terminated by signal ${result.signal}` - : 'missing exit status'; + const failureDetail = result.error ? result.error.message : result.signal ? `terminated by signal ${result.signal}` : 'missing exit status'; writeStderr(`[Hook] legacy hook execution failed for ${hookId}: ${failureDetail}`); - process.exit(1); + exitWithStdout(legacyStdout, 1); + return; } - process.exit(Number.isInteger(result.status) ? result.status : 0); + exitWithStdout(legacyStdout, Number.isInteger(result.status) ? result.status : 0); } main().catch(err => { diff --git a/scripts/hooks/stop-format-typecheck.js b/scripts/hooks/stop-format-typecheck.js index 50c5d2c6..a7bc0b78 100644 --- a/scripts/hooks/stop-format-typecheck.js +++ b/scripts/hooks/stop-format-typecheck.js @@ -196,13 +196,30 @@ function run(rawInput) { if (require.main === module) { let stdinData = ''; + let truncated = false; process.stdin.setEncoding('utf8'); process.stdin.on('data', chunk => { - if (stdinData.length < MAX_STDIN) stdinData += chunk.substring(0, MAX_STDIN - stdinData.length); + if (stdinData.length < MAX_STDIN) { + const remaining = MAX_STDIN - stdinData.length; + stdinData += chunk.substring(0, remaining); + if (chunk.length > remaining) truncated = true; + } else { + truncated = true; + } }); process.stdin.on('end', () => { - process.stdout.write(run(stdinData)); - process.exit(0); + const output = run(stdinData); + // Never echo truncated stdin (invalid JSON would be reported as a Stop + // hook failure, #2090); flush stdout before exiting so large payloads + // are not cut at the pipe buffer. + if (truncated) { + process.stderr.write('[Hook] stop-format-typecheck: stdin exceeded 1MB; suppressing pass-through (fail-open)\n'); + process.exit(0); + } + if (!output) { + process.exit(0); + } + process.stdout.write(output, () => process.exit(0)); }); } diff --git a/skills/gateguard/SKILL.md b/skills/gateguard/SKILL.md index 6b112599..ab31ffcb 100644 --- a/skills/gateguard/SKILL.md +++ b/skills/gateguard/SKILL.md @@ -98,6 +98,13 @@ If GateGuard blocks setup or repair work, start the session with `ECC_GATEGUARD=off`. For hook-level control, keep using `ECC_DISABLED_HOOKS` with the GateGuard hook ID. +In long sessions, only the first `GATEGUARD_FACT_FORCE_FULL_DENIALS` +fact-force denials (default 3) emit the full four-fact block; later +denials are condensed to a single line carrying the denial ordinal, so +near-identical blocks cannot accumulate in the context window and +amplify model repetition loops (#2142). Retrying the same file or +command after presenting facts never re-triggers the gate. + ### Option B: Full package with config ```bash diff --git a/tests/hooks/gateguard-fact-force.test.js b/tests/hooks/gateguard-fact-force.test.js index 458476a7..f6b31739 100644 --- a/tests/hooks/gateguard-fact-force.test.js +++ b/tests/hooks/gateguard-fact-force.test.js @@ -1637,6 +1637,114 @@ function runTests() { } })) passed++; else failed++; + // --- Fact-force denial dampening (#2142) --- + + console.log('\n Fact-force denial dampening (#2142):'); + + clearState(); + if (test('first denials use the full four-fact block and count toward the budget', () => { + const result = runHook({ tool_name: 'Edit', tool_input: { file_path: '/src/damp-one.js' } }); + const output = parseOutput(result.stdout); + assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny'); + assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('present these facts'), + 'first denial should use the full block'); + const state = JSON.parse(fs.readFileSync(stateFile, 'utf8')); + assert.strictEqual(state.fact_force_denials, 1, 'denial counter should persist in session state'); + })) passed++; else failed++; + + clearState(); + if (test('emits a condensed single-line denial once the full-block budget is spent', () => { + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 3 }); + const result = runHook({ tool_name: 'Edit', tool_input: { file_path: '/src/damp-two.js' } }); + const output = parseOutput(result.stdout); + assert.strictEqual(result.code, 0); + assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny', 'still denies first touch'); + const reason = output.hookSpecificOutput.permissionDecisionReason; + assert.ok(reason.includes('[Fact-Forcing Gate]'), 'condensed message keeps the gate marker'); + assert.ok(reason.includes('denial #4'), 'condensed message carries the denial ordinal'); + assert.ok(reason.includes('/src/damp-two.js'), 'condensed message names the target'); + assert.ok(!reason.includes('present these facts'), 'no repeated four-fact block'); + assert.ok(!reason.includes('\n'), 'condensed message is a single line'); + assert.ok(reason.includes('ECC_GATEGUARD=off'), 'condensed message keeps a recovery hint'); + })) passed++; else failed++; + + clearState(); + if (test('consecutive condensed denials are textually different (ordinal advances)', () => { + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 5 }); + const first = parseOutput(runHook({ tool_name: 'Write', tool_input: { file_path: '/src/damp-a.js', content: 'x' } }).stdout); + const second = parseOutput(runHook({ tool_name: 'Write', tool_input: { file_path: '/src/damp-b.js', content: 'x' } }).stdout); + const firstReason = first.hookSpecificOutput.permissionDecisionReason; + const secondReason = second.hookSpecificOutput.permissionDecisionReason; + assert.ok(firstReason.includes('denial #6'), `expected ordinal 6, got: ${firstReason}`); + assert.ok(secondReason.includes('denial #7'), `expected ordinal 7, got: ${secondReason}`); + assert.notStrictEqual(firstReason, secondReason, 'successive denials must differ so they cannot compound verbatim'); + })) passed++; else failed++; + + clearState(); + if (test('retry of the same target is still allowed after a condensed denial', () => { + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 9 }); + const input = { tool_name: 'Edit', tool_input: { file_path: '/src/damp-retry.js' } }; + const denied = parseOutput(runHook(input).stdout); + assert.strictEqual(denied.hookSpecificOutput.permissionDecision, 'deny'); + const retryOutput = parseOutput(runHook(input).stdout); + assert.ok(!retryOutput || !retryOutput.hookSpecificOutput, 'retry passes through (no second deny, no re-prompt)'); + })) passed++; else failed++; + + clearState(); + if (test('GATEGUARD_FACT_FORCE_FULL_DENIALS tunes the full-block budget', () => { + // Budget 0: condensed from the very first denial. + const zero = parseOutput(runHook( + { tool_name: 'Edit', tool_input: { file_path: '/src/damp-zero.js' } }, + { GATEGUARD_FACT_FORCE_FULL_DENIALS: '0' } + ).stdout); + assert.ok(zero.hookSpecificOutput.permissionDecisionReason.includes('denial #1')); + assert.ok(!zero.hookSpecificOutput.permissionDecisionReason.includes('present these facts')); + + // Large budget: full block well past the default threshold. + clearState(); + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 7 }); + const big = parseOutput(runHook( + { tool_name: 'Edit', tool_input: { file_path: '/src/damp-big.js' } }, + { GATEGUARD_FACT_FORCE_FULL_DENIALS: '20' } + ).stdout); + assert.ok(big.hookSpecificOutput.permissionDecisionReason.includes('present these facts'), + 'budget of 20 keeps the full block at denial 8'); + })) passed++; else failed++; + + clearState(); + if (test('malformed denial counter in state is treated as zero (full block, no crash)', () => { + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 'garbage' }); + const result = runHook({ tool_name: 'Edit', tool_input: { file_path: '/src/damp-malformed.js' } }); + assert.strictEqual(result.code, 0); + const output = parseOutput(result.stdout); + assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny'); + assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('present these facts'), + 'malformed counter resets to the full block'); + })) passed++; else failed++; + + clearState(); + if (test('MultiEdit denials are dampened past the budget', () => { + writeState({ checked: [], last_active: Date.now(), fact_force_denials: 4 }); + const result = runHook({ + tool_name: 'MultiEdit', + tool_input: { edits: [{ file_path: '/src/damp-multi.js', old_string: 'a', new_string: 'b' }] } + }); + const output = parseOutput(result.stdout); + assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny'); + assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('denial #5')); + assert.ok(!output.hookSpecificOutput.permissionDecisionReason.includes('present these facts')); + })) passed++; else failed++; + + clearState(); + if (test('destructive Bash gate keeps the full message regardless of denial count', () => { + writeState({ checked: ['__bash_session__'], last_active: Date.now(), fact_force_denials: 50 }); + const result = runBashHook({ tool_name: 'Bash', tool_input: { command: 'rm -rf /tmp/damp-target' } }); + const output = parseOutput(result.stdout); + assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny'); + assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('rollback'), + 'destructive gate is exempt from dampening'); + })) passed++; else failed++; + // Cleanup only the temp directory created by this test file. try { if (fs.existsSync(stateDir)) { diff --git a/tests/hooks/hooks.test.js b/tests/hooks/hooks.test.js index af0d0d24..14602354 100644 --- a/tests/hooks/hooks.test.js +++ b/tests/hooks/hooks.test.js @@ -5285,17 +5285,15 @@ async function runTests() { console.log('\nRound 59: check-console-log.js (stdin exceeding 1MB — truncation):'); if ( - await asyncTest('truncates stdin at 1MB limit and still passes through data', async () => { - // Send 1.2MB of data — exceeds the 1MB MAX_STDIN limit + await asyncTest('suppresses pass-through for oversized stdin (fail-open, #2090)', async () => { + // Send 1.2MB of data — exceeds the 1MB MAX_STDIN limit. Echoing the + // truncated string would emit a JSON document cut mid-stream, which the + // harness reports as a Stop hook JSON validation failure. const payload = 'x'.repeat(1024 * 1024 + 200000); const result = await runScript(path.join(scriptsDir, 'check-console-log.js'), payload); assert.strictEqual(result.code, 0, 'Should exit 0 even with oversized stdin'); - // Output should be truncated — significantly less than input - assert.ok(result.stdout.length < payload.length, `stdout (${result.stdout.length}) should be shorter than input (${payload.length})`); - // Output should be approximately 1MB (last accepted chunk may push slightly over) - assert.ok(result.stdout.length <= 1024 * 1024 + 65536, `stdout (${result.stdout.length}) should be near 1MB, not unbounded`); - assert.ok(result.stdout.length > 0, 'Should still pass through truncated data'); + assert.strictEqual(result.stdout, '', 'Truncated stdin must not be echoed (empty stdout = no opinion)'); }) ) passed++; diff --git a/tests/hooks/run-with-flags-truncation.test.js b/tests/hooks/run-with-flags-truncation.test.js new file mode 100644 index 00000000..36cdf06e --- /dev/null +++ b/tests/hooks/run-with-flags-truncation.test.js @@ -0,0 +1,154 @@ +/** + * Regression tests for #2222: run-with-flags.js must fail open on >1MB stdin. + * + * Before the fix, every fallthrough path echoed the truncated payload to + * stdout. The harness parses hook stdout as JSON, got a document cut + * mid-stream, and treated the hook as failed — blocking every Edit/Write + * whose hook payload exceeded the 1MB cap. + */ + +'use strict'; + +const assert = require('assert'); +const path = require('path'); +const { spawnSync } = require('child_process'); + +const repoRoot = path.join(__dirname, '..', '..'); +const runner = path.join(repoRoot, 'scripts', 'hooks', 'run-with-flags.js'); + +const MAX_STDIN = 1024 * 1024; + +function test(name, fn) { + try { + fn(); + console.log(` ✓ ${name}`); + return true; + } catch (error) { + console.log(` ✗ ${name}`); + console.log(` Error: ${error.message}`); + return false; + } +} + +function runRunner(args, input, env = {}) { + return spawnSync('node', [runner, ...args], { + input, + encoding: 'utf8', + cwd: repoRoot, + env: { ...process.env, ...env }, + timeout: 30000, + maxBuffer: 16 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'] + }); +} + +function oversizedPayload() { + // JSON document that exceeds MAX_STDIN so the runner's stdin cap trips. + return JSON.stringify({ + tool_name: 'Write', + tool_input: { file_path: '/tmp/big.md', content: 'x'.repeat(MAX_STDIN + 64 * 1024) } + }); +} + +console.log('\nrun-with-flags truncation (fail-open) tests:'); + +let passed = 0; +let failed = 0; + +if ( + test('oversized payload exits 0 with empty stdout for an enabled hook', () => { + const result = runRunner(['pre:write:doc-file-warning', 'scripts/hooks/doc-file-warning.js', 'standard,strict'], oversizedPayload()); + assert.strictEqual(result.status, 0, `expected exit 0, got ${result.status}: ${result.stderr}`); + assert.strictEqual(result.stdout, '', `stdout must be empty, got: ${result.stdout.slice(0, 120)}...`); + assert.match(result.stderr, /stdin exceeded \d+ bytes for pre:write:doc-file-warning/); + assert.match(result.stderr, /fail-open/); + }) +) + passed++; +else failed++; + +if ( + test('oversized payload never echoes truncated stdin when hook args are missing', () => { + const result = runRunner([], oversizedPayload()); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout, '', 'missing-args path must not echo truncated stdin'); + }) +) + passed++; +else failed++; + +if ( + test('oversized payload never echoes truncated stdin for a disabled hook', () => { + const result = runRunner(['pre:write:doc-file-warning', 'scripts/hooks/doc-file-warning.js', 'standard,strict'], oversizedPayload(), { ECC_DISABLED_HOOKS: 'pre:write:doc-file-warning' }); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout, '', 'disabled-hook path must not echo truncated stdin'); + }) +) + passed++; +else failed++; + +if ( + test('normal-sized payload still passes through unchanged', () => { + const payload = JSON.stringify({ + tool_name: 'Write', + tool_input: { file_path: '/tmp/small.js', content: 'const x = 1;\n' } + }); + const result = runRunner(['pre:write:doc-file-warning', 'scripts/hooks/doc-file-warning.js', 'standard,strict'], payload); + assert.strictEqual(result.status, 0, `expected exit 0, got ${result.status}: ${result.stderr}`); + assert.ok(result.stdout.length > 0, 'normal payloads keep the pass-through behavior'); + JSON.parse(result.stdout); // stdout must remain valid JSON + }) +) + passed++; +else failed++; + +if ( + test('a security hook can still block on an oversized payload (no blanket skip)', () => { + // config-protection refuses to fail open on truncated payloads. The + // runner must still execute the hook and forward its verdict — only the + // runner's own raw-echo is suppressed. + const payload = JSON.stringify({ + tool_name: 'Write', + tool_input: { file_path: '.eslintrc.js', content: 'x'.repeat(MAX_STDIN + 2048) } + }); + const result = runRunner(['pre:config-protection', 'scripts/hooks/config-protection.js', 'standard,strict'], payload); + assert.strictEqual(result.status, 2, `expected block exit 2, got ${result.status}: ${result.stderr}`); + assert.strictEqual(result.stdout, '', 'blocked truncated payload must not echo raw input'); + }) +) + passed++; +else failed++; + +if ( + test('payload just under the cap echoes through completely (no 64KB pipe cut)', () => { + // process.exit() right after stdout.write() used to drop everything past + // the ~64KB pipe buffer, cutting the echoed JSON mid-stream. + const content = 'y'.repeat(MAX_STDIN - 1024); + const payload = JSON.stringify({ tool_name: 'Write', tool_input: { file_path: '/tmp/edge.md', content } }); + assert.ok(payload.length < MAX_STDIN, 'fixture must stay under the stdin cap'); + const result = runRunner([], payload); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout.length, payload.length, 'echo must not be cut at the pipe buffer'); + assert.strictEqual(result.stdout, payload, 'sub-cap payloads still echo through fallthrough paths'); + }) +) + passed++; +else failed++; + +if ( + test('disabled-hook passthrough of a >64KB payload stays valid JSON', () => { + const payload = JSON.stringify({ + tool_name: 'Write', + tool_input: { file_path: '/tmp/medium.md', content: 'z'.repeat(256 * 1024) } + }); + const result = runRunner(['pre:write:doc-file-warning', 'scripts/hooks/doc-file-warning.js', 'standard,strict'], payload, { ECC_DISABLED_HOOKS: 'pre:write:doc-file-warning' }); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout, payload); + JSON.parse(result.stdout); + }) +) + passed++; +else failed++; + +console.log(`\n ${passed} passed, ${failed} failed\n`); +process.exit(failed > 0 ? 1 : 0); diff --git a/tests/hooks/stop-hooks-stdout.test.js b/tests/hooks/stop-hooks-stdout.test.js new file mode 100644 index 00000000..08b79e66 --- /dev/null +++ b/tests/hooks/stop-hooks-stdout.test.js @@ -0,0 +1,203 @@ +/** + * Regression tests for #2090: "Stop hook error: JSON validation failed". + * + * Stop hooks follow the ECC pass-through convention (echo stdin on stdout). + * The Stop payload carries `last_assistant_message`, which can be large; any + * hook that caps stdin and echoes the capped string emits a JSON document cut + * mid-stream, which the harness reports as a Stop hook JSON validation + * failure. Worst offender: cost-tracker capped stdin at 64KB, so any Stop + * payload with a >64KB final assistant message broke the whole Stop chain. + * + * Contract under test: for every Stop hook, stdout is either empty or valid + * JSON, and the exit code is 0 — for realistic large payloads and for + * oversized (>1MB) payloads, via the production runner and via direct + * invocation. + */ + +'use strict'; + +const assert = require('assert'); +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const { spawnSync } = require('child_process'); + +const repoRoot = path.join(__dirname, '..', '..'); +const runner = path.join(repoRoot, 'scripts', 'hooks', 'run-with-flags.js'); + +const MAX_STDIN = 1024 * 1024; + +const workDir = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-stop-stdout-')); // non-git cwd +const dataHome = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-stop-data-')); + +function test(name, fn) { + try { + fn(); + console.log(` ✓ ${name}`); + return true; + } catch (error) { + console.log(` ✗ ${name}`); + console.log(` Error: ${error.message}`); + return false; + } +} + +function stopPayload(messageBytes) { + return JSON.stringify({ + session_id: `stop-stdout-test-${process.pid}`, + transcript_path: path.join(workDir, 'missing-transcript.jsonl'), + cwd: workDir, + hook_event_name: 'Stop', + stop_hook_active: false, + last_assistant_message: 'm'.repeat(messageBytes) + }); +} + +function hookEnv() { + const env = { + ...process.env, + ECC_HOOK_PROFILE: 'standard', + ECC_AGENT_DATA_HOME: dataHome, + CLAUDE_SESSION_ID: `stop-stdout-test-${process.pid}` + }; + delete env.ECC_GATEGUARD; + delete env.ECC_DISABLED_HOOKS; + return env; +} + +function runViaRunner(hookId, script, input) { + return spawnSync('node', [runner, hookId, script, 'minimal,standard,strict'], { + input, + encoding: 'utf8', + cwd: workDir, + env: hookEnv(), + timeout: 60000, + maxBuffer: 16 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'] + }); +} + +function runDirect(script, input) { + return spawnSync('node', [path.join(repoRoot, script)], { + input, + encoding: 'utf8', + cwd: workDir, + env: hookEnv(), + timeout: 60000, + maxBuffer: 16 * 1024 * 1024, + stdio: ['pipe', 'pipe', 'pipe'] + }); +} + +function assertStdoutContract(result, label) { + assert.strictEqual(result.status, 0, `${label}: expected exit 0, got ${result.status}: ${result.stderr}`); + if (result.stdout.length > 0) { + try { + JSON.parse(result.stdout); + } catch (err) { + assert.fail(`${label}: stdout is non-empty but not valid JSON (${err.message}); first 120 chars: ${result.stdout.slice(0, 120)}`); + } + } +} + +// All registered Stop hooks (hooks/hooks.json). +const STOP_HOOKS = [ + ['stop:format-typecheck', 'scripts/hooks/stop-format-typecheck.js'], + ['stop:check-console-log', 'scripts/hooks/check-console-log.js'], + ['stop:session-end', 'scripts/hooks/session-end.js'], + ['stop:evaluate-session', 'scripts/hooks/evaluate-session.js'], + ['stop:cost-tracker', 'scripts/hooks/cost-tracker.js'] + // stop:desktop-notify is excluded from the valid-payload run because a + // successful run() fires a real OS notification; its truncation path is + // covered separately below (run() bails on JSON.parse before notifying). +]; + +// Direct-invocation legacy paths that echo stdin. +const ECHOING_STOP_HOOKS = [ + 'scripts/hooks/stop-format-typecheck.js', + 'scripts/hooks/check-console-log.js', + 'scripts/hooks/cost-tracker.js', + 'scripts/hooks/desktop-notify.js' +]; + +console.log('\nStop hook stdout contract tests (#2090):'); + +let passed = 0; +let failed = 0; + +// A 100KB last_assistant_message is a realistic long-session Stop payload. +// Before the fix, cost-tracker echoed it cut at 64KB through the production +// runner path, making the harness report "JSON validation failed". +const realisticPayload = stopPayload(100 * 1024); + +for (const [hookId, script] of STOP_HOOKS) { + if ( + test(`${hookId} via runner keeps stdout valid for a 100KB Stop payload`, () => { + const result = runViaRunner(hookId, script, realisticPayload); + assertStdoutContract(result, hookId); + if (result.stdout.length > 0) { + assert.strictEqual(result.stdout, realisticPayload, `${hookId}: pass-through must echo the payload uncut`); + } + }) + ) + passed++; + else failed++; +} + +const oversizedPayload = stopPayload(MAX_STDIN + 64 * 1024); + +for (const [hookId, script] of [...STOP_HOOKS, ['stop:desktop-notify', 'scripts/hooks/desktop-notify.js']]) { + if ( + test(`${hookId} via runner fails open on a >1MB Stop payload`, () => { + const result = runViaRunner(hookId, script, oversizedPayload); + assert.strictEqual(result.status, 0, `${hookId}: expected exit 0, got ${result.status}: ${result.stderr}`); + assert.strictEqual(result.stdout, '', `${hookId}: oversized payloads must not be echoed`); + }) + ) + passed++; + else failed++; +} + +for (const script of ECHOING_STOP_HOOKS) { + if ( + test(`${path.basename(script)} invoked directly never echoes truncated stdin`, () => { + const result = runDirect(script, oversizedPayload); + assert.strictEqual(result.status, 0, `${script}: expected exit 0, got ${result.status}: ${result.stderr}`); + assert.strictEqual(result.stdout, '', `${script}: truncated stdin must not be echoed`); + }) + ) + passed++; + else failed++; +} + +if ( + test('check-console-log invoked directly echoes a sub-cap >64KB payload uncut', () => { + const result = runDirect('scripts/hooks/check-console-log.js', realisticPayload); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout, realisticPayload, 'pass-through must not be cut at the pipe buffer'); + JSON.parse(result.stdout); + }) +) + passed++; +else failed++; + +if ( + test('cost-tracker invoked directly echoes a sub-cap >64KB payload uncut', () => { + const result = runDirect('scripts/hooks/cost-tracker.js', realisticPayload); + assert.strictEqual(result.status, 0); + assert.strictEqual(result.stdout, realisticPayload, 'the old 64KB cap must not cut realistic Stop payloads'); + JSON.parse(result.stdout); + }) +) + passed++; +else failed++; + +try { + fs.rmSync(workDir, { recursive: true, force: true }); + fs.rmSync(dataHome, { recursive: true, force: true }); +} catch { + /* best-effort cleanup */ +} + +console.log(`\n ${passed} passed, ${failed} failed\n`); +process.exit(failed > 0 ? 1 : 0); diff --git a/tests/scripts/codex-hooks.test.js b/tests/scripts/codex-hooks.test.js index 59b82b5e..d482bb32 100644 --- a/tests/scripts/codex-hooks.test.js +++ b/tests/scripts/codex-hooks.test.js @@ -261,7 +261,7 @@ if ( else failed++; if ( - test('merge-mcp-config dry-run appends all recommended servers without mutating target', () => { + test('merge-mcp-config dry-run appends the current default set without mutating target', () => { const tempDir = createTempDir('mcp-merge-dry-run-'); const configPath = path.join(tempDir, 'config.toml'); const original = ''; @@ -272,9 +272,12 @@ if ( assert.strictEqual(result.status, 0, `${result.stdout}\n${result.stderr}`); assert.match(result.stdout, /Package manager: npm \(exec: npx\)/); - assert.match(result.stdout, /\[add\] mcp_servers\.supabase/); - assert.match(result.stdout, /\[mcp_servers\.github\]/); + assert.match(result.stdout, /\[add\] mcp_servers\.chrome-devtools/); + assert.match(result.stdout, /\[mcp_servers\.chrome-devtools\]/); assert.match(result.stdout, /Dry run/); + // Retired defaults (June 2026 connector policy) must not be emitted. + assert.doesNotMatch(result.stdout, /mcp_servers\.(supabase|playwright|context7|exa|github|memory|sequential-thinking)\b/); + assert.doesNotMatch(result.stdout, /url = /); assert.strictEqual(fs.readFileSync(configPath, 'utf8'), original); } finally { cleanup(tempDir); @@ -296,14 +299,17 @@ if ( const merged = fs.readFileSync(configPath, 'utf8'); const parsed = TOML.parse(merged); - assert.strictEqual(parsed.mcp_servers.exa.url, 'https://mcp.exa.ai/mcp'); - assert.strictEqual(parsed.mcp_servers.github.command, 'bash'); - assert.deepStrictEqual(parsed.mcp_servers.memory.args, ['@modelcontextprotocol/server-memory']); - assert.strictEqual(parsed.mcp_servers.supabase.tool_timeout_sec, 120); + assert.strictEqual(parsed.mcp_servers['chrome-devtools'].command, 'npx'); + assert.deepStrictEqual(parsed.mcp_servers['chrome-devtools'].args, ['chrome-devtools-mcp@latest']); + assert.strictEqual(parsed.mcp_servers['chrome-devtools'].startup_timeout_sec, 30); + // No retired server may be (re-)emitted — exa's url form broke Codex (#2224). + assert.strictEqual(parsed.mcp_servers.exa, undefined); + assert.strictEqual(parsed.mcp_servers.github, undefined); + assert.strictEqual(parsed.mcp_servers.supabase, undefined); const second = runNode(mergeMcpConfigScript, [configPath], deterministicPackageEnv); assert.strictEqual(second.status, 0, `${second.stdout}\n${second.stderr}`); - assert.match(second.stdout, /\[ok\] mcp_servers\.github/); + assert.match(second.stdout, /\[ok\] mcp_servers\.chrome-devtools/); assert.match(second.stdout, /All ECC MCP servers already present/); assert.strictEqual(fs.readFileSync(configPath, 'utf8'), merged); } finally { @@ -315,24 +321,88 @@ if ( else failed++; if ( - test('merge-mcp-config update dry-run reports canonical and legacy section refreshes', () => { + test('merge-mcp-config repairs the invalid exa url entry from earlier ECC versions (#2224)', () => { + const tempDir = createTempDir('mcp-merge-exa-repair-'); + const configPath = path.join(tempDir, 'config.toml'); + const original = [ + '[mcp_servers.github]', + 'command = "npx"', + 'args = ["-y", "@modelcontextprotocol/server-github"]', + '', + '[mcp_servers.exa]', + 'url = "https://mcp.exa.ai/mcp"', + '', + ].join('\n'); + + try { + fs.writeFileSync(configPath, original); + const result = runNode(mergeMcpConfigScript, [configPath], deterministicPackageEnv); + + assert.strictEqual(result.status, 0, `${result.stdout}\n${result.stderr}`); + assert.match(result.stdout, /\[repair\] mcp_servers\.exa/); + + const updated = fs.readFileSync(configPath, 'utf8'); + const parsed = TOML.parse(updated); + assert.strictEqual(parsed.mcp_servers.exa, undefined, 'invalid exa url entry must be removed'); + assert.doesNotMatch(updated, /url = "https:\/\/mcp\.exa\.ai\/mcp"/); + // User-managed servers are untouched; current default is added. + assert.strictEqual(parsed.mcp_servers.github.command, 'npx'); + assert.strictEqual(parsed.mcp_servers['chrome-devtools'].command, 'npx'); + + // Re-running must not re-introduce the invalid entry. + const second = runNode(mergeMcpConfigScript, [configPath], deterministicPackageEnv); + assert.strictEqual(second.status, 0, `${second.stdout}\n${second.stderr}`); + assert.doesNotMatch(fs.readFileSync(configPath, 'utf8'), /mcp_servers\.exa/); + } finally { + cleanup(tempDir); + } + }) +) + passed++; +else failed++; + +if ( + test('merge-mcp-config leaves a user-managed stdio exa entry untouched', () => { + const tempDir = createTempDir('mcp-merge-exa-stdio-'); + const configPath = path.join(tempDir, 'config.toml'); + const original = [ + '[mcp_servers.exa]', + 'command = "npx"', + 'args = ["-y", "mcp-remote", "https://mcp.exa.ai/mcp"]', + 'startup_timeout_sec = 30', + '', + ].join('\n'); + + try { + fs.writeFileSync(configPath, original); + const result = runNode(mergeMcpConfigScript, [configPath], deterministicPackageEnv); + + assert.strictEqual(result.status, 0, `${result.stdout}\n${result.stderr}`); + assert.doesNotMatch(result.stdout, /\[repair\]/); + + const parsed = TOML.parse(fs.readFileSync(configPath, 'utf8')); + assert.strictEqual(parsed.mcp_servers.exa.command, 'npx'); + assert.deepStrictEqual(parsed.mcp_servers.exa.args, ['-y', 'mcp-remote', 'https://mcp.exa.ai/mcp']); + } finally { + cleanup(tempDir); + } + }) +) + passed++; +else failed++; + +if ( + test('merge-mcp-config update dry-run refreshes managed sections and leaves user servers alone', () => { const tempDir = createTempDir('mcp-merge-update-dry-run-'); const configPath = path.join(tempDir, 'config.toml'); const original = [ + '[mcp_servers.chrome-devtools]', + 'command = "custom"', + 'args = ["old"]', + '', '[mcp_servers.context7]', - 'command = "custom"', - 'args = ["old"]', - '', - '[mcp_servers.context7-mcp]', 'command = "npx"', - 'args = ["legacy"]', - '', - '[mcp_servers.supabase]', - 'command = "custom"', - 'args = ["old"]', - '', - '[mcp_servers.supabase.env]', - 'SUPABASE_ACCESS_TOKEN = "token"', + 'args = ["-y", "@upstash/context7-mcp@latest"]', '', ].join('\n'); @@ -341,11 +411,10 @@ if ( const result = runNode(mergeMcpConfigScript, [configPath, '--update-mcp', '--dry-run'], deterministicPackageEnv); assert.strictEqual(result.status, 0, `${result.stdout}\n${result.stderr}`); - assert.match(result.stdout, /\[remove\] mcp_servers\.context7/); - assert.match(result.stdout, /\[remove\] mcp_servers\.context7-mcp/); - assert.match(result.stdout, /\[remove\] mcp_servers\.supabase/); - assert.match(result.stdout, /\[mcp_servers\.supabase\]/); - assert.match(result.stdout, /\[mcp_servers\.context7\]/); + assert.match(result.stdout, /\[remove\] mcp_servers\.chrome-devtools/); + assert.match(result.stdout, /\[mcp_servers\.chrome-devtools\]/); + // Retired servers are no longer ECC-managed: never removed or re-added. + assert.doesNotMatch(result.stdout, /\[remove\] mcp_servers\.context7/); assert.strictEqual(fs.readFileSync(configPath, 'utf8'), original); } finally { cleanup(tempDir); @@ -356,38 +425,31 @@ if ( else failed++; if ( - test('merge-mcp-config removes disabled legacy servers without appending replacements', () => { + test('merge-mcp-config removes disabled servers without appending replacements', () => { const tempDir = createTempDir('mcp-merge-disabled-'); const configPath = path.join(tempDir, 'config.toml'); const original = [ - '[mcp_servers.context7-mcp]', + '[mcp_servers.chrome-devtools]', 'command = "npx"', - 'args = ["legacy"]', - '', - '[mcp_servers.exa]', - 'url = "https://mcp.exa.ai/mcp"', + 'args = ["chrome-devtools-mcp@latest"]', '', ].join('\n'); - const allServersDisabled = 'supabase,playwright,context7,exa,github,memory,sequential-thinking'; try { fs.writeFileSync(configPath, original); const result = runNode(mergeMcpConfigScript, [configPath], { ...deterministicPackageEnv, - ECC_DISABLED_MCPS: allServersDisabled, + ECC_DISABLED_MCPS: 'chrome-devtools', }); assert.strictEqual(result.status, 0, `${result.stdout}\n${result.stderr}`); assert.match(result.stdout, /Disabled via ECC_DISABLED_MCPS/); - assert.match(result.stdout, /\[skip\] mcp_servers\.context7 \(disabled\)/); - assert.match(result.stdout, /\[skip\] mcp_servers\.exa \(disabled\)/); - assert.match(result.stdout, /\[update\] mcp_servers\.context7-mcp \(disabled\)/); - assert.match(result.stdout, /\[update\] mcp_servers\.exa \(disabled\)/); - assert.match(result.stdout, /Done\. Removed 2 disabled server\(s\)\./); + assert.match(result.stdout, /\[skip\] mcp_servers\.chrome-devtools \(disabled\)/); + assert.match(result.stdout, /\[update\] mcp_servers\.chrome-devtools \(disabled\)/); + assert.match(result.stdout, /Done\. Removed 1 server section\(s\)\./); const updated = fs.readFileSync(configPath, 'utf8'); - assert.doesNotMatch(updated, /context7-mcp/); - assert.doesNotMatch(updated, /mcp_servers\.exa/); + assert.doesNotMatch(updated, /chrome-devtools/); } finally { cleanup(tempDir); } @@ -454,7 +516,10 @@ if ( assert.strictEqual(parsedConfig.agents.explorer.config_file, 'agents/explorer.toml'); assert.strictEqual(parsedConfig.agents.reviewer.config_file, 'agents/reviewer.toml'); assert.strictEqual(parsedConfig.agents.docs_researcher.config_file, 'agents/docs-researcher.toml'); - assert.ok(parsedConfig.mcp_servers.exa); + // Current default connector is added; retired servers are not emitted, + // and pre-existing user-managed entries are preserved untouched. + assert.ok(parsedConfig.mcp_servers['chrome-devtools']); + assert.strictEqual(parsedConfig.mcp_servers.exa, undefined); assert.ok(parsedConfig.mcp_servers.github); assert.ok(parsedConfig.mcp_servers.memory); assert.ok(parsedConfig.mcp_servers['sequential-thinking']);