From 28548f67ba1d4108d35c0800af88def995a0995f Mon Sep 17 00:00:00 2001 From: Jamkris Date: Tue, 19 May 2026 09:29:09 +0900 Subject: [PATCH] fix(lib): use unique tmp suffix in writeBridgeAtomic to eliminate ENOENT race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `writeBridgeAtomic` wrote to a fixed `${target}.tmp` path before calling `renameSync`. When two processes write to the same session bridge concurrently (e.g. PostToolUse `ecc-metrics-bridge` + the background `ecc-statusline`, both calling `writeBridgeAtomic(sessionId, ...)`), the canonical atomic-rename race fires: 1. Process A: writeFileSync(target.tmp, JSON_A) — tmp file exists. 2. Process B: writeFileSync(target.tmp, JSON_B) — tmp file overwritten. 3. Process A: renameSync(target.tmp, target) — succeeds; target = JSON_B (A's payload silently corrupted en-route). 4. Process B: renameSync(target.tmp, target) — throws ENOENT (the rename consumed the file). Every caller in the repo wraps `writeBridgeAtomic` in `try {} catch {}`, so the ENOENT exception is swallowed and the user-visible symptom is just "the bridge file occasionally contains the wrong process's payload" with no diagnostic. Reproduced before this commit: $ # two concurrent writers, each calling writeBridgeAtomic 500 times $ # against the same session ID [A] errors=244 # 244 ENOENT exceptions swallowed [B] errors=248 # ditto After this commit the same workload reports 0 errors in both subprocesses: tmp paths no longer collide. Fix: change `${target}.tmp` to `${target}.${process.pid}.${crypto.randomBytes(4).toString('hex')}.tmp`, matching the pattern already used by `writeCostWarningIfChanged` in `scripts/hooks/ecc-metrics-bridge.js` (commit 9b1d8918). The pid + 4-byte nonce gives each writer process a distinct tmp path, so step 2 above no longer overwrites step 1's payload and step 4 no longer races step 3. Also added: on `renameSync` failure, attempt `fs.unlinkSync(tmp)` so a writer that fails (disk full, permission, parent dir gone) does not leak its tmp file. The cleanup is best-effort and the original error is still re-thrown. **Scope clarification.** This commit closes the atomic-rename primitive's race only. The *read-modify-write* race in callers — two writers each read the same bridge state, increment, and write back, the second clobbering the first — is a separate concern that needs locking or per-writer logs, and is intentionally out of scope for this PR. The cost-tracker / metrics-bridge callers tolerate last-writer-wins on their cumulative aggregates today and this commit does not change that contract. The companion `writeWarnState` in `ecc-context-monitor.js` has the same fixed-suffix pattern and the same race; that fix lands in the next commit so each can be reviewed against its own diff. --- scripts/lib/session-bridge.js | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/scripts/lib/session-bridge.js b/scripts/lib/session-bridge.js index aceae9cb..b50216fc 100644 --- a/scripts/lib/session-bridge.js +++ b/scripts/lib/session-bridge.js @@ -8,6 +8,7 @@ * without scanning large JSONL logs on every invocation. */ +const crypto = require('crypto'); const fs = require('fs'); const os = require('os'); const path = require('path'); @@ -51,15 +52,35 @@ function readBridge(sessionId) { } /** - * Write bridge data atomically (write .tmp then rename). + * Write bridge data atomically (write unique-suffix tmp then rename). + * + * The tmp path includes `process.pid` plus a random nonce so concurrent + * writers (e.g. PostToolUse `ecc-metrics-bridge` and the background + * `ecc-statusline`, both writing to the same session bridge) do not + * clobber each other's tmp file mid-write. With a fixed `.tmp` suffix + * two writers could both call `writeFileSync` against the same path + * before either reaches `renameSync`, causing one writer's payload to + * silently overwrite the other and the second `renameSync` to throw + * ENOENT once the rename consumes the file. + * + * Same pattern already used by `writeCostWarningIfChanged` in + * `scripts/hooks/ecc-metrics-bridge.js` (commit 9b1d8918) for the + * cost-warning cache; this commit applies it to the session-bridge + * primitive too. + * * @param {string} sessionId - Already-sanitized session ID * @param {object} data */ function writeBridgeAtomic(sessionId, data) { const target = getBridgePath(sessionId); - const tmp = `${target}.tmp`; + const tmp = `${target}.${process.pid}.${crypto.randomBytes(4).toString('hex')}.tmp`; fs.writeFileSync(tmp, JSON.stringify(data), 'utf8'); - fs.renameSync(tmp, target); + try { + fs.renameSync(tmp, target); + } catch (err) { + try { fs.unlinkSync(tmp); } catch { /* ignore */ } + throw err; + } } /**