mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-06-12 19:23:07 +08:00
Compare commits
1 Commits
fix/cost-t
...
pr-2039
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
88c02f3494 |
@@ -170,6 +170,11 @@
|
|||||||
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
|
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
|
||||||
},
|
},
|
||||||
"description": "AI agent regression testing — snapshot behavior, detect regressions in tool calls and output quality. 8 tools: create_test, run_snapshot, run_check, list_tests, validate_skill, generate_skill_tests, run_skill_test, generate_visual_report. API key optional — deterministic checks (tool diff, output hash) work without it. Install: pip install \"evalview>=0.5,<1\""
|
"description": "AI agent regression testing — snapshot behavior, detect regressions in tool calls and output quality. 8 tools: create_test, run_snapshot, run_check, list_tests, validate_skill, generate_skill_tests, run_skill_test, generate_visual_report. API key optional — deterministic checks (tool diff, output hash) work without it. Install: pip install \"evalview>=0.5,<1\""
|
||||||
|
},
|
||||||
|
"squish": {
|
||||||
|
"command": "npx",
|
||||||
|
"args": ["-y", "squish-memory"],
|
||||||
|
"description": "Local-first persistent memory runtime for AI agents — MCP server for Claude Code, Cursor, OpenCode, Codex, Cline. Auto-captures context across sessions. 1-20ms recall, 283KB, no second LLM needed. Runs locally with SQLite. Supports cloud sync via Stripe checkout ($9-$99/mo). GitHub: https://github.com/michielhdoteth/squish | Docs: https://squishplugin.dev | (also available via local `squish run mcp`)"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"_comments": {
|
"_comments": {
|
||||||
|
|||||||
@@ -20,54 +20,15 @@
|
|||||||
* Each row therefore represents the cumulative session total up to that point.
|
* Each row therefore represents the cumulative session total up to that point.
|
||||||
* To get per-session cost, take the last row per session_id. To get per-day
|
* To get per-session cost, take the last row per session_id. To get per-day
|
||||||
* spend, aggregate.
|
* spend, aggregate.
|
||||||
*
|
|
||||||
* Harness-cost contract (optional, opt-in by the statusline):
|
|
||||||
* If the user's statusline (which receives `cost.total_cost_usd` directly
|
|
||||||
* from Claude Code) writes `{ts, cost_usd}` to
|
|
||||||
* `<os.tmpdir()>/harness-cost-<session_id>.json` on each render, this hook
|
|
||||||
* prefers that authoritative value over the transcript-sum estimate when
|
|
||||||
* the cache is fresh (≤ 300s). The transcript-sum is kept as a safe
|
|
||||||
* fallback because:
|
|
||||||
* - the hard-coded rate table cannot represent Opus 4.7's >200K-token
|
|
||||||
* 2x tier or the 1h-cache 2x tier (under-counts on long sessions);
|
|
||||||
* - summing the full transcript double-counts work done across
|
|
||||||
* `--resume` boundaries while `cost.total_cost_usd` is per-process.
|
|
||||||
* Absent a writer, behavior is unchanged.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const os = require('os');
|
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const { ensureDir, appendFile, getClaudeDir } = require('../lib/utils');
|
const { ensureDir, appendFile, getClaudeDir } = require('../lib/utils');
|
||||||
const { sanitizeSessionId } = require('../lib/session-bridge');
|
const { sanitizeSessionId } = require('../lib/session-bridge');
|
||||||
|
|
||||||
const HARNESS_COST_MAX_AGE_SECONDS = 300;
|
|
||||||
|
|
||||||
/**
 * Read the authoritative harness cost from the per-session cache file
 * `<os.tmpdir()>/harness-cost-<sessionId>.json`.
 *
 * The file is expected to hold a JSON object `{ ts, cost_usd }` where `ts` is
 * a Unix epoch in seconds and `cost_usd` is a non-negative amount. Any
 * problem (missing file, unreadable JSON, wrong field types, stale or
 * future-dated timestamp) yields `null` so the caller can fall back to its
 * own estimate; this function never throws.
 *
 * @param {string} sessionId - session identifier used to build the file name
 * @param {number} maxAgeSeconds - maximum accepted age of the cache entry
 * @returns {number|null} cost in USD, or null on miss / stale / parse error
 */
function readHarnessCost(sessionId, maxAgeSeconds) {
  if (!sessionId) return null;

  // Accept real numbers and non-empty numeric strings only. A bare Number()
  // would turn null, [], '' and false into 0 (and true into 1), letting a
  // malformed cache masquerade as an authoritative cost.
  const toFiniteNumber = (value) => {
    if (typeof value === 'number') {
      return Number.isFinite(value) ? value : null;
    }
    if (typeof value === 'string' && value.trim() !== '') {
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    return null;
  };

  try {
    const fileName = `harness-cost-${sessionId}.json`;
    // A session id containing path separators would make path.join resolve
    // to some other file; only a plain file name inside tmpdir is accepted.
    if (path.basename(fileName) !== fileName) return null;

    const fp = path.join(os.tmpdir(), fileName);
    // No existsSync pre-check: a missing file makes readFileSync throw and
    // is handled by the catch below, without a check-then-use window.
    const obj = JSON.parse(fs.readFileSync(fp, 'utf8'));
    if (obj === null || typeof obj !== 'object') return null;

    const ts = toFiniteNumber(obj.ts);
    const cost = toFiniteNumber(obj.cost_usd);
    if (ts === null || cost === null || cost < 0) return null;

    // Negative age means the timestamp lies in the future; treat it as
    // invalid rather than fresh.
    const age = Math.floor(Date.now() / 1000) - ts;
    if (age < 0 || age > maxAgeSeconds) return null;
    return cost;
  } catch {
    // Deliberate best-effort: any I/O or parse failure is a cache miss.
    return null;
  }
}
|
|
||||||
|
|
||||||
// Approximate per-1M-token billing rates (USD).
|
// Approximate per-1M-token billing rates (USD).
|
||||||
// Cache creation: 1.25x input rate. Cache read: 0.1x input rate.
|
// Cache creation: 1.25x input rate. Cache read: 0.1x input rate.
|
||||||
const RATE_TABLE = {
|
const RATE_TABLE = {
|
||||||
@@ -164,23 +125,13 @@ process.stdin.on('end', () => {
|
|||||||
} = usageTotals || {};
|
} = usageTotals || {};
|
||||||
|
|
||||||
const rates = getRates(model);
|
const rates = getRates(model);
|
||||||
const transcriptCostUsd = Math.round((
|
const estimatedCostUsd = Math.round((
|
||||||
(inputTokens / 1e6) * rates.in +
|
(inputTokens / 1e6) * rates.in +
|
||||||
(outputTokens / 1e6) * rates.out +
|
(outputTokens / 1e6) * rates.out +
|
||||||
(cacheWriteTokens / 1e6) * rates.cacheWrite +
|
(cacheWriteTokens / 1e6) * rates.cacheWrite +
|
||||||
(cacheReadTokens / 1e6) * rates.cacheRead
|
(cacheReadTokens / 1e6) * rates.cacheRead
|
||||||
) * 1e6) / 1e6;
|
) * 1e6) / 1e6;
|
||||||
|
|
||||||
// Prefer the harness's authoritative `cost.total_cost_usd` when the
|
|
||||||
// statusline has written it to the per-session cache (see contract in
|
|
||||||
// the file header). The harness number reflects API-billed truth
|
|
||||||
// (correct rates, 1h-cache 2x, >200K tier 2x) and is per-process so it
|
|
||||||
// does not drift across `--resume`. Cache miss → transcript-sum.
|
|
||||||
const harnessCost = readHarnessCost(sessionId, HARNESS_COST_MAX_AGE_SECONDS);
|
|
||||||
const estimatedCostUsd = harnessCost !== null
|
|
||||||
? Math.round(harnessCost * 1e6) / 1e6
|
|
||||||
: transcriptCostUsd;
|
|
||||||
|
|
||||||
const metricsDir = path.join(getClaudeDir(), 'metrics');
|
const metricsDir = path.join(getClaudeDir(), 'metrics');
|
||||||
ensureDir(metricsDir);
|
ensureDir(metricsDir);
|
||||||
|
|
||||||
|
|||||||
@@ -215,93 +215,6 @@ function runTests() {
|
|||||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
fs.rmSync(tmpHome, { recursive: true, force: true });
|
||||||
}) ? passed++ : failed++);
|
}) ? passed++ : failed++);
|
||||||
|
|
||||||
// 8. Prefers harness-cost cache value over transcript-sum when fresh
|
|
||||||
(test('prefers fresh harness-cost cache over transcript estimate', () => {
|
|
||||||
const tmpHome = makeTempDir();
|
|
||||||
const sessionId = 'harness-fresh-' + Date.now();
|
|
||||||
const transcriptPath = path.join(tmpHome, 'session.jsonl');
|
|
||||||
writeTranscript(transcriptPath, [
|
|
||||||
{
|
|
||||||
type: 'assistant',
|
|
||||||
message: {
|
|
||||||
model: 'claude-opus-4-20250514',
|
|
||||||
usage: {
|
|
||||||
input_tokens: 10000,
|
|
||||||
output_tokens: 5000,
|
|
||||||
cache_creation_input_tokens: 200000,
|
|
||||||
cache_read_input_tokens: 1000000,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
|
|
||||||
const nowEpoch = Math.floor(Date.now() / 1000);
|
|
||||||
fs.writeFileSync(
|
|
||||||
harnessCachePath,
|
|
||||||
JSON.stringify({ ts: nowEpoch, cost_usd: 1.23 }),
|
|
||||||
'utf8'
|
|
||||||
);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = runScript(
|
|
||||||
{ session_id: sessionId, transcript_path: transcriptPath },
|
|
||||||
withTempHome(tmpHome)
|
|
||||||
);
|
|
||||||
assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
|
|
||||||
|
|
||||||
const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
|
|
||||||
const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
|
|
||||||
assert.strictEqual(row.estimated_cost_usd, 1.23, 'Expected harness cost to win');
|
|
||||||
// Token totals still reflect the transcript scan
|
|
||||||
assert.strictEqual(row.input_tokens, 10000, 'Token totals should still come from transcript');
|
|
||||||
assert.strictEqual(row.output_tokens, 5000, 'Token totals should still come from transcript');
|
|
||||||
} finally {
|
|
||||||
try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
|
|
||||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
|
||||||
}
|
|
||||||
}) ? passed++ : failed++);
|
|
||||||
|
|
||||||
// 9. Ignores stale harness-cost cache and falls back to transcript estimate
|
|
||||||
(test('ignores stale harness-cost cache (>300s) and uses transcript estimate', () => {
|
|
||||||
const tmpHome = makeTempDir();
|
|
||||||
const sessionId = 'harness-stale-' + Date.now();
|
|
||||||
const transcriptPath = path.join(tmpHome, 'session.jsonl');
|
|
||||||
writeTranscript(transcriptPath, [
|
|
||||||
{
|
|
||||||
type: 'assistant',
|
|
||||||
message: {
|
|
||||||
model: 'claude-sonnet-4-20250514',
|
|
||||||
usage: { input_tokens: 1000, output_tokens: 500 },
|
|
||||||
},
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
const harnessCachePath = path.join(os.tmpdir(), `harness-cost-${sessionId}.json`);
|
|
||||||
const staleEpoch = Math.floor(Date.now() / 1000) - 3600;
|
|
||||||
fs.writeFileSync(
|
|
||||||
harnessCachePath,
|
|
||||||
JSON.stringify({ ts: staleEpoch, cost_usd: 999.99 }),
|
|
||||||
'utf8'
|
|
||||||
);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const result = runScript(
|
|
||||||
{ session_id: sessionId, transcript_path: transcriptPath },
|
|
||||||
withTempHome(tmpHome)
|
|
||||||
);
|
|
||||||
assert.strictEqual(result.code, 0, `Expected exit code 0, got ${result.code}`);
|
|
||||||
|
|
||||||
const metricsFile = path.join(tmpHome, '.claude', 'metrics', 'costs.jsonl');
|
|
||||||
const row = JSON.parse(fs.readFileSync(metricsFile, 'utf8').trim());
|
|
||||||
assert.notStrictEqual(row.estimated_cost_usd, 999.99, 'Stale cache must not win');
|
|
||||||
assert.ok(row.estimated_cost_usd > 0, 'Expected fallback transcript estimate to be positive');
|
|
||||||
// Sonnet rates: 1000/1e6*3 + 500/1e6*15 ≈ $0.011 — well below the 999.99 stale value
|
|
||||||
assert.ok(row.estimated_cost_usd < 1, 'Expected small transcript estimate, not the stale 999.99');
|
|
||||||
} finally {
|
|
||||||
try { fs.unlinkSync(harnessCachePath); } catch { /* best-effort */ }
|
|
||||||
fs.rmSync(tmpHome, { recursive: true, force: true });
|
|
||||||
}
|
|
||||||
}) ? passed++ : failed++);
|
|
||||||
|
|
||||||
console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
|
console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
|
||||||
process.exit(failed > 0 ? 1 : 0);
|
process.exit(failed > 0 ? 1 : 0);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user