mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-04-14 22:13:41 +08:00
fix: 5 bugs + 2 tests from 3-agent deep bughunt
Bugs fixed: - B1: JS gate messages still said "cat one real record" -> redacted/synthetic - B2: Destructive bash key used 200-char truncation (collision bypass) -> SHA256 hash - B3: sanitizePath only stripped \n\r -> now strips null bytes, bidi overrides, all control chars - B4: Tool name matching was case-sensitive (latent bypass) -> lookup map normalization - B5: SKILL.md Gate Types missing MultiEdit -> added with explanation Tests added: - T1: MultiEdit gate denies first unchecked file (CRITICAL - was untested) - T2: MultiEdit allows after all files gated 11/11 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
'use strict';
|
'use strict';
|
||||||
|
|
||||||
|
const crypto = require('crypto');
|
||||||
const fs = require('fs');
|
const fs = require('fs');
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
|
|
||||||
@@ -109,7 +110,8 @@ function isChecked(key) {
|
|||||||
// --- Sanitize file path against injection ---
|
// --- Sanitize file path against injection ---
|
||||||
|
|
||||||
function sanitizePath(filePath) {
|
function sanitizePath(filePath) {
|
||||||
return filePath.replace(/[\n\r]/g, ' ').trim().slice(0, 500);
|
// Strip control chars (including null), bidi overrides, and newlines
|
||||||
|
return filePath.replace(/[\x00-\x1f\x7f\u200e\u200f\u202a-\u202e\u2066-\u2069]/g, ' ').trim().slice(0, 500);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Gate messages ---
|
// --- Gate messages ---
|
||||||
@@ -123,7 +125,7 @@ function editGateMsg(filePath) {
|
|||||||
'',
|
'',
|
||||||
'1. List ALL files that import/require this file (use Grep)',
|
'1. List ALL files that import/require this file (use Grep)',
|
||||||
'2. List the public functions/classes affected by this change',
|
'2. List the public functions/classes affected by this change',
|
||||||
'3. If this file reads/writes data files, cat one real record and show actual field names, structure, and date format',
|
'3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
|
||||||
'4. Quote the user\'s current instruction verbatim',
|
'4. Quote the user\'s current instruction verbatim',
|
||||||
'',
|
'',
|
||||||
'Present the facts, then retry the same operation.'
|
'Present the facts, then retry the same operation.'
|
||||||
@@ -139,7 +141,7 @@ function writeGateMsg(filePath) {
|
|||||||
'',
|
'',
|
||||||
'1. Name the file(s) and line(s) that will call this new file',
|
'1. Name the file(s) and line(s) that will call this new file',
|
||||||
'2. Confirm no existing file serves the same purpose (use Glob)',
|
'2. Confirm no existing file serves the same purpose (use Glob)',
|
||||||
'3. If this file reads/writes data files, cat one real record and show actual field names, structure, and date format',
|
'3. If this file reads/writes data files, show field names, structure, and date format (use redacted or synthetic values, not raw production data)',
|
||||||
'4. Quote the user\'s current instruction verbatim',
|
'4. Quote the user\'s current instruction verbatim',
|
||||||
'',
|
'',
|
||||||
'Present the facts, then retry the same operation.'
|
'Present the facts, then retry the same operation.'
|
||||||
@@ -194,8 +196,11 @@ function run(rawInput) {
|
|||||||
return rawInput; // allow on parse error
|
return rawInput; // allow on parse error
|
||||||
}
|
}
|
||||||
|
|
||||||
const toolName = data.tool_name || '';
|
const rawToolName = data.tool_name || '';
|
||||||
const toolInput = data.tool_input || {};
|
const toolInput = data.tool_input || {};
|
||||||
|
// Normalize: case-insensitive matching via lookup map
|
||||||
|
const TOOL_MAP = { 'edit': 'Edit', 'write': 'Write', 'multiedit': 'MultiEdit', 'bash': 'Bash' };
|
||||||
|
const toolName = TOOL_MAP[rawToolName.toLowerCase()] || rawToolName;
|
||||||
|
|
||||||
if (toolName === 'Edit' || toolName === 'Write') {
|
if (toolName === 'Edit' || toolName === 'Write') {
|
||||||
const filePath = toolInput.file_path || '';
|
const filePath = toolInput.file_path || '';
|
||||||
@@ -228,7 +233,7 @@ function run(rawInput) {
|
|||||||
|
|
||||||
if (DESTRUCTIVE_BASH.test(command)) {
|
if (DESTRUCTIVE_BASH.test(command)) {
|
||||||
// Gate destructive commands on first attempt; allow retry after facts presented
|
// Gate destructive commands on first attempt; allow retry after facts presented
|
||||||
const key = '__destructive__' + command.slice(0, 200);
|
const key = '__destructive__' + crypto.createHash('sha256').update(command).digest('hex').slice(0, 16);
|
||||||
if (!isChecked(key)) {
|
if (!isChecked(key)) {
|
||||||
markChecked(key);
|
markChecked(key);
|
||||||
return denyResult(destructiveBashMsg());
|
return denyResult(destructiveBashMsg());
|
||||||
|
|||||||
@@ -45,7 +45,9 @@ Both agents produce code that runs and passes tests. The difference is design de
|
|||||||
|
|
||||||
## Gate Types
|
## Gate Types
|
||||||
|
|
||||||
### Edit Gate (first edit per file)
|
### Edit / MultiEdit Gate (first edit per file)
|
||||||
|
|
||||||
|
MultiEdit is handled identically — each file in the batch is gated individually.
|
||||||
|
|
||||||
```
|
```
|
||||||
Before editing {file_path}, present these facts:
|
Before editing {file_path}, present these facts:
|
||||||
|
|||||||
@@ -307,6 +307,55 @@ function runTests() {
|
|||||||
}
|
}
|
||||||
})) passed++; else failed++;
|
})) passed++; else failed++;
|
||||||
|
|
||||||
|
// --- Test 10: MultiEdit gates first unchecked file ---
|
||||||
|
clearState();
|
||||||
|
if (test('denies first MultiEdit with unchecked file', () => {
|
||||||
|
const input = {
|
||||||
|
tool_name: 'MultiEdit',
|
||||||
|
tool_input: {
|
||||||
|
edits: [
|
||||||
|
{ file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
|
||||||
|
{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const result = runHook(input);
|
||||||
|
assert.strictEqual(result.code, 0, 'exit code should be 0');
|
||||||
|
const output = parseOutput(result.stdout);
|
||||||
|
assert.ok(output, 'should produce JSON output');
|
||||||
|
assert.strictEqual(output.hookSpecificOutput.permissionDecision, 'deny');
|
||||||
|
assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('Fact-Forcing Gate'));
|
||||||
|
assert.ok(output.hookSpecificOutput.permissionDecisionReason.includes('/src/multi-a.js'));
|
||||||
|
})) passed++; else failed++;
|
||||||
|
|
||||||
|
// --- Test 11: MultiEdit allows after all files gated ---
|
||||||
|
if (test('allows MultiEdit after all files gated', () => {
|
||||||
|
// multi-a.js was gated in test 10; gate multi-b.js
|
||||||
|
const input2 = {
|
||||||
|
tool_name: 'MultiEdit',
|
||||||
|
tool_input: { edits: [{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }] }
|
||||||
|
};
|
||||||
|
runHook(input2); // gates multi-b.js
|
||||||
|
|
||||||
|
// Now both files are gated — retry should allow
|
||||||
|
const input3 = {
|
||||||
|
tool_name: 'MultiEdit',
|
||||||
|
tool_input: {
|
||||||
|
edits: [
|
||||||
|
{ file_path: '/src/multi-a.js', old_string: 'a', new_string: 'b' },
|
||||||
|
{ file_path: '/src/multi-b.js', old_string: 'c', new_string: 'd' }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const result3 = runHook(input3);
|
||||||
|
const output3 = parseOutput(result3.stdout);
|
||||||
|
assert.ok(output3, 'should produce valid JSON');
|
||||||
|
if (output3.hookSpecificOutput) {
|
||||||
|
assert.notStrictEqual(output3.hookSpecificOutput.permissionDecision, 'deny',
|
||||||
|
'should allow MultiEdit after all files gated');
|
||||||
|
}
|
||||||
|
})) passed++; else failed++;
|
||||||
|
|
||||||
// Cleanup: remove test-isolated state directory
|
// Cleanup: remove test-isolated state directory
|
||||||
try {
|
try {
|
||||||
if (fs.existsSync(stateDir)) {
|
if (fs.existsSync(stateDir)) {
|
||||||
|
|||||||
Reference in New Issue
Block a user