From 9661a6f042d52ad1c3d4e0674a1e63dc5fd06528 Mon Sep 17 00:00:00 2001 From: jtzingsheim1 <43869157+jtzingsheim1@users.noreply.github.com> Date: Sun, 8 Mar 2026 08:47:31 +1000 Subject: [PATCH] fix(hooks): scrub secrets and harden hook security (#348) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(hooks): scrub secrets and harden hook security - Scrub common secret patterns (api_key, token, password, etc.) from observation logs before persisting to JSONL (observe.sh) - Auto-purge observation files older than 30 days (observe.sh) - Strip embedded credentials from git remote URLs before saving to projects.json (detect-project.sh) - Add command prefix allowlist to runCommand — only git, node, npx, which, where are permitted (utils.js) - Sanitize CLAUDE_SESSION_ID in temp file paths to prevent path traversal (suggest-compact.js) Co-Authored-By: Claude Opus 4.6 * fix(hooks): address review feedback from CodeRabbit and Cubic - Reject shell command-chaining operators (;|&`) in runCommand, strip quoted sections before checking to avoid false positives (utils.js) - Remove command string from blocked error message to avoid leaking secrets (utils.js) - Fix Python regex quoting: switch outer shell string from double to single quotes so regex compiles correctly (observe.sh) - Add optional auth scheme match (Bearer, Basic) to secret scrubber regex (observe.sh) - Scope auto-purge to current project dir and match only archived files (observations-*.jsonl), not live queue (observe.sh) - Add second fallback after session ID sanitization to prevent empty string (suggest-compact.js) - Preserve backward compatibility when credential stripping changes project hash — detect and migrate legacy directories (detect-project.sh) Co-Authored-By: Claude Opus 4.6 * fix(hooks): block $() substitution, fix Bearer redaction, add security tests - Add $ and \n to blocked shell metacharacters in runCommand to prevent command substitution via $(cmd) and newline injection (utils.js) - Make auth scheme group capturing so Bearer/Basic is preserved in redacted output instead of being silently dropped (observe.sh) - Add 10 unit tests covering runCommand allowlist blocking (rm, curl, bash prefixes) and metacharacter rejection (;|&`$ chaining), plus error message leak prevention (utils.test.js) Co-Authored-By: Claude Opus 4.6 * fix(hooks): scrub parse-error fallback, strengthen security tests Address remaining reviewer feedback from CodeRabbit and Cubic: - Scrub secrets in observe.sh parse-error fallback path (was writing raw unsanitized input to observations file) - Remove redundant re.IGNORECASE flag ((?i) inline flag already set) - Add inline comment documenting quote-stripping limitation trade-off - Fix misleading test name for error-output test - Add 5 new security tests: single-quote passthrough, mixed quoted+unquoted metacharacters, prefix boundary (no trailing space), npx acceptance, and newline injection - Improve existing quoted-metacharacter test to actually exercise quote-stripping logic Co-Authored-By: Claude Opus 4.6 * fix(security): block $() and backtick inside quotes in runCommand Shell evaluates $() and backticks inside double quotes, so checking only the unquoted portion was insufficient. Now $ and ` are rejected anywhere in the command string, while ; | & remain quote-aware. Addresses CodeRabbit and Cubic review feedback on PR #348. Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- scripts/hooks/suggest-compact.js | 2 +- scripts/lib/utils.js | 14 +++ .../continuous-learning-v2/hooks/observe.sh | 67 ++++++++--- .../scripts/detect-project.sh | 19 +++ tests/lib/utils.test.js | 113 +++++++++++++++++- 5 files changed, 193 insertions(+), 22 deletions(-) diff --git a/scripts/hooks/suggest-compact.js b/scripts/hooks/suggest-compact.js index 81acc53e..7e07549a 100644 --- a/scripts/hooks/suggest-compact.js +++ b/scripts/hooks/suggest-compact.js @@ -25,7 +25,7 @@ async function main() { // Track tool call count (increment in a temp file) // Use a session-specific counter file based on session ID from environment // or parent PID as fallback - const sessionId = process.env.CLAUDE_SESSION_ID || 'default'; + const sessionId = (process.env.CLAUDE_SESSION_ID || 'default').replace(/[^a-zA-Z0-9_-]/g, '') || 'default'; const counterFile = path.join(getTempDir(), `claude-tool-count-${sessionId}`); const rawThreshold = parseInt(process.env.COMPACT_THRESHOLD || '50', 10); const threshold = Number.isFinite(rawThreshold) && rawThreshold > 0 && rawThreshold <= 10000 diff --git a/scripts/lib/utils.js b/scripts/lib/utils.js index 4c497546..41e04e57 100644 --- a/scripts/lib/utils.js +++ b/scripts/lib/utils.js @@ -339,6 +339,20 @@ function commandExists(cmd) { * @param {object} options - execSync options */ function runCommand(cmd, options = {}) { + // Allowlist: only permit known-safe command prefixes + const allowedPrefixes = ['git ', 'node ', 'npx ', 'which ', 'where ']; + if (!allowedPrefixes.some(prefix => cmd.startsWith(prefix))) { + return { success: false, output: 'runCommand blocked: unrecognized command prefix' }; + } + + // Reject shell metacharacters. $() and backticks are evaluated inside + // double quotes, so block $ and ` anywhere in cmd. Other operators + // (;|&) are literal inside quotes, so only check unquoted portions. + const unquoted = cmd.replace(/"[^"]*"/g, '').replace(/'[^']*'/g, ''); + if (/[;|&\n]/.test(unquoted) || /[`$]/.test(cmd)) { + return { success: false, output: 'runCommand blocked: shell metacharacters not allowed' }; + } + try { const result = execSync(cmd, { encoding: 'utf8', diff --git a/skills/continuous-learning-v2/hooks/observe.sh b/skills/continuous-learning-v2/hooks/observe.sh index 7f78f801..307cde67 100755 --- a/skills/continuous-learning-v2/hooks/observe.sh +++ b/skills/continuous-learning-v2/hooks/observe.sh @@ -72,6 +72,13 @@ if [ -f "$CONFIG_DIR/disabled" ]; then exit 0 fi +# Auto-purge observation files older than 30 days (runs once per session) +PURGE_MARKER="${PROJECT_DIR}/.last-purge" +if [ ! -f "$PURGE_MARKER" ] || [ "$(find "$PURGE_MARKER" -mtime +1 2>/dev/null)" ]; then + find "${PROJECT_DIR}" -name "observations-*.jsonl" -mtime +30 -delete 2>/dev/null || true + touch "$PURGE_MARKER" 2>/dev/null || true +fi + # Parse using python via stdin pipe (safe for all JSON payloads) # Pass HOOK_PHASE via env var since Claude Code does not include hook type in stdin JSON PARSED=$(echo "$INPUT_JSON" | HOOK_PHASE="$HOOK_PHASE" python3 -c ' @@ -125,14 +132,23 @@ except Exception as e: PARSED_OK=$(echo "$PARSED" | python3 -c "import json,sys; print(json.load(sys.stdin).get('parsed', False))" 2>/dev/null || echo "False") if [ "$PARSED_OK" != "True" ]; then - # Fallback: log raw input for debugging + # Fallback: log raw input for debugging (scrub secrets before persisting) timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") export TIMESTAMP="$timestamp" - echo "$INPUT_JSON" | python3 -c " -import json, sys, os + echo "$INPUT_JSON" | python3 -c ' +import json, sys, os, re + +_SECRET_RE = re.compile( + r"(?i)(api[_-]?key|token|secret|password|authorization|credentials?|auth)" + r"""(["'"'"'\s:=]+)""" + r"([A-Za-z]+\s+)?" + r"([A-Za-z0-9_\-/.+=]{8,})" +) + raw = sys.stdin.read()[:2000] -print(json.dumps({'timestamp': os.environ['TIMESTAMP'], 'event': 'parse_error', 'raw': raw})) -" >> "$OBSERVATIONS_FILE" +raw = _SECRET_RE.sub(lambda m: m.group(1) + m.group(2) + (m.group(3) or "") + "[REDACTED]", raw) +print(json.dumps({"timestamp": os.environ["TIMESTAMP"], "event": "parse_error", "raw": raw})) +' >> "$OBSERVATIONS_FILE" exit 0 fi @@ -147,32 +163,47 @@ if [ -f "$OBSERVATIONS_FILE" ]; then fi # Build and write observation (now includes project context) +# Scrub common secret patterns from tool I/O before persisting timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ") export PROJECT_ID_ENV="$PROJECT_ID" export PROJECT_NAME_ENV="$PROJECT_NAME" export TIMESTAMP="$timestamp" -echo "$PARSED" | python3 -c " -import json, sys, os +echo "$PARSED" | python3 -c ' +import json, sys, os, re parsed = json.load(sys.stdin) observation = { - 'timestamp': os.environ['TIMESTAMP'], - 'event': parsed['event'], - 'tool': parsed['tool'], - 'session': parsed['session'], - 'project_id': os.environ.get('PROJECT_ID_ENV', 'global'), - 'project_name': os.environ.get('PROJECT_NAME_ENV', 'global') + "timestamp": os.environ["TIMESTAMP"], + "event": parsed["event"], + "tool": parsed["tool"], + "session": parsed["session"], + "project_id": os.environ.get("PROJECT_ID_ENV", "global"), + "project_name": os.environ.get("PROJECT_NAME_ENV", "global") } -if parsed['input']: - observation['input'] = parsed['input'] -if parsed['output'] is not None: - observation['output'] = parsed['output'] +# Scrub secrets: match common key=value, key: value, and key"value patterns +# Includes optional auth scheme (e.g., "Bearer", "Basic") before token +_SECRET_RE = re.compile( + r"(?i)(api[_-]?key|token|secret|password|authorization|credentials?|auth)" + r"""(["'"'"'\s:=]+)""" + r"([A-Za-z]+\s+)?" + r"([A-Za-z0-9_\-/.+=]{8,})" +) + +def scrub(val): + if val is None: + return None + return _SECRET_RE.sub(lambda m: m.group(1) + m.group(2) + (m.group(3) or "") + "[REDACTED]", str(val)) + +if parsed["input"]: + observation["input"] = scrub(parsed["input"]) +if parsed["output"] is not None: + observation["output"] = scrub(parsed["output"]) print(json.dumps(observation)) -" >> "$OBSERVATIONS_FILE" +' >> "$OBSERVATIONS_FILE" # Signal observer if running (check both project-scoped and global observer) for pid_file in "${PROJECT_DIR}/.observer.pid" "${CONFIG_DIR}/.observer.pid"; do diff --git a/skills/continuous-learning-v2/scripts/detect-project.sh b/skills/continuous-learning-v2/scripts/detect-project.sh index 31703a21..a4bbfb32 100755 --- a/skills/continuous-learning-v2/scripts/detect-project.sh +++ b/skills/continuous-learning-v2/scripts/detect-project.sh @@ -64,6 +64,14 @@ _clv2_detect_project() { fi fi + # Compute hash from the original remote URL (legacy, for backward compatibility) + local legacy_hash_input="${remote_url:-$project_root}" + + # Strip embedded credentials from remote URL (e.g., https://ghp_xxxx@github.com/...) + if [ -n "$remote_url" ]; then + remote_url=$(printf '%s' "$remote_url" | sed -E 's|://[^@]+@|://|') + fi + local hash_input="${remote_url:-$project_root}" # Use SHA256 via python3 (portable across macOS/Linux, no shasum/sha256sum divergence) project_id=$(printf '%s' "$hash_input" | python3 -c "import sys,hashlib; print(hashlib.sha256(sys.stdin.buffer.read()).hexdigest()[:12])" 2>/dev/null) @@ -75,6 +83,17 @@ _clv2_detect_project() { echo "fallback") fi + # Backward compatibility: if credentials were stripped and the hash changed, + # check if a project dir exists under the legacy hash and reuse it + if [ "$legacy_hash_input" != "$hash_input" ]; then + local legacy_id + legacy_id=$(printf '%s' "$legacy_hash_input" | python3 -c "import sys,hashlib; print(hashlib.sha256(sys.stdin.buffer.read()).hexdigest()[:12])" 2>/dev/null) + if [ -n "$legacy_id" ] && [ -d "${_CLV2_PROJECTS_DIR}/${legacy_id}" ] && [ ! -d "${_CLV2_PROJECTS_DIR}/${project_id}" ]; then + # Migrate legacy directory to new hash + mv "${_CLV2_PROJECTS_DIR}/${legacy_id}" "${_CLV2_PROJECTS_DIR}/${project_id}" 2>/dev/null || project_id="$legacy_id" + fi + fi + # Export results _CLV2_PROJECT_ID="$project_id" _CLV2_PROJECT_NAME="$project_name" diff --git a/tests/lib/utils.test.js b/tests/lib/utils.test.js index 6a7c4125..a71590ff 100644 --- a/tests/lib/utils.test.js +++ b/tests/lib/utils.test.js @@ -976,13 +976,120 @@ function runTests() { assert.ok(result.output.includes('custom error'), 'Should include stderr output'); })) passed++; else failed++; - if (test('runCommand falls back to err.message when no stderr', () => { - // An invalid command that won't produce stderr through child process - const result = utils.runCommand('nonexistent_cmd_xyz_12345'); + if (test('runCommand returns error output on failed command', () => { + // Use an allowed prefix with a nonexistent subcommand to reach execSync + const result = utils.runCommand('git nonexistent-subcmd-xyz-12345'); assert.strictEqual(result.success, false); assert.ok(result.output.length > 0, 'Should have some error output'); })) passed++; else failed++; + // ── runCommand security: allowlist and metacharacter blocking ── + console.log('\nrunCommand Security (allowlist + metacharacters):'); + + if (test('runCommand blocks disallowed command prefix', () => { + const result = utils.runCommand('rm -rf /'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('unrecognized command prefix'), 'Should mention blocked prefix'); + })) passed++; else failed++; + + if (test('runCommand blocks curl command', () => { + const result = utils.runCommand('curl http://example.com'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('unrecognized command prefix')); + })) passed++; else failed++; + + if (test('runCommand blocks bash command', () => { + const result = utils.runCommand('bash -c "echo hello"'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('unrecognized command prefix')); + })) passed++; else failed++; + + if (test('runCommand blocks semicolon command chaining', () => { + const result = utils.runCommand('git status; echo pwned'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block semicolon chaining'); + })) passed++; else failed++; + + if (test('runCommand blocks pipe command chaining', () => { + const result = utils.runCommand('git log | cat'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block pipe chaining'); + })) passed++; else failed++; + + if (test('runCommand blocks ampersand command chaining', () => { + const result = utils.runCommand('git status && echo pwned'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block ampersand chaining'); + })) passed++; else failed++; + + if (test('runCommand blocks dollar sign command substitution', () => { + const result = utils.runCommand('git log $(whoami)'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block $ substitution'); + })) passed++; else failed++; + + if (test('runCommand blocks backtick command substitution', () => { + const result = utils.runCommand('git log `whoami`'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block backtick substitution'); + })) passed++; else failed++; + + if (test('runCommand allows metacharacters inside double quotes', () => { + // Semicolon inside quotes should not trigger metacharacter blocking + const result = utils.runCommand('node -e "console.log(1);process.exit(0)"'); + assert.strictEqual(result.success, true); + })) passed++; else failed++; + + if (test('runCommand allows metacharacters inside single quotes', () => { + const result = utils.runCommand("node -e 'process.exit(0);'"); + assert.strictEqual(result.success, true); + })) passed++; else failed++; + + if (test('runCommand blocks unquoted metacharacters alongside quoted ones', () => { + // Semicolon inside quotes is safe, but && outside is not + const result = utils.runCommand('git log "safe;part" && echo pwned'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed')); + })) passed++; else failed++; + + if (test('runCommand blocks prefix without trailing space', () => { + // "gitconfig" starts with "git" but not "git " — must be blocked + const result = utils.runCommand('gitconfig --list'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('unrecognized command prefix')); + })) passed++; else failed++; + + if (test('runCommand allows npx prefix', () => { + const result = utils.runCommand('npx --version'); + assert.strictEqual(result.success, true); + })) passed++; else failed++; + + if (test('runCommand blocks newline command injection', () => { + const result = utils.runCommand('git status\necho pwned'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block newline injection'); + })) passed++; else failed++; + + if (test('runCommand blocks $() inside double quotes (shell still evaluates)', () => { + // $() inside double quotes is still evaluated by the shell, so block $ everywhere + const result = utils.runCommand('node -e "$(whoami)"'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block $ inside quotes'); + })) passed++; else failed++; + + if (test('runCommand blocks backtick inside double quotes (shell still evaluates)', () => { + const result = utils.runCommand('node -e "`whoami`"'); + assert.strictEqual(result.success, false); + assert.ok(result.output.includes('metacharacters not allowed'), 'Should block backtick inside quotes'); + })) passed++; else failed++; + + if (test('runCommand error message does not leak command string', () => { + const secret = 'rm secret_password_123'; + const result = utils.runCommand(secret); + assert.strictEqual(result.success, false); + assert.ok(!result.output.includes('secret_password_123'), 'Should not leak command contents'); + })) passed++; else failed++; + // ── Round 31: getGitModifiedFiles with empty patterns ── console.log('\ngetGitModifiedFiles empty patterns (Round 31):');