From 0ff1b594d078045e3ed4b8f1e595b35611371063 Mon Sep 17 00:00:00 2001 From: Chandan Semwal <132329210+XploitMonk0x01@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:04:30 +0530 Subject: [PATCH] fix(harness-audit): make scoring deterministic with scripted rubric (#524) --- .opencode/commands/harness-audit.md | 47 ++- commands/harness-audit.md | 47 ++- package.json | 2 + scripts/harness-audit.js | 512 ++++++++++++++++++++++++++++ tests/scripts/harness-audit.test.js | 86 +++++ 5 files changed, 660 insertions(+), 34 deletions(-) create mode 100644 scripts/harness-audit.js create mode 100644 tests/scripts/harness-audit.test.js diff --git a/.opencode/commands/harness-audit.md b/.opencode/commands/harness-audit.md index e62eb2cd..1fd08425 100644 --- a/.opencode/commands/harness-audit.md +++ b/.opencode/commands/harness-audit.md @@ -1,6 +1,6 @@ # Harness Audit Command -Audit the current repository's agent harness setup and return a prioritized scorecard. +Run a deterministic repository harness audit and return a prioritized scorecard. ## Usage @@ -9,9 +9,19 @@ Audit the current repository's agent harness setup and return a prioritized scor - `scope` (optional): `repo` (default), `hooks`, `skills`, `commands`, `agents` - `--format`: output style (`text` default, `json` for automation) -## What to Evaluate +## Deterministic Engine -Score each category from `0` to `10`: +Always run: + +```bash +node scripts/harness-audit.js <scope> --format <text|json> +``` + +This script is the source of truth for scoring and checks. Do not invent additional dimensions or ad-hoc points. + +Rubric version: `2026-03-16`. + +The script computes 7 fixed categories (`0-10` normalized each): 1. Tool Coverage 2. Context Efficiency @@ -21,34 +31,37 @@ Score each category from `0` to `10`: 6. Security Guardrails 7. Cost Efficiency +Scores are derived from explicit file/rule checks and are reproducible for the same commit. + ## Output Contract Return: -1. `overall_score` out of 70 +1. 
`overall_score` out of `max_score` (70 for `repo`; smaller for scoped audits) 2. Category scores and concrete findings -3. Top 3 actions with exact file paths -4. Suggested ECC skills to apply next +3. Failed checks with exact file paths +4. Top 3 actions from the deterministic output (`top_actions`) +5. Suggested ECC skills to apply next ## Checklist -- Inspect `hooks/hooks.json`, `scripts/hooks/`, and hook tests. -- Inspect `skills/`, command coverage, and agent coverage. -- Verify cross-harness parity for `.cursor/`, `.opencode/`, `.codex/`. -- Flag broken or stale references. +- Use script output directly; do not rescore manually. +- If `--format json` is requested, return the script JSON unchanged. +- If text is requested, summarize failing checks and top actions. +- Include exact file paths from `checks[]` and `top_actions[]`. ## Example Result ```text -Harness Audit (repo): 52/70 -- Quality Gates: 9/10 -- Eval Coverage: 6/10 -- Cost Efficiency: 4/10 +Harness Audit (repo): 66/70 +- Tool Coverage: 10/10 (10/10 pts) +- Context Efficiency: 9/10 (9/10 pts) +- Quality Gates: 10/10 (10/10 pts) Top 3 Actions: -1) Add cost tracking hook in scripts/hooks/cost-tracker.js -2) Add pass@k docs and templates in skills/eval-harness/SKILL.md -3) Add command parity for /harness-audit in .opencode/commands/ +1) [Security Guardrails] Add prompt/tool preflight security guards in hooks/hooks.json. (hooks/hooks.json) +2) [Tool Coverage] Sync commands/harness-audit.md and .opencode/commands/harness-audit.md. (.opencode/commands/harness-audit.md) +3) [Eval Coverage] Increase automated test coverage across scripts/hooks/lib. (tests/) ``` ## Arguments diff --git a/commands/harness-audit.md b/commands/harness-audit.md index e62eb2cd..1fd08425 100644 --- a/commands/harness-audit.md +++ b/commands/harness-audit.md @@ -1,6 +1,6 @@ # Harness Audit Command -Audit the current repository's agent harness setup and return a prioritized scorecard. 
+Run a deterministic repository harness audit and return a prioritized scorecard. ## Usage @@ -9,9 +9,19 @@ Audit the current repository's agent harness setup and return a prioritized scor - `scope` (optional): `repo` (default), `hooks`, `skills`, `commands`, `agents` - `--format`: output style (`text` default, `json` for automation) -## What to Evaluate +## Deterministic Engine -Score each category from `0` to `10`: +Always run: + +```bash +node scripts/harness-audit.js <scope> --format <text|json> +``` + +This script is the source of truth for scoring and checks. Do not invent additional dimensions or ad-hoc points. + +Rubric version: `2026-03-16`. + +The script computes 7 fixed categories (`0-10` normalized each): 1. Tool Coverage 2. Context Efficiency @@ -21,34 +31,37 @@ Score each category from `0` to `10`: 6. Security Guardrails 7. Cost Efficiency +Scores are derived from explicit file/rule checks and are reproducible for the same commit. + ## Output Contract Return: -1. `overall_score` out of 70 +1. `overall_score` out of `max_score` (70 for `repo`; smaller for scoped audits) 2. Category scores and concrete findings -3. Top 3 actions with exact file paths -4. Suggested ECC skills to apply next +3. Failed checks with exact file paths +4. Top 3 actions from the deterministic output (`top_actions`) +5. Suggested ECC skills to apply next ## Checklist -- Inspect `hooks/hooks.json`, `scripts/hooks/`, and hook tests. -- Inspect `skills/`, command coverage, and agent coverage. -- Verify cross-harness parity for `.cursor/`, `.opencode/`, `.codex/`. -- Flag broken or stale references. +- Use script output directly; do not rescore manually. +- If `--format json` is requested, return the script JSON unchanged. +- If text is requested, summarize failing checks and top actions. +- Include exact file paths from `checks[]` and `top_actions[]`. 
## Example Result ```text -Harness Audit (repo): 52/70 -- Quality Gates: 9/10 -- Eval Coverage: 6/10 -- Cost Efficiency: 4/10 +Harness Audit (repo): 66/70 +- Tool Coverage: 10/10 (10/10 pts) +- Context Efficiency: 9/10 (9/10 pts) +- Quality Gates: 10/10 (10/10 pts) Top 3 Actions: -1) Add cost tracking hook in scripts/hooks/cost-tracker.js -2) Add pass@k docs and templates in skills/eval-harness/SKILL.md -3) Add command parity for /harness-audit in .opencode/commands/ +1) [Security Guardrails] Add prompt/tool preflight security guards in hooks/hooks.json. (hooks/hooks.json) +2) [Tool Coverage] Sync commands/harness-audit.md and .opencode/commands/harness-audit.md. (.opencode/commands/harness-audit.md) +3) [Eval Coverage] Increase automated test coverage across scripts/hooks/lib. (tests/) ``` ## Arguments diff --git a/package.json b/package.json index baf2e410..96b5085c 100644 --- a/package.json +++ b/package.json @@ -81,6 +81,7 @@ "scripts/setup-package-manager.js", "scripts/skill-create-output.js", "scripts/repair.js", + "scripts/harness-audit.js", "scripts/session-inspect.js", "scripts/uninstall.js", "skills/", @@ -97,6 +98,7 @@ "scripts": { "postinstall": "echo '\\n ecc-universal installed!\\n Run: npx ecc typescript\\n Compat: npx ecc-install typescript\\n Docs: https://github.com/affaan-m/everything-claude-code\\n'", "lint": "eslint . 
&& markdownlint '**/*.md' --ignore node_modules", + "harness:audit": "node scripts/harness-audit.js", "claw": "node scripts/claw.js", "orchestrate:status": "node scripts/orchestration-status.js", "orchestrate:worker": "bash scripts/orchestrate-codex-worker.sh", diff --git a/scripts/harness-audit.js b/scripts/harness-audit.js new file mode 100644 index 00000000..27867a34 --- /dev/null +++ b/scripts/harness-audit.js @@ -0,0 +1,512 @@ +#!/usr/bin/env node + +const fs = require('fs'); +const path = require('path'); + +const REPO_ROOT = path.join(__dirname, '..'); + +const CATEGORIES = [ + 'Tool Coverage', + 'Context Efficiency', + 'Quality Gates', + 'Memory Persistence', + 'Eval Coverage', + 'Security Guardrails', + 'Cost Efficiency', +]; + +function normalizeScope(scope) { + const value = (scope || 'repo').toLowerCase(); + if (!['repo', 'hooks', 'skills', 'commands', 'agents'].includes(value)) { + throw new Error(`Invalid scope: ${scope}`); + } + return value; +} + +function parseArgs(argv) { + const args = argv.slice(2); + const parsed = { + scope: 'repo', + format: 'text', + help: false, + }; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]; + + if (arg === '--help' || arg === '-h') { + parsed.help = true; + continue; + } + + if (arg === '--format') { + parsed.format = (args[index + 1] || '').toLowerCase(); + index += 1; + continue; + } + + if (arg === '--scope') { + parsed.scope = normalizeScope(args[index + 1]); + index += 1; + continue; + } + + if (arg.startsWith('--format=')) { + parsed.format = arg.split('=')[1].toLowerCase(); + continue; + } + + if (arg.startsWith('--scope=')) { + parsed.scope = normalizeScope(arg.split('=')[1]); + continue; + } + + if (arg.startsWith('-')) { + throw new Error(`Unknown argument: ${arg}`); + } + + parsed.scope = normalizeScope(arg); + } + + if (!['text', 'json'].includes(parsed.format)) { + throw new Error(`Invalid format: ${parsed.format}. 
Use text or json.`); + } + + return parsed; +} + +function fileExists(relativePath) { + return fs.existsSync(path.join(REPO_ROOT, relativePath)); +} + +function readText(relativePath) { + return fs.readFileSync(path.join(REPO_ROOT, relativePath), 'utf8'); +} + +function countFiles(relativeDir, extension) { + const dirPath = path.join(REPO_ROOT, relativeDir); + if (!fs.existsSync(dirPath)) { + return 0; + } + + const stack = [dirPath]; + let count = 0; + + while (stack.length > 0) { + const current = stack.pop(); + const entries = fs.readdirSync(current, { withFileTypes: true }); + + for (const entry of entries) { + const nextPath = path.join(current, entry.name); + if (entry.isDirectory()) { + stack.push(nextPath); + } else if (!extension || entry.name.endsWith(extension)) { + count += 1; + } + } + } + + return count; +} + +function safeRead(relativePath) { + try { + return readText(relativePath); + } catch (_error) { + return ''; + } +} + +function getChecks() { + const packageJson = JSON.parse(readText('package.json')); + const commandPrimary = safeRead('commands/harness-audit.md').trim(); + const commandParity = safeRead('.opencode/commands/harness-audit.md').trim(); + const hooksJson = safeRead('hooks/hooks.json'); + + return [ + { + id: 'tool-hooks-config', + category: 'Tool Coverage', + points: 2, + scopes: ['repo', 'hooks'], + path: 'hooks/hooks.json', + description: 'Hook configuration file exists', + pass: fileExists('hooks/hooks.json'), + fix: 'Create hooks/hooks.json and define baseline hook events.', + }, + { + id: 'tool-hooks-impl-count', + category: 'Tool Coverage', + points: 2, + scopes: ['repo', 'hooks'], + path: 'scripts/hooks/', + description: 'At least 8 hook implementation scripts exist', + pass: countFiles('scripts/hooks', '.js') >= 8, + fix: 'Add missing hook implementations in scripts/hooks/.', + }, + { + id: 'tool-agent-count', + category: 'Tool Coverage', + points: 2, + scopes: ['repo', 'agents'], + path: 'agents/', + description: 'At least 
10 agent definitions exist', + pass: countFiles('agents', '.md') >= 10, + fix: 'Add or restore agent definitions under agents/.', + }, + { + id: 'tool-skill-count', + category: 'Tool Coverage', + points: 2, + scopes: ['repo', 'skills'], + path: 'skills/', + description: 'At least 20 skill definitions exist', + pass: countFiles('skills', 'SKILL.md') >= 20, + fix: 'Add missing skill directories with SKILL.md definitions.', + }, + { + id: 'tool-command-parity', + category: 'Tool Coverage', + points: 2, + scopes: ['repo', 'commands'], + path: '.opencode/commands/harness-audit.md', + description: 'Harness-audit command parity exists between primary and OpenCode command docs', + pass: commandPrimary.length > 0 && commandPrimary === commandParity, + fix: 'Sync commands/harness-audit.md and .opencode/commands/harness-audit.md.', + }, + { + id: 'context-strategic-compact', + category: 'Context Efficiency', + points: 3, + scopes: ['repo', 'skills'], + path: 'skills/strategic-compact/SKILL.md', + description: 'Strategic compaction guidance is present', + pass: fileExists('skills/strategic-compact/SKILL.md'), + fix: 'Add strategic context compaction guidance at skills/strategic-compact/SKILL.md.', + }, + { + id: 'context-suggest-compact-hook', + category: 'Context Efficiency', + points: 3, + scopes: ['repo', 'hooks'], + path: 'scripts/hooks/suggest-compact.js', + description: 'Suggest-compact automation hook exists', + pass: fileExists('scripts/hooks/suggest-compact.js'), + fix: 'Implement scripts/hooks/suggest-compact.js for context pressure hints.', + }, + { + id: 'context-model-route', + category: 'Context Efficiency', + points: 2, + scopes: ['repo', 'commands'], + path: 'commands/model-route.md', + description: 'Model routing command exists', + pass: fileExists('commands/model-route.md'), + fix: 'Add model-route command guidance in commands/model-route.md.', + }, + { + id: 'context-token-doc', + category: 'Context Efficiency', + points: 2, + scopes: ['repo'], + path: 
'docs/token-optimization.md', + description: 'Token optimization documentation exists', + pass: fileExists('docs/token-optimization.md'), + fix: 'Add docs/token-optimization.md with concrete context-cost controls.', + }, + { + id: 'quality-test-runner', + category: 'Quality Gates', + points: 3, + scopes: ['repo'], + path: 'tests/run-all.js', + description: 'Central test runner exists', + pass: fileExists('tests/run-all.js'), + fix: 'Add tests/run-all.js to enforce complete suite execution.', + }, + { + id: 'quality-ci-validations', + category: 'Quality Gates', + points: 3, + scopes: ['repo'], + path: 'package.json', + description: 'Test script runs validator chain before tests', + pass: typeof packageJson.scripts?.test === 'string' && packageJson.scripts.test.includes('validate-commands.js') && packageJson.scripts.test.includes('tests/run-all.js'), + fix: 'Update package.json test script to run validators plus tests/run-all.js.', + }, + { + id: 'quality-hook-tests', + category: 'Quality Gates', + points: 2, + scopes: ['repo', 'hooks'], + path: 'tests/hooks/hooks.test.js', + description: 'Hook coverage test file exists', + pass: fileExists('tests/hooks/hooks.test.js'), + fix: 'Add tests/hooks/hooks.test.js for hook behavior validation.', + }, + { + id: 'quality-doctor-script', + category: 'Quality Gates', + points: 2, + scopes: ['repo'], + path: 'scripts/doctor.js', + description: 'Installation drift doctor script exists', + pass: fileExists('scripts/doctor.js'), + fix: 'Add scripts/doctor.js for install-state integrity checks.', + }, + { + id: 'memory-hooks-dir', + category: 'Memory Persistence', + points: 4, + scopes: ['repo', 'hooks'], + path: 'hooks/memory-persistence/', + description: 'Memory persistence hooks directory exists', + pass: fileExists('hooks/memory-persistence'), + fix: 'Add hooks/memory-persistence with lifecycle hook definitions.', + }, + { + id: 'memory-session-hooks', + category: 'Memory Persistence', + points: 4, + scopes: ['repo', 'hooks'], + 
path: 'scripts/hooks/session-start.js', + description: 'Session start/end persistence scripts exist', + pass: fileExists('scripts/hooks/session-start.js') && fileExists('scripts/hooks/session-end.js'), + fix: 'Implement scripts/hooks/session-start.js and scripts/hooks/session-end.js.', + }, + { + id: 'memory-learning-skill', + category: 'Memory Persistence', + points: 2, + scopes: ['repo', 'skills'], + path: 'skills/continuous-learning-v2/SKILL.md', + description: 'Continuous learning v2 skill exists', + pass: fileExists('skills/continuous-learning-v2/SKILL.md'), + fix: 'Add skills/continuous-learning-v2/SKILL.md for memory evolution flow.', + }, + { + id: 'eval-skill', + category: 'Eval Coverage', + points: 4, + scopes: ['repo', 'skills'], + path: 'skills/eval-harness/SKILL.md', + description: 'Eval harness skill exists', + pass: fileExists('skills/eval-harness/SKILL.md'), + fix: 'Add skills/eval-harness/SKILL.md for pass/fail regression evaluation.', + }, + { + id: 'eval-commands', + category: 'Eval Coverage', + points: 4, + scopes: ['repo', 'commands'], + path: 'commands/eval.md', + description: 'Eval and verification commands exist', + pass: fileExists('commands/eval.md') && fileExists('commands/verify.md') && fileExists('commands/checkpoint.md'), + fix: 'Add eval/checkpoint/verify commands to standardize verification loops.', + }, + { + id: 'eval-tests-presence', + category: 'Eval Coverage', + points: 2, + scopes: ['repo'], + path: 'tests/', + description: 'At least 10 test files exist', + pass: countFiles('tests', '.test.js') >= 10, + fix: 'Increase automated test coverage across scripts/hooks/lib.', + }, + { + id: 'security-review-skill', + category: 'Security Guardrails', + points: 3, + scopes: ['repo', 'skills'], + path: 'skills/security-review/SKILL.md', + description: 'Security review skill exists', + pass: fileExists('skills/security-review/SKILL.md'), + fix: 'Add skills/security-review/SKILL.md for security checklist coverage.', + }, + { + id: 
'security-agent', + category: 'Security Guardrails', + points: 3, + scopes: ['repo', 'agents'], + path: 'agents/security-reviewer.md', + description: 'Security reviewer agent exists', + pass: fileExists('agents/security-reviewer.md'), + fix: 'Add agents/security-reviewer.md for delegated security audits.', + }, + { + id: 'security-prompt-hook', + category: 'Security Guardrails', + points: 2, + scopes: ['repo', 'hooks'], + path: 'hooks/hooks.json', + description: 'Hooks include prompt submission guardrail event references', + pass: hooksJson.includes('beforeSubmitPrompt') || hooksJson.includes('PreToolUse'), + fix: 'Add prompt/tool preflight security guards in hooks/hooks.json.', + }, + { + id: 'security-scan-command', + category: 'Security Guardrails', + points: 2, + scopes: ['repo', 'commands'], + path: 'commands/security-scan.md', + description: 'Security scan command exists', + pass: fileExists('commands/security-scan.md'), + fix: 'Add commands/security-scan.md with scan and remediation workflow.', + }, + { + id: 'cost-skill', + category: 'Cost Efficiency', + points: 4, + scopes: ['repo', 'skills'], + path: 'skills/cost-aware-llm-pipeline/SKILL.md', + description: 'Cost-aware LLM skill exists', + pass: fileExists('skills/cost-aware-llm-pipeline/SKILL.md'), + fix: 'Add skills/cost-aware-llm-pipeline/SKILL.md for budget-aware routing.', + }, + { + id: 'cost-doc', + category: 'Cost Efficiency', + points: 3, + scopes: ['repo'], + path: 'docs/token-optimization.md', + description: 'Cost optimization documentation exists', + pass: fileExists('docs/token-optimization.md'), + fix: 'Create docs/token-optimization.md with target settings and tradeoffs.', + }, + { + id: 'cost-model-route-command', + category: 'Cost Efficiency', + points: 3, + scopes: ['repo', 'commands'], + path: 'commands/model-route.md', + description: 'Model route command exists for complexity-aware routing', + pass: fileExists('commands/model-route.md'), + fix: 'Add commands/model-route.md and route 
policies for cheap-default execution.', + }, + ]; +} + +function summarizeCategoryScores(checks) { + const scores = {}; + for (const category of CATEGORIES) { + const inCategory = checks.filter(check => check.category === category); + const max = inCategory.reduce((sum, check) => sum + check.points, 0); + const earned = inCategory + .filter(check => check.pass) + .reduce((sum, check) => sum + check.points, 0); + + const normalized = max === 0 ? 0 : Math.round((earned / max) * 10); + scores[category] = { + score: normalized, + earned, + max, + }; + } + + return scores; +} + +function buildReport(scope) { + const checks = getChecks().filter(check => check.scopes.includes(scope)); + const categoryScores = summarizeCategoryScores(checks); + const maxScore = checks.reduce((sum, check) => sum + check.points, 0); + const overallScore = checks + .filter(check => check.pass) + .reduce((sum, check) => sum + check.points, 0); + + const failedChecks = checks.filter(check => !check.pass); + const topActions = failedChecks + .sort((left, right) => right.points - left.points) + .slice(0, 3) + .map(check => ({ + action: check.fix, + path: check.path, + category: check.category, + points: check.points, + })); + + return { + scope, + deterministic: true, + rubric_version: '2026-03-16', + overall_score: overallScore, + max_score: maxScore, + categories: categoryScores, + checks: checks.map(check => ({ + id: check.id, + category: check.category, + points: check.points, + path: check.path, + description: check.description, + pass: check.pass, + })), + top_actions: topActions, + }; +} + +function printText(report) { + console.log(`Harness Audit (${report.scope}): ${report.overall_score}/${report.max_score}`); + console.log(''); + + for (const category of CATEGORIES) { + const data = report.categories[category]; + if (!data || data.max === 0) { + continue; + } + + console.log(`- ${category}: ${data.score}/10 (${data.earned}/${data.max} pts)`); + } + + const failed = 
report.checks.filter(check => !check.pass); + console.log(''); + console.log(`Checks: ${report.checks.length} total, ${failed.length} failing`); + + if (failed.length > 0) { + console.log(''); + console.log('Top 3 Actions:'); + report.top_actions.forEach((action, index) => { + console.log(`${index + 1}) [${action.category}] ${action.action} (${action.path})`); + }); + } +} + +function showHelp(exitCode = 0) { /* Print CLI usage, then terminate the process with exitCode (default 0). */ + console.log(` +Usage: node scripts/harness-audit.js [scope] [--scope <repo|hooks|skills|commands|agents>] [--format <text|json>] + +Deterministic harness audit based on explicit file/rule checks. +`); + process.exit(exitCode); +} + +function main() { + try { + const args = parseArgs(process.argv); + + if (args.help) { + showHelp(0); + return; + } + + const report = buildReport(args.scope); + + if (args.format === 'json') { + console.log(JSON.stringify(report, null, 2)); + } else { + printText(report); + } + } catch (error) { + console.error(`Error: ${error.message}`); + process.exit(1); + } +} + +if (require.main === module) { + main(); +} + +module.exports = { + buildReport, + parseArgs, +}; diff --git a/tests/scripts/harness-audit.test.js b/tests/scripts/harness-audit.test.js new file mode 100644 index 00000000..39f40b98 --- /dev/null +++ b/tests/scripts/harness-audit.test.js @@ -0,0 +1,86 @@ +/** + * Tests for scripts/harness-audit.js + */ + +const assert = require('assert'); +const path = require('path'); +const { execFileSync } = require('child_process'); + +const SCRIPT = path.join(__dirname, '..', '..', 'scripts', 'harness-audit.js'); + +function run(args = []) { + const stdout = execFileSync('node', [SCRIPT, ...args], { + cwd: path.join(__dirname, '..', '..'), + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 10000, + }); + + return stdout; +} + +function test(name, fn) { + try { + fn(); + console.log(` \u2713 ${name}`); + return true; + } catch (error) { + console.log(` \u2717 ${name}`); + console.log(` Error: ${error.message}`); + return false; + } +} + +function runTests() { + 
console.log('\n=== Testing harness-audit.js ===\n'); + + let passed = 0; + let failed = 0; + + if (test('json output is deterministic between runs', () => { + const first = run(['repo', '--format', 'json']); + const second = run(['repo', '--format', 'json']); + + assert.strictEqual(first, second); + })) passed++; else failed++; + + if (test('report includes bounded scores and fixed categories', () => { + const parsed = JSON.parse(run(['repo', '--format', 'json'])); + + assert.strictEqual(parsed.deterministic, true); + assert.strictEqual(parsed.rubric_version, '2026-03-16'); + assert.ok(parsed.overall_score >= 0); + assert.ok(parsed.max_score > 0); + assert.ok(parsed.overall_score <= parsed.max_score); + + const categoryNames = Object.keys(parsed.categories); + assert.ok(categoryNames.includes('Tool Coverage')); + assert.ok(categoryNames.includes('Context Efficiency')); + assert.ok(categoryNames.includes('Quality Gates')); + assert.ok(categoryNames.includes('Memory Persistence')); + assert.ok(categoryNames.includes('Eval Coverage')); + assert.ok(categoryNames.includes('Security Guardrails')); + assert.ok(categoryNames.includes('Cost Efficiency')); + })) passed++; else failed++; + + if (test('scope filtering changes max score and check list', () => { + const full = JSON.parse(run(['repo', '--format', 'json'])); + const scoped = JSON.parse(run(['hooks', '--format', 'json'])); + + assert.strictEqual(scoped.scope, 'hooks'); + assert.ok(scoped.max_score < full.max_score); + assert.ok(scoped.checks.length < full.checks.length); + assert.ok(scoped.checks.every(check => check.path.includes('hooks') || check.path.includes('scripts/hooks'))); + })) passed++; else failed++; + + if (test('text format includes summary header', () => { + const output = run(['repo']); + assert.ok(output.includes('Harness Audit (repo):')); + assert.ok(output.includes('Top 3 Actions:') || output.includes('Checks:')); + })) passed++; else failed++; + + console.log(`\nResults: Passed: ${passed}, 
Failed: ${failed}`); + process.exit(failed > 0 ? 1 : 0); +} + +runTests();