feat: agent compression, inspection logic, governance hooks (#491, #485, #482) (#688)

Implements three roadmap features:

- Agent description compression (#491): New `agent-compress` module with
  catalog/summary/full compression modes and lazy-loading. Shrinks the
  ~26k tokens of agent descriptions to a ~2-3k-token catalog for context
  efficiency.

- Inspection logic (#485): New `inspection` module that detects recurring
  failure patterns in skill_runs. Groups by skill + normalized failure
  reason, generates structured reports with suggested remediation actions.
  Configurable threshold (default: 3 failures).

- Governance event capture hook (#482): PreToolUse/PostToolUse hook that
  detects secrets, policy violations, approval-required commands, and
  elevated privilege usage. Gated behind ECC_GOVERNANCE_CAPTURE=1 flag.
  Writes to governance_events table via JSON-line stderr output.

59 new tests (16 + 16 + 27), all passing.
This commit is contained in:
Affaan Mustafa
2026-03-20 01:38:13 -07:00
committed by GitHub
parent 28de7cc420
commit 0b0b66c02f
7 changed files with 1563 additions and 0 deletions

View File

@@ -0,0 +1,293 @@
/**
* Tests for agent description compression and lazy loading.
*/
const assert = require('assert');
const fs = require('fs');
const os = require('os');
const path = require('path');
const {
parseFrontmatter,
extractSummary,
loadAgent,
loadAgents,
compressToCatalog,
compressToSummary,
buildAgentCatalog,
lazyLoadAgent,
} = require('../../scripts/lib/agent-compress');
/**
 * Creates a uniquely named directory under the OS temp directory.
 *
 * @param {string} prefix - Leading part of the directory name.
 * @returns {string} Path of the newly created directory.
 */
function createTempDir(prefix) {
  const template = path.join(os.tmpdir(), prefix);
  const createdDir = fs.mkdtempSync(template);
  return createdDir;
}
/**
 * Removes a directory and everything inside it.
 * A path that does not exist is ignored because `force` is set.
 *
 * @param {string} dirPath - Directory to delete.
 */
function cleanupTempDir(dirPath) {
  const removalOptions = { recursive: true, force: true };
  fs.rmSync(dirPath, removalOptions);
}
/**
 * Writes an agent fixture to `<dir>/<name>.md` as UTF-8 text.
 *
 * @param {string} dir - Directory that receives the file.
 * @param {string} name - File name without the `.md` extension.
 * @param {string} content - Full file contents.
 */
function writeAgent(dir, name, content) {
  const agentFile = `${name}.md`;
  const agentPath = path.join(dir, agentFile);
  fs.writeFileSync(agentPath, content, 'utf8');
}
// Fixture: an agent file with frontmatter (name, description, tools, model)
// followed by a body that has an opening sentence, headings and bullet lists.
// The tests below assert on its frontmatter values and on the
// "You are a test agent" opening line of the body.
const SAMPLE_AGENT = `---
name: test-agent
description: A test agent for unit testing purposes.
tools: ["Read", "Grep", "Glob"]
model: sonnet
---
You are a test agent that validates compression logic.
## Your Role
- Run unit tests
- Validate compression output
- Ensure correctness
## Process
### 1. Setup
- Prepare test fixtures
- Load agent files
### 2. Validate
Check the output format and content.
`;
// Fixture: the smallest agent file used here, with frontmatter plus a one-line body.
// Its `haiku` model and `minimal` name are what the filter test matches on.
const MINIMAL_AGENT = `---
name: minimal
description: Minimal agent.
tools: ["Read"]
model: haiku
---
Short body.
`;
/**
 * Runs one named test case and prints a pass or fail line for it.
 *
 * @param {string} name - Label printed next to the check mark or cross.
 * @param {Function} fn - Test body, sync or async; throwing or rejecting marks a failure.
 * @returns {Promise<boolean>} `true` when `fn` completed, `false` when it threw.
 */
async function test(name, fn) {
  try {
    await fn();
    console.log(` \u2713 ${name}`);
    return true;
  } catch (error) {
    // JavaScript allows throwing any value. Reading `.message` off `null` or
    // `undefined` would throw again from inside this handler and reject the
    // helper itself, and a thrown string would print "undefined".
    const hasMessage = error !== null && error !== undefined && error.message !== undefined;
    const detail = hasMessage ? error.message : String(error);
    console.log(` \u2717 ${name}`);
    console.log(` Error: ${detail}`);
    return false;
  }
}
/**
 * Runs every agent-compress test case in declaration order, prints a
 * pass/fail tally, then exits the process: status 1 if any case failed,
 * status 0 otherwise.
 */
async function runTests() {
  console.log('\n=== Testing agent-compress ===\n');

  // Hands a fresh temp directory to `body` and removes it afterwards,
  // even when an assertion inside `body` throws.
  const withTempDir = (body) => {
    const tmpDir = createTempDir('ecc-agent-compress-');
    try {
      return body(tmpDir);
    } finally {
      cleanupTempDir(tmpDir);
    }
  };

  const cases = [
    {
      title: 'parseFrontmatter extracts YAML frontmatter and body',
      body: async () => {
        const { frontmatter, body } = parseFrontmatter(SAMPLE_AGENT);
        assert.strictEqual(frontmatter.name, 'test-agent');
        assert.strictEqual(frontmatter.description, 'A test agent for unit testing purposes.');
        assert.deepStrictEqual(frontmatter.tools, ['Read', 'Grep', 'Glob']);
        assert.strictEqual(frontmatter.model, 'sonnet');
        assert.ok(body.includes('You are a test agent'));
      },
    },
    {
      title: 'parseFrontmatter handles content without frontmatter',
      body: async () => {
        const { frontmatter, body } = parseFrontmatter('Just a plain document.');
        assert.deepStrictEqual(frontmatter, {});
        assert.strictEqual(body, 'Just a plain document.');
      },
    },
    {
      title: 'extractSummary returns the first paragraph of the body',
      body: async () => {
        const { body } = parseFrontmatter(SAMPLE_AGENT);
        const firstParagraph = extractSummary(body);
        assert.ok(firstParagraph.includes('test agent'));
        assert.ok(firstParagraph.includes('compression logic'));
      },
    },
    {
      title: 'extractSummary returns empty string for empty body',
      body: async () => {
        assert.strictEqual(extractSummary(''), '');
        assert.strictEqual(extractSummary('# Just a heading'), '');
      },
    },
    {
      title: 'loadAgent reads and parses a single agent file',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'test-agent', SAMPLE_AGENT);
        const loaded = loadAgent(path.join(tmpDir, 'test-agent.md'));
        assert.strictEqual(loaded.name, 'test-agent');
        assert.strictEqual(loaded.fileName, 'test-agent');
        assert.deepStrictEqual(loaded.tools, ['Read', 'Grep', 'Glob']);
        assert.strictEqual(loaded.model, 'sonnet');
        assert.ok(loaded.byteSize > 0);
        assert.ok(loaded.body.includes('You are a test agent'));
      }),
    },
    {
      title: 'loadAgents reads all .md files from a directory',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'agent-a', SAMPLE_AGENT);
        writeAgent(tmpDir, 'agent-b', MINIMAL_AGENT);
        const loadedAgents = loadAgents(tmpDir);
        assert.strictEqual(loadedAgents.length, 2);
        assert.strictEqual(loadedAgents[0].fileName, 'agent-a');
        assert.strictEqual(loadedAgents[1].fileName, 'agent-b');
      }),
    },
    {
      title: 'loadAgents returns empty array for non-existent directory',
      body: async () => {
        const loadedAgents = loadAgents('/tmp/nonexistent-ecc-dir-12345');
        assert.deepStrictEqual(loadedAgents, []);
      },
    },
    {
      title: 'compressToCatalog strips body and keeps only metadata',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'test-agent', SAMPLE_AGENT);
        const loaded = loadAgent(path.join(tmpDir, 'test-agent.md'));
        const entry = compressToCatalog(loaded);
        assert.strictEqual(entry.name, 'test-agent');
        assert.strictEqual(entry.description, 'A test agent for unit testing purposes.');
        assert.deepStrictEqual(entry.tools, ['Read', 'Grep', 'Glob']);
        assert.strictEqual(entry.model, 'sonnet');
        assert.strictEqual(entry.body, undefined);
      }),
    },
    {
      title: 'compressToSummary includes first paragraph summary',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'test-agent', SAMPLE_AGENT);
        const loaded = loadAgent(path.join(tmpDir, 'test-agent.md'));
        const condensed = compressToSummary(loaded);
        assert.strictEqual(condensed.name, 'test-agent');
        assert.ok(condensed.summary.length > 0);
        assert.strictEqual(condensed.body, undefined);
      }),
    },
    {
      title: 'buildAgentCatalog in catalog mode produces minimal output with stats',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'agent-a', SAMPLE_AGENT);
        writeAgent(tmpDir, 'agent-b', MINIMAL_AGENT);
        const result = buildAgentCatalog(tmpDir, { mode: 'catalog' });
        assert.strictEqual(result.agents.length, 2);
        assert.strictEqual(result.stats.totalAgents, 2);
        assert.strictEqual(result.stats.mode, 'catalog');
        assert.ok(result.stats.originalBytes > 0);
        assert.ok(result.stats.compressedBytes > 0);
        assert.ok(result.stats.compressedBytes < result.stats.originalBytes);
        assert.ok(result.stats.compressedTokenEstimate > 0);
        // Catalog entries carry metadata only, never the body.
        for (const entry of result.agents) {
          assert.strictEqual(entry.body, undefined);
          assert.ok(entry.name);
          assert.ok(entry.description);
        }
      }),
    },
    {
      title: 'buildAgentCatalog in summary mode includes summaries',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'agent-a', SAMPLE_AGENT);
        const result = buildAgentCatalog(tmpDir, { mode: 'summary' });
        assert.strictEqual(result.agents.length, 1);
        assert.ok(result.agents[0].summary);
        assert.strictEqual(result.agents[0].body, undefined);
      }),
    },
    {
      title: 'buildAgentCatalog in full mode preserves body',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'agent-a', SAMPLE_AGENT);
        const result = buildAgentCatalog(tmpDir, { mode: 'full' });
        assert.strictEqual(result.agents.length, 1);
        assert.ok(result.agents[0].body.includes('You are a test agent'));
      }),
    },
    {
      title: 'buildAgentCatalog supports filter function',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'agent-a', SAMPLE_AGENT);
        writeAgent(tmpDir, 'agent-b', MINIMAL_AGENT);
        const onlyHaiku = agent => agent.model === 'haiku';
        const result = buildAgentCatalog(tmpDir, { mode: 'catalog', filter: onlyHaiku });
        assert.strictEqual(result.agents.length, 1);
        assert.strictEqual(result.agents[0].name, 'minimal');
      }),
    },
    {
      title: 'lazyLoadAgent loads a single agent by name',
      body: async () => withTempDir((tmpDir) => {
        writeAgent(tmpDir, 'test-agent', SAMPLE_AGENT);
        writeAgent(tmpDir, 'other', MINIMAL_AGENT);
        const loaded = lazyLoadAgent(tmpDir, 'test-agent');
        assert.ok(loaded);
        assert.strictEqual(loaded.name, 'test-agent');
        assert.ok(loaded.body.includes('You are a test agent'));
      }),
    },
    {
      title: 'lazyLoadAgent returns null for non-existent agent',
      body: async () => withTempDir((tmpDir) => {
        const loaded = lazyLoadAgent(tmpDir, 'nonexistent');
        assert.strictEqual(loaded, null);
      }),
    },
    {
      title: 'buildAgentCatalog works with real agents directory',
      body: async () => {
        const agentsDir = path.join(__dirname, '..', '..', 'agents');
        // Counted as a pass when the directory is absent: there is nothing to check.
        if (!fs.existsSync(agentsDir)) {
          return;
        }
        const result = buildAgentCatalog(agentsDir, { mode: 'catalog' });
        assert.ok(result.agents.length > 0, 'Should find at least one agent');
        assert.ok(result.stats.originalBytes > 0);
        assert.ok(
          result.stats.compressedBytes < result.stats.originalBytes,
          'Catalog mode should be smaller than full agent files'
        );
      },
    },
  ];

  let passed = 0;
  let failed = 0;
  // Cases run strictly one after another so the printed order matches the list.
  for (const { title, body } of cases) {
    const succeeded = await test(title, body);
    if (succeeded) {
      passed += 1;
    } else {
      failed += 1;
    }
  }

  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
  process.exit(failed > 0 ? 1 : 0);
}
// Do not leave the promise floating: if the harness itself rejects, report it
// and exit non-zero so the run can never be mistaken for a pass.
runTests().catch((error) => {
  console.error(error);
  process.exit(1);
});

View File

@@ -0,0 +1,232 @@
/**
* Tests for inspection logic — pattern detection from failures.
*/
const assert = require('assert');
const {
normalizeFailureReason,
groupFailures,
detectPatterns,
generateReport,
suggestAction,
DEFAULT_FAILURE_THRESHOLD,
} = require('../../scripts/lib/inspection');
/**
 * Runs one named test case and prints a pass or fail line for it.
 *
 * @param {string} name - Label printed next to the check mark or cross.
 * @param {Function} fn - Test body, sync or async; throwing or rejecting marks a failure.
 * @returns {Promise<boolean>} `true` when `fn` completed, `false` when it threw.
 */
async function test(name, fn) {
  try {
    await fn();
    console.log(` \u2713 ${name}`);
    return true;
  } catch (error) {
    // A thrown value is not guaranteed to be an Error. Guard the `.message`
    // read so `throw null` / `throw undefined` cannot crash this handler, and
    // fall back to the stringified value for primitives.
    const hasMessage = error !== null && error !== undefined && error.message !== undefined;
    const detail = hasMessage ? error.message : String(error);
    console.log(` \u2717 ${name}`);
    console.log(` Error: ${detail}`);
    return false;
  }
}
/**
 * Builds a skill-run record for tests, filling every field the caller omits.
 *
 * A default applies only when the override is `undefined` or `null`.
 * Legitimate falsy values such as `tokensUsed: 0` or `failureReason: ''` are
 * kept, so edge cases can be expressed instead of being silently replaced.
 *
 * @param {object} [overrides] - Field values that replace the defaults.
 * @returns {object} Record with id, skillId, skillVersion, sessionId,
 *   taskDescription, outcome, failureReason, tokensUsed, durationMs,
 *   userFeedback and createdAt.
 */
function makeSkillRun(overrides = {}) {
  // Returns the override for `key` unless it is null or undefined.
  const valueOr = (key, fallback) => {
    const value = overrides[key];
    return value === undefined || value === null ? fallback : value;
  };
  return {
    // The random suffix keeps generated ids distinct when a test supplies none.
    id: valueOr('id', `run-${Math.random().toString(36).slice(2, 8)}`),
    skillId: valueOr('skillId', 'test-skill'),
    skillVersion: valueOr('skillVersion', '1.0.0'),
    sessionId: valueOr('sessionId', 'session-1'),
    taskDescription: valueOr('taskDescription', 'test task'),
    outcome: valueOr('outcome', 'failure'),
    failureReason: valueOr('failureReason', 'generic error'),
    tokensUsed: valueOr('tokensUsed', 500),
    durationMs: valueOr('durationMs', 1000),
    userFeedback: valueOr('userFeedback', null),
    createdAt: valueOr('createdAt', '2026-03-15T08:00:00.000Z'),
  };
}
/**
 * Runs every inspection test case in declaration order, prints a pass/fail
 * tally, then exits the process: status 1 if any case failed, status 0
 * otherwise.
 */
async function runTests() {
  console.log('\n=== Testing inspection ===\n');

  const cases = [
    {
      title: 'normalizeFailureReason strips timestamps and UUIDs',
      body: async () => {
        const cleaned = normalizeFailureReason(
          'Error at 2026-03-15T08:00:00.000Z for id 550e8400-e29b-41d4-a716-446655440000'
        );
        assert.ok(!cleaned.includes('2026'));
        assert.ok(!cleaned.includes('550e8400'));
        assert.ok(cleaned.includes('<timestamp>'));
        assert.ok(cleaned.includes('<uuid>'));
      },
    },
    {
      title: 'normalizeFailureReason strips file paths',
      body: async () => {
        const cleaned = normalizeFailureReason('File not found: /usr/local/bin/node');
        assert.ok(!cleaned.includes('/usr/local'));
        assert.ok(cleaned.includes('<path>'));
      },
    },
    {
      title: 'normalizeFailureReason handles null and empty values',
      body: async () => {
        assert.strictEqual(normalizeFailureReason(null), 'unknown');
        assert.strictEqual(normalizeFailureReason(''), 'unknown');
        assert.strictEqual(normalizeFailureReason(undefined), 'unknown');
      },
    },
    {
      title: 'groupFailures groups by skillId and normalized reason',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', skillId: 'skill-a', failureReason: 'timeout' }),
          makeSkillRun({ id: 'r2', skillId: 'skill-a', failureReason: 'timeout' }),
          makeSkillRun({ id: 'r3', skillId: 'skill-b', failureReason: 'parse error' }),
          // A successful run must not land in any failure group.
          makeSkillRun({ id: 'r4', skillId: 'skill-a', outcome: 'success' }),
        ];
        const grouped = groupFailures(runs);
        assert.strictEqual(grouped.size, 2);
        const timeoutGroup = grouped.get('skill-a::timeout');
        assert.ok(timeoutGroup);
        assert.strictEqual(timeoutGroup.runs.length, 2);
        const parseGroup = grouped.get('skill-b::parse error');
        assert.ok(parseGroup);
        assert.strictEqual(parseGroup.runs.length, 1);
      },
    },
    {
      title: 'groupFailures handles mixed outcome casing',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', outcome: 'FAILURE', failureReason: 'timeout' }),
          makeSkillRun({ id: 'r2', outcome: 'Failed', failureReason: 'timeout' }),
          makeSkillRun({ id: 'r3', outcome: 'error', failureReason: 'timeout' }),
        ];
        const grouped = groupFailures(runs);
        assert.strictEqual(grouped.size, 1);
        const onlyGroup = grouped.values().next().value;
        assert.strictEqual(onlyGroup.runs.length, 3);
      },
    },
    {
      title: 'detectPatterns returns empty array when below threshold',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', failureReason: 'timeout' }),
          makeSkillRun({ id: 'r2', failureReason: 'timeout' }),
        ];
        const found = detectPatterns(runs, { threshold: 3 });
        assert.strictEqual(found.length, 0);
      },
    },
    {
      title: 'detectPatterns detects patterns at or above threshold',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', failureReason: 'timeout', createdAt: '2026-03-15T08:00:00Z' }),
          makeSkillRun({ id: 'r2', failureReason: 'timeout', createdAt: '2026-03-15T08:01:00Z' }),
          makeSkillRun({ id: 'r3', failureReason: 'timeout', createdAt: '2026-03-15T08:02:00Z' }),
        ];
        const found = detectPatterns(runs, { threshold: 3 });
        assert.strictEqual(found.length, 1);
        assert.strictEqual(found[0].count, 3);
        assert.strictEqual(found[0].skillId, 'test-skill');
        assert.strictEqual(found[0].normalizedReason, 'timeout');
        assert.strictEqual(found[0].firstSeen, '2026-03-15T08:00:00Z');
        assert.strictEqual(found[0].lastSeen, '2026-03-15T08:02:00Z');
        assert.strictEqual(found[0].runIds.length, 3);
      },
    },
    {
      title: 'detectPatterns uses default threshold',
      body: async () => {
        const runs = Array.from({ length: DEFAULT_FAILURE_THRESHOLD }, (_, index) =>
          makeSkillRun({ id: `r${index}`, failureReason: 'permission denied' })
        );
        const found = detectPatterns(runs);
        assert.strictEqual(found.length, 1);
      },
    },
    {
      title: 'detectPatterns sorts by count descending',
      body: async () => {
        // Four timeouts for skill-a, then three parse errors for skill-b.
        const timeouts = Array.from({ length: 4 }, (_, index) =>
          makeSkillRun({ id: `t${index}`, skillId: 'skill-a', failureReason: 'timeout' })
        );
        const parseErrors = Array.from({ length: 3 }, (_, index) =>
          makeSkillRun({ id: `p${index}`, skillId: 'skill-b', failureReason: 'parse error' })
        );
        const runs = [...timeouts, ...parseErrors];
        const found = detectPatterns(runs, { threshold: 3 });
        assert.strictEqual(found.length, 2);
        assert.strictEqual(found[0].count, 4);
        assert.strictEqual(found[0].skillId, 'skill-a');
        assert.strictEqual(found[1].count, 3);
        assert.strictEqual(found[1].skillId, 'skill-b');
      },
    },
    {
      title: 'detectPatterns groups similar failure reasons with different timestamps',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', failureReason: 'Error at 2026-03-15T08:00:00Z in /tmp/foo' }),
          makeSkillRun({ id: 'r2', failureReason: 'Error at 2026-03-15T09:00:00Z in /tmp/bar' }),
          makeSkillRun({ id: 'r3', failureReason: 'Error at 2026-03-15T10:00:00Z in /tmp/baz' }),
        ];
        const found = detectPatterns(runs, { threshold: 3 });
        assert.strictEqual(found.length, 1);
        assert.ok(found[0].normalizedReason.includes('<timestamp>'));
        assert.ok(found[0].normalizedReason.includes('<path>'));
      },
    },
    {
      title: 'detectPatterns tracks unique session IDs and versions',
      body: async () => {
        const runs = [
          makeSkillRun({ id: 'r1', sessionId: 'sess-1', skillVersion: '1.0.0', failureReason: 'err' }),
          makeSkillRun({ id: 'r2', sessionId: 'sess-2', skillVersion: '1.0.0', failureReason: 'err' }),
          makeSkillRun({ id: 'r3', sessionId: 'sess-1', skillVersion: '1.1.0', failureReason: 'err' }),
        ];
        const found = detectPatterns(runs, { threshold: 3 });
        assert.strictEqual(found.length, 1);
        assert.deepStrictEqual(found[0].sessionIds.sort(), ['sess-1', 'sess-2']);
        assert.deepStrictEqual(found[0].versions.sort(), ['1.0.0', '1.1.0']);
      },
    },
    {
      title: 'generateReport returns clean status with no patterns',
      body: async () => {
        const report = generateReport([]);
        assert.strictEqual(report.status, 'clean');
        assert.strictEqual(report.patternCount, 0);
        assert.ok(report.summary.includes('No recurring'));
        assert.ok(report.generatedAt);
      },
    },
    {
      title: 'generateReport produces structured report from patterns',
      body: async () => {
        const runs = Array.from({ length: 3 }, (_, index) =>
          makeSkillRun({ id: `r${index}`, skillId: 'my-skill', failureReason: 'timeout' })
        );
        const found = detectPatterns(runs, { threshold: 3 });
        const report = generateReport(found, { generatedAt: '2026-03-15T09:00:00Z' });
        assert.strictEqual(report.status, 'attention_needed');
        assert.strictEqual(report.patternCount, 1);
        assert.strictEqual(report.totalFailures, 3);
        assert.deepStrictEqual(report.affectedSkills, ['my-skill']);
        assert.strictEqual(report.patterns[0].skillId, 'my-skill');
        assert.ok(report.patterns[0].suggestedAction);
        assert.strictEqual(report.generatedAt, '2026-03-15T09:00:00Z');
      },
    },
    {
      title: 'suggestAction returns timeout-specific advice',
      body: async () => {
        const advice = suggestAction({ normalizedReason: 'timeout after 30s', versions: ['1.0.0'] });
        assert.ok(advice.toLowerCase().includes('timeout'));
      },
    },
    {
      title: 'suggestAction returns permission-specific advice',
      body: async () => {
        const advice = suggestAction({ normalizedReason: 'permission denied', versions: ['1.0.0'] });
        assert.ok(advice.toLowerCase().includes('permission'));
      },
    },
    {
      title: 'suggestAction returns version-span advice when multiple versions affected',
      body: async () => {
        const advice = suggestAction({ normalizedReason: 'something broke', versions: ['1.0.0', '1.1.0'] });
        assert.ok(advice.toLowerCase().includes('version'));
      },
    },
  ];

  let passed = 0;
  let failed = 0;
  // Cases run strictly one after another so the printed order matches the list.
  for (const { title, body } of cases) {
    const succeeded = await test(title, body);
    if (succeeded) {
      passed += 1;
    } else {
      failed += 1;
    }
  }

  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
  process.exit(failed > 0 ? 1 : 0);
}
// Do not leave the promise floating: if the harness itself rejects, report it
// and exit non-zero so the run can never be mistaken for a pass.
runTests().catch((error) => {
  console.error(error);
  process.exit(1);
});