mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-03-30 13:43:26 +08:00
- Add skill health observation layer (execution logging, success/failure tracking) - Add skill health inspector (trace recurring failures across runs) - Add amendify mechanism (propose SKILL.md patches from failure evidence) - Add evaluation scaffolding (compare amended vs original performance) - Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate - 1145/1145 tests passing (+3 new)
187 lines
6.2 KiB
JavaScript
187 lines
6.2 KiB
JavaScript
'use strict';
|
|
|
|
const assert = require('assert');
|
|
const fs = require('fs');
|
|
const os = require('os');
|
|
const path = require('path');
|
|
|
|
const {
|
|
appendSkillObservation,
|
|
createSkillObservation,
|
|
getSkillObservationsPath,
|
|
readSkillObservations
|
|
} = require('../../scripts/lib/skill-improvement/observations');
|
|
const { buildSkillHealthReport } = require('../../scripts/lib/skill-improvement/health');
|
|
const { proposeSkillAmendment } = require('../../scripts/lib/skill-improvement/amendify');
|
|
const { buildSkillEvaluationScaffold } = require('../../scripts/lib/skill-improvement/evaluate');
|
|
|
|
console.log('=== Testing skill-improvement ===\n');

// Running tallies for this script: test() increments one of them per case,
// and the final summary line plus the exit status are derived from them.
let passed = 0;
let failed = 0;
|
|
|
|
/**
 * Runs one named test case synchronously and records its outcome.
 *
 * A normal return from `fn` counts as a pass; any thrown value counts as a
 * failure. One result line is printed per case and the module-level
 * `passed` / `failed` counters are updated accordingly.
 *
 * @param {string} name - Label printed next to the pass/fail marker.
 * @param {Function} fn - Test body; invoked with no arguments.
 * @returns {void}
 */
function test(name, fn) {
  try {
    fn();
    console.log(` ✓ ${name}`);
    passed += 1;
  } catch (error) {
    // A test body may throw a non-Error value (`throw null`, a bare string).
    // Reading `.message` unguarded would throw inside this handler and abort
    // the whole run, so fall back to the stringified value instead.
    const hasMessage =
      error !== null && error !== undefined && error.message !== undefined;
    const message = hasMessage ? error.message : String(error);
    console.log(` ✗ ${name}: ${message}`);
    failed += 1;
  }
}
|
|
|
|
/**
 * Creates a fresh, uniquely named directory under the OS temp directory so a
 * test case can use it as an isolated project root.
 *
 * @param {string} prefix - Leading part of the directory name; a unique
 *   suffix is appended by `fs.mkdtempSync`.
 * @returns {string} Path of the newly created directory.
 */
function makeProjectRoot(prefix) {
  const template = path.join(os.tmpdir(), prefix);
  return fs.mkdtempSync(template);
}
|
|
|
|
/**
 * Removes a temporary directory tree created for a test case.
 *
 * `recursive` deletes the directory's contents as well; `force` makes the
 * call a no-op when the path no longer exists, so calling this from a
 * `finally` block is safe even if the directory was never populated.
 *
 * @param {string} dirPath - Directory to delete.
 * @returns {void}
 */
function cleanup(dirPath) {
  const removeOptions = { recursive: true, force: true };
  fs.rmSync(dirPath, removeOptions);
}
|
|
|
|
// Round-trips a single failed observation through append/read inside a
// throwaway project root, then checks the fields of the record read back and
// the location reported for the observations file.
test('observation layer writes and reads structured skill outcomes', () => {
  const projectRoot = makeProjectRoot('ecc-skill-observe-');

  try {
    const observation = createSkillObservation({
      task: 'Fix flaky Playwright test',
      skill: {
        id: 'e2e-testing',
        path: 'skills/e2e-testing/SKILL.md'
      },
      success: false,
      error: 'playwright timeout',
      feedback: 'Timed out waiting for locator',
      sessionId: 'sess-1234'
    });

    appendSkillObservation(observation, { projectRoot });
    const records = readSkillObservations({ projectRoot });

    assert.strictEqual(records.length, 1);
    assert.strictEqual(records[0].schemaVersion, 'ecc.skill-observation.v1');
    assert.strictEqual(records[0].task, 'Fix flaky Playwright test');
    assert.strictEqual(records[0].skill.id, 'e2e-testing');
    // The flat `success` / `error` inputs are expected under `outcome` on the
    // stored record.
    assert.strictEqual(records[0].outcome.success, false);
    assert.strictEqual(records[0].outcome.error, 'playwright timeout');
    assert.strictEqual(
      getSkillObservationsPath({ projectRoot }),
      path.join(projectRoot, '.claude', 'ecc', 'skills', 'observations.jsonl')
    );
  } finally {
    // Always remove the temp project root, even when an assertion throws.
    cleanup(projectRoot);
  }
});
|
|
|
|
// Records two failures with the same error plus one success for a single
// skill, then checks that the health report (built with minFailureCount: 2)
// counts the runs and surfaces the repeated error.
test('health inspector traces recurring failures for a skill across runs', () => {
  const projectRoot = makeProjectRoot('ecc-skill-health-');

  try {
    [
      createSkillObservation({
        task: 'Ship Next.js auth middleware',
        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
        success: false,
        error: 'missing csrf guidance',
        feedback: 'Did not mention CSRF'
      }),
      createSkillObservation({
        task: 'Harden Next.js auth middleware',
        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
        success: false,
        error: 'missing csrf guidance',
        feedback: 'Repeated omission'
      }),
      createSkillObservation({
        task: 'Review payment webhook security',
        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
        success: true
      })
    ].forEach(record => appendSkillObservation(record, { projectRoot }));

    const report = buildSkillHealthReport(readSkillObservations({ projectRoot }), {
      minFailureCount: 2
    });
    const skill = report.skills.find(entry => entry.skill.id === 'security-review');

    assert.ok(skill, 'security-review should appear in the report');
    assert.strictEqual(skill.totalRuns, 3);
    assert.strictEqual(skill.failures, 2);
    assert.strictEqual(skill.status, 'failing');
    assert.strictEqual(skill.recurringErrors[0].error, 'missing csrf guidance');
    assert.strictEqual(skill.recurringErrors[0].count, 2);
  } finally {
    // Always remove the temp project root, even when an assertion throws.
    cleanup(projectRoot);
  }
});
|
|
|
|
// Feeds two in-memory failure observations with different errors to
// proposeSkillAmendment and checks the proposal metadata plus that the patch
// preview mentions both failure themes. No files are written by this case.
test('amendify proposes SKILL.md patch content from failure evidence', () => {
  const records = [
    createSkillObservation({
      task: 'Add API rate limiting',
      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
      success: false,
      error: 'missing rate limiting guidance',
      feedback: 'No rate-limit section'
    }),
    createSkillObservation({
      task: 'Design public API error envelopes',
      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
      success: false,
      error: 'missing error response examples',
      feedback: 'Need explicit examples'
    })
  ];

  const proposal = proposeSkillAmendment('api-design', records);

  assert.strictEqual(proposal.schemaVersion, 'ecc.skill-amendment-proposal.v1');
  assert.strictEqual(proposal.skill.id, 'api-design');
  assert.strictEqual(proposal.status, 'proposed');
  assert.ok(proposal.patch.preview.includes('## Failure-Driven Amendments'));
  assert.ok(proposal.patch.preview.includes('rate limiting'));
  assert.ok(proposal.patch.preview.includes('error response'));
});
|
|
|
|
// Builds four in-memory observations for one skill: the baseline variant has
// one failure and one success, the amended variant (amendmentId 'amend-1')
// has two successes. The scaffold is expected to report two runs per
// variant, a success-rate delta of 0.5, and a promote recommendation.
test('evaluation scaffold compares amended and baseline performance', () => {
  const records = [
    createSkillObservation({
      task: 'Fix flaky login test',
      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
      success: false,
      variant: 'baseline'
    }),
    createSkillObservation({
      task: 'Fix flaky checkout test',
      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
      success: true,
      variant: 'baseline'
    }),
    createSkillObservation({
      task: 'Fix flaky login test',
      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
      success: true,
      variant: 'amended',
      amendmentId: 'amend-1'
    }),
    createSkillObservation({
      task: 'Fix flaky checkout test',
      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
      success: true,
      variant: 'amended',
      amendmentId: 'amend-1'
    })
  ];

  const evaluation = buildSkillEvaluationScaffold('e2e-testing', records, {
    amendmentId: 'amend-1',
    minimumRunsPerVariant: 2
  });

  assert.strictEqual(evaluation.schemaVersion, 'ecc.skill-evaluation.v1');
  assert.strictEqual(evaluation.baseline.runs, 2);
  assert.strictEqual(evaluation.amended.runs, 2);
  assert.strictEqual(evaluation.delta.successRate, 0.5);
  assert.strictEqual(evaluation.recommendation, 'promote-amendment');
});
|
|
|
|
// Print the aggregate tally, then exit non-zero when any case failed.
console.log(`\n=== Results: ${passed} passed, ${failed} failed ===`);
if (failed > 0) {
  process.exit(1);
}
|