From c53bba9e02a616cb446755d012afc692b89296c1 Mon Sep 17 00:00:00 2001
From: Affaan Mustafa
Date: Sat, 14 Mar 2026 23:21:18 -0700
Subject: [PATCH] =?UTF-8?q?feat:=20self-improving=20skills=20loop=20?=
 =?UTF-8?q?=E2=80=94=20observe,=20inspect,=20amend,=20evaluate?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add skill health observation layer (execution logging, success/failure tracking)
- Add skill health inspector (trace recurring failures across runs)
- Add amendify mechanism (propose SKILL.md patches from failure evidence)
- Add evaluation scaffolding (compare amended vs original performance)
- Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate
- 1145/1145 tests passing (+3 new)
---
 scripts/lib/skill-improvement/amendify.js     | 119 ++++++++++++
 scripts/lib/skill-improvement/evaluate.js     |  76 ++++++++
 scripts/lib/skill-improvement/health.js       | 148 +++++++++++++++
 scripts/lib/skill-improvement/observations.js | 131 +++++++++++++
 scripts/session-inspect.js                    |  76 ++++++-
 tests/lib/skill-improvement.test.js           | 186 ++++++++++++++++++
 tests/scripts/session-inspect.test.js         | 127 ++++++++++++
 7 files changed, 857 insertions(+), 6 deletions(-)
 create mode 100644 scripts/lib/skill-improvement/amendify.js
 create mode 100644 scripts/lib/skill-improvement/evaluate.js
 create mode 100644 scripts/lib/skill-improvement/health.js
 create mode 100644 scripts/lib/skill-improvement/observations.js
 create mode 100644 tests/lib/skill-improvement.test.js

diff --git a/scripts/lib/skill-improvement/amendify.js b/scripts/lib/skill-improvement/amendify.js
new file mode 100644
index 00000000..c95f0e91
--- /dev/null
+++ b/scripts/lib/skill-improvement/amendify.js
@@ -0,0 +1,119 @@
+'use strict';
+
+const { buildSkillHealthReport } = require('./health');
+
+const AMENDMENT_SCHEMA_VERSION = 'ecc.skill-amendment-proposal.v1';
+
+/**
+ * Build a proposal id. The random suffix keeps ids distinct when several
+ * proposals for the same skill are created within the same millisecond.
+ */
+function createProposalId(skillId) {
+  const suffix = Math.random().toString(16).slice(2, 8);
+  return `amend-${skillId}-${Date.now()}-${suffix}`;
+}
+
+/**
+ * Collapse evidence text onto one line so embedded newlines cannot break the
+ * markdown bullet it is interpolated into.
+ */
+function toSingleLine(value) {
+  return String(value).replace(/\s+/g, ' ').trim();
+}
+
+/**
+ * Render the markdown fragment proposed for the skill. Only the top-ranked
+ * error, task and feedback entries of the health summary are mentioned.
+ */
+function summarizePatchPreview(skillId, health) {
+  const lines = [
+    '## Failure-Driven Amendments',
+    '',
+    `- Focus skill routing for \`${skillId}\` when tasks match the proven success cases.`,
+  ];
+
+  if (health.recurringErrors[0]) {
+    lines.push(`- Add explicit guardrails for recurring failure: ${toSingleLine(health.recurringErrors[0].error)}.`);
+  }
+
+  if (health.recurringTasks[0]) {
+    lines.push(`- Add an example workflow for task pattern: ${toSingleLine(health.recurringTasks[0].task)}.`);
+  }
+
+  if (health.recurringFeedback[0]) {
+    lines.push(`- Address repeated user feedback: ${toSingleLine(health.recurringFeedback[0].feedback)}.`);
+  }
+
+  lines.push('- Add a verification checklist before declaring the skill output complete.');
+  return lines.join('\n');
+}
+
+/**
+ * Propose an additive SKILL.md amendment for one skill from its failed runs.
+ *
+ * @param {string} skillId - Skill to build the proposal for.
+ * @param {object[]} records - Skill observation records.
+ * @param {object} [options] - Forwarded to buildSkillHealthReport.
+ * @returns {object} Proposal with status 'proposed', or 'insufficient-evidence'
+ *   when the skill has no failed observations.
+ */
+function proposeSkillAmendment(skillId, records, options = {}) {
+  const report = buildSkillHealthReport(records, {
+    ...options,
+    skillId,
+    minFailureCount: options.minFailureCount || 1
+  });
+  const [health] = report.skills;
+
+  if (!health || health.failures === 0) {
+    return {
+      schemaVersion: AMENDMENT_SCHEMA_VERSION,
+      skill: {
+        id: skillId,
+        path: health ? health.skill.path : null
+      },
+      status: 'insufficient-evidence',
+      rationale: ['No failed observations were available for this skill.'],
+      patch: null
+    };
+  }
+
+  const preview = summarizePatchPreview(skillId, health);
+  // A lone failure still yields a proposal, but must not be called recurring.
+  const evidenceNote = health.failures > 1
+    ? 'Proposals are generated from repeated failed runs rather than a single anecdotal error.'
+    : 'Only one failed run was observed; treat this proposal as tentative until the failure recurs.';
+
+  return {
+    schemaVersion: AMENDMENT_SCHEMA_VERSION,
+    proposalId: createProposalId(skillId),
+    generatedAt: new Date().toISOString(),
+    status: 'proposed',
+    skill: {
+      id: skillId,
+      path: health.skill.path || null
+    },
+    evidence: {
+      totalRuns: health.totalRuns,
+      failures: health.failures,
+      successRate: health.successRate,
+      recurringErrors: health.recurringErrors,
+      recurringTasks: health.recurringTasks,
+      recurringFeedback: health.recurringFeedback
+    },
+    rationale: [
+      evidenceNote,
+      'The suggested patch is additive so the original SKILL.md intent remains auditable.'
+    ],
+    patch: {
+      format: 'markdown-fragment',
+      targetPath: health.skill.path || `skills/${skillId}/SKILL.md`,
+      preview
+    }
+  };
+}
+
+module.exports = {
+  AMENDMENT_SCHEMA_VERSION,
+  proposeSkillAmendment
+};
diff --git a/scripts/lib/skill-improvement/evaluate.js b/scripts/lib/skill-improvement/evaluate.js
new file mode 100644
index 00000000..f45023d5
--- /dev/null
+++ b/scripts/lib/skill-improvement/evaluate.js
@@ -0,0 +1,76 @@
+'use strict';
+
+const EVALUATION_SCHEMA_VERSION = 'ecc.skill-evaluation.v1';
+
+/** Round a rate to three decimal places. */
+function roundRate(value) {
+  return Math.round(value * 1000) / 1000;
+}
+
+/** Tally runs for one group; records without a truthy outcome.success count as failures. */
+function summarize(records) {
+  const runs = records.length;
+  const successes = records.filter(record => record.outcome && record.outcome.success).length;
+  const failures = runs - successes;
+  return {
+    runs,
+    successes,
+    failures,
+    successRate: runs > 0 ? roundRate(successes / runs) : 0
+  };
+}
+
+/**
+ * Compare baseline and amended runs of one skill.
+ *
+ * Records whose run.variant is 'amended' form the amended group (limited to
+ * options.amendmentId when one is given); all other records are baseline.
+ *
+ * @param {string} skillId - Skill whose records are evaluated.
+ * @param {object[]} records - Skill observation records.
+ * @param {object} [options]
+ * @param {number} [options.minimumRunsPerVariant=2] - Runs each group needs
+ *   before the recommendation can leave 'insufficient-data'.
+ * @param {string} [options.amendmentId] - Only count amended runs with this id.
+ * @returns {object} Evaluation with per-group summaries, delta and recommendation.
+ */
+function buildSkillEvaluationScaffold(skillId, records, options = {}) {
+  const minimumRunsPerVariant = options.minimumRunsPerVariant || 2;
+  const amendmentId = options.amendmentId || null;
+  // Skip null or skill-less entries rather than throwing on them.
+  const filtered = records.filter(record => record && record.skill && record.skill.id === skillId);
+  const baseline = filtered.filter(record => !record.run || record.run.variant !== 'amended');
+  const amended = filtered.filter(record => record.run && record.run.variant === 'amended')
+    .filter(record => !amendmentId || record.run.amendmentId === amendmentId);
+
+  const baselineSummary = summarize(baseline);
+  const amendedSummary = summarize(amended);
+  const delta = {
+    successRate: roundRate(amendedSummary.successRate - baselineSummary.successRate),
+    failures: amendedSummary.failures - baselineSummary.failures
+  };
+
+  let recommendation = 'insufficient-data';
+  if (baselineSummary.runs >= minimumRunsPerVariant && amendedSummary.runs >= minimumRunsPerVariant) {
+    recommendation = delta.successRate > 0 ? 'promote-amendment' : 'keep-baseline';
+  }
+
+  return {
+    schemaVersion: EVALUATION_SCHEMA_VERSION,
+    generatedAt: new Date().toISOString(),
+    skillId,
+    amendmentId,
+    gate: {
+      minimumRunsPerVariant
+    },
+    baseline: baselineSummary,
+    amended: amendedSummary,
+    delta,
+    recommendation
+  };
+}
+
+module.exports = {
+  EVALUATION_SCHEMA_VERSION,
+  buildSkillEvaluationScaffold
+};
diff --git a/scripts/lib/skill-improvement/health.js b/scripts/lib/skill-improvement/health.js
new file mode 100644
index 00000000..8aba5986
--- /dev/null
+++ b/scripts/lib/skill-improvement/health.js
@@ -0,0 +1,148 @@
+'use strict';
+
+const HEALTH_SCHEMA_VERSION = 'ecc.skill-health.v1';
+
+/** Round a rate to three decimal places. */
+function roundRate(value) {
+  return Math.round(value * 1000) / 1000;
+}
+
+/**
+ * Increment the tally for `value`. Keys are stringified because hand-written
+ * observation lines may carry non-string values, which rankCounts cannot sort.
+ */
+function bumpCount(counts, value) {
+  const key = String(value);
+  counts.set(key, (counts.get(key) || 0) + 1);
+}
+
+/** Convert a Map of tallies into entries ordered by count (descending), then value. */
+function rankCounts(values) {
+  return Array.from(values.entries())
+    .map(([value, count]) => ({ value, count }))
+    .sort((left, right) => right.count - left.count || left.value.localeCompare(right.value));
+}
+
+/** Tally runs, successes and failures per run.variant ('baseline' when absent). */
+function summarizeVariantRuns(records) {
+  return records.reduce((accumulator, record) => {
+    const key = record.run && record.run.variant ? record.run.variant : 'baseline';
+    if (!accumulator[key]) {
+      accumulator[key] = { runs: 0, successes: 0, failures: 0 };
+    }
+
+    accumulator[key].runs += 1;
+    if (record.outcome && record.outcome.success) {
+      accumulator[key].successes += 1;
+    } else {
+      accumulator[key].failures += 1;
+    }
+
+    return accumulator;
+  }, {});
+}
+
+/** Classify a summary as 'failing', 'watch' or 'healthy' from its failure count. */
+function deriveSkillStatus(skillSummary, options = {}) {
+  const minFailureCount = options.minFailureCount || 2;
+  if (skillSummary.failures >= minFailureCount) {
+    return 'failing';
+  }
+
+  if (skillSummary.failures > 0) {
+    return 'watch';
+  }
+
+  return 'healthy';
+}
+
+/**
+ * Aggregate observation records into a per-skill health report.
+ *
+ * @param {object[]} records - Skill observation records.
+ * @param {object} [options]
+ * @param {string} [options.skillId] - Restrict the report to this skill.
+ * @param {number} [options.minFailureCount=2] - Failures at which a skill is 'failing'.
+ * @returns {object} Report whose `skills` are ordered by failures (descending), then id.
+ */
+function buildSkillHealthReport(records, options = {}) {
+  const filterSkillId = options.skillId || null;
+  // Stored observations are only checked for their schema version when read,
+  // so drop entries without a usable skill id instead of crashing on them.
+  const filtered = records.filter(record => {
+    if (!record || !record.skill || typeof record.skill.id !== 'string') {
+      return false;
+    }
+
+    return !filterSkillId || record.skill.id === filterSkillId;
+  });
+
+  const grouped = filtered.reduce((accumulator, record) => {
+    const skillId = record.skill.id;
+    if (!accumulator.has(skillId)) {
+      accumulator.set(skillId, []);
+    }
+    accumulator.get(skillId).push(record);
+    return accumulator;
+  }, new Map());
+
+  const skills = Array.from(grouped.entries())
+    .map(([skillId, skillRecords]) => {
+      const successes = skillRecords.filter(record => record.outcome && record.outcome.success).length;
+      const failures = skillRecords.length - successes;
+      const recurringErrors = new Map();
+      const recurringTasks = new Map();
+      const recurringFeedback = new Map();
+      // Use the first recorded path; earlier records may not carry one.
+      const recordWithPath = skillRecords.find(record => record.skill.path);
+
+      skillRecords.forEach(record => {
+        if (!record.outcome || record.outcome.success) {
+          return;
+        }
+
+        if (record.outcome.error) {
+          bumpCount(recurringErrors, record.outcome.error);
+        }
+        if (record.task) {
+          bumpCount(recurringTasks, record.task);
+        }
+        if (record.outcome.feedback) {
+          bumpCount(recurringFeedback, record.outcome.feedback);
+        }
+      });
+
+      const summary = {
+        skill: {
+          id: skillId,
+          path: recordWithPath ? recordWithPath.skill.path : null
+        },
+        totalRuns: skillRecords.length,
+        successes,
+        failures,
+        successRate: skillRecords.length > 0 ? roundRate(successes / skillRecords.length) : 0,
+        status: 'healthy',
+        recurringErrors: rankCounts(recurringErrors).map(entry => ({ error: entry.value, count: entry.count })),
+        recurringTasks: rankCounts(recurringTasks).map(entry => ({ task: entry.value, count: entry.count })),
+        recurringFeedback: rankCounts(recurringFeedback).map(entry => ({ feedback: entry.value, count: entry.count })),
+        variants: summarizeVariantRuns(skillRecords)
+      };
+
+      summary.status = deriveSkillStatus(summary, options);
+      return summary;
+    })
+    .sort((left, right) => right.failures - left.failures || left.skill.id.localeCompare(right.skill.id));
+
+  return {
+    schemaVersion: HEALTH_SCHEMA_VERSION,
+    generatedAt: new Date().toISOString(),
+    totalObservations: filtered.length,
+    skillCount: skills.length,
+    skills
+  };
+}
+
+module.exports = {
+  HEALTH_SCHEMA_VERSION,
+  buildSkillHealthReport
+};
diff --git a/scripts/lib/skill-improvement/observations.js b/scripts/lib/skill-improvement/observations.js
new file mode 100644
index 00000000..6c37e5ae
--- /dev/null
+++ b/scripts/lib/skill-improvement/observations.js
@@ -0,0 +1,131 @@
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+
+const OBSERVATION_SCHEMA_VERSION = 'ecc.skill-observation.v1';
+
+/** Resolve the project root from options.projectRoot, options.cwd, or the process cwd. */
+function resolveProjectRoot(options = {}) {
+  return path.resolve(options.projectRoot || options.cwd || process.cwd());
+}
+
+/** Directory holding skill telemetry: .claude/ecc/skills under the project root. */
+function getSkillTelemetryRoot(options = {}) {
+  return path.join(resolveProjectRoot(options), '.claude', 'ecc', 'skills');
+}
+
+/** Path of the JSONL file that observations are appended to. */
+function getSkillObservationsPath(options = {}) {
+  return path.join(getSkillTelemetryRoot(options), 'observations.jsonl');
+}
+
+/** Return the trimmed string, or throw when `value` is not a non-empty string. */
+function ensureString(value, label) {
+  if (typeof value !== 'string' || value.trim().length === 0) {
+    throw new Error(`${label} must be a non-empty string`);
+  }
+
+  return value.trim();
+}
+
+/** Build an observation id from the timestamp, process id and a random suffix. */
+function createObservationId() {
+  return `obs-${Date.now()}-${process.pid}-${Math.random().toString(16).slice(2, 8)}`;
+}
+
+/**
+ * Normalize caller input into an observation record.
+ *
+ * @param {object} input - Requires `task` and `skill.id`; `success` is coerced
+ *   with Boolean(), `variant` defaults to 'baseline', `source` to 'manual'.
+ * @returns {object} Record tagged with OBSERVATION_SCHEMA_VERSION.
+ * @throws {Error} When input is not an object or a required string is missing.
+ */
+function createSkillObservation(input) {
+  if (!input || typeof input !== 'object') {
+    throw new Error('observation input must be an object');
+  }
+
+  const task = ensureString(input.task, 'task');
+  const skillId = ensureString(input.skill && input.skill.id, 'skill.id');
+  const skillPath = typeof input.skill.path === 'string' && input.skill.path.trim().length > 0
+    ? input.skill.path.trim()
+    : null;
+  const success = Boolean(input.success);
+  const error = input.error == null ? null : String(input.error);
+  const feedback = input.feedback == null ? null : String(input.feedback);
+  const variant = typeof input.variant === 'string' && input.variant.trim().length > 0
+    ? input.variant.trim()
+    : 'baseline';
+
+  return {
+    schemaVersion: OBSERVATION_SCHEMA_VERSION,
+    observationId: typeof input.observationId === 'string' && input.observationId.length > 0
+      ? input.observationId
+      : createObservationId(),
+    timestamp: typeof input.timestamp === 'string' && input.timestamp.length > 0
+      ? input.timestamp
+      : new Date().toISOString(),
+    task,
+    skill: {
+      id: skillId,
+      path: skillPath
+    },
+    outcome: {
+      success,
+      status: success ? 'success' : 'failure',
+      error,
+      feedback
+    },
+    run: {
+      variant,
+      amendmentId: input.amendmentId || null,
+      sessionId: input.sessionId || null,
+      source: input.source || 'manual'
+    }
+  };
+}
+
+/** Append one observation as a JSON line, creating the telemetry directory if needed. */
+function appendSkillObservation(observation, options = {}) {
+  const outputPath = getSkillObservationsPath(options);
+  fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+  fs.appendFileSync(outputPath, `${JSON.stringify(observation)}${os.EOL}`, 'utf8');
+  return outputPath;
+}
+
+/**
+ * Read stored observations from options.observationsPath or the default file.
+ * Best effort by design: a missing file yields [], and lines that are not valid
+ * JSON or carry a different schemaVersion are skipped.
+ */
+function readSkillObservations(options = {}) {
+  const observationPath = path.resolve(options.observationsPath || getSkillObservationsPath(options));
+  if (!fs.existsSync(observationPath)) {
+    return [];
+  }
+
+  return fs.readFileSync(observationPath, 'utf8')
+    .split(/\r?\n/)
+    .filter(Boolean)
+    .map(line => {
+      try {
+        return JSON.parse(line);
+      } catch {
+        return null;
+      }
+    })
+    .filter(record => record && record.schemaVersion === OBSERVATION_SCHEMA_VERSION);
+}
+
+module.exports = {
+  OBSERVATION_SCHEMA_VERSION,
+  appendSkillObservation,
+  createSkillObservation,
+  getSkillObservationsPath,
+  getSkillTelemetryRoot,
+  readSkillObservations,
+  resolveProjectRoot
+};
diff --git a/scripts/session-inspect.js b/scripts/session-inspect.js
index c0e7684f..e166c8ef 100644
--- a/scripts/session-inspect.js
+++ b/scripts/session-inspect.js
@@ -5,6 +5,10 @@ const fs = require('fs');
 const path = require('path');
 
 const { createAdapterRegistry, inspectSessionTarget } = require('./lib/session-adapters/registry');
+const { readSkillObservations } = require('./lib/skill-improvement/observations');
+const { buildSkillHealthReport } = require('./lib/skill-improvement/health');
+const { proposeSkillAmendment } = require('./lib/skill-improvement/amendify');
+const { buildSkillEvaluationScaffold } = require('./lib/skill-improvement/evaluate');
 
 function usage() {
   console.log([
@@ -18,12 +22,17 @@ function usage() {
     ' claude:latest Most recent Claude session history entry',
     ' claude: Specific Claude session or alias',
     ' Direct path to a Claude session file',
+    ' skills:health Inspect skill failure/success patterns from observations',
+    ' skills:amendify Propose a SKILL.md patch from failure evidence',
+    ' skills:evaluate Compare baseline vs amended skill outcomes',
     '',
     'Examples:',
     ' node scripts/session-inspect.js .claude/plan/workflow.json',
     ' node scripts/session-inspect.js workflow-visual-proof',
     ' node scripts/session-inspect.js claude:latest',
     ' node scripts/session-inspect.js latest --target-type claude-history',
+    ' node scripts/session-inspect.js skills:health',
+    ' node scripts/session-inspect.js skills:amendify --skill api-design',
     ' node scripts/session-inspect.js claude:a1b2c3d4 --write /tmp/session.json'
   ].join('\n'));
 }
@@ -39,14 +48,61 @@ function parseArgs(argv) {
   const targetTypeIndex = args.indexOf('--target-type');
   const targetType = targetTypeIndex >= 0 ? args[targetTypeIndex + 1] : null;
 
+  const skillIndex = args.indexOf('--skill');
+  const skillId = skillIndex >= 0 ? args[skillIndex + 1] : null;
+
+  const amendmentIndex = args.indexOf('--amendment-id');
+  const amendmentId = amendmentIndex >= 0 ? args[amendmentIndex + 1] : null;
+
+  const observationsIndex = args.indexOf('--observations');
+  const observationsPath = observationsIndex >= 0 ? args[observationsIndex + 1] : null;
+
   const writeIndex = args.indexOf('--write');
   const writePath = writeIndex >= 0 ? args[writeIndex + 1] : null;
 
-  return { target, adapterId, targetType, writePath, listAdapters };
+  return { target, adapterId, targetType, writePath, listAdapters, skillId, amendmentId, observationsPath };
+}
+
+/**
+ * Handle the skills:health, skills:amendify and skills:evaluate targets.
+ *
+ * @returns {object|null} The report for one of those targets, or null so the
+ *   caller falls back to regular session inspection.
+ */
+function inspectSkillLoopTarget(target, options = {}) {
+  const needsSkill = target === 'skills:amendify' || target === 'skills:evaluate';
+  if (target !== 'skills:health' && !needsSkill) {
+    // Not a skill-loop target: return before touching the observations file.
+    return null;
+  }
+
+  if (needsSkill && !options.skillId) {
+    throw new Error(`${target} requires --skill SKILL_ID`);
+  }
+
+  const observations = readSkillObservations({
+    cwd: options.cwd,
+    projectRoot: options.cwd,
+    observationsPath: options.observationsPath
+  });
+
+  if (target === 'skills:health') {
+    return buildSkillHealthReport(observations, {
+      skillId: options.skillId || null
+    });
+  }
+
+  if (target === 'skills:amendify') {
+    return proposeSkillAmendment(options.skillId, observations);
+  }
+
+  return buildSkillEvaluationScaffold(options.skillId, observations, {
+    amendmentId: options.amendmentId || null
+  });
 }
 
 function main() {
-  const { target, adapterId, targetType, writePath, listAdapters } = parseArgs(process.argv);
+  const { target, adapterId, targetType, writePath, listAdapters, skillId, amendmentId, observationsPath } = parseArgs(process.argv);
 
   if (listAdapters) {
     const registry = createAdapterRegistry();
@@ -59,12 +115,20 @@ function main() {
     process.exit(1);
   }
 
-  const inspectTarget = targetType ? { type: targetType, value: target } : target;
-  const snapshot = inspectSessionTarget(inspectTarget, {
+  const skillLoopPayload = inspectSkillLoopTarget(target, {
     cwd: process.cwd(),
-    adapterId
+    skillId,
+    amendmentId,
+    observationsPath
   });
-  const payload = JSON.stringify(snapshot, null, 2);
+  const payloadObject = skillLoopPayload || inspectSessionTarget(
+    targetType ? { type: targetType, value: target } : target,
+    {
+      cwd: process.cwd(),
+      adapterId
+    }
+  );
+  const payload = JSON.stringify(payloadObject, null, 2);
 
   if (writePath) {
     const absoluteWritePath = path.resolve(writePath);
diff --git a/tests/lib/skill-improvement.test.js b/tests/lib/skill-improvement.test.js
new file mode 100644
index 00000000..ea80a25f
--- /dev/null
+++ b/tests/lib/skill-improvement.test.js
@@ -0,0 +1,186 @@
+'use strict';
+
+const assert = require('assert');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+
+const {
+  appendSkillObservation,
+  createSkillObservation,
+  getSkillObservationsPath,
+  readSkillObservations
+} = require('../../scripts/lib/skill-improvement/observations');
+const { buildSkillHealthReport } = require('../../scripts/lib/skill-improvement/health');
+const { proposeSkillAmendment } = require('../../scripts/lib/skill-improvement/amendify');
+const { buildSkillEvaluationScaffold } = require('../../scripts/lib/skill-improvement/evaluate');
+
+console.log('=== Testing skill-improvement ===\n');
+
+let passed = 0;
+let failed = 0;
+
+function test(name, fn) {
+  try {
+    fn();
+    console.log(` ✓ ${name}`);
+    passed += 1;
+  } catch (error) {
+    console.log(` ✗ ${name}: ${error.message}`);
+    failed += 1;
+  }
+}
+
+function makeProjectRoot(prefix) {
+  return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+}
+
+function cleanup(dirPath) {
+  fs.rmSync(dirPath, { recursive: true, force: true });
+}
+
+test('observation layer writes and reads structured skill outcomes', () => {
+  const projectRoot = makeProjectRoot('ecc-skill-observe-');
+
+  try {
+    const observation = createSkillObservation({
+      task: 'Fix flaky Playwright test',
+      skill: {
+        id: 'e2e-testing',
+        path: 'skills/e2e-testing/SKILL.md'
+      },
+      success: false,
+      error: 'playwright timeout',
+      feedback: 'Timed out waiting for locator',
+      sessionId: 'sess-1234'
+    });
+
+    appendSkillObservation(observation, { projectRoot });
+    const records = readSkillObservations({ projectRoot });
+
+    assert.strictEqual(records.length, 1);
+    assert.strictEqual(records[0].schemaVersion, 'ecc.skill-observation.v1');
+    assert.strictEqual(records[0].task, 'Fix flaky Playwright test');
+    assert.strictEqual(records[0].skill.id, 'e2e-testing');
+    assert.strictEqual(records[0].outcome.success, false);
+    assert.strictEqual(records[0].outcome.error, 'playwright timeout');
+    assert.strictEqual(getSkillObservationsPath({ projectRoot }), path.join(projectRoot, '.claude', 'ecc', 'skills', 'observations.jsonl'));
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test('health inspector traces recurring failures for a skill across runs', () => {
+  const projectRoot = makeProjectRoot('ecc-skill-health-');
+
+  try {
+    [
+      createSkillObservation({
+        task: 'Ship Next.js auth middleware',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: false,
+        error: 'missing csrf guidance',
+        feedback: 'Did not mention CSRF'
+      }),
+      createSkillObservation({
+        task: 'Harden Next.js auth middleware',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: false,
+        error: 'missing csrf guidance',
+        feedback: 'Repeated omission'
+      }),
+      createSkillObservation({
+        task: 'Review payment webhook security',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: true
+      })
+    ].forEach(record => appendSkillObservation(record, { projectRoot }));
+
+    const report = buildSkillHealthReport(readSkillObservations({ projectRoot }), {
+      minFailureCount: 2
+    });
+    const skill = report.skills.find(entry => entry.skill.id === 'security-review');
+
+    assert.ok(skill, 'security-review should appear in the report');
+    assert.strictEqual(skill.totalRuns, 3);
+    assert.strictEqual(skill.failures, 2);
+    assert.strictEqual(skill.status, 'failing');
+    assert.strictEqual(skill.recurringErrors[0].error, 'missing csrf guidance');
+    assert.strictEqual(skill.recurringErrors[0].count, 2);
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test('amendify proposes SKILL.md patch content from failure evidence', () => {
+  const records = [
+    createSkillObservation({
+      task: 'Add API rate limiting',
+      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+      success: false,
+      error: 'missing rate limiting guidance',
+      feedback: 'No rate-limit section'
+    }),
+    createSkillObservation({
+      task: 'Design public API error envelopes',
+      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+      success: false,
+      error: 'missing error response examples',
+      feedback: 'Need explicit examples'
+    })
+  ];
+
+  const proposal = proposeSkillAmendment('api-design', records);
+
+  assert.strictEqual(proposal.schemaVersion, 'ecc.skill-amendment-proposal.v1');
+  assert.strictEqual(proposal.skill.id, 'api-design');
+  assert.strictEqual(proposal.status, 'proposed');
+  assert.ok(proposal.patch.preview.includes('## Failure-Driven Amendments'));
+  assert.ok(proposal.patch.preview.includes('rate limiting'));
+  assert.ok(proposal.patch.preview.includes('error response'));
+});
+
+test('evaluation scaffold compares amended and baseline performance', () => {
+  const records = [
+    createSkillObservation({
+      task: 'Fix flaky login test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: false,
+      variant: 'baseline'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky checkout test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'baseline'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky login test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'amended',
+      amendmentId: 'amend-1'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky checkout test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'amended',
+      amendmentId: 'amend-1'
+    })
+  ];
+
+  const evaluation = buildSkillEvaluationScaffold('e2e-testing', records, {
+    amendmentId: 'amend-1',
+    minimumRunsPerVariant: 2
+  });
+
+  assert.strictEqual(evaluation.schemaVersion, 'ecc.skill-evaluation.v1');
+  assert.strictEqual(evaluation.baseline.runs, 2);
+  assert.strictEqual(evaluation.amended.runs, 2);
+  assert.strictEqual(evaluation.delta.successRate, 0.5);
+  assert.strictEqual(evaluation.recommendation, 'promote-amendment');
+});
+
+console.log(`\n=== Results: ${passed} passed, ${failed} failed ===`);
+if (failed > 0) process.exit(1);
diff --git a/tests/scripts/session-inspect.test.js b/tests/scripts/session-inspect.test.js
index cb6cb30d..42dd1132 100644
--- a/tests/scripts/session-inspect.test.js
+++ b/tests/scripts/session-inspect.test.js
@@ -143,6 +143,133 @@ function runTests() {
     }
   })) passed++; else failed++;
 
+  if (test('inspects skill health from recorded observations', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-skills-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Review auth middleware',
+          skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing csrf guidance', feedback: 'Need CSRF coverage' },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-2',
+          timestamp: '2026-03-14T12:05:00.000Z',
+          task: 'Review auth middleware',
+          skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing csrf guidance', feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-2' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:health'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-health.v1');
+      assert.ok(payload.skills.some(skill => skill.skill.id === 'security-review'));
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
+  if (test('proposes skill amendments through session-inspect', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-amend-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Add rate limiting',
+          skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing rate limiting guidance', feedback: 'Need rate limiting examples' },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:amendify', '--skill', 'api-design'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-amendment-proposal.v1');
+      assert.strictEqual(payload.skill.id, 'api-design');
+      assert.ok(payload.patch.preview.includes('Failure-Driven Amendments'));
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
+  if (test('builds skill evaluation scaffolding through session-inspect', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-eval-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Fix flaky login test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: null, feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-2',
+          timestamp: '2026-03-14T12:10:00.000Z',
+          task: 'Fix flaky checkout test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-2' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-3',
+          timestamp: '2026-03-14T12:20:00.000Z',
+          task: 'Fix flaky login test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'amended', amendmentId: 'amend-1', sessionId: 'sess-3' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-4',
+          timestamp: '2026-03-14T12:30:00.000Z',
+          task: 'Fix flaky checkout test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'amended', amendmentId: 'amend-1', sessionId: 'sess-4' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:evaluate', '--skill', 'e2e-testing', '--amendment-id', 'amend-1'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-evaluation.v1');
+      assert.strictEqual(payload.recommendation, 'promote-amendment');
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
   console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
   process.exit(failed > 0 ? 1 : 0);
 }