mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-03-30 21:53:28 +08:00
- Add skill health observation layer (execution logging, success/failure tracking)
- Add skill health inspector (trace recurring failures across runs)
- Add amendify mechanism (propose SKILL.md patches from failure evidence)
- Add evaluation scaffolding (compare amended vs original performance)
- Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate
- 1145/1145 tests passing (+3 new)
60 lines
1.8 KiB
JavaScript
60 lines
1.8 KiB
JavaScript
'use strict';
|
|
|
|
// Schema identifier written to the `schemaVersion` field of every evaluation
// scaffold produced by this module; also exported for consumers.
const EVALUATION_SCHEMA_VERSION = 'ecc.skill-evaluation.v1';
|
|
|
|
/**
 * Round a numeric rate to three decimal places.
 *
 * @param {number} value - The rate to round (for example a success ratio).
 * @returns {number} `value` rounded to the nearest thousandth.
 */
function roundRate(value) {
  return Math.round(value * 1000) / 1000;
}
|
|
|
|
/**
 * Aggregate a list of run records into success/failure counts.
 *
 * A record counts as a success when `record.outcome.success` is truthy;
 * every other record (including one with no `outcome`) counts as a failure.
 *
 * @param {Array<Object>} records - Run records to summarize.
 * @returns {{runs: number, successes: number, failures: number, successRate: number}}
 *   Totals plus the success rate rounded via `roundRate`.
 */
function summarize(records) {
  const runs = records.length;
  const successes = records.filter(record => record.outcome && record.outcome.success).length;
  const failures = runs - successes;

  return {
    runs,
    successes,
    failures,
    // Guard the division so an empty record set reports 0 instead of NaN.
    successRate: runs > 0 ? roundRate(successes / runs) : 0
  };
}
|
|
|
|
/**
 * Build an evaluation report comparing baseline runs of a skill against
 * runs of an amended variant.
 *
 * Records are first narrowed to those whose `skill.id` equals `skillId`.
 * A record is treated as "amended" when `run.variant === 'amended'`; every
 * other matching record (including one with no `run`) is baseline. When an
 * `amendmentId` is given, only amended runs carrying that `run.amendmentId`
 * are counted.
 *
 * @param {string} skillId - Identifier of the skill to evaluate.
 * @param {Array<Object>} records - Run records; null/undefined entries are ignored.
 * @param {Object} [options]
 * @param {number} [options.minimumRunsPerVariant=2] - Runs required for BOTH
 *   variants before a recommendation is made. Any falsy value (including 0)
 *   falls back to 2.
 * @param {string} [options.amendmentId] - Restrict amended runs to this
 *   amendment. Falsy values are normalized to `null` (no restriction).
 * @returns {Object} Report with `schemaVersion`, `generatedAt` (ISO timestamp),
 *   `skillId`, `amendmentId`, `gate`, `baseline` and `amended` summaries,
 *   `delta` (amended minus baseline) and `recommendation`, which is one of
 *   'insufficient-data', 'promote-amendment' or 'keep-baseline'.
 */
function buildSkillEvaluationScaffold(skillId, records, options = {}) {
  const minimumRunsPerVariant = options.minimumRunsPerVariant || 2;
  const amendmentId = options.amendmentId || null;

  // Skip null/undefined entries so one malformed record cannot abort the evaluation.
  const filtered = records.filter(record => record && record.skill && record.skill.id === skillId);
  const baseline = filtered.filter(record => !record.run || record.run.variant !== 'amended');
  const amended = filtered
    .filter(record => record.run && record.run.variant === 'amended')
    .filter(record => !amendmentId || record.run.amendmentId === amendmentId);

  const baselineSummary = summarize(baseline);
  const amendedSummary = summarize(amended);
  const delta = {
    successRate: roundRate(amendedSummary.successRate - baselineSummary.successRate),
    failures: amendedSummary.failures - baselineSummary.failures
  };

  // Only recommend once both variants have met the minimum-run gate; the
  // amendment is promoted only on a strictly positive success-rate delta.
  let recommendation = 'insufficient-data';
  if (baselineSummary.runs >= minimumRunsPerVariant && amendedSummary.runs >= minimumRunsPerVariant) {
    recommendation = delta.successRate > 0 ? 'promote-amendment' : 'keep-baseline';
  }

  return {
    schemaVersion: EVALUATION_SCHEMA_VERSION,
    generatedAt: new Date().toISOString(),
    skillId,
    amendmentId,
    gate: {
      minimumRunsPerVariant
    },
    baseline: baselineSummary,
    amended: amendedSummary,
    delta,
    recommendation
  };
}
|
|
|
|
module.exports = {
|
|
EVALUATION_SCHEMA_VERSION,
|
|
buildSkillEvaluationScaffold
|
|
};
|