mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-03-30 13:43:26 +08:00
feat: self-improving skills loop — observe, inspect, amend, evaluate
- Add skill health observation layer (execution logging, success/failure tracking)
- Add skill health inspector (trace recurring failures across runs)
- Add amendify mechanism (propose SKILL.md patches from failure evidence)
- Add evaluation scaffolding (compare amended vs original performance)
- Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate
- 1145/1145 tests passing (+3 new)
This commit is contained in:
89
scripts/lib/skill-improvement/amendify.js
Normal file
89
scripts/lib/skill-improvement/amendify.js
Normal file
@@ -0,0 +1,89 @@
|
||||
'use strict';
|
||||
|
||||
const { buildSkillHealthReport } = require('./health');
|
||||
|
||||
// Schema identifier placed in the `schemaVersion` field of every object
// returned by proposeSkillAmendment.
const AMENDMENT_SCHEMA_VERSION = 'ecc.skill-amendment-proposal.v1';
|
||||
|
||||
/**
 * Builds the identifier for an amendment proposal.
 *
 * The id is the literal prefix `amend`, the skill id and the current
 * epoch-millisecond clock reading joined by dashes, so two proposals created
 * for the same skill within the same millisecond share an id.
 *
 * @param {string} skillId - Identifier of the skill the proposal targets.
 * @returns {string} An id of the form `amend-<skillId>-<epochMs>`.
 */
function createProposalId(skillId) {
  const createdAtMs = Date.now();
  return `amend-${skillId}-${createdAtMs}`;
}
|
||||
|
||||
/**
 * Renders the markdown fragment proposed as an addition to a skill's
 * documentation.
 *
 * The fragment always opens with a heading and a routing bullet and always
 * closes with a verification-checklist bullet. In between, one bullet is
 * emitted for the first entry (if any) of each recurring-failure list.
 *
 * @param {string} skillId - Identifier interpolated into the routing bullet.
 * @param {object} health - Health summary for the skill.
 * @param {Array<{error: string}>} health.recurringErrors - Only index 0 is read.
 * @param {Array<{task: string}>} health.recurringTasks - Only index 0 is read.
 * @param {Array<{feedback: string}>} health.recurringFeedback - Only index 0 is read.
 * @returns {string} Markdown lines joined with `\n`.
 */
function summarizePatchPreview(skillId, health) {
  const topError = health.recurringErrors[0];
  const topTask = health.recurringTasks[0];
  const topFeedback = health.recurringFeedback[0];

  const bullets = [
    `- Focus skill routing for \`${skillId}\` when tasks match the proven success cases.`
  ];

  if (topError) {
    bullets.push(`- Add explicit guardrails for recurring failure: ${topError.error}.`);
  }
  if (topTask) {
    bullets.push(`- Add an example workflow for task pattern: ${topTask.task}.`);
  }
  if (topFeedback) {
    bullets.push(`- Address repeated user feedback: ${topFeedback.feedback}.`);
  }

  bullets.push('- Add a verification checklist before declaring the skill output complete.');

  // Heading, blank separator line, then the bullets.
  return ['## Failure-Driven Amendments', '', ...bullets].join('\n');
}
|
||||
|
||||
/**
 * Produces an amendment proposal for one skill from its observation records.
 *
 * A health report is built for `skillId` only; `options` is forwarded to
 * buildSkillHealthReport with `skillId` forced and `minFailureCount`
 * defaulting to 1 when falsy. If the report has no entry for the skill, or
 * the entry has zero failures, an `insufficient-evidence` result with a null
 * patch is returned. Otherwise the result carries the failure evidence and a
 * markdown-fragment patch preview.
 *
 * @param {string} skillId - Skill to propose an amendment for.
 * @param {Array<object>} records - Observation records passed to the health report.
 * @param {object} [options] - Extra options forwarded to buildSkillHealthReport.
 * @param {number} [options.minFailureCount] - Falsy values fall back to 1.
 * @returns {object} Proposal object stamped with AMENDMENT_SCHEMA_VERSION.
 */
function proposeSkillAmendment(skillId, records, options = {}) {
  const healthOptions = {
    ...options,
    skillId,
    minFailureCount: options.minFailureCount || 1
  };
  const { skills } = buildSkillHealthReport(records, healthOptions);
  const health = skills[0];

  const hasFailureEvidence = Boolean(health) && health.failures !== 0;
  if (!hasFailureEvidence) {
    return {
      schemaVersion: AMENDMENT_SCHEMA_VERSION,
      skill: { id: skillId, path: null },
      status: 'insufficient-evidence',
      rationale: ['No failed observations were available for this skill.'],
      patch: null
    };
  }

  const preview = summarizePatchPreview(skillId, health);
  const {
    totalRuns,
    failures,
    successRate,
    recurringErrors,
    recurringTasks,
    recurringFeedback
  } = health;

  return {
    schemaVersion: AMENDMENT_SCHEMA_VERSION,
    proposalId: createProposalId(skillId),
    generatedAt: new Date().toISOString(),
    status: 'proposed',
    skill: {
      id: skillId,
      path: health.skill.path || null
    },
    evidence: {
      totalRuns,
      failures,
      successRate,
      recurringErrors,
      recurringTasks,
      recurringFeedback
    },
    rationale: [
      'Proposals are generated from repeated failed runs rather than a single anecdotal error.',
      'The suggested patch is additive so the original SKILL.md intent remains auditable.'
    ],
    patch: {
      format: 'markdown-fragment',
      // Fall back to the default skill location when no path was observed.
      targetPath: health.skill.path || `skills/${skillId}/SKILL.md`,
      preview
    }
  };
}
|
||||
|
||||
module.exports = {
|
||||
AMENDMENT_SCHEMA_VERSION,
|
||||
proposeSkillAmendment
|
||||
};
|
||||
59
scripts/lib/skill-improvement/evaluate.js
Normal file
59
scripts/lib/skill-improvement/evaluate.js
Normal file
@@ -0,0 +1,59 @@
|
||||
'use strict';
|
||||
|
||||
// Schema identifier placed in the `schemaVersion` field of the object
// returned by buildSkillEvaluationScaffold.
const EVALUATION_SCHEMA_VERSION = 'ecc.skill-evaluation.v1';
|
||||
|
||||
/**
 * Rounds a number to three decimal places.
 *
 * @param {number} value - Number to round (used here for success rates and
 *   their differences).
 * @returns {number} `value` scaled by 1000, rounded with Math.round, and
 *   scaled back.
 */
function roundRate(value) {
  const THOUSANDTHS = 1000;
  const scaled = Math.round(value * THOUSANDTHS);
  return scaled / THOUSANDTHS;
}
|
||||
|
||||
/**
 * Tallies run outcomes for a list of observation records.
 *
 * A record counts as a success only when `record.outcome.success` is truthy;
 * every other record (including one with no `outcome`) counts as a failure.
 *
 * @param {Array<object>} records - Observation records to tally.
 * @returns {{runs: number, successes: number, failures: number, successRate: number}}
 *   Counts plus the success rate rounded by roundRate; the rate is 0 when
 *   there are no records.
 */
function summarize(records) {
  const runs = records.length;
  const successes = records.reduce(
    (count, record) => (record.outcome && record.outcome.success ? count + 1 : count),
    0
  );

  let successRate = 0;
  if (runs > 0) {
    successRate = roundRate(successes / runs);
  }

  return {
    runs,
    successes,
    failures: runs - successes,
    successRate
  };
}
|
||||
|
||||
/**
 * Compares baseline and amended runs of one skill and recommends an action.
 *
 * Only records whose `skill.id` equals `skillId` are considered. A record is
 * "amended" when `run.variant === 'amended'`; every other matching record is
 * baseline. When `options.amendmentId` is set, amended records with a
 * different `run.amendmentId` are left out of the amended summary (they are
 * not added to the baseline either).
 *
 * The recommendation stays `insufficient-data` unless both variants have at
 * least `minimumRunsPerVariant` runs; past that gate it is
 * `promote-amendment` when the success-rate delta is positive and
 * `keep-baseline` otherwise.
 *
 * @param {string} skillId - Skill whose records are evaluated.
 * @param {Array<object>} records - Observation records for any skills.
 * @param {object} [options]
 * @param {number} [options.minimumRunsPerVariant] - Falsy values fall back to 2.
 * @param {string} [options.amendmentId] - Restricts the amended variant to one amendment.
 * @returns {object} Evaluation object stamped with EVALUATION_SCHEMA_VERSION.
 */
function buildSkillEvaluationScaffold(skillId, records, options = {}) {
  const minimumRunsPerVariant = options.minimumRunsPerVariant || 2;
  const amendmentId = options.amendmentId || null;

  // Single pass: split the skill's records into the two variants.
  const baselineRuns = [];
  const amendedRuns = [];
  records.forEach(record => {
    const belongsToSkill = record.skill && record.skill.id === skillId;
    if (!belongsToSkill) {
      return;
    }

    const isAmended = Boolean(record.run) && record.run.variant === 'amended';
    if (!isAmended) {
      baselineRuns.push(record);
      return;
    }

    if (!amendmentId || record.run.amendmentId === amendmentId) {
      amendedRuns.push(record);
    }
  });

  const baseline = summarize(baselineRuns);
  const amended = summarize(amendedRuns);
  const delta = {
    successRate: roundRate(amended.successRate - baseline.successRate),
    failures: amended.failures - baseline.failures
  };

  const gatePassed = baseline.runs >= minimumRunsPerVariant
    && amended.runs >= minimumRunsPerVariant;

  let recommendation;
  if (!gatePassed) {
    recommendation = 'insufficient-data';
  } else if (delta.successRate > 0) {
    recommendation = 'promote-amendment';
  } else {
    recommendation = 'keep-baseline';
  }

  return {
    schemaVersion: EVALUATION_SCHEMA_VERSION,
    generatedAt: new Date().toISOString(),
    skillId,
    amendmentId,
    gate: { minimumRunsPerVariant },
    baseline,
    amended,
    delta,
    recommendation
  };
}
|
||||
|
||||
module.exports = {
|
||||
EVALUATION_SCHEMA_VERSION,
|
||||
buildSkillEvaluationScaffold
|
||||
};
|
||||
118
scripts/lib/skill-improvement/health.js
Normal file
118
scripts/lib/skill-improvement/health.js
Normal file
@@ -0,0 +1,118 @@
|
||||
'use strict';
|
||||
|
||||
// Schema identifier placed in the `schemaVersion` field of the report
// returned by buildSkillHealthReport.
const HEALTH_SCHEMA_VERSION = 'ecc.skill-health.v1';
|
||||
|
||||
/**
 * Rounds a number to three decimal places.
 *
 * @param {number} value - Number to round (used here for success rates).
 * @returns {number} `value` scaled by 1000, rounded with Math.round, and
 *   scaled back.
 */
function roundRate(value) {
  const THOUSANDTHS = 1000;
  const scaled = Math.round(value * THOUSANDTHS);
  return scaled / THOUSANDTHS;
}
|
||||
|
||||
/**
 * Turns a value -> count map into a list ranked by count.
 *
 * Entries are ordered by descending count; ties are broken by comparing the
 * values as strings with `localeCompare`. Values are coerced with `String()`
 * for the comparison only, so a non-string value (for example a numeric
 * error code found in a hand-written observation) no longer makes the sort
 * throw. The returned `value` fields keep their original type.
 *
 * @param {Map<*, number>} values - Occurrence counts keyed by value.
 * @returns {Array<{value: *, count: number}>} Ranked entries (new array).
 */
function rankCounts(values) {
  return Array.from(values, ([value, count]) => ({ value, count }))
    .sort((left, right) => (
      right.count - left.count
      || String(left.value).localeCompare(String(right.value))
    ));
}
|
||||
|
||||
/**
 * Counts runs, successes and failures per run variant.
 *
 * A record's variant is `record.run.variant` when truthy and `'baseline'`
 * otherwise. A record is a success only when `record.outcome.success` is
 * truthy; anything else is a failure.
 *
 * Tallies are accumulated in a Map and converted with Object.fromEntries
 * rather than written straight onto a plain object. Writing onto `{}` made
 * variant names that collide with inherited members (`constructor`,
 * `toString`, `__proto__`, ...) skip initialisation and mutate built-ins
 * instead of being counted.
 *
 * @param {Array<object>} records - Observation records for a single skill.
 * @returns {Object<string, {runs: number, successes: number, failures: number}>}
 *   Plain object keyed by variant name.
 */
function summarizeVariantRuns(records) {
  const tallies = new Map();

  records.forEach(record => {
    // String() mirrors the property-key coercion of the object result.
    const variant = String(record.run && record.run.variant ? record.run.variant : 'baseline');

    let tally = tallies.get(variant);
    if (!tally) {
      tally = { runs: 0, successes: 0, failures: 0 };
      tallies.set(variant, tally);
    }

    tally.runs += 1;
    if (record.outcome && record.outcome.success) {
      tally.successes += 1;
    } else {
      tally.failures += 1;
    }
  });

  // fromEntries defines own data properties, so even `__proto__` is stored
  // as a regular key instead of replacing the result's prototype.
  return Object.fromEntries(tallies);
}
|
||||
|
||||
/**
 * Classifies a skill by its failure count.
 *
 * @param {{failures: number}} skillSummary - Summary carrying the failure count.
 * @param {object} [options]
 * @param {number} [options.minFailureCount] - Failure count at which a skill
 *   is `failing`; falsy values fall back to 2.
 * @returns {'failing'|'watch'|'healthy'} `failing` at or above the threshold,
 *   `watch` for any other positive failure count, `healthy` otherwise.
 */
function deriveSkillStatus(skillSummary, options = {}) {
  const failingThreshold = options.minFailureCount || 2;
  const { failures } = skillSummary;

  if (failures >= failingThreshold) {
    return 'failing';
  }

  return failures > 0 ? 'watch' : 'healthy';
}
|
||||
|
||||
/**
 * Aggregates observation records into a per-skill health report.
 *
 * Records are grouped by `skill.id`. For each skill the report carries run
 * counts, a rounded success rate, a status from deriveSkillStatus, ranked
 * lists of the errors, tasks and feedback seen on failed runs, and per-variant
 * tallies. Skills are ordered by descending failure count, then by id.
 *
 * Records that are falsy or have no `skill` are skipped in every mode.
 * Previously they were dropped only when `options.skillId` was set; without
 * the filter a single such record crashed the whole report.
 *
 * @param {Array<object>} records - Observation records.
 * @param {object} [options] - Also passed to deriveSkillStatus.
 * @param {string} [options.skillId] - When truthy, only this skill is reported.
 * @param {number} [options.minFailureCount] - Read by deriveSkillStatus.
 * @returns {{schemaVersion: string, generatedAt: string, totalObservations: number,
 *   skillCount: number, skills: Array<object>}} The report; `totalObservations`
 *   counts the records that were actually aggregated.
 */
function buildSkillHealthReport(records, options = {}) {
  const filterSkillId = options.skillId || null;

  const filtered = records.filter(record => (
    Boolean(record && record.skill)
    && (!filterSkillId || record.skill.id === filterSkillId)
  ));

  const grouped = new Map();
  filtered.forEach(record => {
    const skillId = record.skill.id;
    if (!grouped.has(skillId)) {
      grouped.set(skillId, []);
    }
    grouped.get(skillId).push(record);
  });

  const increment = (counts, key) => counts.set(key, (counts.get(key) || 0) + 1);

  const skills = Array.from(grouped.entries())
    .map(([skillId, skillRecords]) => {
      const successes = skillRecords.filter(record => record.outcome && record.outcome.success).length;
      const failures = skillRecords.length - successes;
      const recurringErrors = new Map();
      const recurringTasks = new Map();
      const recurringFeedback = new Map();

      skillRecords.forEach(record => {
        // Only failed runs with an outcome contribute failure signals; a
        // record without `outcome` still counts as a failure above.
        if (!record.outcome || record.outcome.success) {
          return;
        }

        if (record.outcome.error) {
          increment(recurringErrors, record.outcome.error);
        }
        if (record.task) {
          increment(recurringTasks, record.task);
        }
        if (record.outcome.feedback) {
          increment(recurringFeedback, record.outcome.feedback);
        }
      });

      const summary = {
        skill: {
          id: skillId,
          // The path is taken from the first record seen for the skill.
          path: skillRecords[0].skill.path || null
        },
        totalRuns: skillRecords.length,
        successes,
        failures,
        successRate: skillRecords.length > 0 ? roundRate(successes / skillRecords.length) : 0,
        status: 'healthy',
        recurringErrors: rankCounts(recurringErrors).map(entry => ({ error: entry.value, count: entry.count })),
        recurringTasks: rankCounts(recurringTasks).map(entry => ({ task: entry.value, count: entry.count })),
        recurringFeedback: rankCounts(recurringFeedback).map(entry => ({ feedback: entry.value, count: entry.count })),
        variants: summarizeVariantRuns(skillRecords)
      };

      summary.status = deriveSkillStatus(summary, options);
      return summary;
    })
    // String() keeps the tie-break from throwing on a non-string skill id.
    .sort((left, right) => (
      right.failures - left.failures
      || String(left.skill.id).localeCompare(String(right.skill.id))
    ));

  return {
    schemaVersion: HEALTH_SCHEMA_VERSION,
    generatedAt: new Date().toISOString(),
    totalObservations: filtered.length,
    skillCount: skills.length,
    skills
  };
}
|
||||
|
||||
module.exports = {
|
||||
HEALTH_SCHEMA_VERSION,
|
||||
buildSkillHealthReport
|
||||
};
|
||||
108
scripts/lib/skill-improvement/observations.js
Normal file
108
scripts/lib/skill-improvement/observations.js
Normal file
@@ -0,0 +1,108 @@
|
||||
'use strict';
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const os = require('os');
|
||||
|
||||
// Schema identifier written into every observation built by
// createSkillObservation; readSkillObservations discards records whose
// `schemaVersion` differs from it.
const OBSERVATION_SCHEMA_VERSION = 'ecc.skill-observation.v1';
|
||||
|
||||
/**
 * Resolves the absolute project root directory.
 *
 * @param {object} [options]
 * @param {string} [options.projectRoot] - Preferred root when truthy.
 * @param {string} [options.cwd] - Used when `projectRoot` is falsy.
 * @returns {string} Absolute path of the first truthy candidate among
 *   `projectRoot`, `cwd` and `process.cwd()`.
 */
function resolveProjectRoot(options = {}) {
  const { projectRoot, cwd } = options;
  const base = projectRoot || cwd || process.cwd();
  return path.resolve(base);
}
|
||||
|
||||
/**
 * Returns the directory that holds skill telemetry for a project.
 *
 * @param {object} [options] - Forwarded to resolveProjectRoot.
 * @returns {string} `<projectRoot>/.claude/ecc/skills`.
 */
function getSkillTelemetryRoot(options = {}) {
  const projectRoot = resolveProjectRoot(options);
  return path.join(projectRoot, '.claude', 'ecc', 'skills');
}
|
||||
|
||||
/**
 * Returns the path of the JSONL file that stores skill observations.
 *
 * @param {object} [options] - Forwarded to getSkillTelemetryRoot.
 * @returns {string} `<telemetryRoot>/observations.jsonl`.
 */
function getSkillObservationsPath(options = {}) {
  const telemetryRoot = getSkillTelemetryRoot(options);
  return path.join(telemetryRoot, 'observations.jsonl');
}
|
||||
|
||||
/**
 * Validates that a value is a string with non-whitespace content.
 *
 * @param {*} value - Candidate value.
 * @param {string} label - Field name used in the error message.
 * @returns {string} The value with surrounding whitespace trimmed.
 * @throws {Error} When `value` is not a string or trims to an empty string.
 */
function ensureString(value, label) {
  // Non-strings are treated like blank strings so both fail the same check.
  const trimmed = typeof value === 'string' ? value.trim() : '';

  if (trimmed.length === 0) {
    throw new Error(`${label} must be a non-empty string`);
  }

  return trimmed;
}
|
||||
|
||||
/**
 * Generates an identifier for a new observation.
 *
 * @returns {string} `obs-<epochMs>-<pid>-<hex>`, where `<hex>` is up to six
 *   hexadecimal digits taken from `Math.random()`.
 */
function createObservationId() {
  const timestampMs = Date.now();
  // Drop the leading "0." of the base-16 rendering and keep at most 6 digits.
  const randomHex = Math.random().toString(16).slice(2, 8);
  return `obs-${timestampMs}-${process.pid}-${randomHex}`;
}
|
||||
|
||||
/**
 * Normalises raw input into an observation record.
 *
 * `task` and `skill.id` are required and validated with ensureString. The
 * remaining fields are optional and receive defaults: `variant` falls back to
 * `'baseline'`, `source` to `'manual'`, and a missing `observationId` or
 * `timestamp` is generated at call time.
 *
 * @param {object} input
 * @param {string} input.task - Required; trimmed.
 * @param {{id: string, path?: string}} input.skill - `id` required; a blank
 *   or non-string `path` becomes null.
 * @param {*} [input.success] - Coerced with Boolean().
 * @param {*} [input.error] - Stringified unless null/undefined.
 * @param {*} [input.feedback] - Stringified unless null/undefined.
 * @param {string} [input.variant]
 * @param {string} [input.observationId] - Used as-is when a non-empty string.
 * @param {string} [input.timestamp] - Used as-is when a non-empty string.
 * @param {string} [input.amendmentId]
 * @param {string} [input.sessionId]
 * @param {string} [input.source]
 * @returns {object} Observation stamped with OBSERVATION_SCHEMA_VERSION.
 * @throws {Error} When `task` or `skill.id` is not a non-empty string.
 */
function createSkillObservation(input) {
  // Trimmed string when the candidate has content, otherwise the fallback.
  const trimmedOr = (candidate, fallback) => {
    if (typeof candidate !== 'string') {
      return fallback;
    }
    const trimmed = candidate.trim();
    return trimmed.length > 0 ? trimmed : fallback;
  };
  const textOrNull = candidate => (candidate == null ? null : String(candidate));
  const isNonEmptyString = candidate => typeof candidate === 'string' && candidate.length > 0;

  // Required fields first so their validation errors surface in this order.
  const task = ensureString(input.task, 'task');
  const skillId = ensureString(input.skill && input.skill.id, 'skill.id');
  const wasSuccessful = Boolean(input.success);

  const skill = {
    id: skillId,
    path: trimmedOr(input.skill.path, null)
  };
  const outcome = {
    success: wasSuccessful,
    status: wasSuccessful ? 'success' : 'failure',
    error: textOrNull(input.error),
    feedback: textOrNull(input.feedback)
  };
  const run = {
    variant: trimmedOr(input.variant, 'baseline'),
    amendmentId: input.amendmentId || null,
    sessionId: input.sessionId || null,
    source: input.source || 'manual'
  };

  return {
    schemaVersion: OBSERVATION_SCHEMA_VERSION,
    observationId: isNonEmptyString(input.observationId)
      ? input.observationId
      : createObservationId(),
    timestamp: isNonEmptyString(input.timestamp)
      ? input.timestamp
      : new Date().toISOString(),
    task,
    skill,
    outcome,
    run
  };
}
|
||||
|
||||
/**
 * Appends one observation as a JSON line to the observations file.
 *
 * The parent directory is created (recursively) before the write, and the
 * line is terminated with the platform end-of-line sequence.
 *
 * @param {object} observation - Value serialised with JSON.stringify.
 * @param {object} [options] - Forwarded to getSkillObservationsPath.
 * @returns {string} Path of the file that was appended to.
 */
function appendSkillObservation(observation, options = {}) {
  const outputPath = getSkillObservationsPath(options);
  const directory = path.dirname(outputPath);

  fs.mkdirSync(directory, { recursive: true });

  const serialized = JSON.stringify(observation);
  fs.appendFileSync(outputPath, `${serialized}${os.EOL}`, 'utf8');

  return outputPath;
}
|
||||
|
||||
/**
 * Loads observation records from a JSONL file.
 *
 * Blank lines, lines that are not valid JSON, and parsed values whose
 * `schemaVersion` is not OBSERVATION_SCHEMA_VERSION are skipped silently.
 *
 * @param {object} [options]
 * @param {string} [options.observationsPath] - Explicit file to read; when
 *   falsy the path comes from getSkillObservationsPath(options).
 * @returns {Array<object>} Parsed observations in file order; an empty array
 *   when the file does not exist.
 */
function readSkillObservations(options = {}) {
  const observationPath = path.resolve(options.observationsPath || getSkillObservationsPath(options));

  if (!fs.existsSync(observationPath)) {
    return [];
  }

  const observations = [];
  const rawLines = fs.readFileSync(observationPath, 'utf8').split(/\r?\n/);

  for (const rawLine of rawLines) {
    if (!rawLine) {
      continue;
    }

    let record;
    try {
      record = JSON.parse(rawLine);
    } catch {
      // Malformed lines are skipped on purpose (best-effort read).
      continue;
    }

    if (record && record.schemaVersion === OBSERVATION_SCHEMA_VERSION) {
      observations.push(record);
    }
  }

  return observations;
}
|
||||
|
||||
module.exports = {
|
||||
OBSERVATION_SCHEMA_VERSION,
|
||||
appendSkillObservation,
|
||||
createSkillObservation,
|
||||
getSkillObservationsPath,
|
||||
getSkillTelemetryRoot,
|
||||
readSkillObservations,
|
||||
resolveProjectRoot
|
||||
};
|
||||
Reference in New Issue
Block a user