feat: self-improving skills loop — observe, inspect, amend, evaluate

- Add skill health observation layer (execution logging, success/failure tracking)
- Add skill health inspector (trace recurring failures across runs)
- Add amendify mechanism (propose SKILL.md patches from failure evidence)
- Add evaluation scaffolding (compare amended vs original performance)
- Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate
- 1145/1145 tests passing (+3 new)
2026-06-13 03:33:15 +08:00 · 2026-03-14 23:21:18 -07:00
parent 2b2777915e
commit c53bba9e02
7 changed files with 753 additions and 6 deletions
--- a/scripts/lib/skill-improvement/amendify.js
+++ b/scripts/lib/skill-improvement/amendify.js
@@ -0,0 +1,89 @@
+'use strict';
+
+const { buildSkillHealthReport } = require('./health');
+
+const AMENDMENT_SCHEMA_VERSION = 'ecc.skill-amendment-proposal.v1'; // schemaVersion tag set on every object proposeSkillAmendment returns
+
+// Builds a proposal id shaped like `amend-<skillId>-<ms>`, where <ms> is the
+// value of Date.now() at call time.
+function createProposalId(skillId) { return 'amend-'.concat(skillId, '-', Date.now()); }
+
+/**
+ * Renders the markdown fragment previewed in an amendment proposal: a heading,
+ * a routing bullet, one bullet per available top recurring error / task /
+ * feedback entry (index 0 of each list), and a closing checklist bullet.
+ *
+ * @param {string} skillId - Interpolated, backtick-quoted, into the routing bullet.
+ * @param {object} health - Supplies `recurringErrors`, `recurringTasks`, `recurringFeedback`.
+ * @returns {string} The fragment's lines joined with '\n'.
+ */
+function summarizePatchPreview(skillId, health) {
+  const topError = health.recurringErrors[0];
+  const topTask = health.recurringTasks[0];
+  const topFeedback = health.recurringFeedback[0];
+  const bullets = [
+    `- Focus skill routing for \`${skillId}\` when tasks match the proven success cases.`,
+    topError && `- Add explicit guardrails for recurring failure: ${topError.error}.`,
+    topTask && `- Add an example workflow for task pattern: ${topTask.task}.`,
+    topFeedback && `- Address repeated user feedback: ${topFeedback.feedback}.`,
+    '- Add a verification checklist before declaring the skill output complete.'
+  ].filter(Boolean); // drops the entries whose source item was missing
+  return ['## Failure-Driven Amendments', '', ...bullets].join('\n');
+}
+
+/**
+ * Turns observation records for one skill into an amendment proposal.
+ *
+ * A health report is built for `skillId` via buildSkillHealthReport and its
+ * first skill entry is used. When there is no entry, or the entry has zero
+ * failures, an 'insufficient-evidence' result with `patch: null` is returned;
+ * otherwise a 'proposed' result carrying the evidence and a markdown preview.
+ *
+ * @param {string} skillId - Skill to analyse; also used in the fallback target path.
+ * @param {*} records - Forwarded untouched to buildSkillHealthReport.
+ * @param {object} [options={}] - Forwarded to buildSkillHealthReport; `skillId` is
+ *   overridden and a falsy `minFailureCount` becomes 1.
+ * @returns {object} Proposal tagged with AMENDMENT_SCHEMA_VERSION.
+ */
+function proposeSkillAmendment(skillId, records, options = {}) {
+  const reportOptions = { ...options, skillId, minFailureCount: options.minFailureCount || 1 };
+  const [health] = buildSkillHealthReport(records, reportOptions).skills;
+  const hasFailures = Boolean(health) && health.failures !== 0;
+
+  if (!hasFailures) {
+    return {
+      schemaVersion: AMENDMENT_SCHEMA_VERSION,
+      skill: { id: skillId, path: null },
+      status: 'insufficient-evidence',
+      rationale: ['No failed observations were available for this skill.'],
+      patch: null
+    };
+  }
+
+  const preview = summarizePatchPreview(skillId, health);
+  const { totalRuns, failures, successRate, recurringErrors, recurringTasks, recurringFeedback } = health;
+  const knownPath = health.skill.path || null;
+
+  return {
+    schemaVersion: AMENDMENT_SCHEMA_VERSION,
+    proposalId: createProposalId(skillId),
+    generatedAt: new Date().toISOString(),
+    status: 'proposed',
+    skill: { id: skillId, path: knownPath },
+    evidence: { totalRuns, failures, successRate, recurringErrors, recurringTasks, recurringFeedback },
+    rationale: [
+      'Proposals are generated from repeated failed runs rather than a single anecdotal error.',
+      'The suggested patch is additive so the original SKILL.md intent remains auditable.'
+    ],
+    patch: {
+      format: 'markdown-fragment',
+      targetPath: knownPath || `skills/${skillId}/SKILL.md`,
+      preview
+    }
+  };
+}
+
+module.exports = { // names exposed to modules that require this file
+  AMENDMENT_SCHEMA_VERSION,
+  proposeSkillAmendment
+};