feat: self-improving skills loop — observe, inspect, amend, evaluate

- Add skill health observation layer (execution logging, success/failure tracking)
- Add skill health inspector (trace recurring failures across runs)
- Add amendify mechanism (propose SKILL.md patches from failure evidence)
- Add evaluation scaffolding (compare amended vs original performance)
- Wire into session-inspect CLI: skills:health, skills:amendify, skills:evaluate
- 1145/1145 tests passing (+3 new)
2026-05-15 13:23:13 +08:00 · 2026-03-14 23:21:18 -07:00
parent 2b2777915e
commit c53bba9e02
7 changed files with 753 additions and 6 deletions
--- a/scripts/lib/skill-improvement/amendify.js
+++ b/scripts/lib/skill-improvement/amendify.js
@@ -0,0 +1,89 @@
+'use strict';
+
+const { buildSkillHealthReport } = require('./health');
+
+const AMENDMENT_SCHEMA_VERSION = 'ecc.skill-amendment-proposal.v1';
+
+// Builds "amend-<skillId>-<epoch ms>"; the timestamp is Date.now() at call time.
+const createProposalId = skillId =>
+  `amend-${skillId}-${Date.now()}`;
+
+/**
+ * Renders the markdown fragment shown as the preview of an amendment proposal.
+ * Only the first entry of each recurring list on `health` is mentioned.
+ * @param {string} skillId - Skill id interpolated into the routing bullet.
+ * @param {object} health - Needs recurringErrors, recurringTasks and recurringFeedback arrays.
+ * @returns {string} Heading, blank line and bullets joined with '\n'.
+ */
+function summarizePatchPreview(skillId, health) {
+  const topError = health.recurringErrors[0];
+  const topTask = health.recurringTasks[0];
+  const topFeedback = health.recurringFeedback[0];
+  const evidenceBullets = [
+    topError && `- Add explicit guardrails for recurring failure: ${topError.error}.`,
+    topTask && `- Add an example workflow for task pattern: ${topTask.task}.`,
+    topFeedback && `- Address repeated user feedback: ${topFeedback.feedback}.`
+  ].filter(Boolean);
+
+  return ['## Failure-Driven Amendments', '',
+    `- Focus skill routing for \`${skillId}\` when tasks match the proven success cases.`,
+    ...evidenceBullets,
+    '- Add a verification checklist before declaring the skill output complete.'].join('\n');
+}
+
+/**
+ * Proposes an additive SKILL.md amendment for one skill from its failed observations.
+ * @param {string} skillId - Skill to build the proposal for.
+ * @param {object[]} records - Observation records handed to buildSkillHealthReport.
+ * @param {object} [options] - Forwarded to buildSkillHealthReport; a falsy minFailureCount becomes 1.
+ * @returns {object} A 'proposed' proposal carrying evidence and a markdown-fragment patch, or an
+ *   'insufficient-evidence' result (patch: null) when the skill has no failed runs.
+ */
+function proposeSkillAmendment(skillId, records, options = {}) {
+  const reportOptions = { ...options, skillId, minFailureCount: options.minFailureCount || 1 };
+  const health = buildSkillHealthReport(records, reportOptions).skills[0];
+
+  if (!health || health.failures === 0) {
+    return {
+      schemaVersion: AMENDMENT_SCHEMA_VERSION,
+      skill: { id: skillId, path: null },
+      status: 'insufficient-evidence',
+      rationale: ['No failed observations were available for this skill.'],
+      patch: null
+    };
+  }
+
+  const observedPath = health.skill.path;
+  const { totalRuns, failures, successRate, recurringErrors, recurringTasks, recurringFeedback } = health;
+
+  return {
+    schemaVersion: AMENDMENT_SCHEMA_VERSION,
+    proposalId: createProposalId(skillId),
+    generatedAt: new Date().toISOString(),
+    status: 'proposed',
+    skill: { id: skillId, path: observedPath || null },
+    evidence: {
+      totalRuns,
+      failures,
+      successRate,
+      recurringErrors,
+      recurringTasks,
+      recurringFeedback
+    },
+    rationale: [
+      'Proposals are generated from repeated failed runs rather than a single anecdotal error.',
+      'The suggested patch is additive so the original SKILL.md intent remains auditable.'
+    ],
+    patch: {
+      format: 'markdown-fragment',
+      // Falls back to skills/<skillId>/SKILL.md when the health summary has no path.
+      targetPath: observedPath || `skills/${skillId}/SKILL.md`,
+      preview: summarizePatchPreview(skillId, health)
+    }
+  };
+}
+
+module.exports = {
+  AMENDMENT_SCHEMA_VERSION,
+  proposeSkillAmendment
+};
--- a/scripts/lib/skill-improvement/evaluate.js
+++ b/scripts/lib/skill-improvement/evaluate.js
@@ -0,0 +1,59 @@
+'use strict';
+
+const EVALUATION_SCHEMA_VERSION = 'ecc.skill-evaluation.v1';
+
+// Rounds `value` to three decimal places, e.g. 2 / 3 becomes 0.667.
+const roundRate = value =>
+  Math.round(value * 1000) / 1000;
+
+// Tallies one variant's records: runs, successes, failures and the rounded success rate
+// (0 when there are no runs). A record without a truthy outcome.success is a failure.
+function summarize(records) {
+  const runs = records.length;
+  const successes = records.reduce(
+    (count, record) => (record.outcome && record.outcome.success ? count + 1 : count),
+    0
+  );
+  const successRate = runs > 0 ? roundRate(successes / runs) : 0;
+  return { runs, successes, failures: runs - successes, successRate };
+}
+
+/**
+ * Compares baseline and amended runs of one skill and recommends whether to promote the amendment.
+ * @param {string} skillId - Only records whose skill.id equals this value are considered.
+ * @param {object[]} records - Observation records; run.variant === 'amended' marks amended runs.
+ * @param {object} [options]
+ * @param {number} [options.minimumRunsPerVariant] - Runs each variant needs for a verdict; falsy means 2.
+ * @param {string} [options.amendmentId] - When set, only amended runs with this amendmentId count.
+ * @returns {object} Both variant summaries, their delta, the gate used and a recommendation.
+ */
+function buildSkillEvaluationScaffold(skillId, records, options = {}) {
+  const minimumRunsPerVariant = options.minimumRunsPerVariant || 2;
+  const amendmentId = options.amendmentId || null;
+  const isAmendedRun = record => Boolean(record.run) && record.run.variant === 'amended';
+  const forSkill = records.filter(record => record.skill && record.skill.id === skillId);
+
+  // Every run not tagged 'amended' (including records without run info) counts as baseline.
+  const baseline = summarize(forSkill.filter(record => !isAmendedRun(record)));
+  const amended = summarize(forSkill.filter(
+    record => isAmendedRun(record) && (!amendmentId || record.run.amendmentId === amendmentId)
+  ));
+  const delta = {
+    successRate: roundRate(amended.successRate - baseline.successRate),
+    failures: amended.failures - baseline.failures
+  };
+
+  // A verdict is only given once both variants have reached the minimum run count.
+  const gateMet = baseline.runs >= minimumRunsPerVariant && amended.runs >= minimumRunsPerVariant;
+  const verdict = delta.successRate > 0 ? 'promote-amendment' : 'keep-baseline';
+  const recommendation = gateMet ? verdict : 'insufficient-data';
+  const generatedAt = new Date().toISOString();
+
+  return { schemaVersion: EVALUATION_SCHEMA_VERSION, generatedAt, skillId, amendmentId,
+    gate: { minimumRunsPerVariant }, baseline, amended, delta, recommendation };
+}
+
+module.exports = {
+  EVALUATION_SCHEMA_VERSION,
+  buildSkillEvaluationScaffold
+};
--- a/scripts/lib/skill-improvement/health.js
+++ b/scripts/lib/skill-improvement/health.js
@@ -0,0 +1,118 @@
+'use strict';
+
+const HEALTH_SCHEMA_VERSION = 'ecc.skill-health.v1';
+
+// Rounds `value` to three decimal places, e.g. 2 / 3 becomes 0.667.
+const roundRate = value =>
+  Math.round(value * 1000) / 1000;
+
+// Ranks Map entries as { value, count }, highest count first; ties compare String(value).
+function rankCounts(values) {
+  return Array.from(values, ([value, count]) => ({ value, count }))
+    .sort((a, b) => b.count - a.count || String(a.value).localeCompare(String(b.value)));
+}
+
+// Counts runs, successes and failures per run variant ('baseline' when a record names none).
+// Totals live in a Map until the end so a variant called '__proto__' or 'constructor'
+// cannot resolve to an inherited Object property and corrupt it.
+function summarizeVariantRuns(records) {
+  const totals = new Map();
+  records.forEach(record => {
+    const key = String(record.run && record.run.variant ? record.run.variant : 'baseline');
+    if (!totals.has(key)) {
+      totals.set(key, { runs: 0, successes: 0, failures: 0 });
+    }
+
+    const tally = totals.get(key);
+    tally.runs += 1;
+    tally[record.outcome && record.outcome.success ? 'successes' : 'failures'] += 1;
+  });
+  return Object.fromEntries(totals);
+}
+
+// Classifies a skill summary by its failure count: 'failing' once failures reach
+// options.minFailureCount (2 when that is falsy), 'watch' for any smaller non-zero
+// count, otherwise 'healthy'.
+function deriveSkillStatus(skillSummary, options = {}) {
+  const threshold = options.minFailureCount || 2;
+  const { failures } = skillSummary;
+
+  if (failures >= threshold) {
+    return 'failing';
+  }
+  return failures > 0 ? 'watch' : 'healthy';
+}
+
+/**
+ * Builds a per-skill health report from observation records.
+ * Records without a `skill.id` are skipped so a single malformed observation cannot abort
+ * the report. Recurring errors, tasks and feedback are tallied from failed runs only.
+ * @param {object[]} records - Observation records (task, skill, outcome, run).
+ * @param {object} [options]
+ * @param {string} [options.skillId] - Restrict the report to this skill id.
+ * @param {number} [options.minFailureCount] - Passed through to deriveSkillStatus.
+ * @returns {object} schemaVersion, generatedAt, totalObservations, skillCount and `skills`,
+ *   sorted by failures (descending) and then by skill id.
+ */
+function buildSkillHealthReport(records, options = {}) {
+  const filterSkillId = options.skillId || null;
+  const filtered = records.filter(record => record && record.skill && record.skill.id
+    && (!filterSkillId || record.skill.id === filterSkillId));
+
+  const grouped = new Map();
+  filtered.forEach(record => {
+    const bucket = grouped.get(record.skill.id) || [];
+    bucket.push(record);
+    grouped.set(record.skill.id, bucket);
+  });
+
+  // Adds one to the tally for `key`; empty values (null, '') are not counted.
+  const bump = (tally, key) => { if (key) tally.set(key, (tally.get(key) || 0) + 1); };
+
+  const skills = Array.from(grouped, ([skillId, skillRecords]) => {
+    const successes = skillRecords.filter(record => record.outcome && record.outcome.success).length;
+    const failures = skillRecords.length - successes;
+    const recurringErrors = new Map();
+    const recurringTasks = new Map();
+    const recurringFeedback = new Map();
+
+    skillRecords.forEach(record => {
+      // A record with no outcome counts as a failure above but has nothing to tally here.
+      if (!record.outcome || record.outcome.success) {
+        return;
+      }
+      bump(recurringErrors, record.outcome.error);
+      bump(recurringTasks, record.task);
+      bump(recurringFeedback, record.outcome.feedback);
+    });
+
+    const summary = {
+      skill: { id: skillId, path: skillRecords[0].skill.path || null },
+      totalRuns: skillRecords.length,
+      successes,
+      failures,
+      successRate: skillRecords.length > 0 ? roundRate(successes / skillRecords.length) : 0,
+      status: 'healthy',
+      recurringErrors: rankCounts(recurringErrors).map(({ value, count }) => ({ error: value, count })),
+      recurringTasks: rankCounts(recurringTasks).map(({ value, count }) => ({ task: value, count })),
+      recurringFeedback: rankCounts(recurringFeedback).map(({ value, count }) => ({ feedback: value, count })),
+      variants: summarizeVariantRuns(skillRecords)
+    };
+    summary.status = deriveSkillStatus(summary, options);
+    return summary;
+  }).sort((left, right) => right.failures - left.failures
+    || String(left.skill.id).localeCompare(String(right.skill.id)));
+
+  return {
+    schemaVersion: HEALTH_SCHEMA_VERSION,
+    generatedAt: new Date().toISOString(),
+    totalObservations: filtered.length,
+    skillCount: skills.length,
+    skills
+  };
+}
+
+module.exports = {
+  HEALTH_SCHEMA_VERSION,
+  buildSkillHealthReport
+};
--- a/scripts/lib/skill-improvement/observations.js
+++ b/scripts/lib/skill-improvement/observations.js
@@ -0,0 +1,108 @@
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+
+const OBSERVATION_SCHEMA_VERSION = 'ecc.skill-observation.v1';
+
+// Absolute project root: options.projectRoot, else options.cwd, else process.cwd().
+const resolveProjectRoot = (options = {}) =>
+  path.resolve(options.projectRoot || options.cwd || process.cwd());
+
+// Directory holding skill telemetry: <projectRoot>/.claude/ecc/skills.
+const getSkillTelemetryRoot = (options = {}) =>
+  path.join(resolveProjectRoot(options), '.claude', 'ecc', 'skills');
+
+// Path of the JSONL observations log: <telemetry root>/observations.jsonl.
+const getSkillObservationsPath = (options = {}) =>
+  path.join(getSkillTelemetryRoot(options), 'observations.jsonl');
+
+// Returns `value` trimmed; throws Error("<label> must be a non-empty string") when
+// `value` is not a string or is blank after trimming.
+function ensureString(value, label) {
+  const trimmed = typeof value === 'string' ? value.trim() : '';
+  if (trimmed === '') throw new Error(`${label} must be a non-empty string`);
+  return trimmed;
+}
+
+// Builds "obs-<epoch ms>-<pid>-<up to six hex digits taken from Math.random()>".
+const createObservationId = () =>
+  `obs-${Date.now()}-${process.pid}-${Math.random().toString(16).slice(2, 8)}`;
+
+/**
+ * Normalizes raw input into an observation record tagged with OBSERVATION_SCHEMA_VERSION.
+ * @param {object} input - Needs non-blank `task` and `skill.id`; may carry skill.path, success,
+ *   error, feedback, variant, observationId, timestamp, amendmentId, sessionId and source.
+ * @returns {object} Record with task, skill, outcome and run sections.
+ * @throws {Error} When `task` or `skill.id` is not a non-empty string.
+ */
+function createSkillObservation(input) {
+  // Trimmed string, or `fallback` when the value is not a string or is blank.
+  const trimmedOr = (value, fallback) =>
+    (typeof value === 'string' && value.trim().length > 0 ? value.trim() : fallback);
+
+  // The string as given (untrimmed), or a freshly generated value when absent or empty.
+  const givenOr = (value, generate) =>
+    (typeof value === 'string' && value.length > 0 ? value : generate());
+
+  const task = ensureString(input.task, 'task');
+  const id = ensureString(input.skill && input.skill.id, 'skill.id');
+  const skillPath = trimmedOr(input.skill.path, null);
+  const success = Boolean(input.success);
+  // error and feedback are stringified unless null or undefined.
+  const error = input.error == null ? null : String(input.error);
+  const feedback = input.feedback == null ? null : String(input.feedback);
+  const variant = trimmedOr(input.variant, 'baseline');
+
+  return {
+    schemaVersion: OBSERVATION_SCHEMA_VERSION,
+    observationId: givenOr(input.observationId, createObservationId),
+    timestamp: givenOr(input.timestamp, () => new Date().toISOString()),
+    task,
+    skill: { id, path: skillPath },
+    outcome: { success, status: success ? 'success' : 'failure', error, feedback },
+    run: {
+      variant,
+      amendmentId: input.amendmentId || null,
+      sessionId: input.sessionId || null,
+      source: input.source || 'manual'
+    }
+  };
+}
+
+function appendSkillObservation(observation, options = {}) {
+  const target = getSkillObservationsPath(options); // observations.jsonl under the telemetry root
+  fs.mkdirSync(path.dirname(target), { recursive: true }); // parent directories may not exist yet
+  fs.appendFileSync(target, JSON.stringify(observation) + os.EOL, 'utf8'); // one JSON record per line
+  return target; // callers receive the path that was written
+}
+
+// Reads observation records from options.observationsPath (or the default observations
+// file). Returns [] when the file is missing; blank lines, lines that are not valid JSON
+// and records carrying a different schemaVersion are dropped without raising.
+function readSkillObservations(options = {}) {
+  const source = path.resolve(options.observationsPath || getSkillObservationsPath(options));
+  if (!fs.existsSync(source)) {
+    return [];
+  }
+  const parseLine = line => {
+    try { return JSON.parse(line); } catch { return null; }
+  };
+  const records = [];
+  for (const line of fs.readFileSync(source, 'utf8').split(/\r?\n/)) {
+    const record = line ? parseLine(line) : null;
+    if (record && record.schemaVersion === OBSERVATION_SCHEMA_VERSION) records.push(record);
+  }
+  return records;
+}
+
+module.exports = {
+  OBSERVATION_SCHEMA_VERSION,
+  appendSkillObservation,
+  createSkillObservation,
+  getSkillObservationsPath,
+  getSkillTelemetryRoot,
+  readSkillObservations,
+  resolveProjectRoot
+};
--- a/scripts/session-inspect.js
+++ b/scripts/session-inspect.js
@@ -5,6 +5,10 @@ const fs = require('fs');
 const path = require('path');

 const { createAdapterRegistry, inspectSessionTarget } = require('./lib/session-adapters/registry');
+const { readSkillObservations } = require('./lib/skill-improvement/observations');
+const { buildSkillHealthReport } = require('./lib/skill-improvement/health');
+const { proposeSkillAmendment } = require('./lib/skill-improvement/amendify');
+const { buildSkillEvaluationScaffold } = require('./lib/skill-improvement/evaluate');

 function usage() {
  console.log([
@@ -18,12 +22,17 @@ function usage() {
    '  claude:latest        Most recent Claude session history entry',
    '  claude:<id|alias>    Specific Claude session or alias',
    '  <session.tmp>        Direct path to a Claude session file',
+    '  skills:health        Inspect skill failure/success patterns from observations',
+    '  skills:amendify      Propose a SKILL.md patch from failure evidence',
+    '  skills:evaluate      Compare baseline vs amended skill outcomes',
    '',
    'Examples:',
    '  node scripts/session-inspect.js .claude/plan/workflow.json',
    '  node scripts/session-inspect.js workflow-visual-proof',
    '  node scripts/session-inspect.js claude:latest',
    '  node scripts/session-inspect.js latest --target-type claude-history',
+    '  node scripts/session-inspect.js skills:health',
+    '  node scripts/session-inspect.js skills:amendify --skill api-design',
    '  node scripts/session-inspect.js claude:a1b2c3d4 --write /tmp/session.json'
  ].join('\n'));
 }
@@ -39,14 +48,57 @@ function parseArgs(argv) {
  const targetTypeIndex = args.indexOf('--target-type');
  const targetType = targetTypeIndex >= 0 ? args[targetTypeIndex + 1] : null;

+  const skillIndex = args.indexOf('--skill');
+  const skillId = skillIndex >= 0 ? args[skillIndex + 1] : null;
+
+  const amendmentIndex = args.indexOf('--amendment-id');
+  const amendmentId = amendmentIndex >= 0 ? args[amendmentIndex + 1] : null;
+
+  const observationsIndex = args.indexOf('--observations');
+  const observationsPath = observationsIndex >= 0 ? args[observationsIndex + 1] : null;
+
  const writeIndex = args.indexOf('--write');
  const writePath = writeIndex >= 0 ? args[writeIndex + 1] : null;

-  return { target, adapterId, targetType, writePath, listAdapters };
+  return { target, adapterId, targetType, writePath, listAdapters, skillId, amendmentId, observationsPath };
+}
+
+/**
+ * Handles the `skills:*` targets and returns null for any other target, so the caller
+ * can fall back to regular session inspection.
+ * Observations are read only after the target is known to be a skills target, so other
+ * targets never touch (or fail on) the observations file.
+ * @param {string} target - CLI target such as 'skills:health'.
+ * @param {object} [options] - cwd, skillId, amendmentId and observationsPath.
+ * @returns {object|null} Health report, amendment proposal or evaluation scaffold.
+ * @throws {Error} When skills:amendify or skills:evaluate is requested without a skill id.
+ */
+function inspectSkillLoopTarget(target, options = {}) {
+  const skillTargets = ['skills:health', 'skills:amendify', 'skills:evaluate'];
+  if (!skillTargets.includes(target)) {
+    return null;
+  }
+  if (target !== 'skills:health' && !options.skillId) {
+    throw new Error(`${target} requires --skill <id>`);
+  }
+  const observations = readSkillObservations({
+    cwd: options.cwd,
+    projectRoot: options.cwd,
+    observationsPath: options.observationsPath
+  });
+  if (target === 'skills:health') {
+    return buildSkillHealthReport(observations, { skillId: options.skillId || null });
+  }
+  if (target === 'skills:amendify') {
+    return proposeSkillAmendment(options.skillId, observations);
+  }
+  return buildSkillEvaluationScaffold(options.skillId, observations, {
+    amendmentId: options.amendmentId || null
+  });
 }

 function main() {
-  const { target, adapterId, targetType, writePath, listAdapters } = parseArgs(process.argv);
+  const { target, adapterId, targetType, writePath, listAdapters, skillId, amendmentId, observationsPath } = parseArgs(process.argv);

  if (listAdapters) {
    const registry = createAdapterRegistry();
@@ -59,12 +111,20 @@ function main() {
    process.exit(1);
  }

-  const inspectTarget = targetType ? { type: targetType, value: target } : target;
-  const snapshot = inspectSessionTarget(inspectTarget, {
+  const skillLoopPayload = inspectSkillLoopTarget(target, {
+    cwd: process.cwd(),
+    skillId,
+    amendmentId,
+    observationsPath
+  });
+  const payloadObject = skillLoopPayload || inspectSessionTarget(
+    targetType ? { type: targetType, value: target } : target,
+    {
      cwd: process.cwd(),
      adapterId
-  });
-  const payload = JSON.stringify(snapshot, null, 2);
+    }
+  );
+  const payload = JSON.stringify(payloadObject, null, 2);

  if (writePath) {
    const absoluteWritePath = path.resolve(writePath);
--- a/tests/lib/skill-improvement.test.js
+++ b/tests/lib/skill-improvement.test.js
@@ -0,0 +1,186 @@
+'use strict';
+
+const assert = require('assert');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+
+const {
+  appendSkillObservation,
+  createSkillObservation,
+  getSkillObservationsPath,
+  readSkillObservations
+} = require('../../scripts/lib/skill-improvement/observations');
+const { buildSkillHealthReport } = require('../../scripts/lib/skill-improvement/health');
+const { proposeSkillAmendment } = require('../../scripts/lib/skill-improvement/amendify');
+const { buildSkillEvaluationScaffold } = require('../../scripts/lib/skill-improvement/evaluate');
+
+console.log('=== Testing skill-improvement ===\n');
+
+let passed = 0;
+let failed = 0;
+
+function test(name, fn) {
+  try {
+    fn();
+    console.log(`  ✓ ${name}`);
+    passed += 1;
+  } catch (error) {
+    console.log(`  ✗ ${name}: ${error.message}`);
+    failed += 1;
+  }
+}
+
+function makeProjectRoot(prefix) {
+  return fs.mkdtempSync(path.join(os.tmpdir(), prefix));
+}
+
+function cleanup(dirPath) {
+  fs.rmSync(dirPath, { recursive: true, force: true });
+}
+
+test('observation layer writes and reads structured skill outcomes', () => {
+  const projectRoot = makeProjectRoot('ecc-skill-observe-');
+
+  try {
+    const observation = createSkillObservation({
+      task: 'Fix flaky Playwright test',
+      skill: {
+        id: 'e2e-testing',
+        path: 'skills/e2e-testing/SKILL.md'
+      },
+      success: false,
+      error: 'playwright timeout',
+      feedback: 'Timed out waiting for locator',
+      sessionId: 'sess-1234'
+    });
+
+    appendSkillObservation(observation, { projectRoot });
+    const records = readSkillObservations({ projectRoot });
+
+    assert.strictEqual(records.length, 1);
+    assert.strictEqual(records[0].schemaVersion, 'ecc.skill-observation.v1');
+    assert.strictEqual(records[0].task, 'Fix flaky Playwright test');
+    assert.strictEqual(records[0].skill.id, 'e2e-testing');
+    assert.strictEqual(records[0].outcome.success, false);
+    assert.strictEqual(records[0].outcome.error, 'playwright timeout');
+    assert.strictEqual(getSkillObservationsPath({ projectRoot }), path.join(projectRoot, '.claude', 'ecc', 'skills', 'observations.jsonl'));
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test('health inspector traces recurring failures for a skill across runs', () => {
+  const projectRoot = makeProjectRoot('ecc-skill-health-');
+
+  try {
+    [
+      createSkillObservation({
+        task: 'Ship Next.js auth middleware',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: false,
+        error: 'missing csrf guidance',
+        feedback: 'Did not mention CSRF'
+      }),
+      createSkillObservation({
+        task: 'Harden Next.js auth middleware',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: false,
+        error: 'missing csrf guidance',
+        feedback: 'Repeated omission'
+      }),
+      createSkillObservation({
+        task: 'Review payment webhook security',
+        skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+        success: true
+      })
+    ].forEach(record => appendSkillObservation(record, { projectRoot }));
+
+    const report = buildSkillHealthReport(readSkillObservations({ projectRoot }), {
+      minFailureCount: 2
+    });
+    const skill = report.skills.find(entry => entry.skill.id === 'security-review');
+
+    assert.ok(skill, 'security-review should appear in the report');
+    assert.strictEqual(skill.totalRuns, 3);
+    assert.strictEqual(skill.failures, 2);
+    assert.strictEqual(skill.status, 'failing');
+    assert.strictEqual(skill.recurringErrors[0].error, 'missing csrf guidance');
+    assert.strictEqual(skill.recurringErrors[0].count, 2);
+  } finally {
+    cleanup(projectRoot);
+  }
+});
+
+test('amendify proposes SKILL.md patch content from failure evidence', () => {
+  const records = [
+    createSkillObservation({
+      task: 'Add API rate limiting',
+      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+      success: false,
+      error: 'missing rate limiting guidance',
+      feedback: 'No rate-limit section'
+    }),
+    createSkillObservation({
+      task: 'Design public API error envelopes',
+      skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+      success: false,
+      error: 'missing error response examples',
+      feedback: 'Need explicit examples'
+    })
+  ];
+
+  const proposal = proposeSkillAmendment('api-design', records);
+
+  assert.strictEqual(proposal.schemaVersion, 'ecc.skill-amendment-proposal.v1');
+  assert.strictEqual(proposal.skill.id, 'api-design');
+  assert.strictEqual(proposal.status, 'proposed');
+  assert.ok(proposal.patch.preview.includes('## Failure-Driven Amendments'));
+  assert.ok(proposal.patch.preview.includes('rate limiting'));
+  assert.ok(proposal.patch.preview.includes('error response'));
+});
+
+test('evaluation scaffold compares amended and baseline performance', () => {
+  const records = [
+    createSkillObservation({
+      task: 'Fix flaky login test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: false,
+      variant: 'baseline'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky checkout test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'baseline'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky login test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'amended',
+      amendmentId: 'amend-1'
+    }),
+    createSkillObservation({
+      task: 'Fix flaky checkout test',
+      skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+      success: true,
+      variant: 'amended',
+      amendmentId: 'amend-1'
+    })
+  ];
+
+  const evaluation = buildSkillEvaluationScaffold('e2e-testing', records, {
+    amendmentId: 'amend-1',
+    minimumRunsPerVariant: 2
+  });
+
+  assert.strictEqual(evaluation.schemaVersion, 'ecc.skill-evaluation.v1');
+  assert.strictEqual(evaluation.baseline.runs, 2);
+  assert.strictEqual(evaluation.amended.runs, 2);
+  assert.strictEqual(evaluation.delta.successRate, 0.5);
+  assert.strictEqual(evaluation.recommendation, 'promote-amendment');
+});
+
+console.log(`\n=== Results: ${passed} passed, ${failed} failed ===`);
+if (failed > 0) process.exit(1);
--- a/tests/scripts/session-inspect.test.js
+++ b/tests/scripts/session-inspect.test.js
@@ -143,6 +143,133 @@ function runTests() {
    }
  })) passed++; else failed++;

+  if (test('inspects skill health from recorded observations', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-skills-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Review auth middleware',
+          skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing csrf guidance', feedback: 'Need CSRF coverage' },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-2',
+          timestamp: '2026-03-14T12:05:00.000Z',
+          task: 'Review auth middleware',
+          skill: { id: 'security-review', path: 'skills/security-review/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing csrf guidance', feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-2' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:health'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-health.v1');
+      assert.ok(payload.skills.some(skill => skill.skill.id === 'security-review'));
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
+  if (test('proposes skill amendments through session-inspect', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-amend-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Add rate limiting',
+          skill: { id: 'api-design', path: 'skills/api-design/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: 'missing rate limiting guidance', feedback: 'Need rate limiting examples' },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:amendify', '--skill', 'api-design'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-amendment-proposal.v1');
+      assert.strictEqual(payload.skill.id, 'api-design');
+      assert.ok(payload.patch.preview.includes('Failure-Driven Amendments'));
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
+  if (test('builds skill evaluation scaffolding through session-inspect', () => {
+    const projectRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-session-inspect-eval-'));
+    const observationsDir = path.join(projectRoot, '.claude', 'ecc', 'skills');
+    fs.mkdirSync(observationsDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(observationsDir, 'observations.jsonl'),
+      [
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-1',
+          timestamp: '2026-03-14T12:00:00.000Z',
+          task: 'Fix flaky login test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: false, status: 'failure', error: null, feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-1' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-2',
+          timestamp: '2026-03-14T12:10:00.000Z',
+          task: 'Fix flaky checkout test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'baseline', amendmentId: null, sessionId: 'sess-2' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-3',
+          timestamp: '2026-03-14T12:20:00.000Z',
+          task: 'Fix flaky login test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'amended', amendmentId: 'amend-1', sessionId: 'sess-3' }
+        }),
+        JSON.stringify({
+          schemaVersion: 'ecc.skill-observation.v1',
+          observationId: 'obs-4',
+          timestamp: '2026-03-14T12:30:00.000Z',
+          task: 'Fix flaky checkout test',
+          skill: { id: 'e2e-testing', path: 'skills/e2e-testing/SKILL.md' },
+          outcome: { success: true, status: 'success', error: null, feedback: null },
+          run: { variant: 'amended', amendmentId: 'amend-1', sessionId: 'sess-4' }
+        })
+      ].join('\n') + '\n'
+    );
+
+    try {
+      const result = run(['skills:evaluate', '--skill', 'e2e-testing', '--amendment-id', 'amend-1'], { cwd: projectRoot });
+      assert.strictEqual(result.code, 0, result.stderr);
+      const payload = JSON.parse(result.stdout);
+      assert.strictEqual(payload.schemaVersion, 'ecc.skill-evaluation.v1');
+      assert.strictEqual(payload.recommendation, 'promote-amendment');
+    } finally {
+      fs.rmSync(projectRoot, { recursive: true, force: true });
+    }
+  })) passed++; else failed++;
+
  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
  process.exit(failed > 0 ? 1 : 0);
 }