/**
 * Render a success rate as a whole-number percentage for the text report.
 *
 * @param {number|null|undefined} value - Fractional rate (for example 0.75),
 *   or a nullish / non-numeric value when no rate is available.
 * @returns {string} Rounded percentage such as `75%`, or `n/a` when there is
 *   no usable rate.
 */
function formatRate(value) {
  if (value === null || typeof value === 'undefined') {
    return 'n/a';
  }

  // Previously only `null` meant "no data", so a report entry that was missing
  // the field (undefined) or carried NaN was printed as the misleading "NaN%".
  const rate = Number(value);
  if (!Number.isFinite(rate)) {
    return 'n/a';
  }

  return `${Math.round(rate * 100)}%`;
}
/**
 * List the skills stored directly under a skills root directory.
 *
 * A skill is an immediate sub-directory that contains a `SKILL.md` file.
 *
 * @param {string} [rootPath] - Directory to scan.
 * @returns {Array<{skill_id: string, skill_dir: string}>} One entry per skill;
 *   empty when `rootPath` is falsy, missing, or not a directory.
 * @throws {Error} Re-throws any other filesystem error (for example EACCES).
 */
function listSkillsInRoot(rootPath) {
  if (!rootPath) {
    return [];
  }

  let entries;
  try {
    entries = fs.readdirSync(rootPath, { withFileTypes: true });
  } catch (error) {
    // Reading directly (instead of existsSync + readdirSync) also covers a root
    // that is a regular file or that disappears between the check and the read.
    if (error.code === 'ENOENT' || error.code === 'ENOTDIR') {
      return [];
    }

    throw error;
  }

  return entries
    .filter(entry => entry.isDirectory())
    .map(entry => ({
      skill_id: entry.name,
      skill_dir: path.join(rootPath, entry.name),
    }))
    .filter(skill => fs.existsSync(path.join(skill.skill_dir, 'SKILL.md')));
}
/**
 * Build the skill health report from execution records and discovered skills.
 *
 * Every discovered skill gets an entry, as does every skill id that only
 * appears in the execution records (reported with a null directory and the
 * UNKNOWN skill type). Entries are sorted by skill id.
 *
 * @param {object} [options] - Also forwarded unchanged to
 *   `tracker.readSkillExecutionRecords` and `discoverSkills`.
 * @param {string} [options.now] - Timestamp used as "now"; defaults to the
 *   current time as an ISO string.
 * @param {number|string} [options.warnThreshold] - Success-rate delta that
 *   marks a trend as improving or worsening; defaults to 0.1.
 * @returns {{generated_at: string, warn_threshold: number, skills: object[]}}
 *   The report; `declining` is true exactly when `failure_trend` is
 *   `'worsening'`.
 * @throws {Error} When `now` cannot be parsed or the threshold is not a
 *   finite, non-negative number.
 */
function collectSkillHealth(options = {}) {
  const now = options.now || new Date().toISOString();
  const nowMs = Date.parse(now);
  if (Number.isNaN(nowMs)) {
    throw new Error(`Invalid now timestamp: ${now}`);
  }

  const warnThreshold = typeof options.warnThreshold === 'number'
    ? options.warnThreshold
    : Number(options.warnThreshold || 0.1);
  if (!Number.isFinite(warnThreshold) || warnThreshold < 0) {
    throw new Error(`Invalid warn threshold: ${options.warnThreshold}`);
  }

  // Rows such as `null` or `{}` are valid JSON, so they survive JSONL parsing.
  // Without this filter a `null` row throws while grouping, and a row without
  // a skill_id creates an `undefined` key that breaks the localeCompare sort.
  const records = tracker.readSkillExecutionRecords(options).filter(record => (
    record !== null
    && typeof record === 'object'
    && typeof record.skill_id === 'string'
    && record.skill_id.trim().length > 0
  ));

  const skillsById = discoverSkills(options);

  const recordsBySkill = new Map();
  for (const record of records) {
    if (!recordsBySkill.has(record.skill_id)) {
      recordsBySkill.set(record.skill_id, []);
    }

    recordsBySkill.get(record.skill_id).push(record);
  }

  // Skills that have runs but no directory on disk are still reported.
  for (const skillId of recordsBySkill.keys()) {
    if (!skillsById.has(skillId)) {
      skillsById.set(skillId, {
        skill_id: skillId,
        skill_dir: null,
        skill_type: provenance.SKILL_TYPES.UNKNOWN,
      });
    }
  }

  const skills = Array.from(skillsById.values())
    .sort((left, right) => left.skill_id.localeCompare(right.skill_id))
    .map(skill => {
      const skillRecords = recordsBySkill.get(skill.skill_id) || [];
      const records7d = filterRecordsWithinDays(skillRecords, nowMs, 7);
      const records30d = filterRecordsWithinDays(skillRecords, nowMs, 30);
      const successRate7d = calculateSuccessRate(records7d);
      const successRate30d = calculateSuccessRate(records30d);
      const currentVersionNumber = skill.skill_dir ? versioning.getCurrentVersion(skill.skill_dir) : 0;
      const failureTrend = getFailureTrend(successRate7d, successRate30d, warnThreshold);

      return {
        skill_id: skill.skill_id,
        skill_type: skill.skill_type,
        current_version: currentVersionNumber > 0 ? `v${currentVersionNumber}` : null,
        pending_amendments: countPendingAmendments(skill.skill_dir),
        success_rate_7d: successRate7d,
        success_rate_30d: successRate30d,
        failure_trend: failureTrend,
        declining: failureTrend === 'worsening',
        last_run: getLastRun(skillRecords),
        run_count_7d: records7d.length,
        run_count_30d: records30d.length,
      };
    });

  return {
    generated_at: now,
    warn_threshold: warnThreshold,
    skills,
  };
}
/**
 * Resolve the repository root to an absolute path.
 *
 * @param {string} [repoRoot] - Explicit repository root. When falsy, the root
 *   is taken to be three directory levels above this module's directory.
 * @returns {string} Absolute repository root path.
 */
function resolveRepoRoot(repoRoot) {
  const rootSegments = repoRoot
    ? [repoRoot]
    : [__dirname, '..', '..', '..'];

  return path.resolve(...rootSegments);
}
/**
 * Check whether a path is the root itself or lies somewhere beneath it.
 *
 * @param {string} targetPath - Path to test.
 * @param {string} rootPath - Root directory to test against.
 * @returns {boolean} True when `targetPath` equals `rootPath` or is inside it.
 */
function isWithinRoot(targetPath, rootPath) {
  const relativePath = path.relative(rootPath, targetPath);
  if (relativePath === '') {
    return true;
  }

  // path.relative returns an absolute path when no relative route exists
  // (for example a different drive on Windows).
  if (path.isAbsolute(relativePath)) {
    return false;
  }

  // Compare the first path segment rather than using startsWith('..'), which
  // also rejected legitimate children whose name begins with two dots
  // (for example `<root>/..hidden`).
  const [firstSegment] = relativePath.split(path.sep);
  return firstSegment !== '..';
}
/**
 * Read and validate the provenance record stored alongside a skill.
 *
 * @param {string} skillPath - Skill directory, or the path of its SKILL.md.
 * @param {object} [options] - Also forwarded to `requiresProvenance`.
 * @param {boolean} [options.required] - When true, a missing provenance file
 *   is an error regardless of the skill's type.
 * @returns {object|null} The parsed record, or null when the file is absent
 *   and provenance is not required.
 * @throws {Error} When required provenance is missing, the file is not valid
 *   JSON, or the record fails validation.
 */
function readProvenance(skillPath, options = {}) {
  const skillDir = normalizeSkillDir(skillPath);
  const provenancePath = getProvenancePath(skillDir);
  const provenanceRequired = options.required === true || requiresProvenance(skillDir, options);

  if (!fs.existsSync(provenancePath)) {
    if (provenanceRequired) {
      throw new Error(`Missing provenance metadata for ${skillDir}`);
    }

    return null;
  }

  let record;
  try {
    record = JSON.parse(fs.readFileSync(provenancePath, 'utf8'));
  } catch (error) {
    if (!(error instanceof SyntaxError)) {
      throw error;
    }

    // A bare SyntaxError gives no hint which file is corrupt; report it in the
    // same "Invalid provenance metadata" form used for validation failures.
    throw new Error(
      `Invalid provenance metadata: ${provenancePath} is not valid JSON (${error.message})`,
      { cause: error }
    );
  }

  assertValidProvenance(record);
  return record;
}
/**
 * Convert an optional numeric field to a finite number.
 *
 * @param {*} value - Raw field value; numbers and numeric strings are accepted.
 * @param {string} fieldName - Field name used in the error message.
 * @returns {number|null} The numeric value, or null when `value` is null or
 *   undefined.
 * @throws {Error} When the value is present but is not a finite number.
 */
function toNullableNumber(value, fieldName) {
  if (value === null || typeof value === 'undefined') {
    return null;
  }

  // Number() silently coerces '' and '  ' to 0, true to 1 and [] to 0, which
  // would record made-up measurements. Only real numbers and non-blank strings
  // are allowed through to the conversion.
  const isNonBlankString = typeof value === 'string' && value.trim().length > 0;
  if (typeof value !== 'number' && !isNonBlankString) {
    throw new Error(`${fieldName} must be a number`);
  }

  const numericValue = Number(value);
  if (!Number.isFinite(numericValue)) {
    throw new Error(`${fieldName} must be a number`);
  }

  return numericValue;
}
/**
 * Read a JSONL file of execution records on a best-effort basis.
 *
 * @param {string} filePath - Path of the JSONL file.
 * @returns {object[]} The rows that parsed to plain objects, in file order;
 *   empty when the file does not exist.
 * @throws {Error} Re-throws read errors other than a missing file.
 */
function readJsonl(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch (error) {
    // Reading directly avoids the gap between an existence check and the read.
    if (error.code === 'ENOENT') {
      return [];
    }

    throw error;
  }

  const rows = [];
  for (const line of content.split('\n')) {
    const trimmedLine = line.trim();
    if (!trimmedLine) {
      continue;
    }

    let row;
    try {
      row = JSON.parse(trimmedLine);
    } catch {
      // Ignore malformed rows so analytics remain best-effort.
      continue;
    }

    // `null`, numbers, strings and arrays are valid JSON but not records;
    // passing them on makes consumers that read `record.skill_id` throw.
    if (row !== null && typeof row === 'object' && !Array.isArray(row)) {
      rows.push(row);
    }
  }

  return rows;
}
/**
 * Load recorded skill executions.
 *
 * Uses `options.stateStore.listSkillExecutionRecords()` when such an adapter
 * is supplied; otherwise, or when the adapter fails, reads the JSONL runs file
 * resolved by `getRunsFilePath(options)`.
 *
 * @param {object} [options] - Storage options.
 * @param {object} [options.stateStore] - Optional state-store adapter.
 * @returns {object[]} The execution records.
 */
function readSkillExecutionRecords(options = {}) {
  const { stateStore } = options;

  if (stateStore && typeof stateStore.listSkillExecutionRecords === 'function') {
    try {
      return stateStore.listSkillExecutionRecords();
    } catch {
      // recordSkillExecution falls back to JSONL when the store throws, so
      // reads fall back the same way instead of failing while the store is
      // unavailable.
    }
  }

  return readJsonl(getRunsFilePath(options));
}
/**
 * Extract the version number from a snapshot file name such as `v3.md`.
 *
 * @param {string} fileName - Entry name from the versions directory.
 * @returns {number|null} The positive version number, or null when the name
 *   is not a canonical snapshot name.
 */
function parseVersionNumber(fileName) {
  // Snapshots are written and looked up as `v${n}.md` with n > 0. Accepting
  // `v0.md` or zero-padded names like `v01.md` would list versions that can
  // never be addressed that way (and `v01.md` would collide with `v1.md`).
  const match = /^v([1-9]\d*)\.md$/.exec(fileName);
  if (!match) {
    return null;
  }

  const version = Number(match[1]);
  // Digit runs beyond the safe-integer range lose precision when converted.
  return Number.isSafeInteger(version) ? version : null;
}
/**
 * Read an evolution log (JSONL) on a best-effort basis.
 *
 * @param {string} filePath - Path of the JSONL log file.
 * @returns {object[]} The rows that parsed to plain objects, in file order;
 *   empty when the file does not exist.
 * @throws {Error} Re-throws read errors other than a missing file.
 */
function readJsonl(filePath) {
  let content;
  try {
    content = fs.readFileSync(filePath, 'utf8');
  } catch (error) {
    // Reading directly avoids the gap between an existence check and the read.
    if (error.code === 'ENOENT') {
      return [];
    }

    throw error;
  }

  return content
    .split('\n')
    .map(line => line.trim())
    .filter(Boolean)
    .reduce((rows, line) => {
      let row;
      try {
        row = JSON.parse(line);
      } catch {
        // Ignore malformed rows so the log remains append-only and resilient.
        return rows;
      }

      // Log consumers read fields such as `entry.status`, so rows that parse
      // to null, a primitive or an array are dropped like malformed ones.
      if (row !== null && typeof row === 'object' && !Array.isArray(row)) {
        rows.push(row);
      }

      return rows;
    }, []);
}
/**
 * Return the value that follows an option on the command line.
 *
 * @param {string[]} argv - Full argument list.
 * @param {number} index - Position of the option within `argv`.
 * @param {string} argName - Option name used in the error message.
 * @returns {string} The argument immediately after the option.
 * @throws {Error} When that argument is missing, empty, or starts with `--`.
 */
function requireValue(argv, index, argName) {
  const value = argv[index + 1];
  if (!value || value.startsWith('--')) {
    throw new Error(`Missing value for ${argName}`);
  }

  return value;
}

/**
 * Parse the skills-health command-line arguments into an options object.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {object} Options containing only the keys that were supplied:
 *   `json`, `help`, `skillsRoot`, `learnedRoot`, `importedRoot`, `homeDir`,
 *   `runsFilePath`, `now`, `warnThreshold`.
 * @throws {Error} On an unknown argument, a missing option value, or a
 *   non-numeric `--warn-threshold`.
 */
function parseArgs(argv) {
  // Options that take a string value, mapped to the option key they populate.
  const valueOptionKeys = new Map([
    ['--skills-root', 'skillsRoot'],
    ['--learned-root', 'learnedRoot'],
    ['--imported-root', 'importedRoot'],
    ['--home', 'homeDir'],
    ['--runs-file', 'runsFilePath'],
    ['--now', 'now'],
  ]);
  const options = {};

  for (let index = 0; index < argv.length; index += 1) {
    const arg = argv[index];

    if (arg === '--json') {
      options.json = true;
      continue;
    }

    if (arg === '--help' || arg === '-h') {
      options.help = true;
      continue;
    }

    if (valueOptionKeys.has(arg)) {
      options[valueOptionKeys.get(arg)] = requireValue(argv, index, arg);
      index += 1;
      continue;
    }

    if (arg === '--warn-threshold') {
      const rawThreshold = requireValue(argv, index, arg);
      const warnThreshold = Number(rawThreshold);
      // Reject here so the message shows what was typed; once converted, a bad
      // value is only known as NaN.
      if (!Number.isFinite(warnThreshold)) {
        throw new Error(`Invalid warn threshold: ${rawThreshold}`);
      }

      options.warnThreshold = warnThreshold;
      index += 1;
      continue;
    }

    throw new Error(`Unknown argument: ${arg}`);
  }

  return options;
}
/**
 * Write rows to a JSONL file, one JSON document per line.
 *
 * Despite its name, this helper replaces the file's contents: it writes with
 * `fs.writeFileSync`, so earlier rows in the file are not kept. The parent
 * directory is created when missing.
 *
 * @param {string} filePath - Destination file.
 * @param {Array<*>} rows - Values to serialize, one per line.
 */
function appendJsonl(filePath, rows) {
  const serializedRows = [];
  for (const row of rows) {
    serializedRows.push(JSON.stringify(row));
  }
  const payload = `${serializedRows.join('\n')}\n`;

  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(filePath, payload);
}
createTempDir('skill-evolution-repo-'); + const homeDir = createTempDir('skill-evolution-home-'); + const skillsRoot = path.join(repoRoot, 'skills'); + const learnedRoot = path.join(homeDir, '.claude', 'skills', 'learned'); + const importedRoot = path.join(homeDir, '.claude', 'skills', 'imported'); + const runsFile = path.join(homeDir, '.claude', 'state', 'skill-runs.jsonl'); + const now = '2026-03-15T12:00:00.000Z'; + + fs.mkdirSync(skillsRoot, { recursive: true }); + fs.mkdirSync(learnedRoot, { recursive: true }); + fs.mkdirSync(importedRoot, { recursive: true }); + + try { + console.log('Provenance:'); + + if (test('classifies curated, learned, and imported skill directories', () => { + const curatedSkillDir = createSkill(skillsRoot, 'curated-alpha', '# Curated\n'); + const learnedSkillDir = createSkill(learnedRoot, 'learned-beta', '# Learned\n'); + const importedSkillDir = createSkill(importedRoot, 'imported-gamma', '# Imported\n'); + + const roots = provenance.getSkillRoots({ repoRoot, homeDir }); + + assert.strictEqual(roots.curated, skillsRoot); + assert.strictEqual(roots.learned, learnedRoot); + assert.strictEqual(roots.imported, importedRoot); + assert.strictEqual( + provenance.classifySkillPath(curatedSkillDir, { repoRoot, homeDir }), + provenance.SKILL_TYPES.CURATED + ); + assert.strictEqual( + provenance.classifySkillPath(learnedSkillDir, { repoRoot, homeDir }), + provenance.SKILL_TYPES.LEARNED + ); + assert.strictEqual( + provenance.classifySkillPath(importedSkillDir, { repoRoot, homeDir }), + provenance.SKILL_TYPES.IMPORTED + ); + assert.strictEqual( + provenance.requiresProvenance(curatedSkillDir, { repoRoot, homeDir }), + false + ); + assert.strictEqual( + provenance.requiresProvenance(learnedSkillDir, { repoRoot, homeDir }), + true + ); + })) passed++; else failed++; + + if (test('writes and validates provenance metadata for non-curated skills', () => { + const importedSkillDir = createSkill(importedRoot, 'imported-delta', '# Imported\n'); + const 
provenanceRecord = { + source: 'https://example.com/skills/imported-delta', + created_at: '2026-03-15T10:00:00.000Z', + confidence: 0.86, + author: 'external-importer', + }; + + const writeResult = provenance.writeProvenance(importedSkillDir, provenanceRecord, { + repoRoot, + homeDir, + }); + + assert.strictEqual(writeResult.path, path.join(importedSkillDir, '.provenance.json')); + assert.deepStrictEqual(readJson(writeResult.path), provenanceRecord); + assert.deepStrictEqual( + provenance.readProvenance(importedSkillDir, { repoRoot, homeDir }), + provenanceRecord + ); + assert.throws( + () => provenance.writeProvenance(importedSkillDir, { + source: 'bad', + created_at: '2026-03-15T10:00:00.000Z', + author: 'external-importer', + }, { repoRoot, homeDir }), + /confidence/ + ); + assert.throws( + () => provenance.readProvenance(path.join(learnedRoot, 'missing-provenance'), { + repoRoot, + homeDir, + required: true, + }), + /Missing provenance metadata/ + ); + })) passed++; else failed++; + + if (test('exports the consolidated module surface from index.js', () => { + assert.strictEqual(skillEvolution.provenance, provenance); + assert.strictEqual(skillEvolution.versioning, versioning); + assert.strictEqual(skillEvolution.tracker, tracker); + assert.strictEqual(skillEvolution.health, health); + assert.strictEqual(typeof skillEvolution.collectSkillHealth, 'function'); + assert.strictEqual(typeof skillEvolution.recordSkillExecution, 'function'); + })) passed++; else failed++; + + console.log('\nVersioning:'); + + if (test('creates version snapshots and evolution logs for a skill', () => { + const skillDir = createSkill(skillsRoot, 'alpha', '# Alpha v1\n'); + + const versionOne = versioning.createVersion(skillDir, { + timestamp: '2026-03-15T11:00:00.000Z', + reason: 'bootstrap', + author: 'observer', + }); + + assert.strictEqual(versionOne.version, 1); + assert.ok(fs.existsSync(path.join(skillDir, '.versions', 'v1.md'))); + assert.ok(fs.existsSync(path.join(skillDir, 
'.evolution', 'observations.jsonl'))); + assert.ok(fs.existsSync(path.join(skillDir, '.evolution', 'inspections.jsonl'))); + assert.ok(fs.existsSync(path.join(skillDir, '.evolution', 'amendments.jsonl'))); + assert.strictEqual(versioning.getCurrentVersion(skillDir), 1); + + fs.writeFileSync(path.join(skillDir, 'SKILL.md'), '# Alpha v2\n'); + const versionTwo = versioning.createVersion(skillDir, { + timestamp: '2026-03-16T11:00:00.000Z', + reason: 'accepted-amendment', + author: 'observer', + }); + + assert.strictEqual(versionTwo.version, 2); + assert.deepStrictEqual( + versioning.listVersions(skillDir).map(entry => entry.version), + [1, 2] + ); + + const amendments = versioning.getEvolutionLog(skillDir, 'amendments'); + assert.strictEqual(amendments.length, 2); + assert.strictEqual(amendments[0].event, 'snapshot'); + assert.strictEqual(amendments[1].version, 2); + })) passed++; else failed++; + + if (test('rolls back to a previous snapshot without losing history', () => { + const skillDir = path.join(skillsRoot, 'alpha'); + + const rollback = versioning.rollbackTo(skillDir, 1, { + timestamp: '2026-03-17T11:00:00.000Z', + author: 'maintainer', + reason: 'restore known-good version', + }); + + assert.strictEqual(rollback.version, 3); + assert.strictEqual( + fs.readFileSync(path.join(skillDir, 'SKILL.md'), 'utf8'), + '# Alpha v1\n' + ); + assert.deepStrictEqual( + versioning.listVersions(skillDir).map(entry => entry.version), + [1, 2, 3] + ); + assert.strictEqual(versioning.getCurrentVersion(skillDir), 3); + + const amendments = versioning.getEvolutionLog(skillDir, 'amendments'); + const rollbackEntry = amendments[amendments.length - 1]; + assert.strictEqual(rollbackEntry.event, 'rollback'); + assert.strictEqual(rollbackEntry.target_version, 1); + assert.strictEqual(rollbackEntry.version, 3); + })) passed++; else failed++; + + console.log('\nTracking:'); + + if (test('records skill execution rows to JSONL fallback storage', () => { + const result = 
tracker.recordSkillExecution({ + skill_id: 'alpha', + skill_version: 'v3', + task_description: 'Fix flaky tests', + outcome: 'partial', + failure_reason: 'One integration test still flakes', + tokens_used: 812, + duration_ms: 4400, + user_feedback: 'corrected', + recorded_at: '2026-03-15T11:30:00.000Z', + }, { + runsFilePath: runsFile, + }); + + assert.strictEqual(result.storage, 'jsonl'); + assert.strictEqual(result.path, runsFile); + + const records = tracker.readSkillExecutionRecords({ runsFilePath: runsFile }); + assert.strictEqual(records.length, 1); + assert.strictEqual(records[0].skill_id, 'alpha'); + assert.strictEqual(records[0].task_description, 'Fix flaky tests'); + assert.strictEqual(records[0].outcome, 'partial'); + })) passed++; else failed++; + + if (test('falls back to JSONL when a state-store adapter is unavailable', () => { + const result = tracker.recordSkillExecution({ + skill_id: 'beta', + skill_version: 'v1', + task_description: 'Import external skill', + outcome: 'success', + failure_reason: null, + tokens_used: 215, + duration_ms: 900, + user_feedback: 'accepted', + recorded_at: '2026-03-15T11:35:00.000Z', + }, { + runsFilePath: runsFile, + stateStore: { + recordSkillExecution() { + throw new Error('state store offline'); + }, + }, + }); + + assert.strictEqual(result.storage, 'jsonl'); + assert.strictEqual(tracker.readSkillExecutionRecords({ runsFilePath: runsFile }).length, 2); + })) passed++; else failed++; + + if (test('ignores malformed JSONL rows when reading execution records', () => { + const malformedRunsFile = path.join(homeDir, '.claude', 'state', 'malformed-skill-runs.jsonl'); + fs.writeFileSync( + malformedRunsFile, + `${JSON.stringify({ + skill_id: 'alpha', + skill_version: 'v3', + task_description: 'Good row', + outcome: 'success', + failure_reason: null, + tokens_used: 1, + duration_ms: 1, + user_feedback: 'accepted', + recorded_at: '2026-03-15T11:45:00.000Z', + })}\n{bad-json}\n`, + 'utf8' + ); + + const records = 
tracker.readSkillExecutionRecords({ runsFilePath: malformedRunsFile }); + assert.strictEqual(records.length, 1); + assert.strictEqual(records[0].skill_id, 'alpha'); + })) passed++; else failed++; + + if (test('preserves zero-valued telemetry fields during normalization', () => { + const record = tracker.normalizeExecutionRecord({ + skill_id: 'zero-telemetry', + skill_version: 'v1', + task_description: 'No-op hook', + outcome: 'success', + tokens_used: 0, + duration_ms: 0, + user_feedback: 'accepted', + recorded_at: '2026-03-15T11:40:00.000Z', + }); + + assert.strictEqual(record.tokens_used, 0); + assert.strictEqual(record.duration_ms, 0); + })) passed++; else failed++; + + console.log('\nHealth:'); + + if (test('computes per-skill health metrics and flags declining skills', () => { + const betaSkillDir = createSkill(learnedRoot, 'beta', '# Beta v1\n'); + provenance.writeProvenance(betaSkillDir, { + source: 'observer://session/123', + created_at: '2026-03-14T10:00:00.000Z', + confidence: 0.72, + author: 'observer', + }, { + repoRoot, + homeDir, + }); + versioning.createVersion(betaSkillDir, { + timestamp: '2026-03-14T11:00:00.000Z', + author: 'observer', + reason: 'bootstrap', + }); + + appendJsonl(path.join(skillsRoot, 'alpha', '.evolution', 'amendments.jsonl'), [ + { + event: 'proposal', + status: 'pending', + created_at: '2026-03-15T07:00:00.000Z', + }, + ]); + + appendJsonl(runsFile, [ + { + skill_id: 'alpha', + skill_version: 'v3', + task_description: 'Recent success', + outcome: 'success', + failure_reason: null, + tokens_used: 100, + duration_ms: 1000, + user_feedback: 'accepted', + recorded_at: '2026-03-14T10:00:00.000Z', + }, + { + skill_id: 'alpha', + skill_version: 'v3', + task_description: 'Recent failure', + outcome: 'failure', + failure_reason: 'Regression', + tokens_used: 100, + duration_ms: 1000, + user_feedback: 'rejected', + recorded_at: '2026-03-13T10:00:00.000Z', + }, + { + skill_id: 'alpha', + skill_version: 'v2', + task_description: 'Prior 
success', + outcome: 'success', + failure_reason: null, + tokens_used: 100, + duration_ms: 1000, + user_feedback: 'accepted', + recorded_at: '2026-03-06T10:00:00.000Z', + }, + { + skill_id: 'alpha', + skill_version: 'v1', + task_description: 'Older success', + outcome: 'success', + failure_reason: null, + tokens_used: 100, + duration_ms: 1000, + user_feedback: 'accepted', + recorded_at: '2026-02-24T10:00:00.000Z', + }, + { + skill_id: 'beta', + skill_version: 'v1', + task_description: 'Recent success', + outcome: 'success', + failure_reason: null, + tokens_used: 90, + duration_ms: 800, + user_feedback: 'accepted', + recorded_at: '2026-03-15T09:00:00.000Z', + }, + { + skill_id: 'beta', + skill_version: 'v1', + task_description: 'Older failure', + outcome: 'failure', + failure_reason: 'Bad import', + tokens_used: 90, + duration_ms: 800, + user_feedback: 'corrected', + recorded_at: '2026-02-20T09:00:00.000Z', + }, + ]); + + const report = health.collectSkillHealth({ + repoRoot, + homeDir, + runsFilePath: runsFile, + now, + warnThreshold: 0.1, + }); + + const alpha = report.skills.find(skill => skill.skill_id === 'alpha'); + const beta = report.skills.find(skill => skill.skill_id === 'beta'); + + assert.ok(alpha); + assert.ok(beta); + assert.strictEqual(alpha.current_version, 'v3'); + assert.strictEqual(alpha.pending_amendments, 1); + assert.strictEqual(alpha.success_rate_7d, 0.5); + assert.strictEqual(alpha.success_rate_30d, 0.75); + assert.strictEqual(alpha.failure_trend, 'worsening'); + assert.strictEqual(alpha.declining, true); + assert.strictEqual(beta.failure_trend, 'improving'); + + const summary = health.summarizeHealthReport(report); + assert.deepStrictEqual(summary, { + total_skills: 6, + healthy_skills: 5, + declining_skills: 1, + }); + + const human = health.formatHealthReport(report, { json: false }); + assert.match(human, /alpha/); + assert.match(human, /worsening/); + assert.match( + human, + new RegExp(`Skills: ${summary.total_skills} total, 
${summary.healthy_skills} healthy, ${summary.declining_skills} declining`) + ); + })) passed++; else failed++; + + if (test('treats an unsnapshotted SKILL.md as v1 and orders last_run by actual time', () => { + const gammaSkillDir = createSkill(skillsRoot, 'gamma', '# Gamma v1\n'); + const offsetRunsFile = path.join(homeDir, '.claude', 'state', 'offset-skill-runs.jsonl'); + + appendJsonl(offsetRunsFile, [ + { + skill_id: 'gamma', + skill_version: 'v1', + task_description: 'Offset timestamp run', + outcome: 'success', + failure_reason: null, + tokens_used: 10, + duration_ms: 100, + user_feedback: 'accepted', + recorded_at: '2026-03-15T00:00:00+02:00', + }, + { + skill_id: 'gamma', + skill_version: 'v1', + task_description: 'UTC timestamp run', + outcome: 'success', + failure_reason: null, + tokens_used: 11, + duration_ms: 110, + user_feedback: 'accepted', + recorded_at: '2026-03-14T23:30:00Z', + }, + ]); + + const report = health.collectSkillHealth({ + repoRoot, + homeDir, + runsFilePath: offsetRunsFile, + now, + warnThreshold: 0.1, + }); + + const gamma = report.skills.find(skill => skill.skill_id === path.basename(gammaSkillDir)); + assert.ok(gamma); + assert.strictEqual(gamma.current_version, 'v1'); + assert.strictEqual(gamma.last_run, '2026-03-14T23:30:00Z'); + })) passed++; else failed++; + + if (test('CLI emits JSON health output for standalone integration', () => { + const result = runCli([ + '--json', + '--skills-root', skillsRoot, + '--learned-root', learnedRoot, + '--imported-root', importedRoot, + '--home', homeDir, + '--runs-file', runsFile, + '--now', now, + '--warn-threshold', '0.1', + ]); + + assert.strictEqual(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout.trim()); + assert.ok(Array.isArray(payload.skills)); + assert.strictEqual(payload.skills[0].skill_id, 'alpha'); + assert.strictEqual(payload.skills[0].declining, true); + })) passed++; else failed++; + + if (test('CLI shows help and rejects missing option values', () 
=> { + const helpResult = runCli(['--help']); + assert.strictEqual(helpResult.status, 0); + assert.match(helpResult.stdout, /--learned-root /); + assert.match(helpResult.stdout, /--imported-root /); + + const errorResult = runCli(['--skills-root']); + assert.strictEqual(errorResult.status, 1); + assert.match(errorResult.stderr, /Missing value for --skills-root/); + })) passed++; else failed++; + + console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`); + process.exit(failed > 0 ? 1 : 0); + } finally { + cleanupTempDir(repoRoot); + cleanupTempDir(homeDir); + } +} + +runTests();