From 8511d84042204a629ca5fe3052a808104c787a49 Mon Sep 17 00:00:00 2001 From: Affaan Mustafa Date: Fri, 20 Mar 2026 01:44:55 -0700 Subject: [PATCH] feat(skills): add rules-distill skill (rebased #561) (#678) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(skills): add rules-distill — extract cross-cutting principles from skills into rules Applies the skill-stocktake pattern to rules maintenance: scan skills → extract shared principles → propose rule changes. Key design decisions: - Deterministic collection (scan scripts) + LLM judgment (cross-read & verdict) - 6 verdict types: Append, Revise, New Section, New File, Already Covered, Too Specific - Anti-abstraction safeguard: 2+ skills evidence, actionable behavior test, violation risk - Rules full text passed to LLM (no grep pre-filter) for accurate matching - Never modifies rules automatically — always requires user approval * fix(skills): address review feedback for rules-distill Fixes raised by CodeRabbit, Greptile, and cubic: - Add Prerequisites section documenting skill-stocktake dependency - Add fallback command when skill-stocktake is not installed - Fix shell quoting: add IFS= and -r to while-read loops - Replace hardcoded paths with env var placeholders ($CLAUDE_RULES_DIR, $SKILL_STOCKTAKE_DIR) - Add json language identifier to code blocks - Add "How It Works" parent heading for Phase 1/2/3 - Add "Example" section with end-to-end run output - Add revision.reason/before/after fields to output schema for Revise verdict - Document timestamp format (date -u +%Y-%m-%dT%H:%M:%SZ) - Document candidate-id format (kebab-case from principle) - Use concrete examples in results.json schema * fix(skills): remove skill-stocktake dependency, add self-contained scripts Address P1 review feedback: - Add scan-skills.sh and scan-rules.sh directly in rules-distill/scripts/ (no external dependency on skill-stocktake) - Remove Prerequisites section (no longer needed) - Add 
cross-batch merge step to prevent 2+ skills requirement from being silently broken across batch boundaries - Fix nested triple-backtick fences (use quadruple backticks) - Remove head -100 cap (silent truncation) - Rename "When to Activate" → "When to Use" (ECC standard) - Remove unnecessary env var placeholders (SKILL.md is a prompt, not a script) * fix: update skill/command counts in README.md and AGENTS.md rules-distill added 1 skill + 1 command: - skills: 108 → 109 - commands: 57 → 58 Updates all count references to pass CI catalog validation. * fix(skills): address Servitor review feedback for rules-distill 1. Rename SKILL_STOCKTAKE_* env vars to RULES_DISTILL_* for consistency 2. Remove unnecessary observation counting (use_7d/use_30d) from scan-skills.sh 3. Fix header comment: scan.sh → scan-skills.sh 4. Use jq for JSON construction in scan-rules.sh to properly escape headings containing special characters (", \) * fix(skills): address CodeRabbit review — portability and scan scope 1. scan-rules.sh: use jq for error JSON output (proper escaping) 2. scan-rules.sh: replace GNU-only sort -z with portable sort (BSD compat) 3. scan-rules.sh: fix pipefail crash on files without H2 headings 4. scan-skills.sh: scan only SKILL.md files (skip learned/*.md and auxiliary docs that lack frontmatter) 5. 
scan-skills.sh: add portable get_mtime helper (GNU stat/date fallback to BSD stat/date) * fix: sync catalog counts with filesystem (27 agents, 114 skills, 59 commands) --------- Co-authored-by: Tatsuya Shimomoto --- AGENTS.md | 6 +- README.md | 6 +- commands/rules-distill.md | 11 + skills/rules-distill/SKILL.md | 264 ++++++++++++++++++++ skills/rules-distill/scripts/scan-rules.sh | 58 +++++ skills/rules-distill/scripts/scan-skills.sh | 129 ++++++++++ 6 files changed, 468 insertions(+), 6 deletions(-) create mode 100644 commands/rules-distill.md create mode 100644 skills/rules-distill/SKILL.md create mode 100755 skills/rules-distill/scripts/scan-rules.sh create mode 100755 skills/rules-distill/scripts/scan-skills.sh diff --git a/AGENTS.md b/AGENTS.md index 843434ec..0039c62f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,6 +1,6 @@ # Everything Claude Code (ECC) — Agent Instructions -This is a **production-ready AI coding plugin** providing 27 specialized agents, 113 skills, 58 commands, and automated hook workflows for software development. +This is a **production-ready AI coding plugin** providing 27 specialized agents, 114 skills, 59 commands, and automated hook workflows for software development. **Version:** 1.9.0 @@ -142,8 +142,8 @@ Troubleshoot failures: check test isolation → verify mocks → fix implementat ``` agents/ — 27 specialized subagents -skills/ — 113 workflow skills and domain knowledge -commands/ — 58 slash commands +skills/ — 114 workflow skills and domain knowledge +commands/ — 59 slash commands hooks/ — Trigger-based automations rules/ — Always-follow guidelines (common + per-language) scripts/ — Cross-platform Node.js utilities diff --git a/README.md b/README.md index 7dc9b3e8..14853eb3 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ For manual install instructions see the README in the `rules/` folder. 
/plugin list everything-claude-code@everything-claude-code ``` -✨ **That's it!** You now have access to 27 agents, 113 skills, and 58 commands. +✨ **That's it!** You now have access to 27 agents, 114 skills, and 59 commands. --- @@ -1070,8 +1070,8 @@ The configuration is automatically detected from `.opencode/opencode.json`. | Feature | Claude Code | OpenCode | Status | |---------|-------------|----------|--------| | Agents | ✅ 27 agents | ✅ 12 agents | **Claude Code leads** | -| Commands | ✅ 58 commands | ✅ 31 commands | **Claude Code leads** | -| Skills | ✅ 113 skills | ✅ 37 skills | **Claude Code leads** | +| Commands | ✅ 59 commands | ✅ 31 commands | **Claude Code leads** | +| Skills | ✅ 114 skills | ✅ 37 skills | **Claude Code leads** | | Hooks | ✅ 8 event types | ✅ 11 events | **OpenCode has more!** | | Rules | ✅ 29 rules | ✅ 13 instructions | **Claude Code leads** | | MCP Servers | ✅ 14 servers | ✅ Full | **Full parity** | diff --git a/commands/rules-distill.md b/commands/rules-distill.md new file mode 100644 index 00000000..93886a06 --- /dev/null +++ b/commands/rules-distill.md @@ -0,0 +1,11 @@ +--- +description: "Scan skills to extract cross-cutting principles and distill them into rules" +--- + +# /rules-distill — Distill Principles from Skills into Rules + +Scan installed skills, extract cross-cutting principles, and distill them into rules. + +## Process + +Follow the full workflow defined in the `rules-distill` skill. 
diff --git a/skills/rules-distill/SKILL.md b/skills/rules-distill/SKILL.md new file mode 100644 index 00000000..8e50342e --- /dev/null +++ b/skills/rules-distill/SKILL.md @@ -0,0 +1,264 @@ +--- +name: rules-distill +description: "Scan skills to extract cross-cutting principles and distill them into rules — append, revise, or create new rule files" +origin: ECC +--- + +# Rules Distill + +Scan installed skills, extract cross-cutting principles that appear in multiple skills, and distill them into rules — appending to existing rule files, revising outdated content, or creating new rule files. + +Applies the "deterministic collection + LLM judgment" principle: scripts collect facts exhaustively, then an LLM cross-reads the full context and produces verdicts. + +## When to Use + +- Periodic rules maintenance (monthly or after installing new skills) +- After a skill-stocktake reveals patterns that should be rules +- When rules feel incomplete relative to the skills being used + +## How It Works + +The rules distillation process follows three phases: + +### Phase 1: Inventory (Deterministic Collection) + +#### 1a. Collect skill inventory + +```bash +bash ~/.claude/skills/rules-distill/scripts/scan-skills.sh +``` + +#### 1b. Collect rules index + +```bash +bash ~/.claude/skills/rules-distill/scripts/scan-rules.sh +``` + +#### 1c. Present to user + +``` +Rules Distillation — Phase 1: Inventory +──────────────────────────────────────── +Skills: {N} files scanned +Rules: {M} files ({K} headings indexed) + +Proceeding to cross-read analysis... +``` + +### Phase 2: Cross-read, Match & Verdict (LLM Judgment) + +Extraction and matching are unified in a single pass. Rules files are small enough (~800 lines total) that the full text can be provided to the LLM — no grep pre-filtering needed. + +#### Batching + +Group skills into **thematic clusters** based on their descriptions. Analyze each cluster in a subagent with the full rules text. 
+ +#### Cross-batch Merge + +After all batches complete, merge candidates across batches: +- Deduplicate candidates with the same or overlapping principles +- Re-check the "2+ skills" requirement using evidence from **all** batches combined — a principle found in 1 skill per batch but 2+ skills total is valid + +#### Subagent Prompt + +Launch a general-purpose Agent with the following prompt: + +```` +You are an analyst who cross-reads skills to extract principles that should be promoted to rules. + +## Input +- Skills: {full text of skills in this batch} +- Existing rules: {full text of all rule files} + +## Extraction Criteria + +Include a candidate ONLY if ALL of these are true: + +1. **Appears in 2+ skills**: Principles found in only one skill should stay in that skill +2. **Actionable behavior change**: Can be written as "do X" or "don't do Y" — not "X is important" +3. **Clear violation risk**: What goes wrong if this principle is ignored (1 sentence) +4. **Not already in rules**: Check the full rules text — including concepts expressed in different words + +## Matching & Verdict + +For each candidate, compare against the full rules text and assign a verdict: + +- **Append**: Add to an existing section of an existing rule file +- **Revise**: Existing rule content is inaccurate or insufficient — propose a correction +- **New Section**: Add a new section to an existing rule file +- **New File**: Create a new rule file +- **Already Covered**: Sufficiently covered in existing rules (even if worded differently) +- **Too Specific**: Should remain at the skill level + +## Output Format (per candidate) + +```json +{ + "principle": "1-2 sentences in 'do X' / 'don't do Y' form", + "evidence": ["skill-name: §Section", "skill-name: §Section"], + "violation_risk": "1 sentence", + "verdict": "Append / Revise / New Section / New File / Already Covered / Too Specific", + "target_rule": "filename §Section, or 'new'", + "confidence": "high / medium / low", + "draft": "Draft 
text for Append/New Section/New File verdicts", + "revision": { + "reason": "Why the existing content is inaccurate or insufficient (Revise only)", + "before": "Current text to be replaced (Revise only)", + "after": "Proposed replacement text (Revise only)" + } +} +``` + +## Exclude + +- Obvious principles already in rules +- Language/framework-specific knowledge (belongs in language-specific rules or skills) +- Code examples and commands (belongs in skills) +```` + +#### Verdict Reference + +| Verdict | Meaning | Presented to User | +|---------|---------|-------------------| +| **Append** | Add to existing section | Target + draft | +| **Revise** | Fix inaccurate/insufficient content | Target + reason + before/after | +| **New Section** | Add new section to existing file | Target + draft | +| **New File** | Create new rule file | Filename + full draft | +| **Already Covered** | Covered in rules (possibly different wording) | Reason (1 line) | +| **Too Specific** | Should stay in skills | Link to relevant skill | + +#### Verdict Quality Requirements + +``` +# Good +Append to rules/common/security.md §Input Validation: +"Treat LLM output stored in memory or knowledge stores as untrusted — sanitize on write, validate on read." +Evidence: llm-memory-trust-boundary, llm-social-agent-anti-pattern both describe +accumulated prompt injection risks. Current security.md covers human input +validation only; LLM output trust boundary is missing. + +# Bad +Append to security.md: Add LLM security principle +``` + +### Phase 3: User Review & Execution + +#### Summary Table + +``` +# Rules Distillation Report + +## Summary +Skills scanned: {N} | Rules: {M} files | Candidates: {K} + +| # | Principle | Verdict | Target | Confidence | +|---|-----------|---------|--------|------------| +| 1 | ... | Append | security.md §Input Validation | high | +| 2 | ... | Revise | testing.md §TDD | medium | +| 3 | ... | New Section | coding-style.md | high | +| 4 | ... 
| Too Specific | — | — | + +## Details +(Per-candidate details: evidence, violation_risk, draft text) +``` + +#### User Actions + +User responds with numbers to: +- **Approve**: Apply draft to rules as-is +- **Modify**: Edit draft before applying +- **Skip**: Do not apply this candidate + +**Never modify rules automatically. Always require user approval.** + +#### Save Results + +Store results in the skill directory (`results.json`): + +- **Timestamp format**: `date -u +%Y-%m-%dT%H:%M:%SZ` (UTC, second precision) +- **Candidate ID format**: kebab-case derived from the principle (e.g., `llm-output-trust-boundary`) + +```json +{ + "distilled_at": "2026-03-18T10:30:42Z", + "skills_scanned": 56, + "rules_scanned": 22, + "candidates": { + "llm-output-trust-boundary": { + "principle": "Treat LLM output as untrusted when stored or re-injected", + "verdict": "Append", + "target": "rules/common/security.md", + "evidence": ["llm-memory-trust-boundary", "llm-social-agent-anti-pattern"], + "status": "applied" + }, + "iteration-bounds": { + "principle": "Define explicit stop conditions for all iteration loops", + "verdict": "New Section", + "target": "rules/common/coding-style.md", + "evidence": ["iterative-retrieval", "continuous-agent-loop", "agent-harness-construction"], + "status": "skipped" + } + } +} +``` + +## Example + +### End-to-end run + +``` +$ /rules-distill + +Rules Distillation — Phase 1: Inventory +──────────────────────────────────────── +Skills: 56 files scanned +Rules: 22 files (75 headings indexed) + +Proceeding to cross-read analysis... + +[Subagent analysis: Batch 1 (agent/meta skills) ...] +[Subagent analysis: Batch 2 (coding/pattern skills) ...] 
+[Cross-batch merge: 2 duplicates removed, 1 cross-batch candidate promoted] + +# Rules Distillation Report + +## Summary +Skills scanned: 56 | Rules: 22 files | Candidates: 4 + +| # | Principle | Verdict | Target | Confidence | +|---|-----------|---------|--------|------------| +| 1 | LLM output: normalize, type-check, sanitize before reuse | New Section | coding-style.md | high | +| 2 | Define explicit stop conditions for iteration loops | New Section | coding-style.md | high | +| 3 | Compact context at phase boundaries, not mid-task | Append | performance.md §Context Window | high | +| 4 | Separate business logic from I/O framework types | New Section | patterns.md | high | + +## Details + +### 1. LLM Output Validation +Verdict: New Section in coding-style.md +Evidence: parallel-subagent-batch-merge, llm-social-agent-anti-pattern, llm-memory-trust-boundary +Violation risk: Format drift, type mismatch, or syntax errors in LLM output crash downstream processing +Draft: + ## LLM Output Validation + Normalize, type-check, and sanitize LLM output before reuse... + See skill: parallel-subagent-batch-merge, llm-memory-trust-boundary + +[... details for candidates 2-4 ...] + +Approve, modify, or skip each candidate by number: +> User: Approve 1, 3. Skip 2, 4. + +✓ Applied: coding-style.md §LLM Output Validation +✓ Applied: performance.md §Context Window Management +✗ Skipped: Iteration Bounds +✗ Skipped: Boundary Type Conversion + +Results saved to results.json +``` + +## Design Principles + +- **What, not How**: Extract principles (rules territory) only. Code examples and commands stay in skills. +- **Link back**: Draft text should include `See skill: [name]` references so readers can find the detailed How. +- **Deterministic collection, LLM judgment**: Scripts guarantee exhaustiveness; the LLM guarantees contextual understanding. 
#!/usr/bin/env bash
# scan-rules.sh — enumerate rule files and extract an H2 heading index.
# Usage: scan-rules.sh [RULES_DIR]
# Output: JSON to stdout
#
# Environment:
#   RULES_DISTILL_DIR   Override ~/.claude/rules (for testing only)

set -euo pipefail

RULES_DIR="${RULES_DISTILL_DIR:-${1:-$HOME/.claude/rules}}"

if [[ ! -d "$RULES_DIR" ]]; then
  # jq builds the error object so the path is properly JSON-escaped.
  jq -n --arg path "$RULES_DIR" '{"error":"rules directory not found","path":$path}' >&2
  exit 1
fi

# Collect all rule files, excluding _archived/.
# -type f: skip any directory that happens to be named *.md (also keeps the
# scan consistent with scan-skills.sh, which already filters on -type f).
files=()
while IFS= read -r f; do
  files+=("$f")
done < <(find "$RULES_DIR" -type f -name '*.md' -not -path '*/_archived/*' -print | sort)

total=${#files[@]}

tmpdir=$(mktemp -d)
_rules_cleanup() { rm -rf "$tmpdir"; }
trap _rules_cleanup EXIT

for i in "${!files[@]}"; do
  file="${files[$i]}"

  # Display path: abbreviate with ~ only when the file actually lives under
  # $HOME. (Previously an out-of-$HOME path — e.g. with RULES_DISTILL_DIR set
  # — was mangled into "~//abs/path".)
  if [[ "$file" == "$HOME"/* ]]; then
    rel_path="~/${file#"$HOME"/}"
  else
    rel_path="$file"
  fi

  # Extract H2 headings ("## Title") into a JSON array. jq -R/-s handles
  # escaping of quotes and backslashes in headings. grep exiting 1 (no H2
  # headings in the file) must not trip pipefail, hence the `|| true`.
  headings_json=$({ grep -E '^## ' "$file" 2>/dev/null || true; } | sed 's/^## //' | jq -R . | jq -s '.')

  # Line count, stripped of the padding some wc implementations emit.
  line_count=$(wc -l < "$file" | tr -d ' ')

  jq -n \
    --arg path "$rel_path" \
    --arg file "$(basename "$file")" \
    --argjson lines "$line_count" \
    --argjson headings "$headings_json" \
    '{path:$path,file:$file,lines:$lines,headings:$headings}' \
    > "$tmpdir/$i.json"
done

if [[ $total -eq 0 ]]; then
  jq -n --arg dir "$RULES_DIR" '{rules_dir:$dir,total:0,rules:[]}'
else
  jq -n \
    --arg dir "$RULES_DIR" \
    --argjson total "$total" \
    --argjson rules "$(jq -s '.' "$tmpdir"/*.json)" \
    '{rules_dir:$dir,total:$total,rules:$rules}'
fi

# ==== file: skills/rules-distill/scripts/scan-skills.sh (header) ====
#!/usr/bin/env bash
# scan-skills.sh — enumerate skill files, extract frontmatter and UTC mtime
# Usage: scan-skills.sh [CWD_SKILLS_DIR]
# Output: JSON to stdout
#
# When CWD_SKILLS_DIR is omitted, defaults to $PWD/.claude/skills so the
# script always picks up project-level skills without relying on the caller.
#
# Environment:
#   RULES_DISTILL_GLOBAL_DIR   Override ~/.claude/skills (for testing only;
#                              do not set in production — intended for bats tests)
#   RULES_DISTILL_PROJECT_DIR  Override project dir detection (for testing only)

set -euo pipefail

GLOBAL_DIR="${RULES_DISTILL_GLOBAL_DIR:-$HOME/.claude/skills}"
CWD_SKILLS_DIR="${RULES_DISTILL_PROJECT_DIR:-${1:-$PWD/.claude/skills}}"

# Validate CWD_SKILLS_DIR looks like a .claude/skills path (defense-in-depth).
# Only warn when the path exists — a nonexistent path poses no traversal risk.
if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" && "$CWD_SKILLS_DIR" != */.claude/skills* ]]; then
  echo "Warning: CWD_SKILLS_DIR does not look like a .claude/skills path: $CWD_SKILLS_DIR" >&2
fi
# Extract a single-line frontmatter field value from a SKILL.md file.
# Handles unquoted, double-quoted, and single-quoted values; strips trailing
# CR/whitespace so CRLF-edited files behave like LF files. Only a *matched*
# pair of surrounding quotes is removed (the old gsub approach stripped
# unbalanced leading/trailing quotes independently, mangling values that
# merely start or end with a quote character).
# Does NOT support multi-line YAML blocks (| or >) or nested YAML keys.
extract_field() {
  local file="$1" field="$2"
  awk -v f="$field" '
    BEGIN { fm=0; sq = sprintf("%c", 39) }   # sq = single-quote char, portably
    /^---\r?$/ { fm++; next }                # frontmatter fences (CRLF-safe)
    fm==1 {
      n = length(f) + 2
      if (substr($0, 1, n) == f ": ") {
        val = substr($0, n+1)
        sub(/[ \t\r]+$/, "", val)            # YAML trailing whitespace is insignificant
        len = length(val)
        c = substr(val, 1, 1)
        if (len >= 2 && c == substr(val, len, 1) && (c == "\"" || c == sq))
          val = substr(val, 2, len-2)        # strip matched quote pair only
        print val
        exit
      }
    }
    fm>=2 { exit }                           # stop at the closing fence
  ' "$file"
}

# Print a file's mtime as UTC ISO8601 (e.g. 2026-03-18T10:30:42Z).
# Portable: tries GNU stat/date first, then falls back to BSD stat/date.
get_mtime() {
  local file="$1"
  local secs
  secs=$(stat -c %Y "$file" 2>/dev/null || stat -f %m "$file" 2>/dev/null) || return 1
  date -u -d "@$secs" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null ||
    date -u -r "$secs" +%Y-%m-%dT%H:%M:%SZ
}

# Scan a directory tree for SKILL.md files (auxiliary docs lack frontmatter
# and are skipped by construction) and emit a JSON array of
# {path,name,description,mtime} objects, sorted by path.
scan_dir_to_json() {
  local dir="$1"

  local tmpdir
  tmpdir=$(mktemp -d)
  local _scan_tmpdir="$tmpdir"
  _scan_cleanup() { rm -rf "$_scan_tmpdir"; }
  trap _scan_cleanup RETURN   # RETURN traps are function-local in bash

  local i=0
  while IFS= read -r file; do
    local name desc mtime dp
    name=$(extract_field "$file" "name")
    desc=$(extract_field "$file" "description")
    # One unreadable mtime must not abort the whole scan under set -e.
    mtime=$(get_mtime "$file") || mtime=""
    # Abbreviate $HOME as ~ for display; quote $HOME so glob characters in
    # the home path cannot act as a pattern.
    dp="${file/#"$HOME"/~}"

    jq -n \
      --arg path "$dp" \
      --arg name "$name" \
      --arg description "$desc" \
      --arg mtime "$mtime" \
      '{path:$path,name:$name,description:$description,mtime:$mtime}' \
      > "$tmpdir/$i.json"
    i=$((i+1))
  done < <(find "$dir" -name "SKILL.md" -type f 2>/dev/null | sort)

  if [[ $i -eq 0 ]]; then
    echo "[]"
  else
    jq -s '.' "$tmpdir"/*.json
  fi
}

# --- Main ---

global_found="false"
global_count=0
global_skills="[]"

if [[ -d "$GLOBAL_DIR" ]]; then
  global_found="true"
  global_skills=$(scan_dir_to_json "$GLOBAL_DIR")
  global_count=$(jq 'length' <<<"$global_skills")
fi

project_found="false"
project_path=""
project_count=0
project_skills="[]"

if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" ]]; then
  project_found="true"
  project_path="$CWD_SKILLS_DIR"
  project_skills=$(scan_dir_to_json "$CWD_SKILLS_DIR")
  project_count=$(jq 'length' <<<"$project_skills")
fi

# Merge global + project skills into one flat array.
all_skills=$(jq -s 'add' <(printf '%s\n' "$global_skills") <(printf '%s\n' "$project_skills"))

jq -n \
  --arg global_found "$global_found" \
  --argjson global_count "$global_count" \
  --arg project_found "$project_found" \
  --arg project_path "$project_path" \
  --argjson project_count "$project_count" \
  --argjson skills "$all_skills" \
  '{
    scan_summary: {
      global: { found: ($global_found == "true"), count: $global_count },
      project: { found: ($project_found == "true"), path: $project_path, count: $project_count }
    },
    skills: $skills
  }'