Compare commits

..

34 Commits

Author SHA1 Message Date
Affaan Mustafa
3fc22ae751 fix: lock patched fast-uri 2026-05-12 20:20:54 -04:00
Affaan Mustafa
c229b74d41 docs: record AgentShield baseline CLI (#1834) 2026-05-12 20:15:09 -04:00
Affaan Mustafa
be42989746 docs: define AgentShield enterprise roadmap (#1833) 2026-05-12 19:56:12 -04:00
Affaan Mustafa
d2d8cda8b3 docs: record AgentShield PDF export decision (#1832) 2026-05-12 19:28:26 -04:00
Affaan Mustafa
894ee03930 docs: record ECC-Tools evaluator corpus merge (#1831) 2026-05-12 19:12:20 -04:00
Affaan Mustafa
37c27a60fd docs: add deep-analyzer evaluator scenario 2026-05-12 18:52:09 -04:00
Affaan Mustafa
337ced0828 docs: add skill-quality evaluator scenario 2026-05-12 18:36:25 -04:00
Affaan Mustafa
b25d4770f5 docs: add AgentShield policy exception evaluator scenario 2026-05-12 18:19:49 -04:00
Affaan Mustafa
6fbf58d590 ci: keep package manager cache failures non-blocking 2026-05-12 18:03:30 -04:00
Affaan Mustafa
3dddfc8270 docs: add evaluator harness config scenario 2026-05-12 18:03:30 -04:00
Affaan Mustafa
cd90c84c32 docs: add evaluator CI failure scenario (#1826) 2026-05-12 17:44:00 -04:00
Affaan Mustafa
863519eecf docs: add evaluator billing readiness scenario (#1825) 2026-05-12 17:24:34 -04:00
Affaan Mustafa
dcf5668b27 docs: add evaluator rag prototype (#1824) 2026-05-12 17:04:39 -04:00
Affaan Mustafa
f2deedcf3d docs: record clean plugin publication smoke (#1823) 2026-05-12 16:45:54 -04:00
Affaan Mustafa
bfacf37715 docs: record rc1 publication dry-run evidence (#1822) 2026-05-12 16:27:52 -04:00
Affaan Mustafa
0598af70a5 docs: add HUD status control contract (#1821) 2026-05-12 16:09:18 -04:00
Affaan Mustafa
4d42917cfb docs: add rc1 naming publication matrix (#1820) 2026-05-12 15:52:39 -04:00
Affaan Mustafa
7109ee08db docs: sync roadmap discussion and salvage evidence (#1819) 2026-05-12 15:35:19 -04:00
Affaan Mustafa
4f5f612b61 docs: record stale salvage gap pass (#1818) 2026-05-12 15:18:13 -04:00
Affaan Mustafa
df60af9619 feat: salvage code-reviewer false-positive guardrails (#1817) 2026-05-12 15:01:46 -04:00
Affaan Mustafa
ab0f0187de feat: salvage frontend design guidance (#1816) 2026-05-12 14:44:17 -04:00
Affaan Mustafa
65c1502ecd feat: salvage cost tracking and skill scout (#1815) 2026-05-12 14:23:46 -04:00
Affaan Mustafa
ef86329828 docs: record queue clear and Linear issue blocker (#1814) 2026-05-12 14:00:04 -04:00
Affaan Mustafa
5d3ed622c6 docs: map stale PR salvage sources (#1813) 2026-05-12 13:42:36 -04:00
Affaan Mustafa
f239379ebf feat: salvage Django Celery workflow (#1812)
Source: maintainer-owned salvage of useful Django reviewer/build-resolver/Celery work from stale PR #1310 by mrigank2seven.

- add django-reviewer and django-build-resolver agents

- add django-celery skill with timezone-aware scheduling example

- update catalog counts to 60 agents / 221 skills and record the May 12 salvage gap pass

Co-authored-by: MRIGANK GUPTA <mrigank2seven@users.noreply.github.com>
2026-05-12 13:20:33 -04:00
Affaan Mustafa
2c8cda03e7 docs: record ECC Tools Linear backlog sync (#1811) 2026-05-12 12:56:52 -04:00
Affaan Mustafa
9a5c904d33 docs: record AgentShield exception lifecycle audit (#1810) 2026-05-12 12:32:02 -04:00
Affaan Mustafa
b38992f60e docs: record ECC Tools PR review salvage evidence (#1809) 2026-05-12 12:02:57 -04:00
Affaan Mustafa
86a529b3da docs: record ECC Tools analyzer corpus evidence (#1808) 2026-05-12 11:39:59 -04:00
Affaan Mustafa
adc97769be docs: record ECC Tools deep analyzer sync signal (#1807) 2026-05-12 11:16:14 -04:00
Affaan Mustafa
58489af64f docs: record ECC Tools RAG evaluator signal (#1806) 2026-05-12 10:46:08 -04:00
Affaan Mustafa
fb5897f1a2 docs: record ECC Tools skill quality evidence 2026-05-12 10:07:21 -04:00
Affaan Mustafa
78c8b9b69b docs: add ECC 2.0 execution tracking checklist 2026-05-12 09:49:25 -04:00
Alexis Le Dain
f03e200136 feat: add Quarkus handling
Adds Quarkus handling across the Java skill/reviewer surface, with maintainer follow-up fixes for duplicate catalog entries, required skill sections, localized snippet structure, and current main alignment.

Validation run locally on the final PR head:
- NODE_PATH=/Users/affoon/GitHub/ECC/everything-claude-code/node_modules node scripts/ci/validate-install-manifests.js
- NODE_PATH=/Users/affoon/GitHub/ECC/everything-claude-code/node_modules node scripts/ci/validate-skills.js
- NODE_PATH=/Users/affoon/GitHub/ECC/everything-claude-code/node_modules node scripts/ci/catalog.js --text
- npx --yes markdownlint-cli docs/ECC-2.0-GA-ROADMAP.md
- git diff --check
- NODE_PATH=/Users/affoon/GitHub/ECC/everything-claude-code/node_modules node tests/run-all.js (2324 passed, 0 failed)
2026-05-12 09:30:26 -04:00
75 changed files with 5074 additions and 78 deletions

View File

@@ -11,7 +11,7 @@
{
"name": "ecc",
"source": "./",
"description": "The most comprehensive Claude Code plugin — 58 agents, 220 skills, 74 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
"description": "The most comprehensive Claude Code plugin — 60 agents, 225 skills, 75 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
"version": "2.0.0-rc.1",
"author": {
"name": "Affaan Mustafa",

View File

@@ -1,7 +1,7 @@
{
"name": "ecc",
"version": "2.0.0-rc.1",
"description": "Battle-tested Claude Code plugin for engineering teams — 58 agents, 220 skills, 74 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use",
"description": "Battle-tested Claude Code plugin for engineering teams — 60 agents, 225 skills, 75 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use",
"author": {
"name": "Affaan Mustafa",
"url": "https://x.com/affaanmustafa"

View File

@@ -77,6 +77,7 @@ jobs:
- name: Cache npm
if: matrix.pm == 'npm'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.npm-cache-dir.outputs.dir }}
@@ -94,6 +95,7 @@ jobs:
- name: Cache pnpm
if: matrix.pm == 'pnpm'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.pnpm-cache-dir.outputs.dir }}
@@ -115,6 +117,7 @@ jobs:
- name: Cache yarn
if: matrix.pm == 'yarn'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.yarn-cache-dir.outputs.dir }}
@@ -124,6 +127,7 @@ jobs:
- name: Cache bun
if: matrix.pm == 'bun'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ~/.bun/install/cache

View File

@@ -67,6 +67,7 @@ jobs:
- name: Cache npm
if: inputs.package-manager == 'npm'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.npm-cache-dir.outputs.dir }}
@@ -84,6 +85,7 @@ jobs:
- name: Cache pnpm
if: inputs.package-manager == 'pnpm'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.pnpm-cache-dir.outputs.dir }}
@@ -105,6 +107,7 @@ jobs:
- name: Cache yarn
if: inputs.package-manager == 'yarn'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ${{ steps.yarn-cache-dir.outputs.dir }}
@@ -114,6 +117,7 @@ jobs:
- name: Cache bun
if: inputs.package-manager == 'bun'
continue-on-error: true
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: ~/.bun/install/cache

View File

@@ -6,3 +6,17 @@ scripts/release.sh
# Plugin dev notes (not needed by consumers)
.claude-plugin/PLUGIN_SCHEMA_NOTES.md
# Python/test cache artifacts are local build byproducts, not runtime surface
__pycache__/
**/__pycache__/
**/__pycache__/**
*.pyc
*.pyo
*.pyd
**/*.pyc
**/*.pyo
**/*.pyd
*$py.class
.pytest_cache/
**/.pytest_cache/**

View File

@@ -1,6 +1,6 @@
# Everything Claude Code (ECC) — Agent Instructions
This is a **production-ready AI coding plugin** providing 58 specialized agents, 220 skills, 74 commands, and automated hook workflows for software development.
This is a **production-ready AI coding plugin** providing 60 specialized agents, 225 skills, 75 commands, and automated hook workflows for software development.
**Version:** 2.0.0-rc.1
@@ -35,6 +35,8 @@ This is a **production-ready AI coding plugin** providing 58 specialized agents,
| kotlin-build-resolver | Kotlin/Gradle build errors | Kotlin build failures |
| database-reviewer | PostgreSQL/Supabase specialist | Schema design, query optimization |
| python-reviewer | Python code review | Python projects |
| django-reviewer | Django code review | Django apps, DRF APIs, ORM, migrations |
| django-build-resolver | Django build, migration, and setup errors | Django startup, dependency, migration, collectstatic failures |
| java-reviewer | Java and Spring Boot code review | Java/Spring Boot projects |
| java-build-resolver | Java/Maven/Gradle build errors | Java build failures |
| loop-operator | Autonomous loop execution | Run loops safely, monitor stalls, intervene |
@@ -147,9 +149,9 @@ Troubleshoot failures: check test isolation → verify mocks → fix implementat
## Project Structure
```
agents/ — 58 specialized subagents
skills/ — 220 workflow skills and domain knowledge
commands/ — 74 slash commands
agents/ — 60 specialized subagents
skills/ — 225 workflow skills and domain knowledge
commands/ — 75 slash commands
hooks/ — Trigger-based automations
rules/ — Always-follow guidelines (common + per-language)
scripts/ — Cross-platform Node.js utilities

View File

@@ -358,7 +358,7 @@ If you stacked methods, clean up in this order:
/plugin list ecc@ecc
```
**That's it!** You now have access to 58 agents, 220 skills, and 74 legacy command shims.
**That's it!** You now have access to 60 agents, 225 skills, and 75 legacy command shims.
### Dashboard GUI
@@ -456,7 +456,7 @@ everything-claude-code/
| |-- plugin.json # Plugin metadata and component paths
| |-- marketplace.json # Marketplace catalog for /plugin marketplace add
|
|-- agents/ # 58 specialized subagents for delegation
|-- agents/ # 60 specialized subagents for delegation
| |-- planner.md # Feature implementation planning
| |-- architect.md # System design decisions
| |-- tdd-guide.md # Test-driven development
@@ -1360,9 +1360,9 @@ The configuration is automatically detected from `.opencode/opencode.json`.
| Feature | Claude Code | OpenCode | Status |
|---------|-------------|----------|--------|
| Agents | PASS: 58 agents | PASS: 12 agents | **Claude Code leads** |
| Commands | PASS: 74 commands | PASS: 35 commands | **Claude Code leads** |
| Skills | PASS: 220 skills | PASS: 37 skills | **Claude Code leads** |
| Agents | PASS: 60 agents | PASS: 12 agents | **Claude Code leads** |
| Commands | PASS: 75 commands | PASS: 35 commands | **Claude Code leads** |
| Skills | PASS: 225 skills | PASS: 37 skills | **Claude Code leads** |
| Hooks | PASS: 8 event types | PASS: 11 events | **OpenCode has more!** |
| Rules | PASS: 29 rules | PASS: 13 instructions | **Claude Code leads** |
| MCP Servers | PASS: 14 servers | PASS: Full | **Full parity** |
@@ -1465,9 +1465,9 @@ ECC is the **first plugin to maximize every major AI coding tool**. Here's how e
| Feature | Claude Code | Cursor IDE | Codex CLI | OpenCode |
|---------|------------|------------|-----------|----------|
| **Agents** | 58 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 |
| **Commands** | 74 | Shared | Instruction-based | 35 |
| **Skills** | 220 | Shared | 10 (native format) | 37 |
| **Agents** | 60 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 |
| **Commands** | 75 | Shared | Instruction-based | 35 |
| **Skills** | 225 | Shared | 10 (native format) | 37 |
| **Hook Events** | 8 types | 15 types | None yet | 11 types |
| **Hook Scripts** | 20+ scripts | 16 scripts (DRY adapter) | N/A | Plugin hooks |
| **Rules** | 34 (common + lang) | 34 (YAML frontmatter) | Instruction-based | 13 instructions |

View File

@@ -160,7 +160,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/"
/plugin list ecc@ecc
```
**完成!** 你现在可以使用 58 个代理、220 个技能和 74 个命令。
**完成!** 你现在可以使用 60 个代理、225 个技能和 75 个命令。
### multi-* 命令需要额外配置

View File

@@ -158,6 +158,7 @@ commands:
- build-fix
- checkpoint
- code-review
- cost-report
- cpp-build
- cpp-review
- cpp-test

View File

@@ -27,6 +27,80 @@ When invoked:
- **Consolidate** similar issues (e.g., "5 functions missing error handling" not 5 separate findings)
- **Prioritize** issues that could cause bugs, security vulnerabilities, or data loss
### Pre-Report Gate
Before writing a finding, answer all four questions. If any answer is "no" or
"unsure", downgrade severity or drop the finding.
1. **Can I cite the exact line?** Name the file and line. Vague findings like
"somewhere in the auth layer" are not actionable and must be dropped.
2. **Can I describe the concrete failure mode?** Name the input, state, and bad
outcome. If you cannot name the trigger, you are pattern-matching, not
reviewing.
3. **Have I read the surrounding context?** Check callers, imports, and tests.
Many apparent issues are already handled one frame up or guarded by a type.
4. **Is the severity defensible?** A missing JSDoc is never HIGH. A single
`any` in a test fixture is never CRITICAL. Severity inflation erodes trust
faster than missed findings.
### HIGH / CRITICAL Require Proof
For any finding tagged HIGH or CRITICAL, include:
- The exact snippet and line number
- The specific failure scenario: input, state, and outcome
- Why existing guards, such as types, validation, or framework defaults, do not
catch it
If you cannot produce all three, demote to MEDIUM or drop.
### It Is Acceptable And Expected To Return Zero Findings
A clean review is a valid review. Do not manufacture findings to justify the
invocation. If the diff is small, well-typed, tested, and follows the project's
patterns, the correct output is a summary with zero rows and verdict `APPROVE`.
Manufactured findings, filler nits, speculative "consider using X", and
hypothetical edge cases without a trigger are the primary failure mode of LLM
reviewers and directly undermine this agent's usefulness.
## Common False Positives - Skip These
Patterns that LLM reviewers commonly mis-flag. Skip unless you have evidence
specific to this codebase:
- **"Consider adding error handling"** on a call whose error path is handled by
the caller or framework, such as Express error middleware, React error
boundaries, top-level `try/catch`, or Promise chains with `.catch` upstream.
- **"Missing input validation"** when the function is internal and its callers
already validate. Trace at least one caller before flagging.
- **"Magic number"** for well-known constants: `200`, `404`, `1000` ms, `60`,
`24`, `1024`, array index `0` or `-1`, HTTP status codes, and single-use
local constants whose meaning is obvious from the variable name.
- **"Function too long"** for exhaustive `switch` statements, configuration
objects, test tables, or generated code. Length is not complexity.
- **"Missing JSDoc"** on single-purpose internal helpers whose name and
signature are self-describing.
- **"Prefer `const` over `let`"** when the variable is reassigned. Read the
whole function before flagging.
- **"Possible null dereference"** when the preceding line narrows the type or an
`if` guard is in scope. Trace type flow instead of pattern-matching on `?.`.
- **"N+1 query"** on fixed-cardinality loops, such as iterating a four-element
enum, or on paths already using `DataLoader` or batching.
- **"Missing await"** on fire-and-forget calls that are intentionally detached,
such as logging, metrics, or background queue pushes. Check for a comment or
`void` prefix before flagging.
- **"Should use TypeScript"** or **"Should have types"** in a JavaScript-only
file. Match the project's existing language; do not suggest a stack change.
- **"Hardcoded value"** for values in test fixtures, example code, or
documentation snippets. Tests should have hardcoded expectations.
- **Security theater**: flagging `Math.random()` in a non-cryptographic context
such as animation, jitter, or sampling, or flagging `eval`/`Function` in a
plugin system that is explicitly a code-loading surface.
When tempted to flag one of the above, ask: "Would a senior engineer on this
team actually change this in review?" If no, skip.
## Review Checklist
### Security (CRITICAL)
@@ -206,10 +280,13 @@ Verdict: WARNING — 2 HIGH issues should be resolved before merge.
## Approval Criteria
- **Approve**: No CRITICAL or HIGH issues
- **Approve**: No CRITICAL or HIGH issues, including clean reviews with zero
findings. This is a valid and expected outcome.
- **Warning**: HIGH issues only (can merge with caution)
- **Block**: CRITICAL issues found — must fix before merge
Do not withhold approval to appear rigorous. If the diff is clean, approve it.
## Project-Specific Guidelines
When available, also check project-specific conventions from `CLAUDE.md` or project rules:

View File

@@ -0,0 +1,243 @@
---
name: django-build-resolver
description: Django/Python build, migration, and dependency error resolution specialist. Fixes pip/Poetry errors, migration conflicts, import errors, Django configuration issues, and collectstatic failures with minimal changes. Use when Django setup or startup fails.
tools: ["Read", "Write", "Edit", "Bash", "Grep", "Glob"]
model: sonnet
---
# Django Build Error Resolver
You are an expert Django/Python error resolution specialist. Your mission is to fix build errors, migration conflicts, import failures, dependency issues, and Django startup errors with **minimal, surgical changes**.
You DO NOT refactor or rewrite code — you fix the error only.
## Core Responsibilities
1. Resolve pip, Poetry, and virtualenv dependency errors
2. Fix Django migration conflicts and state inconsistencies
3. Diagnose and repair Django configuration/settings errors
4. Resolve Python import errors and module not found issues
5. Fix `collectstatic`, `runserver`, and management command failures
6. Repair database connection and `DATABASES` misconfiguration
## Diagnostic Commands
Run these in order to locate the error:
```bash
# Check Python and Django versions
python --version
python -m django --version
# Verify virtual environment is active
which python
pip list | grep -E "Django|djangorestframework|celery|psycopg"
# Check for missing dependencies
pip check
# Validate Django configuration
python manage.py check --deploy 2>&1 || python manage.py check 2>&1
# List pending migrations
python manage.py showmigrations 2>&1
# Detect unapplied migrations (exits non-zero if any are pending)
python manage.py migrate --check 2>&1
# Static files
python manage.py collectstatic --dry-run --noinput 2>&1
```
## Resolution Workflow
```text
1. Reproduce the error -> Capture exact message
2. Identify error category -> See table below
3. Read affected file/config -> Understand context
4. Apply minimal fix -> Only what's needed
5. python manage.py check -> Validate Django config
6. Run test suite -> Ensure nothing broke
```
## Common Fix Patterns
### Dependency / pip Errors
| Error | Cause | Fix |
|-------|-------|-----|
| `ModuleNotFoundError: No module named 'X'` | Missing package | `pip install X` or add to `requirements.txt` |
| `ImportError: cannot import name 'X' from 'Y'` | Version mismatch | Pin compatible version in requirements |
| `ERROR: pip's dependency resolver...` | Conflicting deps | Upgrade pip: `pip install --upgrade pip`, then `pip install -r requirements.txt` |
| `Poetry: No solution found` | Conflicting constraints | Relax version pin in `pyproject.toml` |
| `pkg_resources.DistributionNotFound` | Installed outside venv | Reinstall inside venv |
```bash
# Force reinstall all dependencies
pip install --force-reinstall -r requirements.txt
# Poetry: clear cache and resolve
poetry cache clear --all pypi
poetry install
# Create fresh virtualenv if corrupt
deactivate
python -m venv .venv && source .venv/bin/activate
pip install -r requirements.txt
```
### Migration Errors
| Error | Cause | Fix |
|-------|-------|-----|
| `django.db.migrations.exceptions.MigrationSchemaMissing` | DB tables not created | `python manage.py migrate` |
| `InconsistentMigrationHistory` | Applied out of order | Squash or fake migrations |
| `Migration X dependencies reference nonexistent parent Y` | Missing migration file | Recreate with `makemigrations` |
| `Table already exists` | Migration applied outside Django | `migrate --fake-initial` |
| `Multiple leaf nodes in the migration graph` | Conflicting migration branches | Merge: `python manage.py makemigrations --merge` |
| `django.db.utils.OperationalError: no such column` | Unapplied migration | `python manage.py migrate` |
```bash
# Fix conflicting migrations
python manage.py makemigrations --merge --no-input
# Fake migrations already applied at DB level
python manage.py migrate --fake <app> <migration_number>
# Reset migrations for an app (dev only!)
python manage.py migrate <app> zero
python manage.py makemigrations <app>
python manage.py migrate <app>
# Show migration plan
python manage.py migrate --plan
```
### Django Configuration Errors
| Error | Cause | Fix |
|-------|-------|-----|
| `django.core.exceptions.ImproperlyConfigured` | Missing setting or wrong value | Check `settings.py` for the named setting |
| `DJANGO_SETTINGS_MODULE not set` | Env var missing | `export DJANGO_SETTINGS_MODULE=config.settings.development` |
| `SECRET_KEY must not be empty` | Missing env var | Set `DJANGO_SECRET_KEY` in `.env` |
| `Invalid HTTP_HOST header` | `ALLOWED_HOSTS` misconfigured | Add hostname to `ALLOWED_HOSTS` |
| `Apps aren't loaded yet` | Importing models before `django.setup()` | Call `django.setup()` or move imports inside functions |
| `RuntimeError: Model class ... doesn't declare an explicit app_label` | App not in `INSTALLED_APPS` | Add the app to `INSTALLED_APPS` |
```bash
# Verify settings module resolves
python -c "import django; django.setup(); print('OK')"
# Check environment variable
echo $DJANGO_SETTINGS_MODULE
# Find missing settings
python manage.py diffsettings 2>&1
```
### Import Errors
```bash
# Diagnose circular imports
python -c "import <module>" 2>&1
# Find where an import is used
grep -r "from <module> import" . --include="*.py"
# Check installed app paths
python -c "import <app>; print(<app>.__file__)"
```
**Circular import fix:** Move imports inside functions or use `apps.get_model()`:
```python
# Bad - top-level causes circular import
from apps.users.models import User
# Good - import inside function
def get_user(pk):
from apps.users.models import User
return User.objects.get(pk=pk)
# Good - use apps registry
from django.apps import apps
User = apps.get_model('users', 'User')
```
### Database Connection Errors
| Error | Cause | Fix |
|-------|-------|-----|
| `django.db.utils.OperationalError: could not connect to server` | DB not running or wrong host | Start DB or fix `DATABASES['HOST']` |
| `django.db.utils.OperationalError: FATAL: role X does not exist` | Wrong DB user | Fix `DATABASES['USER']` |
| `django.db.utils.ProgrammingError: relation X does not exist` | Missing migration | `python manage.py migrate` |
| `psycopg2 not installed` | Missing driver | `pip install psycopg2-binary` |
```bash
# Test database connection
python manage.py dbshell
# Check DATABASES setting
python -c "from django.conf import settings; print(settings.DATABASES)"
```
### collectstatic / Static Files Errors
| Error | Cause | Fix |
|-------|-------|-----|
| `staticfiles.E002: The STATICFILES_DIRS...` | `STATIC_ROOT` path is also listed in `STATICFILES_DIRS` | Remove the `STATIC_ROOT` path from `STATICFILES_DIRS` |
| `FileNotFoundError` during collectstatic | Missing static file referenced in template | Remove or create the referenced file |
| `AttributeError: 'str' object has no attribute 'path'` | `STORAGES` not configured for Django 4.2+ | Update `STORAGES` dict in settings |
```bash
# Dry run to find issues
python manage.py collectstatic --dry-run --noinput 2>&1
# Clear and recollect
python manage.py collectstatic --clear --noinput
```
### runserver Failures
```bash
# Port already in use
lsof -ti:8000 | xargs kill -9
python manage.py runserver
# Use alternate port
python manage.py runserver 8080
# Verbose startup for hidden errors
python manage.py runserver --verbosity=2 2>&1
```
## Key Principles
- **Surgical fixes only** — don't refactor, just fix the error
- **Never** delete migration files — fake them instead
- **Always** run `python manage.py check` after fixing
- Fix root cause over suppressing symptoms
- Use `--fake` sparingly and only when DB state is known
- Prefer `pip install --upgrade` over manual `requirements.txt` edits when resolving conflicts
## Stop Conditions
Stop and report if:
- Migration conflict requires destructive DB changes (data loss risk)
- Same error persists after 3 fix attempts
- Fix requires changes to production data or irreversible DB operations
- Missing external service (Redis, PostgreSQL) that needs user setup
## Output Format
```text
[FIXED] apps/users/migrations/0003_auto.py
Error: InconsistentMigrationHistory — 0002_add_email applied before 0001_initial
Fix: python manage.py migrate users 0001 --fake, then re-applied
Remaining errors: 0
```
Final: `Django Status: OK/FAILED | Errors Fixed: N | Files Modified: list`
For Django architecture and ORM patterns, see `skill: django-patterns`.
For Django security settings, see `skill: django-security`.

160
agents/django-reviewer.md Normal file
View File

@@ -0,0 +1,160 @@
---
name: django-reviewer
description: Expert Django code reviewer specializing in ORM correctness, DRF patterns, migration safety, security misconfigurations, and production-grade Django practices. Use for all Django code changes. MUST BE USED for Django projects.
tools: ["Read", "Grep", "Glob", "Bash"]
model: sonnet
---
You are a senior Django code reviewer ensuring production-grade quality, security, and performance.
**Note**: This agent focuses on Django-specific concerns. Ensure `python-reviewer` has been invoked for general Python quality checks before or after this review.
When invoked:
1. Run `git diff -- '*.py'` to see recent Python file changes
2. Run `python manage.py check` if a Django project is present
3. Run `ruff check .` and `mypy .` if available
4. Focus on modified `.py` files and any related migrations
5. Assume CI checks have passed (orchestration gated); if CI status needs verification, run `gh pr checks` to confirm green before proceeding
## Review Priorities
### CRITICAL — Security
- **SQL Injection**: Raw SQL with f-strings or `%` formatting — use `%s` parameters or ORM
- **`mark_safe` on user input**: Never without explicit `escape()` first
- **CSRF exemption without reason**: `@csrf_exempt` on non-webhook views
- **`DEBUG = True` in production settings**: Leaks full stack traces
- **Hardcoded `SECRET_KEY`**: Must come from environment variable
- **Missing `permission_classes` on DRF views**: Defaults to global — verify intent
- **`eval()`/`exec()` on user input**: Immediate block
- **File upload without extension/size validation**: Path traversal risk
### CRITICAL — ORM Correctness
- **N+1 queries in loops**: Accessing related objects without `select_related`/`prefetch_related`
```python
# Bad
for order in Order.objects.all():
print(order.user.email) # N+1
# Good
for order in Order.objects.select_related('user').all():
print(order.user.email)
```
- **Missing `atomic()` for multi-step writes**: Use `transaction.atomic()` for any sequence of DB writes
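- **`bulk_create` without `update_conflicts`**: Duplicate keys raise `IntegrityError` and abort the whole batch; note that `ignore_conflicts=True` avoids the error but silently drops the conflicting rows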
- **`get()` without `DoesNotExist` handling**: Unhandled exception risk
- **Queryset used after `delete()`**: Stale queryset reference
### CRITICAL — Migration Safety
- **Model change without migration**: Run `python manage.py makemigrations --check`
- **Backward-incompatible column drop**: Must be done in two deployments (nullable first)
- **`RunPython` without `reverse_code`**: Migration cannot be reversed
- **`atomic = False` without justification**: Leaves DB in partial state on failure
### HIGH — DRF Patterns
- **Serializer without explicit `fields`**: `fields = '__all__'` exposes all columns including sensitive ones
- **No pagination on list endpoints**: Unbounded queries can return millions of rows
- **Missing `read_only_fields`**: Auto-generated fields (id, created_at) editable by API
- **`perform_create` not used**: Injecting user context should happen in `perform_create`, not `validate`
- **No throttling on auth endpoints**: Login/registration open to brute force
- **Nested writable serializers without `update()`**: Default update silently ignores nested data
### HIGH — Performance
- **Queryset evaluated in template context**: Use `.values()` or pass list; avoid lazy evaluation in templates
- **Missing `db_index` on frequently filtered fields**: Full table scan on filtered queries (`ForeignKey` columns are indexed by default)
- **Synchronous external API call in view**: Blocks the request thread — offload to Celery
- **`len(queryset)` instead of `.count()`**: Forces full fetch
- **`exists()` not used for existence checks**: `if queryset:` fetches objects unnecessarily
```python
# Bad
if Product.objects.filter(sku=sku):
...
# Good
if Product.objects.filter(sku=sku).exists():
...
```
### HIGH — Code Quality
- **Business logic in views or serializers**: Move to `services.py`
- **Signal logic that belongs in a service**: Signals make control flow hard to trace — call the service function explicitly instead
- **Mutable default in model field**: `default=[]` or `default={}` — use the callables `default=list` or `default=dict`
- **`save()` called without `update_fields`**: Overwrites all columns — risk of clobbering concurrent writes
```python
# Bad
user.last_active = now()
user.save()
# Good
user.last_active = now()
user.save(update_fields=['last_active'])
```
### MEDIUM — Best Practices
- **`str(queryset)` or slicing for debug**: Use Django shell, not production code
- **Accessing `request.user` in serializer `validate()`**: Pass via context, not direct access
- **`print()` instead of `logger`**: Use `logging.getLogger(__name__)`
- **Missing `related_name`**: Reverse accessors like `user_set` are confusing
- **`blank=True` without `null=True` on non-string fields**: An empty form value is saved as `NULL`, which the `NOT NULL` column rejects with `IntegrityError`
- **Hardcoded URLs**: Use `reverse()` or `reverse_lazy()`
- **Missing `__str__` on models**: Django admin and logs fall back to unhelpful `ModelName object (pk)` labels without it
- **App not using `AppConfig.ready()`**: Signal receivers not connected properly
### MEDIUM — Testing Gaps
- **No test for permission boundary**: Verify unauthorized access returns 403/401
- **`force_authenticate` instead of proper token**: Tests skip auth logic entirely
- **Missing `@pytest.mark.django_db`**: pytest-django blocks database access, so the test fails with "Database access not allowed" instead of exercising the DB
- **Factory not used**: Raw `Model.objects.create()` in tests is fragile
## Diagnostic Commands
```bash
python manage.py check # Django system check
python manage.py makemigrations --check # Detect missing migrations
ruff check . # Fast linter
mypy . --ignore-missing-imports # Type checking
bandit -r . -ll # Security scan (medium+)
pytest --cov=apps --cov-report=term-missing -q # Tests + coverage
```
## Review Output Format
```text
[SEVERITY] Issue title
File: apps/orders/views.py:42
Issue: Description of the problem
Fix: What to change and why
```
## Approval Criteria
- **Approve**: No CRITICAL or HIGH issues
- **Warning**: MEDIUM issues only (can merge with caution)
- **Block**: CRITICAL or HIGH issues found
## Framework-Specific Checks
- **Migrations**: Every model change must have a migration. Two-phase for column removal.
- **DRF**: All public endpoints need explicit `permission_classes`. Pagination on all list views.
- **Celery**: Tasks must be idempotent. Use `bind=True` + `self.retry()` for transient failures.
- **Django Admin**: Never expose sensitive fields. Use `readonly_fields` for auto-generated data.
- **Signals**: Prefer explicit service calls. If signals are used, register in `AppConfig.ready()`.
## Reference
For Django architecture patterns and ORM examples, see `skill: django-patterns`.
For security configuration checklists, see `skill: django-security`.
For testing patterns and fixtures, see `skill: django-tdd`.
---
Review with the mindset: "Would this code safely serve 10,000 concurrent users without data loss, security breach, or a 3am pager alert?"

107
commands/cost-report.md Normal file
View File

@@ -0,0 +1,107 @@
---
description: Generate a local Claude Code cost report from a cost-tracker SQLite database.
argument-hint: [csv]
---
# Cost Report
Query the local cost-tracking database and present a spending report by day,
project, tool, and session. This command assumes a cost-tracking hook or plugin
is already writing usage rows to `~/.claude-cost-tracker/usage.db`.
## What This Command Does
1. Check that `sqlite3` is available.
2. Check that `~/.claude-cost-tracker/usage.db` exists.
3. Run aggregate queries against the `usage` table.
4. Present a compact report, or export recent rows as CSV when the argument is
`csv`.
## Prerequisites
The database must be populated by a local cost tracker. If the file is missing,
tell the user the tracker is not set up and suggest installing or enabling a
trusted Claude Code cost-tracking hook/plugin first.
```bash
test -f ~/.claude-cost-tracker/usage.db && echo "Database found" || echo "Database not found"
```
## Summary Query
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT
ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now') THEN cost_usd END), 0), 4) AS today_cost,
ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now', '-1 day') THEN cost_usd END), 0), 4) AS yesterday_cost,
ROUND(COALESCE(SUM(cost_usd), 0), 4) AS total_cost,
COUNT(*) AS total_calls,
COUNT(DISTINCT session_id) AS sessions
FROM usage;
"
```
## Project Breakdown
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT project, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY project
ORDER BY cost DESC;
"
```
## Tool Breakdown
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT tool_name, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY tool_name
ORDER BY cost DESC;
"
```
## Last Seven Days
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT date(timestamp) AS date, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY date(timestamp)
ORDER BY date DESC
LIMIT 7;
"
```
## CSV Export
If the user asks for `/cost-report csv`, export the 100 most recent usage rows
with an explicit column list:
```bash
sqlite3 -csv -header ~/.claude-cost-tracker/usage.db "
SELECT timestamp, project, tool_name, input_tokens, output_tokens, cost_usd, session_id, model
FROM usage
ORDER BY timestamp DESC
LIMIT 100;
"
```
## Report Format
Format the response as:
1. Summary: today, yesterday, total, calls, sessions.
2. By project: projects ranked by total cost.
3. By tool: tools ranked by total cost.
4. Last seven days: the seven most recent dates with recorded usage, each with cost and call count.
Use four decimal places for sub-dollar amounts. Do not estimate pricing from raw
tokens in this command; rely on the precomputed `cost_usd` values written by the
tracker.
## Source
Salvaged from stale community PR #1304 by `MayurBhavsar`.

View File

@@ -16,10 +16,21 @@ so the live execution truth is split across:
As of 2026-05-12:
- Public GitHub queues are clean across `everything-claude-code`,
`agentshield`, `JARVIS`, `ECC-Tools`, and `ECC-website`.
- Public GitHub queues are clean across `affaan-m/everything-claude-code`,
`affaan-m/agentshield`, `affaan-m/JARVIS`, `ECC-Tools/ECC-Tools`, and
`ECC-Tools/ECC-website`.
- Public GitHub discussions are also clean across those tracked repos:
`states: OPEN` returned zero discussions for every accessible discussion
surface on 2026-05-12.
- The final open public GitHub issue, #1314, was closed as a non-actionable
external badge/listing notification with a courtesy comment.
- Linear issue creation for this project was re-tested after GitHub cleanup and
is still blocked by the workspace free issue limit. Seven roadmap-lane issue
creation attempts all returned the same limit error, so this repo mirror and
Linear project status updates remain the active tracking surfaces until the
workspace is upgraded or issue capacity is freed.
- `npm run harness:audit -- --format json` reports 70/70 on current `main`.
- `npm run observability:ready` reports 14/14 readiness on current `main`.
- `npm run observability:ready` reports 16/16 readiness on current `main`.
- `docs/architecture/harness-adapter-compliance.md` maps Claude Code, Codex,
OpenCode, Cursor, Gemini, Zed-adjacent, dmux, Orca, Superset, Ghast, and
terminal-only support to install paths, verification commands, and risk
@@ -30,6 +41,28 @@ As of 2026-05-12:
- `docs/releases/2.0.0-rc.1/publication-readiness.md` gates GitHub release,
npm dist-tag, Claude plugin, Codex plugin, OpenCode package, billing, and
announcement publication on fresh evidence fields.
- `docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md` records the
rc.1 naming decision: ship as Everything Claude Code (ECC), keep
`ecc-universal` for npm, keep `ecc` for Claude/Codex plugin slugs, and defer
any broader repo/package rename until after the release pipeline is proven.
- `docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md` records the
dry-run publication evidence pass: npm pack/publish dry-runs, temp install
smoke, Claude plugin validation/tag preflight, Codex marketplace CLI shape,
OpenCode build, and the remaining approval-gated release blockers.
- A detached clean worktree at
`bfacf37715b39655cbc2c48f12f2a35c67cb0253` verified Claude plugin tag
dry-run without `--force`, local marketplace discovery, temp-home local
install, enabled plugin listing, and clean uninstall for `ecc@ecc`
`2.0.0-rc.1`.
- `docs/architecture/evaluator-rag-prototype.md` and
`examples/evaluator-rag-prototype/` define the first read-only
self-improving harness prototype: scenario specs, traces, reports,
candidate playbooks, verifier results, accepted maintainer-salvage,
billing-readiness, CI-failure-diagnosis, and harness-config-quality
candidates, plus the AgentShield policy-exception scenario and rejected
unsafe candidates.
- The npm package surface now excludes Python bytecode/cache artifacts through
package `files` negation rules and a publish-surface regression test.
- `docs/legacy-artifact-inventory.md` records that no `_legacy-documents-*`
directories exist in the current checkout, inventories the two sibling
workspace-level `_legacy-documents-*` repos as sanitized extraction sources,
@@ -58,6 +91,32 @@ As of 2026-05-12:
- AgentShield PR #60 added category-level built-in corpus benchmark output,
a `readyForRegressionGate` signal, terminal `--corpus` category coverage,
README/API docs, built-CLI smoke validation, and 1,705-test coverage.
- AgentShield PR #61 cleared the remaining Dependabot security/bugfix PR with
a lockfile-only `postcss` 8.5.6 -> 8.5.14 bump after local typecheck, full
tests, lint, build, and remote self-scan/action verification.
- AgentShield PR #62 added organization-policy exception lifecycle audit
evidence: active, expiring-soon, and expired exception counts; owner, ticket,
scope, expiry, and days-until-expiry reporting; terminal output and GitHub
Action job-summary evidence; README docs; rebuilt action bundles; and
1,708-test validation.
- AgentShield PR #63 exposed baseline drift in the GitHub Action with
`baseline` / `save-baseline` inputs, baseline drift outputs, job-summary
evidence, regression annotations, README/API docs, rebuilt action bundles,
and green remote action/self-scan/Node verification.
- AgentShield PR #64 added the first-class `agentshield baseline write`
CLI command with severity filtering, JSON metadata output, README/API docs,
rebuilt CLI bundle, local TDD coverage, and green remote action/self-scan/Node
verification.
- AgentShield PDF-export decision: defer a native PDF writer for now. The
self-contained HTML executive report remains the exportable buyer artifact
and can be printed to PDF when needed; native PDF generation should wait for
explicit enterprise/compliance demand or a print-fidelity gap in the HTML
report.
- `docs/architecture/agentshield-enterprise-research-roadmap.md` identifies
the next AgentShield enterprise signal: move from scanner/report/policy gate
to a team control plane with baseline drift, evidence packs, multi-harness
adapters, corpus accuracy gates, remediation routing, threat intelligence,
and ECC-Tools/GitHub App integration.
- ECC PR #1778 recovered the useful stale #1413 network/homelab architect-agent
concepts.
- ECC-Tools PR #26 added cost/token-risk predictive follow-ups for AI routing,
@@ -85,12 +144,57 @@ As of 2026-05-12:
plugin, agent, hook, command, and harness config changes that lack harness
audit, adapter matrix, cross-harness docs, or compatibility regression
evidence.
- ECC-Tools PR #34 added skill-quality predictive follow-ups and a Skill
Quality PR-risk bucket for skill, agent, command, and rule guidance changes
that lack examples, validation, eval, or reference evidence.
- ECC-Tools PR #35 added RAG/evaluator predictive follow-ups and a
RAG/Evaluator Evidence PR-risk bucket for retrieval, embedding, ranking, and
evaluator changes that lack reference-set comparison, golden trace,
benchmark, fixture, or eval-run evidence.
- ECC-Tools PR #36 added deep-analyzer predictive follow-ups, a Deep Analyzer
Evidence PR-risk bucket, and a Linear-ready project sync backlog table for
deferred follow-up work.
- ECC-Tools PR #37 added a maintained analyzer corpus fixture, corpus validation
tests, and co-located analyzer reference-set evidence recognition for future
predictive follow-ups and PR-risk taxonomy checks.
- ECC-Tools PR #38 added PR review/stale-salvage predictive follow-ups, a
PR Review/Salvage Evidence taxonomy bucket, and maintained corpus fixtures
for stale-closure salvage, reviewer-thread, and reopen-flow evidence.
- ECC-Tools PR #39 added opt-in native Linear GraphQL sync for deferred
follow-up backlog items, preserving GitHub object caps while creating or
reusing Linear issues when `LINEAR_API_KEY` and `LINEAR_TEAM_ID` are
configured.
- ECC-Tools PR #40 added a checked-in evaluator/RAG corpus contract covering
stale-PR salvage, billing readiness, CI failure diagnosis, harness config
quality, AgentShield policy exceptions, skill-quality evidence,
deep-analyzer evidence, and RAG/evaluator comparison evidence, with each
scenario exercising missing-evidence and evidence-backed diffs.
- ECC PR #1803 landed the contributor Quarkus handling branch after maintainer
cleanup, current-`main` alignment, full local validation, and preservation of
the author's removal of incomplete ja-JP and zh-CN Quarkus translations.
- ECC PR #1812 salvaged useful Django reviewer, Django build resolver, and
Django Celery guidance from stale PR #1310 through a maintainer-owned branch
with source credit, catalog sync, and full local/remote validation.
- ECC PR #1813 expanded the stale PR salvage ledger with source-to-salvage
mappings for #1325, #1414, #1478, #1504, and #1603, confirming those useful
stale contributions were already preserved through later maintainer PRs.
- ECC PR #1815 salvaged the useful stale #1304 cost-tracking and #1232
skill-scout work into current command/skill conventions with current catalog
sync and full local/remote validation.
- ECC PR #1816 salvaged the useful stale #1659 frontend design guidance into
canonical ECC skill layout while preserving the guardrail that the official
Anthropic `frontend-design` skill remains externally sourced.
- ECC PR #1817 salvaged the useful stale #1658 code-reviewer false-positive
guardrails, adding proof gates for HIGH/CRITICAL findings, common
false-positive exclusions, and a regression test.
- ECC PR #1818 recorded the May 12 stale-salvage gap pass, classifying already
present work, skipped work, and translator/manual-review leftovers.
## Operating Rules
- Keep public PRs and issues below 20, with zero as the preferred release-lane
target.
- Maintain 70/70 harness audit and 14/14 observability readiness after every
- Maintain 70/70 harness audit and 16/16 observability readiness after every
GA-readiness batch.
- Do not publish release or social announcements until the GitHub release,
npm/package state, billing state, and plugin submission surfaces are verified
@@ -100,6 +204,58 @@ As of 2026-05-12:
maintainer-owned branches, and credit the source PR.
- Do not create new Linear issues until the active issue limit is cleared.
## Prompt-To-Artifact Execution Checklist
This table keeps the long operator prompt tied to concrete artifacts. A status
is not complete unless the evidence named in the "Current evidence" column
exists and has been freshly verified.
| Prompt requirement | Required artifact or gate | Current evidence | Status |
| --- | --- | --- | --- |
| Keep public PRs below 20 | Repo-family PR recheck | 0 open PRs across the tracked public repos on 2026-05-12 | Complete for this checkpoint |
| Keep public issues below 20 | Repo-family issue recheck | 0 open issues across the tracked public repos on 2026-05-12 after closing #1314 as non-actionable badge/listing noise | Complete for this checkpoint |
| Manage repository discussions | Repo-family discussion recheck | 0 open discussions across the tracked public repos on 2026-05-12 via GraphQL `states: OPEN` checks | Complete for this checkpoint |
| Manage PR discussions | PR review/comment closure plus merge/close state | #1803 was maintainer-edited and merged; no open PRs remain | Complete for this checkpoint |
| Salvage useful stale work | `docs/stale-pr-salvage-ledger.md` | Ledger records salvaged, superseded, skipped, and manual-review tails; #1815-#1818 added cost tracking, skill scout, frontend design guidance, code-reviewer false-positive guardrails, and the May 12 gap pass | Complete except translation/manual review tail |
| ECC 2.0 preview pack ready | Release docs, quickstart, publication readiness, release notes | `docs/releases/2.0.0-rc.1/` and readiness docs are in-tree | Needs final release evidence |
| Hermes specialized skills included safely | Hermes setup/import docs and sanitized skill surface | Hermes setup and import playbook are public; secrets stay local | Needs final release review |
| Naming and rename readiness | Naming matrix across package/plugin/docs/social surfaces | `docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md` records current package, repo, Claude plugin, Codex plugin, OpenCode, and npm availability evidence | Complete for rc.1; post-rc rename remains future work |
| Claude and Codex plugin publication | Contact/submission path with required artifacts and status | Publication readiness, naming matrix, and May 12 dry-run evidence document plugin validation, clean-checkout Claude tag/install smoke, and Codex marketplace CLI shape | Needs explicit approval for real tag/push and marketplace submission |
| Articles, tweets, and announcements | X thread, LinkedIn copy, GitHub release copy, push checklist | Draft launch collateral exists under rc.1 release docs | Needs URL-backed refresh |
| AgentShield enterprise iteration | Policy gates, SARIF, packs, provenance, corpus, HTML reports, exception lifecycle audit, baseline drift Action/CLI surfaces, enterprise research roadmap | PRs #53, #55-#64 landed with test evidence; native PDF export deferred in favor of self-contained HTML plus print-to-PDF until explicit enterprise demand appears; `docs/architecture/agentshield-enterprise-research-roadmap.md` selects baseline drift as the first control-plane slice | Baseline-drift Action and CLI write surfaces landed; evidence-pack routing remains |
| ECC Tools next-level app | Billing audit, PR checks, deep analyzer, sync backlog, evaluator/RAG corpus | PRs #26-#40 landed with test evidence | Needs capacity-backed Linear rollout |
| GitGuardian/Dependabot/CodeRabbit-style checks | Non-blocking taxonomy and deterministic follow-up checks | ECC-Tools risk taxonomy check plus follow-up signals landed, including Skill Quality, Deep Analyzer Evidence, Analyzer Corpus Evidence, RAG/Evaluator Evidence, and PR Review/Salvage Evidence | Partially complete |
| Harness-agnostic learning system | Audit, adapter matrix, observability, traces, promotion loop | Audit/adapters/observability gates plus `docs/architecture/evaluator-rag-prototype.md`, `examples/evaluator-rag-prototype/`, and ECC-Tools PR #40 define read-only stale-salvage, billing-readiness, CI-failure-diagnosis, harness-config-quality, AgentShield policy-exception, skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison scenarios with trace, report, playbook, verifier, and predictive-check artifacts | Local corpus complete; hosted integration remains future |
| Linear roadmap is detailed | Linear project status plus repo mirror | Repo mirror exists; issue creation was retried on 2026-05-12 and remains blocked by the workspace free issue limit | Needs recurring status updates after each merge batch |
| Flow separation and progress tracking | Flow lanes with owner artifacts and update cadence | This roadmap defines lanes below | Active |
| Realtime Linear sync | Project updates while issue limit is blocked; issues later | ECC-Tools #39 implements opt-in Linear API sync for deferred follow-up backlog items | Needs workspace capacity/config rollout |
| Observability for self-use | Local readiness gate, traces, status snapshots, HUD/status contract, risk ledger | `npm run observability:ready` reports 16/16 | Complete for local gate |
| Proper release and notifications | Release tag, npm publish state, plugin state, social posts | Publication readiness gate exists | Not complete |
## Execution Lanes And Tracking Contract
Until Linear issue capacity is cleared, this document is the durable execution
ledger and Linear receives project status updates only. When capacity is
available, each lane below should become a small set of Linear issues linked
back to the repo evidence and merge commits.
| Lane | Source of truth | Next tracked artifact | Update cadence |
| --- | --- | --- | --- |
| Queue hygiene and salvage | GitHub PR/issue state, salvage ledger | Append ledger entries for any future stale closures | Every cleanup batch |
| Release and publication | rc.1 release docs, publication readiness doc | Naming matrix and plugin submission/contact checklist | Before any tag |
| Harness OS core | Audit, adapter matrix, observability docs, `ecc2/` | HUD/session-control acceptance spec | Weekly until GA |
| Evaluation and RAG | Reference-set validation, harness audit, traces, ECC-Tools corpus | Read-only evaluator/RAG prototype plus stale-salvage, billing-readiness, CI-failure-diagnosis, harness-config-quality, AgentShield policy-exception, skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison fixtures | Revisit when the hosted retrieval/check-run automation plan is drafted |
| AgentShield enterprise | AgentShield PR evidence and roadmap notes | Baseline-drift evidence-pack and backlog sync follow-up | Next implementation batch |
| ECC Tools app | ECC-Tools PR evidence, billing audit, risk taxonomy, evaluator/RAG corpus | Capacity-backed Linear rollout | Next implementation batch |
| Linear progress | Linear project status updates and this mirror | Status update with queue/evidence/missing gates | Every significant merge batch |
The project status update should always include:
1. Current public PR and issue counts.
2. Merged evidence since the previous update.
3. Deferred or blocked items with the reason.
4. The next one or two implementation slices.
5. Any release or publication gate that is still not evidence-backed.
## Reference Pressure
The GA roadmap is informed by these reference surfaces:
@@ -160,7 +316,7 @@ Target: 2026-06-07
Acceptance:
- Observability readiness remains 14/14 and is backed by JSONL traces, status
- Observability readiness remains 16/16 and is backed by JSONL traces, status
snapshots, risk ledger, and exportable handoff contracts.
- HUD/status model covers context, tool calls, active agents, todos, checks,
cost, risk, and queue state.
@@ -189,8 +345,9 @@ Target: 2026-06-14
Acceptance:
- Formal policy schema exists for org baselines, exceptions, owners,
expiration, severity, and audit trails.
- Formal policy schema and evaluation output exist for org baselines,
exceptions, owners, expiration, severity, audit trails, expiring-soon
visibility, and expired-exception enforcement.
- SARIF/code-scanning output is implemented and tested.
- GitHub Action policy gates expose organization policy status and violation
counts for branch-protection and CI evidence.
@@ -201,7 +358,11 @@ Acceptance:
- Prompt-injection corpus and regression benchmark are ready for continuous
rule hardening with category-level coverage and regression-gate output.
- Enterprise reports include JSON plus self-contained HTML executive output
with risk posture, priority findings, and category exposure.
with risk posture, priority findings, category exposure, and policy-exception
lifecycle evidence in terminal/CI summaries.
- Native PDF export is not a GA blocker unless an enterprise/compliance
workflow requires a generated PDF file instead of the self-contained HTML
report and browser print-to-PDF path.
### 6. ECC Tools Billing, Deep Analysis, PR Checks, And Linear Sync
@@ -216,15 +377,40 @@ Acceptance:
failure modes.
- Deep analyzer covers diff patterns, CI/CD workflows, dependency/security
surface, PR review behavior, failure history, harness config, skill quality,
and reference-set/RAG comparison.
dedicated analyzer corpus evidence, co-located analyzer reference sets,
PR review/stale-salvage evidence, RAG/evaluator comparison, and reference-set
validation.
- PR check suite taxonomy includes Security Evidence, Harness Drift, Install
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, and Agent Config
Review.
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, Reference Set
Validation, Deep Analyzer Evidence, RAG/Evaluator Evidence,
PR Review/Salvage Evidence, Skill Quality, and Agent Config Review.
- Evaluator/RAG billing readiness fixture
`examples/evaluator-rag-prototype/billing-marketplace-readiness/` records the
read-only claim-verification path for Marketplace, App, subscription, seat,
entitlement, and plan language before launch copy can treat those claims as
live.
- Cost/token-risk predictive follow-ups flag AI routing, model-call, usage,
quota, and budget changes when budget evidence is missing.
- Reference-set validation follow-ups flag analyzer, skill, agent, command, and
harness-guidance changes that lack eval, golden trace, benchmark, or
maintained reference-set evidence.
- Deep-analyzer follow-ups flag repository, commit, architecture, pattern, and
analysis-pipeline changes that lack analyzer corpus, snapshot, fixture, or
benchmark evidence.
- Analyzer corpus evidence includes maintained fixtures and tests for current
architecture and commit analyzer outputs, plus co-located
`src/analyzers/{fixtures,goldens,reference-sets,benchmarks,evals}/` evidence
paths.
- RAG/evaluator follow-ups flag retrieval, embedding, ranking, and evaluator
changes that lack reference-set comparison, golden trace, benchmark, fixture,
or eval-run evidence.
- Evaluator/RAG corpus contract mirrors the local prototype scenarios into
ECC-Tools fixtures and tests for stale-PR salvage, billing readiness,
CI failure diagnosis, harness config quality, AgentShield policy exceptions,
skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison.
- PR review/stale-salvage follow-ups flag review, triage, stale-closure, and
pull-request automation changes that lack stale-salvage fixtures,
reviewer-thread cases, or reopen-flow reference evidence.
- PR analysis comments summarize review follow-up signals for requested
changes, unresolved or outdated review threads, and missing approvals.
- CI failure-mode predictive follow-ups flag workflow and test-runner changes
@@ -233,8 +419,9 @@ Acceptance:
- Harness-config quality predictive follow-ups flag MCP, plugin, agent, hook,
command, and harness config changes that lack audit, adapter matrix,
cross-harness doc, or compatibility regression evidence.
- Linear sync design maps findings to issues/status without flooding the
workspace.
- Linear sync maps deferred backlog findings to Linear issues without flooding
GitHub, creates or reuses exact-title Linear issues when configured, and
reports skipped sync when credentials or team configuration are absent.
- Follow-up generation caps automatic GitHub object creation and keeps overflow
findings in a copy-ready project sync backlog.
@@ -258,7 +445,14 @@ Acceptance:
## Next Engineering Slices
1. Decide whether AgentShield PDF export adds value beyond the merged HTML
executive report and corpus benchmark output.
2. Extend ECC Tools deep analysis and Linear/project sync without flooding the
workspace.
1. Finish the AgentShield baseline-drift control-plane slice from
`docs/architecture/agentshield-enterprise-research-roadmap.md`: PR #63
shipped the GitHub Action baseline outputs and job-summary evidence; PR #64
shipped first-class baseline snapshot creation through
`agentshield baseline write`; the remaining work is evidence-pack routing
and ECC-Tools backlog sync integration.
2. Enable/configure the merged Linear backlog sync path after workspace issue
capacity clears or the Linear workspace is upgraded.
3. Use the ECC-Tools evaluator/RAG corpus as the promotion gate before adding
hosted retrieval, vector storage, model-backed judging, or automated
check-run promotion.

View File

@@ -136,6 +136,13 @@ Repo work:
- `agentshield`: feed prompt-injection and config-risk findings into regression
suites.
Current prototype:
- `docs/architecture/evaluator-rag-prototype.md` defines the read-only
evaluator/RAG artifact contract.
- `examples/evaluator-rag-prototype/` records the first scenario spec, trace,
report, candidate playbook, and verifier result for stale-PR salvage.
Verification:
- read-only prototype that emits a trace, report, candidate playbook, and

View File

@@ -0,0 +1,329 @@
# AgentShield Enterprise Research Roadmap
Generated: 2026-05-12
This is a planning artifact for the next AgentShield enterprise iteration. It
does not modify AgentShield code. The goal is to turn the current scanner,
policy gate, corpus, and reporting surface into a security control plane for
teams running AI coding agents across multiple harnesses.
## Evidence Reviewed
Current AgentShield repository state:
- AgentShield checkout on clean `main`.
- `README.md`, `API.md`, `package.json`, `.github/workflows/*`, and
`src/`/`tests/` module layout.
- Current supported user surfaces: `agentshield scan`, `agentshield init`,
`agentshield miniclaw start`, scanner JSON, MiniClaw API, GitHub Action,
HTML, SARIF, markdown, terminal, and JSON reports.
- Current enterprise-like surfaces: policy packs, GitHub Action policy
enforcement, SARIF policy violations, supply-chain provenance, corpus
benchmark, HTML executive reports, and exception lifecycle audit.
External references checked from official GitHub repos or README sources:
- [stablyai/orca](https://github.com/stablyai/orca): multi-agent IDE,
worktree isolation, live agent status, GitHub integration, diff review, and
notifications.
- [superset-sh/superset](https://github.com/superset-sh/superset): AI-agent
editor with worktree orchestration, built-in diff review, workspace presets,
and universal CLI-agent compatibility.
- [standardagents/dmux](https://github.com/standardagents/dmux): tmux/worktree
multiplexer with lifecycle hooks, multi-agent launches, pane visibility, and
merge/PR workflows.
- [jarrodwatts/claude-hud](https://github.com/jarrodwatts/claude-hud): Claude
Code statusline, context health, tool activity, agent tracking, todo
progress, transcript parsing, and usage telemetry.
- [stanford-iris-lab/meta-harness](https://github.com/stanford-iris-lab/meta-harness):
harness optimization through repeatable tasks, logged proposer interactions,
and evaluated scaffold changes.
- [greyhaven-ai/autocontext](https://github.com/greyhaven-ai/autocontext):
recursive improvement loop with traces, scored generations, playbooks,
persisted knowledge, scenario evaluation, and optional production traces.
- [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent):
self-improving skills, memory, session search, multi-platform gateway,
scheduled automation, terminal backends, and trajectory generation.
- [anthropics/claude-code](https://github.com/anthropics/claude-code):
terminal, IDE, GitHub, plugin, permission, MCP, and data-retention surfaces.
- [anomalyco/opencode](https://github.com/anomalyco/opencode): provider-agnostic
open-source coding agent with build/plan agents, desktop beta,
client/server architecture, and LSP support.
- [opencode-ai/opencode](https://github.com/opencode-ai/opencode): earlier
archived Go-based terminal agent with sessions, providers, LSP, file change
tracking, custom commands, and auto-compact.
- [zed-industries/zed](https://github.com/zed-industries/zed): high-performance
multiplayer editor with strict license/compliance CI expectations.
- [aidenybai/ghast](https://github.com/aidenybai/ghast): native terminal
multiplexer built around Ghostty, workspace grouping, split panes, drag/drop,
notifications, and terminal search.
Local Claude Code source inspection:
- Reviewed only non-secret local file/module shape from a private Claude Code
source snapshot.
- Relevant surfaces observed: `tools/`, `utils/permissions/`, `utils/mcp/`,
`utils/hooks/`, `utils/plugins/`, `types/permissions.ts`,
`types/plugin.ts`, `remote/`, `tasks/`, `assistant/sessionHistory.ts`,
and session/history utilities.
- No code was copied. The takeaway is that AgentShield should track permissions,
plugins, MCP, hooks, remote sessions, task/subagent activity, and history as
first-class audit domains rather than treating a `.claude/` tree as the only
source of truth.
## Current AgentShield Position
AgentShield is already more than a static lint tool:
- Rule coverage spans secrets, permissions, hooks, MCP servers, agent configs,
prompt injection, supply chain, taint analysis, sandbox execution, policy
evaluation, runtime repair/status, corpus validation, MiniClaw, and Opus
analysis.
- Reports are usable by humans and machines: terminal, JSON, markdown, HTML,
SARIF, scan logs, and GitHub Action outputs.
- Enterprise hooks exist: policy packs, exception metadata, expiring/expired
exception reporting, SARIF code scanning, and job-summary output.
- Accuracy work is active: `runtimeConfidence`, template/example weighting,
docs-example downgrades, hook-manifest resolution, false-positive audit
guidance, and corpus readiness.
The next iteration should not be "add more regex rules" by default. The higher
leverage move is to make AgentShield remember, compare, route, and enforce
security posture across time, repos, teams, and harnesses.
## Enterprise Gaps
### 1. Organization Baselines And Drift
Enterprise buyers need to know whether a repo, team, or agent fleet is getting
safer or riskier over time. AgentShield has scan logs and baseline comparison
modules, and PR #63 now exposes that drift through GitHub Action inputs,
outputs, annotations, and job-summary evidence. PR #64 adds first-class
baseline snapshot creation through `agentshield baseline write`. The remaining
product surface should make CLI drift summaries, evidence packs, and
owner-ready deltas explicit.
Target capability:
- `agentshield baseline write --path .claude --output agentshield-baseline.json`
- `agentshield scan --baseline agentshield-baseline.json`
- Report sections for new, fixed, unchanged, suppressed, and policy-excepted
findings.
- GitHub Action output that posts "security posture changed" rather than only a
point-in-time grade.
### 2. Multi-Harness Security Adapters
The market is moving toward many parallel agent harnesses, not one tool. Orca,
Superset, dmux, OpenCode, Claude Code, Codex, Gemini, Zed, and terminal
multiplexers all create different security surfaces.
Target capability:
- A small adapter registry for `claude-code`, `opencode`, `codex`, `gemini`,
`zed`, `dmux`, `orca`, `superset`, and `generic-terminal`.
- Each adapter declares config paths, permission concepts, plugin surfaces,
MCP/tooling conventions, history/session surfaces, and CI evidence.
- Report output groups findings by harness and confidence, so template/docs
findings do not look like active runtime exposure.
### 3. Session And Worktree Awareness
Worktree-native orchestrators change the risk model. A team can run many agents
in parallel, each with its own branch, shell, MCP config, and local state.
Target capability:
- Optional scan metadata for branch, worktree path, agent name, session id,
provider, and orchestrator.
- A scan-history table that answers: which worktree introduced a new permission,
which agent run added a risky MCP, which branch relaxed policy, and whether
the final merged branch fixed it.
- A compact "security HUD" summary usable by statuslines, GitHub checks, and
local dashboards.
### 4. Evidence Packs For Buyers And Auditors
HTML reports are the right buyer-facing artifact today; native PDF is deferred.
The deeper need is a portable evidence bundle that can be attached to audits,
security reviews, and customer questionnaires.
Target capability:
- `agentshield scan --evidence-pack out/agentshield-evidence`
- Bundle includes JSON report, HTML report, SARIF, policy evaluation,
exception audit, baseline diff, dependency/provenance summary, and a short
README explaining how to interpret the artifacts.
- Optional redaction mode for secrets, local paths, usernames, and project names.
### 5. Regression Corpus And Reference Sets
Meta-Harness and Autocontext point to the same lesson: improvements need scored
scenarios, traces, and playbooks. AgentShield already has a corpus benchmark,
but enterprise trust needs a curated reference set for false positives,
false negatives, and policy regressions.
Target capability:
- Versioned scenario fixtures for critical rules, false-positive suppressions,
policy exceptions, template/docs examples, plugin manifests, and hook-code
resolution.
- Per-category precision/coverage reporting, not just aggregate readiness.
- A "no accuracy regression" gate that must pass before releases.
- Playbook notes for why a suppression exists and when it should expire.
### 6. Remediation Workflow
Security tools become enterprise-grade when they turn findings into accountable
work without flooding maintainers.
Target capability:
- One-click or CLI-generated remediation branch for safe transforms.
- Policy comments that group findings by owner and risk rather than by file
order.
- GitHub App support for check-run annotations, issue caps, Linear sync, and
deferred backlog export.
- Finding fingerprints that avoid duplicate issues across repeated scans.
### 7. Threat Intelligence And Package Reputation
Agent security depends on MCP packages, plugin repositories, action bundles,
and rapidly changing CLI ecosystems. Static checks need a maintained external
reputation layer.
Target capability:
- A local-first threat-intel cache for known MCP/package risks, CVEs, malware
package names, suspicious install scripts, mutable git dependencies, and
known-good packages.
- Offline deterministic mode remains available.
- Online enrichment is opt-in and produces clear provenance for every external
claim.
### 8. Commercial And Team Controls
AgentShield is already conceptually connected to the ECC Tools GitHub App.
Native GitHub payments make the product path more concrete: free local scans,
paid org policy gates, paid evidence bundles, and paid drift/history.
Target capability:
- Tier-aware GitHub App checks: free static scan, paid org policy enforcement,
paid evidence packs, paid historical drift, and paid deep analysis.
- Seat/team mapping for policy owners and exception approvers.
- Billing readiness checks shared with ECC-Tools so payment state never changes
enforcement behavior silently.
## Recommended Build Order
### Slice 1: Baseline Drift MVP
Implement the smallest enterprise control-plane primitive: compare this scan to
the last accepted baseline.
Artifacts:
- Baseline JSON schema.
- Baseline writer and comparator.
- Terminal and JSON report sections for new/fixed/unchanged findings.
- Tests covering stable fingerprints, fixed findings, new findings, and policy
exception carry-forward.
Why first:
- It reuses existing scan output.
- It improves CLI, GitHub Action, and GitHub App value at once.
- It does not require a hosted service.
### Slice 2: Evidence Pack Bundle
Bundle the existing machine and human reports into a portable audit artifact.
Artifacts:
- `--evidence-pack <dir>` CLI flag.
- Redacted bundle README.
- HTML, JSON, SARIF, policy, exception, and baseline diff files.
- Tests for file layout, redaction, and deterministic output names.
Why second:
- It converts existing reporting work into buyer-ready proof.
- It keeps native PDF deferred while still meeting audit handoff needs.
### Slice 3: Harness Adapter Registry
Make harness support explicit instead of implicit.
Artifacts:
- Adapter metadata for Claude Code, OpenCode, Codex, Gemini, dmux, generic
terminal, and project-local templates.
- Discovery output that reports which adapters matched and why.
- Report grouping by adapter.
- Tests using fixture directories for each adapter.
Why third:
- It aligns AgentShield with ECC's harness-agnostic positioning.
- It creates a stable surface for future Zed, Orca, Superset, and Hermes
integration without pretending all harnesses share Claude's config model.
### Slice 4: Corpus Accuracy Gate
Promote the corpus from a benchmark into a release gate.
Artifacts:
- Per-category corpus report.
- Required category thresholds.
- Regression snapshots for known false-positive suppressions.
- Release checklist entry requiring corpus readiness before publish.
Why fourth:
- It prevents enterprise credibility from degrading as rules expand.
- It creates a durable route for Meta-Harness/Autocontext-style improvement
loops later.
### Slice 5: GitHub App And Linear Sync Wiring
Connect AgentShield findings to ECC-Tools follow-up routing.
Artifacts:
- Finding fingerprints compatible with ECC-Tools issue caps.
- Linear-ready backlog export for baseline drift and policy violations.
- Check-run annotations grouped by owner/risk.
- Tests that ensure repeated scans do not spam duplicate issues.
Why fifth:
- It needs the baseline/fingerprint work from Slice 1.
- It is the bridge from local CLI to paid team workflow.
## Non-Goals For This Iteration
- Native PDF generation, unless buyer/compliance workflows explicitly require
generated PDF instead of HTML plus print-to-PDF.
- Hosted dashboards before the local baseline/evidence/fingerprint contracts are
stable.
- Fine-tuning or model training before deterministic corpus gates and reference
traces exist.
- Broad automated code rewrites for risky findings without explicit,
reviewable transforms and tests.
## Acceptance Gates
The AgentShield enterprise iteration is not complete until these are true:
- Local `npm run typecheck`, `npm run lint`, `npm test`, and `npm run build`
pass from the AgentShield repository root.
- Built CLI smoke tests cover the new flags or report modes.
- GitHub Action self-test covers the new CI-visible output.
- Documentation names the free/local path and the paid/team path separately.
- Evidence produced by the feature is deterministic enough for CI diffing.
- ECC-Tools can consume the finding fingerprints or backlog export without
exceeding GitHub/Linear object caps.
- The GA roadmap and Linear project status link to the merged AgentShield PRs.

View File

@@ -0,0 +1,158 @@
# Evaluator RAG Prototype
ECC 2.0 needs a self-improving harness loop that can learn from real work
without blindly mutating a user's Claude, Codex, OpenCode, dmux, Zed, or
terminal setup. This prototype defines the smallest read-only artifact set for
that loop.
The fixture set lives in
[`examples/evaluator-rag-prototype/`](../../examples/evaluator-rag-prototype/).
It started with the May 2026 stale-PR cleanup and salvage lane because that
lane has real inputs, real accepted work, and real rejected work. The corpus now
also includes a billing/Marketplace readiness scenario so launch copy cannot
treat dry-run release evidence or roadmap intent as live billing state. A
CI-failure diagnosis scenario adds the log-first workflow needed before an
agent proposes fixes for red checks. A harness-config quality scenario keeps
MCP, plugin, hook, command, agent, and adapter recommendations tied to the
adapter matrix before they mutate setup guidance. An AgentShield policy
exception scenario gates security exceptions on SARIF/report evidence, owner
fields, expiry state, and remediation-versus-exception decisions. A
skill-quality evidence scenario requires observed failure or feedback evidence,
working examples, reference-set gaps, and validation commands before a skill
amendment can be promoted. A deep-analyzer evidence scenario requires analyzer
corpus cases, expected-output comparisons, and risk-taxonomy proof before
repository or commit-analysis behavior can change.
## Reference Pressure
- Meta-Harness: treat the harness itself as an experiment with scenario specs,
verifier results, and promoted playbooks.
- Autocontext: store traces, reports, artifacts, and reusable improvements
before changing installed agent assets.
- Claude HUD: expose context, tools, todos, agent activity, checks, and risk so
an evaluator can judge a run after the fact.
- Hermes Agent: keep skills, memories, scheduler-like follow-ups, and terminal
gateway behavior explicit instead of hiding local commands.
- dmux, Orca, Superset, and Ghast: preserve worktree/session state so parallel
agent work can be compared, resumed, or closed cleanly.
- ECC Tools: route evaluator findings into PR comments, check runs, and Linear
backlog items without flooding GitHub.
## Artifact Contract
Every evaluator/RAG run is read-only until a verifier promotes a playbook.
| Artifact | Purpose | Fixture |
| --- | --- | --- |
| Scenario spec | Declares the objective, allowed evidence, forbidden actions, and pass/fail gates. | `scenario.json` |
| Trace | Captures observation, retrieval, proposal, verification, and promotion events. | `trace.json` |
| Report | Summarizes scores, evidence coverage, risks, and recommended next action. | `report.json` |
| Candidate playbook | Describes the maintainer-owned workflow that could be reused later. | `candidate-playbook.md` |
| Verifier result | Accepts or rejects candidates with concrete reasons and rollback notes. | `verifier-result.json` |
The prototype deliberately separates retrieval from action. A run can retrieve
closed PR diffs, Linear status, CI history, and local docs, but it cannot close,
merge, publish, tag, or rewrite configs as part of the evaluator pass.
## Phase Model
1. Observe the current queue, dirty worktrees, branch state, open PRs/issues,
discussions, CI state, and release gates.
2. Retrieve relevant reference evidence: stale-salvage ledger rows, prior
maintainer PRs, current docs, analyzer findings, CI failures, and harness
adapter rules.
3. Propose one or more playbooks with source attribution and expected
validation gates.
4. Verify each playbook against explicit acceptance and rejection rules.
5. Promote only the candidate that improves the scenario without widening blast
radius.
6. Record rollback guidance and unresolved manual-review tails.
## First Scenario
The first scenario is `stale-pr-salvage-maintainer-branch`.
It models the rule Affaan set during the May 2026 cleanup: stale closure is
queue hygiene, not loss of useful work. Useful closed PR work should be ported
into maintainer-owned PRs with attribution/backlinks, while generated churn,
bulk localization, and ambiguous translator work stay out of blind
cherry-picks.
The verifier accepts a maintainer salvage branch that:
- credits source PRs;
- avoids raw private context and personal paths;
- does not import stale bulk localization without translator review;
- records a durable ledger update;
- runs the same validation gates as a normal code, docs, or catalog change;
- leaves release publication actions approval-gated.
The verifier rejects a blind cherry-pick proposal that:
- imports stale translation/doc churn wholesale;
- skips the current catalog/install architecture;
- lacks attribution;
- lacks tests or ledger updates;
- mutates release or plugin publication state.
## Corpus Fixtures
The root fixture files preserve the original
`stale-pr-salvage-maintainer-branch` prototype. Additional scenarios can live in
subdirectories when they reuse the same five-artifact contract.
Current corpus:
- `stale-pr-salvage-maintainer-branch`: recovers useful closed PR work through
maintainer-owned branches with attribution and validation.
- `billing-marketplace-readiness`: verifies billing, App, and Marketplace
launch claims before public copy says they are live.
- `ci-failure-diagnosis`: requires failed-job logs, changed-file scope, and a
named regression command before a CI fix playbook can be promoted.
- `harness-config-quality`: requires adapter state, install/onramp path,
verification commands, risk notes, and config-preservation behavior before a
harness setup recommendation can be promoted.
- `agentshield-policy-exception`: requires AgentShield SARIF or report
evidence, policy-pack source, owner/ticket/scope/expiry fields, and expired
exception enforcement before a policy exception can be promoted.
- `skill-quality-evidence`: requires focused skill scope, observed failure or
user-feedback evidence, examples/reference-set coverage, validation commands,
and publication safety before a skill amendment can be promoted.
- `deep-analyzer-evidence`: requires maintained analyzer corpus cases,
expected-output comparisons, representative repository/commit histories, and
regression commands before deep-analysis behavior can be promoted.
## ECC Tools Mapping
ECC Tools already flags missing RAG/evaluator evidence for retrieval,
embedding, ranking, and evaluator changes. This prototype gives those checks a
target shape:
- `scenario.json` maps to analyzer corpus inputs.
- `trace.json` maps to golden traces and run telemetry.
- `report.json` maps to PR comment summaries and Linear backlog summaries.
- `candidate-playbook.md` maps to the suggested follow-up PR body.
- `verifier-result.json` maps to pass/fail check-run evidence.
Future ECC Tools work should consume these artifacts as the fixture shape before
it adds hosted retrieval or model-backed judging. The local prototype is enough
to prove the contract before any paid API or vector store is introduced.
## Promotion Rules
A candidate can be promoted only when:
- the verifier result is `accepted`;
- at least one rejected candidate proves the verifier can say no;
- every source PR or reference artifact has attribution;
- the proposed action is maintainer-owned and reversible;
- validation commands are named;
- unresolved translator, release, billing, or publication items remain blocked
until separately approved.
## Next Expansion
The local evaluator/RAG corpus now covers the current evidence buckets. Future
work should consume these fixtures from ECC Tools before adding hosted
retrieval, vector storage, model-backed judging, or automated check-run
promotion.

View File

@@ -0,0 +1,80 @@
# HUD Status And Session Control Contract
This contract defines the portable status payload ECC uses for local operator
surfaces, handoffs, and future HUDs. It is intentionally harness-neutral: a
Claude Code statusline, Codex pane, dmux session, OpenCode run, or terminal-only
workflow can emit partial data without changing field names.
The canonical example lives at
[`examples/hud-status-contract.json`](../../examples/hud-status-contract.json).
## Payload Shape
Every status payload uses `schema_version: "ecc.hud-status.v1"` and keeps these
top-level sections stable:
| Field | Purpose | Primary Source |
|---|---|---|
| `context` | Model, harness, repo, branch, worktree, session id, and context-window pressure | statusline stdin, git, session adapters |
| `toolCalls` | Recent tool counts, pending calls, stale calls, and last tool event | `loop-status`, `tool-usage.jsonl`, hook bridge |
| `activeAgents` | Current workers/subagents, runtime state, branch, worktree, objective, and handoff paths | dmux/orchestration snapshots |
| `todos` | Current in-progress task and todo counts | Claude todos, local task files, plan metadata |
| `checks` | Local and remote validation status with command/check URLs when available | CI, local commands, release gates |
| `cost` | Session spend, token counts, budget, and trend | cost tracker, metrics bridge |
| `risk` | Attention state, conflict pressure, stale calls, dirty worktree, and manual-review flags | readiness gates, git, queue state |
| `queueState` | GitHub PR/issue/discussion counts, conflict queue, merge queue, and stale-salvage queue | GitHub sync, work items |
| `sessionControls` | Supported operator actions for the current target | ECC CLI, dmux, git/GitHub |
| `sync` | Linear, GitHub, and handoff publication state | status updates, work items, handoff writer |
Fields can be `null`, empty arrays, or `"unknown"` when a harness cannot expose
the signal. Producers should not invent incompatible names. Consumers should
render missing sections as unavailable, not as green.
## Session Controls
The minimum session-control vocabulary is:
| Control | Meaning |
|---|---|
| `create` | Start a new isolated run, worktree, or orchestration plan |
| `resume` | Reattach to an existing session or historical target |
| `status` | Emit the current payload without mutating state |
| `stop` | Request a graceful stop or mark the session completed |
| `diff` | Show current working-tree or worker diff |
| `pr` | Open or inspect the linked pull request |
| `mergeQueue` | Show merge-ready, blocked, and waiting-check items |
| `conflictQueue` | Show dirty/conflicting PRs or worktrees needing integration |
`sessionControls.supported` lists the controls available for the current
harness. `sessionControls.blocked` explains unavailable controls, for example a
missing GitHub token, no tmux session, or a read-only adapter.
## Sync Contract
The sync section separates durable trackers:
- `Linear` records project status update id, health, and whether issue creation
is blocked by workspace capacity.
- `GitHub` records the current repo, PR/issue/discussion queue counts, and the
latest merged or open PR tied to the session.
- `handoff` records the durable Markdown handoff path and whether it has been
written after the latest batch.
This makes real-time progress tracking explicit without requiring every run to
create Linear issues or GitHub comments. When Linear issue capacity is blocked,
the status payload can still prove progress through project updates and repo
handoffs.
## Current Implementations
- `ecc status --json` exposes readiness, active sessions, skill runs, install
health, governance, and linked work items from the SQLite state store.
- `ecc loop-status --json --write-dir <dir>` writes live transcript snapshots
and attention signals for long-running loops.
- `ecc session-inspect <target> --write <path>` emits canonical session
snapshots from dmux and Claude-history adapters.
- `scripts/hooks/ecc-statusline.js` renders compact model, task, cost, tool,
file, duration, directory, and context pressure signals inside Claude Code.
The `ecc.hud-status.v1` payload is the common outer contract these surfaces can
project into before ECC grows a dedicated full-screen HUD.

View File

@@ -19,6 +19,10 @@ operator needs.
- Live status: `scripts/loop-status.js` can emit JSON, watch active loops, and
write snapshots for dashboards or handoffs.
- HUD/status contract: `docs/architecture/hud-status-session-control.md` and
`examples/hud-status-contract.json` define the portable payload for context,
tool calls, active agents, todos, checks, cost, risk, queues, session
controls, and tracker sync.
- Session traces: `scripts/session-inspect.js` can inspect Claude, dmux, and
adapter-backed sessions, then write canonical snapshots.
- Harness baseline: `scripts/harness-audit.js` provides a repeatable scorecard
@@ -56,9 +60,11 @@ later, but only after the local event model is useful enough to trust.
scorecard.
3. Run `node scripts/loop-status.js --json --write-dir .ecc/loop-status`
during longer autonomous batches.
4. Run `node scripts/session-inspect.js --list-adapters` to confirm which
4. Review `examples/hud-status-contract.json` before wiring a new HUD or
operator dashboard.
5. Run `node scripts/session-inspect.js --list-adapters` to confirm which
session surfaces are available.
5. Use ECC2 tool logs for risky operations, conflict analysis, and handoff
6. Use ECC2 tool logs for risky operations, conflict analysis, and handoff
review before increasing autonomy.
The end-state is practical: before asking ECC to run larger multi-agent loops,

View File

@@ -0,0 +1,119 @@
# ECC v2.0.0-rc.1 Naming And Publication Matrix
Snapshot date: 2026-05-12.
This matrix answers the release question "ship as Everything Claude Code, ECC,
or a renamed surface?" for the rc.1 lane. It is evidence for planning, not a
publication action.
## Decision
For `v2.0.0-rc.1`, keep the public identity as **Everything Claude Code (ECC)**.
Use **ECC** as the short product name in copy, plugin slugs, status surfaces,
and diagrams, but do not rename the GitHub repo, npm package, or package entry
points before the rc.1 release.
Reason:
- the current install surface already works as `ecc-universal` plus the `ecc`
plugin slug;
- the exact npm package name `ecc` is already occupied by an unrelated elliptic
curve cryptography package;
- the short repo `affaan-m/ecc` was not found with current auth, but renaming
`affaan-m/everything-claude-code` before rc.1 would create avoidable URL,
package, docs, and marketplace churn;
- Claude and Codex plugin surfaces are already short enough as `ecc`;
- rc.1 should prove the release, plugin, and publication pipeline before any
broader brand migration.
## Current Values
| Surface | Current value | Evidence command | 2026-05-12 result | Release decision |
| --- | --- | --- | --- | --- |
| Product display name | `Everything Claude Code` | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Present across README, release notes, launch copy, and plugin manifests | Keep for rc.1 |
| Short name | `ECC` | README/release docs | Used as the short cross-harness brand | Keep and prefer in tight copy |
| GitHub repo | `affaan-m/everything-claude-code` | `git remote get-url origin` | `https://github.com/affaan-m/everything-claude-code.git` | Keep for rc.1 |
| Possible short repo | `affaan-m/ecc` | `gh repo view affaan-m/ecc` | Not found with current auth | Candidate after rc.1 only |
| npm package | `ecc-universal` | `node -p "require('./package.json').name"` | `ecc-universal` | Keep for rc.1 |
| npm package version | `2.0.0-rc.1` local, `1.10.0` registry latest | `node -p "require('./package.json').version"` and `npm view ecc-universal name version dist-tags --json` | Local rc.1 is ready; registry latest remains `1.10.0` | Publish rc as `next`, not `latest` |
| Exact npm short name | `ecc` | `npm view ecc name version description repository.url --json` | Occupied by `ecc@0.0.2`, "Elliptic curve cryptography functions." | Do not use |
| Scoped npm short name | `@affaan-m/ecc` | `npm view @affaan-m/ecc name version --json` | Registry 404 | Possible future scoped package if npm scope policy permits |
| Former package name | `everything-claude-code` | `npm view everything-claude-code name version dist-tags --json` | Registry reports unpublished on 2026-02-07 | Do not revive for rc.1 |
| Claude plugin slug | `ecc` | `node -p "require('./.claude-plugin/plugin.json').name"` | `ecc` | Keep |
| Claude plugin version | `2.0.0-rc.1` | `claude plugin validate .claude-plugin/plugin.json` | Validation passed on Claude Code `2.1.121` | Ready for release-tag gate |
| Claude marketplace entry | `ecc` | `.claude-plugin/marketplace.json` | Version and repo point at current rc.1 surface | Keep |
| Codex plugin slug | `ecc` | `node -p "require('./.codex-plugin/plugin.json').name"` | `ecc` | Keep |
| Codex plugin version | `2.0.0-rc.1` | `node tests/docs/ecc2-release-surface.test.js` | Release surface test passed | Ready for Codex marketplace/manual marketplace gate |
| OpenCode package | `ecc-universal` | `node -p "require('./.opencode/package.json').name"` | `ecc-universal` | Keep |
| OpenCode build | Generated package output | `npm run build:opencode` | Passed | Ready for package dry-run gate |
| npm pack surface | Reduced runtime package | `npm pack --dry-run --json` | Produced `ecc-universal-2.0.0-rc.1.tgz`, 969 entries, about 5.0 MB unpacked | Needs final release-commit rerun |
## Publication Paths
| Path | Current evidence | Required next action | Blocker |
| --- | --- | --- | --- |
| GitHub release | `docs/releases/2.0.0-rc.1/` and release notes are in-tree | Re-run required command evidence from the final release commit, then create/verify `v2.0.0-rc.1` prerelease | No tag/release yet |
| npm | `ecc-universal` local package version is `2.0.0-rc.1`; registry latest is `1.10.0` | Publish rc with `npm publish --tag next` after final `npm pack --dry-run` and release tests | Do not publish before final release commit |
| Claude plugin | `claude plugin validate .claude-plugin/plugin.json` passed; `claude plugin tag --help` confirms the release tag flow creates `{name}--v{version}` tags and can push them | Run `claude plugin tag .claude-plugin --dry-run` from the clean release commit, then tag/push only after release approval | No plugin release tag created in this pass |
| Claude marketplace | `.claude-plugin/marketplace.json` points at `ecc` and the public repo | Verify marketplace update/install path after tag exists | External marketplace propagation not verified |
| Codex plugin | `codex plugin marketplace` supports add/upgrade/remove; `.codex-plugin/plugin.json` is present and release-surface tests pass | Confirm marketplace source format, then test add/upgrade from the public repo or marketplace source | No public Codex marketplace submission path verified in this pass |
| OpenCode package | `.opencode/package.json` builds from source and ships inside npm package | Re-run `npm run build:opencode` and package dry-run from release commit | OpenCode CLI 1.2.21 did not expose a separate plugin publication command in this pass |
| ECC Tools billing claim | README and launch copy mention ECC Tools / marketplace context | Verify live GitHub App billing and plan state before any payment announcement | Billing dashboard/API evidence not recorded in this pass |
| Social and longform copy | X thread, LinkedIn copy, article outline, GitHub release copy exist | Replace any stale URLs, then publish only after release/npm/plugin URLs work | Public URLs not final until release actions complete |
## Rename After rc.1
If the project moves from "Everything Claude Code" toward "ECC" after rc.1,
do it as a staged migration:
1. Keep `ecc-universal` as the npm package until a replacement package has a
verified owner, deprecation plan, and install migration.
2. Keep `affaan-m/everything-claude-code` as the canonical repo until release
notes, docs, plugin marketplace entries, npm metadata, and external links
are prepared for redirects.
3. Use `ECC` as the product name in new diagrams, status payloads, and
cross-harness docs immediately.
4. Reserve or create any new GitHub/npm/package surfaces before announcing the
rename.
5. Ship a compatibility guide that maps old commands, package names, plugin
slugs, and docs URLs to the new names.
## Evidence Captured In This Pass
```text
git rev-parse HEAD
7109ee08db7209c5d14809efcf832043020dfc57
node -p "require('./package.json').name + '@' + require('./package.json').version"
ecc-universal@2.0.0-rc.1
node -p "require('./.claude-plugin/plugin.json').name + '@' + require('./.claude-plugin/plugin.json').version"
ecc@2.0.0-rc.1
node -p "require('./.codex-plugin/plugin.json').name + '@' + require('./.codex-plugin/plugin.json').version"
ecc@2.0.0-rc.1
node -p "require('./.opencode/package.json').name + '@' + require('./.opencode/package.json').version"
ecc-universal@2.0.0-rc.1
npm view ecc name version description repository.url --json
ecc@0.0.2 is occupied by an unrelated elliptic curve cryptography package.
npm view ecc-universal name version dist-tags --json
registry latest is 1.10.0; no rc dist-tag exists yet.
claude plugin validate .claude-plugin/plugin.json
Validation passed on Claude Code 2.1.121.
node tests/docs/ecc2-release-surface.test.js
18 release-surface checks passed.
node tests/scripts/npm-publish-surface.test.js
2 npm publish-surface checks passed.
npm run build:opencode
Passed.
npm pack --dry-run --json
Produced ecc-universal-2.0.0-rc.1.tgz, 969 entries, about 5.0 MB unpacked.
```

View File

@@ -0,0 +1,103 @@
# ECC v2.0.0-rc.1 Publication Evidence — 2026-05-12
This is dry-run release evidence only. It does not create a GitHub release, npm
publication, plugin tag, marketplace submission, or announcement post.
## Source Commit
| Field | Evidence |
| --- | --- |
| Upstream main base | `0598af70a51346bae34d987b9bed143386055967` |
| Evidence branch | `codex/release-publication-evidence` |
| Evidence scope | Working tree with this branch's package hygiene and release-doc updates |
| Git remote | `https://github.com/affaan-m/everything-claude-code.git` |
| Local status caveat | Working tree had an unrelated untracked `docs/drafts/` directory |
The actual release operator should repeat these checks from the final release
commit with a clean checkout before publishing.
## Registry And Release State
| Surface | Command | Result |
| --- | --- | --- |
| GitHub prerelease | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code --json tagName,url,isPrerelease` | `release not found` |
| npm dist-tags | `npm view ecc-universal dist-tags --json` | `{ "latest": "1.10.0" }` |
| npm package metadata | `node -p "require('./package.json').name + '@' + require('./package.json').version"` | `ecc-universal@2.0.0-rc.1` |
| Product identity | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Present in README and rc.1 release docs |
## npm Dry Run
The first pack pass exposed local Python bytecode cache files in the tarball
because broad package `files` entries included untracked local `__pycache__`
paths. This branch adds explicit package-file exclusions and a regression test
so `npm pack` fails if Python bytecode appears in the package surface.
| Command | Result |
| --- | --- |
| `node tests/scripts/npm-publish-surface.test.js` | Passed `2/2`; includes Python bytecode exclusion assertion |
| `npm pack --dry-run --json` | `ecc-universal-2.0.0-rc.1.tgz`; `entryCount: 965`; `size: 1565968`; `unpackedSize: 4934637`; `hasBytecode: false` |
| `npm publish --tag next --dry-run --json` | Dry-run target is npm registry with `tag next`; `entryCount: 965`; `hasBytecode: false` |
Temporary install smoke:
| Command | Result |
| --- | --- |
| `npm pack --pack-destination /tmp/ecc-publication-smoke-dd9ud5 --json` | Created `ecc-universal-2.0.0-rc.1.tgz` for local install smoke |
| `npm install --prefix /tmp/ecc-publication-smoke-dd9ud5 /tmp/ecc-publication-smoke-dd9ud5/ecc-universal-2.0.0-rc.1.tgz` | Added 8 packages |
| `node /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal/scripts/ecc.js --help` | Printed ECC selective-install CLI help |
| `node /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal/scripts/catalog.js profiles --json` | Returned the 6 install profiles: `minimal`, `core`, `developer`, `security`, `research`, `full` |
| `find /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal -path '*__pycache__*' -o -name '*.pyc' -o -name '*.pyo' -o -name '*.pyd'` | No output |
## Plugin And Harness Evidence
| Surface | Command | Result |
| --- | --- | --- |
| Claude plugin manifest | `claude plugin validate .claude-plugin/plugin.json` | Passed |
| Claude plugin tag preflight | `claude plugin tag .claude-plugin --dry-run` | Blocked by unrelated untracked `docs/drafts/` |
| Claude plugin tag forced dry-run | `claude plugin tag .claude-plugin --dry-run --force` | Would create `ecc--v2.0.0-rc.1` at HEAD; do not use `--force` for a real release unless the maintainer explicitly decides to |
| Codex marketplace CLI | `codex plugin marketplace --help` and subcommand help | Supports `add`, `upgrade`, and `remove`; `add` supports repo and local marketplace roots |
| OpenCode package | `npm run build:opencode` | Passed |
| Claude hook/plugin route | `node tests/hooks/hooks.test.js` | Passed `236/236` |
| Codex release surface | `node tests/docs/ecc2-release-surface.test.js` | Passed `18/18` |
| Agent/catalog metadata | `node tests/scripts/catalog.test.js` | Passed `7/7` |
| Observability gate | `npm run observability:ready` | Passed `16/16` |
## Clean-Checkout Claude Plugin Smoke
This follow-up pass used a detached clean worktree at
`/tmp/ecc-clean-plugin-evidence` from commit
`bfacf37715b39655cbc2c48f12f2a35c67cb0253`. It used an isolated temp home
(`HOME=/tmp/ecc-clean-plugin-home`) and a temp local project
(`/tmp/ecc-plugin-install-smoke`), so it did not write to the user's real Claude
plugin config.
| Command | Result |
| --- | --- |
| `git -C /tmp/ecc-clean-plugin-evidence status --short --branch` | `## HEAD (no branch)` with no dirty or untracked files |
| `claude plugin validate .claude-plugin/plugin.json` | Passed |
| `claude plugin validate .claude-plugin/marketplace.json` | Passed |
| `claude plugin tag .claude-plugin --dry-run` | Passed without `--force`; would create `ecc--v2.0.0-rc.1` at HEAD and push `refs/tags/ecc--v2.0.0-rc.1` |
| `claude plugin marketplace add /tmp/ecc-clean-plugin-evidence --scope local` with temp `HOME` | Added marketplace `ecc` in local settings |
| `claude plugin list --available --json` with temp `HOME` | Listed `ecc@ecc`, version `2.0.0-rc.1`, source `./` |
| `claude plugin install ecc@ecc --scope local` with temp `HOME` | Installed `ecc@ecc` in local scope |
| `claude plugin list --json` with temp `HOME` | Listed `ecc@ecc`, version `2.0.0-rc.1`, enabled, local scope, install path under `/tmp/ecc-clean-plugin-home/.claude/plugins/cache/ecc/ecc/2.0.0-rc.1` |
| `claude plugin uninstall ecc@ecc --scope local` with temp `HOME` | Uninstalled successfully; final plugin list was `[]` |
## Announcement Placeholder Check
The forbidden-placeholder scan returned only the publication-readiness checklist
lines that name those forbidden placeholders. No placeholder instances were
found in the launch pack.
## Remaining Blockers
- Create or verify GitHub prerelease `v2.0.0-rc.1`.
- Publish `ecc-universal@2.0.0-rc.1` with npm dist-tag `next`.
- Create and push the Claude plugin tag only after explicit approval. The clean
checkout dry run and temp install smoke now pass.
- Confirm the live Claude/Codex/OpenCode marketplace submission path or record
the manual submission owner and status.
- Verify ECC Tools billing/App/Marketplace claims before using them in launch
copy.
- Refresh announcement copy with live URLs after release and package/plugin
URLs exist.

View File

@@ -4,32 +4,37 @@ This checklist is the release gate for public publication surfaces. Do not use
it as evidence by itself. Fill the evidence fields with fresh command output or
URLs from the exact commit being released.
For the current rc.1 naming decision and package/plugin publication path, see
[`naming-and-publication-matrix.md`](naming-and-publication-matrix.md).
For the May 12 dry-run evidence pass, see
[`publication-evidence-2026-05-12.md`](publication-evidence-2026-05-12.md).
## Release Identity Matrix
| Surface | Expected value | Source of truth | Fresh check | Evidence artifact | Owner | Status |
| --- | --- | --- | --- | --- | --- | --- |
| Product name | Everything Claude Code / ECC | `README.md`, `CHANGELOG.md`, release notes | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Pending | Release owner | Pending |
| GitHub repo | `affaan-m/everything-claude-code` | Git remote and release URLs | `git remote get-url origin` | Pending | Release owner | Pending |
| Git tag | `v2.0.0-rc.1` | GitHub releases | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code` | Pending | Release owner | Pending |
| npm package | `ecc-universal` | `package.json` | `node -p "require('./package.json').name"` | Pending | Package owner | Pending |
| npm version | `2.0.0-rc.1` | `VERSION`, `package.json`, lockfiles | `node -p "require('./package.json').version"` | Pending | Package owner | Pending |
| npm dist-tag | `next` for rc, `latest` only for GA | npm registry | `npm view ecc-universal dist-tags --json` | Pending | Package owner | Pending |
| Claude plugin slug | `ecc` / `ecc@ecc` install path | `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` | `node tests/hooks/hooks.test.js` | Pending | Plugin owner | Pending |
| Claude plugin manifest | `2.0.0-rc.1`, no unsupported `agents` or explicit `hooks` fields | `.claude-plugin/plugin.json`, `.claude-plugin/PLUGIN_SCHEMA_NOTES.md` | `claude plugin validate .claude-plugin/plugin.json` | Pending | Plugin owner | Pending |
| Codex plugin manifest | `2.0.0-rc.1` with shared skill source | `.codex-plugin/plugin.json` | `node tests/docs/ecc2-release-surface.test.js` | Pending | Plugin owner | Pending |
| OpenCode package | `ecc-universal` plugin module | `.opencode/package.json`, `.opencode/index.ts` | `npm run build:opencode` | Pending | Package owner | Pending |
| Agent metadata | `2.0.0-rc.1` | `agent.yaml`, `.agents/plugins/marketplace.json` | `node tests/scripts/catalog.test.js` | Pending | Release owner | Pending |
| Migration copy | rc.1 upgrade path, not GA claim | `release-notes.md`, `quickstart.md`, `HERMES-SETUP.md` | `npx markdownlint-cli docs/releases/2.0.0-rc.1/*.md` | Pending | Docs owner | Pending |
| Product name | Everything Claude Code / ECC | `README.md`, `CHANGELOG.md`, release notes | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
| GitHub repo | `affaan-m/everything-claude-code` | Git remote and release URLs | `git remote get-url origin` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
| Git tag | `v2.0.0-rc.1` | GitHub releases | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code` | `release not found` | Release owner | Blocked until release approval |
| npm package | `ecc-universal` | `package.json` | `node -p "require('./package.json').name"` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
| npm version | `2.0.0-rc.1` | `VERSION`, `package.json`, lockfiles | `node -p "require('./package.json').version"` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
| npm dist-tag | `next` for rc, `latest` only for GA | npm registry | `npm view ecc-universal dist-tags --json` | Current registry only has `latest: 1.10.0`; `next` is pending publish | Package owner | Blocked until publish approval |
| Claude plugin slug | `ecc` / `ecc@ecc` install path | `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` | `node tests/hooks/hooks.test.js` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
| Claude plugin manifest | `2.0.0-rc.1`, no unsupported `agents` or explicit `hooks` fields | `.claude-plugin/plugin.json`, `.claude-plugin/PLUGIN_SCHEMA_NOTES.md` | `claude plugin validate .claude-plugin/plugin.json` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
| Codex plugin manifest | `2.0.0-rc.1` with shared skill source | `.codex-plugin/plugin.json` | `node tests/docs/ecc2-release-surface.test.js` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
| OpenCode package | `ecc-universal` plugin module | `.opencode/package.json`, `.opencode/index.ts` | `npm run build:opencode` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
| Agent metadata | `2.0.0-rc.1` | `agent.yaml`, `.agents/plugins/marketplace.json` | `node tests/scripts/catalog.test.js` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
| Migration copy | rc.1 upgrade path, not GA claim | `release-notes.md`, `quickstart.md`, `HERMES-SETUP.md` | `npx markdownlint-cli docs/releases/2.0.0-rc.1/*.md` | Pending final lint on release commit | Docs owner | Pending |
## Publication Gates
| Gate | Required evidence | Fresh check | Blocker field | Owner | Status |
| --- | --- | --- | --- | --- | --- |
| GitHub release | Tag exists, release notes use final URLs, assets attached if needed | `gh release view v2.0.0-rc.1 --json tagName,url,isPrerelease` | `Blocker:` | Release owner | Pending |
| npm package | `npm pack --dry-run` has expected files, version matches, rc goes to `next` | `npm pack --dry-run` and `npm publish --tag next --dry-run` where supported | `Blocker:` | Package owner | Pending |
| Claude plugin | Manifest validates, marketplace JSON points to public repo, install docs match slug | `claude plugin validate .claude-plugin/plugin.json` | `Blocker:` | Plugin owner | Pending |
| Codex plugin | Manifest version matches package and docs, hook limitations are explicit | `node tests/docs/ecc2-release-surface.test.js` | `Blocker:` | Plugin owner | Pending |
| OpenCode package | Build output is regenerated from source and package metadata is current | `npm run build:opencode` | `Blocker:` | Package owner | Pending |
| GitHub release | Tag exists, release notes use final URLs, assets attached if needed | `gh release view v2.0.0-rc.1 --json tagName,url,isPrerelease` | `Blocker: release not found on 2026-05-12` | Release owner | Pending approval |
| npm package | `npm pack --dry-run` has expected files, version matches, rc goes to `next` | `npm pack --dry-run` and `npm publish --tag next --dry-run` where supported | `Blocker: actual publish requires approval; dry run passed with next tag` | Package owner | Dry-run passed |
| Claude plugin | Manifest validates, marketplace JSON points to public repo, install docs match slug | `claude plugin validate .claude-plugin/plugin.json`; `claude plugin tag .claude-plugin --dry-run`; isolated temp-home install smoke | `Blocker: real tag creation/push requires approval` | Plugin owner | Clean-checkout dry-run and install smoke recorded |
| Codex plugin | Manifest version matches package and docs, hook limitations are explicit | `node tests/docs/ecc2-release-surface.test.js` | `Blocker: marketplace submission path still manual/owner-gated` | Plugin owner | Evidence recorded |
| OpenCode package | Build output is regenerated from source and package metadata is current | `npm run build:opencode` | `Blocker: none for local build; public distribution still follows npm/plugin release` | Package owner | Evidence recorded |
| ECC Tools billing reference | Any billing claim links to verified Marketplace/App state | `gh api repos/ECC-Tools/ECC-Tools` plus app/marketplace URL check | `Blocker:` | ECC Tools owner | Pending |
| Announcement copy | X, LinkedIn, GitHub release, and longform copy point to live URLs | `rg -n "TODO" docs/releases/2.0.0-rc.1` and repeat for `TBD` | `Blocker:` | Release owner | Pending |
@@ -42,10 +47,10 @@ Record the exact commit SHA and command output before any publication action:
| Clean release branch | `git status --short --branch` | On intended release commit; no unrelated files | Pending |
| Harness audit | `npm run harness:audit -- --format json` | 70/70 passing | Pending |
| Adapter scorecard | `npm run harness:adapters -- --check` | PASS | Pending |
| Observability readiness | `npm run observability:ready` | 14/14 passing | Pending |
| Observability readiness | `npm run observability:ready` | 16/16 passing | Pending |
| Root suite | `node tests/run-all.js` | 0 failures | Pending |
| Markdown lint | `npx markdownlint-cli '**/*.md' --ignore node_modules` | 0 failures | Pending |
| Package surface | `node tests/scripts/npm-publish-surface.test.js` | 0 failures | Pending |
| Package surface | `node tests/scripts/npm-publish-surface.test.js` | 0 failures; no Python bytecode in npm tarball | `2/2` passed in May 12 evidence pass |
| Release surface | `node tests/docs/ecc2-release-surface.test.js` | 0 failures | Pending |
| Optional Rust surface | `cd ecc2 && cargo test` | 0 failures or explicit deferral | Pending |
@@ -53,8 +58,8 @@ Record the exact commit SHA and command output before any publication action:
- `main` has unreviewed release-surface changes after the evidence was recorded.
- `npm view ecc-universal dist-tags --json` contradicts the intended rc/GA tag.
- Claude plugin validation is unavailable and no manual install smoke test is
recorded.
- Claude plugin validation is unavailable or no clean-checkout install smoke
test is recorded for the intended release commit.
- Release notes or announcement drafts still contain placeholder URLs,
`TODO`, `TBD`, private workspace paths, or personal operator references.
- Billing, Marketplace, or plugin-submission copy claims a live surface before

View File

@@ -19,16 +19,22 @@ on fresh branches, and credit the source PR.
| Source PR | Original contribution | Salvage result |
| --- | --- | --- |
| #1232 | `skill-scout` search-before-creating workflow | Salvaged in the May 12 cost/skill-scout maintainer pass with current repo wording, external-source vetting, and no stale catalog-count edits. |
| #1304 | Cost tracking skill and `/cost-report` command | Salvaged in the May 12 cost/skill-scout maintainer pass with current command/skill conventions and without stale hard-coded model pricing. |
| #1309 | Trading/community project material | Salvaged in #1761 as a neutral community-project README listing. |
| #1310 | Django reviewer, build resolver, and Celery async task guidance | Salvaged in the May 12 Django/Celery maintainer pass with current catalog counts and minor example cleanup. |
| #1322 | Vietnamese README translation | Salvaged in #1764 as `docs/vi-VN/README.md` plus selector updates. |
| #1325 | Quarkus framework guidance, Java agents, and localization material | Salvaged across #1771 and #1803; stale broad docs/count edits were not copied. |
| #1326 | Angular developer skill and rules | Salvaged in #1763 with current skill, rules, install wiring, and catalog updates. |
| #1328 | Continuous-learning Windows UTF-8 stdout fix | Salvaged in #1761. |
| #1329 | Plugin install detection hardening | Salvaged in #1761 through current harness audit detection support. |
| #1334 | Windows desktop E2E skill | Salvaged in #1762 with install, package, and catalog wiring. |
| #1352 | Qwen install target | Salvaged in #1738 through the current Qwen install target. |
| #1413 | Network and homelab skills/agents | Salvaged through #1729, #1731, #1745, and #1778. |
| #1414 | F# rules, reviewer agent, and testing skill | Salvaged in #1770 with current install manifests, detection tests, and catalog wiring. |
| #1429 | JoyCode install target | Salvaged in #1737 through the current JoyCode install target. |
| #1467 | Scientific skills and OpenCode discovery work | Useful USPTO and gget pieces salvaged in #1740; stale generated claims were not copied. |
| #1478 | HarmonyOS/ArkTS rules, resolver agent, and CLAUDE example | Salvaged in #1769 with current install wiring; stale `ecc2` session/TUI edits were not carried. |
| #1493 | SessionStart context scoping | Salvaged in #1774 with current hook semantics and tests. |
| #1498 | PRD planning flow | Salvaged in #1777. |
| #1504 | Statusline/context monitor hooks | Salvaged in #1776 with current hook manifest structure and tests. |
@@ -37,6 +43,9 @@ on fresh branches, and credit the source PR.
| #1559 | `error-handling` skill | Salvaged in #1772. |
| #1566 | Agent architecture audit skill | Salvaged in #1772. |
| #1578 | OpenCode file-probe hardening | Salvaged in #1773. |
| #1603 | `plan-orchestrate` skill | Salvaged in #1766 with current manifest/catalog wiring. |
| #1658 | Code-reviewer false-positive suppression | Salvaged in the May 12 code-reviewer maintainer pass with current review-agent wording, a proof gate for HIGH/CRITICAL findings, common false-positive exclusions, and a regression test. |
| #1659 | Frontend design direction and interface-polish skills | Salvaged in the May 12 frontend-design maintainer pass with canonical `skills/` layout and current ECC frontend guidance, while preserving the repo guardrail that the official `frontend-design` skill should be installed from `anthropics/skills`. |
| #1674 | Production audit skill | Salvaged in #1732 after supply-chain/privacy review and rewrite. |
| #1687 | zh-CN localization sync | Large safe subsets salvaged in #1746-#1752; remaining pieces require translator/manual review. |
| #1694 | Portfolio curation | Useful focused curation updates salvaged in #1723 and #1724. |
@@ -50,6 +59,41 @@ on fresh branches, and credit the source PR.
| #1727 | MySQL patterns skill | Salvaged in #1733. |
| #1757 | Machine-learning engineering workflow | Salvaged in #1758 and tuned in #1759. |
## 2026-05-12 Gap Pass
The initial stale-closure ledger covered the P0 cleanup cohort and the biggest
salvage branches. A follow-up gap pass over PRs closed on 2026-05-11 found
additional useful items that were already present on `main` or still worth
porting.
| Source PR | Disposition |
| --- | --- |
| #1310 | Ported through the Django/Celery maintainer branch after confirming `agents/django-reviewer.md`, `agents/django-build-resolver.md`, and `skills/django-celery/SKILL.md` were still missing. |
| #1325 | Useful Quarkus framework material was already preserved across #1771 and #1803; current `main` contains the Quarkus rules/skills plus Java reviewer/build-resolver surfaces. |
| #1360 | Already present as `skills/security-bounty-hunter/`. |
| #1414 | Useful F# support was already preserved in #1770; current `main` contains the F# rules, reviewer agent, testing skill, install wiring, and detection tests. |
| #1415 | Already present as `skills/vite-patterns/`. |
| #1478 | Useful HarmonyOS/ArkTS support was already preserved in #1769; current `main` contains the ArkTS rules, resolver agent, CLAUDE example, and install wiring. |
| #1438 | Already present as `skills/ui-to-vue/`. |
| #1504 | Already mapped to #1776 in the durable salvage table. |
| #1508 | Already present as `skills/fastapi-patterns/` and `agents/fastapi-reviewer.md`. |
| #1563/#1564/#1565 | Translator/manual review: zh-TW, tr, and pt-BR README syncs may contain useful localization updates, but stale README/version/count text must be reviewed by language owners before import. |
| #1567 | Already present as the current GateGuard subagent file-gate bypass in `scripts/hooks/gateguard-fact-force.js`, with Bash gates preserved and regression tests in `tests/hooks/gateguard-fact-force.test.js`. |
| #1570 | Already present as public `llm.prompt` imports, keyword-based `PromptBuilder` construction, and template registry helpers; current tests register the `unit` marker through `tests/conftest.py`. |
| #1584 | Already present as the iTerm2 native desktop-notification fast path in `scripts/hooks/desktop-notify.js`, with multiplexer fallback to `osascript`. |
| #1589 | Already present as quoted `actions/checkout` detection in `scripts/ci/validate-workflow-security.js` plus double/single-quote regression tests. |
| #1594 | Already present as HTTP MCP reachability handling that treats HTTP 400, 401, and 403 probe responses as reachable/auth-gated, with hook tests. |
| #1597 | Already present as catalog-count validation for README, AGENTS, zh-CN docs, `.claude-plugin/plugin.json`, and `.claude-plugin/marketplace.json`. |
| #1602 | Already present as the `continuous-learning` v1 deprecation that routes new usage to `continuous-learning-v2` while preserving the archival v1 surface. |
| #1603 | Useful `/plan-orchestrate` work was already preserved in #1766 with current package/catalog metadata. |
| #1604 | Skipped: Windows drag-and-drop local installer copies files directly and runs `git pull`; current managed installer/profile flow is safer and supersedes it. |
| #1609 | Translator/manual review: Persian README translation may be useful, but needs language review and current catalog/version refresh before import. |
| #1613 | Already present in `rules/web/hooks.md` as the `tsc --incremental` plus timeout-capped PostToolUse example. |
| #1631 | Already present in `scripts/hooks/suggest-compact.js` and `tests/hooks/hooks.test.js`; current code reads `session_id` from stdin JSON before falling back to `CLAUDE_SESSION_ID`. |
| #1648 | Already present in `src/llm/providers/claude.py`; current Claude provider collects all text and tool-use content blocks and covers the behavior in `tests/test_claude_provider.py`. |
| #1658 | Ported through the code-reviewer maintainer branch after confirming the false-positive proof gate and common false-positive skip list were still missing. |
| #1693 | Already present as `skills/redis-patterns/`. |
## Already Present Or Superseded
| Source PR | Disposition |
@@ -58,6 +102,9 @@ on fresh branches, and credit the source PR.
| #1318 | Gemini agent adaptation utility was already present on current `main`. |
| #1323 | Hook config update was already present on current `main`. |
| #1337 | Catalog count update was superseded by current catalog-count sync. |
| #1631 | `suggest-compact` stdin `session_id` isolation was already present on current `main` with hook tests. |
| #1608 | Fix for unsafe dashboard document/terminal open handling was already present on current `main` through safe runtime helpers and project-bound document opening. |
| #1678 | Windows MCP `.cmd`/`.bat` fallback behavior was already present on current `main` with current health-check tests. |
| #1682/#1701 | Strategic compact hook-path fixes were merged directly or superseded by current docs fixes. |
| JARVIS #4/#5/#6 | Stale failing dependency-only PRs; future dependency state should be regenerated by Dependabot. |
@@ -70,15 +117,22 @@ on fresh branches, and credit the source PR.
| #1341 | Very large low-signal generated change with no safe focused salvage unit. |
| #1416/#1465 | Accidental fork-sync PRs with no focused contribution. |
| #1475 | One-line Gemini CLI bridge idea was too stale and underspecified to port safely. |
| #1604 | Drag-and-drop Windows installer bypasses the current managed installer, performs direct broad copies, and runs `git pull` from a local install script. |
## Remaining Manual-Review Backlog
Only the #1687 localization tail remains plausibly useful but unsafe to
auto-port.
The remaining plausibly useful backlog is translation/localization work that is
unsafe to auto-port without language-owner review:
- #1687 zh-CN localization tail
- #1609 Persian README translation
- #1563 zh-TW README sync
- #1564 Turkish README sync
- #1565 pt-BR README sync
Handling rule:
1. Keep #1687 in translator/manual review.
1. Keep these PRs in translator/manual review.
2. Split any future work by surface: agents, commands, top-level docs, release
and count surfaces, then skills.
3. Do not import stale top-level docs that carry old version or catalog-count

View File

@@ -1,6 +1,6 @@
# Everything Claude Code (ECC) — 智能体指令
这是一个**生产就绪的 AI 编码插件**,提供 58 个专业代理、220 项技能、74 条命令以及自动化钩子工作流,用于软件开发。
这是一个**生产就绪的 AI 编码插件**,提供 60 个专业代理、225 项技能、75 条命令以及自动化钩子工作流,用于软件开发。
**版本:** 2.0.0-rc.1
@@ -146,9 +146,9 @@
## 项目结构
```
agents/ — 58 个专业子代理
skills/ — 220 个工作流技能和领域知识
commands/ — 74 个斜杠命令
agents/ — 60 个专业子代理
skills/ — 225 个工作流技能和领域知识
commands/ — 75 个斜杠命令
hooks/ — 基于触发的自动化
rules/ — 始终遵循的指导方针(通用 + 每种语言)
scripts/ — 跨平台 Node.js 实用工具

View File

@@ -224,7 +224,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/"
/plugin list ecc@ecc
```
**搞定!** 你现在可以使用 58 个智能体、220 项技能和 74 个命令了。
**搞定!** 你现在可以使用 60 个智能体、225 项技能和 75 个命令了。
***
@@ -1136,9 +1136,9 @@ opencode
| 功能特性 | Claude Code | OpenCode | 状态 |
|---------|-------------|----------|--------|
| 智能体 | PASS: 58 个 | PASS: 12 个 | **Claude Code 领先** |
| 命令 | PASS: 74 个 | PASS: 35 个 | **Claude Code 领先** |
| 技能 | PASS: 220 项 | PASS: 37 项 | **Claude Code 领先** |
| 智能体 | PASS: 60 个 | PASS: 12 个 | **Claude Code 领先** |
| 命令 | PASS: 75 个 | PASS: 35 个 | **Claude Code 领先** |
| 技能 | PASS: 225 项 | PASS: 37 项 | **Claude Code 领先** |
| 钩子 | PASS: 8 种事件类型 | PASS: 11 种事件 | **OpenCode 更多!** |
| 规则 | PASS: 29 条 | PASS: 13 条指令 | **Claude Code 领先** |
| MCP 服务器 | PASS: 14 个 | PASS: 完整 | **完全对等** |
@@ -1244,9 +1244,9 @@ ECC 是**第一个最大化利用每个主要 AI 编码工具的插件**。以
| 功能特性 | Claude Code | Cursor IDE | Codex CLI | OpenCode |
|---------|------------|------------|-----------|----------|
| **智能体** | 58 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 |
| **命令** | 74 | 共享 | 基于指令 | 35 |
| **技能** | 220 | 共享 | 10 (原生格式) | 37 |
| **智能体** | 60 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 |
| **命令** | 75 | 共享 | 基于指令 | 35 |
| **技能** | 225 | 共享 | 10 (原生格式) | 37 |
| **钩子事件** | 8 种类型 | 15 种类型 | 暂无 | 11 种类型 |
| **钩子脚本** | 20+ 个脚本 | 16 个脚本 (DRY 适配器) | N/A | 插件钩子 |
| **规则** | 34 (通用 + 语言) | 34 (YAML 前置元数据) | 基于指令 | 13 条指令 |

View File

@@ -0,0 +1,49 @@
# AgentShield Policy Exception Playbook
Candidate id: `sarif-backed-timeboxed-exception-review`
Use this playbook when AgentShield organization-policy output produces a
finding that may need remediation, a time-boxed exception, or explicit
enforcement.
## Accepted Path
1. Identify the AgentShield finding id, category, severity, affected file or
MCP/hook surface, and policy pack or organization baseline.
2. Retrieve scanner evidence before judgment:
- SARIF/code-scanning result, especially `agentshield-policy/*`
- JSON/HTML report evidence
- terminal or GitHub Action job-summary counts
3. Record lifecycle fields for any exception request: owner, ticket, scope,
expiry, rationale, and whether it is active, expiring soon, or expired.
4. Keep expired exceptions rejected or enforced until new evidence exists.
5. Decide whether immediate remediation is possible. If not, only promote a
narrow time-boxed exception tied to the named owner, ticket, scope, and
expiry.
6. Keep AgentShield code, policy packs, enforcement settings, release state,
and live security posture out of the read-only evaluator run.
## Rejected Path
Do not blanket suppress a policy category, policy pack, or organization gate
because a finding is inconvenient.
Do not downgrade critical/high findings without SARIF or report evidence and a
current owner, ticket, scope, and expiry.
Do not treat expired exceptions as active. Expired means the policy gate should
remain enforced until a maintainer creates a fresh, bounded exception or fixes
the underlying issue.
## Minimum Validation
- `npx ecc-agentshield scan --format json`
- AgentShield SARIF/code-scanning artifact or report evidence
- `npx ecc-agentshield scan --format html` when executive review evidence is
needed
- Current exception lifecycle fields: owner, ticket, scope, expiry, status
- `node tests/docs/evaluator-rag-prototype.test.js`
- `git diff --check`
Record the scanner evidence, lifecycle state, policy-pack source, and
remediation-versus-exception decision in the maintainer PR body or handoff.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "agentshield-policy-exception",
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"sarif_report_evidence": 0.95,
"exception_lifecycle": 0.93,
"ownership_specificity": 0.9,
"remediation_decision": 0.88,
"blanket_suppression_safety": 1
},
"findings": [
{
"id": "sarif-report-match-required",
"severity": "warning",
"summary": "AgentShield policy exceptions must name SARIF or report evidence before a remediation or exception playbook can be promoted."
},
{
"id": "expired-exception-enforcement",
"severity": "warning",
"summary": "Expired exceptions must remain rejected or enforced; the evaluator cannot treat stale approvals as active evidence."
},
{
"id": "bounded-owner-fields",
"severity": "info",
"summary": "Accepted exceptions preserve owner, ticket, scope, expiry, policy-pack source, and affected surface fields."
}
],
"recommended_next_action": {
"candidate_id": "sarif-backed-timeboxed-exception-review",
"action": "Use the promoted playbook for future AgentShield policy exception requests before changing gates, suppressing categories, or accepting security risk."
}
}

View File

@@ -0,0 +1,62 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "agentshield-policy-exception",
"title": "Gate AgentShield policy exceptions with report and SARIF evidence",
"mode": "read_only_prototype",
"objective": "Given an AgentShield organization-policy finding or proposed exception, retrieve report, SARIF, lifecycle, and ownership evidence before promoting a remediation or time-boxed exception playbook.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/ECC-2.0-GA-ROADMAP.md",
"purpose": "Durable record of AgentShield policy gates, SARIF output, policy packs, reports, corpus benchmark, and exception lifecycle audit evidence"
},
{
"kind": "repo_command",
"path": "commands/security-scan.md",
"purpose": "ECC command contract for running AgentShield and separating scanner facts from follow-up judgment"
},
{
"kind": "repo_skill",
"path": "skills/security-scan/SKILL.md",
"purpose": "Operator-facing AgentShield scan workflow and output-format guidance"
},
{
"kind": "external_pr_evidence",
"repo": "affaan-m/agentshield",
"prs": [
55,
56,
57,
59,
60,
62
],
"purpose": "Policy gate, SARIF, policy-pack, HTML report, corpus benchmark, and exception lifecycle implementation evidence"
}
],
"retrieval_questions": [
"Which AgentShield policy finding, category, severity, and affected file or MCP/hook surface triggered the request?",
"Is there SARIF/code-scanning evidence for an `agentshield-policy/*` result, and does it match the report finding?",
"Is the exception active, expiring soon, or expired?",
"Does the exception include owner, ticket, scope, expiry, and rationale fields?",
"Which policy pack or organization baseline produced the finding?",
"Is remediation possible now, or is a bounded exception safer than a blanket suppression?"
],
"forbidden_actions": [
"approving policy exceptions without SARIF or report evidence",
"treating expired exceptions as active",
"blanket-suppressing AgentShield policy packs or organization-policy gates",
"downgrading critical/high findings without owner, ticket, scope, and expiry",
"editing AgentShield code or policy files from this ECC evaluator run",
"publishing or enforcing new security policy from this read-only evaluator run"
],
"acceptance_gates": [
"SARIF or report evidence is named",
"finding id, category, severity, and affected surface are preserved",
"policy pack or organization baseline is named",
"owner, ticket, scope, and expiry state are recorded",
"expired exceptions stay rejected or enforced",
"remediation versus time-boxed exception decision is explicit",
"at least one blanket suppression candidate is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "agentshield-policy-exception",
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A policy finding or exception request references AgentShield organization-policy output. The evaluator records the affected finding without editing AgentShield code, policy packs, or enforcement settings.",
"evidence": [
"docs/ECC-2.0-GA-ROADMAP.md",
"commands/security-scan.md"
]
},
{
"phase": "retrieval",
"summary": "Retrieved SARIF/report evidence, policy-pack source, exception lifecycle state, owner, ticket, scope, expiry, and whether remediation is immediately available.",
"evidence": [
"agentshield-policy/* SARIF result",
"AgentShield report exception counts",
"skills/security-scan/SKILL.md"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: SARIF-backed time-boxed exception review, and blanket policy suppression for the affected category.",
"candidate_ids": [
"sarif-backed-timeboxed-exception-review",
"blanket-policy-suppression"
]
},
{
"phase": "verification",
"summary": "Accepted the evidence-backed exception review because it preserves finding details and lifecycle fields. Rejected blanket suppression because it bypasses policy gates and treats expired exceptions as active.",
"evidence": [
"examples/evaluator-rag-prototype/agentshield-policy-exception/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only AgentShield policy exception playbook. The evaluator does not modify AgentShield code, policy packs, enforcement settings, release state, or live security posture.",
"promoted_candidate_id": "sarif-backed-timeboxed-exception-review"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "agentshield-policy-exception",
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "sarif-backed-timeboxed-exception-review",
"decision": "accepted",
"score": 0.93,
"reasons": [
"names SARIF/code-scanning or report evidence for the AgentShield finding",
"preserves finding id, category, severity, affected surface, and policy-pack source",
"records owner, ticket, scope, expiry, and active/expiring/expired lifecycle state",
"rejects expired exceptions and requires remediation or a time-boxed exception",
"keeps AgentShield code, policy packs, enforcement settings, and release actions out of the read-only evaluator run"
],
"rollback": "Do not apply the future exception or suppression; re-run AgentShield, restore the prior organization policy, and keep the finding enforced until owner/ticket/scope/expiry evidence is current."
},
{
"candidate_id": "blanket-policy-suppression",
"decision": "rejected",
"score": 0.11,
"reasons": [
"has no SARIF or report evidence",
"blanket-suppresses AgentShield policy packs and organization-policy gates",
"treats expired exceptions as active",
"drops owner, ticket, scope, and expiry fields",
"would edit AgentShield or policy gate behavior from an ECC evaluator run"
],
"rollback": "Do not suppress the policy category; restart from scanner evidence, lifecycle state, and a bounded remediation or exception request."
}
],
"promoted_candidate_id": "sarif-backed-timeboxed-exception-review"
}

View File

@@ -0,0 +1,41 @@
# Billing Marketplace Readiness Playbook
Use this playbook when release copy or roadmap text mentions ECC Tools
billing, Marketplace availability, account recovery, plans, seats,
entitlements, or subscription state.
## Accepted Path
1. Start from `docs/releases/2.0.0-rc.1/publication-readiness.md`.
2. Check the current repo and public listing surfaces:
- `gh api repos/ECC-Tools/ECC-Tools`
- `https://github.com/marketplace/ecc-tools`
3. Classify every billing or Marketplace claim as:
- `verified`
- `blocked`
- `remove-before-publication`
4. Keep roadmap acceptance criteria separate from live product claims.
5. Update release copy only after the evidence points to a live URL or command
result.
6. Leave tag creation, npm publish, plugin submission, marketplace edits,
subscription changes, and announcement posting approval-gated.
## Rejected Path
Do not say billing is live because a roadmap item exists, a dry run passed, or a
Marketplace URL is known. Roadmap intent, dry-run publication evidence, and a
known listing URL are not proof of a live billing state.
Do not edit plan limits, subscriptions, seats, entitlements, or Marketplace
metadata from the evaluator run. Those are product/operator actions and require
their own approval path.
## Validation Gates
- `rg -n "billing|Billing|Marketplace|marketplace|subscription|seat|entitlement|plan" README.md docs/releases/2.0.0-rc.1 docs/ECC-2.0-GA-ROADMAP.md`
- `gh api repos/ECC-Tools/ECC-Tools`
- Manual live check of `https://github.com/marketplace/ecc-tools`
- `npx --yes markdownlint-cli docs/releases/2.0.0-rc.1/*.md docs/ECC-2.0-GA-ROADMAP.md`
- `git diff --check`
Record the evidence in a maintainer-owned PR before release copy is published.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "billing-marketplace-readiness",
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"claim_evidence": 0.82,
"publication_safety": 1,
"marketplace_specificity": 0.84,
"billing_scope_control": 1,
"announcement_safety": 1
},
"findings": [
{
"id": "billing-claim-gate-needed",
"severity": "warning",
"summary": "Release docs require a fresh ECC Tools billing/App/Marketplace check before launch copy can claim live billing readiness."
},
{
"id": "dry-run-not-live-state",
"severity": "warning",
"summary": "May 12 evidence proves package/plugin dry runs and clean install smoke, but it does not prove a live Marketplace billing state."
},
{
"id": "safe-next-action",
"severity": "info",
"summary": "The reusable next action is a read-only evidence checklist that classifies each launch-copy billing claim before publication."
}
],
"recommended_next_action": {
"candidate_id": "evidence-backed-billing-check",
"action": "Run the promoted billing/Marketplace claim-verification checklist before any launch copy, GitHub release text, or social copy says billing is live."
}
}

View File

@@ -0,0 +1,55 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "billing-marketplace-readiness",
"title": "Verify billing and Marketplace claims before launch copy",
"mode": "read_only_prototype",
"objective": "Given rc.1 release docs and ECC Tools billing roadmap evidence, separate verified Marketplace/App/billing state from assumptions before any announcement or publication action.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/releases/2.0.0-rc.1/publication-readiness.md",
"purpose": "Release gate that blocks billing and Marketplace claims until fresh evidence exists"
},
{
"kind": "repo_doc",
"path": "docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md",
"purpose": "Dry-run publication evidence and explicit remaining blocker list"
},
{
"kind": "roadmap",
"path": "docs/ECC-2.0-GA-ROADMAP.md",
"purpose": "ECC Tools billing audit acceptance criteria and remaining release blockers"
},
{
"kind": "github_api",
"command": "gh api repos/ECC-Tools/ECC-Tools",
"purpose": "Fresh repository access and app-surface evidence before launch claims"
},
{
"kind": "public_url",
"url": "https://github.com/marketplace/ecc-tools",
"purpose": "Marketplace listing that must be checked live before copy says billing is ready"
}
],
"retrieval_questions": [
"Which billing or Marketplace claims are already backed by repo evidence?",
"Which claims still need a live Marketplace, App, subscription, plan, or entitlement check?",
"Which announcement docs mention billing or Marketplace status?",
"Which publication actions remain approval-gated and must not run during this evaluator pass?"
],
"forbidden_actions": [
"creating or editing GitHub Marketplace listings",
"changing plan limits, subscriptions, seats, or entitlements",
"creating release tags",
"publishing packages or plugins",
"posting announcement copy",
"claiming live billing readiness from dry-run evidence alone"
],
"acceptance_gates": [
"launch-copy claims are classified as verified, blocked, or remove-before-publication",
"Marketplace and App checks name the exact URL or command needed",
"billing claims link to fresh evidence rather than roadmap intent",
"publication actions remain approval-gated",
"at least one overclaim candidate is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "billing-marketplace-readiness",
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "Publication readiness still marks ECC Tools billing references and announcement copy as pending. Dry-run publication evidence says billing/App/Marketplace claims must be verified before launch copy uses them.",
"evidence": [
"docs/releases/2.0.0-rc.1/publication-readiness.md",
"docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the release gate, dry-run evidence, roadmap billing acceptance criteria, and the public Marketplace URL that requires a live operator check.",
"evidence": [
"docs/ECC-2.0-GA-ROADMAP.md",
"gh api repos/ECC-Tools/ECC-Tools",
"https://github.com/marketplace/ecc-tools"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: evidence-backed billing claim verification, and announcement-first billing copy that treats roadmap intent as live billing readiness.",
"candidate_ids": [
"evidence-backed-billing-check",
"announcement-first-billing-copy"
]
},
{
"phase": "verification",
"summary": "Accepted the evidence-backed check and rejected announcement-first copy because billing and Marketplace surfaces remain pending until verified by fresh URLs or API output.",
"evidence": [
"examples/evaluator-rag-prototype/billing-marketplace-readiness/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only verification playbook. No Marketplace edits, subscription changes, tags, package publishes, plugin submission, or announcement posts are performed.",
"promoted_candidate_id": "evidence-backed-billing-check"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "billing-marketplace-readiness",
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "evidence-backed-billing-check",
"decision": "accepted",
"score": 0.91,
"reasons": [
"keeps the run read-only",
"requires fresh Marketplace or GitHub API evidence",
"classifies launch-copy claims before publication",
"separates roadmap intent from live billing state",
"keeps release, package, plugin, billing, and announcement actions approval-gated"
],
"rollback": "Remove or revert any release-copy edits that cite unverified billing claims; no live billing state is changed by this playbook."
},
{
"candidate_id": "announcement-first-billing-copy",
"decision": "rejected",
"score": 0.18,
"reasons": [
"treats roadmap acceptance criteria as live billing evidence",
"does not require a fresh Marketplace listing check",
"could publish announcement copy before release URLs exist",
"does not classify unsupported claims for removal",
"risks implying subscription or entitlement readiness without proof"
],
"rollback": "Do not publish this copy; keep billing and Marketplace language blocked until the evidence checklist passes."
}
],
"promoted_candidate_id": "evidence-backed-billing-check"
}

View File

@@ -0,0 +1,41 @@
# Candidate Playbook: Maintainer-Owned Stale Salvage
Candidate id: `maintainer-salvage-branch`
## Use When
- A stale or conflicted PR was closed to keep the public queue usable.
- The closed diff contains a useful focused idea, skill, command, doc, test, or
bug fix.
- The contributor may not have time or interest to rebase.
## Steps
1. Record the source PR, author, useful concept, and closure reason in
`docs/stale-pr-salvage-ledger.md`.
2. Re-read the closed PR diff against current `main`.
3. Decide whether the patch can be cherry-picked safely. Prefer reimplementation
when current architecture has moved.
4. Create a maintainer-owned branch with one focused salvage unit.
5. Preserve attribution in the PR body and, when useful, in the commit body.
6. Update the catalog, docs, tests, or release evidence required by the touched
surface.
7. Run the same validation gates a normal change would require.
8. After merge, update the ledger from pending/salvage-branch to landed,
already-present, superseded, skipped, or translator/manual review.
## Reject Conditions
- The patch is bulk generated churn.
- The patch is stale localization that needs translator/manual review.
- The patch imports personal paths, secrets, local settings, or private operator context.
- The patch bypasses current install, catalog, plugin, or release architecture.
- The branch would mix unrelated salvage units into one PR.
## Minimum Validation
- Targeted test for the touched surface.
- `git diff --check`.
- Markdown lint when docs are touched.
- Catalog/install validation when skills, agents, commands, or plugin surfaces
are touched.

View File

@@ -0,0 +1,46 @@
# CI Failure Diagnosis Playbook
Candidate id: `log-backed-minimal-fix`
Use this playbook when a PR, maintainer branch, or release-readiness branch has
one or more red GitHub Actions checks.
## Accepted Path
1. Capture PR and branch context:
- `gh pr view <pr-number> --json files,statusCheckRollup,headRefName,baseRefName`
- `gh run view <run-id> --json jobs`
2. Fetch the failed log evidence:
- `gh run view <run-id> --log-failed`
3. Record the failing job, step, OS, Node/Python/Rust version, package manager,
and shortest useful error excerpt.
4. Compare the failing step to the PR changed files.
5. Search current docs, tests, and prior PRs for a known matching failure mode.
6. Promote the smallest fix path only when it includes a local reproduction or
regression command.
7. After a separate implementation branch exists, rerun the focused local gate,
then wait for the full GitHub Actions matrix before merge.
## Rejected Path
Do not keep rerunning CI until a transient green result appears unless you first
record the original failure and why it is safe to ignore.
Do not weaken tests, skip matrix legs, or broaden the patch to unrelated files
just to make the check pass.
Do not claim release readiness from a branch with required checks still red.
## Minimum Validation
- `gh run view <run-id> --log-failed`
- Focused local command matching the failing surface, such as:
- `node tests/<matching-test>.js`
- `npm run harness:audit -- --format json`
- `npm run observability:ready`
- `cargo test`
- `git diff --check`
- Full required GitHub Actions matrix before merge
Record the failed-log excerpt and the chosen regression command in the
maintainer PR body or handoff before merging the fix.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "ci-failure-diagnosis",
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"failure_evidence": 0.92,
"scope_control": 0.9,
"regression_specificity": 0.86,
"matrix_safety": 1,
"publication_safety": 1
},
"findings": [
{
"id": "log-first-required",
"severity": "warning",
"summary": "A CI fix candidate must start from the exact failed job, step, platform, runtime, package manager, and log excerpt rather than from a generic rerun."
},
{
"id": "changed-file-scope-needed",
"severity": "info",
"summary": "Changed-file context should narrow the fix to the surface that can affect the failing step, especially in a broad OS/runtime matrix."
},
{
"id": "regression-gate-needed",
"severity": "warning",
"summary": "A promoted fix playbook must name a local reproduction or regression command before the branch is allowed to merge."
}
],
"recommended_next_action": {
"candidate_id": "log-backed-minimal-fix",
"action": "Use the promoted CI failure diagnosis playbook whenever a PR check goes red before implementing or rerunning fixes."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "ci-failure-diagnosis",
"title": "Diagnose CI failures from captured logs before proposing fixes",
"mode": "read_only_prototype",
"objective": "Given a failed CI run on a PR or maintainer branch, retrieve the exact failing job, captured log excerpt, changed-file context, and prior known-fix evidence before promoting a fix playbook.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/ECC-2.0-GA-ROADMAP.md",
"purpose": "Records ECC-Tools CI failure-mode predictive follow-ups and the evaluator/RAG corpus expansion need"
},
{
"kind": "repo_doc",
"path": "docs/architecture/evaluator-rag-prototype.md",
"purpose": "Defines the artifact contract and promotion rules for evaluator/RAG scenarios"
},
{
"kind": "github_actions",
"command": "gh run view <run-id> --log-failed",
"purpose": "Primary evidence for the failing job, failing step, and deterministic error text"
},
{
"kind": "github_pr",
"command": "gh pr view <pr-number> --json files,statusCheckRollup,headRefName,baseRefName",
"purpose": "Changed-file and check-rollup context for scoping the fix"
},
{
"kind": "repo_test",
"command": "node tests/run-all.js",
"purpose": "Local regression gate after a candidate fix is implemented outside the read-only evaluator run"
}
],
"retrieval_questions": [
"Which job, step, platform, runtime, and package manager failed?",
"What is the smallest failing log excerpt that explains the failure?",
"Which changed files are plausibly connected to the failing step?",
"Is there a prior known-fix, troubleshooting note, or fixture that matches this failure mode?",
"Which local command reproduces or guards the failure before a fix can merge?"
],
"forbidden_actions": [
"rerunning CI until it passes without diagnosing the failure",
"pushing speculative fixes without a captured failing log excerpt",
"editing unrelated files to make the matrix green",
"weakening or deleting tests to silence a failure",
"merging or publishing while required checks are red",
"creating release tags or posting announcements from this evaluator run"
],
"acceptance_gates": [
"failing job and step are named",
"captured log excerpt is linked or summarized",
"changed-file context is compared to the failing step",
"known-fix or no-known-fix status is recorded",
"local reproduction or regression command is named",
"at least one rerun-only candidate is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "ci-failure-diagnosis",
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A PR or maintainer branch has a red GitHub Actions matrix. The evaluator records status without rerunning, merging, or editing code.",
"evidence": [
"gh pr view <pr-number> --json statusCheckRollup,files",
"gh run view <run-id> --json jobs"
]
},
{
"phase": "retrieval",
"summary": "Retrieved failed-job logs, changed-file context, current roadmap CI failure-mode requirements, and existing local regression commands.",
"evidence": [
"gh run view <run-id> --log-failed",
"docs/ECC-2.0-GA-ROADMAP.md",
"tests/run-all.js"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: log-backed minimal fix with regression coverage, and rerun-only optimism that treats CI flake as proven without evidence.",
"candidate_ids": [
"log-backed-minimal-fix",
"rerun-only-green-wait"
]
},
{
"phase": "verification",
"summary": "Accepted the log-backed minimal fix because it names failing evidence, scope, and validation. Rejected rerun-only waiting because it does not explain the failure or preserve a regression guard.",
"evidence": [
"examples/evaluator-rag-prototype/ci-failure-diagnosis/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only CI triage playbook. The evaluator does not push a fix, rerun CI, merge, publish, or weaken checks.",
"promoted_candidate_id": "log-backed-minimal-fix"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "ci-failure-diagnosis",
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "log-backed-minimal-fix",
"decision": "accepted",
"score": 0.93,
"reasons": [
"requires failed job, step, platform, runtime, and log evidence",
"compares changed files to the failing surface before proposing a fix",
"names a focused local reproduction or regression command",
"keeps required checks intact",
"keeps merge, release, package, plugin, billing, and announcement actions approval-gated"
],
"rollback": "Revert the future implementation PR or restore the original failing test fixture; no code is changed by this read-only playbook."
},
{
"candidate_id": "rerun-only-green-wait",
"decision": "rejected",
"score": 0.17,
"reasons": [
"does not preserve the failing log excerpt",
"does not identify job, step, platform, runtime, or package manager",
"does not compare failure surface to changed files",
"does not add or name a regression gate",
"risks merging a flaky or still-unexplained CI failure"
],
"rollback": "Do not treat this as a fix; restart diagnosis from captured failed logs and changed-file context."
}
],
"promoted_candidate_id": "log-backed-minimal-fix"
}

View File

@@ -0,0 +1,60 @@
# Deep Analyzer Evidence Playbook
Candidate id: `corpus-backed-analyzer-change`
Use this playbook when a PR changes repository analysis, commit analysis,
architecture classification, workflow detection, pattern detection, or
deep-analysis risk-taxonomy behavior.
## Accepted Path
1. Name the changed analyzer surface and source file.
2. Retrieve the Deep Analyzer Evidence contract from `../ECC-Tools/README.md`
and the follow-up logic in `../ECC-Tools/src/lib/analyzer.ts`.
3. Match the change to maintained corpus or reference evidence:
- `../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts`
- `../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts`
- `../ECC-Tools/src/lib/analyzer.compare.test.ts`
4. Compare expected outputs for the affected behavior:
- folder type;
- module organization;
- test location;
- primary language;
- commit message type;
- detected workflow names.
5. Add or update analyzer corpus, expected-output snapshots, fixtures,
benchmarks, golden cases, evals, or reference sets for the same changed
surface.
6. Run the relevant validation gate from `../ECC-Tools/`:
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
- `npm run typecheck`
- `npm run lint`
7. Record the corpus case, expected-output comparison, validation output, and
rollback notes in the maintainer PR body or handoff.
## Rejected Path
Do not promote analyzer threshold, classification, or risk-taxonomy changes
without corpus, snapshot, fixture, benchmark, golden, eval, or reference-set
evidence.
Do not suppress the `Deep Analyzer Evidence` PR-risk bucket just because the
change is small. Suppress it only when co-located evidence covers the same
analyzer surface.
Do not rely only on broad manual review notes. Analyzer changes need
representative repository shapes or commit-history cases with expected outputs.
Do not post PR comments, create check runs, sync Linear, publish packages, edit
plugins, or create release artifacts from the evaluator run.
## Minimum Validation
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
- `npm run typecheck`
- `npm run lint`
- `git diff --check`
- Markdown lint when docs or playbooks are touched
Preserve source attribution for analyzer evidence and include rollback guidance
for the future maintainer PR.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"corpus_retrieval": 0.95,
"expected_output_comparison": 0.91,
"representative_case_coverage": 0.89,
"taxonomy_gap_safety": 0.93,
"publication_safety": 1
},
"findings": [
{
"id": "corpus-required",
"severity": "warning",
"summary": "Deep-analysis behavior changes need maintained corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence before promotion."
},
{
"id": "expected-output-required",
"severity": "warning",
"summary": "Analyzer changes should compare expected folder type, module organization, test location, primary language, commit pattern, or workflow outputs."
},
{
"id": "read-only-routing",
"severity": "info",
"summary": "The evaluator can recommend a maintainer PR but cannot itself post PR comments, create check runs, sync Linear, publish packages, edit plugins, or perform release actions."
}
],
"recommended_next_action": {
"candidate_id": "corpus-backed-analyzer-change",
"action": "Use the promoted deep-analyzer evidence playbook for PRs that change repository, commit, architecture, workflow, pattern, or risk-taxonomy analysis behavior."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "deep-analyzer-evidence",
"title": "Require analyzer corpus evidence before promoting deep-analysis changes",
"mode": "read_only_prototype",
"objective": "Given a change to repository, commit, architecture, pattern, or deep-analysis logic, retrieve maintained analyzer corpus evidence and expected-output comparisons before promoting analyzer behavior or risk-taxonomy changes.",
"sources": [
{
"kind": "sibling_repo_doc",
"path": "../ECC-Tools/README.md",
"purpose": "Public description of deep-analyzer predictive follow-ups and the Deep Analyzer Evidence PR-risk bucket"
},
{
"kind": "sibling_repo_source",
"path": "../ECC-Tools/src/lib/analyzer.ts",
"purpose": "Predictive follow-up logic that flags analyzer changes without corpus, snapshot, fixture, or benchmark evidence"
},
{
"kind": "sibling_repo_source",
"path": "../ECC-Tools/src/lib/pr-risk-taxonomy.ts",
"purpose": "Non-blocking PR-risk taxonomy bucket for deep-analyzer evidence"
},
{
"kind": "sibling_repo_fixture",
"path": "../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
"purpose": "Maintained corpus cases for representative repository shapes, commit histories, and expected analyzer outputs"
},
{
"kind": "sibling_repo_test",
"command": "npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts",
"purpose": "Regression evidence for analyzer corpus outputs and deep-analyzer follow-up generation"
}
],
"retrieval_questions": [
"Which analyzer surface changed: repository structure, architecture, code style, commit messages, workflow detection, pattern detection, or risk taxonomy?",
"Which maintained corpus case or reference set covers the same analyzer behavior?",
"Do expected outputs compare folder type, module organization, test location, primary language, commit type, and workflow names?",
"Does the PR add analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence alongside analyzer code changes?",
"Does the evaluator keep PR comments, check runs, Linear sync, package changes, and publication actions out of the read-only pass?"
],
"forbidden_actions": [
"promoting repository, commit, architecture, or deep-analysis changes without analyzer corpus evidence",
"suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence",
"changing analyzer thresholds or classifications without expected-output comparison",
"relying only on broad manual review notes instead of representative repository and commit-history cases",
"posting PR comments, check runs, or Linear sync updates from this read-only evaluator run",
"changing package, plugin, release, or publication state from this evaluator run"
],
"acceptance_gates": [
"changed analyzer surface is named",
"maintained corpus or reference-set path is included",
"expected analyzer outputs are compared",
"representative repository shape or commit history is described",
"regression command is named",
"at least one no-corpus analyzer change is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A deep-analysis PR changes repository, commit, architecture, workflow, pattern, or risk-taxonomy behavior. The evaluator records the touched analyzer surface and remains read-only.",
"evidence": [
"../ECC-Tools/src/lib/analyzer.ts",
"../ECC-Tools/src/lib/pr-risk-taxonomy.ts"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the maintained analyzer corpus, corpus regression test, and follow-up tests that distinguish corpus-backed analyzer changes from no-evidence analyzer rewrites.",
"evidence": [
"../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
"../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts",
"../ECC-Tools/src/lib/analyzer.compare.test.ts"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: corpus-backed analyzer change, and threshold-only analyzer rewrite without expected-output evidence.",
"candidate_ids": [
"corpus-backed-analyzer-change",
"threshold-only-analyzer-rewrite"
]
},
{
"phase": "verification",
"summary": "Accepted the corpus-backed analyzer change because it names representative repository/commit cases and expected-output comparisons. Rejected the threshold-only rewrite because it lacks corpus or benchmark evidence.",
"evidence": [
"examples/evaluator-rag-prototype/deep-analyzer-evidence/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only deep-analyzer evidence playbook. Future analyzer edits must move through maintainer PRs with corpus evidence, regression commands, and rollback notes.",
"promoted_candidate_id": "corpus-backed-analyzer-change"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "deep-analyzer-evidence",
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "corpus-backed-analyzer-change",
"decision": "accepted",
"score": 0.92,
"reasons": [
"names the changed analyzer surface and matching maintained corpus case",
"compares expected analyzer outputs for representative repository and commit-history inputs",
"keeps Deep Analyzer Evidence taxonomy behavior tied to co-located corpus or benchmark evidence",
"names the regression command that exercises corpus and follow-up behavior",
"keeps PR comments, check runs, Linear sync, and publication actions out of the evaluator run"
],
"rollback": "Revert the future analyzer PR and restore the prior corpus expectations; no hosted check-run, Linear, package, or publication state changes in this read-only playbook."
},
{
"candidate_id": "threshold-only-analyzer-rewrite",
"decision": "rejected",
"score": 0.13,
"reasons": [
"changes analyzer thresholds without corpus evidence",
"does not compare expected outputs against representative repository or commit-history cases",
"does not update analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set artifacts",
"would suppress Deep Analyzer Evidence risk without proof",
"does not name a regression command"
],
"rollback": "Do not promote this analyzer rewrite; restart from maintained corpus inputs, expected-output snapshots, and a focused maintainer PR."
}
],
"promoted_candidate_id": "corpus-backed-analyzer-change"
}

View File

@@ -0,0 +1,49 @@
# Harness Config Quality Playbook
Candidate id: `adapter-matrix-backed-drift-check`
Use this playbook when a PR, install change, or setup recommendation touches
MCP, plugins, hooks, commands, agents, rules, install targets, or harness
adapter surfaces.
## Accepted Path
1. Identify the touched harness/config surface.
2. Retrieve the adapter state from
`docs/architecture/harness-adapter-compliance.md` or
`scripts/lib/harness-adapter-compliance.js`.
3. Record whether the harness is `Native`, `Adapter-backed`,
`Instruction-backed`, or `Reference-only`.
4. Name the install/onramp path and verification command from the matrix.
5. Preserve existing user and project config by using merge, dry-run, or
explicit no-overwrite behavior.
6. Run the relevant validation gate:
- `npm run harness:adapters -- --check`
- `npm run harness:audit -- --format json`
- `node tests/lib/install-targets.test.js`
- `node tests/opencode-plugin-hooks.test.js`
- `node tests/docs/mcp-management-docs.test.js`
7. Promote a config recommendation only when the evidence matches the harness
state and the config preservation behavior is explicit.
## Rejected Path
Do not claim Claude hook parity for Codex, Gemini, Zed, OpenCode, or other
harnesses unless the adapter matrix and tests prove it.
Do not overwrite `settings.json`, MCP configs, plugin manifests, rule files, or
command surfaces without a merge/dry-run path and a rollback note.
Do not toggle live MCP servers, publish plugins, or edit user-level harness
config from the evaluator run.
## Minimum Validation
- `npm run harness:adapters -- --check`
- `npm run harness:audit -- --format json`
- Focused install, plugin, MCP, or hook test for the changed surface
- `git diff --check`
- Markdown lint when docs are touched
Record the adapter state, risk note, validation commands, and config
preservation behavior in the maintainer PR body or handoff.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"adapter_evidence": 0.94,
"config_preservation": 0.88,
"verification_specificity": 0.9,
"parity_claim_safety": 1,
"publication_safety": 1
},
"findings": [
{
"id": "adapter-state-required",
"severity": "warning",
"summary": "Harness recommendations must retrieve the adapter state before claiming native support or runtime enforcement."
},
{
"id": "config-overwrite-risk",
"severity": "warning",
"summary": "MCP, hook, plugin, command, and rule changes must preserve existing user/project config and use dry-run or merge behavior when available."
},
{
"id": "verification-command-needed",
"severity": "info",
"summary": "The accepted playbook names harness adapter, harness audit, install-target, or plugin-hook regression gates before a config change can merge."
}
],
"recommended_next_action": {
"candidate_id": "adapter-matrix-backed-drift-check",
"action": "Use the promoted harness-config quality playbook for PRs or setup work touching MCP, plugin, hook, command, agent, rule, or adapter surfaces."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "harness-config-quality",
"title": "Detect harness config drift before changing adapters or installs",
"mode": "read_only_prototype",
"objective": "Given a change to MCP, plugin, hook, command, agent, rule, or harness adapter surfaces, retrieve the adapter matrix and validation evidence before promoting a setup recommendation or config change.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/architecture/harness-adapter-compliance.md",
"purpose": "Public adapter matrix that names harness state, install/onramp paths, verification commands, and risk notes"
},
{
"kind": "repo_source",
"path": "scripts/lib/harness-adapter-compliance.js",
"purpose": "Structured source of truth for the adapter compliance matrix"
},
{
"kind": "repo_config",
"path": "hooks/hooks.json",
"purpose": "Claude hook surface that must not be assumed portable without adapter evidence"
},
{
"kind": "repo_config",
"path": "mcp-configs/mcp-servers.json",
"purpose": "Reference MCP config that can drift from harness-specific runtime semantics"
},
{
"kind": "repo_test",
"command": "npm run harness:adapters -- --check",
"purpose": "Adapter matrix consistency gate"
}
],
"retrieval_questions": [
"Which harness or config surface changed: MCP, plugin, hook, command, agent, rule, or adapter?",
"Does the adapter matrix classify this harness as native, adapter-backed, instruction-backed, or reference-only?",
"Which install path, verification command, risk note, owner, and source doc apply?",
"Does the recommendation preserve existing user config rather than overwriting it?",
"Which compatibility regression or harness audit command proves the setup still works?"
],
"forbidden_actions": [
"claiming native support for instruction-backed or reference-only harnesses",
"copying Claude hook semantics into Codex, Gemini, Zed, or OpenCode without adapter evidence",
"silently overwriting existing user MCP, hook, plugin, command, or rule config",
"disabling or enabling live MCP servers from a read-only evaluator run",
"shipping an adapter change without a verification command",
"publishing packages or plugins from this evaluator run"
],
"acceptance_gates": [
"adapter state is retrieved from the matrix",
"install or onramp path is named",
"verification command is named",
"risk note is preserved",
"config-preservation behavior is explicit",
"at least one unsupported parity claim is rejected"
]
}

View File

@@ -0,0 +1,45 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A setup recommendation or PR touches MCP, plugin, hook, command, agent, rule, or adapter surfaces. The evaluator records the surface without editing local or user-level config.",
"evidence": [
"docs/architecture/harness-adapter-compliance.md",
"scripts/lib/harness-adapter-compliance.js"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the adapter state, install/onramp path, verification commands, risk notes, and config-preservation tests for the affected harness.",
"evidence": [
"npm run harness:adapters -- --check",
"npm run harness:audit -- --format json",
"node tests/lib/install-targets.test.js"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: adapter-matrix-backed drift check, and unsupported hook parity claim that copies Claude semantics into every harness.",
"candidate_ids": [
"adapter-matrix-backed-drift-check",
"unsupported-hook-parity-claim"
]
},
{
"phase": "verification",
"summary": "Accepted the matrix-backed drift check because it names state, install path, verification, and preservation behavior. Rejected unsupported hook parity because it overclaims portability.",
"evidence": [
"examples/evaluator-rag-prototype/harness-config-quality/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only harness-config quality playbook. The evaluator does not overwrite configs, toggle MCP servers, publish plugins, or claim native support.",
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "harness-config-quality",
"run_id": "2026-05-12-harness-config-quality-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "adapter-matrix-backed-drift-check",
"decision": "accepted",
"score": 0.92,
"reasons": [
"retrieves adapter state before making a support claim",
"names install or onramp path and verification commands",
"preserves existing user and project config",
"keeps runtime MCP toggles and plugin publication out of the evaluator run",
"requires focused compatibility regression coverage"
],
"rollback": "Revert the future adapter/config PR or restore the prior config merge behavior; no live user config is changed by this read-only playbook."
},
{
"candidate_id": "unsupported-hook-parity-claim",
"decision": "rejected",
"score": 0.16,
"reasons": [
"claims native support without adapter matrix evidence",
"copies Claude hook semantics into instruction-backed harnesses",
"does not name a verification command",
"does not preserve existing MCP or hook config",
"risks publishing or installing unsupported plugin behavior"
],
"rollback": "Do not publish this setup recommendation; restart from adapter state, risk note, and config-preservation evidence."
}
],
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "stale-pr-salvage-maintainer-branch",
"run_id": "2026-05-12-cleanup-salvage-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"source_attribution": 1,
"blast_radius_control": 1,
"manual_review_respected": 1,
"validation_specificity": 0.8,
"publication_safety": 1
},
"findings": [
{
"id": "salvage-policy-usable",
"severity": "info",
"summary": "The stale-salvage ledger and maintainer PR examples provide enough evidence to promote a reusable maintainer-owned salvage playbook."
},
{
"id": "translation-tail-blocked",
"severity": "warning",
"summary": "Localization tails remain useful but must stay translator/manual-review only."
},
{
"id": "release-actions-blocked",
"severity": "warning",
"summary": "Release, npm, plugin, billing, and announcement actions remain outside this evaluator run and require separate approval."
}
],
"recommended_next_action": {
"candidate_id": "maintainer-salvage-branch",
"action": "Use the promoted playbook for future stale cleanup batches and add additional evaluator/RAG scenarios for CI failure diagnosis, harness-config drift, billing readiness, and AgentShield policy exceptions."
}
}

View File

@@ -0,0 +1,56 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "stale-pr-salvage-maintainer-branch",
"title": "Recover useful stale PR work through maintainer-owned branches",
"mode": "read_only_prototype",
"objective": "Given a closed stale PR batch, identify useful work, reject unsafe bulk imports, and promote only a maintainer-owned salvage playbook with attribution and validation.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/stale-pr-salvage-ledger.md",
"purpose": "Durable source-to-disposition mapping for stale PR cleanup"
},
{
"kind": "repo_doc",
"path": "docs/legacy-artifact-inventory.md",
"purpose": "Import guardrails for legacy and private-context material"
},
{
"kind": "roadmap",
"path": "docs/ECC-2.0-GA-ROADMAP.md",
"purpose": "Operating rule and current execution lane"
},
{
"kind": "github_pr",
"url": "https://github.com/affaan-m/everything-claude-code/pull/1815",
"purpose": "Example maintainer-owned stale salvage PR with attribution"
},
{
"kind": "github_pr",
"url": "https://github.com/affaan-m/everything-claude-code/pull/1818",
"purpose": "Example gap pass classifying already-present and skipped stale work"
}
],
"retrieval_questions": [
"Which closed PRs contain useful work that is not already present?",
"Which files or concepts are unsafe to cherry-pick without manual review?",
"Which current docs, skills, commands, or tests are the correct integration points?",
"Which validation gates are required before the salvage work can merge?"
],
"forbidden_actions": [
"closing, reopening, or commenting on PRs",
"merging PRs",
"creating release tags",
"publishing packages or plugins",
"copying private paths, secrets, or raw personal context",
"blindly cherry-picking bulk localization"
],
"acceptance_gates": [
"source attribution is preserved",
"salvage ledger or equivalent tracker is updated",
"translation/manual-review tails remain blocked",
"candidate action is reversible and maintainer-owned",
"validation commands are named",
"at least one unsafe candidate is rejected"
]
}

View File

@@ -0,0 +1,57 @@
# Skill Quality Evidence Playbook
Candidate id: `evidence-backed-skill-amendment`
Use this playbook when a PR or follow-up proposes adding, rewriting, or
amending a skill, agent, command, or rule guidance surface.
## Accepted Path
1. Name the changed guidance surface and source file.
2. Retrieve the quality contract from `docs/SKILL-DEVELOPMENT-GUIDE.md`.
3. Compare the proposed change to nearby focused examples under `skills/*/SKILL.md`.
4. Record the evidence source that justifies the change:
- observed skill-run failure;
- user feedback;
- repeated review finding;
- reference-set gap;
- failing example or regression test.
5. Keep the scope narrow. One skill should cover one domain, workflow, or
reusable pattern.
6. Add or update examples only when they can be validated.
7. Run the relevant validation gate:
- `node scripts/ci/validate-skills.js`
- `node tests/lib/skill-improvement.test.js`
- `node tests/lib/skill-evolution.test.js`
- `npm run catalog:check`
- language-specific example commands such as `npx tsc --noEmit`,
`python -m py_compile`, or `go build` when examples are touched.
8. Record validation output, source attribution, and rollback notes in the
maintainer PR body or handoff.
## Rejected Path
Do not promote a vague skill rewrite because the prose "sounds better" without
observed failure evidence, examples, or a reference set.
Do not merge multi-domain catch-all skills that duplicate focused skills or make
activation less predictable.
Do not copy private operator context, secrets, tokens, personal paths, customer
data, or unpublished release claims into skills.
Do not update package manifests, plugin manifests, catalogs, release notes, or
publication state from the evaluator run.
## Minimum Validation
- `node scripts/ci/validate-skills.js`
- `npm run catalog:check` when catalog/package-visible skill surfaces change
- Focused skill-improvement or skill-evolution regression test when amendment
behavior changes
- Language-specific compile/lint checks for touched examples
- `git diff --check`
- Markdown lint when docs or playbooks are touched
Preserve source attribution for contributed skill material and include rollback
guidance for the future maintainer PR.

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.report.v1",
"scenario_id": "skill-quality-evidence",
"run_id": "2026-05-12-skill-quality-evidence-prototype",
"result": "prototype_passed",
"read_only": true,
"scores": {
"skill_contract_retrieval": 0.94,
"observed_failure_evidence": 0.88,
"example_quality": 0.9,
"validation_specificity": 0.93,
"publication_safety": 1
},
"findings": [
{
"id": "examples-required",
"severity": "warning",
"summary": "Skill-quality changes need working examples or regression evidence; prose-only rewrites are not enough for promotion."
},
{
"id": "observation-source-required",
"severity": "warning",
"summary": "Skill amendments should cite observed failure, user feedback, or a reference-set gap rather than broad style preference."
},
{
"id": "publication-stays-blocked",
"severity": "info",
"summary": "The evaluator can recommend a maintainer PR, but it cannot update package, plugin, catalog, or publication state itself."
}
],
"recommended_next_action": {
"candidate_id": "evidence-backed-skill-amendment",
"action": "Use the promoted skill-quality playbook for PRs that add, rewrite, or amend skills, agents, commands, or rules guidance."
}
}

View File

@@ -0,0 +1,57 @@
{
"schema_version": "ecc.evaluator-rag.scenario.v1",
"scenario_id": "skill-quality-evidence",
"title": "Require examples and validation before promoting skill guidance changes",
"mode": "read_only_prototype",
"objective": "Given a change to skills, agents, commands, or rules guidance, retrieve the skill development contract and observed skill-run evidence before promoting an amendment or new skill-quality recommendation.",
"sources": [
{
"kind": "repo_doc",
"path": "docs/SKILL-DEVELOPMENT-GUIDE.md",
"purpose": "Public skill quality contract for frontmatter, focused scope, examples, testing, and submission evidence"
},
{
"kind": "repo_source",
"path": "scripts/ci/validate-skills.js",
"purpose": "Curated skill structure and frontmatter validation gate"
},
{
"kind": "repo_source",
"path": "scripts/lib/skill-improvement/",
"purpose": "Observation, health, amendment, and evaluation helpers for evidence-backed skill evolution"
},
{
"kind": "repo_test",
"command": "node tests/lib/skill-improvement.test.js",
"purpose": "Regression coverage for observation-backed skill amendment and evaluation scaffolds"
},
{
"kind": "repo_test",
"command": "node scripts/ci/validate-skills.js",
"purpose": "Skill structure validation before catalog or package changes merge"
}
],
"retrieval_questions": [
"Which skill, agent, command, or rule surface changed?",
"Does the change preserve focused scope, clear activation text, and working examples?",
"Which validation command proves frontmatter, catalog, example, or behavior quality?",
"Does observed failure or user feedback justify the amendment?",
"Does the candidate avoid private context, secrets, personal paths, and publication actions?"
],
"forbidden_actions": [
"promoting a skill rewrite without examples, validation, or observed failure evidence",
"adding broad multi-domain skills that duplicate existing focused skills",
"shipping code examples that are uncompiled, untested, or disconnected from the skill guidance",
"copying private operator context, secrets, tokens, or personal paths into skills",
"changing package, plugin, catalog, or publication state from this evaluator run",
"claiming a skill-quality improvement without a reference set or regression command"
],
"acceptance_gates": [
"changed skill or guidance surface is named",
"source evidence includes the skill development guide or current skill examples",
"observed failure, user feedback, or reference-set gap is recorded",
"validation command is named",
"example or regression evidence is attached",
"at least one vague no-evidence rewrite is rejected"
]
}

View File

@@ -0,0 +1,46 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "skill-quality-evidence",
"run_id": "2026-05-12-skill-quality-evidence-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "A skill or guidance PR proposes updated instructions. The evaluator records the changed surface and stays read-only; it does not edit skills, package manifests, catalogs, or publication state.",
"evidence": [
"docs/SKILL-DEVELOPMENT-GUIDE.md",
"scripts/ci/validate-skills.js"
]
},
{
"phase": "retrieval",
"summary": "Retrieved the skill quality contract, existing focused skill examples, observation-backed amendment helpers, and validation commands for skill structure and regression evidence.",
"evidence": [
"node scripts/ci/validate-skills.js",
"node tests/lib/skill-improvement.test.js",
"node tests/lib/skill-evolution.test.js",
"npm run catalog:check"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: evidence-backed skill amendment, and broad rewrite with no examples or validation.",
"candidate_ids": [
"evidence-backed-skill-amendment",
"vague-skill-rewrite"
]
},
{
"phase": "verification",
"summary": "Accepted the evidence-backed amendment because it names observed failure evidence, examples, and validation commands. Rejected the vague rewrite because it lacks a reference set and testable examples.",
"evidence": [
"examples/evaluator-rag-prototype/skill-quality-evidence/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the read-only skill-quality evidence playbook. Future skill edits must move through maintainer PRs with source attribution, validation, and rollback notes.",
"promoted_candidate_id": "evidence-backed-skill-amendment"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "skill-quality-evidence",
"run_id": "2026-05-12-skill-quality-evidence-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "evidence-backed-skill-amendment",
"decision": "accepted",
"score": 0.91,
"reasons": [
"retrieves the skill development guide and existing focused skill examples",
"records observed failure, user feedback, or reference-set gap before proposing an amendment",
"names validation commands for skill structure, examples, catalog consistency, and regression behavior",
"keeps package, plugin, catalog, and publication actions out of the evaluator run",
"includes rollback guidance for reverting the future maintainer PR"
],
"rollback": "Revert the future skill-amendment PR and restore the prior SKILL.md content; no installed user skill or publication surface changes in this read-only playbook."
},
{
"candidate_id": "vague-skill-rewrite",
"decision": "rejected",
"score": 0.14,
"reasons": [
"does not name observed failure evidence or user feedback",
"rewrites broad skill guidance without focused scope",
"does not include working examples or a reference set",
"does not name a regression command",
"risks changing catalog or publication state from evaluator output"
],
"rollback": "Do not promote this rewrite; restart from observed skill-run evidence, example validation, and a focused maintainer PR."
}
],
"promoted_candidate_id": "evidence-backed-skill-amendment"
}

View File

@@ -0,0 +1,46 @@
{
"schema_version": "ecc.evaluator-rag.trace.v1",
"scenario_id": "stale-pr-salvage-maintainer-branch",
"run_id": "2026-05-12-cleanup-salvage-prototype",
"read_only": true,
"events": [
{
"phase": "observation",
"summary": "Public PR, issue, and discussion queues are clear; release publication remains approval-gated; stale-salvage ledger has landed, skipped, superseded, and manual-review states.",
"evidence": [
"docs/ECC-2.0-GA-ROADMAP.md",
"docs/stale-pr-salvage-ledger.md"
]
},
{
"phase": "retrieval",
"summary": "Retrieved stale PR source mappings, existing maintainer salvage examples, legacy import rules, and manual-review localization tails.",
"evidence": [
"docs/stale-pr-salvage-ledger.md",
"docs/legacy-artifact-inventory.md",
"https://github.com/affaan-m/everything-claude-code/pull/1815",
"https://github.com/affaan-m/everything-claude-code/pull/1818"
]
},
{
"phase": "proposal",
"summary": "Generated two candidate playbooks: maintainer-owned salvage branch with attribution, and blind cherry-pick of stale translations.",
"candidate_ids": [
"maintainer-salvage-branch",
"blind-cherry-pick-translations"
]
},
{
"phase": "verification",
"summary": "Accepted the maintainer-owned salvage branch and rejected blind translation cherry-picking because it violates manual-review and attribution gates.",
"evidence": [
"examples/evaluator-rag-prototype/verifier-result.json"
]
},
{
"phase": "promotion",
"summary": "Promoted only the maintainer-owned salvage branch playbook as a reusable process. No repository, GitHub, release, billing, or plugin publication action is performed by this prototype.",
"promoted_candidate_id": "maintainer-salvage-branch"
}
]
}

View File

@@ -0,0 +1,35 @@
{
"schema_version": "ecc.evaluator-rag.verifier.v1",
"scenario_id": "stale-pr-salvage-maintainer-branch",
"run_id": "2026-05-12-cleanup-salvage-prototype",
"read_only": true,
"candidates": [
{
"candidate_id": "maintainer-salvage-branch",
"decision": "accepted",
"score": 0.94,
"reasons": [
"preserves source PR attribution",
"keeps work on a fresh maintainer-owned branch",
"updates the salvage ledger",
"names validation gates",
"does not perform release or publication actions"
],
"rollback": "Close the maintainer PR or revert its merge commit; source PR state remains unchanged."
},
{
"candidate_id": "blind-cherry-pick-translations",
"decision": "rejected",
"score": 0.21,
"reasons": [
"bulk localization requires translator/manual review",
"does not preserve enough source attribution",
"could import stale generated docs",
"does not name validation gates",
"risks bypassing current catalog and install architecture"
],
"rollback": "Do not create this branch; keep the localization tail in translator/manual-review state."
}
],
"promoted_candidate_id": "maintainer-salvage-branch"
}

View File

@@ -0,0 +1,117 @@
{
"schema_version": "ecc.hud-status.v1",
"generatedAt": "2026-05-12T00:00:00.000Z",
"context": {
"harness": "codex",
"model": "gpt-5",
"repo": "affaan-m/everything-claude-code",
"branch": "main",
"worktree": "/repo/everything-claude-code",
"sessionId": "session-active",
"contextWindow": {
"remainingPct": 62,
"pressure": "normal"
}
},
"toolCalls": {
"total": 47,
"pending": 0,
"stale": 0,
"lastTool": {
"name": "gh-pr-view",
"status": "success",
"finishedAt": "2026-05-12T00:00:00.000Z"
}
},
"activeAgents": [
{
"id": "worker-release-docs",
"state": "completed",
"branch": "codex/release-docs",
"worktree": "/tmp/ecc-release-docs",
"objective": "Update release readiness docs",
"handoffPath": "/tmp/ecc-release-docs/handoff.md"
}
],
"todos": {
"inProgress": "Verify release publication matrix",
"counts": {
"pending": 2,
"inProgress": 1,
"completed": 6
}
},
"checks": {
"local": [
{
"command": "npm run observability:ready",
"status": "pass"
}
],
"remote": [
{
"name": "CI",
"status": "pass",
"url": "https://github.com/affaan-m/everything-claude-code/actions"
}
]
},
"cost": {
"sessionUsd": 1.23,
"budgetUsd": 10,
"trend": "within-budget"
},
"risk": {
"status": "attention",
"reasons": [
"release tag not published"
],
"dirtyWorktree": false,
"conflicts": 0,
"manualReviewRequired": true
},
"queueState": {
"github": {
"openPullRequests": 0,
"openIssues": 0,
"openDiscussions": 0
},
"mergeQueue": [],
"conflictQueue": [],
"staleSalvageQueue": [
{
"sourcePullRequest": 1310,
"status": "landed"
}
]
},
"sessionControls": {
"supported": [
"create",
"resume",
"status",
"stop",
"diff",
"pr",
"mergeQueue",
"conflictQueue"
],
"blocked": []
},
"sync": {
"Linear": {
"project": "ECC 2.0 GA",
"health": "atRisk",
"issueCapacityBlocked": true,
"latestStatusUpdateId": "status-update-id"
},
"GitHub": {
"repo": "affaan-m/everything-claude-code",
"latestPullRequest": 1820
},
"handoff": {
"path": "~/.cluster-swarm/handoffs/ecc-update.md",
"written": true
}
}
}

View File

@@ -137,8 +137,10 @@
"skills/django-verification",
"skills/dotnet-patterns",
"skills/fastapi-patterns",
"skills/frontend-design-direction",
"skills/frontend-patterns",
"skills/frontend-slides",
"skills/make-interfaces-feel-better",
"skills/motion-ui",
"skills/golang-patterns",
"skills/golang-testing",
@@ -236,6 +238,7 @@
"skills/iterative-retrieval",
"skills/plankton-code-quality",
"skills/production-audit",
"skills/skill-scout",
"skills/skill-stocktake",
"skills/strategic-compact",
"skills/tdd-workflow",
@@ -369,6 +372,7 @@
"skills/automation-audit-ops",
"skills/api-connector-builder",
"skills/connections-optimizer",
"skills/cost-tracking",
"skills/customer-billing-ops",
"skills/dashboard-builder",
"skills/ecc-tools-cost-audit",

6
package-lock.json generated
View File

@@ -1044,9 +1044,9 @@
"license": "MIT"
},
"node_modules/fast-uri": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
"integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
"integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
"funding": [
{
"type": "github",

View File

@@ -124,6 +124,7 @@
"skills/continuous-learning/",
"skills/continuous-learning-v2/",
"skills/cost-aware-llm-pipeline/",
"skills/cost-tracking/",
"skills/council/",
"skills/cpp-coding-standards/",
"skills/cpp-testing/",
@@ -158,6 +159,7 @@
"skills/fastapi-patterns/",
"skills/finance-billing-ops/",
"skills/foundation-models-on-device/",
"skills/frontend-design-direction/",
"skills/frontend-patterns/",
"skills/frontend-slides/",
"skills/fsharp-testing/",
@@ -194,6 +196,7 @@
"skills/logistics-exception-management/",
"skills/manim-video/",
"skills/market-research/",
"skills/make-interfaces-feel-better/",
"skills/mcp-server-patterns/",
"skills/messages-ops/",
"skills/mle-workflow/",
@@ -241,6 +244,7 @@
"skills/security-review/",
"skills/security-scan/",
"skills/seo/",
"skills/skill-scout/",
"skills/skill-stocktake/",
"skills/social-graph-ranker/",
"skills/springboot-patterns/",
@@ -266,7 +270,12 @@
"skills/windows-desktop-e2e/",
"skills/workspace-surface-audit/",
"skills/x-api/",
"the-security-guide.md"
"the-security-guide.md",
"!**/__pycache__/**",
"!**/*.pyc",
"!**/*.pyo",
"!**/*.pyd",
"!**/.pytest_cache/**"
],
"bin": {
"ecc": "scripts/ecc.js",

View File

@@ -103,6 +103,13 @@ function includesAll(text, needles) {
return needles.every(needle => text.includes(needle));
}
/**
 * Report whether `value` is a non-null, non-array object that has every
 * name in `keys` as an own property.
 *
 * Always returns a strict boolean. Falsy inputs such as `null`, `undefined`,
 * `0`, or `''` yield `false` rather than leaking the input value, so the
 * result is safe to store directly in a check's `pass` field.
 *
 * @param {*} value - Candidate value, e.g. the result of parsing a JSON fixture.
 * @param {string[]} keys - Property names that must be present as own properties.
 * @returns {boolean} `true` only when `value` is an object (not an array) and
 *   owns every listed key; inherited properties do not count.
 */
function hasObjectKeys(value, keys) {
  // typeof null === 'object', so null has to be ruled out explicitly.
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }

  // hasOwnProperty is invoked via the prototype so objects that shadow or lack
  // the method (e.g. Object.create(null)) are still handled.
  return keys.every(key => Object.prototype.hasOwnProperty.call(value, key));
}
function buildChecks(rootDir) {
const packageJsonText = readText(rootDir, 'package.json');
const packageJson = safeParseJson(packageJsonText) || {};
@@ -116,6 +123,8 @@ function buildChecks(rootDir) {
const sessionStoreRust = readText(rootDir, 'ecc2/src/session/store.rs');
const sessionManagerRust = readText(rootDir, 'ecc2/src/session/manager.rs');
const readinessDoc = readText(rootDir, 'docs/architecture/observability-readiness.md');
const hudStatusContract = readText(rootDir, 'docs/architecture/hud-status-session-control.md');
const hudStatusFixture = safeParseJson(readText(rootDir, 'examples/hud-status-contract.json')) || {};
const quickstart = readText(rootDir, 'docs/releases/2.0.0-rc.1/quickstart.md');
const releaseNotes = readText(rootDir, 'docs/releases/2.0.0-rc.1/release-notes.md');
@@ -130,6 +139,50 @@ function buildChecks(rootDir) {
&& includesAll(loopStatus, ['--json', '--watch', '--write-dir']),
fix: 'Restore loop-status JSON/watch/write-dir support.'
},
{
id: 'hud-status-control-contract',
category: 'Live Status',
points: 2,
path: 'docs/architecture/hud-status-session-control.md',
description: 'HUD/status and session-control surfaces have a portable JSON contract',
pass: fileExists(rootDir, 'docs/architecture/hud-status-session-control.md')
&& fileExists(rootDir, 'examples/hud-status-contract.json')
&& includesAll(hudStatusContract, [
'context',
'toolCalls',
'activeAgents',
'todos',
'checks',
'cost',
'risk',
'queueState',
'create',
'resume',
'status',
'stop',
'diff',
'pr',
'mergeQueue',
'conflictQueue',
'Linear',
'GitHub',
'handoff'
])
&& hudStatusFixture.schema_version === 'ecc.hud-status.v1'
&& hasObjectKeys(hudStatusFixture, [
'context',
'toolCalls',
'activeAgents',
'todos',
'checks',
'cost',
'risk',
'queueState',
'sessionControls',
'sync'
]),
fix: 'Add the HUD/status session-control contract doc and example JSON fixture.'
},
{
id: 'session-inspect-adapter-registry',
category: 'Session Trace',

View File

@@ -0,0 +1,147 @@
---
name: cost-tracking
description: Track and report Claude Code token usage, spending, and budgets from a local cost-tracking database. Use when the user asks about costs, spending, usage, tokens, budgets, or cost breakdowns by project, tool, session, or date.
origin: community
---
# Cost Tracking
Use this skill to analyze Claude Code cost and usage history from a local SQLite
database. It is intended for users who already have a cost-tracking hook or
plugin writing usage rows to `~/.claude-cost-tracker/usage.db`.
Source: salvaged from stale community PR #1304 by `MayurBhavsar`.
## When to Use
- The user asks "how much have I spent?", "what did this session cost?", or
"what is my token usage?"
- The user mentions budgets, spending limits, overruns, or cost controls.
- The user wants a cost breakdown by project, tool, session, model, or date.
- The user wants to compare today against yesterday or inspect a recent trend.
- The user asks for a CSV export of recent usage records.
## How It Works
First verify prerequisites:
```bash
command -v sqlite3 >/dev/null && echo "sqlite3 available" || echo "sqlite3 missing"
test -f ~/.claude-cost-tracker/usage.db && echo "Database found" || echo "Database not found"
```
If the database is missing, do not fabricate usage data. Tell the user that cost
tracking is not configured and suggest installing or enabling a trusted local
cost-tracking hook/plugin.
The expected `usage` table usually contains one row per tool call or model
interaction. Column names vary by tracker, but the examples below assume:
| Column | Meaning |
| --- | --- |
| `timestamp` | ISO timestamp for the usage event |
| `project` | Project or repository name |
| `tool_name` | Tool or event name |
| `input_tokens` | Input token count, when recorded |
| `output_tokens` | Output token count, when recorded |
| `cost_usd` | Precomputed cost in USD |
| `session_id` | Claude Code session identifier |
| `model` | Model used for the event |
Prefer `cost_usd` over hand-calculating pricing. Model prices and cache pricing
change over time, and the tracker should be the source of truth for how each row
was priced.
## Examples
### Quick Summary
```bash
sqlite3 ~/.claude-cost-tracker/usage.db "
SELECT
'Today: $' || ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now') THEN cost_usd END), 0), 4) ||
' | Total: $' || ROUND(COALESCE(SUM(cost_usd), 0), 4) ||
' | Calls: ' || COUNT(*) ||
' | Sessions: ' || COUNT(DISTINCT session_id)
FROM usage;
"
```
### Cost By Project
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT project, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY project
ORDER BY cost DESC;
"
```
### Cost By Tool
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT tool_name, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY tool_name
ORDER BY cost DESC;
"
```
### Last Seven Days
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT date(timestamp) AS date, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
FROM usage
GROUP BY date(timestamp)
ORDER BY date DESC
LIMIT 7;
"
```
### Session Drilldown
```bash
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
SELECT session_id,
MIN(timestamp) AS started,
MAX(timestamp) AS ended,
ROUND(SUM(cost_usd), 4) AS cost,
COUNT(*) AS calls
FROM usage
GROUP BY session_id
ORDER BY started DESC
LIMIT 10;
"
```
## Reporting Guidance
When presenting cost data, include:
1. Today's spend, compared with yesterday's.
2. Total spend across the tracked database.
3. Top projects ranked by cost.
4. Top tools ranked by cost.
5. Session count and average cost per session when enough data exists.
For small amounts, format currency with four decimal places. For larger amounts,
two decimals are enough.
## Anti-Patterns
- Do not estimate costs from raw token counts when `cost_usd` is present.
- Do not assume the database exists without checking.
- Do not run unbounded `SELECT *` exports on large databases.
- Do not hard-code current model pricing in user-facing answers.
- Do not recommend installing unreviewed hooks or plugins that execute arbitrary
code.
## Related
- `/cost-report` - Command-form report using the same database.
- `cost-aware-llm-pipeline` - Model-routing and budget-design patterns.
- `token-budget-advisor` - Context and token-budget planning.
- `strategic-compact` - Context compaction to reduce repeated token spend.

View File

@@ -0,0 +1,457 @@
---
name: django-celery
description: Django + Celery async task patterns — configuration, task design, beat scheduling, retries, canvas workflows, monitoring, and testing. Use when adding background jobs, scheduled tasks, or async processing to a Django app.
origin: ECC
---
# Django + Celery Async Task Patterns
Production-grade patterns for background task processing in Django using Celery with Redis or RabbitMQ.
## When to Activate
- Adding background jobs or async processing to a Django app
- Implementing periodic/scheduled tasks
- Offloading slow operations (email, PDF generation, API calls) from the request cycle
- Setting up Celery Beat for cron-like scheduling
- Debugging task failures, retries, or queue backlogs
- Writing tests for Celery tasks
## Project Setup
### Installation
```bash
# Quote the extras specifier: shells such as zsh treat an unquoted [redis] as a glob pattern.
pip install "celery[redis]" django-celery-results django-celery-beat
```
### `celery.py` — App Entrypoint
```python
# config/celery.py
import os
from celery import Celery
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.development')
app = Celery('myproject')
app.config_from_object('django.conf:settings', namespace='CELERY')
app.autodiscover_tasks() # Discovers tasks.py in each INSTALLED_APP
@app.task(bind=True, ignore_result=True)
def debug_task(self):
print(f'Request: {self.request!r}')
```
```python
# config/__init__.py
from .celery import app as celery_app
__all__ = ('celery_app',)
```
### Django Settings
```python
# config/settings/base.py
# Broker (Redis recommended for production)
CELERY_BROKER_URL = env('CELERY_BROKER_URL', default='redis://localhost:6379/0')
CELERY_RESULT_BACKEND = env('CELERY_RESULT_BACKEND', default='django-db')
# Serialization
CELERY_ACCEPT_CONTENT = ['json']
CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
# Task behavior
CELERY_TASK_TRACK_STARTED = True
CELERY_TASK_TIME_LIMIT = 30 * 60 # Hard limit: 30 min
CELERY_TASK_SOFT_TIME_LIMIT = 25 * 60 # Soft limit: sends SoftTimeLimitExceeded
CELERY_WORKER_PREFETCH_MULTIPLIER = 1 # Prevent worker hoarding long tasks
CELERY_TASK_ACKS_LATE = True # Re-queue on worker crash
# Result persistence
CELERY_RESULT_EXPIRES = 60 * 60 * 24 # Keep results 24 hours
# Beat scheduler (for periodic tasks)
CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler'
# Installed apps
INSTALLED_APPS += [
'django_celery_results',
'django_celery_beat',
]
```
### Running Workers
```bash
# Start worker (development)
celery -A config worker --loglevel=info
# Start beat scheduler (periodic tasks)
celery -A config beat --loglevel=info --scheduler django_celery_beat.schedulers:DatabaseScheduler
# Combined worker + beat (dev only, never production)
celery -A config worker --beat --loglevel=info
# Production: multiple workers with concurrency
celery -A config worker --loglevel=warning --concurrency=4 -Q default,high_priority
```
## Task Design Patterns
### Basic Task
```python
# apps/notifications/tasks.py
from celery import shared_task
import logging
logger = logging.getLogger(__name__)
@shared_task(name='notifications.send_welcome_email')
def send_welcome_email(user_id: int) -> None:
"""Send welcome email to newly registered user."""
from apps.users.models import User
from apps.notifications.services import EmailService
try:
user = User.objects.get(pk=user_id)
except User.DoesNotExist:
logger.warning('send_welcome_email: user %s not found', user_id)
return # Idempotent — do not raise, task already impossible to complete
EmailService.send_welcome(user)
logger.info('Welcome email sent to user %s', user_id)
```
### Retryable Task
```python
@shared_task(
bind=True,
name='integrations.sync_to_crm',
max_retries=5,
default_retry_delay=60, # fallback delay (seconds) for self.retry() calls made without countdown; autoretries use the backoff settings below
autoretry_for=(ConnectionError, TimeoutError),
retry_backoff=True, # exponential backoff between autoretries
retry_backoff_max=600, # cap at 10 minutes
retry_jitter=True, # randomise to avoid thundering herd
)
def sync_contact_to_crm(self, contact_id: int) -> dict:
"""Sync contact to external CRM with retry on transient failures."""
from apps.crm.services import CRMClient
try:
result = CRMClient().sync(contact_id)
return result
except CRMClient.RateLimitError as exc:
# Explicit retry: the countdown comes from the rate-limit response, overriding default_retry_delay
raise self.retry(exc=exc, countdown=int(exc.retry_after))
```
### Idempotent Task Pattern
Design tasks so they can safely run multiple times with the same inputs:
```python
@shared_task(name='orders.mark_shipped')
def mark_order_shipped(order_id: int, tracking_number: str) -> None:
"""Mark order as shipped — safe to run multiple times."""
from apps.orders.models import Order
updated = Order.objects.filter(
pk=order_id,
status=Order.Status.PROCESSING, # Guard: only update if not already shipped
).update(
status=Order.Status.SHIPPED,
tracking_number=tracking_number,
)
if not updated:
logger.info('mark_order_shipped: order %s already shipped or not found', order_id)
```
### Task with Soft Time Limit
```python
from celery.exceptions import SoftTimeLimitExceeded
@shared_task(
bind=True,
name='reports.generate_pdf',
soft_time_limit=120,
time_limit=150,
)
def generate_pdf_report(self, report_id: int) -> str:
"""Generate PDF report with graceful timeout handling."""
from apps.reports.services import PDFGenerator
try:
path = PDFGenerator.build(report_id)
return path
except SoftTimeLimitExceeded:
# Clean up partial files before hard kill
PDFGenerator.cleanup(report_id)
raise
```
## Calling Tasks
```python
from datetime import timedelta
from django.utils import timezone
# Fire and forget (async)
send_welcome_email.delay(user.pk)
# Schedule in the future
send_reminder.apply_async(args=[user.pk], countdown=3600) # 1 hour from now
send_reminder.apply_async(args=[user.pk], eta=timezone.now() + timedelta(days=1))
# Apply with queue routing
sync_contact_to_crm.apply_async(args=[contact.pk], queue='high_priority')
# Run synchronously (tests / debugging only)
result = generate_pdf_report.apply(args=[report.pk])
```
## Beat Scheduling (Periodic Tasks)
### Code-Defined Schedule
```python
# config/settings/base.py
from celery.schedules import crontab
CELERY_BEAT_SCHEDULE = {
'cleanup-expired-sessions': {
'task': 'users.cleanup_expired_sessions',
'schedule': crontab(hour=2, minute=0), # 2am daily
},
'sync-inventory': {
'task': 'products.sync_inventory',
'schedule': 60.0, # every 60 seconds
},
'weekly-digest': {
'task': 'notifications.send_weekly_digest',
'schedule': crontab(day_of_week='monday', hour=8, minute=0),
},
}
```
### Database-Defined Schedule (via django-celery-beat)
```python
# Manage periodic tasks from Django admin or code
from django_celery_beat.models import PeriodicTask, CrontabSchedule
import json
schedule, _ = CrontabSchedule.objects.get_or_create(
hour='*/6', minute='0',
timezone='UTC',
)
PeriodicTask.objects.update_or_create(
name='Sync inventory every 6 hours',
defaults={
'crontab': schedule,
'task': 'products.sync_inventory',
'args': json.dumps([]),
'enabled': True,
}
)
```
## Canvas: Chaining and Grouping Tasks
```python
from celery import chain, group, chord
# Chain: run tasks sequentially, passing results
pipeline = chain(
fetch_data.s(source_id),
transform_data.s(), # receives fetch_data result as first arg
load_to_warehouse.s(),
)
pipeline.delay()
# Group: run tasks in parallel
parallel = group(
send_welcome_email.s(user_id)
for user_id in new_user_ids
)
parallel.delay()
# Chord: parallel tasks + callback when all complete
result = chord(
group(process_chunk.s(chunk) for chunk in data_chunks),
aggregate_results.s(), # called with list of chunk results
)
result.delay()
```
## Error Handling and Dead Letter Queue
```python
# apps/core/tasks.py
from celery.signals import task_failure
@task_failure.connect
def on_task_failure(sender, task_id, exception, args, kwargs, traceback, einfo, **kw):
"""Log all task failures to Sentry / alerting."""
import sentry_sdk
with sentry_sdk.new_scope() as scope:
scope.set_context('celery', {
'task': sender.name,
'task_id': task_id,
'args': args,
'kwargs': kwargs,
})
sentry_sdk.capture_exception(exception)
```
```python
# Route failed tasks to dead-letter queue after max retries
@shared_task(
bind=True,
max_retries=3,
name='payments.charge_card',
)
def charge_card(self, order_id: int) -> None:
from apps.payments.models import Order, FailedCharge
try:
_do_charge(order_id)
except Exception as exc:
if self.request.retries >= self.max_retries:
# Persist to dead-letter table for manual review
FailedCharge.objects.create(
order_id=order_id,
error=str(exc),
task_id=self.request.id,
)
return # Don't raise — task is permanently failed
raise self.retry(exc=exc)
```
## Testing Celery Tasks
### Unit Testing (No Broker)
```python
# tests/test_tasks.py
import pytest
from unittest.mock import patch, MagicMock
from apps.notifications.tasks import send_welcome_email
class TestSendWelcomeEmail:
@pytest.mark.django_db
def test_sends_email_to_existing_user(self, user):
with patch('apps.notifications.services.EmailService') as mock_email:
send_welcome_email(user.pk)
mock_email.send_welcome.assert_called_once_with(user)
@pytest.mark.django_db
def test_skips_missing_user_gracefully(self):
"""Should not raise when user is deleted between enqueue and execute."""
send_welcome_email(99999) # Non-existent user — must not raise
```
### Integration Testing with CELERY_TASK_ALWAYS_EAGER
```python
# config/settings/test.py
CELERY_TASK_ALWAYS_EAGER = True # Run tasks synchronously in tests
CELERY_TASK_EAGER_PROPAGATES = True # Re-raise exceptions from tasks
# tests/test_integration.py
@pytest.mark.django_db
def test_registration_triggers_welcome_email(client):
with patch('apps.notifications.services.EmailService') as mock_email:
response = client.post('/api/users/', {
'email': 'new@example.com',
'password': 'strongpass123',
})
assert response.status_code == 201
mock_email.send_welcome.assert_called_once()
```
### Testing Retries
```python
@pytest.mark.django_db
def test_task_retries_on_connection_error():
with patch('apps.crm.services.CRMClient.sync') as mock_sync:
mock_sync.side_effect = ConnectionError('timeout')
with pytest.raises(ConnectionError):
sync_contact_to_crm.apply(args=[1], throw=True)
assert mock_sync.call_count == 1 # First attempt only when eager
```
## Monitoring
```bash
# Inspect active workers and queues
celery -A config inspect active
celery -A config inspect stats
celery -A config inspect reserved
# Check queue lengths (Redis)
redis-cli llen celery
# Flower: web-based real-time monitor
pip install flower
celery -A config flower --port=5555
```
## Anti-Patterns
```python
# BAD: Passing model instances — they may be stale by execution time
send_welcome_email.delay(user) # Never pass ORM objects
send_welcome_email.delay(user.pk) # Always pass PKs
# BAD: Calling tasks synchronously in production views
result = generate_report.apply() # Blocks the request thread
# BAD: Non-idempotent task without guards
@shared_task
def charge_and_fulfill(order_id):
order.charge() # May charge twice if task retries!
order.fulfill()
# GOOD: Idempotent with status guard
@shared_task
def charge_and_fulfill(order_id):
    """Charge and fulfill an order at most once, guarded by a row lock and a status check."""
    from django.db import transaction

    # select_for_update() must be evaluated inside a transaction: in autocommit
    # mode Django raises TransactionManagementError, and the row lock has to be
    # held until the charge/fulfill work has been committed.
    with transaction.atomic():
        order = Order.objects.select_for_update().get(pk=order_id)
        if order.status != Order.Status.PENDING:
            return  # Already processed
        order.charge()
        order.fulfill()
```
## Production Checklist
| Check | Setting |
|-------|---------|
| Worker restarts on crash | `supervisord` or `systemd` unit |
| `CELERY_TASK_ACKS_LATE = True` | Re-queue tasks on worker crash |
| `CELERY_WORKER_PREFETCH_MULTIPLIER = 1` | Fair distribution of long tasks |
| Separate queues per priority | `-Q default,high_priority,low_priority` |
| `CELERY_TASK_SOFT_TIME_LIMIT` set | Graceful timeout before hard kill |
| Sentry integration | Capture all `task_failure` signals |
| Flower or other monitor | Visibility into queue depths |
| Beat runs on single node only | Prevents duplicate scheduled task execution |
## Related Skills
- `django-patterns` — ORM, service layer, and project structure
- `django-tdd` — Testing Django models, views, and services
- `python-testing` — pytest configuration and fixtures

View File

@@ -0,0 +1,92 @@
---
name: frontend-design-direction
description: Set an ECC-specific frontend design direction for production UI work. Use when building or improving websites, dashboards, applications, components, landing pages, visual tools, or any web UI that needs stronger product-specific design judgment.
origin: community
---
# Frontend Design Direction
Use this skill when the work is not just making UI function, but making it feel
purposeful, polished, and appropriate to the product domain.
Source: salvaged from stale community PR #1659 by `linus707`.
Note: ECC intentionally does not rebundle the canonical Anthropic
`frontend-design` skill. Install that from `anthropics/skills` when you want the
official upstream skill. This skill is the ECC-specific design-direction salvage
of the useful local guidance from #1659.
## When to Use
- The user asks to build a web page, app, dashboard, artifact, component, or UI.
- The user asks to make an interface more polished, distinctive, beautiful, or
less generic.
- The implementation needs visual hierarchy, typography, color, motion, layout,
and interaction choices.
- The current UI works but reads as flat, generic, templated, or mismatched to
the audience.
## Design Direction
Before coding, choose a specific direction:
1. Purpose: what job does the interface do?
2. Audience: who repeats this workflow, and what do they need to scan first?
3. Tone: utilitarian, editorial, playful, industrial, refined, technical,
maximal, minimal, dense, calm, or another explicit direction.
4. Memorable detail: one design idea that makes the result feel intentional.
5. Constraints: framework, accessibility, performance, responsiveness, and
existing design system.
Match the direction to the domain. A SaaS operations tool should usually be
dense, quiet, and scannable. A portfolio, launch page, game, or editorial piece
can be more expressive. Do not force a landing-page composition onto a tool that
needs repeated daily use.
## Implementation Guidance
- Build the actual usable experience as the first screen unless the user
explicitly asks for marketing copy.
- Use existing project components, tokens, icon libraries, and routing patterns
before introducing a new visual system.
- Use real or generated visual assets when the interface depends on images,
products, places, people, gameplay, charts, or inspectable media.
- Prefer contextual typography and spacing over generic oversized hero text.
- Keep palettes multi-dimensional: avoid a UI dominated by one hue family.
- Use CSS variables or existing design tokens so the direction remains
coherent across states.
- Design responsive constraints explicitly: grids, aspect ratios, min/max
sizes, stable toolbars, and fixed-format controls should not shift when labels
or hover states appear.
- Use motion sparingly but deliberately. Prefer high-signal transitions that
clarify state over decorative animation.
- Verify text fit on mobile and desktop. Long labels must wrap or resize
cleanly rather than overflowing.
## Anti-Patterns
- Do not default to common generated patterns: purple gradients, decorative
blobs, oversized cards, vague hero copy, or stock-like atmospheric media.
- Do not add UI cards inside other cards.
- Do not use a single decorative style everywhere when the domain calls for
restraint.
- Do not hide the primary product, tool, object, or workflow behind generic
marketing sections.
- Do not add a new dependency for a design flourish unless it clearly pays for
itself.
- Do not describe the UI's features inside the UI when the controls can speak
for themselves.
## Review Checklist
- The first viewport immediately communicates the product, workflow, or object.
- The visual hierarchy supports scanning and repeated use.
- Typography fits the container and does not overlap adjacent content.
- Color choices have contrast and do not collapse into a one-note palette.
- Icons are used for familiar tool actions where available.
- Responsive layout has stable dimensions for boards, grids, toolbars,
controls, tiles, and counters.
- Assets render and carry the subject matter instead of acting as filler.
- Motion improves orientation and does not mask sluggishness.
- The result matches the repo's existing frontend conventions unless there is a
clear reason to depart.

View File

@@ -0,0 +1,151 @@
---
name: make-interfaces-feel-better
description: Apply concrete design-engineering details that make interfaces feel polished. Use when reviewing or improving UI spacing, typography, borders, shadows, motion, hit areas, icons, text wrapping, and interaction states.
origin: community
---
# Make Interfaces Feel Better
Use this skill for the small design-engineering details that compound into a
more polished interface.
Source: salvaged from stale community PR #1659 by `linus707`.
## When to Use
- The user says the UI feels off, flat, generic, cramped, jumpy, or unfinished.
- You are building controls, cards, lists, dashboards, navigation, forms, or
toolbars.
- A component needs hover, active, focus, enter, exit, loading, or empty states.
- A frontend review needs specific before/after recommendations.
## Core Principles
### Concentric Radius
For nearby nested rounded surfaces:
```text
outer radius = inner radius + padding
```
If padding is large, treat layers as separate surfaces instead of forcing the
math. The point is optical coherence, not formula worship.
### Optical Alignment
Geometric centering is not always visual centering. Icon buttons, play
triangles, arrows, stars, and asymmetric icons often need a small offset. Fix the
SVG when possible; otherwise adjust with a pixel-level margin or padding change.
### Shadows And Borders
Use borders for separation and focus rings. Use layered shadows when a card,
button, dropdown, or popover needs depth. Shadows should be transparent and
subtle enough to work across backgrounds.
### Text Wrapping
- Use `text-wrap: balance` on headings and short titles.
- Use `text-wrap: pretty` on short-to-medium body text, captions, descriptions,
and list items.
- Avoid both on long prose, code, and preformatted content.
- Use `font-variant-numeric: tabular-nums` for counters, timers, prices, tables,
and other updating numbers.
### Font Smoothing
On macOS, apply antialiased font smoothing at the root layout when the project
does not already do so:
```css
html {
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
```
### Image Outlines
Images often need a subtle inset outline so their edges do not blur into the
surface.
```css
img {
outline: 1px solid rgba(0, 0, 0, 0.1);
outline-offset: -1px;
}
@media (prefers-color-scheme: dark) {
img {
outline-color: rgba(255, 255, 255, 0.1);
}
}
```
Use neutral black or white alpha outlines. Do not tint image outlines with the
brand palette.
### Motion
Use CSS transitions for interactive state changes because they can retarget
when the user changes intent mid-motion. Reserve keyframes for staged
one-shot entrances or loading sequences.
Good motion defaults:
- Enter: combine opacity, small `translateY`, and optionally blur.
- Exit: shorter and quieter than enter, usually 150ms.
- Press: `scale(0.96)` for tactile buttons, with a way to disable it when the
movement distracts.
- Icon swaps: cross-fade with opacity, scale, and blur instead of instant
visibility toggles.
### Transition Scope
Never use `transition: all`. Specify the changed properties:
```css
.button {
transition-property: transform, background-color, box-shadow;
transition-duration: 150ms;
transition-timing-function: ease-out;
}
```
Use `will-change` only for first-frame stutter on compositor-friendly
properties such as `transform`, `opacity`, and `filter`. Never use
`will-change: all`.
### Hit Areas
Interactive controls should have at least a 40x40px hit area, ideally 44x44px
where the layout allows it. Expand with a pseudo-element when the visible icon
is smaller, but do not let expanded hit areas overlap.
## Review Output
When reviewing a UI polish pass, report concrete changes in before/after rows:
| Principle | Before | After |
| --- | --- | --- |
| Concentric radius | Same radius on parent and child | Parent radius accounts for padding |
| Tabular numbers | Counter shifts as digits change | Counter uses `tabular-nums` |
| Transition scope | `transition: all` | Explicit transition properties |
Include file paths and properties when they are not obvious from the snippets.
Omit principles that you checked but did not change.
## Checklist
- Nested rounded elements are optically coherent.
- Icons are visually centered.
- Buttons, cards, and popovers use borders or shadows for the right reason.
- Headings and short text avoid awkward wrapping.
- Dynamic numbers use tabular numerals.
- Images have neutral outlines where needed.
- Enter and exit animations are split, subtle, and interruptible where
appropriate.
- Buttons have tactile active states without exaggerated motion.
- `transition: all` and `will-change: all` are absent.
- Small controls still have usable hit areas.

140
skills/skill-scout/SKILL.md Normal file
View File

@@ -0,0 +1,140 @@
---
name: skill-scout
description: Search existing local, marketplace, GitHub, and web skill sources before creating a new skill. Use when the user wants to create, build, fork, or find a skill for a workflow.
origin: community
---
# Skill Scout
Use this skill before creating a new skill. The goal is to avoid duplicating
existing community or marketplace work, while still vetting anything external
before adoption.
Source: salvaged from stale community PR #1232 by `redminwang`.
## When to Use
- The user says "create a skill", "build a skill", "make a skill", or "new
skill".
- The user asks "is there a skill for X?" or "does a skill exist that does Y?"
- The user describes a workflow and you are about to suggest creating a new
skill.
- The user wants to fork or extend an existing skill.
If the user explicitly says to skip search or create from scratch, acknowledge
that and proceed with the requested creation workflow.
## How It Works
### Step 1 - Capture Intent
Extract:
- The task the skill should perform.
- The trigger conditions for using it.
- The domain, tools, frameworks, or data sources involved.
- Three to five search keywords plus useful synonyms.
### Step 2 - Search Local Sources
Search installed and marketplace skill names first. Local sources are preferred
because they are already part of the user's environment.
```bash
find ~/.claude/skills -maxdepth 2 -name SKILL.md 2>/dev/null | grep -iE "keyword|synonym"
find ~/.claude/plugins/marketplaces -path '*/skills/*/SKILL.md' 2>/dev/null | grep -iE "keyword|synonym"
```
Then search the full text of each skill file, which includes the frontmatter descriptions:
```bash
grep -RilE "keyword|synonym" ~/.claude/skills ~/.claude/plugins/marketplaces 2>/dev/null
```
### Step 3 - Search Remote Sources
Use available GitHub and web search tools. Prefer concise queries:
```bash
gh search repos "claude code skill keyword" --limit 10 --sort stars
gh search code "name: keyword" --filename SKILL.md --limit 10
```
For web search, use at most three targeted queries such as:
```text
"claude code skill" keyword
"SKILL.md" keyword
"everything-claude-code" keyword
```
### Step 4 - Vet External Matches
Before recommending any external skill for adoption or forking:
- Read the `SKILL.md` frontmatter and instructions.
- Look for unexpected shell commands, file writes, network calls, credential
handling, or package installs.
- Check whether the repository appears maintained.
- Prefer copying into a fresh local branch and reviewing the diff over editing
marketplace originals.
### Step 5 - Rank Results
Rank candidates by:
1. Exact keyword match in the skill name.
2. Keyword or synonym match in description.
3. Local installed or marketplace source.
4. Maintained GitHub source with recent activity.
5. Web-only mention.
Cap the final list at 10 results.
### Step 6 - Present Decision Options
Give the user a short table:
| Option | Meaning |
| --- | --- |
| Use existing | Invoke or install a matching skill as-is. |
| Fork or extend | Copy the closest skill and modify it. |
| Create fresh | Build a new skill after confirming no close match exists. |
Only create a new skill after the user chooses that path or after the search
finds no close match.
## Examples
### Result Table
```markdown
| # | Skill | Source | Why it matches | Gap |
| --- | --- | --- | --- | --- |
| 1 | article-writing | Local ECC | Drafts articles and guides | Not focused on release notes |
| 2 | content-engine | Local ECC | Multi-format content workflow | Heavier than needed |
| 3 | blog-writer | GitHub | Blog writing skill with recent commits | Needs security review |
```
### User-Facing Summary
```markdown
I found two close local matches and one external candidate. The closest fit is
`article-writing`; it covers drafting and revision, but it does not include the
release-note checklist you asked for. I can either use it as-is, fork it into a
release-note variant, or create a fresh skill.
```
## Anti-Patterns
- Do not jump directly to new skill creation when a search is reasonable.
- Do not install external skills without reading them first.
- Do not present a long unranked list of weak matches.
- Do not treat web-only mentions as trusted sources.
- Do not edit installed marketplace originals in place.
## Related
- `search-first` - General search-before-building workflow.
- `skill-stocktake` - Audit installed skills for health, duplicates, and gaps.
- `agent-sort` - Categorize and organize existing agents and skills.

View File

@@ -0,0 +1,82 @@
#!/usr/bin/env node
'use strict';
const assert = require('assert');
const fs = require('fs');
const path = require('path');
// Repository root: two directory levels above the directory that holds this script.
const repoRoot = path.resolve(__dirname, '..', '..');
// The agent definition that every check in this script reads.
const reviewerPath = path.join(repoRoot, 'agents', 'code-reviewer.md');
// Headings that must appear verbatim in the file (checked with String.prototype.includes).
const requiredHeadings = [
'## Confidence-Based Filtering',
'### Pre-Report Gate',
'### HIGH / CRITICAL Require Proof',
'### It Is Acceptable And Expected To Return Zero Findings',
'## Common False Positives - Skip These',
];
// Phrases that must match somewhere in the file. Every pattern is case-insensitive and
// none carries the g or y flag, so RegExp.prototype.test keeps no lastIndex state between calls.
const requiredPatterns = [
/Can I cite the exact line/i,
/concrete failure mode/i,
/Have I read the surrounding context/i,
/Severity inflation/i,
/exact snippet and line number/i,
/specific failure scenario/i,
/demote to MEDIUM or drop/i,
/clean review is a valid review/i,
/Manufactured findings/i,
/Common False Positives/i,
/Consider adding error handling/i,
/Missing input validation/i,
/Magic number/i,
// \s+ lets this phrase match even when it is wrapped across two lines.
/Would a senior engineer on this\s+team actually change this in review/i,
/Do not withhold approval to appear rigorous/i,
];
// Running totals, reported by the summary at the end of the script.
let passed = 0;
let failed = 0;

/**
 * Execute a single named check and print its outcome.
 *
 * @param {string} name - Label printed next to the PASS/FAIL marker.
 * @param {Function} fn - Check body; any exception it throws marks the check as failed.
 */
function test(name, fn) {
  const recordPass = () => {
    console.log(` PASS ${name}`);
    passed += 1;
  };
  const recordFail = (error) => {
    console.log(` FAIL ${name}`);
    console.log(` Error: ${error.message}`);
    failed += 1;
  };

  try {
    fn();
    recordPass();
  } catch (error) {
    recordFail(error);
  }
}
/**
 * Load the reviewer agent definition from disk. The file is re-read on every call.
 *
 * @returns {string} UTF-8 text of the file at reviewerPath.
 */
function readReviewer() {
  const contents = fs.readFileSync(reviewerPath, { encoding: 'utf8' });
  return contents;
}
console.log('\n=== Testing code-reviewer false-positive guardrails ===\n');

// One check per required heading. The file is read inside each check so that a
// read failure is reported against that check instead of aborting the script.
requiredHeadings.forEach((heading) => {
  test(`code-reviewer.md contains heading: ${heading}`, () => {
    const hasHeading = readReviewer().includes(heading);
    assert.ok(hasHeading, `code-reviewer.md missing required heading "${heading}"`);
  });
});

// One check per required phrase pattern.
requiredPatterns.forEach((pattern) => {
  test(`code-reviewer.md matches ${pattern}`, () => {
    const matched = pattern.test(readReviewer());
    assert.ok(matched, `code-reviewer.md missing required pattern ${pattern}`);
  });
});

test('code-reviewer.md retains the >80% confidence threshold', () => {
  const thresholdPattern = />\s*80%\s*confident/i;
  assert.ok(thresholdPattern.test(readReviewer()), 'code-reviewer.md missing >80% confidence threshold');
});

// Summary: exit non-zero when any check failed, otherwise report the pass count.
if (failed === 0) {
  console.log(`\nPassed: ${passed}`);
} else {
  console.log(`\nFailed: ${failed}`);
  process.exit(1);
}

View File

@@ -0,0 +1,417 @@
'use strict';
const assert = require('assert');
const fs = require('fs');
const path = require('path');
// Repository root: two directory levels above the directory that holds this script.
const repoRoot = path.resolve(__dirname, '..', '..');
// Directory containing the evaluator RAG prototype fixtures read by readJson/readFixtureJson.
const fixtureRoot = path.join(repoRoot, 'examples', 'evaluator-rag-prototype');
// Running totals of checks that passed and failed.
let passed = 0;
let failed = 0;

/**
 * Execute a single named check and print its outcome.
 *
 * Passing and failing checks get distinct PASS/FAIL prefixes so the two
 * outcomes can be told apart in the log without relying on the Error line.
 *
 * @param {string} name - Label printed next to the outcome marker.
 * @param {Function} fn - Check body; any exception it throws marks the check as failed.
 */
function test(name, fn) {
  try {
    fn();
    console.log(` PASS ${name}`);
    passed++;
  } catch (error) {
    console.log(` FAIL ${name}`);
    console.log(` Error: ${error.message}`);
    failed++;
  }
}
/**
 * Read a repository file as UTF-8 text.
 *
 * @param {string} relativePath - Path relative to repoRoot.
 * @returns {string} File contents.
 */
function read(relativePath) {
  const absolutePath = path.join(repoRoot, relativePath);
  return fs.readFileSync(absolutePath, { encoding: 'utf8' });
}
/**
 * Read and parse a JSON fixture.
 *
 * @param {string} fileName - Path relative to fixtureRoot (may include subdirectories).
 * @returns {*} Parsed JSON value.
 * @throws {Error} If the file cannot be read, or SyntaxError if it is not valid JSON.
 */
function readJson(fileName) {
  return JSON.parse(fs.readFileSync(path.join(fixtureRoot, fileName), 'utf8'));
}

/**
 * Alias of readJson kept for call sites that pass nested fixture paths.
 * Delegates so the two helpers cannot drift apart.
 *
 * @param {string} relativePath - Path relative to fixtureRoot.
 * @returns {*} Parsed JSON value.
 */
function readFixtureJson(relativePath) {
  return readJson(relativePath);
}
console.log('\n=== Testing evaluator RAG prototype ===\n');

// The architecture doc must mention every artifact type and reference project listed below.
test('architecture doc records the artifact contract and reference pressure', () => {
  const doc = read('docs/architecture/evaluator-rag-prototype.md');
  const requiredTerms = [
    'Scenario spec',
    'Trace',
    'Report',
    'Candidate playbook',
    'Verifier result',
    'Meta-Harness',
    'Autocontext',
    'Claude HUD',
    'Hermes Agent',
    'dmux, Orca, Superset, and Ghast',
    'ECC Tools',
  ];
  requiredTerms.forEach((term) => {
    assert.ok(doc.includes(term), `Missing doc requirement: ${term}`);
  });
});
// All four fixtures carry their own schema version; the derived artifacts must
// point back at the scenario and be flagged read-only.
test('fixtures use one scenario id and declare read-only behavior', () => {
  const scenario = readJson('scenario.json');
  const trace = readJson('trace.json');
  const report = readJson('report.json');
  const verifier = readJson('verifier-result.json');

  const expectedSchemas = [
    [scenario, 'ecc.evaluator-rag.scenario.v1'],
    [trace, 'ecc.evaluator-rag.trace.v1'],
    [report, 'ecc.evaluator-rag.report.v1'],
    [verifier, 'ecc.evaluator-rag.verifier.v1'],
  ];
  for (const [fixture, schemaVersion] of expectedSchemas) {
    assert.strictEqual(fixture.schema_version, schemaVersion);
  }

  [trace, report, verifier].forEach((derived) => {
    assert.strictEqual(derived.scenario_id, scenario.scenario_id);
    assert.strictEqual(derived.read_only, true);
  });
});
// The trace must contain every phase of the loop and record the promoted candidate.
test('trace covers the full self-improving harness loop', () => {
  const trace = readJson('trace.json');
  const seenPhases = new Set(trace.events.map((entry) => entry.phase));
  const expectedPhases = ['observation', 'retrieval', 'proposal', 'verification', 'promotion'];
  expectedPhases.forEach((phase) => {
    assert.ok(seenPhases.has(phase), `Missing trace phase ${phase}`);
  });
  // Kept verbatim: with no message argument, Node reports this call's source text on failure.
  assert.ok(trace.events.some(event => event.promoted_candidate_id === 'maintainer-salvage-branch'));
});
// Each blocked phrase must occur somewhere in the scenario's forbidden-action list
// (substring match against the newline-joined entries).
test('scenario blocks unsafe write actions and release actions', () => {
  const forbiddenText = readJson('scenario.json').forbidden_actions.join('\n');
  const blockedPhrases = [
    'closing, reopening, or commenting on PRs',
    'merging PRs',
    'creating release tags',
    'publishing packages or plugins',
    'copying private paths, secrets, or raw personal context',
    'blindly cherry-picking bulk localization',
  ];
  blockedPhrases.forEach((phrase) => {
    assert.ok(forbiddenText.includes(phrase), `Missing forbidden action: ${phrase}`);
  });
});
// The verifier must accept and promote the salvage-branch candidate and reject the
// blind cherry-pick candidate with a lower score and a manual-review reason.
test('verifier accepts maintainer salvage and rejects blind translation imports', () => {
  const verifier = readJson('verifier-result.json');
  const findCandidate = (id) => verifier.candidates.find((entry) => entry.candidate_id === id);
  const accepted = findCandidate('maintainer-salvage-branch');
  const rejected = findCandidate('blind-cherry-pick-translations');

  assert.ok(accepted, 'Missing accepted maintainer salvage candidate');
  assert.ok(rejected, 'Missing rejected blind cherry-pick candidate');
  assert.strictEqual(accepted.decision, 'accepted');
  assert.strictEqual(rejected.decision, 'rejected');
  assert.strictEqual(verifier.promoted_candidate_id, accepted.candidate_id);
  // Kept verbatim: with no message argument, Node reports these calls' source text on failure.
  assert.ok(accepted.score > rejected.score);
  assert.ok(rejected.reasons.join('\n').includes('translator/manual review'));
});
// The playbook text must contain each of the stale-salvage rules listed below.
test('candidate playbook preserves stale-salvage operating rules', () => {
  const playbook = read('examples/evaluator-rag-prototype/candidate-playbook.md');
  const requiredRules = [
    'docs/stale-pr-salvage-ledger.md',
    'source PR',
    'maintainer-owned branch',
    'Preserve attribution',
    'translator/manual review',
    'private operator context',
    'git diff --check',
  ];
  requiredRules.forEach((rule) => {
    assert.ok(playbook.includes(rule), `Missing playbook rule: ${rule}`);
  });
});
// Checks that the GA roadmap references the prototype architecture doc and the fixture
// directory, and that it contains the status line about hosted integration.
// The assert.ok calls pass no message argument, so Node's default failure text applies.
test('roadmap points to the evaluator RAG prototype and keeps hosted integration open', () => {
const roadmap = read('docs/ECC-2.0-GA-ROADMAP.md');
assert.ok(roadmap.includes('docs/architecture/evaluator-rag-prototype.md'));
assert.ok(roadmap.includes('examples/evaluator-rag-prototype/'));
assert.ok(roadmap.includes('Local corpus complete; hosted integration remains future'));
});
// Billing/marketplace readiness fixture: the evidence-backed candidate is promoted
// and the announcement-first candidate is rejected.
test('billing readiness scenario rejects launch copy overclaims', () => {
  const fixtureDir = 'billing-marketplace-readiness';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'creating or editing GitHub Marketplace listings',
    'changing plan limits, subscriptions, seats, or entitlements',
    'posting announcement copy',
    'claiming live billing readiness from dry-run evidence alone'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing billing forbidden action: ${action}`);
  });

  const winner = candidateById('evidence-backed-billing-check');
  const loser = candidateById('announcement-first-billing-copy');
  assert.ok(winner, 'Missing accepted billing evidence candidate');
  assert.ok(loser, 'Missing rejected announcement-overclaim candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('roadmap acceptance criteria'));
  assert.ok(playbookText.includes('remove-before-publication'));
  assert.ok(playbookText.includes('https://github.com/marketplace/ecc-tools'));
});
// CI failure diagnosis fixture: the log-backed minimal fix is promoted and the
// rerun-until-green candidate is rejected.
test('ci failure diagnosis scenario rejects rerun-only fixes', () => {
  const fixtureDir = 'ci-failure-diagnosis';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'rerunning CI until it passes without diagnosing the failure',
    'pushing speculative fixes without a captured failing log excerpt',
    'weakening or deleting tests to silence a failure',
    'merging or publishing while required checks are red'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing CI forbidden action: ${action}`);
  });

  const acceptanceGates = [
    'failing job and step are named',
    'captured log excerpt is linked or summarized',
    'changed-file context is compared to the failing step',
    'local reproduction or regression command is named'
  ];
  acceptanceGates.forEach(gate => {
    assert.ok(scenarioDoc.acceptance_gates.includes(gate), `Missing CI acceptance gate: ${gate}`);
  });

  const winner = candidateById('log-backed-minimal-fix');
  const loser = candidateById('rerun-only-green-wait');
  assert.ok(winner, 'Missing accepted log-backed CI candidate');
  assert.ok(loser, 'Missing rejected rerun-only CI candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('failing log excerpt'));
  assert.ok(playbookText.includes('gh run view <run-id> --log-failed'));
  assert.ok(playbookText.includes('Full required GitHub Actions matrix before merge'));
});
// Harness config quality fixture: the adapter-matrix-backed drift check is promoted
// and the unsupported hook-parity claim is rejected.
test('harness config quality scenario rejects unsupported parity claims', () => {
  const fixtureDir = 'harness-config-quality';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'claiming native support for instruction-backed or reference-only harnesses',
    'copying Claude hook semantics into Codex, Gemini, Zed, or OpenCode without adapter evidence',
    'silently overwriting existing user MCP, hook, plugin, command, or rule config',
    'publishing packages or plugins from this evaluator run'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing harness forbidden action: ${action}`);
  });

  const acceptanceGates = [
    'adapter state is retrieved from the matrix',
    'install or onramp path is named',
    'verification command is named',
    'config-preservation behavior is explicit'
  ];
  acceptanceGates.forEach(gate => {
    assert.ok(scenarioDoc.acceptance_gates.includes(gate), `Missing harness acceptance gate: ${gate}`);
  });

  const winner = candidateById('adapter-matrix-backed-drift-check');
  const loser = candidateById('unsupported-hook-parity-claim');
  assert.ok(winner, 'Missing accepted adapter-matrix candidate');
  assert.ok(loser, 'Missing rejected unsupported parity candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('native support'));
  assert.ok(playbookText.includes('npm run harness:adapters -- --check'));
  assert.ok(playbookText.includes('node tests/docs/mcp-management-docs.test.js'));
});
// AgentShield policy exception fixture: the SARIF-backed, time-boxed review is
// promoted and blanket policy suppression is rejected.
test('AgentShield policy exception scenario rejects blanket suppression', () => {
  const fixtureDir = 'agentshield-policy-exception';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'approving policy exceptions without SARIF or report evidence',
    'treating expired exceptions as active',
    'blanket-suppressing AgentShield policy packs or organization-policy gates',
    'editing AgentShield code or policy files from this ECC evaluator run'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing AgentShield forbidden action: ${action}`);
  });

  const acceptanceGates = [
    'SARIF or report evidence is named',
    'owner, ticket, scope, and expiry state are recorded',
    'expired exceptions stay rejected or enforced',
    'remediation versus time-boxed exception decision is explicit'
  ];
  acceptanceGates.forEach(gate => {
    assert.ok(scenarioDoc.acceptance_gates.includes(gate), `Missing AgentShield acceptance gate: ${gate}`);
  });

  const winner = candidateById('sarif-backed-timeboxed-exception-review');
  const loser = candidateById('blanket-policy-suppression');
  assert.ok(winner, 'Missing accepted AgentShield exception candidate');
  assert.ok(loser, 'Missing rejected blanket suppression candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('blanket-suppresses'));
  assert.ok(playbookText.includes('agentshield-policy/*'));
  assert.ok(playbookText.includes('owner, ticket, scope, expiry'));
  assert.ok(playbookText.includes('npx ecc-agentshield scan --format json'));
});
// Skill quality evidence fixture: the evidence-backed amendment is promoted and
// the vague skill rewrite is rejected.
test('skill quality evidence scenario rejects vague rewrites', () => {
  const fixtureDir = 'skill-quality-evidence';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'promoting a skill rewrite without examples, validation, or observed failure evidence',
    'adding broad multi-domain skills that duplicate existing focused skills',
    'copying private operator context, secrets, tokens, or personal paths into skills',
    'claiming a skill-quality improvement without a reference set or regression command'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing skill-quality forbidden action: ${action}`);
  });

  const acceptanceGates = [
    'changed skill or guidance surface is named',
    'observed failure, user feedback, or reference-set gap is recorded',
    'validation command is named',
    'example or regression evidence is attached'
  ];
  acceptanceGates.forEach(gate => {
    assert.ok(scenarioDoc.acceptance_gates.includes(gate), `Missing skill-quality acceptance gate: ${gate}`);
  });

  const winner = candidateById('evidence-backed-skill-amendment');
  const loser = candidateById('vague-skill-rewrite');
  assert.ok(winner, 'Missing accepted skill-quality candidate');
  assert.ok(loser, 'Missing rejected vague rewrite candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('does not include working examples'));
  assert.ok(playbookText.includes('docs/SKILL-DEVELOPMENT-GUIDE.md'));
  assert.ok(playbookText.includes('node scripts/ci/validate-skills.js'));
  assert.ok(playbookText.includes('observed skill-run failure'));
});
// Deep analyzer evidence fixture: the corpus-backed analyzer change is promoted
// and the threshold-only rewrite is rejected.
test('deep analyzer evidence scenario rejects no-corpus analyzer changes', () => {
  const fixtureDir = 'deep-analyzer-evidence';
  const scenarioDoc = readFixtureJson(`${fixtureDir}/scenario.json`);
  const traceDoc = readFixtureJson(`${fixtureDir}/trace.json`);
  const reportDoc = readFixtureJson(`${fixtureDir}/report.json`);
  const verifierDoc = readFixtureJson(`${fixtureDir}/verifier-result.json`);
  const playbookText = read(`examples/evaluator-rag-prototype/${fixtureDir}/candidate-playbook.md`);
  const candidateById = id => verifierDoc.candidates.find(entry => entry.candidate_id === id);

  // Every artifact must carry the same scenario id.
  assert.strictEqual(scenarioDoc.scenario_id, fixtureDir);
  assert.strictEqual(traceDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(reportDoc.scenario_id, scenarioDoc.scenario_id);
  assert.strictEqual(verifierDoc.scenario_id, scenarioDoc.scenario_id);

  // Every derived artifact must be flagged read-only.
  assert.strictEqual(traceDoc.read_only, true);
  assert.strictEqual(reportDoc.read_only, true);
  assert.strictEqual(verifierDoc.read_only, true);

  const forbiddenActions = [
    'promoting repository, commit, architecture, or deep-analysis changes without analyzer corpus evidence',
    'suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence',
    'changing analyzer thresholds or classifications without expected-output comparison',
    'posting PR comments, check runs, or Linear sync updates from this read-only evaluator run'
  ];
  forbiddenActions.forEach(action => {
    assert.ok(scenarioDoc.forbidden_actions.includes(action), `Missing deep-analyzer forbidden action: ${action}`);
  });

  const acceptanceGates = [
    'changed analyzer surface is named',
    'maintained corpus or reference-set path is included',
    'expected analyzer outputs are compared',
    'representative repository shape or commit history is described',
    'regression command is named'
  ];
  acceptanceGates.forEach(gate => {
    assert.ok(scenarioDoc.acceptance_gates.includes(gate), `Missing deep-analyzer acceptance gate: ${gate}`);
  });

  const winner = candidateById('corpus-backed-analyzer-change');
  const loser = candidateById('threshold-only-analyzer-rewrite');
  assert.ok(winner, 'Missing accepted deep-analyzer candidate');
  assert.ok(loser, 'Missing rejected threshold-only analyzer candidate');
  assert.strictEqual(winner.decision, 'accepted');
  assert.strictEqual(loser.decision, 'rejected');
  assert.strictEqual(verifierDoc.promoted_candidate_id, winner.candidate_id);

  const rejectionNotes = loser.reasons.join('\n');
  assert.ok(rejectionNotes.includes('does not compare expected outputs'));
  assert.ok(playbookText.includes('../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts'));
  assert.ok(playbookText.includes('npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts'));
  assert.ok(playbookText.includes('Deep Analyzer Evidence'));
});
// Final report: print the failure count and exit non-zero when anything failed;
// otherwise print the pass count.
if (failed <= 0) {
  console.log(`\nPassed: ${passed}`);
} else {
  console.log(`\nFailed: ${failed}`);
  process.exit(1);
}

View File

@@ -46,11 +46,20 @@ test('stale PR salvage ledger preserves representative source attribution', () =
for (const pr of [
'#1309',
'#1232',
'#1304',
'#1322',
'#1326',
'#1310',
'#1325',
'#1413',
'#1414',
'#1478',
'#1493',
'#1528/#1529/#1547',
'#1603',
'#1658',
'#1659',
'#1674',
'#1687',
'#1705/#1780',
@@ -71,10 +80,13 @@ test('stale PR salvage ledger records skipped junk and superseded work', () => {
assert.ok(source.includes('too low-signal'));
});
test('stale PR salvage ledger keeps the zh-CN tail manual-review only', () => {
test('stale PR salvage ledger keeps localization tails manual-review only', () => {
const source = read('docs/stale-pr-salvage-ledger.md');
assert.ok(source.includes('Only the #1687 localization tail remains'));
assert.ok(source.includes('The remaining plausibly useful backlog is translation/localization work'));
assert.ok(source.includes('#1687 zh-CN localization tail'));
assert.ok(source.includes('#1609 Persian README translation'));
assert.ok(source.includes('#1563 zh-TW README sync'));
assert.ok(source.includes('translator/manual review'));
assert.ok(source.includes('Do not import stale top-level docs'));
});
@@ -88,10 +100,54 @@ test('legacy inventory and roadmap link to the durable salvage ledger', () => {
assert.ok(roadmap.includes('#1687 translator/manual'));
});
// Checks that the salvage ledger mentions every PR reviewed in the May 12 gap
// pass, plus the outcome phrases recorded for that pass.
test('stale PR salvage ledger records the May 12 gap pass', () => {
  const ledgerText = read('docs/stale-pr-salvage-ledger.md');
  const gapPassPrs = [
    '#1310',
    '#1325',
    '#1360',
    '#1414',
    '#1415',
    '#1478',
    '#1438',
    '#1504',
    '#1508',
    '#1563/#1564/#1565',
    '#1567',
    '#1570',
    '#1584',
    '#1589',
    '#1594',
    '#1597',
    '#1602',
    '#1603',
    '#1604',
    '#1609',
    '#1613',
    '#1631',
    '#1648',
    '#1658',
    '#1693',
  ];
  gapPassPrs.forEach(prRef => {
    assert.ok(ledgerText.includes(prRef), `Missing May 12 gap-pass PR ${prRef}`);
  });

  // Kept as separate statements so a failure names the exact missing phrase.
  const mentions = phrase => ledgerText.includes(phrase);
  assert.ok(mentions('Django/Celery maintainer branch'));
  assert.ok(mentions('already preserved in #1770'));
  assert.ok(mentions('already preserved in #1769'));
  assert.ok(mentions('already preserved in #1766'));
  assert.ok(mentions('GateGuard subagent file-gate bypass'));
  assert.ok(mentions('HTTP MCP reachability handling'));
  assert.ok(mentions('current managed installer/profile flow'));
  assert.ok(mentions('false-positive proof gate'));
  assert.ok(mentions('session_id` from stdin JSON'));
  assert.ok(mentions('Already present as `skills/redis-patterns/`'));
});
// Final report: print the failure count and exit non-zero when anything failed;
// otherwise print the pass count.
if (failed <= 0) {
  console.log(`\nPassed: ${passed}`);
} else {
  console.log(`\nFailed: ${failed}`);
  process.exit(1);
}

View File

@@ -71,9 +71,16 @@ function buildExpectedPublishPaths(repoRoot) {
"agent.yaml",
"VERSION",
]
const exclusionPaths = [
"!**/__pycache__/**",
"!**/*.pyc",
"!**/*.pyo",
"!**/*.pyd",
"!**/.pytest_cache/**",
]
const combined = new Set(
[...modules.flatMap((module) => module.paths || []), ...extraPaths].map(normalizePublishPath)
[...modules.flatMap((module) => module.paths || []), ...extraPaths, ...exclusionPaths].map(normalizePublishPath)
)
return [...combined]
@@ -139,6 +146,17 @@ function main() {
`npm pack should not include ${excludedPath}`
)
}
for (const packagedPath of packagedPaths) {
assert.ok(
!packagedPath.includes("__pycache__/"),
`npm pack should not include Python bytecode cache path ${packagedPath}`
)
assert.ok(
!/\.py[cod]$/.test(packagedPath),
`npm pack should not include Python bytecode file ${packagedPath}`
)
}
}],
]

View File

@@ -62,6 +62,24 @@ function seedMinimalRepo(rootDir, overrides = {}) {
'ecc2/src/session/store.rs': 'insert_tool_log query_tool_logs',
'ecc2/src/session/manager.rs': 'sync_tool_activity_metrics tool-usage.jsonl',
'docs/architecture/observability-readiness.md': 'node scripts/observability-readiness.js --format json',
'docs/architecture/hud-status-session-control.md': [
'context toolCalls activeAgents todos checks cost risk queueState',
'create resume status stop diff pr mergeQueue conflictQueue',
'Linear GitHub handoff'
].join('\n'),
'examples/hud-status-contract.json': JSON.stringify({
schema_version: 'ecc.hud-status.v1',
context: {},
toolCalls: {},
activeAgents: [],
todos: {},
checks: {},
cost: {},
risk: {},
queueState: {},
sessionControls: {},
sync: {}
}, null, 2),
'docs/releases/2.0.0-rc.1/quickstart.md': 'observability-readiness.md',
'docs/releases/2.0.0-rc.1/release-notes.md': 'observability-readiness.md'
};
@@ -195,6 +213,23 @@ function runTests() {
}
})) passed++; else failed++;
// Builds a readiness report for a seeded temp repo whose HUD status contract entry
// is overridden with null, and expects only the HUD check to fail.
// NOTE(review): a null override presumably makes seedMinimalRepo omit the file — confirm there.
const hudContractCaseOk = test('missing HUD status contract fails without disturbing core tool checks', () => {
  const tempRoot = createTempDir('observability-readiness-hud-fail-');
  try {
    const overrides = { 'examples/hud-status-contract.json': null };
    seedMinimalRepo(tempRoot, overrides);
    const readiness = buildReport(tempRoot);
    assert.strictEqual(readiness.ready, false);
    // The HUD contract check must be present and failing...
    assert.ok(readiness.checks.some(entry => entry.id === 'hud-status-control-contract' && !entry.pass));
    // ...while the loop-status live-signal check still passes.
    assert.ok(readiness.checks.some(entry => entry.id === 'loop-status-live-signal' && entry.pass));
  } finally {
    // Always remove the temp directory, even when an assertion throws.
    cleanup(tempRoot);
  }
});
if (hudContractCaseOk) {
  passed += 1;
} else {
  failed += 1;
}
console.log('\nResults:');
console.log(` Passed: ${passed}`);
console.log(` Failed: ${failed}`);