mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-06-10 10:13:49 +08:00
Compare commits
34 Commits
pr-1803-qu
...
ecc/8d7d05
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3fc22ae751 | ||
|
|
c229b74d41 | ||
|
|
be42989746 | ||
|
|
d2d8cda8b3 | ||
|
|
894ee03930 | ||
|
|
37c27a60fd | ||
|
|
337ced0828 | ||
|
|
b25d4770f5 | ||
|
|
6fbf58d590 | ||
|
|
3dddfc8270 | ||
|
|
cd90c84c32 | ||
|
|
863519eecf | ||
|
|
dcf5668b27 | ||
|
|
f2deedcf3d | ||
|
|
bfacf37715 | ||
|
|
0598af70a5 | ||
|
|
4d42917cfb | ||
|
|
7109ee08db | ||
|
|
4f5f612b61 | ||
|
|
df60af9619 | ||
|
|
ab0f0187de | ||
|
|
65c1502ecd | ||
|
|
ef86329828 | ||
|
|
5d3ed622c6 | ||
|
|
f239379ebf | ||
|
|
2c8cda03e7 | ||
|
|
9a5c904d33 | ||
|
|
b38992f60e | ||
|
|
86a529b3da | ||
|
|
adc97769be | ||
|
|
58489af64f | ||
|
|
fb5897f1a2 | ||
|
|
78c8b9b69b | ||
|
|
f03e200136 |
@@ -11,7 +11,7 @@
|
||||
{
|
||||
"name": "ecc",
|
||||
"source": "./",
|
||||
"description": "The most comprehensive Claude Code plugin — 58 agents, 220 skills, 74 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
|
||||
"description": "The most comprehensive Claude Code plugin — 60 agents, 225 skills, 75 legacy command shims, selective install profiles, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
|
||||
"version": "2.0.0-rc.1",
|
||||
"author": {
|
||||
"name": "Affaan Mustafa",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "ecc",
|
||||
"version": "2.0.0-rc.1",
|
||||
"description": "Battle-tested Claude Code plugin for engineering teams — 58 agents, 220 skills, 74 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use",
|
||||
"description": "Battle-tested Claude Code plugin for engineering teams — 60 agents, 225 skills, 75 legacy command shims, production-ready hooks, and selective install workflows evolved through continuous real-world use",
|
||||
"author": {
|
||||
"name": "Affaan Mustafa",
|
||||
"url": "https://x.com/affaanmustafa"
|
||||
|
||||
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@@ -77,6 +77,7 @@ jobs:
|
||||
|
||||
- name: Cache npm
|
||||
if: matrix.pm == 'npm'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.npm-cache-dir.outputs.dir }}
|
||||
@@ -94,6 +95,7 @@ jobs:
|
||||
|
||||
- name: Cache pnpm
|
||||
if: matrix.pm == 'pnpm'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.pnpm-cache-dir.outputs.dir }}
|
||||
@@ -115,6 +117,7 @@ jobs:
|
||||
|
||||
- name: Cache yarn
|
||||
if: matrix.pm == 'yarn'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.yarn-cache-dir.outputs.dir }}
|
||||
@@ -124,6 +127,7 @@ jobs:
|
||||
|
||||
- name: Cache bun
|
||||
if: matrix.pm == 'bun'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ~/.bun/install/cache
|
||||
|
||||
4
.github/workflows/reusable-test.yml
vendored
4
.github/workflows/reusable-test.yml
vendored
@@ -67,6 +67,7 @@ jobs:
|
||||
|
||||
- name: Cache npm
|
||||
if: inputs.package-manager == 'npm'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.npm-cache-dir.outputs.dir }}
|
||||
@@ -84,6 +85,7 @@ jobs:
|
||||
|
||||
- name: Cache pnpm
|
||||
if: inputs.package-manager == 'pnpm'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.pnpm-cache-dir.outputs.dir }}
|
||||
@@ -105,6 +107,7 @@ jobs:
|
||||
|
||||
- name: Cache yarn
|
||||
if: inputs.package-manager == 'yarn'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ${{ steps.yarn-cache-dir.outputs.dir }}
|
||||
@@ -114,6 +117,7 @@ jobs:
|
||||
|
||||
- name: Cache bun
|
||||
if: inputs.package-manager == 'bun'
|
||||
continue-on-error: true
|
||||
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
|
||||
with:
|
||||
path: ~/.bun/install/cache
|
||||
|
||||
14
.npmignore
14
.npmignore
@@ -6,3 +6,17 @@ scripts/release.sh
|
||||
|
||||
# Plugin dev notes (not needed by consumers)
|
||||
.claude-plugin/PLUGIN_SCHEMA_NOTES.md
|
||||
|
||||
# Python/test cache artifacts are local build byproducts, not runtime surface
|
||||
__pycache__/
|
||||
**/__pycache__/
|
||||
**/__pycache__/**
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
**/*.pyc
|
||||
**/*.pyo
|
||||
**/*.pyd
|
||||
*$py.class
|
||||
.pytest_cache/
|
||||
**/.pytest_cache/**
|
||||
|
||||
10
AGENTS.md
10
AGENTS.md
@@ -1,6 +1,6 @@
|
||||
# Everything Claude Code (ECC) — Agent Instructions
|
||||
|
||||
This is a **production-ready AI coding plugin** providing 58 specialized agents, 220 skills, 74 commands, and automated hook workflows for software development.
|
||||
This is a **production-ready AI coding plugin** providing 60 specialized agents, 225 skills, 75 commands, and automated hook workflows for software development.
|
||||
|
||||
**Version:** 2.0.0-rc.1
|
||||
|
||||
@@ -35,6 +35,8 @@ This is a **production-ready AI coding plugin** providing 58 specialized agents,
|
||||
| kotlin-build-resolver | Kotlin/Gradle build errors | Kotlin build failures |
|
||||
| database-reviewer | PostgreSQL/Supabase specialist | Schema design, query optimization |
|
||||
| python-reviewer | Python code review | Python projects |
|
||||
| django-reviewer | Django code review | Django apps, DRF APIs, ORM, migrations |
|
||||
| django-build-resolver | Django build, migration, and setup errors | Django startup, dependency, migration, collectstatic failures |
|
||||
| java-reviewer | Java and Spring Boot code review | Java/Spring Boot projects |
|
||||
| java-build-resolver | Java/Maven/Gradle build errors | Java build failures |
|
||||
| loop-operator | Autonomous loop execution | Run loops safely, monitor stalls, intervene |
|
||||
@@ -147,9 +149,9 @@ Troubleshoot failures: check test isolation → verify mocks → fix implementat
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
agents/ — 58 specialized subagents
|
||||
skills/ — 220 workflow skills and domain knowledge
|
||||
commands/ — 74 slash commands
|
||||
agents/ — 60 specialized subagents
|
||||
skills/ — 225 workflow skills and domain knowledge
|
||||
commands/ — 75 slash commands
|
||||
hooks/ — Trigger-based automations
|
||||
rules/ — Always-follow guidelines (common + per-language)
|
||||
scripts/ — Cross-platform Node.js utilities
|
||||
|
||||
16
README.md
16
README.md
@@ -358,7 +358,7 @@ If you stacked methods, clean up in this order:
|
||||
/plugin list ecc@ecc
|
||||
```
|
||||
|
||||
**That's it!** You now have access to 58 agents, 220 skills, and 74 legacy command shims.
|
||||
**That's it!** You now have access to 60 agents, 225 skills, and 75 legacy command shims.
|
||||
|
||||
### Dashboard GUI
|
||||
|
||||
@@ -456,7 +456,7 @@ everything-claude-code/
|
||||
| |-- plugin.json # Plugin metadata and component paths
|
||||
| |-- marketplace.json # Marketplace catalog for /plugin marketplace add
|
||||
|
|
||||
|-- agents/ # 58 specialized subagents for delegation
|
||||
|-- agents/ # 60 specialized subagents for delegation
|
||||
| |-- planner.md # Feature implementation planning
|
||||
| |-- architect.md # System design decisions
|
||||
| |-- tdd-guide.md # Test-driven development
|
||||
@@ -1360,9 +1360,9 @@ The configuration is automatically detected from `.opencode/opencode.json`.
|
||||
|
||||
| Feature | Claude Code | OpenCode | Status |
|
||||
|---------|-------------|----------|--------|
|
||||
| Agents | PASS: 58 agents | PASS: 12 agents | **Claude Code leads** |
|
||||
| Commands | PASS: 74 commands | PASS: 35 commands | **Claude Code leads** |
|
||||
| Skills | PASS: 220 skills | PASS: 37 skills | **Claude Code leads** |
|
||||
| Agents | PASS: 60 agents | PASS: 12 agents | **Claude Code leads** |
|
||||
| Commands | PASS: 75 commands | PASS: 35 commands | **Claude Code leads** |
|
||||
| Skills | PASS: 225 skills | PASS: 37 skills | **Claude Code leads** |
|
||||
| Hooks | PASS: 8 event types | PASS: 11 events | **OpenCode has more!** |
|
||||
| Rules | PASS: 29 rules | PASS: 13 instructions | **Claude Code leads** |
|
||||
| MCP Servers | PASS: 14 servers | PASS: Full | **Full parity** |
|
||||
@@ -1465,9 +1465,9 @@ ECC is the **first plugin to maximize every major AI coding tool**. Here's how e
|
||||
|
||||
| Feature | Claude Code | Cursor IDE | Codex CLI | OpenCode |
|
||||
|---------|------------|------------|-----------|----------|
|
||||
| **Agents** | 58 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 |
|
||||
| **Commands** | 74 | Shared | Instruction-based | 35 |
|
||||
| **Skills** | 220 | Shared | 10 (native format) | 37 |
|
||||
| **Agents** | 60 | Shared (AGENTS.md) | Shared (AGENTS.md) | 12 |
|
||||
| **Commands** | 75 | Shared | Instruction-based | 35 |
|
||||
| **Skills** | 225 | Shared | 10 (native format) | 37 |
|
||||
| **Hook Events** | 8 types | 15 types | None yet | 11 types |
|
||||
| **Hook Scripts** | 20+ scripts | 16 scripts (DRY adapter) | N/A | Plugin hooks |
|
||||
| **Rules** | 34 (common + lang) | 34 (YAML frontmatter) | Instruction-based | 13 instructions |
|
||||
|
||||
@@ -160,7 +160,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/"
|
||||
/plugin list ecc@ecc
|
||||
```
|
||||
|
||||
**完成!** 你现在可以使用 58 个代理、220 个技能和 74 个命令。
|
||||
**完成!** 你现在可以使用 60 个代理、225 个技能和 75 个命令。
|
||||
|
||||
### multi-* 命令需要额外配置
|
||||
|
||||
|
||||
@@ -158,6 +158,7 @@ commands:
|
||||
- build-fix
|
||||
- checkpoint
|
||||
- code-review
|
||||
- cost-report
|
||||
- cpp-build
|
||||
- cpp-review
|
||||
- cpp-test
|
||||
|
||||
@@ -27,6 +27,80 @@ When invoked:
|
||||
- **Consolidate** similar issues (e.g., "5 functions missing error handling" not 5 separate findings)
|
||||
- **Prioritize** issues that could cause bugs, security vulnerabilities, or data loss
|
||||
|
||||
### Pre-Report Gate
|
||||
|
||||
Before writing a finding, answer all four questions. If any answer is "no" or
|
||||
"unsure", downgrade severity or drop the finding.
|
||||
|
||||
1. **Can I cite the exact line?** Name the file and line. Vague findings like
|
||||
"somewhere in the auth layer" are not actionable and must be dropped.
|
||||
2. **Can I describe the concrete failure mode?** Name the input, state, and bad
|
||||
outcome. If you cannot name the trigger, you are pattern-matching, not
|
||||
reviewing.
|
||||
3. **Have I read the surrounding context?** Check callers, imports, and tests.
|
||||
Many apparent issues are already handled one frame up or guarded by a type.
|
||||
4. **Is the severity defensible?** A missing JSDoc is never HIGH. A single
|
||||
`any` in a test fixture is never CRITICAL. Severity inflation erodes trust
|
||||
faster than missed findings.
|
||||
|
||||
### HIGH / CRITICAL Require Proof
|
||||
|
||||
For any finding tagged HIGH or CRITICAL, include:
|
||||
|
||||
- The exact snippet and line number
|
||||
- The specific failure scenario: input, state, and outcome
|
||||
- Why existing guards, such as types, validation, or framework defaults, do not
|
||||
catch it
|
||||
|
||||
If you cannot produce all three, demote to MEDIUM or drop.
|
||||
|
||||
### It Is Acceptable And Expected To Return Zero Findings
|
||||
|
||||
A clean review is a valid review. Do not manufacture findings to justify the
|
||||
invocation. If the diff is small, well-typed, tested, and follows the project's
|
||||
patterns, the correct output is a summary with zero rows and verdict `APPROVE`.
|
||||
|
||||
Manufactured findings, filler nits, speculative "consider using X", and
|
||||
hypothetical edge cases without a trigger are the primary failure mode of LLM
|
||||
reviewers and directly undermine this agent's usefulness.
|
||||
|
||||
## Common False Positives - Skip These
|
||||
|
||||
Patterns that LLM reviewers commonly mis-flag. Skip unless you have evidence
|
||||
specific to this codebase:
|
||||
|
||||
- **"Consider adding error handling"** on a call whose error path is handled by
|
||||
the caller or framework, such as Express error middleware, React error
|
||||
boundaries, top-level `try/catch`, or Promise chains with `.catch` upstream.
|
||||
- **"Missing input validation"** when the function is internal and its callers
|
||||
already validate. Trace at least one caller before flagging.
|
||||
- **"Magic number"** for well-known constants: `200`, `404`, `1000` ms, `60`,
|
||||
`24`, `1024`, array index `0` or `-1`, HTTP status codes, and single-use
|
||||
local constants whose meaning is obvious from the variable name.
|
||||
- **"Function too long"** for exhaustive `switch` statements, configuration
|
||||
objects, test tables, or generated code. Length is not complexity.
|
||||
- **"Missing JSDoc"** on single-purpose internal helpers whose name and
|
||||
signature are self-describing.
|
||||
- **"Prefer `const` over `let`"** when the variable is reassigned. Read the
|
||||
whole function before flagging.
|
||||
- **"Possible null dereference"** when the preceding line narrows the type or an
|
||||
`if` guard is in scope. Trace type flow instead of pattern-matching on `?.`.
|
||||
- **"N+1 query"** on fixed-cardinality loops, such as iterating a four-element
|
||||
enum, or on paths already using `DataLoader` or batching.
|
||||
- **"Missing await"** on fire-and-forget calls that are intentionally detached,
|
||||
such as logging, metrics, or background queue pushes. Check for a comment or
|
||||
`void` prefix before flagging.
|
||||
- **"Should use TypeScript"** or **"Should have types"** in a JavaScript-only
|
||||
file. Match the project's existing language; do not suggest a stack change.
|
||||
- **"Hardcoded value"** for values in test fixtures, example code, or
|
||||
documentation snippets. Tests should have hardcoded expectations.
|
||||
- **Security theater**: flagging `Math.random()` in a non-cryptographic context
|
||||
such as animation, jitter, or sampling, or flagging `eval`/`Function` in a
|
||||
plugin system that is explicitly a code-loading surface.
|
||||
|
||||
When tempted to flag one of the above, ask: "Would a senior engineer on this
|
||||
team actually change this in review?" If no, skip.
|
||||
|
||||
## Review Checklist
|
||||
|
||||
### Security (CRITICAL)
|
||||
@@ -206,10 +280,13 @@ Verdict: WARNING — 2 HIGH issues should be resolved before merge.
|
||||
|
||||
## Approval Criteria
|
||||
|
||||
- **Approve**: No CRITICAL or HIGH issues
|
||||
- **Approve**: No CRITICAL or HIGH issues, including clean reviews with zero
|
||||
findings. This is a valid and expected outcome.
|
||||
- **Warning**: HIGH issues only (can merge with caution)
|
||||
- **Block**: CRITICAL issues found — must fix before merge
|
||||
|
||||
Do not withhold approval to appear rigorous. If the diff is clean, approve it.
|
||||
|
||||
## Project-Specific Guidelines
|
||||
|
||||
When available, also check project-specific conventions from `CLAUDE.md` or project rules:
|
||||
|
||||
243
agents/django-build-resolver.md
Normal file
243
agents/django-build-resolver.md
Normal file
@@ -0,0 +1,243 @@
|
||||
---
|
||||
name: django-build-resolver
|
||||
description: Django/Python build, migration, and dependency error resolution specialist. Fixes pip/Poetry errors, migration conflicts, import errors, Django configuration issues, and collectstatic failures with minimal changes. Use when Django setup or startup fails.
|
||||
tools: ["Read", "Write", "Edit", "Bash", "Grep", "Glob"]
|
||||
model: sonnet
|
||||
---
|
||||
|
||||
# Django Build Error Resolver
|
||||
|
||||
You are an expert Django/Python error resolution specialist. Your mission is to fix build errors, migration conflicts, import failures, dependency issues, and Django startup errors with **minimal, surgical changes**.
|
||||
|
||||
You DO NOT refactor or rewrite code — you fix the error only.
|
||||
|
||||
## Core Responsibilities
|
||||
|
||||
1. Resolve pip, Poetry, and virtualenv dependency errors
|
||||
2. Fix Django migration conflicts and state inconsistencies
|
||||
3. Diagnose and repair Django configuration/settings errors
|
||||
4. Resolve Python import errors and module not found issues
|
||||
5. Fix `collectstatic`, `runserver`, and management command failures
|
||||
6. Repair database connection and `DATABASES` misconfiguration
|
||||
|
||||
## Diagnostic Commands
|
||||
|
||||
Run these in order to locate the error:
|
||||
|
||||
```bash
|
||||
# Check Python and Django versions
|
||||
python --version
|
||||
python -m django --version
|
||||
|
||||
# Verify virtual environment is active
|
||||
which python
|
||||
pip list | grep -E "Django|djangorestframework|celery|psycopg"
|
||||
|
||||
# Check for missing dependencies
|
||||
pip check
|
||||
|
||||
# Validate Django configuration
|
||||
python manage.py check --deploy 2>&1 || python manage.py check 2>&1
|
||||
|
||||
# List all migrations with applied ([X]) / unapplied ([ ]) status
|
||||
python manage.py showmigrations 2>&1
|
||||
|
||||
# Detect unapplied migrations (exits non-zero if any are pending)
|
||||
python manage.py migrate --check 2>&1
|
||||
|
||||
# Static files
|
||||
python manage.py collectstatic --dry-run --noinput 2>&1
|
||||
```
|
||||
|
||||
## Resolution Workflow
|
||||
|
||||
```text
|
||||
1. Reproduce the error -> Capture exact message
|
||||
2. Identify error category -> See table below
|
||||
3. Read affected file/config -> Understand context
|
||||
4. Apply minimal fix -> Only what's needed
|
||||
5. python manage.py check -> Validate Django config
|
||||
6. Run test suite -> Ensure nothing broke
|
||||
```
|
||||
|
||||
## Common Fix Patterns
|
||||
|
||||
### Dependency / pip Errors
|
||||
|
||||
| Error | Cause | Fix |
|
||||
|-------|-------|-----|
|
||||
| `ModuleNotFoundError: No module named 'X'` | Missing package | `pip install X` or add to `requirements.txt` |
|
||||
| `ImportError: cannot import name 'X' from 'Y'` | Version mismatch | Pin compatible version in requirements |
|
||||
| `ERROR: pip's dependency resolver...` | Conflicting deps | Upgrade pip: `pip install --upgrade pip`, then `pip install -r requirements.txt` |
|
||||
| `Poetry: No solution found` | Conflicting constraints | Relax version pin in `pyproject.toml` |
|
||||
| `pkg_resources.DistributionNotFound` | Installed outside venv | Reinstall inside venv |
|
||||
|
||||
```bash
|
||||
# Force reinstall all dependencies
|
||||
pip install --force-reinstall -r requirements.txt
|
||||
|
||||
# Poetry: clear cache and resolve
|
||||
poetry cache clear --all pypi
|
||||
poetry install
|
||||
|
||||
# Create fresh virtualenv if corrupt
|
||||
deactivate
|
||||
python -m venv .venv && source .venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
### Migration Errors
|
||||
|
||||
| Error | Cause | Fix |
|
||||
|-------|-------|-----|
|
||||
| `django.db.migrations.exceptions.MigrationSchemaMissing` | DB tables not created | `python manage.py migrate` |
|
||||
| `InconsistentMigrationHistory` | Applied out of order | Squash or fake migrations |
|
||||
| `Migration X dependencies reference nonexistent parent Y` | Missing migration file | Recreate with `makemigrations` |
|
||||
| `Table already exists` | Migration applied outside Django | `migrate --fake-initial` |
|
||||
| `Multiple leaf nodes in the migration graph` | Conflicting migration branches | Merge: `python manage.py makemigrations --merge` |
|
||||
| `django.db.utils.OperationalError: no such column` | Unapplied migration | `python manage.py migrate` |
|
||||
|
||||
```bash
|
||||
# Fix conflicting migrations
|
||||
python manage.py makemigrations --merge --no-input
|
||||
|
||||
# Fake migrations already applied at DB level
|
||||
python manage.py migrate --fake <app> <migration_number>
|
||||
|
||||
# Reset migrations for an app (dev only!)
|
||||
python manage.py migrate <app> zero
|
||||
python manage.py makemigrations <app>
|
||||
python manage.py migrate <app>
|
||||
|
||||
# Show migration plan
|
||||
python manage.py migrate --plan
|
||||
```
|
||||
|
||||
### Django Configuration Errors
|
||||
|
||||
| Error | Cause | Fix |
|
||||
|-------|-------|-----|
|
||||
| `django.core.exceptions.ImproperlyConfigured` | Missing setting or wrong value | Check `settings.py` for the named setting |
|
||||
| `DJANGO_SETTINGS_MODULE not set` | Env var missing | `export DJANGO_SETTINGS_MODULE=config.settings.development` |
|
||||
| `SECRET_KEY must not be empty` | Missing env var | Set `DJANGO_SECRET_KEY` in `.env` |
|
||||
| `Invalid HTTP_HOST header` | `ALLOWED_HOSTS` misconfigured | Add hostname to `ALLOWED_HOSTS` |
|
||||
| `Apps aren't loaded yet` | Importing models before `django.setup()` | Call `django.setup()` or move imports inside functions |
|
||||
| `RuntimeError: Model class ... doesn't declare an explicit app_label` | App not in `INSTALLED_APPS` | Add the app to `INSTALLED_APPS` |
|
||||
|
||||
```bash
|
||||
# Verify settings module resolves
|
||||
python -c "import django; django.setup(); print('OK')"
|
||||
|
||||
# Check environment variable
|
||||
echo $DJANGO_SETTINGS_MODULE
|
||||
|
||||
# Find missing settings
|
||||
python manage.py diffsettings 2>&1
|
||||
```
|
||||
|
||||
### Import Errors
|
||||
|
||||
```bash
|
||||
# Diagnose circular imports
|
||||
python -c "import <module>" 2>&1
|
||||
|
||||
# Find where an import is used
|
||||
grep -r "from <module> import" . --include="*.py"
|
||||
|
||||
# Check installed app paths
|
||||
python -c "import <app>; print(<app>.__file__)"
|
||||
```
|
||||
|
||||
**Circular import fix:** Move imports inside functions or use `apps.get_model()`:
|
||||
|
||||
```python
|
||||
# Bad - top-level causes circular import
|
||||
from apps.users.models import User
|
||||
|
||||
# Good - import inside function
|
||||
def get_user(pk):
|
||||
from apps.users.models import User
|
||||
return User.objects.get(pk=pk)
|
||||
|
||||
# Good - use apps registry
|
||||
from django.apps import apps
|
||||
User = apps.get_model('users', 'User')
|
||||
```
|
||||
|
||||
### Database Connection Errors
|
||||
|
||||
| Error | Cause | Fix |
|
||||
|-------|-------|-----|
|
||||
| `django.db.utils.OperationalError: could not connect to server` | DB not running or wrong host | Start DB or fix `DATABASES['HOST']` |
|
||||
| `django.db.utils.OperationalError: FATAL: role X does not exist` | Wrong DB user | Fix `DATABASES['USER']` |
|
||||
| `django.db.utils.ProgrammingError: relation X does not exist` | Missing migration | `python manage.py migrate` |
|
||||
| `psycopg2 not installed` | Missing driver | `pip install psycopg2-binary` |
|
||||
|
||||
```bash
|
||||
# Test database connection
|
||||
python manage.py dbshell
|
||||
|
||||
# Check DATABASES setting
|
||||
python -c "from django.conf import settings; print(settings.DATABASES)"
|
||||
```
|
||||
|
||||
### collectstatic / Static Files Errors
|
||||
|
||||
| Error | Cause | Fix |
|
||||
|-------|-------|-----|
|
||||
| `staticfiles.E002: The STATICFILES_DIRS...` | `STATIC_ROOT` directory is also listed in `STATICFILES_DIRS` | Remove it from `STATICFILES_DIRS` |
|
||||
| `FileNotFoundError` during collectstatic | Missing static file referenced in template | Remove or create the referenced file |
|
||||
| `AttributeError: 'str' object has no attribute 'path'` | `STORAGES` not configured for Django 4.2+ | Update `STORAGES` dict in settings |
|
||||
|
||||
```bash
|
||||
# Dry run to find issues
|
||||
python manage.py collectstatic --dry-run --noinput 2>&1
|
||||
|
||||
# Clear and recollect
|
||||
python manage.py collectstatic --clear --noinput
|
||||
```
|
||||
|
||||
### runserver Failures
|
||||
|
||||
```bash
|
||||
# Port already in use
|
||||
lsof -ti:8000 | xargs kill -9
|
||||
python manage.py runserver
|
||||
|
||||
# Use alternate port
|
||||
python manage.py runserver 8080
|
||||
|
||||
# Verbose startup for hidden errors
|
||||
python manage.py runserver --verbosity=2 2>&1
|
||||
```
|
||||
|
||||
## Key Principles
|
||||
|
||||
- **Surgical fixes only** — don't refactor, just fix the error
|
||||
- **Never** delete migration files — fake them instead
|
||||
- **Always** run `python manage.py check` after fixing
|
||||
- Fix root cause over suppressing symptoms
|
||||
- Use `--fake` sparingly and only when DB state is known
|
||||
- Prefer `pip install --upgrade` over manual `requirements.txt` edits when resolving conflicts
|
||||
|
||||
## Stop Conditions
|
||||
|
||||
Stop and report if:
|
||||
- Migration conflict requires destructive DB changes (data loss risk)
|
||||
- Same error persists after 3 fix attempts
|
||||
- Fix requires changes to production data or irreversible DB operations
|
||||
- Missing external service (Redis, PostgreSQL) that needs user setup
|
||||
|
||||
## Output Format
|
||||
|
||||
```text
|
||||
[FIXED] apps/users/migrations/0003_auto.py
|
||||
Error: InconsistentMigrationHistory — 0002_add_email applied before 0001_initial
|
||||
Fix: python manage.py migrate users 0001 --fake, then re-applied
|
||||
Remaining errors: 0
|
||||
```
|
||||
|
||||
Final: `Django Status: OK/FAILED | Errors Fixed: N | Files Modified: list`
|
||||
|
||||
For Django architecture and ORM patterns, see `skill: django-patterns`.
|
||||
For Django security settings, see `skill: django-security`.
|
||||
160
agents/django-reviewer.md
Normal file
160
agents/django-reviewer.md
Normal file
@@ -0,0 +1,160 @@
|
||||
---
|
||||
name: django-reviewer
|
||||
description: Expert Django code reviewer specializing in ORM correctness, DRF patterns, migration safety, security misconfigurations, and production-grade Django practices. Use for all Django code changes. MUST BE USED for Django projects.
|
||||
tools: ["Read", "Grep", "Glob", "Bash"]
|
||||
model: sonnet
|
||||
---
|
||||
|
||||
You are a senior Django code reviewer ensuring production-grade quality, security, and performance.
|
||||
|
||||
**Note**: This agent focuses on Django-specific concerns. Ensure `python-reviewer` is also invoked for general Python quality checks, either before or after this review.
|
||||
|
||||
When invoked:
|
||||
1. Run `git diff -- '*.py'` to see recent Python file changes
|
||||
2. Run `python manage.py check` if a Django project is present
|
||||
3. Run `ruff check .` and `mypy .` if available
|
||||
4. Focus on modified `.py` files and any related migrations
|
||||
5. Assume CI checks have passed (orchestration gated); if CI status needs verification, run `gh pr checks` to confirm green before proceeding
|
||||
|
||||
## Review Priorities
|
||||
|
||||
### CRITICAL — Security
|
||||
|
||||
- **SQL Injection**: Raw SQL with f-strings or `%` formatting — use `%s` parameters or ORM
|
||||
- **`mark_safe` on user input**: Never without explicit `escape()` first
|
||||
- **CSRF exemption without reason**: `@csrf_exempt` on non-webhook views
|
||||
- **`DEBUG = True` in production settings**: Leaks full stack traces
|
||||
- **Hardcoded `SECRET_KEY`**: Must come from environment variable
|
||||
- **Missing `permission_classes` on DRF views**: Defaults to global — verify intent
|
||||
- **`eval()`/`exec()` on user input**: Immediate block
|
||||
- **File upload without extension/size validation**: Path traversal risk
|
||||
|
||||
### CRITICAL — ORM Correctness
|
||||
|
||||
- **N+1 queries in loops**: Accessing related objects without `select_related`/`prefetch_related`
|
||||
```python
|
||||
# Bad
|
||||
for order in Order.objects.all():
|
||||
print(order.user.email) # N+1
|
||||
|
||||
# Good
|
||||
for order in Order.objects.select_related('user').all():
|
||||
print(order.user.email)
|
||||
```
|
||||
- **Missing `atomic()` for multi-step writes**: Use `transaction.atomic()` for any sequence of DB writes
|
||||
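- **`bulk_create` without `update_conflicts`**: Duplicate keys raise `IntegrityError` and abort the whole batch (or are silently skipped with `ignore_conflicts=True`) — choose conflict handling explicitly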
|
||||
- **`get()` without `DoesNotExist` handling**: Unhandled exception risk
|
||||
- **Queryset used after `delete()`**: Stale queryset reference
|
||||
|
||||
### CRITICAL — Migration Safety
|
||||
|
||||
- **Model change without migration**: Run `python manage.py makemigrations --check`
|
||||
- **Backward-incompatible column drop**: Must be done in two deployments (nullable first)
|
||||
- **`RunPython` without `reverse_code`**: Migration cannot be reversed
|
||||
- **`atomic = False` without justification**: Leaves DB in partial state on failure
|
||||
|
||||
### HIGH — DRF Patterns
|
||||
|
||||
- **Serializer without explicit `fields`**: `fields = '__all__'` exposes all columns including sensitive ones
|
||||
- **No pagination on list endpoints**: Unbounded queries can return millions of rows
|
||||
- **Missing `read_only_fields`**: Auto-generated fields (id, created_at) editable by API
|
||||
- **`perform_create` not used**: Injecting user context should happen in `perform_create`, not `validate`
|
||||
- **No throttling on auth endpoints**: Login/registration open to brute force
|
||||
- **Nested writable serializers without `update()`**: Default update silently ignores nested data
|
||||
|
||||
### HIGH — Performance
|
||||
|
||||
- **Queryset evaluated in template context**: Use `.values()` or pass list; avoid lazy evaluation in templates
|
||||
- **Missing `db_index` on FK/filter fields**: Full table scan on filtered queries
|
||||
- **Synchronous external API call in view**: Blocks the request thread — offload to Celery
|
||||
- **`len(queryset)` instead of `.count()`**: Forces full fetch
|
||||
- **`exists()` not used for existence checks**: `if queryset:` fetches objects unnecessarily
|
||||
|
||||
```python
|
||||
# Bad
|
||||
if Product.objects.filter(sku=sku):
|
||||
...
|
||||
|
||||
# Good
|
||||
if Product.objects.filter(sku=sku).exists():
|
||||
...
|
||||
```
|
||||
|
||||
### HIGH — Code Quality
|
||||
|
||||
- **Business logic in views or serializers**: Move to `services.py`
|
||||
- **Signal logic that belongs in a service**: Signals make flow hard to trace — call the service function explicitly instead
|
||||
- **Mutable default in model field**: `default=[]` or `default={}` is shared across instances — use the callables `default=list` or `default=dict`
|
||||
- **`save()` called without `update_fields`**: Overwrites all columns — risk of clobbering concurrent writes
|
||||
|
||||
```python
|
||||
# Bad
|
||||
user.last_active = now()
|
||||
user.save()
|
||||
|
||||
# Good
|
||||
user.last_active = now()
|
||||
user.save(update_fields=['last_active'])
|
||||
```
|
||||
|
||||
### MEDIUM — Best Practices
|
||||
|
||||
- **`str(queryset)` or slicing for debug**: Leftover debugging — inspect querysets in the Django shell, not in production code
|
||||
- **Accessing `request.user` in serializer `validate()`**: Pass via context, not direct access
|
||||
- **`print()` instead of `logger`**: Use `logging.getLogger(__name__)`
|
||||
- **Missing `related_name`**: Reverse accessors like `user_set` are confusing
|
||||
- **`blank=True` without `null=True` on non-string fields**: Forms accept an empty value that the DB column cannot store — saving raises `IntegrityError`
|
||||
- **Hardcoded URLs**: Use `reverse()` or `reverse_lazy()`
|
||||
- **Missing `__str__` on models**: Django admin and logs show unhelpful `ModelName object (pk)` labels without it
|
||||
- **App not using `AppConfig.ready()`**: Signal receivers not connected properly
|
||||
|
||||
### MEDIUM — Testing Gaps
|
||||
|
||||
- **No test for permission boundary**: Verify unauthorized access returns 403/401
|
||||
- **`force_authenticate` instead of proper token**: Tests skip auth logic entirely
|
||||
- **Missing `@pytest.mark.django_db`**: Any DB access in the test fails with "Database access not allowed"
|
||||
- **Factory not used**: Raw `Model.objects.create()` in tests is fragile
|
||||
|
||||
## Diagnostic Commands
|
||||
|
||||
```bash
|
||||
python manage.py check # Django system check
|
||||
python manage.py makemigrations --check # Detect missing migrations
|
||||
ruff check . # Fast linter
|
||||
mypy . --ignore-missing-imports # Type checking
|
||||
bandit -r . -ll # Security scan (medium+)
|
||||
pytest --cov=apps --cov-report=term-missing -q # Tests + coverage
|
||||
```
|
||||
|
||||
## Review Output Format
|
||||
|
||||
```text
|
||||
[SEVERITY] Issue title
|
||||
File: apps/orders/views.py:42
|
||||
Issue: Description of the problem
|
||||
Fix: What to change and why
|
||||
```
|
||||
|
||||
## Approval Criteria
|
||||
|
||||
- **Approve**: No CRITICAL or HIGH issues
|
||||
- **Warning**: MEDIUM issues only (can merge with caution)
|
||||
- **Block**: CRITICAL or HIGH issues found
|
||||
|
||||
## Framework-Specific Checks
|
||||
|
||||
- **Migrations**: Every model change must have a migration. Two-phase for column removal.
|
||||
- **DRF**: All public endpoints need explicit `permission_classes`. Pagination on all list views.
|
||||
- **Celery**: Tasks must be idempotent. Use `bind=True` + `self.retry()` for transient failures.
|
||||
- **Django Admin**: Never expose sensitive fields. Use `readonly_fields` for auto-generated data.
|
||||
- **Signals**: Prefer explicit service calls. If signals are used, register in `AppConfig.ready()`.
|
||||
|
||||
## Reference
|
||||
|
||||
For Django architecture patterns and ORM examples, see `skill: django-patterns`.
|
||||
For security configuration checklists, see `skill: django-security`.
|
||||
For testing patterns and fixtures, see `skill: django-tdd`.
|
||||
|
||||
---
|
||||
|
||||
Review with the mindset: "Would this code safely serve 10,000 concurrent users without data loss, security breach, or a 3am pager alert?"
|
||||
107
commands/cost-report.md
Normal file
107
commands/cost-report.md
Normal file
@@ -0,0 +1,107 @@
|
||||
---
|
||||
description: Generate a local Claude Code cost report from a cost-tracker SQLite database.
|
||||
argument-hint: [csv]
|
||||
---
|
||||
|
||||
# Cost Report
|
||||
|
||||
Query the local cost-tracking database and present a spending report by day,
|
||||
project, tool, and session. This command assumes a cost-tracking hook or plugin
|
||||
is already writing usage rows to `~/.claude-cost-tracker/usage.db`.
|
||||
|
||||
## What This Command Does
|
||||
|
||||
1. Check that `sqlite3` is available.
|
||||
2. Check that `~/.claude-cost-tracker/usage.db` exists.
|
||||
3. Run aggregate queries against the `usage` table.
|
||||
4. Present a compact report, or export the 100 most recent rows as CSV when the argument is
|
||||
`csv`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
The database must be populated by a local cost tracker. If the file is missing,
|
||||
tell the user the tracker is not set up and suggest installing or enabling a
|
||||
trusted Claude Code cost-tracking hook/plugin first.
|
||||
|
||||
```bash
|
||||
test -f ~/.claude-cost-tracker/usage.db && echo "Database found" || echo "Database not found"
|
||||
```
|
||||
|
||||
## Summary Query
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT
|
||||
ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now') THEN cost_usd END), 0), 4) AS today_cost,
|
||||
ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now', '-1 day') THEN cost_usd END), 0), 4) AS yesterday_cost,
|
||||
ROUND(COALESCE(SUM(cost_usd), 0), 4) AS total_cost,
|
||||
COUNT(*) AS total_calls,
|
||||
COUNT(DISTINCT session_id) AS sessions
|
||||
FROM usage;
|
||||
"
|
||||
```
|
||||
|
||||
## Project Breakdown
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT project, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY project
|
||||
ORDER BY cost DESC;
|
||||
"
|
||||
```
|
||||
|
||||
## Tool Breakdown
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT tool_name, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY tool_name
|
||||
ORDER BY cost DESC;
|
||||
"
|
||||
```
|
||||
|
||||
## Last Seven Days
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT date(timestamp) AS date, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY date(timestamp)
|
||||
ORDER BY date DESC
|
||||
LIMIT 7;
|
||||
"
|
||||
```
|
||||
|
||||
## CSV Export
|
||||
|
||||
If the user asks for `/cost-report csv`, export the 100 most recent usage rows with
|
||||
an explicit column list:
|
||||
|
||||
```bash
|
||||
sqlite3 -csv -header ~/.claude-cost-tracker/usage.db "
|
||||
SELECT timestamp, project, tool_name, input_tokens, output_tokens, cost_usd, session_id, model
|
||||
FROM usage
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 100;
|
||||
"
|
||||
```
|
||||
|
||||
## Report Format
|
||||
|
||||
Format the response as:
|
||||
|
||||
1. Summary: today, yesterday, total, calls, sessions. The today/yesterday boundaries come from SQLite `date('now')`, which is evaluated in UTC.
|
||||
2. By project: projects ranked by total cost.
|
||||
3. By tool: tools ranked by total cost.
|
||||
4. Last seven days: date, cost, call count for the seven most recent dates that have recorded usage.
|
||||
|
||||
Use four decimal places for sub-dollar amounts. Do not estimate pricing from raw
|
||||
tokens in this command; rely on the precomputed `cost_usd` values written by the
|
||||
tracker.
|
||||
|
||||
## Source
|
||||
|
||||
Salvaged from stale community PR #1304, originally authored by `MayurBhavsar`.
|
||||
@@ -16,10 +16,21 @@ so the live execution truth is split across:
|
||||
|
||||
As of 2026-05-12:
|
||||
|
||||
- Public GitHub queues are clean across `everything-claude-code`,
|
||||
`agentshield`, `JARVIS`, `ECC-Tools`, and `ECC-website`.
|
||||
- Public GitHub queues are clean across `affaan-m/everything-claude-code`,
|
||||
`affaan-m/agentshield`, `affaan-m/JARVIS`, `ECC-Tools/ECC-Tools`, and
|
||||
`ECC-Tools/ECC-website`.
|
||||
- Public GitHub discussions are also clean across those tracked repos:
|
||||
`states: OPEN` returned zero discussions for every accessible discussion
|
||||
surface on 2026-05-12.
|
||||
- The final open public GitHub issue, #1314, was closed as a non-actionable
|
||||
external badge/listing notification with a courtesy comment.
|
||||
- Linear issue creation for this project was re-tested after GitHub cleanup and
|
||||
is still blocked by the workspace free issue limit. Seven roadmap-lane issue
|
||||
creation attempts all returned the same limit error, so this repo mirror and
|
||||
Linear project status updates remain the active tracking surfaces until the
|
||||
workspace is upgraded or issue capacity is freed.
|
||||
- `npm run harness:audit -- --format json` reports 70/70 on current `main`.
|
||||
- `npm run observability:ready` reports 14/14 readiness on current `main`.
|
||||
- `npm run observability:ready` reports 16/16 readiness on current `main`.
|
||||
- `docs/architecture/harness-adapter-compliance.md` maps Claude Code, Codex,
|
||||
OpenCode, Cursor, Gemini, Zed-adjacent, dmux, Orca, Superset, Ghast, and
|
||||
terminal-only support to install paths, verification commands, and risk
|
||||
@@ -30,6 +41,28 @@ As of 2026-05-12:
|
||||
- `docs/releases/2.0.0-rc.1/publication-readiness.md` gates GitHub release,
|
||||
npm dist-tag, Claude plugin, Codex plugin, OpenCode package, billing, and
|
||||
announcement publication on fresh evidence fields.
|
||||
- `docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md` records the
|
||||
rc.1 naming decision: ship as Everything Claude Code (ECC), keep
|
||||
`ecc-universal` for npm, keep `ecc` for Claude/Codex plugin slugs, and defer
|
||||
any broader repo/package rename until after the release pipeline is proven.
|
||||
- `docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md` records the
|
||||
dry-run publication evidence pass: npm pack/publish dry-runs, temp install
|
||||
smoke, Claude plugin validation/tag preflight, Codex marketplace CLI shape,
|
||||
OpenCode build, and the remaining approval-gated release blockers.
|
||||
- A detached clean worktree at
|
||||
`bfacf37715b39655cbc2c48f12f2a35c67cb0253` verified Claude plugin tag
|
||||
dry-run without `--force`, local marketplace discovery, temp-home local
|
||||
install, enabled plugin listing, and clean uninstall for `ecc@ecc`
|
||||
`2.0.0-rc.1`.
|
||||
- `docs/architecture/evaluator-rag-prototype.md` and
|
||||
`examples/evaluator-rag-prototype/` define the first read-only
|
||||
self-improving harness prototype: scenario specs, traces, reports,
|
||||
candidate playbooks, verifier results, accepted maintainer-salvage,
|
||||
billing-readiness, CI-failure-diagnosis, and harness-config-quality
|
||||
candidates, plus the AgentShield policy-exception scenario and rejected
|
||||
unsafe candidates.
|
||||
- The npm package surface now excludes Python bytecode/cache artifacts through
|
||||
package `files` negation rules and a publish-surface regression test.
|
||||
- `docs/legacy-artifact-inventory.md` records that no `_legacy-documents-*`
|
||||
directories exist in the current checkout, inventories the two sibling
|
||||
workspace-level `_legacy-documents-*` repos as sanitized extraction sources,
|
||||
@@ -58,6 +91,32 @@ As of 2026-05-12:
|
||||
- AgentShield PR #60 added category-level built-in corpus benchmark output,
|
||||
a `readyForRegressionGate` signal, terminal `--corpus` category coverage,
|
||||
README/API docs, built-CLI smoke validation, and 1,705-test coverage.
|
||||
- AgentShield PR #61 cleared the remaining Dependabot security/bugfix PR with
|
||||
a lockfile-only `postcss` 8.5.6 -> 8.5.14 bump after local typecheck, full
|
||||
tests, lint, build, and remote self-scan/action verification.
|
||||
- AgentShield PR #62 added organization-policy exception lifecycle audit
|
||||
evidence: active, expiring-soon, and expired exception counts; owner, ticket,
|
||||
scope, expiry, and days-until-expiry reporting; terminal output and GitHub
|
||||
Action job-summary evidence; README docs; rebuilt action bundles; and
|
||||
1,708-test validation.
|
||||
- AgentShield PR #63 exposed baseline drift in the GitHub Action with
|
||||
`baseline` / `save-baseline` inputs, baseline drift outputs, job-summary
|
||||
evidence, regression annotations, README/API docs, rebuilt action bundles,
|
||||
and green remote action/self-scan/Node verification.
|
||||
- AgentShield PR #64 added the first-class `agentshield baseline write`
|
||||
CLI command with severity filtering, JSON metadata output, README/API docs,
|
||||
rebuilt CLI bundle, local TDD coverage, and green remote action/self-scan/Node
|
||||
verification.
|
||||
- AgentShield PDF-export decision: defer a native PDF writer for now. The
|
||||
self-contained HTML executive report remains the exportable buyer artifact
|
||||
and can be printed to PDF when needed; native PDF generation should wait for
|
||||
explicit enterprise/compliance demand or a print-fidelity gap in the HTML
|
||||
report.
|
||||
- `docs/architecture/agentshield-enterprise-research-roadmap.md` identifies
|
||||
the next AgentShield enterprise signal: move from scanner/report/policy gate
|
||||
to a team control plane with baseline drift, evidence packs, multi-harness
|
||||
adapters, corpus accuracy gates, remediation routing, threat intelligence,
|
||||
and ECC-Tools/GitHub App integration.
|
||||
- ECC PR #1778 recovered the useful stale #1413 network/homelab architect-agent
|
||||
concepts.
|
||||
- ECC-Tools PR #26 added cost/token-risk predictive follow-ups for AI routing,
|
||||
@@ -85,12 +144,57 @@ As of 2026-05-12:
|
||||
plugin, agent, hook, command, and harness config changes that lack harness
|
||||
audit, adapter matrix, cross-harness docs, or compatibility regression
|
||||
evidence.
|
||||
- ECC-Tools PR #34 added skill-quality predictive follow-ups and a Skill
|
||||
Quality PR-risk bucket for skill, agent, command, and rule guidance changes
|
||||
that lack examples, validation, eval, or reference evidence.
|
||||
- ECC-Tools PR #35 added RAG/evaluator predictive follow-ups and a
|
||||
RAG/Evaluator Evidence PR-risk bucket for retrieval, embedding, ranking, and
|
||||
evaluator changes that lack reference-set comparison, golden trace,
|
||||
benchmark, fixture, or eval-run evidence.
|
||||
- ECC-Tools PR #36 added deep-analyzer predictive follow-ups, a Deep Analyzer
|
||||
Evidence PR-risk bucket, and a Linear-ready project sync backlog table for
|
||||
deferred follow-up work.
|
||||
- ECC-Tools PR #37 added a maintained analyzer corpus fixture, corpus validation
|
||||
tests, and co-located analyzer reference-set evidence recognition for future
|
||||
predictive follow-ups and PR-risk taxonomy checks.
|
||||
- ECC-Tools PR #38 added PR review/stale-salvage predictive follow-ups, a
|
||||
PR Review/Salvage Evidence taxonomy bucket, and maintained corpus fixtures
|
||||
for stale-closure salvage, reviewer-thread, and reopen-flow evidence.
|
||||
- ECC-Tools PR #39 added opt-in native Linear GraphQL sync for deferred
|
||||
follow-up backlog items, preserving GitHub object caps while creating or
|
||||
reusing Linear issues when `LINEAR_API_KEY` and `LINEAR_TEAM_ID` are
|
||||
configured.
|
||||
- ECC-Tools PR #40 added a checked-in evaluator/RAG corpus contract covering
|
||||
stale-PR salvage, billing readiness, CI failure diagnosis, harness config
|
||||
quality, AgentShield policy exceptions, skill-quality evidence,
|
||||
deep-analyzer evidence, and RAG/evaluator comparison evidence, with each
|
||||
scenario exercising missing-evidence and evidence-backed diffs.
|
||||
- ECC PR #1803 landed the contributor Quarkus handling branch after maintainer
|
||||
cleanup, current-`main` alignment, full local validation, and preservation of
|
||||
the author's removal of incomplete ja-JP and zh-CN Quarkus translations.
|
||||
- ECC PR #1812 salvaged useful Django reviewer, Django build resolver, and
|
||||
Django Celery guidance from stale PR #1310 through a maintainer-owned branch
|
||||
with source credit, catalog sync, and full local/remote validation.
|
||||
- ECC PR #1813 expanded the stale PR salvage ledger with source-to-salvage
|
||||
mappings for #1325, #1414, #1478, #1504, and #1603, confirming those useful
|
||||
stale contributions were already preserved through later maintainer PRs.
|
||||
- ECC PR #1815 salvaged the useful stale #1304 cost-tracking and #1232
|
||||
skill-scout work into current command/skill conventions with current catalog
|
||||
sync and full local/remote validation.
|
||||
- ECC PR #1816 salvaged the useful stale #1659 frontend design guidance into
|
||||
canonical ECC skill layout while preserving the guardrail that the official
|
||||
Anthropic `frontend-design` skill remains externally sourced.
|
||||
- ECC PR #1817 salvaged the useful stale #1658 code-reviewer false-positive
|
||||
guardrails, adding proof gates for HIGH/CRITICAL findings, common
|
||||
false-positive exclusions, and a regression test.
|
||||
- ECC PR #1818 recorded the May 12 stale-salvage gap pass, classifying already
|
||||
present work, skipped work, and translator/manual-review leftovers.
|
||||
|
||||
## Operating Rules
|
||||
|
||||
- Keep public PRs and issues below 20, with zero as the preferred release-lane
|
||||
target.
|
||||
- Maintain 70/70 harness audit and 14/14 observability readiness after every
|
||||
- Maintain 70/70 harness audit and 16/16 observability readiness after every
|
||||
GA-readiness batch.
|
||||
- Do not publish release or social announcements until the GitHub release,
|
||||
npm/package state, billing state, and plugin submission surfaces are verified
|
||||
@@ -100,6 +204,58 @@ As of 2026-05-12:
|
||||
maintainer-owned branches, and credit the source PR.
|
||||
- Do not create new Linear issues until the active issue limit is cleared.
|
||||
|
||||
## Prompt-To-Artifact Execution Checklist
|
||||
|
||||
This table keeps the long operator prompt tied to concrete artifacts. A status
|
||||
is not complete unless the evidence named in its row exists and has been freshly verified.
|
||||
|
||||
| Prompt requirement | Required artifact or gate | Current evidence | Status |
|
||||
| --- | --- | --- | --- |
|
||||
| Keep public PRs below 20 | Repo-family PR recheck | 0 open PRs across the tracked public repos on 2026-05-12 | Complete for this checkpoint |
|
||||
| Keep public issues below 20 | Repo-family issue recheck | 0 open issues across the tracked public repos on 2026-05-12 after closing #1314 as non-actionable badge/listing noise | Complete for this checkpoint |
|
||||
| Manage repository discussions | Repo-family discussion recheck | 0 open discussions across the tracked public repos on 2026-05-12 via GraphQL `states: OPEN` checks | Complete for this checkpoint |
|
||||
| Manage PR discussions | PR review/comment closure plus merge/close state | #1803 was maintainer-edited and merged; no open PRs remain | Complete for this checkpoint |
|
||||
| Salvage useful stale work | `docs/stale-pr-salvage-ledger.md` | Ledger records salvaged, superseded, skipped, and manual-review tails; #1815-#1818 added cost tracking, skill scout, frontend design guidance, code-reviewer false-positive guardrails, and the May 12 gap pass | Complete except translation/manual review tail |
|
||||
| ECC 2.0 preview pack ready | Release docs, quickstart, publication readiness, release notes | `docs/releases/2.0.0-rc.1/` and readiness docs are in-tree | Needs final release evidence |
|
||||
| Hermes specialized skills included safely | Hermes setup/import docs and sanitized skill surface | Hermes setup and import playbook are public; secrets stay local | Needs final release review |
|
||||
| Naming and rename readiness | Naming matrix across package/plugin/docs/social surfaces | `docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md` records current package, repo, Claude plugin, Codex plugin, OpenCode, and npm availability evidence | Complete for rc.1; post-rc rename remains future work |
|
||||
| Claude and Codex plugin publication | Contact/submission path with required artifacts and status | Publication readiness, naming matrix, and May 12 dry-run evidence document plugin validation, clean-checkout Claude tag/install smoke, and Codex marketplace CLI shape | Needs explicit approval for real tag/push and marketplace submission |
|
||||
| Articles, tweets, and announcements | X thread, LinkedIn copy, GitHub release copy, push checklist | Draft launch collateral exists under rc.1 release docs | Needs URL-backed refresh |
|
||||
| AgentShield enterprise iteration | Policy gates, SARIF, packs, provenance, corpus, HTML reports, exception lifecycle audit, baseline drift Action/CLI surfaces, enterprise research roadmap | PRs #53, #55-#64 landed with test evidence; native PDF export deferred in favor of self-contained HTML plus print-to-PDF until explicit enterprise demand appears; `docs/architecture/agentshield-enterprise-research-roadmap.md` selects baseline drift as the first control-plane slice | Baseline-drift Action and CLI write surfaces landed; evidence-pack routing remains |
|
||||
| ECC Tools next-level app | Billing audit, PR checks, deep analyzer, sync backlog, evaluator/RAG corpus | PRs #26-#40 landed with test evidence | Needs capacity-backed Linear rollout |
|
||||
| GitGuardian/Dependabot/CodeRabbit-style checks | Non-blocking taxonomy and deterministic follow-up checks | ECC-Tools risk taxonomy check plus follow-up signals landed, including Skill Quality, Deep Analyzer Evidence, Analyzer Corpus Evidence, RAG/Evaluator Evidence, and PR Review/Salvage Evidence | Partially complete |
|
||||
| Harness-agnostic learning system | Audit, adapter matrix, observability, traces, promotion loop | Audit/adapters/observability gates plus `docs/architecture/evaluator-rag-prototype.md`, `examples/evaluator-rag-prototype/`, and ECC-Tools PR #40 define read-only stale-salvage, billing-readiness, CI-failure-diagnosis, harness-config-quality, AgentShield policy-exception, skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison scenarios with trace, report, playbook, verifier, and predictive-check artifacts | Local corpus complete; hosted integration remains future |
|
||||
| Linear roadmap is detailed | Linear project status plus repo mirror | Repo mirror exists; issue creation was retried on 2026-05-12 and remains blocked by the workspace free issue limit | Needs recurring status updates after each merge batch |
|
||||
| Flow separation and progress tracking | Flow lanes with owner artifacts and update cadence | This roadmap defines lanes below | Active |
|
||||
| Realtime Linear sync | Project updates while issue limit is blocked; issues later | ECC-Tools #39 implements opt-in Linear API sync for deferred follow-up backlog items | Needs workspace capacity/config rollout |
|
||||
| Observability for self-use | Local readiness gate, traces, status snapshots, HUD/status contract, risk ledger | `npm run observability:ready` reports 16/16 | Complete for local gate |
|
||||
| Proper release and notifications | Release tag, npm publish state, plugin state, social posts | Publication readiness gate exists | Not complete |
|
||||
|
||||
## Execution Lanes And Tracking Contract
|
||||
|
||||
Until Linear issue capacity is cleared, this document is the durable execution
|
||||
ledger and Linear receives project status updates only. When capacity is
|
||||
available, each lane below should become a small set of Linear issues linked
|
||||
back to the repo evidence and merge commits.
|
||||
|
||||
| Lane | Source of truth | Next tracked artifact | Update cadence |
|
||||
| --- | --- | --- | --- |
|
||||
| Queue hygiene and salvage | GitHub PR/issue state, salvage ledger | Append ledger entries for any future stale closures | Every cleanup batch |
|
||||
| Release and publication | rc.1 release docs, publication readiness doc | Naming matrix and plugin submission/contact checklist | Before any tag |
|
||||
| Harness OS core | Audit, adapter matrix, observability docs, `ecc2/` | HUD/session-control acceptance spec | Weekly until GA |
|
||||
| Evaluation and RAG | Reference-set validation, harness audit, traces, ECC-Tools corpus | Read-only evaluator/RAG prototype plus stale-salvage, billing-readiness, CI-failure-diagnosis, harness-config-quality, AgentShield policy-exception, skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison fixtures | Hosted retrieval/check-run automation plan |
|
||||
| AgentShield enterprise | AgentShield PR evidence and roadmap notes | Baseline-drift evidence-pack and backlog sync follow-up | Next implementation batch |
|
||||
| ECC Tools app | ECC-Tools PR evidence, billing audit, risk taxonomy, evaluator/RAG corpus | Capacity-backed Linear rollout | Next implementation batch |
|
||||
| Linear progress | Linear project status updates and this mirror | Status update with queue/evidence/missing gates | Every significant merge batch |
|
||||
|
||||
The project status update should always include:
|
||||
|
||||
1. Current public PR and issue counts.
|
||||
2. Merged evidence since the previous update.
|
||||
3. Deferred or blocked items with the reason.
|
||||
4. The next one or two implementation slices.
|
||||
5. Any release or publication gate that is still not evidence-backed.
|
||||
|
||||
## Reference Pressure
|
||||
|
||||
The GA roadmap is informed by these reference surfaces:
|
||||
@@ -160,7 +316,7 @@ Target: 2026-06-07
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Observability readiness remains 14/14 and is backed by JSONL traces, status
|
||||
- Observability readiness remains 16/16 and is backed by JSONL traces, status
|
||||
snapshots, risk ledger, and exportable handoff contracts.
|
||||
- HUD/status model covers context, tool calls, active agents, todos, checks,
|
||||
cost, risk, and queue state.
|
||||
@@ -189,8 +345,9 @@ Target: 2026-06-14
|
||||
|
||||
Acceptance:
|
||||
|
||||
- Formal policy schema exists for org baselines, exceptions, owners,
|
||||
expiration, severity, and audit trails.
|
||||
- Formal policy schema and evaluation output exist for org baselines,
|
||||
exceptions, owners, expiration, severity, audit trails, expiring-soon
|
||||
visibility, and expired-exception enforcement.
|
||||
- SARIF/code-scanning output is implemented and tested.
|
||||
- GitHub Action policy gates expose organization policy status and violation
|
||||
counts for branch-protection and CI evidence.
|
||||
@@ -201,7 +358,11 @@ Acceptance:
|
||||
- Prompt-injection corpus and regression benchmark are ready for continuous
|
||||
rule hardening with category-level coverage and regression-gate output.
|
||||
- Enterprise reports include JSON plus self-contained HTML executive output
|
||||
with risk posture, priority findings, and category exposure.
|
||||
with risk posture, priority findings, category exposure, and policy-exception
|
||||
lifecycle evidence in terminal/CI summaries.
|
||||
- Native PDF export is not a GA blocker unless an enterprise/compliance
|
||||
workflow requires a generated PDF file instead of the self-contained HTML
|
||||
report and browser print-to-PDF path.
|
||||
|
||||
### 6. ECC Tools Billing, Deep Analysis, PR Checks, And Linear Sync
|
||||
|
||||
@@ -216,15 +377,40 @@ Acceptance:
|
||||
failure modes.
|
||||
- Deep analyzer covers diff patterns, CI/CD workflows, dependency/security
|
||||
surface, PR review behavior, failure history, harness config, skill quality,
|
||||
and reference-set/RAG comparison.
|
||||
dedicated analyzer corpus evidence, co-located analyzer reference sets,
|
||||
PR review/stale-salvage evidence, RAG/evaluator comparison, and reference-set
|
||||
validation.
|
||||
- PR check suite taxonomy includes Security Evidence, Harness Drift, Install
|
||||
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, and Agent Config
|
||||
Review.
|
||||
Manifest Integrity, CI/CD Recommendation, Cost/Token Risk, Reference Set
|
||||
Validation, Deep Analyzer Evidence, RAG/Evaluator Evidence,
|
||||
PR Review/Salvage Evidence, Skill Quality, and Agent Config Review.
|
||||
- Evaluator/RAG billing readiness fixture
|
||||
`examples/evaluator-rag-prototype/billing-marketplace-readiness/` records the
|
||||
read-only claim-verification path for Marketplace, App, subscription, seat,
|
||||
entitlement, and plan language before launch copy can treat those claims as
|
||||
live.
|
||||
- Cost/token-risk predictive follow-ups flag AI routing, model-call, usage,
|
||||
quota, and budget changes when budget evidence is missing.
|
||||
- Reference-set validation follow-ups flag analyzer, skill, agent, command, and
|
||||
harness-guidance changes that lack eval, golden trace, benchmark, or
|
||||
maintained reference-set evidence.
|
||||
- Deep-analyzer follow-ups flag repository, commit, architecture, pattern, and
|
||||
analysis-pipeline changes that lack analyzer corpus, snapshot, fixture, or
|
||||
benchmark evidence.
|
||||
- Analyzer corpus evidence includes maintained fixtures and tests for current
|
||||
architecture and commit analyzer outputs, plus co-located
|
||||
`src/analyzers/{fixtures,goldens,reference-sets,benchmarks,evals}/` evidence
|
||||
paths.
|
||||
- RAG/evaluator follow-ups flag retrieval, embedding, ranking, and evaluator
|
||||
changes that lack reference-set comparison, golden trace, benchmark, fixture,
|
||||
or eval-run evidence.
|
||||
- Evaluator/RAG corpus contract mirrors the local prototype scenarios into
|
||||
ECC-Tools fixtures and tests for stale-PR salvage, billing readiness,
|
||||
CI failure diagnosis, harness config quality, AgentShield policy exceptions,
|
||||
skill-quality evidence, deep-analyzer evidence, and RAG/evaluator comparison.
|
||||
- PR review/stale-salvage follow-ups flag review, triage, stale-closure, and
|
||||
pull-request automation changes that lack stale-salvage fixtures,
|
||||
reviewer-thread cases, or reopen-flow reference evidence.
|
||||
- PR analysis comments summarize review follow-up signals for requested
|
||||
changes, unresolved or outdated review threads, and missing approvals.
|
||||
- CI failure-mode predictive follow-ups flag workflow and test-runner changes
|
||||
@@ -233,8 +419,9 @@ Acceptance:
|
||||
- Harness-config quality predictive follow-ups flag MCP, plugin, agent, hook,
|
||||
command, and harness config changes that lack audit, adapter matrix,
|
||||
cross-harness doc, or compatibility regression evidence.
|
||||
- Linear sync design maps findings to issues/status without flooding the
|
||||
workspace.
|
||||
- Linear sync maps deferred backlog findings to Linear issues without flooding
|
||||
GitHub, creates or reuses exact-title Linear issues when configured, and
|
||||
reports skipped sync when credentials or team configuration are absent.
|
||||
- Follow-up generation caps automatic GitHub object creation and keeps overflow
|
||||
findings in a copy-ready project sync backlog.
|
||||
|
||||
@@ -258,7 +445,14 @@ Acceptance:
|
||||
|
||||
## Next Engineering Slices
|
||||
|
||||
1. Decide whether AgentShield PDF export adds value beyond the merged HTML
|
||||
executive report and corpus benchmark output.
|
||||
2. Extend ECC Tools deep analysis and Linear/project sync without flooding the
|
||||
workspace.
|
||||
1. Finish the AgentShield baseline-drift control-plane slice from
|
||||
`docs/architecture/agentshield-enterprise-research-roadmap.md`: PR #63
|
||||
shipped the GitHub Action baseline outputs and job-summary evidence; PR #64
|
||||
shipped first-class baseline snapshot creation through
|
||||
`agentshield baseline write`; the remaining work is evidence-pack routing
|
||||
and ECC-Tools backlog sync integration.
|
||||
2. Enable/configure the merged Linear backlog sync path after workspace issue
|
||||
capacity clears or the Linear workspace is upgraded.
|
||||
3. Use the ECC-Tools evaluator/RAG corpus as the promotion gate before adding
|
||||
hosted retrieval, vector storage, model-backed judging, or automated
|
||||
check-run promotion.
|
||||
|
||||
@@ -136,6 +136,13 @@ Repo work:
|
||||
- `agentshield`: feed prompt-injection and config-risk findings into regression
|
||||
suites.
|
||||
|
||||
Current prototype:
|
||||
|
||||
- `docs/architecture/evaluator-rag-prototype.md` defines the read-only
|
||||
evaluator/RAG artifact contract.
|
||||
- `examples/evaluator-rag-prototype/` records the first scenario spec, trace,
|
||||
report, candidate playbook, and verifier result for stale-PR salvage.
|
||||
|
||||
Verification:
|
||||
|
||||
- read-only prototype that emits a trace, report, candidate playbook, and
|
||||
|
||||
329
docs/architecture/agentshield-enterprise-research-roadmap.md
Normal file
329
docs/architecture/agentshield-enterprise-research-roadmap.md
Normal file
@@ -0,0 +1,329 @@
|
||||
# AgentShield Enterprise Research Roadmap
|
||||
|
||||
Generated: 2026-05-12
|
||||
|
||||
This is a planning artifact for the next AgentShield enterprise iteration. It
|
||||
does not modify AgentShield code. The goal is to turn the current scanner,
|
||||
policy gate, corpus, and reporting surface into a security control plane for
|
||||
teams running AI coding agents across multiple harnesses.
|
||||
|
||||
## Evidence Reviewed
|
||||
|
||||
Current AgentShield repository state:
|
||||
|
||||
- AgentShield checkout on clean `main`.
|
||||
- `README.md`, `API.md`, `package.json`, `.github/workflows/*`, and
|
||||
`src/`/`tests/` module layout.
|
||||
- Current supported user surfaces: `agentshield scan`, `agentshield init`,
|
||||
`agentshield miniclaw start`, scanner JSON, MiniClaw API, GitHub Action,
|
||||
HTML, SARIF, markdown, terminal, and JSON reports.
|
||||
- Current enterprise-like surfaces: policy packs, GitHub Action policy
|
||||
enforcement, SARIF policy violations, supply-chain provenance, corpus
|
||||
benchmark, HTML executive reports, and exception lifecycle audit.
|
||||
|
||||
External references checked from official GitHub repos or README sources:
|
||||
|
||||
- [stablyai/orca](https://github.com/stablyai/orca): multi-agent IDE,
|
||||
worktree isolation, live agent status, GitHub integration, diff review, and
|
||||
notifications.
|
||||
- [superset-sh/superset](https://github.com/superset-sh/superset): AI-agent
|
||||
editor with worktree orchestration, built-in diff review, workspace presets,
|
||||
and universal CLI-agent compatibility.
|
||||
- [standardagents/dmux](https://github.com/standardagents/dmux): tmux/worktree
|
||||
multiplexer with lifecycle hooks, multi-agent launches, pane visibility, and
|
||||
merge/PR workflows.
|
||||
- [jarrodwatts/claude-hud](https://github.com/jarrodwatts/claude-hud): Claude
|
||||
Code statusline, context health, tool activity, agent tracking, todo
|
||||
progress, transcript parsing, and usage telemetry.
|
||||
- [stanford-iris-lab/meta-harness](https://github.com/stanford-iris-lab/meta-harness):
|
||||
harness optimization through repeatable tasks, logged proposer interactions,
|
||||
and evaluated scaffold changes.
|
||||
- [greyhaven-ai/autocontext](https://github.com/greyhaven-ai/autocontext):
|
||||
recursive improvement loop with traces, scored generations, playbooks,
|
||||
persisted knowledge, scenario evaluation, and optional production traces.
|
||||
- [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent):
|
||||
self-improving skills, memory, session search, multi-platform gateway,
|
||||
scheduled automation, terminal backends, and trajectory generation.
|
||||
- [anthropics/claude-code](https://github.com/anthropics/claude-code):
|
||||
terminal, IDE, GitHub, plugin, permission, MCP, and data-retention surfaces.
|
||||
- [anomalyco/opencode](https://github.com/anomalyco/opencode): provider-agnostic
|
||||
open-source coding agent with build/plan agents, desktop beta,
|
||||
client/server architecture, and LSP support.
|
||||
- [opencode-ai/opencode](https://github.com/opencode-ai/opencode): earlier
|
||||
archived Go-based terminal agent with sessions, providers, LSP, file change
|
||||
tracking, custom commands, and auto-compact.
|
||||
- [zed-industries/zed](https://github.com/zed-industries/zed): high-performance
|
||||
multiplayer editor with strict license/compliance CI expectations.
|
||||
- [aidenybai/ghast](https://github.com/aidenybai/ghast): native terminal
|
||||
multiplexer built around Ghostty, workspace grouping, split panes, drag/drop,
|
||||
notifications, and terminal search.
|
||||
|
||||
Local Claude Code source inspection:
|
||||
|
||||
- Reviewed only non-secret local file/module shape from a private Claude Code
|
||||
source snapshot.
|
||||
- Relevant surfaces observed: `tools/`, `utils/permissions/`, `utils/mcp/`,
|
||||
`utils/hooks/`, `utils/plugins/`, `types/permissions.ts`,
|
||||
`types/plugin.ts`, `remote/`, `tasks/`, `assistant/sessionHistory.ts`,
|
||||
and session/history utilities.
|
||||
- No code was copied. The takeaway is that AgentShield should track permissions,
|
||||
plugins, MCP, hooks, remote sessions, task/subagent activity, and history as
|
||||
first-class audit domains rather than treating a `.claude/` tree as the only
|
||||
source of truth.
|
||||
|
||||
## Current AgentShield Position
|
||||
|
||||
AgentShield is already more than a static lint tool:
|
||||
|
||||
- Rule coverage spans secrets, permissions, hooks, MCP servers, agent configs,
|
||||
prompt injection, supply chain, taint analysis, sandbox execution, policy
|
||||
evaluation, runtime repair/status, corpus validation, MiniClaw, and Opus
|
||||
analysis.
|
||||
- Reports are usable by humans and machines: terminal, JSON, markdown, HTML,
|
||||
SARIF, scan logs, and GitHub Action outputs.
|
||||
- Enterprise hooks exist: policy packs, exception metadata, expiring/expired
|
||||
exception reporting, SARIF code scanning, and job-summary output.
|
||||
- Accuracy work is active: `runtimeConfidence`, template/example weighting,
|
||||
docs-example downgrades, hook-manifest resolution, false-positive audit
|
||||
guidance, and corpus readiness.
|
||||
|
||||
The next iteration should not be "add more regex rules" by default. The higher
|
||||
leverage move is to make AgentShield remember, compare, route, and enforce
|
||||
security posture across time, repos, teams, and harnesses.
|
||||
|
||||
## Enterprise Gaps
|
||||
|
||||
### 1. Organization Baselines And Drift
|
||||
|
||||
Enterprise buyers need to know whether a repo, team, or agent fleet is getting
|
||||
safer or riskier over time. AgentShield has scan logs and baseline comparison
|
||||
modules, and PR #63 now exposes baseline drift through GitHub Action inputs,
|
||||
outputs, annotations, and job-summary evidence. PR #64 adds first-class
|
||||
baseline snapshot creation through `agentshield baseline write`. The remaining
|
||||
product surface should make CLI drift summaries, evidence packs, and
|
||||
owner-ready deltas explicit.
|
||||
|
||||
Target capability:
|
||||
|
||||
- `agentshield baseline write --path .claude --output agentshield-baseline.json`
|
||||
- `agentshield scan --baseline agentshield-baseline.json`
|
||||
- Report sections for new, fixed, unchanged, suppressed, and policy-excepted
|
||||
findings.
|
||||
- GitHub Action output that posts "security posture changed" rather than only a
|
||||
point-in-time grade.
|
||||
|
||||
### 2. Multi-Harness Security Adapters
|
||||
|
||||
The market is moving toward many parallel agent harnesses, not one tool. Orca,
|
||||
Superset, dmux, OpenCode, Claude Code, Codex, Gemini, Zed, and terminal
|
||||
multiplexers all create different security surfaces.
|
||||
|
||||
Target capability:
|
||||
|
||||
- A small adapter registry for `claude-code`, `opencode`, `codex`, `gemini`,
|
||||
`zed`, `dmux`, `orca`, `superset`, and `generic-terminal`.
|
||||
- Each adapter declares config paths, permission concepts, plugin surfaces,
|
||||
MCP/tooling conventions, history/session surfaces, and CI evidence.
|
||||
- Report output groups findings by harness and confidence, so template/docs
|
||||
findings do not look like active runtime exposure.
|
||||
|
||||
### 3. Session And Worktree Awareness
|
||||
|
||||
Worktree-native orchestrators change the risk model. A team can run many agents
|
||||
in parallel, each with its own branch, shell, MCP config, and local state.
|
||||
|
||||
Target capability:
|
||||
|
||||
- Optional scan metadata for branch, worktree path, agent name, session id,
|
||||
provider, and orchestrator.
|
||||
- A scan-history table that answers: which worktree introduced a new permission,
|
||||
which agent run added a risky MCP, which branch relaxed policy, and whether
|
||||
the final merged branch fixed it.
|
||||
- A compact "security HUD" summary usable by statuslines, GitHub checks, and
|
||||
local dashboards.
|
||||
|
||||
### 4. Evidence Packs For Buyers And Auditors
|
||||
|
||||
HTML reports are the right buyer-facing artifact today; native PDF is deferred.
|
||||
The deeper need is a portable evidence bundle that can be attached to audits,
|
||||
security reviews, and customer questionnaires.
|
||||
|
||||
Target capability:
|
||||
|
||||
- `agentshield scan --evidence-pack out/agentshield-evidence`
|
||||
- Bundle includes JSON report, HTML report, SARIF, policy evaluation,
|
||||
exception audit, baseline diff, dependency/provenance summary, and a short
|
||||
README explaining how to interpret the artifacts.
|
||||
- Optional redaction mode for secrets, local paths, usernames, and project names.
|
||||
|
||||
### 5. Regression Corpus And Reference Sets
|
||||
|
||||
Meta-Harness and Autocontext point to the same lesson: improvements need scored
|
||||
scenarios, traces, and playbooks. AgentShield already has a corpus benchmark,
|
||||
but enterprise trust needs a curated reference set for false positives,
|
||||
false negatives, and policy regressions.
|
||||
|
||||
Target capability:
|
||||
|
||||
- Versioned scenario fixtures for critical rules, false-positive suppressions,
|
||||
policy exceptions, template/docs examples, plugin manifests, and hook-code
|
||||
resolution.
|
||||
- Per-category precision/coverage reporting, not just aggregate readiness.
|
||||
- A "no accuracy regression" gate that must pass before releases.
|
||||
- Playbook notes for why a suppression exists and when it should expire.
|
||||
|
||||
### 6. Remediation Workflow
|
||||
|
||||
Security tools become enterprise-grade when they turn findings into accountable
|
||||
work without flooding maintainers.
|
||||
|
||||
Target capability:
|
||||
|
||||
- One-click or CLI-generated remediation branch for safe transforms.
|
||||
- Policy comments that group findings by owner and risk rather than by file
|
||||
order.
|
||||
- GitHub App support for check-run annotations, issue caps, Linear sync, and
|
||||
deferred backlog export.
|
||||
- Finding fingerprints that avoid duplicate issues across repeated scans.
|
||||
|
||||
### 7. Threat Intelligence And Package Reputation
|
||||
|
||||
Agent security depends on MCP packages, plugin repositories, action bundles,
|
||||
and rapidly changing CLI ecosystems. Static checks need a maintained external
|
||||
reputation layer.
|
||||
|
||||
Target capability:
|
||||
|
||||
- A local-first threat-intel cache for known MCP/package risks, CVEs, malware
|
||||
package names, suspicious install scripts, mutable git dependencies, and
|
||||
known-good packages.
|
||||
- Offline deterministic mode remains available.
|
||||
- Online enrichment is opt-in and produces clear provenance for every external
|
||||
claim.
|
||||
|
||||
### 8. Commercial And Team Controls
|
||||
|
||||
AgentShield is already connected conceptually to the ECC Tools GitHub App.
|
||||
Native GitHub payments make the product path more concrete: free local scans,
|
||||
paid org policy gates, paid evidence bundles, and paid drift/history.
|
||||
|
||||
Target capability:
|
||||
|
||||
- Tier-aware GitHub App checks: free static scan, paid org policy enforcement,
|
||||
paid evidence packs, paid historical drift, and paid deep analysis.
|
||||
- Seat/team mapping for policy owners and exception approvers.
|
||||
- Billing readiness checks shared with ECC-Tools so payment state never changes
|
||||
enforcement behavior silently.
|
||||
|
||||
## Recommended Build Order
|
||||
|
||||
### Slice 1: Baseline Drift MVP
|
||||
|
||||
Implement the smallest enterprise control-plane primitive: compare this scan to
|
||||
the last accepted baseline.
|
||||
|
||||
Artifacts:
|
||||
|
||||
- Baseline JSON schema.
|
||||
- Baseline writer and comparator.
|
||||
- Terminal and JSON report sections for new/fixed/unchanged findings.
|
||||
- Tests covering stable fingerprints, fixed findings, new findings, and policy
|
||||
exception carry-forward.
|
||||
|
||||
Why first:
|
||||
|
||||
- It reuses existing scan output.
|
||||
- It improves CLI, GitHub Action, and GitHub App value at once.
|
||||
- It does not require a hosted service.
|
||||
|
||||
### Slice 2: Evidence Pack Bundle
|
||||
|
||||
Bundle the existing machine and human reports into a portable audit artifact.
|
||||
|
||||
Artifacts:
|
||||
|
||||
- `--evidence-pack <dir>` CLI flag.
|
||||
- Redacted bundle README.
|
||||
- HTML, JSON, SARIF, policy, exception, and baseline diff files.
|
||||
- Tests for file layout, redaction, and deterministic output names.
|
||||
|
||||
Why second:
|
||||
|
||||
- It converts existing reporting work into buyer-ready proof.
|
||||
- It keeps native PDF deferred while still meeting audit handoff needs.
|
||||
|
||||
### Slice 3: Harness Adapter Registry
|
||||
|
||||
Make harness support explicit instead of implicit.
|
||||
|
||||
Artifacts:
|
||||
|
||||
- Adapter metadata for Claude Code, OpenCode, Codex, Gemini, dmux, generic
|
||||
terminal, and project-local templates.
|
||||
- Discovery output that reports which adapters matched and why.
|
||||
- Report grouping by adapter.
|
||||
- Tests using fixture directories for each adapter.
|
||||
|
||||
Why third:
|
||||
|
||||
- It aligns AgentShield with ECC's harness-agnostic positioning.
|
||||
- It creates a stable surface for future Zed, Orca, Superset, and Hermes
|
||||
integration without pretending all harnesses share Claude's config model.
|
||||
|
||||
### Slice 4: Corpus Accuracy Gate
|
||||
|
||||
Promote the corpus from a benchmark into a release gate.
|
||||
|
||||
Artifacts:
|
||||
|
||||
- Per-category corpus report.
|
||||
- Required category thresholds.
|
||||
- Regression snapshots for known false-positive suppressions.
|
||||
- Release checklist entry requiring corpus readiness before publish.
|
||||
|
||||
Why fourth:
|
||||
|
||||
- It prevents enterprise credibility from degrading as rules expand.
|
||||
- It creates a durable route for Meta-Harness/Autocontext-style improvement
|
||||
loops later.
|
||||
|
||||
### Slice 5: GitHub App And Linear Sync Wiring
|
||||
|
||||
Connect AgentShield findings to ECC-Tools follow-up routing.
|
||||
|
||||
Artifacts:
|
||||
|
||||
- Finding fingerprints compatible with ECC-Tools issue caps.
|
||||
- Linear-ready backlog export for baseline drift and policy violations.
|
||||
- Check-run annotations grouped by owner/risk.
|
||||
- Tests that ensure repeated scans do not spam duplicate issues.
|
||||
|
||||
Why fifth:
|
||||
|
||||
- It needs the baseline/fingerprint work from Slice 1.
|
||||
- It is the bridge from local CLI to paid team workflow.
|
||||
|
||||
## Non-Goals For This Iteration
|
||||
|
||||
- Native PDF generation, unless buyer/compliance workflows explicitly require
|
||||
generated PDF instead of HTML plus print-to-PDF.
|
||||
- Hosted dashboards before the local baseline/evidence/fingerprint contracts are
|
||||
stable.
|
||||
- Fine-tuning or model training before deterministic corpus gates and reference
|
||||
traces exist.
|
||||
- Broad automated code rewrites for risky findings without explicit,
|
||||
reviewable transforms and tests.
|
||||
|
||||
## Acceptance Gates
|
||||
|
||||
The AgentShield enterprise iteration is not complete until these are true:
|
||||
|
||||
- Local `npm run typecheck`, `npm run lint`, `npm test`, and `npm run build`
|
||||
pass from the AgentShield repository root.
|
||||
- Built CLI smoke tests cover the new flags or report modes.
|
||||
- GitHub Action self-test covers the new CI-visible output.
|
||||
- Documentation names the free/local path and the paid/team path separately.
|
||||
- Evidence produced by the feature is deterministic enough for CI diffing.
|
||||
- ECC-Tools can consume the finding fingerprints or backlog export without
|
||||
exceeding GitHub/Linear object caps.
|
||||
- The GA roadmap and Linear project status link to the merged AgentShield PRs.
|
||||
158
docs/architecture/evaluator-rag-prototype.md
Normal file
158
docs/architecture/evaluator-rag-prototype.md
Normal file
@@ -0,0 +1,158 @@
|
||||
# Evaluator RAG Prototype
|
||||
|
||||
ECC 2.0 needs a self-improving harness loop that can learn from real work
|
||||
without blindly mutating a user's Claude, Codex, OpenCode, dmux, Zed, or
|
||||
terminal setup. This prototype defines the smallest read-only artifact set for
|
||||
that loop.
|
||||
|
||||
The fixture set lives in
|
||||
[`examples/evaluator-rag-prototype/`](../../examples/evaluator-rag-prototype/).
|
||||
It started with the May 2026 stale-PR cleanup and salvage lane because that
|
||||
lane has real inputs, real accepted work, and real rejected work. The corpus now
|
||||
also includes a billing/Marketplace readiness scenario so launch copy cannot
|
||||
treat dry-run release evidence or roadmap intent as live billing state. A
|
||||
CI-failure diagnosis scenario adds the log-first workflow needed before an
|
||||
agent proposes fixes for red checks. A harness-config quality scenario keeps
|
||||
MCP, plugin, hook, command, agent, and adapter recommendations tied to the
|
||||
adapter matrix before they mutate setup guidance. An AgentShield policy
|
||||
exception scenario gates security exceptions on SARIF/report evidence, owner
|
||||
fields, expiry state, and remediation-versus-exception decisions. A
|
||||
skill-quality evidence scenario requires observed failure or feedback evidence,
|
||||
working examples, reference-set gaps, and validation commands before a skill
|
||||
amendment can be promoted. A deep-analyzer evidence scenario requires analyzer
|
||||
corpus cases, expected-output comparisons, and risk-taxonomy proof before
|
||||
repository or commit-analysis behavior can change.
|
||||
|
||||
## Reference Pressure
|
||||
|
||||
- Meta-Harness: treat the harness itself as an experiment with scenario specs,
|
||||
verifier results, and promoted playbooks.
|
||||
- Autocontext: store traces, reports, artifacts, and reusable improvements
|
||||
before changing installed agent assets.
|
||||
- Claude HUD: expose context, tools, todos, agent activity, checks, and risk so
|
||||
an evaluator can judge a run after the fact.
|
||||
- Hermes Agent: keep skills, memories, scheduler-like follow-ups, and terminal
|
||||
gateway behavior explicit instead of hiding local commands.
|
||||
- dmux, Orca, Superset, and Ghast: preserve worktree/session state so parallel
|
||||
agent work can be compared, resumed, or closed cleanly.
|
||||
- ECC Tools: route evaluator findings into PR comments, check runs, and Linear
|
||||
backlog items without flooding GitHub.
|
||||
|
||||
## Artifact Contract
|
||||
|
||||
Every evaluator/RAG run is read-only until a verifier promotes a playbook.
|
||||
|
||||
| Artifact | Purpose | Fixture |
|
||||
| --- | --- | --- |
|
||||
| Scenario spec | Declares the objective, allowed evidence, forbidden actions, and pass/fail gates. | `scenario.json` |
|
||||
| Trace | Captures observation, retrieval, proposal, verification, and promotion events. | `trace.json` |
|
||||
| Report | Summarizes scores, evidence coverage, risks, and recommended next action. | `report.json` |
|
||||
| Candidate playbook | Describes the maintainer-owned workflow that could be reused later. | `candidate-playbook.md` |
|
||||
| Verifier result | Accepts or rejects candidates with concrete reasons and rollback notes. | `verifier-result.json` |
|
||||
|
||||
The prototype deliberately separates retrieval from action. A run can retrieve
|
||||
closed PR diffs, Linear status, CI history, and local docs, but it cannot close,
|
||||
merge, publish, tag, or rewrite configs as part of the evaluator pass.
|
||||
|
||||
## Phase Model
|
||||
|
||||
1. Observe the current queue, dirty worktrees, branch state, open PRs/issues,
|
||||
discussions, CI state, and release gates.
|
||||
2. Retrieve relevant reference evidence: stale-salvage ledger rows, prior
|
||||
maintainer PRs, current docs, analyzer findings, CI failures, and harness
|
||||
adapter rules.
|
||||
3. Propose one or more playbooks with source attribution and expected
|
||||
validation gates.
|
||||
4. Verify each playbook against explicit acceptance and rejection rules.
|
||||
5. Promote only the candidate that improves the scenario without widening blast
|
||||
radius.
|
||||
6. Record rollback guidance and unresolved manual-review tails.
|
||||
|
||||
## First Scenario
|
||||
|
||||
The first scenario is `stale-pr-salvage-maintainer-branch`.
|
||||
|
||||
It models the rule Affaan set during the May 2026 cleanup: stale closure is
|
||||
queue hygiene, not loss of useful work. Useful closed PR work should be ported
|
||||
into maintainer-owned PRs with attribution/backlinks, while generated churn,
|
||||
bulk localization, and ambiguous translator work stay out of blind
|
||||
cherry-picks.
|
||||
|
||||
The verifier accepts a maintainer salvage branch that:
|
||||
|
||||
- credits source PRs;
|
||||
- avoids raw private context and personal paths;
|
||||
- does not import stale bulk localization without translator review;
|
||||
- records a durable ledger update;
|
||||
- runs the same validation gates as a normal code, docs, or catalog change;
|
||||
- leaves release publication actions approval-gated.
|
||||
|
||||
The verifier rejects a blind cherry-pick proposal that:
|
||||
|
||||
- imports stale translation/doc churn wholesale;
|
||||
- skips the current catalog/install architecture;
|
||||
- lacks attribution;
|
||||
- lacks tests or ledger updates;
|
||||
- mutates release or plugin publication state.
|
||||
|
||||
## Corpus Fixtures
|
||||
|
||||
The root fixture files preserve the original
|
||||
`stale-pr-salvage-maintainer-branch` prototype. Additional scenarios can live in
|
||||
subdirectories when they reuse the same five-artifact contract.
|
||||
|
||||
Current corpus:
|
||||
|
||||
- `stale-pr-salvage-maintainer-branch`: recovers useful closed PR work through
|
||||
maintainer-owned branches with attribution and validation.
|
||||
- `billing-marketplace-readiness`: verifies billing, App, and Marketplace
|
||||
launch claims before public copy says they are live.
|
||||
- `ci-failure-diagnosis`: requires failed-job logs, changed-file scope, and a
|
||||
named regression command before a CI fix playbook can be promoted.
|
||||
- `harness-config-quality`: requires adapter state, install/onramp path,
|
||||
verification commands, risk notes, and config-preservation behavior before a
|
||||
harness setup recommendation can be promoted.
|
||||
- `agentshield-policy-exception`: requires AgentShield SARIF or report
|
||||
evidence, policy-pack source, owner/ticket/scope/expiry fields, and expired
|
||||
exception enforcement before a policy exception can be promoted.
|
||||
- `skill-quality-evidence`: requires focused skill scope, observed failure or
|
||||
user-feedback evidence, examples/reference-set coverage, validation commands,
|
||||
and publication safety before a skill amendment can be promoted.
|
||||
- `deep-analyzer-evidence`: requires maintained analyzer corpus cases,
|
||||
expected-output comparisons, representative repository/commit histories, and
|
||||
regression commands before deep-analysis behavior can be promoted.
|
||||
|
||||
## ECC Tools Mapping
|
||||
|
||||
ECC Tools already flags missing RAG/evaluator evidence for retrieval,
|
||||
embedding, ranking, and evaluator changes. This prototype gives those checks a
|
||||
target shape:
|
||||
|
||||
- `scenario.json` maps to analyzer corpus inputs.
|
||||
- `trace.json` maps to golden traces and run telemetry.
|
||||
- `report.json` maps to PR comment summaries and Linear backlog summaries.
|
||||
- `candidate-playbook.md` maps to the suggested follow-up PR body.
|
||||
- `verifier-result.json` maps to pass/fail check-run evidence.
|
||||
|
||||
Future ECC Tools work should consume these artifacts as the fixture shape before it
|
||||
adds hosted retrieval or model-backed judging. The local prototype is enough to
|
||||
prove the contract before any paid API or vector store is introduced.
|
||||
|
||||
## Promotion Rules
|
||||
|
||||
A candidate can be promoted only when:
|
||||
|
||||
- the verifier result is `accepted`;
|
||||
- at least one rejected candidate proves the verifier can say no;
|
||||
- every source PR or reference artifact has attribution;
|
||||
- the proposed action is maintainer-owned and reversible;
|
||||
- validation commands are named;
|
||||
- unresolved translator, release, billing, or publication items remain blocked
|
||||
until separately approved.
|
||||
|
||||
## Next Expansion
|
||||
|
||||
The local evaluator/RAG corpus now covers the current evidence buckets. Future
|
||||
work should consume these fixtures from ECC Tools before adding hosted
|
||||
retrieval, vector storage, model-backed judging, or automated check-run
|
||||
promotion.
|
||||
80
docs/architecture/hud-status-session-control.md
Normal file
80
docs/architecture/hud-status-session-control.md
Normal file
@@ -0,0 +1,80 @@
|
||||
# HUD Status And Session Control Contract
|
||||
|
||||
This contract defines the portable status payload ECC uses for local operator
|
||||
surfaces, handoffs, and future HUDs. It is intentionally harness-neutral: a
|
||||
Claude Code statusline, Codex pane, dmux session, OpenCode run, or terminal-only
|
||||
workflow can emit partial data without changing field names.
|
||||
|
||||
The canonical example lives at
|
||||
[`examples/hud-status-contract.json`](../../examples/hud-status-contract.json).
|
||||
|
||||
## Payload Shape
|
||||
|
||||
Every status payload uses `schema_version: "ecc.hud-status.v1"` and keeps these
|
||||
top-level sections stable:
|
||||
|
||||
| Field | Purpose | Primary Source |
|
||||
|---|---|---|
|
||||
| `context` | Model, harness, repo, branch, worktree, session id, and context-window pressure | statusline stdin, git, session adapters |
|
||||
| `toolCalls` | Recent tool counts, pending calls, stale calls, and last tool event | `loop-status`, `tool-usage.jsonl`, hook bridge |
|
||||
| `activeAgents` | Current workers/subagents, runtime state, branch, worktree, objective, and handoff paths | dmux/orchestration snapshots |
|
||||
| `todos` | Current in-progress task and todo counts | Claude todos, local task files, plan metadata |
|
||||
| `checks` | Local and remote validation status with command/check URLs when available | CI, local commands, release gates |
|
||||
| `cost` | Session spend, token counts, budget, and trend | cost tracker, metrics bridge |
|
||||
| `risk` | Attention state, conflict pressure, stale calls, dirty worktree, and manual-review flags | readiness gates, git, queue state |
|
||||
| `queueState` | GitHub PR/issue/discussion counts, conflict queue, merge queue, and stale-salvage queue | GitHub sync, work items |
|
||||
| `sessionControls` | Supported operator actions for the current target | ECC CLI, dmux, git/GitHub |
|
||||
| `sync` | Linear, GitHub, and handoff publication state | status updates, work items, handoff writer |
|
||||
|
||||
Fields can be `null`, empty arrays, or `"unknown"` when a harness cannot expose
|
||||
the signal. Producers should not invent incompatible names. Consumers should
|
||||
render missing sections as unavailable, not as green.
|
||||
|
||||
## Session Controls
|
||||
|
||||
The minimum session-control vocabulary is:
|
||||
|
||||
| Control | Meaning |
|
||||
|---|---|
|
||||
| `create` | Start a new isolated run, worktree, or orchestration plan |
|
||||
| `resume` | Reattach to an existing session or historical target |
|
||||
| `status` | Emit the current payload without mutating state |
|
||||
| `stop` | Request a graceful stop or mark the session completed |
|
||||
| `diff` | Show current working-tree or worker diff |
|
||||
| `pr` | Open or inspect the linked pull request |
|
||||
| `mergeQueue` | Show merge-ready, blocked, and waiting-check items |
|
||||
| `conflictQueue` | Show dirty/conflicting PRs or worktrees needing integration |
|
||||
|
||||
`sessionControls.supported` lists the controls available for the current
|
||||
harness. `sessionControls.blocked` explains unavailable controls, for example a
|
||||
missing GitHub token, no tmux session, or a read-only adapter.
|
||||
|
||||
## Sync Contract
|
||||
|
||||
The sync section separates durable trackers:
|
||||
|
||||
- `Linear` records project status update id, health, and whether issue creation
|
||||
is blocked by workspace capacity.
|
||||
- `GitHub` records the current repo, PR/issue/discussion queue counts, and the
|
||||
latest merged or open PR tied to the session.
|
||||
- `handoff` records the durable Markdown handoff path and whether it has been
|
||||
written after the latest batch.
|
||||
|
||||
This makes real-time progress tracking explicit without requiring every run to
|
||||
create Linear issues or GitHub comments. When Linear issue capacity is blocked,
|
||||
the status payload can still prove progress through project updates and repo
|
||||
handoffs.
|
||||
|
||||
## Current Implementations
|
||||
|
||||
- `ecc status --json` exposes readiness, active sessions, skill runs, install
|
||||
health, governance, and linked work items from the SQLite state store.
|
||||
- `ecc loop-status --json --write-dir <dir>` writes live transcript snapshots
|
||||
and attention signals for long-running loops.
|
||||
- `ecc session-inspect <target> --write <path>` emits canonical session
|
||||
snapshots from dmux and Claude-history adapters.
|
||||
- `scripts/hooks/ecc-statusline.js` renders compact model, task, cost, tool,
|
||||
file, duration, directory, and context pressure signals inside Claude Code.
|
||||
|
||||
The `ecc.hud-status.v1` payload is the common outer contract these surfaces can
|
||||
project into before ECC grows a dedicated full-screen HUD.
|
||||
@@ -19,6 +19,10 @@ operator needs.
|
||||
|
||||
- Live status: `scripts/loop-status.js` can emit JSON, watch active loops, and
|
||||
write snapshots for dashboards or handoffs.
|
||||
- HUD/status contract: `docs/architecture/hud-status-session-control.md` and
|
||||
`examples/hud-status-contract.json` define the portable payload for context,
|
||||
tool calls, active agents, todos, checks, cost, risk, queues, session
|
||||
controls, and tracker sync.
|
||||
- Session traces: `scripts/session-inspect.js` can inspect Claude, dmux, and
|
||||
adapter-backed sessions, then write canonical snapshots.
|
||||
- Harness baseline: `scripts/harness-audit.js` provides a repeatable scorecard
|
||||
@@ -56,9 +60,11 @@ later, but only after the local event model is useful enough to trust.
|
||||
scorecard.
|
||||
3. Run `node scripts/loop-status.js --json --write-dir .ecc/loop-status`
|
||||
during longer autonomous batches.
|
||||
4. Run `node scripts/session-inspect.js --list-adapters` to confirm which
|
||||
4. Review `examples/hud-status-contract.json` before wiring a new HUD or
|
||||
operator dashboard.
|
||||
5. Run `node scripts/session-inspect.js --list-adapters` to confirm which
|
||||
session surfaces are available.
|
||||
5. Use ECC2 tool logs for risky operations, conflict analysis, and handoff
|
||||
6. Use ECC2 tool logs for risky operations, conflict analysis, and handoff
|
||||
review before increasing autonomy.
|
||||
|
||||
The end-state is practical: before asking ECC to run larger multi-agent loops,
|
||||
|
||||
119
docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md
Normal file
119
docs/releases/2.0.0-rc.1/naming-and-publication-matrix.md
Normal file
@@ -0,0 +1,119 @@
|
||||
# ECC v2.0.0-rc.1 Naming And Publication Matrix
|
||||
|
||||
Snapshot date: 2026-05-12.
|
||||
|
||||
This matrix answers the release question "ship as Everything Claude Code, ECC,
|
||||
or a renamed surface?" for the rc.1 lane. It is evidence for planning, not a
|
||||
publication action.
|
||||
|
||||
## Decision
|
||||
|
||||
For `v2.0.0-rc.1`, keep the public identity as **Everything Claude Code (ECC)**.
|
||||
Use **ECC** as the short product name in copy, plugin slugs, status surfaces,
|
||||
and diagrams, but do not rename the GitHub repo, npm package, or package entry
|
||||
points before the rc.1 release.
|
||||
|
||||
Reason:
|
||||
|
||||
- the current install surface already works as `ecc-universal` plus the `ecc`
|
||||
plugin slug;
|
||||
- the exact npm package name `ecc` is already occupied by an unrelated elliptic
|
||||
curve cryptography package;
|
||||
- the short repo name `affaan-m/ecc` was not found (with current auth), but renaming
|
||||
`affaan-m/everything-claude-code` before rc.1 would create avoidable URL,
|
||||
package, docs, and marketplace churn;
|
||||
- Claude and Codex plugin surfaces are already short enough as `ecc`;
|
||||
- rc.1 should prove the release, plugin, and publication pipeline before any
|
||||
broader brand migration.
|
||||
|
||||
## Current Values
|
||||
|
||||
| Surface | Current value | Evidence command | 2026-05-12 result | Release decision |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| Product display name | `Everything Claude Code` | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Present across README, release notes, launch copy, and plugin manifests | Keep for rc.1 |
|
||||
| Short name | `ECC` | README/release docs | Used as the short cross-harness brand | Keep and prefer in tight copy |
|
||||
| GitHub repo | `affaan-m/everything-claude-code` | `git remote get-url origin` | `https://github.com/affaan-m/everything-claude-code.git` | Keep for rc.1 |
|
||||
| Possible short repo | `affaan-m/ecc` | `gh repo view affaan-m/ecc` | Not found with current auth | Candidate after rc.1 only |
|
||||
| npm package | `ecc-universal` | `node -p "require('./package.json').name"` | `ecc-universal` | Keep for rc.1 |
|
||||
| npm package version | `2.0.0-rc.1` local, `1.10.0` registry latest | `node -p "require('./package.json').version"` and `npm view ecc-universal name version dist-tags --json` | Local rc.1 is ready; registry latest remains `1.10.0` | Publish rc as `next`, not `latest` |
|
||||
| Exact npm short name | `ecc` | `npm view ecc name version description repository.url --json` | Occupied by `ecc@0.0.2`, "Elliptic curve cryptography functions." | Do not use |
|
||||
| Scoped npm short name | `@affaan-m/ecc` | `npm view @affaan-m/ecc name version --json` | Registry 404 | Possible future scoped package if npm scope policy permits |
|
||||
| Former package name | `everything-claude-code` | `npm view everything-claude-code name version dist-tags --json` | Registry reports unpublished on 2026-02-07 | Do not revive for rc.1 |
|
||||
| Claude plugin slug | `ecc` | `node -p "require('./.claude-plugin/plugin.json').name"` | `ecc` | Keep |
|
||||
| Claude plugin version | `2.0.0-rc.1` | `claude plugin validate .claude-plugin/plugin.json` | Validation passed on Claude Code `2.1.121` | Ready for release-tag gate |
|
||||
| Claude marketplace entry | `ecc` | `.claude-plugin/marketplace.json` | Version and repo point at current rc.1 surface | Keep |
|
||||
| Codex plugin slug | `ecc` | `node -p "require('./.codex-plugin/plugin.json').name"` | `ecc` | Keep |
|
||||
| Codex plugin version | `2.0.0-rc.1` | `node tests/docs/ecc2-release-surface.test.js` | Release surface test passed | Ready for Codex marketplace/manual marketplace gate |
|
||||
| OpenCode package | `ecc-universal` | `node -p "require('./.opencode/package.json').name"` | `ecc-universal` | Keep |
|
||||
| OpenCode build | Generated package output | `npm run build:opencode` | Passed | Ready for package dry-run gate |
|
||||
| npm pack surface | Reduced runtime package | `npm pack --dry-run --json` | Produced `ecc-universal-2.0.0-rc.1.tgz`, 969 entries, about 5.0 MB unpacked | Needs final release-commit rerun |
|
||||
|
||||
## Publication Paths
|
||||
|
||||
| Path | Current evidence | Required next action | Blocker |
|
||||
| --- | --- | --- | --- |
|
||||
| GitHub release | `docs/releases/2.0.0-rc.1/` and release notes are in-tree | Re-run required command evidence from the final release commit, then create/verify `v2.0.0-rc.1` prerelease | No tag/release yet |
|
||||
| npm | `ecc-universal` local package version is `2.0.0-rc.1`; registry latest is `1.10.0` | Publish rc with `npm publish --tag next` after final `npm pack --dry-run` and release tests | Do not publish before final release commit |
|
||||
| Claude plugin | `claude plugin validate .claude-plugin/plugin.json` passed; `claude plugin tag --help` confirms the release tag flow creates `{name}--v{version}` tags and can push them | Run `claude plugin tag .claude-plugin --dry-run` from the clean release commit, then tag/push only after release approval | No plugin release tag created in this pass |
|
||||
| Claude marketplace | `.claude-plugin/marketplace.json` points at `ecc` and the public repo | Verify marketplace update/install path after tag exists | External marketplace propagation not verified |
|
||||
| Codex plugin | `codex plugin marketplace` supports add/upgrade/remove; `.codex-plugin/plugin.json` is present and release-surface tests pass | Confirm marketplace source format, then test add/upgrade from the public repo or marketplace source | No public Codex marketplace submission path verified in this pass |
|
||||
| OpenCode package | `.opencode/package.json` builds from source and ships inside the npm package | Re-run `npm run build:opencode` and package dry-run from release commit | No separate plugin publication command was found in OpenCode CLI 1.2.21 during this pass |
|
||||
| ECC Tools billing claim | README and launch copy mention ECC Tools / marketplace context | Verify live GitHub App billing and plan state before any payment announcement | Billing dashboard/API evidence not recorded in this pass |
|
||||
| Social and longform copy | X thread, LinkedIn copy, article outline, GitHub release copy exist | Replace any stale URLs, then publish only after release/npm/plugin URLs work | Public URLs not final until release actions complete |
|
||||
|
||||
## Rename After rc.1
|
||||
|
||||
If the project moves from "Everything Claude Code" toward "ECC" after rc.1,
|
||||
do it as a staged migration:
|
||||
|
||||
1. Keep `ecc-universal` as the npm package until a replacement package has a
|
||||
verified owner, deprecation plan, and install migration.
|
||||
2. Keep `affaan-m/everything-claude-code` as the canonical repo until release
|
||||
notes, docs, plugin marketplace entries, npm metadata, and external links
|
||||
are prepared for redirects.
|
||||
3. Use `ECC` as the product name in new diagrams, status payloads, and
|
||||
cross-harness docs immediately.
|
||||
4. Reserve or create any new GitHub/npm/package surfaces before announcing the
|
||||
rename.
|
||||
5. Ship a compatibility guide that maps old commands, package names, plugin
|
||||
slugs, and docs URLs to the new names.
|
||||
|
||||
## Evidence Captured In This Pass
|
||||
|
||||
```text
|
||||
git rev-parse HEAD
|
||||
7109ee08db7209c5d14809efcf832043020dfc57
|
||||
|
||||
node -p "require('./package.json').name + '@' + require('./package.json').version"
|
||||
ecc-universal@2.0.0-rc.1
|
||||
|
||||
node -p "require('./.claude-plugin/plugin.json').name + '@' + require('./.claude-plugin/plugin.json').version"
|
||||
ecc@2.0.0-rc.1
|
||||
|
||||
node -p "require('./.codex-plugin/plugin.json').name + '@' + require('./.codex-plugin/plugin.json').version"
|
||||
ecc@2.0.0-rc.1
|
||||
|
||||
node -p "require('./.opencode/package.json').name + '@' + require('./.opencode/package.json').version"
|
||||
ecc-universal@2.0.0-rc.1
|
||||
|
||||
npm view ecc name version description repository.url --json
|
||||
ecc@0.0.2 is occupied by an unrelated elliptic curve cryptography package.
|
||||
|
||||
npm view ecc-universal name version dist-tags --json
|
||||
registry latest is 1.10.0; no next dist-tag exists for the rc yet.
|
||||
|
||||
claude plugin validate .claude-plugin/plugin.json
|
||||
Validation passed on Claude Code 2.1.121.
|
||||
|
||||
node tests/docs/ecc2-release-surface.test.js
|
||||
18 release-surface checks passed.
|
||||
|
||||
node tests/scripts/npm-publish-surface.test.js
|
||||
2 npm publish-surface checks passed.
|
||||
|
||||
npm run build:opencode
|
||||
Passed.
|
||||
|
||||
npm pack --dry-run --json
|
||||
Produced ecc-universal-2.0.0-rc.1.tgz, 969 entries, about 5.0 MB unpacked.
|
||||
```
|
||||
103
docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md
Normal file
103
docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md
Normal file
@@ -0,0 +1,103 @@
|
||||
# ECC v2.0.0-rc.1 Publication Evidence — 2026-05-12
|
||||
|
||||
This is dry-run release evidence only. It does not create a GitHub release, npm
|
||||
publication, plugin tag, marketplace submission, or announcement post.
|
||||
|
||||
## Source Commit
|
||||
|
||||
| Field | Evidence |
|
||||
| --- | --- |
|
||||
| Upstream main base | `0598af70a51346bae34d987b9bed143386055967` |
|
||||
| Evidence branch | `codex/release-publication-evidence` |
|
||||
| Evidence scope | Working tree with this branch's package hygiene and release-doc updates |
|
||||
| Git remote | `https://github.com/affaan-m/everything-claude-code.git` |
|
||||
| Local status caveat | Working tree had an unrelated untracked `docs/drafts/` directory |
|
||||
|
||||
The actual release operator should repeat these checks from the final release
|
||||
commit with a clean checkout before publishing.
|
||||
|
||||
## Registry And Release State
|
||||
|
||||
| Surface | Command | Result |
|
||||
| --- | --- | --- |
|
||||
| GitHub prerelease | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code --json tagName,url,isPrerelease` | `release not found` |
|
||||
| npm dist-tags | `npm view ecc-universal dist-tags --json` | `{ "latest": "1.10.0" }` |
|
||||
| npm package metadata | `node -p "require('./package.json').name + '@' + require('./package.json').version"` | `ecc-universal@2.0.0-rc.1` |
|
||||
| Product identity | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Present in README and rc.1 release docs |
|
||||
|
||||
## npm Dry Run
|
||||
|
||||
The first pack pass exposed local Python bytecode cache files in the tarball
|
||||
because broad package `files` entries included untracked local `__pycache__`
|
||||
paths. This branch adds explicit package-file exclusions and a regression test
|
||||
so `npm pack` fails if Python bytecode appears in the package surface.
|
||||
|
||||
| Command | Result |
|
||||
| --- | --- |
|
||||
| `node tests/scripts/npm-publish-surface.test.js` | Passed `2/2`; includes Python bytecode exclusion assertion |
|
||||
| `npm pack --dry-run --json` | `ecc-universal-2.0.0-rc.1.tgz`; `entryCount: 965`; `size: 1565968`; `unpackedSize: 4934637`; `hasBytecode: false` |
|
||||
| `npm publish --tag next --dry-run --json` | Dry-run target is npm registry with `tag next`; `entryCount: 965`; `hasBytecode: false` |
|
||||
|
||||
Temporary install smoke:
|
||||
|
||||
| Command | Result |
|
||||
| --- | --- |
|
||||
| `npm pack --pack-destination /tmp/ecc-publication-smoke-dd9ud5 --json` | Created `ecc-universal-2.0.0-rc.1.tgz` for local install smoke |
|
||||
| `npm install --prefix /tmp/ecc-publication-smoke-dd9ud5 /tmp/ecc-publication-smoke-dd9ud5/ecc-universal-2.0.0-rc.1.tgz` | Added 8 packages |
|
||||
| `node /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal/scripts/ecc.js --help` | Printed ECC selective-install CLI help |
|
||||
| `node /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal/scripts/catalog.js profiles --json` | Returned the 6 install profiles: `minimal`, `core`, `developer`, `security`, `research`, `full` |
|
||||
| `find /tmp/ecc-publication-smoke-dd9ud5/node_modules/ecc-universal -path '*__pycache__*' -o -name '*.pyc' -o -name '*.pyo' -o -name '*.pyd'` | No output |
|
||||
|
||||
## Plugin And Harness Evidence
|
||||
|
||||
| Surface | Command | Result |
|
||||
| --- | --- | --- |
|
||||
| Claude plugin manifest | `claude plugin validate .claude-plugin/plugin.json` | Passed |
|
||||
| Claude plugin tag preflight | `claude plugin tag .claude-plugin --dry-run` | Blocked by unrelated untracked `docs/drafts/` |
|
||||
| Claude plugin tag forced dry-run | `claude plugin tag .claude-plugin --dry-run --force` | Would create `ecc--v2.0.0-rc.1` at HEAD; do not use `--force` for real release unless maintainer decides |
|
||||
| Codex marketplace CLI | `codex plugin marketplace --help` and subcommand help | Supports `add`, `upgrade`, and `remove`; `add` supports repo and local marketplace roots |
|
||||
| OpenCode package | `npm run build:opencode` | Passed |
|
||||
| Claude hook/plugin route | `node tests/hooks/hooks.test.js` | Passed `236/236` |
|
||||
| Codex release surface | `node tests/docs/ecc2-release-surface.test.js` | Passed `18/18` |
|
||||
| Agent/catalog metadata | `node tests/scripts/catalog.test.js` | Passed `7/7` |
|
||||
| Observability gate | `npm run observability:ready` | Passed `16/16` |
|
||||
|
||||
## Clean-Checkout Claude Plugin Smoke
|
||||
|
||||
This follow-up pass used a detached clean worktree at
|
||||
`/tmp/ecc-clean-plugin-evidence` from commit
|
||||
`bfacf37715b39655cbc2c48f12f2a35c67cb0253`. It used an isolated temp home
|
||||
(`HOME=/tmp/ecc-clean-plugin-home`) and a temp local project
|
||||
(`/tmp/ecc-plugin-install-smoke`), so it did not write to the user's real Claude
|
||||
plugin config.
|
||||
|
||||
| Command | Result |
|
||||
| --- | --- |
|
||||
| `git -C /tmp/ecc-clean-plugin-evidence status --short --branch` | `## HEAD (no branch)` with no dirty or untracked files |
|
||||
| `claude plugin validate .claude-plugin/plugin.json` | Passed |
|
||||
| `claude plugin validate .claude-plugin/marketplace.json` | Passed |
|
||||
| `claude plugin tag .claude-plugin --dry-run` | Passed without `--force`; would create `ecc--v2.0.0-rc.1` at HEAD and push `refs/tags/ecc--v2.0.0-rc.1` |
|
||||
| `claude plugin marketplace add /tmp/ecc-clean-plugin-evidence --scope local` with temp `HOME` | Added marketplace `ecc` in local settings |
|
||||
| `claude plugin list --available --json` with temp `HOME` | Listed `ecc@ecc`, version `2.0.0-rc.1`, source `./` |
|
||||
| `claude plugin install ecc@ecc --scope local` with temp `HOME` | Installed `ecc@ecc` in local scope |
|
||||
| `claude plugin list --json` with temp `HOME` | Listed `ecc@ecc`, version `2.0.0-rc.1`, enabled, local scope, install path under `/tmp/ecc-clean-plugin-home/.claude/plugins/cache/ecc/ecc/2.0.0-rc.1` |
|
||||
| `claude plugin uninstall ecc@ecc --scope local` with temp `HOME` | Uninstalled successfully; final plugin list was `[]` |
|
||||
|
||||
## Announcement Placeholder Check
|
||||
|
||||
The forbidden-placeholder scan only returned the publication-readiness checklist
|
||||
lines that name those forbidden placeholders. No launch-pack placeholder
|
||||
instances were found.
|
||||
|
||||
## Remaining Blockers
|
||||
|
||||
- Create or verify GitHub prerelease `v2.0.0-rc.1`.
|
||||
- Publish `ecc-universal@2.0.0-rc.1` with npm dist-tag `next`.
|
||||
- Create and push the Claude plugin tag only after explicit approval. The clean
|
||||
checkout dry run and temp install smoke now pass.
|
||||
- Confirm the live Claude/Codex/OpenCode marketplace submission path or record
|
||||
the manual submission owner and status.
|
||||
- Verify ECC Tools billing/App/Marketplace claims before using them in launch
|
||||
copy.
|
||||
- Refresh announcement copy with live URLs after release and package/plugin
|
||||
URLs exist.
|
||||
@@ -4,32 +4,37 @@ This checklist is the release gate for public publication surfaces. Do not use
|
||||
it as evidence by itself. Fill the evidence fields with fresh command output or
|
||||
URLs from the exact commit being released.
|
||||
|
||||
For the current rc.1 naming decision and package/plugin publication path, see
|
||||
[`naming-and-publication-matrix.md`](naming-and-publication-matrix.md).
|
||||
For the May 12 dry-run evidence pass, see
|
||||
[`publication-evidence-2026-05-12.md`](publication-evidence-2026-05-12.md).
|
||||
|
||||
## Release Identity Matrix
|
||||
|
||||
| Surface | Expected value | Source of truth | Fresh check | Evidence artifact | Owner | Status |
|
||||
| --- | --- | --- | --- | --- | --- | --- |
|
||||
| Product name | Everything Claude Code / ECC | `README.md`, `CHANGELOG.md`, release notes | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | Pending | Release owner | Pending |
|
||||
| GitHub repo | `affaan-m/everything-claude-code` | Git remote and release URLs | `git remote get-url origin` | Pending | Release owner | Pending |
|
||||
| Git tag | `v2.0.0-rc.1` | GitHub releases | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code` | Pending | Release owner | Pending |
|
||||
| npm package | `ecc-universal` | `package.json` | `node -p "require('./package.json').name"` | Pending | Package owner | Pending |
|
||||
| npm version | `2.0.0-rc.1` | `VERSION`, `package.json`, lockfiles | `node -p "require('./package.json').version"` | Pending | Package owner | Pending |
|
||||
| npm dist-tag | `next` for rc, `latest` only for GA | npm registry | `npm view ecc-universal dist-tags --json` | Pending | Package owner | Pending |
|
||||
| Claude plugin slug | `ecc` / `ecc@ecc` install path | `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` | `node tests/hooks/hooks.test.js` | Pending | Plugin owner | Pending |
|
||||
| Claude plugin manifest | `2.0.0-rc.1`, no unsupported `agents` or explicit `hooks` fields | `.claude-plugin/plugin.json`, `.claude-plugin/PLUGIN_SCHEMA_NOTES.md` | `claude plugin validate .claude-plugin/plugin.json` | Pending | Plugin owner | Pending |
|
||||
| Codex plugin manifest | `2.0.0-rc.1` with shared skill source | `.codex-plugin/plugin.json` | `node tests/docs/ecc2-release-surface.test.js` | Pending | Plugin owner | Pending |
|
||||
| OpenCode package | `ecc-universal` plugin module | `.opencode/package.json`, `.opencode/index.ts` | `npm run build:opencode` | Pending | Package owner | Pending |
|
||||
| Agent metadata | `2.0.0-rc.1` | `agent.yaml`, `.agents/plugins/marketplace.json` | `node tests/scripts/catalog.test.js` | Pending | Release owner | Pending |
|
||||
| Migration copy | rc.1 upgrade path, not GA claim | `release-notes.md`, `quickstart.md`, `HERMES-SETUP.md` | `npx markdownlint-cli docs/releases/2.0.0-rc.1/*.md` | Pending | Docs owner | Pending |
|
||||
| Product name | Everything Claude Code / ECC | `README.md`, `CHANGELOG.md`, release notes | `rg -n "Everything Claude Code" README.md CHANGELOG.md docs/releases/2.0.0-rc.1` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
|
||||
| GitHub repo | `affaan-m/everything-claude-code` | Git remote and release URLs | `git remote get-url origin` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
|
||||
| Git tag | `v2.0.0-rc.1` | GitHub releases | `gh release view v2.0.0-rc.1 --repo affaan-m/everything-claude-code` | `release not found` | Release owner | Blocked until release approval |
|
||||
| npm package | `ecc-universal` | `package.json` | `node -p "require('./package.json').name"` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
|
||||
| npm version | `2.0.0-rc.1` | `VERSION`, `package.json`, lockfiles | `node -p "require('./package.json').version"` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
|
||||
| npm dist-tag | `next` for rc, `latest` only for GA | npm registry | `npm view ecc-universal dist-tags --json` | Current registry only has `latest: 1.10.0`; `next` is pending publish | Package owner | Blocked until publish approval |
|
||||
| Claude plugin slug | `ecc` / `ecc@ecc` install path | `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json` | `node tests/hooks/hooks.test.js` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
|
||||
| Claude plugin manifest | `2.0.0-rc.1`, no unsupported `agents` or explicit `hooks` fields | `.claude-plugin/plugin.json`, `.claude-plugin/PLUGIN_SCHEMA_NOTES.md` | `claude plugin validate .claude-plugin/plugin.json` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
|
||||
| Codex plugin manifest | `2.0.0-rc.1` with shared skill source | `.codex-plugin/plugin.json` | `node tests/docs/ecc2-release-surface.test.js` | `publication-evidence-2026-05-12.md` | Plugin owner | Evidence recorded |
|
||||
| OpenCode package | `ecc-universal` plugin module | `.opencode/package.json`, `.opencode/index.ts` | `npm run build:opencode` | `publication-evidence-2026-05-12.md` | Package owner | Evidence recorded |
|
||||
| Agent metadata | `2.0.0-rc.1` | `agent.yaml`, `.agents/plugins/marketplace.json` | `node tests/scripts/catalog.test.js` | `publication-evidence-2026-05-12.md` | Release owner | Evidence recorded |
|
||||
| Migration copy | rc.1 upgrade path, not GA claim | `release-notes.md`, `quickstart.md`, `HERMES-SETUP.md` | `npx markdownlint-cli docs/releases/2.0.0-rc.1/*.md` | Pending final lint on release commit | Docs owner | Pending |
|
||||
|
||||
## Publication Gates
|
||||
|
||||
| Gate | Required evidence | Fresh check | Blocker field | Owner | Status |
|
||||
| --- | --- | --- | --- | --- | --- |
|
||||
| GitHub release | Tag exists, release notes use final URLs, assets attached if needed | `gh release view v2.0.0-rc.1 --json tagName,url,isPrerelease` | `Blocker:` | Release owner | Pending |
|
||||
| npm package | `npm pack --dry-run` has expected files, version matches, rc goes to `next` | `npm pack --dry-run` and `npm publish --tag next --dry-run` where supported | `Blocker:` | Package owner | Pending |
|
||||
| Claude plugin | Manifest validates, marketplace JSON points to public repo, install docs match slug | `claude plugin validate .claude-plugin/plugin.json` | `Blocker:` | Plugin owner | Pending |
|
||||
| Codex plugin | Manifest version matches package and docs, hook limitations are explicit | `node tests/docs/ecc2-release-surface.test.js` | `Blocker:` | Plugin owner | Pending |
|
||||
| OpenCode package | Build output is regenerated from source and package metadata is current | `npm run build:opencode` | `Blocker:` | Package owner | Pending |
|
||||
| GitHub release | Tag exists, release notes use final URLs, assets attached if needed | `gh release view v2.0.0-rc.1 --json tagName,url,isPrerelease` | `Blocker: release not found on 2026-05-12` | Release owner | Pending approval |
|
||||
| npm package | `npm pack --dry-run` has expected files, version matches, rc goes to `next` | `npm pack --dry-run` and `npm publish --tag next --dry-run` where supported | `Blocker: actual publish requires approval; dry run passed with next tag` | Package owner | Dry-run passed |
|
||||
| Claude plugin | Manifest validates, marketplace JSON points to public repo, install docs match slug | `claude plugin validate .claude-plugin/plugin.json`; `claude plugin tag .claude-plugin --dry-run`; isolated temp-home install smoke | `Blocker: real tag creation/push requires approval` | Plugin owner | Clean-checkout dry-run and install smoke recorded |
|
||||
| Codex plugin | Manifest version matches package and docs, hook limitations are explicit | `node tests/docs/ecc2-release-surface.test.js` | `Blocker: marketplace submission path still manual/owner-gated` | Plugin owner | Evidence recorded |
|
||||
| OpenCode package | Build output is regenerated from source and package metadata is current | `npm run build:opencode` | `Blocker: none for local build; public distribution still follows npm/plugin release` | Package owner | Evidence recorded |
|
||||
| ECC Tools billing reference | Any billing claim links to verified Marketplace/App state | `gh api repos/ECC-Tools/ECC-Tools` plus app/marketplace URL check | `Blocker:` | ECC Tools owner | Pending |
|
||||
| Announcement copy | X, LinkedIn, GitHub release, and longform copy point to live URLs | `rg -n "TODO" docs/releases/2.0.0-rc.1` and repeat for `TBD` | `Blocker:` | Release owner | Pending |
|
||||
|
||||
@@ -42,10 +47,10 @@ Record the exact commit SHA and command output before any publication action:
|
||||
| Clean release branch | `git status --short --branch` | On intended release commit; no unrelated files | Pending |
|
||||
| Harness audit | `npm run harness:audit -- --format json` | 70/70 passing | Pending |
|
||||
| Adapter scorecard | `npm run harness:adapters -- --check` | PASS | Pending |
|
||||
| Observability readiness | `npm run observability:ready` | 14/14 passing | Pending |
|
||||
| Observability readiness | `npm run observability:ready` | 16/16 passing | Pending |
|
||||
| Root suite | `node tests/run-all.js` | 0 failures | Pending |
|
||||
| Markdown lint | `npx markdownlint-cli '**/*.md' --ignore node_modules` | 0 failures | Pending |
|
||||
| Package surface | `node tests/scripts/npm-publish-surface.test.js` | 0 failures | Pending |
|
||||
| Package surface | `node tests/scripts/npm-publish-surface.test.js` | 0 failures; no Python bytecode in npm tarball | `2/2` passed in May 12 evidence pass |
|
||||
| Release surface | `node tests/docs/ecc2-release-surface.test.js` | 0 failures | Pending |
|
||||
| Optional Rust surface | `cd ecc2 && cargo test` | 0 failures or explicit deferral | Pending |
|
||||
|
||||
@@ -53,8 +58,8 @@ Record the exact commit SHA and command output before any publication action:
|
||||
|
||||
- `main` has unreviewed release-surface changes after the evidence was recorded.
|
||||
- `npm view ecc-universal dist-tags --json` contradicts the intended rc/GA tag.
|
||||
- Claude plugin validation is unavailable and no manual install smoke test is
|
||||
recorded.
|
||||
- Claude plugin validation is unavailable or no clean-checkout install smoke
|
||||
test is recorded for the intended release commit.
|
||||
- Release notes or announcement drafts still contain placeholder URLs,
|
||||
`TODO`, `TBD`, private workspace paths, or personal operator references.
|
||||
- Billing, Marketplace, or plugin-submission copy claims a live surface before
|
||||
|
||||
@@ -19,16 +19,22 @@ on fresh branches, and credit the source PR.
|
||||
|
||||
| Source PR | Original contribution | Salvage result |
|
||||
| --- | --- | --- |
|
||||
| #1232 | `skill-scout` search-before-creating workflow | Salvaged in the May 12 cost/skill-scout maintainer pass with current repo wording, external-source vetting, and no stale catalog-count edits. |
|
||||
| #1304 | Cost tracking skill and `/cost-report` command | Salvaged in the May 12 cost/skill-scout maintainer pass with current command/skill conventions and without stale hard-coded model pricing. |
|
||||
| #1309 | Trading/community project material | Salvaged in #1761 as a neutral community-project README listing. |
|
||||
| #1310 | Django reviewer, build resolver, and Celery async task guidance | Salvaged in the May 12 Django/Celery maintainer pass with current catalog counts and minor example cleanup. |
|
||||
| #1322 | Vietnamese README translation | Salvaged in #1764 as `docs/vi-VN/README.md` plus selector updates. |
|
||||
| #1325 | Quarkus framework guidance, Java agents, and localization material | Salvaged across #1771 and #1803; stale broad docs/count edits were not copied. |
|
||||
| #1326 | Angular developer skill and rules | Salvaged in #1763 with current skill, rules, install wiring, and catalog updates. |
|
||||
| #1328 | Continuous-learning Windows UTF-8 stdout fix | Salvaged in #1761. |
|
||||
| #1329 | Plugin install detection hardening | Salvaged in #1761 through current harness audit detection support. |
|
||||
| #1334 | Windows desktop E2E skill | Salvaged in #1762 with install, package, and catalog wiring. |
|
||||
| #1352 | Qwen install target | Salvaged in #1738 through the current Qwen install target. |
|
||||
| #1413 | Network and homelab skills/agents | Salvaged through #1729, #1731, #1745, and #1778. |
|
||||
| #1414 | F# rules, reviewer agent, and testing skill | Salvaged in #1770 with current install manifests, detection tests, and catalog wiring. |
|
||||
| #1429 | JoyCode install target | Salvaged in #1737 through the current JoyCode install target. |
|
||||
| #1467 | Scientific skills and OpenCode discovery work | Useful USPTO and gget pieces salvaged in #1740; stale generated claims were not copied. |
|
||||
| #1478 | HarmonyOS/ArkTS rules, resolver agent, and CLAUDE example | Salvaged in #1769 with current install wiring; stale `ecc2` session/TUI edits were not carried. |
|
||||
| #1493 | SessionStart context scoping | Salvaged in #1774 with current hook semantics and tests. |
|
||||
| #1498 | PRD planning flow | Salvaged in #1777. |
|
||||
| #1504 | Statusline/context monitor hooks | Salvaged in #1776 with current hook manifest structure and tests. |
|
||||
@@ -37,6 +43,9 @@ on fresh branches, and credit the source PR.
|
||||
| #1559 | `error-handling` skill | Salvaged in #1772. |
|
||||
| #1566 | Agent architecture audit skill | Salvaged in #1772. |
|
||||
| #1578 | OpenCode file-probe hardening | Salvaged in #1773. |
|
||||
| #1603 | `plan-orchestrate` skill | Salvaged in #1766 with current manifest/catalog wiring. |
|
||||
| #1658 | Code-reviewer false-positive suppression | Salvaged in the May 12 code-reviewer maintainer pass with current review-agent wording, a proof gate for HIGH/CRITICAL findings, common false-positive exclusions, and a regression test. |
|
||||
| #1659 | Frontend design direction and interface-polish skills | Salvaged in the May 12 frontend-design maintainer pass with canonical `skills/` layout and current ECC frontend guidance, while preserving the repo guardrail that the official `frontend-design` skill should be installed from `anthropics/skills`. |
|
||||
| #1674 | Production audit skill | Salvaged in #1732 after supply-chain/privacy review and rewrite. |
|
||||
| #1687 | zh-CN localization sync | Large safe subsets salvaged in #1746-#1752; remaining pieces require translator/manual review. |
|
||||
| #1694 | Portfolio curation | Useful focused curation updates salvaged in #1723 and #1724. |
|
||||
@@ -50,6 +59,41 @@ on fresh branches, and credit the source PR.
|
||||
| #1727 | MySQL patterns skill | Salvaged in #1733. |
|
||||
| #1757 | Machine-learning engineering workflow | Salvaged in #1758 and tuned in #1759. |
|
||||
|
||||
## 2026-05-12 Gap Pass
|
||||
|
||||
The initial stale-closure ledger covered the P0 cleanup cohort and the biggest
|
||||
salvage branches. A follow-up gap pass over PRs closed on 2026-05-11 found
|
||||
additional useful items that were already present on `main` or still worth
|
||||
porting.
|
||||
|
||||
| Source PR | Disposition |
|
||||
| --- | --- |
|
||||
| #1310 | Ported through the Django/Celery maintainer branch after confirming `agents/django-reviewer.md`, `agents/django-build-resolver.md`, and `skills/django-celery/SKILL.md` were still missing. |
|
||||
| #1325 | Useful Quarkus framework material was already preserved across #1771 and #1803; current `main` contains the Quarkus rules/skills plus Java reviewer/build-resolver surfaces. |
|
||||
| #1360 | Already present as `skills/security-bounty-hunter/`. |
|
||||
| #1414 | Useful F# support was already preserved in #1770; current `main` contains the F# rules, reviewer agent, testing skill, install wiring, and detection tests. |
|
||||
| #1415 | Already present as `skills/vite-patterns/`. |
|
||||
| #1478 | Useful HarmonyOS/ArkTS support was already preserved in #1769; current `main` contains the ArkTS rules, resolver agent, CLAUDE example, and install wiring. |
|
||||
| #1438 | Already present as `skills/ui-to-vue/`. |
|
||||
| #1504 | Already mapped to #1776 in the durable salvage table. |
|
||||
| #1508 | Already present as `skills/fastapi-patterns/` and `agents/fastapi-reviewer.md`. |
|
||||
| #1563/#1564/#1565 | Translator/manual review: zh-TW, tr, and pt-BR README syncs may contain useful localization updates, but stale README/version/count text must be reviewed by language owners before import. |
|
||||
| #1567 | Already present as the current GateGuard subagent file-gate bypass in `scripts/hooks/gateguard-fact-force.js`, with Bash gates preserved and regression tests in `tests/hooks/gateguard-fact-force.test.js`. |
|
||||
| #1570 | Already present as public `llm.prompt` imports, keyword-based `PromptBuilder` construction, and template registry helpers; current tests register the `unit` marker through `tests/conftest.py`. |
|
||||
| #1584 | Already present as the iTerm2 native desktop-notification fast path in `scripts/hooks/desktop-notify.js`, with multiplexer fallback to `osascript`. |
|
||||
| #1589 | Already present as quoted `actions/checkout` detection in `scripts/ci/validate-workflow-security.js` plus double/single-quote regression tests. |
|
||||
| #1594 | Already present as HTTP MCP reachability handling that treats HTTP 400, 401, and 403 probe responses as reachable/auth-gated, with hook tests. |
|
||||
| #1597 | Already present as catalog-count validation for README, AGENTS, zh-CN docs, `.claude-plugin/plugin.json`, and `.claude-plugin/marketplace.json`. |
|
||||
| #1602 | Already present as the `continuous-learning` v1 deprecation that routes new usage to `continuous-learning-v2` while preserving the archival v1 surface. |
|
||||
| #1603 | Useful `/plan-orchestrate` work was already preserved in #1766 with current package/catalog metadata. |
|
||||
| #1604 | Skipped: Windows drag-and-drop local installer copies files directly and runs `git pull`; current managed installer/profile flow is safer and supersedes it. |
|
||||
| #1609 | Translator/manual review: Persian README translation may be useful, but needs language review and current catalog/version refresh before import. |
|
||||
| #1613 | Already present in `rules/web/hooks.md` as the `tsc --incremental` plus timeout-capped PostToolUse example. |
|
||||
| #1631 | Already present in `scripts/hooks/suggest-compact.js` and `tests/hooks/hooks.test.js`; current code reads `session_id` from stdin JSON before falling back to `CLAUDE_SESSION_ID`. |
|
||||
| #1648 | Already present in `src/llm/providers/claude.py`; current Claude provider collects all text and tool-use content blocks and covers the behavior in `tests/test_claude_provider.py`. |
|
||||
| #1658 | Ported through the code-reviewer maintainer branch after confirming the false-positive proof gate and common false-positive skip list were still missing. |
|
||||
| #1693 | Already present as `skills/redis-patterns/`. |
|
||||
|
||||
## Already Present Or Superseded
|
||||
|
||||
| Source PR | Disposition |
|
||||
@@ -58,6 +102,9 @@ on fresh branches, and credit the source PR.
|
||||
| #1318 | Gemini agent adaptation utility was already present on current `main`. |
|
||||
| #1323 | Hook config update was already present on current `main`. |
|
||||
| #1337 | Catalog count update was superseded by current catalog-count sync. |
|
||||
| #1631 | `suggest-compact` stdin `session_id` isolation was already present on current `main` with hook tests. |
|
||||
| #1608 | Hardening for unsafe dashboard document/terminal open handling was already present on current `main` through safe runtime helpers and project-bound document opening. |
|
||||
| #1678 | Windows MCP `.cmd`/`.bat` fallback behavior was already present on current `main` with current health-check tests. |
|
||||
| #1682/#1701 | Strategic compact hook-path fixes were merged directly or superseded by current docs fixes. |
|
||||
| JARVIS #4/#5/#6 | Stale failing dependency-only PRs; future dependency state should be regenerated by Dependabot. |
|
||||
|
||||
@@ -70,15 +117,22 @@ on fresh branches, and credit the source PR.
|
||||
| #1341 | Very large low-signal generated change with no safe focused salvage unit. |
|
||||
| #1416/#1465 | Accidental fork-sync PRs with no focused contribution. |
|
||||
| #1475 | One-line Gemini CLI bridge idea was too stale and underspecified to port safely. |
|
||||
| #1604 | Drag-and-drop Windows installer bypasses the current managed installer, performs direct broad copies, and runs `git pull` from a local install script. |
|
||||
|
||||
## Remaining Manual-Review Backlog
|
||||
|
||||
Only the #1687 localization tail remains plausibly useful but unsafe to
|
||||
auto-port.
|
||||
The remaining plausibly useful backlog is translation/localization work that is
|
||||
unsafe to auto-port without language-owner review:
|
||||
|
||||
- #1687 zh-CN localization tail
|
||||
- #1609 Persian README translation
|
||||
- #1563 zh-TW README sync
|
||||
- #1564 Turkish README sync
|
||||
- #1565 pt-BR README sync
|
||||
|
||||
Handling rule:
|
||||
|
||||
1. Keep #1687 in translator/manual review.
|
||||
1. Keep these PRs in translator/manual review.
|
||||
2. Split any future work by surface: agents, commands, top-level docs, release
|
||||
and count surfaces, then skills.
|
||||
3. Do not import stale top-level docs that carry old version or catalog-count
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Everything Claude Code (ECC) — 智能体指令
|
||||
|
||||
这是一个**生产就绪的 AI 编码插件**,提供 58 个专业代理、220 项技能、74 条命令以及自动化钩子工作流,用于软件开发。
|
||||
这是一个**生产就绪的 AI 编码插件**,提供 60 个专业代理、225 项技能、75 条命令以及自动化钩子工作流,用于软件开发。
|
||||
|
||||
**版本:** 2.0.0-rc.1
|
||||
|
||||
@@ -146,9 +146,9 @@
|
||||
## 项目结构
|
||||
|
||||
```
|
||||
agents/ — 58 个专业子代理
|
||||
skills/ — 220 个工作流技能和领域知识
|
||||
commands/ — 74 个斜杠命令
|
||||
agents/ — 60 个专业子代理
|
||||
skills/ — 225 个工作流技能和领域知识
|
||||
commands/ — 75 个斜杠命令
|
||||
hooks/ — 基于触发的自动化
|
||||
rules/ — 始终遵循的指导方针(通用 + 每种语言)
|
||||
scripts/ — 跨平台 Node.js 实用工具
|
||||
|
||||
@@ -224,7 +224,7 @@ Copy-Item -Recurse rules/typescript "$HOME/.claude/rules/"
|
||||
/plugin list ecc@ecc
|
||||
```
|
||||
|
||||
**搞定!** 你现在可以使用 58 个智能体、220 项技能和 74 个命令了。
|
||||
**搞定!** 你现在可以使用 60 个智能体、225 项技能和 75 个命令了。
|
||||
|
||||
***
|
||||
|
||||
@@ -1136,9 +1136,9 @@ opencode
|
||||
|
||||
| 功能特性 | Claude Code | OpenCode | 状态 |
|
||||
|---------|-------------|----------|--------|
|
||||
| 智能体 | PASS: 58 个 | PASS: 12 个 | **Claude Code 领先** |
|
||||
| 命令 | PASS: 74 个 | PASS: 35 个 | **Claude Code 领先** |
|
||||
| 技能 | PASS: 220 项 | PASS: 37 项 | **Claude Code 领先** |
|
||||
| 智能体 | PASS: 60 个 | PASS: 12 个 | **Claude Code 领先** |
|
||||
| 命令 | PASS: 75 个 | PASS: 35 个 | **Claude Code 领先** |
|
||||
| 技能 | PASS: 225 项 | PASS: 37 项 | **Claude Code 领先** |
|
||||
| 钩子 | PASS: 8 种事件类型 | PASS: 11 种事件 | **OpenCode 更多!** |
|
||||
| 规则 | PASS: 29 条 | PASS: 13 条指令 | **Claude Code 领先** |
|
||||
| MCP 服务器 | PASS: 14 个 | PASS: 完整 | **完全对等** |
|
||||
@@ -1244,9 +1244,9 @@ ECC 是**第一个最大化利用每个主要 AI 编码工具的插件**。以
|
||||
|
||||
| 功能特性 | Claude Code | Cursor IDE | Codex CLI | OpenCode |
|
||||
|---------|------------|------------|-----------|----------|
|
||||
| **智能体** | 58 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 |
|
||||
| **命令** | 74 | 共享 | 基于指令 | 35 |
|
||||
| **技能** | 220 | 共享 | 10 (原生格式) | 37 |
|
||||
| **智能体** | 60 | 共享 (AGENTS.md) | 共享 (AGENTS.md) | 12 |
|
||||
| **命令** | 75 | 共享 | 基于指令 | 35 |
|
||||
| **技能** | 225 | 共享 | 10 (原生格式) | 37 |
|
||||
| **钩子事件** | 8 种类型 | 15 种类型 | 暂无 | 11 种类型 |
|
||||
| **钩子脚本** | 20+ 个脚本 | 16 个脚本 (DRY 适配器) | N/A | 插件钩子 |
|
||||
| **规则** | 34 (通用 + 语言) | 34 (YAML 前置元数据) | 基于指令 | 13 条指令 |
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
# AgentShield Policy Exception Playbook
|
||||
|
||||
Candidate id: `sarif-backed-timeboxed-exception-review`
|
||||
|
||||
Use this playbook when AgentShield organization-policy output produces a
|
||||
finding that may need remediation, a time-boxed exception, or explicit
|
||||
enforcement.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Identify the AgentShield finding id, category, severity, affected file or
|
||||
MCP/hook surface, and policy pack or organization baseline.
|
||||
2. Retrieve scanner evidence before judgment:
|
||||
- SARIF/code-scanning result, especially `agentshield-policy/*`
|
||||
- JSON/HTML report evidence
|
||||
- terminal or GitHub Action job-summary counts
|
||||
3. Record lifecycle fields for any exception request: owner, ticket, scope,
|
||||
expiry, rationale, and whether it is active, expiring soon, or expired.
|
||||
4. Keep expired exceptions rejected or enforced until new evidence exists.
|
||||
5. Decide whether immediate remediation is possible. If not, promote only a
|
||||
narrow time-boxed exception tied to the named owner, ticket, scope, and
|
||||
expiry.
|
||||
6. Keep AgentShield code, policy packs, enforcement settings, release state,
|
||||
and live security posture out of the read-only evaluator run.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not blanket suppress a policy category, policy pack, or organization gate
|
||||
because a finding is inconvenient.
|
||||
|
||||
Do not downgrade critical/high findings without SARIF or report evidence and a
|
||||
current owner, ticket, scope, and expiry.
|
||||
|
||||
Do not treat expired exceptions as active. Expired means the policy gate should
|
||||
remain enforced until a maintainer creates a fresh, bounded exception or fixes
|
||||
the underlying issue.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `npx ecc-agentshield scan --format json`
|
||||
- AgentShield SARIF/code-scanning artifact or report evidence
|
||||
- `npx ecc-agentshield scan --format html` when executive review evidence is
|
||||
needed
|
||||
- Current exception lifecycle fields: owner, ticket, scope, expiry, status
|
||||
- `node tests/docs/evaluator-rag-prototype.test.js`
|
||||
- `git diff --check`
|
||||
|
||||
Record the scanner evidence, lifecycle state, policy-pack source, and
|
||||
remediation-versus-exception decision in the maintainer PR body or handoff.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "agentshield-policy-exception",
|
||||
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"sarif_report_evidence": 0.95,
|
||||
"exception_lifecycle": 0.93,
|
||||
"ownership_specificity": 0.9,
|
||||
"remediation_decision": 0.88,
|
||||
"blanket_suppression_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "sarif-report-match-required",
|
||||
"severity": "warning",
|
||||
"summary": "AgentShield policy exceptions must name SARIF or report evidence before a remediation or exception playbook can be promoted."
|
||||
},
|
||||
{
|
||||
"id": "expired-exception-enforcement",
|
||||
"severity": "warning",
|
||||
"summary": "Expired exceptions must remain rejected or enforced; the evaluator cannot treat stale approvals as active evidence."
|
||||
},
|
||||
{
|
||||
"id": "bounded-owner-fields",
|
||||
"severity": "info",
|
||||
"summary": "Accepted exceptions preserve owner, ticket, scope, expiry, policy-pack source, and affected surface fields."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "sarif-backed-timeboxed-exception-review",
|
||||
"action": "Use the promoted playbook for future AgentShield policy exception requests before changing gates, suppressing categories, or accepting security risk."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "agentshield-policy-exception",
|
||||
"title": "Gate AgentShield policy exceptions with report and SARIF evidence",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given an AgentShield organization-policy finding or proposed exception, retrieve report, SARIF, lifecycle, and ownership evidence before promoting a remediation or time-boxed exception playbook.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"purpose": "Durable record of AgentShield policy gates, SARIF output, policy packs, reports, corpus benchmark, and exception lifecycle audit evidence"
|
||||
},
|
||||
{
|
||||
"kind": "repo_command",
|
||||
"path": "commands/security-scan.md",
|
||||
"purpose": "ECC command contract for running AgentShield and separating scanner facts from follow-up judgment"
|
||||
},
|
||||
{
|
||||
"kind": "repo_skill",
|
||||
"path": "skills/security-scan/SKILL.md",
|
||||
"purpose": "Operator-facing AgentShield scan workflow and output-format guidance"
|
||||
},
|
||||
{
|
||||
"kind": "external_pr_evidence",
|
||||
"repo": "affaan-m/agentshield",
|
||||
"prs": [
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
59,
|
||||
60,
|
||||
62
|
||||
],
|
||||
"purpose": "Policy gate, SARIF, policy-pack, HTML report, corpus benchmark, and exception lifecycle implementation evidence"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which AgentShield policy finding, category, severity, and affected file or MCP/hook surface triggered the request?",
|
||||
"Is there SARIF/code-scanning evidence for an `agentshield-policy/*` result, and does it match the report finding?",
|
||||
"Is the exception active, expiring soon, or expired?",
|
||||
"Does the exception include owner, ticket, scope, expiry, and rationale fields?",
|
||||
"Which policy pack or organization baseline produced the finding?",
|
||||
"Is remediation possible now, or is a bounded exception safer than a blanket suppression?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"approving policy exceptions without SARIF or report evidence",
|
||||
"treating expired exceptions as active",
|
||||
"blanket-suppressing AgentShield policy packs or organization-policy gates",
|
||||
"downgrading critical/high findings without owner, ticket, scope, and expiry",
|
||||
"editing AgentShield code or policy files from this ECC evaluator run",
|
||||
"publishing or enforcing new security policy from this read-only evaluator run"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"SARIF or report evidence is named",
|
||||
"finding id, category, severity, and affected surface are preserved",
|
||||
"policy pack or organization baseline is named",
|
||||
"owner, ticket, scope, and expiry state are recorded",
|
||||
"expired exceptions stay rejected or enforced",
|
||||
"remediation versus time-boxed exception decision is explicit",
|
||||
"at least one blanket suppression candidate is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "agentshield-policy-exception",
|
||||
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A policy finding or exception request references AgentShield organization-policy output. The evaluator records the affected finding without editing AgentShield code, policy packs, or enforcement settings.",
|
||||
"evidence": [
|
||||
"docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"commands/security-scan.md"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved SARIF/report evidence, policy-pack source, exception lifecycle state, owner, ticket, scope, expiry, and whether remediation is immediately available.",
|
||||
"evidence": [
|
||||
"agentshield-policy/* SARIF result",
|
||||
"AgentShield report exception counts",
|
||||
"skills/security-scan/SKILL.md"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: SARIF-backed time-boxed exception review, and blanket policy suppression for the affected category.",
|
||||
"candidate_ids": [
|
||||
"sarif-backed-timeboxed-exception-review",
|
||||
"blanket-policy-suppression"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the evidence-backed exception review because it preserves finding details and lifecycle fields. Rejected blanket suppression because it bypasses policy gates and ignores expired exceptions.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/agentshield-policy-exception/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only AgentShield policy exception playbook. The evaluator does not modify AgentShield code, policy packs, enforcement settings, release state, or live security posture.",
|
||||
"promoted_candidate_id": "sarif-backed-timeboxed-exception-review"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "agentshield-policy-exception",
|
||||
"run_id": "2026-05-12-agentshield-policy-exception-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "sarif-backed-timeboxed-exception-review",
|
||||
"decision": "accepted",
|
||||
"score": 0.93,
|
||||
"reasons": [
|
||||
"names SARIF/code-scanning or report evidence for the AgentShield finding",
|
||||
"preserves finding id, category, severity, affected surface, and policy-pack source",
|
||||
"records owner, ticket, scope, expiry, and active/expiring/expired lifecycle state",
|
||||
"rejects expired exceptions and requires remediation or a time-boxed exception",
|
||||
"keeps AgentShield code, policy packs, enforcement settings, and release actions out of the read-only evaluator run"
|
||||
],
|
||||
"rollback": "Do not apply the future exception or suppression; re-run AgentShield, restore the prior organization policy, and keep the finding enforced until owner/ticket/scope/expiry evidence is current."
|
||||
},
|
||||
{
|
||||
"candidate_id": "blanket-policy-suppression",
|
||||
"decision": "rejected",
|
||||
"score": 0.11,
|
||||
"reasons": [
|
||||
"has no SARIF or report evidence",
|
||||
"blanket-suppresses AgentShield policy packs and organization-policy gates",
|
||||
"treats expired exceptions as active",
|
||||
"drops owner, ticket, scope, and expiry fields",
|
||||
"would edit AgentShield or policy gate behavior from an ECC evaluator run"
|
||||
],
|
||||
"rollback": "Do not suppress the policy category; restart from scanner evidence, lifecycle state, and a bounded remediation or exception request."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "sarif-backed-timeboxed-exception-review"
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
# Billing Marketplace Readiness Playbook
|
||||
|
||||
Use this playbook when release copy or roadmap text mentions ECC Tools
|
||||
billing, Marketplace availability, account recovery, plans, seats,
|
||||
entitlements, or subscription state.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Start from `docs/releases/2.0.0-rc.1/publication-readiness.md`.
|
||||
2. Check the current repo and public listing surfaces:
|
||||
- `gh api repos/ECC-Tools/ECC-Tools`
|
||||
- `https://github.com/marketplace/ecc-tools`
|
||||
3. Classify every billing or Marketplace claim as:
|
||||
- `verified`
|
||||
- `blocked`
|
||||
- `remove-before-publication`
|
||||
4. Keep roadmap acceptance criteria separate from live product claims.
|
||||
5. Update release copy only after the evidence points to a live URL or command
|
||||
result.
|
||||
6. Leave tag creation, npm publish, plugin submission, marketplace edits,
|
||||
subscription changes, and announcement posting approval-gated.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not say billing is live because a roadmap item exists, a dry run passed, or a
|
||||
Marketplace URL is known. Roadmap intent and dry-run publication evidence are
|
||||
not proof of live billing state.
|
||||
|
||||
Do not edit plan limits, subscriptions, seats, entitlements, or Marketplace
|
||||
metadata from the evaluator run. Those are product/operator actions and require
|
||||
their own approval path.
|
||||
|
||||
## Validation Gates
|
||||
|
||||
- `rg -n "billing|Billing|Marketplace|marketplace|subscription|seat|entitlement|plan" README.md docs/releases/2.0.0-rc.1 docs/ECC-2.0-GA-ROADMAP.md`
|
||||
- `gh api repos/ECC-Tools/ECC-Tools`
|
||||
- Manual live check of `https://github.com/marketplace/ecc-tools`
|
||||
- `npx --yes markdownlint-cli docs/releases/2.0.0-rc.1/*.md docs/ECC-2.0-GA-ROADMAP.md`
|
||||
- `git diff --check`
|
||||
|
||||
Record the evidence in a maintainer-owned PR before release copy is published.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "billing-marketplace-readiness",
|
||||
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"claim_evidence": 0.82,
|
||||
"publication_safety": 1,
|
||||
"marketplace_specificity": 0.84,
|
||||
"billing_scope_control": 1,
|
||||
"announcement_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "billing-claim-gate-needed",
|
||||
"severity": "warning",
|
||||
"summary": "Release docs require a fresh ECC Tools billing/App/Marketplace check before launch copy can claim live billing readiness."
|
||||
},
|
||||
{
|
||||
"id": "dry-run-not-live-state",
|
||||
"severity": "warning",
|
||||
"summary": "May 12 evidence proves package/plugin dry runs and clean install smoke, but it does not prove a live Marketplace billing state."
|
||||
},
|
||||
{
|
||||
"id": "safe-next-action",
|
||||
"severity": "info",
|
||||
"summary": "The reusable next action is a read-only evidence checklist that classifies each launch-copy billing claim before publication."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "evidence-backed-billing-check",
|
||||
"action": "Run the promoted billing/Marketplace claim-verification checklist before any launch copy, GitHub release text, or social copy says billing is live."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "billing-marketplace-readiness",
|
||||
"title": "Verify billing and Marketplace claims before launch copy",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given rc.1 release docs and ECC Tools billing roadmap evidence, separate verified Marketplace/App/billing state from assumptions before any announcement or publication action.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/releases/2.0.0-rc.1/publication-readiness.md",
|
||||
"purpose": "Release gate that blocks billing and Marketplace claims until fresh evidence exists"
|
||||
},
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md",
|
||||
"purpose": "Dry-run publication evidence and explicit remaining blocker list"
|
||||
},
|
||||
{
|
||||
"kind": "roadmap",
|
||||
"path": "docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"purpose": "ECC Tools billing audit acceptance criteria and remaining release blockers"
|
||||
},
|
||||
{
|
||||
"kind": "github_api",
|
||||
"command": "gh api repos/ECC-Tools/ECC-Tools",
|
||||
"purpose": "Fresh repository access and app-surface evidence before launch claims"
|
||||
},
|
||||
{
|
||||
"kind": "public_url",
|
||||
"url": "https://github.com/marketplace/ecc-tools",
|
||||
"purpose": "Marketplace listing that must be checked live before copy says billing is ready"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which billing or Marketplace claims are already backed by repo evidence?",
|
||||
"Which claims still need a live Marketplace, App, subscription, plan, or entitlement check?",
|
||||
"Which announcement docs mention billing or Marketplace status?",
|
||||
"Which publication actions remain approval-gated and must not run during this evaluator pass?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"creating or editing GitHub Marketplace listings",
|
||||
"changing plan limits, subscriptions, seats, or entitlements",
|
||||
"creating release tags",
|
||||
"publishing packages or plugins",
|
||||
"posting announcement copy",
|
||||
"claiming live billing readiness from dry-run evidence alone"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"launch-copy claims are classified as verified, blocked, or remove-before-publication",
|
||||
"Marketplace and App checks name the exact URL or command needed",
|
||||
"billing claims link to fresh evidence rather than roadmap intent",
|
||||
"publication actions remain approval-gated",
|
||||
"at least one overclaim candidate is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "billing-marketplace-readiness",
|
||||
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "Publication readiness still marks ECC Tools billing references and announcement copy as pending. Dry-run publication evidence says billing/App/Marketplace claims must be verified before launch copy uses them.",
|
||||
"evidence": [
|
||||
"docs/releases/2.0.0-rc.1/publication-readiness.md",
|
||||
"docs/releases/2.0.0-rc.1/publication-evidence-2026-05-12.md"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved the release gate, dry-run evidence, roadmap billing acceptance criteria, and the public Marketplace URL that requires a live operator check.",
|
||||
"evidence": [
|
||||
"docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"gh api repos/ECC-Tools/ECC-Tools",
|
||||
"https://github.com/marketplace/ecc-tools"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: evidence-backed billing claim verification, and announcement-first billing copy that treats roadmap intent as live billing readiness.",
|
||||
"candidate_ids": [
|
||||
"evidence-backed-billing-check",
|
||||
"announcement-first-billing-copy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the evidence-backed check and rejected announcement-first copy because billing and Marketplace surfaces remain pending until verified by fresh URLs or API output.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/billing-marketplace-readiness/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only verification playbook. No Marketplace edits, subscription changes, tags, package publishes, plugin submission, or announcement posts are performed.",
|
||||
"promoted_candidate_id": "evidence-backed-billing-check"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "billing-marketplace-readiness",
|
||||
"run_id": "2026-05-12-billing-marketplace-readiness-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "evidence-backed-billing-check",
|
||||
"decision": "accepted",
|
||||
"score": 0.91,
|
||||
"reasons": [
|
||||
"keeps the run read-only",
|
||||
"requires fresh Marketplace or GitHub API evidence",
|
||||
"classifies launch-copy claims before publication",
|
||||
"separates roadmap intent from live billing state",
|
||||
"keeps release, package, plugin, billing, and announcement actions approval-gated"
|
||||
],
|
||||
"rollback": "Remove or revert any release-copy edits that cite unverified billing claims; no live billing state is changed by this playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "announcement-first-billing-copy",
|
||||
"decision": "rejected",
|
||||
"score": 0.18,
|
||||
"reasons": [
|
||||
"treats roadmap acceptance criteria as live billing evidence",
|
||||
"does not require a fresh Marketplace listing check",
|
||||
"could publish announcement copy before release URLs exist",
|
||||
"does not classify unsupported claims for removal",
|
||||
"risks implying subscription or entitlement readiness without proof"
|
||||
],
|
||||
"rollback": "Do not publish this copy; keep billing and Marketplace language blocked until the evidence checklist passes."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "evidence-backed-billing-check"
|
||||
}
|
||||
41
examples/evaluator-rag-prototype/candidate-playbook.md
Normal file
41
examples/evaluator-rag-prototype/candidate-playbook.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# Candidate Playbook: Maintainer-Owned Stale Salvage
|
||||
|
||||
Candidate id: `maintainer-salvage-branch`
|
||||
|
||||
## Use When
|
||||
|
||||
- A stale or conflicted PR was closed to keep the public queue usable.
|
||||
- The closed diff contains a useful focused idea, skill, command, doc, test, or
|
||||
bug fix.
|
||||
- The contributor may not have time or interest to rebase.
|
||||
|
||||
## Steps
|
||||
|
||||
1. Record the source PR, author, useful concept, and closure reason in
|
||||
`docs/stale-pr-salvage-ledger.md`.
|
||||
2. Re-read the closed PR diff against current `main`.
|
||||
3. Decide whether the patch can be cherry-picked safely. Prefer reimplementation
|
||||
when current architecture has moved.
|
||||
4. Create a maintainer-owned branch with one focused salvage unit.
|
||||
5. Preserve attribution in the PR body and, when useful, in the commit body.
|
||||
6. Update the catalog, docs, tests, or release evidence required by the touched
|
||||
surface.
|
||||
7. Run the same validation gates a normal change would require.
|
||||
8. After merge, update the ledger from pending/salvage-branch to landed,
|
||||
already-present, superseded, skipped, or translator/manual review.
|
||||
|
||||
## Reject Conditions
|
||||
|
||||
- The patch is bulk generated churn.
|
||||
- The patch is stale localization that needs translator/manual review.
|
||||
- The patch imports personal paths, secrets, local settings, or private operator context.
|
||||
- The patch bypasses current install, catalog, plugin, or release architecture.
|
||||
- The branch would mix unrelated salvage units into one PR.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- Targeted test for the touched surface.
|
||||
- `git diff --check`.
|
||||
- Markdown lint when docs are touched.
|
||||
- Catalog/install validation when skills, agents, commands, or plugin surfaces
|
||||
are touched.
|
||||
@@ -0,0 +1,46 @@
|
||||
# CI Failure Diagnosis Playbook
|
||||
|
||||
Candidate id: `log-backed-minimal-fix`
|
||||
|
||||
Use this playbook when a PR, maintainer branch, or release-readiness branch has
|
||||
one or more red GitHub Actions checks.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Capture PR and branch context:
|
||||
- `gh pr view <pr-number> --json files,statusCheckRollup,headRefName,baseRefName`
|
||||
- `gh run view <run-id> --json jobs`
|
||||
2. Fetch the failed log evidence:
|
||||
- `gh run view <run-id> --log-failed`
|
||||
3. Record the failing job, step, OS, Node/Python/Rust version, package manager,
|
||||
and shortest useful error excerpt.
|
||||
4. Compare the failing step to the PR changed files.
|
||||
5. Search current docs, tests, and prior PRs for a known matching failure mode.
|
||||
6. Promote the smallest fix path only when it includes a local reproduction or
|
||||
regression command.
|
||||
7. After a separate implementation branch exists, rerun the focused local gate,
|
||||
then wait for the full GitHub Actions matrix before merge.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not keep rerunning CI until a transient green result appears without
|
||||
recording the original failure and why it is safe to ignore.
|
||||
|
||||
Do not weaken tests, skip matrix legs, or broaden the patch to unrelated files
|
||||
just to make the check pass.
|
||||
|
||||
Do not claim release readiness from a branch with required checks still red.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `gh run view <run-id> --log-failed`
|
||||
- Focused local command matching the failing surface, such as:
|
||||
- `node tests/<matching-test>.js`
|
||||
- `npm run harness:audit -- --format json`
|
||||
- `npm run observability:ready`
|
||||
- `cargo test`
|
||||
- `git diff --check`
|
||||
- Full required GitHub Actions matrix before merge
|
||||
|
||||
Record the failed-log excerpt and the chosen regression command in the
|
||||
maintainer PR body or handoff before merging the fix.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "ci-failure-diagnosis",
|
||||
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"failure_evidence": 0.92,
|
||||
"scope_control": 0.9,
|
||||
"regression_specificity": 0.86,
|
||||
"matrix_safety": 1,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "log-first-required",
|
||||
"severity": "warning",
|
||||
"summary": "A CI fix candidate must start from the exact failed job, step, platform, runtime, package manager, and log excerpt rather than from a generic rerun."
|
||||
},
|
||||
{
|
||||
"id": "changed-file-scope-needed",
|
||||
"severity": "info",
|
||||
"summary": "Changed-file context should narrow the fix to the surface that can affect the failing step, especially in a broad OS/runtime matrix."
|
||||
},
|
||||
{
|
||||
"id": "regression-gate-needed",
|
||||
"severity": "warning",
|
||||
"summary": "A promoted fix playbook must name a local reproduction or regression command before the branch is allowed to merge."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "log-backed-minimal-fix",
|
||||
"action": "Use the promoted CI failure diagnosis playbook whenever a PR check goes red before implementing or rerunning fixes."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "ci-failure-diagnosis",
|
||||
"title": "Diagnose CI failures from captured logs before proposing fixes",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a failed CI run on a PR or maintainer branch, retrieve the exact failing job, captured log excerpt, changed-file context, and prior known-fix evidence before promoting a fix playbook.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"purpose": "Records ECC-Tools CI failure-mode predictive follow-ups and the evaluator/RAG corpus expansion need"
|
||||
},
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/architecture/evaluator-rag-prototype.md",
|
||||
"purpose": "Defines the artifact contract and promotion rules for evaluator/RAG scenarios"
|
||||
},
|
||||
{
|
||||
"kind": "github_actions",
|
||||
"command": "gh run view <run-id> --log-failed",
|
||||
"purpose": "Primary evidence for the failing job, failing step, and deterministic error text"
|
||||
},
|
||||
{
|
||||
"kind": "github_pr",
|
||||
"command": "gh pr view <pr-number> --json files,statusCheckRollup,headRefName,baseRefName",
|
||||
"purpose": "Changed-file and check-rollup context for scoping the fix"
|
||||
},
|
||||
{
|
||||
"kind": "repo_test",
|
||||
"command": "node tests/run-all.js",
|
||||
"purpose": "Local regression gate after a candidate fix is implemented outside the read-only evaluator run"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which job, step, platform, runtime, and package manager failed?",
|
||||
"What is the smallest failing log excerpt that explains the failure?",
|
||||
"Which changed files are plausibly connected to the failing step?",
|
||||
"Is there a prior known-fix, troubleshooting note, or fixture that matches this failure mode?",
|
||||
"Which local command reproduces or guards the failure before a fix can merge?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"rerunning CI until it passes without diagnosing the failure",
|
||||
"pushing speculative fixes without a captured failing log excerpt",
|
||||
"editing unrelated files to make the matrix green",
|
||||
"weakening or deleting tests to silence a failure",
|
||||
"merging or publishing while required checks are red",
|
||||
"creating release tags or posting announcements from this evaluator run"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"failing job and step are named",
|
||||
"captured log excerpt is linked or summarized",
|
||||
"changed-file context is compared to the failing step",
|
||||
"known-fix or no-known-fix status is recorded",
|
||||
"local reproduction or regression command is named",
|
||||
"at least one rerun-only candidate is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "ci-failure-diagnosis",
|
||||
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A PR or maintainer branch has a red GitHub Actions matrix. The evaluator records status without rerunning, merging, or editing code.",
|
||||
"evidence": [
|
||||
"gh pr view <pr-number> --json statusCheckRollup,files",
|
||||
"gh run view <run-id> --json jobs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved failed-job logs, changed-file context, current roadmap CI failure-mode requirements, and existing local regression commands.",
|
||||
"evidence": [
|
||||
"gh run view <run-id> --log-failed",
|
||||
"docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"tests/run-all.js"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: log-backed minimal fix with regression coverage, and rerun-only optimism that treats CI flake as proven without evidence.",
|
||||
"candidate_ids": [
|
||||
"log-backed-minimal-fix",
|
||||
"rerun-only-green-wait"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the log-backed minimal fix because it names failing evidence, scope, and validation. Rejected rerun-only waiting because it does not explain the failure or preserve a regression guard.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/ci-failure-diagnosis/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only CI triage playbook. The evaluator does not push a fix, rerun CI, merge, publish, or weaken checks.",
|
||||
"promoted_candidate_id": "log-backed-minimal-fix"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "ci-failure-diagnosis",
|
||||
"run_id": "2026-05-12-ci-failure-diagnosis-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "log-backed-minimal-fix",
|
||||
"decision": "accepted",
|
||||
"score": 0.93,
|
||||
"reasons": [
|
||||
"requires failed job, step, platform, runtime, and log evidence",
|
||||
"compares changed files to the failing surface before proposing a fix",
|
||||
"names a focused local reproduction or regression command",
|
||||
"keeps required checks intact",
|
||||
"keeps merge, release, package, plugin, billing, and announcement actions approval-gated"
|
||||
],
|
||||
"rollback": "Revert the future implementation PR or restore the original failing test fixture; no code is changed by this read-only playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "rerun-only-green-wait",
|
||||
"decision": "rejected",
|
||||
"score": 0.17,
|
||||
"reasons": [
|
||||
"does not preserve the failing log excerpt",
|
||||
"does not identify job, step, platform, runtime, or package manager",
|
||||
"does not compare failure surface to changed files",
|
||||
"does not add or name a regression gate",
|
||||
"risks merging a flaky or still-unexplained CI failure"
|
||||
],
|
||||
"rollback": "Do not treat this as a fix; restart diagnosis from captured failed logs and changed-file context."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "log-backed-minimal-fix"
|
||||
}
|
||||
@@ -0,0 +1,60 @@
|
||||
# Deep Analyzer Evidence Playbook
|
||||
|
||||
Candidate id: `corpus-backed-analyzer-change`
|
||||
|
||||
Use this playbook when a PR changes repository analysis, commit analysis,
|
||||
architecture classification, workflow detection, pattern detection, or
|
||||
deep-analysis risk-taxonomy behavior.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Name the changed analyzer surface and source file.
|
||||
2. Retrieve the Deep Analyzer Evidence contract from `../ECC-Tools/README.md`
|
||||
and the follow-up logic in `../ECC-Tools/src/lib/analyzer.ts`.
|
||||
3. Match the change to maintained corpus or reference evidence:
|
||||
- `../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts`
|
||||
- `../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts`
|
||||
- `../ECC-Tools/src/lib/analyzer.compare.test.ts`
|
||||
4. Compare expected outputs for the affected behavior:
|
||||
- folder type;
|
||||
- module organization;
|
||||
- test location;
|
||||
- primary language;
|
||||
- commit message type;
|
||||
- detected workflow names.
|
||||
5. Add or update analyzer corpus, expected-output snapshots, fixtures,
|
||||
benchmarks, golden cases, evals, or reference sets for the same changed
|
||||
surface.
|
||||
6. Run the relevant validation gate from `../ECC-Tools/`:
|
||||
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
|
||||
- `npm run typecheck`
|
||||
- `npm run lint`
|
||||
7. Record the corpus case, expected-output comparison, validation output, and
|
||||
rollback notes in the maintainer PR body or handoff.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not promote analyzer threshold, classification, or risk-taxonomy changes
|
||||
without corpus, snapshot, fixture, benchmark, golden, eval, or reference-set
|
||||
evidence.
|
||||
|
||||
Do not suppress the `Deep Analyzer Evidence` PR-risk bucket just because the
|
||||
change is small. Suppress it only when co-located evidence covers the same
|
||||
analyzer surface.
|
||||
|
||||
Do not rely only on broad manual review notes. Analyzer changes need
|
||||
representative repository shapes or commit-history cases with expected outputs.
|
||||
|
||||
Do not post PR comments, create check runs, sync Linear, publish packages, edit
|
||||
plugins, or create release artifacts from the evaluator run.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts`
|
||||
- `npm run typecheck`
|
||||
- `npm run lint`
|
||||
- `git diff --check`
|
||||
- Markdown lint when docs or playbooks are touched
|
||||
|
||||
Preserve source attribution for analyzer evidence and include rollback guidance
|
||||
for the future maintainer PR.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"corpus_retrieval": 0.95,
|
||||
"expected_output_comparison": 0.91,
|
||||
"representative_case_coverage": 0.89,
|
||||
"taxonomy_gap_safety": 0.93,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "corpus-required",
|
||||
"severity": "warning",
|
||||
"summary": "Deep-analysis behavior changes need maintained corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence before promotion."
|
||||
},
|
||||
{
|
||||
"id": "expected-output-required",
|
||||
"severity": "warning",
|
||||
"summary": "Analyzer changes should compare expected folder type, module organization, test location, primary language, commit pattern, or workflow outputs."
|
||||
},
|
||||
{
|
||||
"id": "read-only-routing",
|
||||
"severity": "info",
|
||||
"summary": "The evaluator can recommend a maintainer PR but cannot itself post PR comments, create check runs, sync Linear, publish packages, edit plugins, or take release actions."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "corpus-backed-analyzer-change",
|
||||
"action": "Use the promoted deep-analyzer evidence playbook for PRs that change repository, commit, architecture, workflow, pattern, or risk-taxonomy analysis behavior."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"title": "Require analyzer corpus evidence before promoting deep-analysis changes",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a change to repository, commit, architecture, workflow, pattern, or deep-analysis logic, retrieve maintained analyzer corpus evidence and expected-output comparisons before promoting analyzer behavior or risk-taxonomy changes.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "sibling_repo_doc",
|
||||
"path": "../ECC-Tools/README.md",
|
||||
"purpose": "Public description of deep-analyzer predictive follow-ups and the Deep Analyzer Evidence PR-risk bucket"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_source",
|
||||
"path": "../ECC-Tools/src/lib/analyzer.ts",
|
||||
"purpose": "Predictive follow-up logic that flags analyzer changes without corpus, snapshot, fixture, or benchmark evidence"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_source",
|
||||
"path": "../ECC-Tools/src/lib/pr-risk-taxonomy.ts",
|
||||
"purpose": "Non-blocking PR-risk taxonomy bucket for deep-analyzer evidence"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_fixture",
|
||||
"path": "../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
|
||||
"purpose": "Maintained corpus cases for representative repository shapes, commit histories, and expected analyzer outputs"
|
||||
},
|
||||
{
|
||||
"kind": "sibling_repo_test",
|
||||
"command": "npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts",
|
||||
"purpose": "Regression evidence for analyzer corpus outputs and deep-analyzer follow-up generation"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which analyzer surface changed: repository structure, architecture, code style, commit messages, workflow detection, pattern detection, or risk taxonomy?",
|
||||
"Which maintained corpus case or reference set covers the same analyzer behavior?",
|
||||
"Do expected outputs compare folder type, module organization, test location, primary language, commit type, and workflow names?",
|
||||
"Does the PR add analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set evidence alongside analyzer code changes?",
|
||||
"Does the evaluator keep PR comments, check runs, Linear sync, package changes, and publication actions out of the read-only pass?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"promoting repository, commit, architecture, or deep-analysis changes without analyzer corpus evidence",
|
||||
"suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence",
|
||||
"changing analyzer thresholds or classifications without expected-output comparison",
|
||||
"relying only on broad manual review notes instead of representative repository and commit-history cases",
|
||||
"posting PR comments, check runs, or Linear sync updates from this read-only evaluator run",
|
||||
"changing package, plugin, release, or publication state from this evaluator run"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"changed analyzer surface is named",
|
||||
"maintained corpus or reference-set path is included",
|
||||
"expected analyzer outputs are compared",
|
||||
"representative repository shape or commit history is described",
|
||||
"regression command is named",
|
||||
"at least one no-corpus analyzer change is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A deep-analysis PR changes repository, commit, architecture, workflow, pattern, or risk-taxonomy behavior. The evaluator records the touched analyzer surface and remains read-only.",
|
||||
"evidence": [
|
||||
"../ECC-Tools/src/lib/analyzer.ts",
|
||||
"../ECC-Tools/src/lib/pr-risk-taxonomy.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved the maintained analyzer corpus, corpus regression test, and follow-up tests that distinguish corpus-backed analyzer changes from no-evidence analyzer rewrites.",
|
||||
"evidence": [
|
||||
"../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts",
|
||||
"../ECC-Tools/src/analyzers/deep-analyzer-corpus.test.ts",
|
||||
"../ECC-Tools/src/lib/analyzer.compare.test.ts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: corpus-backed analyzer change, and threshold-only analyzer rewrite without expected-output evidence.",
|
||||
"candidate_ids": [
|
||||
"corpus-backed-analyzer-change",
|
||||
"threshold-only-analyzer-rewrite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the corpus-backed analyzer change because it names representative repository/commit cases and expected-output comparisons. Rejected the threshold-only rewrite because it lacks corpus or benchmark evidence.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/deep-analyzer-evidence/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only deep-analyzer evidence playbook. Future analyzer edits must move through maintainer PRs with corpus evidence, regression commands, and rollback notes.",
|
||||
"promoted_candidate_id": "corpus-backed-analyzer-change"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "deep-analyzer-evidence",
|
||||
"run_id": "2026-05-12-deep-analyzer-evidence-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "corpus-backed-analyzer-change",
|
||||
"decision": "accepted",
|
||||
"score": 0.92,
|
||||
"reasons": [
|
||||
"names the changed analyzer surface and matching maintained corpus case",
|
||||
"compares expected analyzer outputs for representative repository and commit-history inputs",
|
||||
"keeps Deep Analyzer Evidence taxonomy behavior tied to co-located corpus or benchmark evidence",
|
||||
"names the regression command that exercises corpus and follow-up behavior",
|
||||
"keeps PR comments, check runs, Linear sync, and publication actions out of the evaluator run"
|
||||
],
|
||||
"rollback": "Revert the future analyzer PR and restore the prior corpus expectations; no hosted check-run, Linear, package, or publication state is changed by this read-only playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "threshold-only-analyzer-rewrite",
|
||||
"decision": "rejected",
|
||||
"score": 0.13,
|
||||
"reasons": [
|
||||
"changes analyzer thresholds without corpus evidence",
|
||||
"does not compare expected outputs against representative repository or commit-history cases",
|
||||
"does not update analyzer corpus, snapshot, fixture, benchmark, golden, eval, or reference-set artifacts",
|
||||
"would suppress Deep Analyzer Evidence risk without proof",
|
||||
"does not name a regression command"
|
||||
],
|
||||
"rollback": "Do not promote this analyzer rewrite; restart from maintained corpus inputs, expected-output snapshots, and a focused maintainer PR."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "corpus-backed-analyzer-change"
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
# Harness Config Quality Playbook
|
||||
|
||||
Candidate id: `adapter-matrix-backed-drift-check`
|
||||
|
||||
Use this playbook when a PR, install change, or setup recommendation touches
|
||||
MCP, plugins, hooks, commands, agents, rules, install targets, or harness
|
||||
adapter surfaces.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Identify the touched harness/config surface.
|
||||
2. Retrieve the adapter state from
|
||||
`docs/architecture/harness-adapter-compliance.md` or
|
||||
`scripts/lib/harness-adapter-compliance.js`.
|
||||
3. Record whether the harness is `Native`, `Adapter-backed`,
|
||||
`Instruction-backed`, or `Reference-only`.
|
||||
4. Name the install/onramp path and verification command from the matrix.
|
||||
5. Preserve existing user and project config by using merge, dry-run, or
|
||||
explicit no-overwrite behavior.
|
||||
6. Run the relevant validation gate:
|
||||
- `npm run harness:adapters -- --check`
|
||||
- `npm run harness:audit -- --format json`
|
||||
- `node tests/lib/install-targets.test.js`
|
||||
- `node tests/opencode-plugin-hooks.test.js`
|
||||
- `node tests/docs/mcp-management-docs.test.js`
|
||||
7. Promote a config recommendation only when the evidence matches the harness
|
||||
state and the config preservation behavior is explicit.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not claim Claude hook parity for Codex, Gemini, Zed, OpenCode, or other
|
||||
harnesses unless the adapter matrix and tests prove it.
|
||||
|
||||
Do not overwrite `settings.json`, MCP configs, plugin manifests, rule files, or
|
||||
command surfaces without a merge/dry-run path and a rollback note.
|
||||
|
||||
Do not toggle live MCP servers, publish plugins, or edit user-level harness
|
||||
config from the evaluator run.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `npm run harness:adapters -- --check`
|
||||
- `npm run harness:audit -- --format json`
|
||||
- Focused install, plugin, MCP, or hook test for the changed surface
|
||||
- `git diff --check`
|
||||
- Markdown lint when docs are touched
|
||||
|
||||
Record the adapter state, risk note, validation commands, and config
|
||||
preservation behavior in the maintainer PR body or handoff.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "harness-config-quality",
|
||||
"run_id": "2026-05-12-harness-config-quality-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"adapter_evidence": 0.94,
|
||||
"config_preservation": 0.88,
|
||||
"verification_specificity": 0.9,
|
||||
"parity_claim_safety": 1,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "adapter-state-required",
|
||||
"severity": "warning",
|
||||
"summary": "Harness recommendations must retrieve the adapter state before claiming native support or runtime enforcement."
|
||||
},
|
||||
{
|
||||
"id": "config-overwrite-risk",
|
||||
"severity": "warning",
|
||||
"summary": "MCP, hook, plugin, command, and rule changes must preserve existing user/project config and use dry-run or merge behavior when available."
|
||||
},
|
||||
{
|
||||
"id": "verification-command-needed",
|
||||
"severity": "info",
|
||||
"summary": "The accepted playbook names harness adapter, harness audit, install-target, or plugin-hook regression gates before a config change can merge."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "adapter-matrix-backed-drift-check",
|
||||
"action": "Use the promoted harness-config quality playbook for PRs or setup work touching MCP, plugin, hook, command, agent, rule, or adapter surfaces."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "harness-config-quality",
|
||||
"title": "Detect harness config drift before changing adapters or installs",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a change to MCP, plugin, hook, command, agent, rule, or harness adapter surfaces, retrieve the adapter matrix and validation evidence before promoting a setup recommendation or config change.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/architecture/harness-adapter-compliance.md",
|
||||
"purpose": "Public adapter matrix that names harness state, install/onramp paths, verification commands, and risk notes"
|
||||
},
|
||||
{
|
||||
"kind": "repo_source",
|
||||
"path": "scripts/lib/harness-adapter-compliance.js",
|
||||
"purpose": "Structured source of truth for the adapter compliance matrix"
|
||||
},
|
||||
{
|
||||
"kind": "repo_config",
|
||||
"path": "hooks/hooks.json",
|
||||
"purpose": "Claude hook surface that must not be assumed portable without adapter evidence"
|
||||
},
|
||||
{
|
||||
"kind": "repo_config",
|
||||
"path": "mcp-configs/mcp-servers.json",
|
||||
"purpose": "Reference MCP config that can drift from harness-specific runtime semantics"
|
||||
},
|
||||
{
|
||||
"kind": "repo_test",
|
||||
"command": "npm run harness:adapters -- --check",
|
||||
"purpose": "Adapter matrix consistency gate"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which harness or config surface changed: MCP, plugin, hook, command, agent, rule, or adapter?",
|
||||
"Does the adapter matrix classify this harness as native, adapter-backed, instruction-backed, or reference-only?",
|
||||
"Which install path, verification command, risk note, owner, and source doc apply?",
|
||||
"Does the recommendation preserve existing user config rather than overwriting it?",
|
||||
"Which compatibility regression or harness audit command proves the setup still works?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"claiming native support for instruction-backed or reference-only harnesses",
|
||||
"copying Claude hook semantics into Codex, Gemini, Zed, or OpenCode without adapter evidence",
|
||||
"silently overwriting existing user MCP, hook, plugin, command, or rule config",
|
||||
"disabling or enabling live MCP servers from a read-only evaluator run",
|
||||
"shipping an adapter change without a verification command",
|
||||
"publishing packages or plugins from this evaluator run"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"adapter state is retrieved from the matrix",
|
||||
"install or onramp path is named",
|
||||
"verification command is named",
|
||||
"risk note is preserved",
|
||||
"config-preservation behavior is explicit",
|
||||
"at least one unsupported parity claim is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "harness-config-quality",
|
||||
"run_id": "2026-05-12-harness-config-quality-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A setup recommendation or PR touches MCP, plugin, hook, command, agent, rule, or adapter surfaces. The evaluator records the surface without editing local or user-level config.",
|
||||
"evidence": [
|
||||
"docs/architecture/harness-adapter-compliance.md",
|
||||
"scripts/lib/harness-adapter-compliance.js"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved the adapter state, install/onramp path, verification commands, risk notes, and config-preservation tests for the affected harness.",
|
||||
"evidence": [
|
||||
"npm run harness:adapters -- --check",
|
||||
"npm run harness:audit -- --format json",
|
||||
"node tests/lib/install-targets.test.js"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: adapter-matrix-backed drift check, and unsupported hook parity claim that copies Claude semantics into every harness.",
|
||||
"candidate_ids": [
|
||||
"adapter-matrix-backed-drift-check",
|
||||
"unsupported-hook-parity-claim"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the matrix-backed drift check because it names state, install path, verification, and preservation behavior. Rejected unsupported hook parity because it overclaims portability.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/harness-config-quality/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only harness-config quality playbook. The evaluator does not overwrite configs, toggle MCP servers, publish plugins, or claim native support.",
|
||||
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "harness-config-quality",
|
||||
"run_id": "2026-05-12-harness-config-quality-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "adapter-matrix-backed-drift-check",
|
||||
"decision": "accepted",
|
||||
"score": 0.92,
|
||||
"reasons": [
|
||||
"retrieves adapter state before making a support claim",
|
||||
"names install or onramp path and verification commands",
|
||||
"preserves existing user and project config",
|
||||
"keeps runtime MCP toggles and plugin publication out of the evaluator run",
|
||||
"requires focused compatibility regression coverage"
|
||||
],
|
||||
"rollback": "Revert the future adapter/config PR or restore the prior config merge behavior; no live user config is changed by this read-only playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "unsupported-hook-parity-claim",
|
||||
"decision": "rejected",
|
||||
"score": 0.16,
|
||||
"reasons": [
|
||||
"claims native support without adapter matrix evidence",
|
||||
"copies Claude hook semantics into instruction-backed harnesses",
|
||||
"does not name a verification command",
|
||||
"does not preserve existing MCP or hook config",
|
||||
"risks publishing or installing unsupported plugin behavior"
|
||||
],
|
||||
"rollback": "Do not publish this setup recommendation; restart from adapter state, risk note, and config-preservation evidence."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "adapter-matrix-backed-drift-check"
|
||||
}
|
||||
35
examples/evaluator-rag-prototype/report.json
Normal file
35
examples/evaluator-rag-prototype/report.json
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "stale-pr-salvage-maintainer-branch",
|
||||
"run_id": "2026-05-12-cleanup-salvage-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"source_attribution": 1,
|
||||
"blast_radius_control": 1,
|
||||
"manual_review_respected": 1,
|
||||
"validation_specificity": 0.8,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "salvage-policy-usable",
|
||||
"severity": "info",
|
||||
"summary": "The stale-salvage ledger and maintainer PR examples provide enough evidence to promote a reusable maintainer-owned salvage playbook."
|
||||
},
|
||||
{
|
||||
"id": "translation-tail-blocked",
|
||||
"severity": "warning",
|
||||
"summary": "Localization tails remain useful but must stay translator/manual-review only."
|
||||
},
|
||||
{
|
||||
"id": "release-actions-blocked",
|
||||
"severity": "warning",
|
||||
"summary": "Release, npm, plugin, billing, and announcement actions remain outside this evaluator run and require separate approval."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "maintainer-salvage-branch",
|
||||
"action": "Use the promoted playbook for future stale cleanup batches and add additional evaluator/RAG scenarios for CI failure diagnosis, harness-config drift, billing readiness, and AgentShield policy exceptions."
|
||||
}
|
||||
}
|
||||
56
examples/evaluator-rag-prototype/scenario.json
Normal file
56
examples/evaluator-rag-prototype/scenario.json
Normal file
@@ -0,0 +1,56 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "stale-pr-salvage-maintainer-branch",
|
||||
"title": "Recover useful stale PR work through maintainer-owned branches",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a closed stale PR batch, identify useful work, reject unsafe bulk imports, and promote only a maintainer-owned salvage playbook with attribution and validation.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/stale-pr-salvage-ledger.md",
|
||||
"purpose": "Durable source-to-disposition mapping for stale PR cleanup"
|
||||
},
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/legacy-artifact-inventory.md",
|
||||
"purpose": "Import guardrails for legacy and private-context material"
|
||||
},
|
||||
{
|
||||
"kind": "roadmap",
|
||||
"path": "docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"purpose": "Operating rule and current execution lane"
|
||||
},
|
||||
{
|
||||
"kind": "github_pr",
|
||||
"url": "https://github.com/affaan-m/everything-claude-code/pull/1815",
|
||||
"purpose": "Example maintainer-owned stale salvage PR with attribution"
|
||||
},
|
||||
{
|
||||
"kind": "github_pr",
|
||||
"url": "https://github.com/affaan-m/everything-claude-code/pull/1818",
|
||||
"purpose": "Example gap pass classifying already-present and skipped stale work"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which closed PRs contain useful work that is not already present?",
|
||||
"Which files or concepts are unsafe to cherry-pick without manual review?",
|
||||
"Which current docs, skills, commands, or tests are the correct integration points?",
|
||||
"Which validation gates are required before the salvage work can merge?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"closing, reopening, or commenting on PRs",
|
||||
"merging PRs",
|
||||
"creating release tags",
|
||||
"publishing packages or plugins",
|
||||
"copying private paths, secrets, or raw personal context",
|
||||
"blindly cherry-picking bulk localization"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"source attribution is preserved",
|
||||
"salvage ledger or equivalent tracker is updated",
|
||||
"translation/manual-review tails remain blocked",
|
||||
"candidate action is reversible and maintainer-owned",
|
||||
"validation commands are named",
|
||||
"at least one unsafe candidate is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
# Skill Quality Evidence Playbook
|
||||
|
||||
Candidate id: `evidence-backed-skill-amendment`
|
||||
|
||||
Use this playbook when a PR or follow-up proposes adding, rewriting, or
|
||||
amending a skill, agent, command, or rule guidance surface.
|
||||
|
||||
## Accepted Path
|
||||
|
||||
1. Name the changed guidance surface and source file.
|
||||
2. Retrieve the quality contract from `docs/SKILL-DEVELOPMENT-GUIDE.md`.
|
||||
3. Compare the proposed change to nearby focused examples under `skills/*/SKILL.md`.
|
||||
4. Record the evidence source that justifies the change:
|
||||
- observed skill-run failure;
|
||||
- user feedback;
|
||||
- repeated review finding;
|
||||
- reference-set gap;
|
||||
- failing example or regression test.
|
||||
5. Keep the scope narrow. One skill should cover one domain, workflow, or
|
||||
reusable pattern.
|
||||
6. Add or update examples only when they can be validated.
|
||||
7. Run the validation gates relevant to the change:
|
||||
- `node scripts/ci/validate-skills.js`
|
||||
- `node tests/lib/skill-improvement.test.js`
|
||||
- `node tests/lib/skill-evolution.test.js`
|
||||
- `npm run catalog:check`
|
||||
- language-specific example commands such as `npx tsc --noEmit`,
|
||||
`python -m py_compile`, or `go build` when examples are touched.
|
||||
8. Record validation output, source attribution, and rollback notes in the
|
||||
maintainer PR body or handoff.
|
||||
|
||||
## Rejected Path
|
||||
|
||||
Do not promote a vague skill rewrite because the prose "sounds better" without
|
||||
observed failure evidence, examples, or a reference set.
|
||||
|
||||
Do not merge multi-domain catch-all skills that duplicate focused skills or make
|
||||
activation less predictable.
|
||||
|
||||
Do not copy private operator context, secrets, tokens, personal paths, customer
|
||||
data, or unpublished release claims into skills.
|
||||
|
||||
Do not update package manifests, plugin manifests, catalogs, release notes, or
|
||||
publication state from the evaluator run.
|
||||
|
||||
## Minimum Validation
|
||||
|
||||
- `node scripts/ci/validate-skills.js`
|
||||
- `npm run catalog:check` when catalog/package-visible skill surfaces change
|
||||
- Focused skill-improvement or skill-evolution regression test when amendment
|
||||
behavior changes
|
||||
- Language-specific compile/lint checks for touched examples
|
||||
- `git diff --check`
|
||||
- Markdown lint when docs or playbooks are touched
|
||||
|
||||
Preserve source attribution for contributed skill material and include rollback
|
||||
guidance for the future maintainer PR.
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.report.v1",
|
||||
"scenario_id": "skill-quality-evidence",
|
||||
"run_id": "2026-05-12-skill-quality-evidence-prototype",
|
||||
"result": "prototype_passed",
|
||||
"read_only": true,
|
||||
"scores": {
|
||||
"skill_contract_retrieval": 0.94,
|
||||
"observed_failure_evidence": 0.88,
|
||||
"example_quality": 0.9,
|
||||
"validation_specificity": 0.93,
|
||||
"publication_safety": 1
|
||||
},
|
||||
"findings": [
|
||||
{
|
||||
"id": "examples-required",
|
||||
"severity": "warning",
|
||||
"summary": "Skill-quality changes need working examples or regression evidence; prose-only rewrites are not enough for promotion."
|
||||
},
|
||||
{
|
||||
"id": "observation-source-required",
|
||||
"severity": "warning",
|
||||
"summary": "Skill amendments should cite observed failure, user feedback, or a reference-set gap rather than broad style preference."
|
||||
},
|
||||
{
|
||||
"id": "publication-stays-blocked",
|
||||
"severity": "info",
|
||||
"summary": "The evaluator can recommend a maintainer PR, but it cannot update package, plugin, catalog, or publication state itself."
|
||||
}
|
||||
],
|
||||
"recommended_next_action": {
|
||||
"candidate_id": "evidence-backed-skill-amendment",
|
||||
"action": "Use the promoted skill-quality playbook for PRs that add, rewrite, or amend skills, agents, commands, or rules guidance."
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.scenario.v1",
|
||||
"scenario_id": "skill-quality-evidence",
|
||||
"title": "Require examples and validation before promoting skill guidance changes",
|
||||
"mode": "read_only_prototype",
|
||||
"objective": "Given a change to skills, agents, commands, or rules guidance, retrieve the skill development contract and observed skill-run evidence before promoting an amendment or new skill-quality recommendation.",
|
||||
"sources": [
|
||||
{
|
||||
"kind": "repo_doc",
|
||||
"path": "docs/SKILL-DEVELOPMENT-GUIDE.md",
|
||||
"purpose": "Public skill quality contract for frontmatter, focused scope, examples, testing, and submission evidence"
|
||||
},
|
||||
{
|
||||
"kind": "repo_source",
|
||||
"path": "scripts/ci/validate-skills.js",
|
||||
"purpose": "Curated skill structure and frontmatter validation gate"
|
||||
},
|
||||
{
|
||||
"kind": "repo_source",
|
||||
"path": "scripts/lib/skill-improvement/",
|
||||
"purpose": "Observation, health, amendment, and evaluation helpers for evidence-backed skill evolution"
|
||||
},
|
||||
{
|
||||
"kind": "repo_test",
|
||||
"command": "node tests/lib/skill-improvement.test.js",
|
||||
"purpose": "Regression coverage for observation-backed skill amendment and evaluation scaffolds"
|
||||
},
|
||||
{
|
||||
"kind": "repo_test",
|
||||
"command": "node scripts/ci/validate-skills.js",
|
||||
"purpose": "Skill structure validation before catalog or package changes merge"
|
||||
}
|
||||
],
|
||||
"retrieval_questions": [
|
||||
"Which skill, agent, command, or rule surface changed?",
|
||||
"Does the change preserve focused scope, clear activation text, and working examples?",
|
||||
"Which validation command proves frontmatter, catalog, example, or behavior quality?",
|
||||
"Does observed failure or user feedback justify the amendment?",
|
||||
"Does the candidate avoid private context, secrets, personal paths, and publication actions?"
|
||||
],
|
||||
"forbidden_actions": [
|
||||
"promoting a skill rewrite without examples, validation, or observed failure evidence",
|
||||
"adding broad multi-domain skills that duplicate existing focused skills",
|
||||
"shipping code examples that are uncompiled, untested, or disconnected from the skill guidance",
|
||||
"copying private operator context, secrets, tokens, or personal paths into skills",
|
||||
"changing package, plugin, catalog, or publication state from this evaluator run",
|
||||
"claiming a skill-quality improvement without a reference set or regression command"
|
||||
],
|
||||
"acceptance_gates": [
|
||||
"changed skill or guidance surface is named",
|
||||
"source evidence includes the skill development guide or current skill examples",
|
||||
"observed failure, user feedback, or reference-set gap is recorded",
|
||||
"validation command is named",
|
||||
"example or regression evidence is attached",
|
||||
"at least one vague no-evidence rewrite is rejected"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "skill-quality-evidence",
|
||||
"run_id": "2026-05-12-skill-quality-evidence-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "A skill or guidance PR proposes updated instructions. The evaluator records the changed surface and stays read-only; it does not edit skills, package manifests, catalogs, or publication state.",
|
||||
"evidence": [
|
||||
"docs/SKILL-DEVELOPMENT-GUIDE.md",
|
||||
"scripts/ci/validate-skills.js"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved the skill quality contract, existing focused skill examples, observation-backed amendment helpers, and validation commands for skill structure and regression evidence.",
|
||||
"evidence": [
|
||||
"node scripts/ci/validate-skills.js",
|
||||
"node tests/lib/skill-improvement.test.js",
|
||||
"node tests/lib/skill-evolution.test.js",
|
||||
"npm run catalog:check"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: evidence-backed skill amendment, and broad rewrite with no examples or validation.",
|
||||
"candidate_ids": [
|
||||
"evidence-backed-skill-amendment",
|
||||
"vague-skill-rewrite"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the evidence-backed amendment because it names observed failure evidence, examples, and validation commands. Rejected the vague rewrite because it lacks a reference set and testable examples.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/skill-quality-evidence/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the read-only skill-quality evidence playbook. Future skill edits must move through maintainer PRs with source attribution, validation, and rollback notes.",
|
||||
"promoted_candidate_id": "evidence-backed-skill-amendment"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "skill-quality-evidence",
|
||||
"run_id": "2026-05-12-skill-quality-evidence-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "evidence-backed-skill-amendment",
|
||||
"decision": "accepted",
|
||||
"score": 0.91,
|
||||
"reasons": [
|
||||
"retrieves the skill development guide and existing focused skill examples",
|
||||
"records observed failure, user feedback, or reference-set gap before proposing an amendment",
|
||||
"names validation commands for skill structure, examples, catalog consistency, and regression behavior",
|
||||
"keeps package, plugin, catalog, and publication actions out of the evaluator run",
|
||||
"includes rollback guidance for reverting the future maintainer PR"
|
||||
],
|
||||
"rollback": "Revert the future skill-amendment PR and restore the prior SKILL.md content; no installed user skill or publication surface changes in this read-only playbook."
|
||||
},
|
||||
{
|
||||
"candidate_id": "vague-skill-rewrite",
|
||||
"decision": "rejected",
|
||||
"score": 0.14,
|
||||
"reasons": [
|
||||
"does not name observed failure evidence or user feedback",
|
||||
"rewrites broad skill guidance without focused scope",
|
||||
"does not include working examples or a reference set",
|
||||
"does not name a regression command",
|
||||
"risks changing catalog or publication state from evaluator output"
|
||||
],
|
||||
"rollback": "Do not promote this rewrite; restart from observed skill-run evidence, example validation, and a focused maintainer PR."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "evidence-backed-skill-amendment"
|
||||
}
|
||||
46
examples/evaluator-rag-prototype/trace.json
Normal file
46
examples/evaluator-rag-prototype/trace.json
Normal file
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.trace.v1",
|
||||
"scenario_id": "stale-pr-salvage-maintainer-branch",
|
||||
"run_id": "2026-05-12-cleanup-salvage-prototype",
|
||||
"read_only": true,
|
||||
"events": [
|
||||
{
|
||||
"phase": "observation",
|
||||
"summary": "Public PR, issue, and discussion queues are clear; release publication remains approval-gated; stale-salvage ledger has landed, skipped, superseded, and manual-review states.",
|
||||
"evidence": [
|
||||
"docs/ECC-2.0-GA-ROADMAP.md",
|
||||
"docs/stale-pr-salvage-ledger.md"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "retrieval",
|
||||
"summary": "Retrieved stale PR source mappings, existing maintainer salvage examples, legacy import rules, and manual-review localization tails.",
|
||||
"evidence": [
|
||||
"docs/stale-pr-salvage-ledger.md",
|
||||
"docs/legacy-artifact-inventory.md",
|
||||
"https://github.com/affaan-m/everything-claude-code/pull/1815",
|
||||
"https://github.com/affaan-m/everything-claude-code/pull/1818"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "proposal",
|
||||
"summary": "Generated two candidate playbooks: maintainer-owned salvage branch with attribution, and blind cherry-pick of stale translations.",
|
||||
"candidate_ids": [
|
||||
"maintainer-salvage-branch",
|
||||
"blind-cherry-pick-translations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "verification",
|
||||
"summary": "Accepted the maintainer-owned salvage branch and rejected blind translation cherry-picking because it violates manual-review and attribution gates.",
|
||||
"evidence": [
|
||||
"examples/evaluator-rag-prototype/verifier-result.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
"phase": "promotion",
|
||||
"summary": "Promoted only the maintainer-owned salvage branch playbook as a reusable process. No repository, GitHub, release, billing, or plugin publication action is performed by this prototype.",
|
||||
"promoted_candidate_id": "maintainer-salvage-branch"
|
||||
}
|
||||
]
|
||||
}
|
||||
35
examples/evaluator-rag-prototype/verifier-result.json
Normal file
35
examples/evaluator-rag-prototype/verifier-result.json
Normal file
@@ -0,0 +1,35 @@
|
||||
{
|
||||
"schema_version": "ecc.evaluator-rag.verifier.v1",
|
||||
"scenario_id": "stale-pr-salvage-maintainer-branch",
|
||||
"run_id": "2026-05-12-cleanup-salvage-prototype",
|
||||
"read_only": true,
|
||||
"candidates": [
|
||||
{
|
||||
"candidate_id": "maintainer-salvage-branch",
|
||||
"decision": "accepted",
|
||||
"score": 0.94,
|
||||
"reasons": [
|
||||
"preserves source PR attribution",
|
||||
"keeps work on a fresh maintainer-owned branch",
|
||||
"updates the salvage ledger",
|
||||
"names validation gates",
|
||||
"does not perform release or publication actions"
|
||||
],
|
||||
"rollback": "Close the maintainer PR or revert its merge commit; source PR state remains unchanged."
|
||||
},
|
||||
{
|
||||
"candidate_id": "blind-cherry-pick-translations",
|
||||
"decision": "rejected",
|
||||
"score": 0.21,
|
||||
"reasons": [
|
||||
"bulk localization requires translator/manual review",
|
||||
"does not preserve enough source attribution",
|
||||
"could import stale generated docs",
|
||||
"does not name validation gates",
|
||||
"risks bypassing current catalog and install architecture"
|
||||
],
|
||||
"rollback": "Do not create this branch; keep the localization tail in translator/manual-review state."
|
||||
}
|
||||
],
|
||||
"promoted_candidate_id": "maintainer-salvage-branch"
|
||||
}
|
||||
117
examples/hud-status-contract.json
Normal file
117
examples/hud-status-contract.json
Normal file
@@ -0,0 +1,117 @@
|
||||
{
|
||||
"schema_version": "ecc.hud-status.v1",
|
||||
"generatedAt": "2026-05-12T00:00:00.000Z",
|
||||
"context": {
|
||||
"harness": "codex",
|
||||
"model": "gpt-5",
|
||||
"repo": "affaan-m/everything-claude-code",
|
||||
"branch": "main",
|
||||
"worktree": "/repo/everything-claude-code",
|
||||
"sessionId": "session-active",
|
||||
"contextWindow": {
|
||||
"remainingPct": 62,
|
||||
"pressure": "normal"
|
||||
}
|
||||
},
|
||||
"toolCalls": {
|
||||
"total": 47,
|
||||
"pending": 0,
|
||||
"stale": 0,
|
||||
"lastTool": {
|
||||
"name": "gh-pr-view",
|
||||
"status": "success",
|
||||
"finishedAt": "2026-05-12T00:00:00.000Z"
|
||||
}
|
||||
},
|
||||
"activeAgents": [
|
||||
{
|
||||
"id": "worker-release-docs",
|
||||
"state": "completed",
|
||||
"branch": "codex/release-docs",
|
||||
"worktree": "/tmp/ecc-release-docs",
|
||||
"objective": "Update release readiness docs",
|
||||
"handoffPath": "/tmp/ecc-release-docs/handoff.md"
|
||||
}
|
||||
],
|
||||
"todos": {
|
||||
"inProgress": "Verify release publication matrix",
|
||||
"counts": {
|
||||
"pending": 2,
|
||||
"inProgress": 1,
|
||||
"completed": 6
|
||||
}
|
||||
},
|
||||
"checks": {
|
||||
"local": [
|
||||
{
|
||||
"command": "npm run observability:ready",
|
||||
"status": "pass"
|
||||
}
|
||||
],
|
||||
"remote": [
|
||||
{
|
||||
"name": "CI",
|
||||
"status": "pass",
|
||||
"url": "https://github.com/affaan-m/everything-claude-code/actions"
|
||||
}
|
||||
]
|
||||
},
|
||||
"cost": {
|
||||
"sessionUsd": 1.23,
|
||||
"budgetUsd": 10,
|
||||
"trend": "within-budget"
|
||||
},
|
||||
"risk": {
|
||||
"status": "attention",
|
||||
"reasons": [
|
||||
"release tag not published"
|
||||
],
|
||||
"dirtyWorktree": false,
|
||||
"conflicts": 0,
|
||||
"manualReviewRequired": true
|
||||
},
|
||||
"queueState": {
|
||||
"github": {
|
||||
"openPullRequests": 0,
|
||||
"openIssues": 0,
|
||||
"openDiscussions": 0
|
||||
},
|
||||
"mergeQueue": [],
|
||||
"conflictQueue": [],
|
||||
"staleSalvageQueue": [
|
||||
{
|
||||
"sourcePullRequest": 1310,
|
||||
"status": "landed"
|
||||
}
|
||||
]
|
||||
},
|
||||
"sessionControls": {
|
||||
"supported": [
|
||||
"create",
|
||||
"resume",
|
||||
"status",
|
||||
"stop",
|
||||
"diff",
|
||||
"pr",
|
||||
"mergeQueue",
|
||||
"conflictQueue"
|
||||
],
|
||||
"blocked": []
|
||||
},
|
||||
"sync": {
|
||||
"Linear": {
|
||||
"project": "ECC 2.0 GA",
|
||||
"health": "atRisk",
|
||||
"issueCapacityBlocked": true,
|
||||
"latestStatusUpdateId": "status-update-id"
|
||||
},
|
||||
"GitHub": {
|
||||
"repo": "affaan-m/everything-claude-code",
|
||||
"latestPullRequest": 1820
|
||||
},
|
||||
"handoff": {
|
||||
"path": "~/.cluster-swarm/handoffs/ecc-update.md",
|
||||
"written": true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -137,8 +137,10 @@
|
||||
"skills/django-verification",
|
||||
"skills/dotnet-patterns",
|
||||
"skills/fastapi-patterns",
|
||||
"skills/frontend-design-direction",
|
||||
"skills/frontend-patterns",
|
||||
"skills/frontend-slides",
|
||||
"skills/make-interfaces-feel-better",
|
||||
"skills/motion-ui",
|
||||
"skills/golang-patterns",
|
||||
"skills/golang-testing",
|
||||
@@ -236,6 +238,7 @@
|
||||
"skills/iterative-retrieval",
|
||||
"skills/plankton-code-quality",
|
||||
"skills/production-audit",
|
||||
"skills/skill-scout",
|
||||
"skills/skill-stocktake",
|
||||
"skills/strategic-compact",
|
||||
"skills/tdd-workflow",
|
||||
@@ -369,6 +372,7 @@
|
||||
"skills/automation-audit-ops",
|
||||
"skills/api-connector-builder",
|
||||
"skills/connections-optimizer",
|
||||
"skills/cost-tracking",
|
||||
"skills/customer-billing-ops",
|
||||
"skills/dashboard-builder",
|
||||
"skills/ecc-tools-cost-audit",
|
||||
|
||||
6
package-lock.json
generated
6
package-lock.json
generated
@@ -1044,9 +1044,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fast-uri": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
|
||||
"integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.2.tgz",
|
||||
"integrity": "sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
|
||||
11
package.json
11
package.json
@@ -124,6 +124,7 @@
|
||||
"skills/continuous-learning/",
|
||||
"skills/continuous-learning-v2/",
|
||||
"skills/cost-aware-llm-pipeline/",
|
||||
"skills/cost-tracking/",
|
||||
"skills/council/",
|
||||
"skills/cpp-coding-standards/",
|
||||
"skills/cpp-testing/",
|
||||
@@ -158,6 +159,7 @@
|
||||
"skills/fastapi-patterns/",
|
||||
"skills/finance-billing-ops/",
|
||||
"skills/foundation-models-on-device/",
|
||||
"skills/frontend-design-direction/",
|
||||
"skills/frontend-patterns/",
|
||||
"skills/frontend-slides/",
|
||||
"skills/fsharp-testing/",
|
||||
@@ -194,6 +196,7 @@
|
||||
"skills/logistics-exception-management/",
|
||||
"skills/manim-video/",
|
||||
"skills/market-research/",
|
||||
"skills/make-interfaces-feel-better/",
|
||||
"skills/mcp-server-patterns/",
|
||||
"skills/messages-ops/",
|
||||
"skills/mle-workflow/",
|
||||
@@ -241,6 +244,7 @@
|
||||
"skills/security-review/",
|
||||
"skills/security-scan/",
|
||||
"skills/seo/",
|
||||
"skills/skill-scout/",
|
||||
"skills/skill-stocktake/",
|
||||
"skills/social-graph-ranker/",
|
||||
"skills/springboot-patterns/",
|
||||
@@ -266,7 +270,12 @@
|
||||
"skills/windows-desktop-e2e/",
|
||||
"skills/workspace-surface-audit/",
|
||||
"skills/x-api/",
|
||||
"the-security-guide.md"
|
||||
"the-security-guide.md",
|
||||
"!**/__pycache__/**",
|
||||
"!**/*.pyc",
|
||||
"!**/*.pyo",
|
||||
"!**/*.pyd",
|
||||
"!**/.pytest_cache/**"
|
||||
],
|
||||
"bin": {
|
||||
"ecc": "scripts/ecc.js",
|
||||
|
||||
@@ -103,6 +103,13 @@ function includesAll(text, needles) {
|
||||
return needles.every(needle => text.includes(needle));
|
||||
}
|
||||
|
||||
/**
 * Report whether `value` is a plain (non-array) object that owns every key in `keys`.
 *
 * Always returns a strict boolean so the result can be stored directly in a
 * check's `pass` field; falsy inputs such as `null`, `undefined`, `0`, or `''`
 * yield `false` instead of leaking the input value through an `&&` chain.
 *
 * @param {unknown} value - Candidate object, e.g. a parsed JSON fixture.
 * @param {string[]} keys - Property names that must be own properties of `value`.
 * @returns {boolean} `true` only when `value` is a non-null, non-array object
 *   that has every entry of `keys` as an own property.
 */
function hasObjectKeys(value, keys) {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return false;
  }
  // Own-property lookup only: inherited members (e.g. `toString`) must not satisfy a key.
  return keys.every(key => Object.prototype.hasOwnProperty.call(value, key));
}
|
||||
|
||||
function buildChecks(rootDir) {
|
||||
const packageJsonText = readText(rootDir, 'package.json');
|
||||
const packageJson = safeParseJson(packageJsonText) || {};
|
||||
@@ -116,6 +123,8 @@ function buildChecks(rootDir) {
|
||||
const sessionStoreRust = readText(rootDir, 'ecc2/src/session/store.rs');
|
||||
const sessionManagerRust = readText(rootDir, 'ecc2/src/session/manager.rs');
|
||||
const readinessDoc = readText(rootDir, 'docs/architecture/observability-readiness.md');
|
||||
const hudStatusContract = readText(rootDir, 'docs/architecture/hud-status-session-control.md');
|
||||
const hudStatusFixture = safeParseJson(readText(rootDir, 'examples/hud-status-contract.json')) || {};
|
||||
const quickstart = readText(rootDir, 'docs/releases/2.0.0-rc.1/quickstart.md');
|
||||
const releaseNotes = readText(rootDir, 'docs/releases/2.0.0-rc.1/release-notes.md');
|
||||
|
||||
@@ -130,6 +139,50 @@ function buildChecks(rootDir) {
|
||||
&& includesAll(loopStatus, ['--json', '--watch', '--write-dir']),
|
||||
fix: 'Restore loop-status JSON/watch/write-dir support.'
|
||||
},
|
||||
{
|
||||
id: 'hud-status-control-contract',
|
||||
category: 'Live Status',
|
||||
points: 2,
|
||||
path: 'docs/architecture/hud-status-session-control.md',
|
||||
description: 'HUD/status and session-control surfaces have a portable JSON contract',
|
||||
pass: fileExists(rootDir, 'docs/architecture/hud-status-session-control.md')
|
||||
&& fileExists(rootDir, 'examples/hud-status-contract.json')
|
||||
&& includesAll(hudStatusContract, [
|
||||
'context',
|
||||
'toolCalls',
|
||||
'activeAgents',
|
||||
'todos',
|
||||
'checks',
|
||||
'cost',
|
||||
'risk',
|
||||
'queueState',
|
||||
'create',
|
||||
'resume',
|
||||
'status',
|
||||
'stop',
|
||||
'diff',
|
||||
'pr',
|
||||
'mergeQueue',
|
||||
'conflictQueue',
|
||||
'Linear',
|
||||
'GitHub',
|
||||
'handoff'
|
||||
])
|
||||
&& hudStatusFixture.schema_version === 'ecc.hud-status.v1'
|
||||
&& hasObjectKeys(hudStatusFixture, [
|
||||
'context',
|
||||
'toolCalls',
|
||||
'activeAgents',
|
||||
'todos',
|
||||
'checks',
|
||||
'cost',
|
||||
'risk',
|
||||
'queueState',
|
||||
'sessionControls',
|
||||
'sync'
|
||||
]),
|
||||
fix: 'Add the HUD/status session-control contract doc and example JSON fixture.'
|
||||
},
|
||||
{
|
||||
id: 'session-inspect-adapter-registry',
|
||||
category: 'Session Trace',
|
||||
|
||||
147
skills/cost-tracking/SKILL.md
Normal file
147
skills/cost-tracking/SKILL.md
Normal file
@@ -0,0 +1,147 @@
|
||||
---
|
||||
name: cost-tracking
|
||||
description: Track and report Claude Code token usage, spending, and budgets from a local cost-tracking database. Use when the user asks about costs, spending, usage, tokens, budgets, or cost breakdowns by project, tool, session, or date.
|
||||
origin: community
|
||||
---
|
||||
|
||||
# Cost Tracking
|
||||
|
||||
Use this skill to analyze Claude Code cost and usage history from a local SQLite
|
||||
database. It is intended for users who already have a cost-tracking hook or
|
||||
plugin writing usage rows to `~/.claude-cost-tracker/usage.db`.
|
||||
|
||||
Source: salvaged from stale community PR #1304 by `MayurBhavsar`.
|
||||
|
||||
## When to Use
|
||||
|
||||
- The user asks "how much have I spent?", "what did this session cost?", or
|
||||
"what is my token usage?"
|
||||
- The user mentions budgets, spending limits, overruns, or cost controls.
|
||||
- The user wants a cost breakdown by project, tool, session, model, or date.
|
||||
- The user wants to compare today against yesterday or inspect a recent trend.
|
||||
- The user asks for a CSV export of recent usage records.
|
||||
|
||||
## How It Works
|
||||
|
||||
First verify prerequisites:
|
||||
|
||||
```bash
|
||||
command -v sqlite3 >/dev/null && echo "sqlite3 available" || echo "sqlite3 missing"
|
||||
test -f ~/.claude-cost-tracker/usage.db && echo "Database found" || echo "Database not found"
|
||||
```
|
||||
|
||||
If the database is missing, do not fabricate usage data. Tell the user that cost
|
||||
tracking is not configured and suggest installing or enabling a trusted local
|
||||
cost-tracking hook/plugin.
|
||||
|
||||
The expected `usage` table usually contains one row per tool call or model
|
||||
interaction. Column names vary by tracker, but the examples below assume:
|
||||
|
||||
| Column | Meaning |
|
||||
| --- | --- |
|
||||
| `timestamp` | ISO timestamp for the usage event |
|
||||
| `project` | Project or repository name |
|
||||
| `tool_name` | Tool or event name |
|
||||
| `input_tokens` | Input token count, when recorded |
|
||||
| `output_tokens` | Output token count, when recorded |
|
||||
| `cost_usd` | Precomputed cost in USD |
|
||||
| `session_id` | Claude Code session identifier |
|
||||
| `model` | Model used for the event |
|
||||
|
||||
Prefer `cost_usd` over hand-calculating pricing. Model prices and cache pricing
|
||||
change over time, and the tracker should be the source of truth for how each row
|
||||
was priced.
|
||||
|
||||
## Examples
|
||||
|
||||
### Quick Summary
|
||||
|
||||
```bash
|
||||
sqlite3 ~/.claude-cost-tracker/usage.db "
|
||||
SELECT
|
||||
'Today: $' || ROUND(COALESCE(SUM(CASE WHEN date(timestamp) = date('now') THEN cost_usd END), 0), 4) ||
|
||||
' | Total: $' || ROUND(COALESCE(SUM(cost_usd), 0), 4) ||
|
||||
' | Calls: ' || COUNT(*) ||
|
||||
' | Sessions: ' || COUNT(DISTINCT session_id)
|
||||
FROM usage;
|
||||
"
|
||||
```
|
||||
|
||||
### Cost By Project
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT project, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY project
|
||||
ORDER BY cost DESC;
|
||||
"
|
||||
```
|
||||
|
||||
### Cost By Tool
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT tool_name, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY tool_name
|
||||
ORDER BY cost DESC;
|
||||
"
|
||||
```
|
||||
|
||||
### Last Seven Days
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT date(timestamp) AS date, ROUND(SUM(cost_usd), 4) AS cost, COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY date(timestamp)
|
||||
ORDER BY date DESC
|
||||
LIMIT 7;
|
||||
"
|
||||
```
|
||||
|
||||
### Session Drilldown
|
||||
|
||||
```bash
|
||||
sqlite3 -header -column ~/.claude-cost-tracker/usage.db "
|
||||
SELECT session_id,
|
||||
MIN(timestamp) AS started,
|
||||
MAX(timestamp) AS ended,
|
||||
ROUND(SUM(cost_usd), 4) AS cost,
|
||||
COUNT(*) AS calls
|
||||
FROM usage
|
||||
GROUP BY session_id
|
||||
ORDER BY started DESC
|
||||
LIMIT 10;
|
||||
"
|
||||
```
|
||||
|
||||
## Reporting Guidance
|
||||
|
||||
When presenting cost data, include:
|
||||
|
||||
1. Today's spend and yesterday comparison.
|
||||
2. Total spend across the tracked database.
|
||||
3. Top projects ranked by cost.
|
||||
4. Top tools ranked by cost.
|
||||
5. Session count and average cost per session when enough data exists.
|
||||
|
||||
For small amounts, format currency with four decimal places. For larger amounts,
|
||||
two decimals are enough.
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
- Do not estimate costs from raw token counts when `cost_usd` is present.
|
||||
- Do not assume the database exists without checking.
|
||||
- Do not run unbounded `SELECT *` exports on large databases.
|
||||
- Do not hard-code current model pricing in user-facing answers.
|
||||
- Do not recommend installing unreviewed hooks or plugins that execute arbitrary
|
||||
code.
|
||||
|
||||
## Related
|
||||
|
||||
- `/cost-report` - Command-form report using the same database.
|
||||
- `cost-aware-llm-pipeline` - Model-routing and budget-design patterns.
|
||||
- `token-budget-advisor` - Context and token-budget planning.
|
||||
- `strategic-compact` - Context compaction to reduce repeated token spend.
|
||||
457
skills/django-celery/SKILL.md
Normal file
457
skills/django-celery/SKILL.md
Normal file
@@ -0,0 +1,457 @@
|
||||
---
|
||||
name: django-celery
|
||||
description: Django + Celery async task patterns — configuration, task design, beat scheduling, retries, canvas workflows, monitoring, and testing. Use when adding background jobs, scheduled tasks, or async processing to a Django app.
|
||||
origin: ECC
|
||||
---
|
||||
|
||||
# Django + Celery Async Task Patterns
|
||||
|
||||
Production-grade patterns for background task processing in Django using Celery with Redis or RabbitMQ.
|
||||
|
||||
## When to Activate
|
||||
|
||||
- Adding background jobs or async processing to a Django app
|
||||
- Implementing periodic/scheduled tasks
|
||||
- Offloading slow operations (email, PDF generation, API calls) from the request cycle
|
||||
- Setting up Celery Beat for cron-like scheduling
|
||||
- Debugging task failures, retries, or queue backlogs
|
||||
- Writing tests for Celery tasks
|
||||
|
||||
## Project Setup
|
||||
|
||||
### Installation
|
||||
|
||||
```bash
|
||||
# Quote the extras spec so shells such as zsh do not treat [redis] as a glob pattern.
pip install "celery[redis]" django-celery-results django-celery-beat
|
||||
```
|
||||
|
||||
### `celery.py` — App Entrypoint
|
||||
|
||||
```python
|
||||
# config/celery.py
|
||||
import os
|
||||
from celery import Celery
|
||||
|
||||
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.development')
|
||||
|
||||
app = Celery('myproject')
|
||||
app.config_from_object('django.conf:settings', namespace='CELERY')
|
||||
app.autodiscover_tasks() # Discovers tasks.py in each INSTALLED_APP
|
||||
|
||||
@app.task(bind=True, ignore_result=True)
|
||||
def debug_task(self):
|
||||
print(f'Request: {self.request!r}')
|
||||
```
|
||||
|
||||
```python
|
||||
# config/__init__.py
|
||||
from .celery import app as celery_app
|
||||
|
||||
__all__ = ('celery_app',)
|
||||
```
|
||||
|
||||
### Django Settings
|
||||
|
||||
```python
|
||||
# config/settings/base.py
|
||||
|
||||
# Broker (Redis recommended for production)
|
||||
CELERY_BROKER_URL = env('CELERY_BROKER_URL', default='redis://localhost:6379/0')
|
||||
CELERY_RESULT_BACKEND = env('CELERY_RESULT_BACKEND', default='django-db')
|
||||
|
||||
# Serialization
|
||||
CELERY_ACCEPT_CONTENT = ['json']
|
||||
CELERY_TASK_SERIALIZER = 'json'
|
||||
CELERY_RESULT_SERIALIZER = 'json'
|
||||
|
||||
# Task behavior
|
||||
CELERY_TASK_TRACK_STARTED = True
|
||||
CELERY_TASK_TIME_LIMIT = 30 * 60 # Hard limit: 30 min
|
||||
CELERY_TASK_SOFT_TIME_LIMIT = 25 * 60 # Soft limit: sends SoftTimeLimitExceeded
|
||||
CELERY_WORKER_PREFETCH_MULTIPLIER = 1 # Prevent worker hoarding long tasks
|
||||
CELERY_TASK_ACKS_LATE = True # Re-queue on worker crash
|
||||
|
||||
# Result persistence
|
||||
CELERY_RESULT_EXPIRES = 60 * 60 * 24 # Keep results 24 hours
|
||||
|
||||
# Beat scheduler (for periodic tasks)
|
||||
CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler'
|
||||
|
||||
# Installed apps
|
||||
INSTALLED_APPS += [
|
||||
'django_celery_results',
|
||||
'django_celery_beat',
|
||||
]
|
||||
```
|
||||
|
||||
### Running Workers
|
||||
|
||||
```bash
|
||||
# Start worker (development)
|
||||
celery -A config worker --loglevel=info
|
||||
|
||||
# Start beat scheduler (periodic tasks)
|
||||
celery -A config beat --loglevel=info --scheduler django_celery_beat.schedulers:DatabaseScheduler
|
||||
|
||||
# Combined worker + beat (dev only, never production)
|
||||
celery -A config worker --beat --loglevel=info
|
||||
|
||||
# Production: multiple workers with concurrency
|
||||
celery -A config worker --loglevel=warning --concurrency=4 -Q default,high_priority
|
||||
```
|
||||
|
||||
## Task Design Patterns
|
||||
|
||||
### Basic Task
|
||||
|
||||
```python
|
||||
# apps/notifications/tasks.py
|
||||
from celery import shared_task
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@shared_task(name='notifications.send_welcome_email')
|
||||
def send_welcome_email(user_id: int) -> None:
|
||||
"""Send welcome email to newly registered user."""
|
||||
from apps.users.models import User
|
||||
from apps.notifications.services import EmailService
|
||||
|
||||
try:
|
||||
user = User.objects.get(pk=user_id)
|
||||
except User.DoesNotExist:
|
||||
logger.warning('send_welcome_email: user %s not found', user_id)
|
||||
return # Idempotent — do not raise, task already impossible to complete
|
||||
|
||||
EmailService.send_welcome(user)
|
||||
logger.info('Welcome email sent to user %s', user_id)
|
||||
```
|
||||
|
||||
### Retryable Task
|
||||
|
||||
```python
|
||||
@shared_task(
|
||||
bind=True,
|
||||
name='integrations.sync_to_crm',
|
||||
max_retries=5,
|
||||
default_retry_delay=60, # seconds before first retry
|
||||
autoretry_for=(ConnectionError, TimeoutError),
|
||||
retry_backoff=True, # exponential backoff
|
||||
retry_backoff_max=600, # cap at 10 minutes
|
||||
retry_jitter=True, # randomise to avoid thundering herd
|
||||
)
|
||||
def sync_contact_to_crm(self, contact_id: int) -> dict:
|
||||
"""Sync contact to external CRM with retry on transient failures."""
|
||||
from apps.crm.services import CRMClient
|
||||
|
||||
try:
|
||||
result = CRMClient().sync(contact_id)
|
||||
return result
|
||||
except CRMClient.RateLimitError as exc:
|
||||
# Specific retry delay from response header
|
||||
raise self.retry(exc=exc, countdown=int(exc.retry_after))
|
||||
```
|
||||
|
||||
### Idempotent Task Pattern
|
||||
|
||||
Design tasks so they can safely run multiple times with the same inputs:
|
||||
|
||||
```python
|
||||
@shared_task(name='orders.mark_shipped')
|
||||
def mark_order_shipped(order_id: int, tracking_number: str) -> None:
|
||||
"""Mark order as shipped — safe to run multiple times."""
|
||||
from apps.orders.models import Order
|
||||
|
||||
updated = Order.objects.filter(
|
||||
pk=order_id,
|
||||
status=Order.Status.PROCESSING, # Guard: only update if not already shipped
|
||||
).update(
|
||||
status=Order.Status.SHIPPED,
|
||||
tracking_number=tracking_number,
|
||||
)
|
||||
|
||||
if not updated:
|
||||
logger.info('mark_order_shipped: order %s already shipped or not found', order_id)
|
||||
```
|
||||
|
||||
### Task with Soft Time Limit
|
||||
|
||||
```python
|
||||
from celery.exceptions import SoftTimeLimitExceeded
|
||||
|
||||
@shared_task(
|
||||
bind=True,
|
||||
name='reports.generate_pdf',
|
||||
soft_time_limit=120,
|
||||
time_limit=150,
|
||||
)
|
||||
def generate_pdf_report(self, report_id: int) -> str:
|
||||
"""Generate PDF report with graceful timeout handling."""
|
||||
from apps.reports.services import PDFGenerator
|
||||
|
||||
try:
|
||||
path = PDFGenerator.build(report_id)
|
||||
return path
|
||||
except SoftTimeLimitExceeded:
|
||||
# Clean up partial files before hard kill
|
||||
PDFGenerator.cleanup(report_id)
|
||||
raise
|
||||
```
|
||||
|
||||
## Calling Tasks
|
||||
|
||||
```python
|
||||
from datetime import timedelta
|
||||
from django.utils import timezone
|
||||
|
||||
# Fire and forget (async)
|
||||
send_welcome_email.delay(user.pk)
|
||||
|
||||
# Schedule in the future
|
||||
send_reminder.apply_async(args=[user.pk], countdown=3600) # 1 hour from now
|
||||
send_reminder.apply_async(args=[user.pk], eta=timezone.now() + timedelta(days=1))
|
||||
|
||||
# Apply with queue routing
|
||||
sync_contact_to_crm.apply_async(args=[contact.pk], queue='high_priority')
|
||||
|
||||
# Run synchronously (tests / debugging only)
|
||||
result = generate_pdf_report.apply(args=[report.pk])
|
||||
```
|
||||
|
||||
## Beat Scheduling (Periodic Tasks)
|
||||
|
||||
### Code-Defined Schedule
|
||||
|
||||
```python
|
||||
# config/settings/base.py
|
||||
from celery.schedules import crontab
|
||||
|
||||
CELERY_BEAT_SCHEDULE = {
|
||||
'cleanup-expired-sessions': {
|
||||
'task': 'users.cleanup_expired_sessions',
|
||||
'schedule': crontab(hour=2, minute=0), # 2am daily
|
||||
},
|
||||
'sync-inventory': {
|
||||
'task': 'products.sync_inventory',
|
||||
'schedule': 60.0, # every 60 seconds
|
||||
},
|
||||
'weekly-digest': {
|
||||
'task': 'notifications.send_weekly_digest',
|
||||
'schedule': crontab(day_of_week='monday', hour=8, minute=0),
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
### Database-Defined Schedule (via django-celery-beat)
|
||||
|
||||
```python
|
||||
# Manage periodic tasks from Django admin or code
|
||||
from django_celery_beat.models import PeriodicTask, CrontabSchedule
|
||||
import json
|
||||
|
||||
schedule, _ = CrontabSchedule.objects.get_or_create(
|
||||
hour='*/6', minute='0',
|
||||
timezone='UTC',
|
||||
)
|
||||
|
||||
PeriodicTask.objects.update_or_create(
|
||||
name='Sync inventory every 6 hours',
|
||||
defaults={
|
||||
'crontab': schedule,
|
||||
'task': 'products.sync_inventory',
|
||||
'args': json.dumps([]),
|
||||
'enabled': True,
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Canvas: Chaining and Grouping Tasks
|
||||
|
||||
```python
|
||||
from celery import chain, group, chord
|
||||
|
||||
# Chain: run tasks sequentially, passing results
|
||||
pipeline = chain(
|
||||
fetch_data.s(source_id),
|
||||
transform_data.s(), # receives fetch_data result as first arg
|
||||
load_to_warehouse.s(),
|
||||
)
|
||||
pipeline.delay()
|
||||
|
||||
# Group: run tasks in parallel
|
||||
parallel = group(
|
||||
send_welcome_email.s(user_id)
|
||||
for user_id in new_user_ids
|
||||
)
|
||||
parallel.delay()
|
||||
|
||||
# Chord: parallel tasks + callback when all complete
|
||||
result = chord(
|
||||
group(process_chunk.s(chunk) for chunk in data_chunks),
|
||||
aggregate_results.s(), # called with list of chunk results
|
||||
)
|
||||
result.delay()
|
||||
```
|
||||
|
||||
## Error Handling and Dead Letter Queue
|
||||
|
||||
```python
|
||||
# apps/core/tasks.py
|
||||
from celery.signals import task_failure
|
||||
|
||||
@task_failure.connect
|
||||
def on_task_failure(sender, task_id, exception, args, kwargs, traceback, einfo, **kw):
|
||||
"""Log all task failures to Sentry / alerting."""
|
||||
import sentry_sdk
|
||||
with sentry_sdk.new_scope() as scope:
|
||||
scope.set_context('celery', {
|
||||
'task': sender.name,
|
||||
'task_id': task_id,
|
||||
'args': args,
|
||||
'kwargs': kwargs,
|
||||
})
|
||||
sentry_sdk.capture_exception(exception)
|
||||
```
|
||||
|
||||
```python
|
||||
# Route failed tasks to dead-letter queue after max retries
|
||||
@shared_task(
|
||||
bind=True,
|
||||
max_retries=3,
|
||||
name='payments.charge_card',
|
||||
)
|
||||
def charge_card(self, order_id: int) -> None:
|
||||
from apps.payments.models import Order, FailedCharge
|
||||
|
||||
try:
|
||||
_do_charge(order_id)
|
||||
except Exception as exc:
|
||||
if self.request.retries >= self.max_retries:
|
||||
# Persist to dead-letter table for manual review
|
||||
FailedCharge.objects.create(
|
||||
order_id=order_id,
|
||||
error=str(exc),
|
||||
task_id=self.request.id,
|
||||
)
|
||||
return # Don't raise — task is permanently failed
|
||||
raise self.retry(exc=exc)
|
||||
```
|
||||
|
||||
## Testing Celery Tasks
|
||||
|
||||
### Unit Testing (No Broker)
|
||||
|
||||
```python
|
||||
# tests/test_tasks.py
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from apps.notifications.tasks import send_welcome_email
|
||||
|
||||
class TestSendWelcomeEmail:
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_sends_email_to_existing_user(self, user):
|
||||
with patch('apps.notifications.services.EmailService') as mock_email:
|
||||
send_welcome_email(user.pk)
|
||||
mock_email.send_welcome.assert_called_once_with(user)
|
||||
|
||||
@pytest.mark.django_db
|
||||
def test_skips_missing_user_gracefully(self):
|
||||
"""Should not raise when user is deleted between enqueue and execute."""
|
||||
send_welcome_email(99999) # Non-existent user — must not raise
|
||||
```
|
||||
|
||||
### Integration Testing with CELERY_TASK_ALWAYS_EAGER
|
||||
|
||||
```python
|
||||
# config/settings/test.py
|
||||
CELERY_TASK_ALWAYS_EAGER = True # Run tasks synchronously in tests
|
||||
CELERY_TASK_EAGER_PROPAGATES = True # Re-raise exceptions from tasks
|
||||
|
||||
# tests/test_integration.py
|
||||
@pytest.mark.django_db
|
||||
def test_registration_triggers_welcome_email(client):
|
||||
with patch('apps.notifications.services.EmailService') as mock_email:
|
||||
response = client.post('/api/users/', {
|
||||
'email': 'new@example.com',
|
||||
'password': 'strongpass123',
|
||||
})
|
||||
|
||||
assert response.status_code == 201
|
||||
mock_email.send_welcome.assert_called_once()
|
||||
```
|
||||
|
||||
### Testing Retries
|
||||
|
||||
```python
|
||||
@pytest.mark.django_db
|
||||
def test_task_retries_on_connection_error():
|
||||
with patch('apps.crm.services.CRMClient.sync') as mock_sync:
|
||||
mock_sync.side_effect = ConnectionError('timeout')
|
||||
|
||||
with pytest.raises(ConnectionError):
|
||||
sync_contact_to_crm.apply(args=[1], throw=True)
|
||||
|
||||
assert mock_sync.call_count == 1 # First attempt only when eager
|
||||
```
|
||||
|
||||
## Monitoring
|
||||
|
||||
```bash
|
||||
# Inspect active workers and queues
|
||||
celery -A config inspect active
|
||||
celery -A config inspect stats
|
||||
celery -A config inspect reserved
|
||||
|
||||
# Check queue lengths (Redis)
|
||||
redis-cli llen celery
|
||||
|
||||
# Flower: web-based real-time monitor
|
||||
pip install flower
|
||||
celery -A config flower --port=5555
|
||||
```
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
```python
|
||||
# BAD: Passing model instances — they may be stale by execution time
|
||||
send_welcome_email.delay(user) # Never pass ORM objects
|
||||
# GOOD: Pass the primary key; the task re-fetches the row when it runs
send_welcome_email.delay(user.pk) # Always pass PKs
|
||||
|
||||
# BAD: Calling tasks synchronously in production views
|
||||
result = generate_report.apply() # Blocks the request thread
|
||||
|
||||
# BAD: Non-idempotent task without guards
|
||||
@shared_task
|
||||
def charge_and_fulfill(order_id):
|
||||
order.charge() # May charge twice if task retries!
|
||||
order.fulfill()
|
||||
|
||||
# GOOD: Idempotent with status guard
|
||||
@shared_task
def charge_and_fulfill(order_id):
    """Charge and fulfill an order, skipping orders that are no longer PENDING.

    The status check and the charge run under a row lock so that a retried or
    duplicated task cannot charge the same order twice.
    """
    from django.db import transaction

    # select_for_update() must be evaluated inside a transaction: in Django's
    # default autocommit mode it raises TransactionManagementError, and the
    # row lock is only held until the enclosing atomic block exits.
    with transaction.atomic():
        order = Order.objects.select_for_update().get(pk=order_id)
        if order.status != Order.Status.PENDING:
            return  # Already processed
        order.charge()
        order.fulfill()
|
||||
```
|
||||
|
||||
## Production Checklist
|
||||
|
||||
| Check | Setting |
|
||||
|-------|---------|
|
||||
| Worker restarts on crash | `supervisord` or `systemd` unit |
|
||||
| `CELERY_TASK_ACKS_LATE = True` | Re-queue tasks on worker crash |
|
||||
| `CELERY_WORKER_PREFETCH_MULTIPLIER = 1` | Fair distribution of long tasks |
|
||||
| Separate queues per priority | `-Q default,high_priority,low_priority` |
|
||||
| `CELERY_TASK_SOFT_TIME_LIMIT` set | Graceful timeout before hard kill |
|
||||
| Sentry integration | Capture all `task_failure` signals |
|
||||
| Flower or other monitor | Visibility into queue depths |
|
||||
| Beat runs on single node only | Prevents duplicate scheduled task execution |
|
||||
|
||||
## Related Skills
|
||||
|
||||
- `django-patterns` — ORM, service layer, and project structure
|
||||
- `django-tdd` — Testing Django models, views, and services
|
||||
- `python-testing` — pytest configuration and fixtures
|
||||
92
skills/frontend-design-direction/SKILL.md
Normal file
92
skills/frontend-design-direction/SKILL.md
Normal file
@@ -0,0 +1,92 @@
|
||||
---
|
||||
name: frontend-design-direction
|
||||
description: Set an ECC-specific frontend design direction for production UI work. Use when building or improving websites, dashboards, applications, components, landing pages, visual tools, or any web UI that needs stronger product-specific design judgment.
|
||||
origin: community
|
||||
---
|
||||
|
||||
# Frontend Design Direction
|
||||
|
||||
Use this skill when the work is not just making UI function, but making it feel
|
||||
purposeful, polished, and appropriate to the product domain.
|
||||
|
||||
Source: salvaged from stale community PR #1659 by `linus707`.
|
||||
|
||||
Note: ECC intentionally does not rebundle the canonical Anthropic
|
||||
`frontend-design` skill. Install that from `anthropics/skills` when you want the
|
||||
official upstream skill. This skill preserves the useful ECC-specific
|
||||
design-direction guidance salvaged from #1659.
|
||||
|
||||
## When to Use
|
||||
|
||||
- The user asks to build a web page, app, dashboard, artifact, component, or UI.
|
||||
- The user asks to make an interface more polished, distinctive, beautiful, or
|
||||
less generic.
|
||||
- The implementation needs visual hierarchy, typography, color, motion, layout,
|
||||
and interaction choices.
|
||||
- The current UI works but reads as flat, generic, templated, or mismatched to
|
||||
the audience.
|
||||
|
||||
## Design Direction
|
||||
|
||||
Before coding, choose a specific direction:
|
||||
|
||||
1. Purpose: what job does the interface do?
|
||||
2. Audience: who repeats this workflow, and what do they need to scan first?
|
||||
3. Tone: utilitarian, editorial, playful, industrial, refined, technical,
|
||||
maximal, minimal, dense, calm, or another explicit direction.
|
||||
4. Memorable detail: one design idea that makes the result feel intentional.
|
||||
5. Constraints: framework, accessibility, performance, responsiveness, and
|
||||
existing design system.
|
||||
|
||||
Match the direction to the domain. A SaaS operations tool should usually be
|
||||
dense, quiet, and scannable. A portfolio, launch page, game, or editorial piece
|
||||
can be more expressive. Do not force a landing-page composition onto a tool that
|
||||
needs repeated daily use.
|
||||
|
||||
## Implementation Guidance
|
||||
|
||||
- Build the actual usable experience as the first screen unless the user
|
||||
explicitly asks for marketing copy.
|
||||
- Use existing project components, tokens, icon libraries, and routing patterns
|
||||
before introducing a new visual system.
|
||||
- Use real or generated visual assets when the interface depends on images,
|
||||
products, places, people, gameplay, charts, or inspectable media.
|
||||
- Prefer contextual typography and spacing over generic oversized hero text.
|
||||
- Keep palettes multi-dimensional: avoid a UI dominated by one hue family.
|
||||
- Use CSS variables or existing design tokens so the direction remains
|
||||
coherent across states.
|
||||
- Design responsive constraints explicitly: grids, aspect ratios, min/max
|
||||
sizes, stable toolbars, and fixed-format controls should not shift when labels
|
||||
or hover states appear.
|
||||
- Use motion sparingly but deliberately. Prefer high-signal transitions that
|
||||
clarify state over decorative animation.
|
||||
- Verify text fit on mobile and desktop. Long labels must wrap or resize
|
||||
cleanly rather than overflowing.
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
- Do not default to common generated patterns: purple gradients, decorative
|
||||
blobs, oversized cards, vague hero copy, or stock-like atmospheric media.
|
||||
- Do not add UI cards inside other cards.
|
||||
- Do not use a single decorative style everywhere when the domain calls for
|
||||
restraint.
|
||||
- Do not hide the primary product, tool, object, or workflow behind generic
|
||||
marketing sections.
|
||||
- Do not add a new dependency for a design flourish unless it clearly pays for
|
||||
itself.
|
||||
- Do not describe the UI's features inside the UI when the controls can speak
|
||||
for themselves.
|
||||
|
||||
## Review Checklist
|
||||
|
||||
- The first viewport immediately communicates the product, workflow, or object.
|
||||
- The visual hierarchy supports scanning and repeated use.
|
||||
- Typography fits the container and does not overlap adjacent content.
|
||||
- Color choices have contrast and do not collapse into a one-note palette.
|
||||
- Icons are used for familiar tool actions where available.
|
||||
- Responsive layout has stable dimensions for boards, grids, toolbars,
|
||||
controls, tiles, and counters.
|
||||
- Assets render and carry the subject matter instead of acting as filler.
|
||||
- Motion improves orientation and does not mask sluggishness.
|
||||
- The result matches the repo's existing frontend conventions unless there is a
|
||||
clear reason to depart.
|
||||
151
skills/make-interfaces-feel-better/SKILL.md
Normal file
151
skills/make-interfaces-feel-better/SKILL.md
Normal file
@@ -0,0 +1,151 @@
|
||||
---
|
||||
name: make-interfaces-feel-better
|
||||
description: Apply concrete design-engineering details that make interfaces feel polished. Use when reviewing or improving UI spacing, typography, borders, shadows, motion, hit areas, icons, text wrapping, and interaction states.
|
||||
origin: community
|
||||
---
|
||||
|
||||
# Make Interfaces Feel Better
|
||||
|
||||
Use this skill for the small design-engineering details that compound into a
|
||||
more polished interface.
|
||||
|
||||
Source: salvaged from stale community PR #1659 by `linus707`.
|
||||
|
||||
## When to Use
|
||||
|
||||
- The user says the UI feels off, flat, generic, cramped, jumpy, or unfinished.
|
||||
- You are building controls, cards, lists, dashboards, navigation, forms, or
|
||||
toolbars.
|
||||
- A component needs hover, active, focus, enter, exit, loading, or empty states.
|
||||
- A frontend review needs specific before/after recommendations.
|
||||
|
||||
## Core Principles
|
||||
|
||||
### Concentric Radius
|
||||
|
||||
For nearby nested rounded surfaces:
|
||||
|
||||
```text
|
||||
outer radius = inner radius + padding
|
||||
```
|
||||
|
||||
If padding is large, treat layers as separate surfaces instead of forcing the
|
||||
math. The point is optical coherence, not formula worship.
|
||||
|
||||
### Optical Alignment
|
||||
|
||||
Geometric centering is not always visual centering. Icon buttons, play
|
||||
triangles, arrows, stars, and asymmetric icons often need a small offset. Fix the
|
||||
SVG when possible; otherwise adjust with a pixel-level margin or padding change.
|
||||
|
||||
### Shadows And Borders
|
||||
|
||||
Use borders for separation and focus rings. Use layered shadows when a card,
|
||||
button, dropdown, or popover needs depth. Shadows should be transparent and
|
||||
subtle enough to work across backgrounds.
|
||||
|
||||
### Text Wrapping
|
||||
|
||||
- Use `text-wrap: balance` on headings and short titles.
|
||||
- Use `text-wrap: pretty` on short-to-medium body text, captions, descriptions,
|
||||
and list items.
|
||||
- Avoid both on long prose, code, and preformatted content.
|
||||
- Use `font-variant-numeric: tabular-nums` for counters, timers, prices, tables,
|
||||
and other updating numbers.
|
||||
|
||||
### Font Smoothing
|
||||
|
||||
On macOS, apply antialiased font smoothing at the root layout when the project
|
||||
does not already do so:
|
||||
|
||||
```css
|
||||
html {
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
}
|
||||
```
|
||||
|
||||
### Image Outlines
|
||||
|
||||
Images often need a subtle inset outline so their edges do not blur into the
|
||||
surface.
|
||||
|
||||
```css
|
||||
img {
|
||||
outline: 1px solid rgba(0, 0, 0, 0.1);
|
||||
outline-offset: -1px;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
img {
|
||||
outline-color: rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Use neutral black or white alpha outlines. Do not tint image outlines with the
|
||||
brand palette.
|
||||
|
||||
### Motion
|
||||
|
||||
Use CSS transitions for interactive state changes because they can retarget
|
||||
when the user changes intent mid-motion. Reserve keyframes for staged
|
||||
one-shot entrances or loading sequences.
|
||||
|
||||
Good motion defaults:
|
||||
|
||||
- Enter: combine opacity, small `translateY`, and optionally blur.
|
||||
- Exit: shorter and quieter than enter, usually 150ms.
|
||||
- Press: `scale(0.96)` for tactile buttons, with a way to disable it when the
|
||||
movement distracts.
|
||||
- Icon swaps: cross-fade with opacity, scale, and blur instead of instant
|
||||
visibility toggles.
|
||||
|
||||
### Transition Scope
|
||||
|
||||
Never use `transition: all`. Specify the changed properties:
|
||||
|
||||
```css
|
||||
.button {
|
||||
transition-property: transform, background-color, box-shadow;
|
||||
transition-duration: 150ms;
|
||||
transition-timing-function: ease-out;
|
||||
}
|
||||
```
|
||||
|
||||
Use `will-change` only for first-frame stutter on compositor-friendly
|
||||
properties such as `transform`, `opacity`, and `filter`. Never use
|
||||
`will-change: all`.
|
||||
|
||||
### Hit Areas
|
||||
|
||||
Interactive controls should have at least a 40x40px hit area, ideally 44x44px
|
||||
where the layout allows it. Expand with a pseudo-element when the visible icon
|
||||
is smaller, but do not let expanded hit areas overlap.
|
||||
|
||||
## Review Output
|
||||
|
||||
When reviewing a UI polish pass, report concrete changes in before/after rows:
|
||||
|
||||
| Principle | Before | After |
|
||||
| --- | --- | --- |
|
||||
| Concentric radius | Same radius on parent and child | Parent radius accounts for padding |
|
||||
| Tabular numbers | Counter shifts as digits change | Counter uses `tabular-nums` |
|
||||
| Transition scope | `transition: all` | Explicit transition properties |
|
||||
|
||||
Include file paths and properties when they are not obvious from the snippets.
|
||||
Omit principles that you checked but did not change.
|
||||
|
||||
## Checklist
|
||||
|
||||
- Nested rounded elements are optically coherent.
|
||||
- Icons are visually centered.
|
||||
- Buttons, cards, and popovers use borders or shadows for the right reason.
|
||||
- Headings and short text avoid awkward wrapping.
|
||||
- Dynamic numbers use tabular numerals.
|
||||
- Images have neutral outlines where needed.
|
||||
- Enter and exit animations are split, subtle, and interruptible where
|
||||
appropriate.
|
||||
- Buttons have tactile active states without exaggerated motion.
|
||||
- `transition: all` and `will-change: all` are absent.
|
||||
- Small controls still have usable hit areas.
|
||||
140
skills/skill-scout/SKILL.md
Normal file
140
skills/skill-scout/SKILL.md
Normal file
@@ -0,0 +1,140 @@
|
||||
---
|
||||
name: skill-scout
|
||||
description: Search existing local, marketplace, GitHub, and web skill sources before creating a new skill. Use when the user wants to create, build, fork, or find a skill for a workflow.
|
||||
origin: community
|
||||
---
|
||||
|
||||
# Skill Scout
|
||||
|
||||
Use this skill before creating a new skill. The goal is to avoid duplicating
|
||||
existing community or marketplace work, while still vetting anything external
|
||||
before adoption.
|
||||
|
||||
Source: salvaged from stale community PR #1232 by `redminwang`.
|
||||
|
||||
## When to Use
|
||||
|
||||
- The user says "create a skill", "build a skill", "make a skill", or "new
|
||||
skill".
|
||||
- The user asks "is there a skill for X?" or "does a skill exist that does Y?"
|
||||
- The user describes a workflow and you are about to suggest creating a new
|
||||
skill.
|
||||
- The user wants to fork or extend an existing skill.
|
||||
|
||||
If the user explicitly says to skip search or create from scratch, acknowledge
|
||||
that and proceed with the requested creation workflow.
|
||||
|
||||
## How It Works
|
||||
|
||||
### Step 1 - Capture Intent
|
||||
|
||||
Extract:
|
||||
|
||||
- The task the skill should perform.
|
||||
- The trigger conditions for using it.
|
||||
- The domain, tools, frameworks, or data sources involved.
|
||||
- Three to five search keywords plus useful synonyms.
|
||||
|
||||
### Step 2 - Search Local Sources
|
||||
|
||||
Search installed and marketplace skill names first. Local sources are preferred
|
||||
because they are already part of the user's environment.
|
||||
|
||||
```bash
|
||||
find ~/.claude/skills -maxdepth 2 -name SKILL.md 2>/dev/null | grep -iE "keyword|synonym"
|
||||
find ~/.claude/plugins/marketplaces -path '*/skills/*/SKILL.md' 2>/dev/null | grep -iE "keyword|synonym"
|
||||
```
|
||||
|
||||
Then search frontmatter descriptions:
|
||||
|
||||
```bash
|
||||
grep -RilE "keyword|synonym" ~/.claude/skills ~/.claude/plugins/marketplaces 2>/dev/null
|
||||
```
|
||||
|
||||
### Step 3 - Search Remote Sources
|
||||
|
||||
Use available GitHub and web search tools. Prefer concise queries:
|
||||
|
||||
```bash
|
||||
gh search repos "claude code skill keyword" --limit 10 --sort stars
|
||||
gh search code "name: keyword" --filename SKILL.md --limit 10
|
||||
```
|
||||
|
||||
For web search, use at most three targeted queries such as:
|
||||
|
||||
```text
|
||||
"claude code skill" keyword
|
||||
"SKILL.md" keyword
|
||||
"everything-claude-code" keyword
|
||||
```
|
||||
|
||||
### Step 4 - Vet External Matches
|
||||
|
||||
Before recommending any external skill for adoption or forking:
|
||||
|
||||
- Read the `SKILL.md` frontmatter and instructions.
|
||||
- Look for unexpected shell commands, file writes, network calls, credential
|
||||
handling, or package installs.
|
||||
- Check whether the repository appears maintained.
|
||||
- Prefer copying into a fresh local branch and reviewing the diff over editing
|
||||
marketplace originals.
|
||||
|
||||
### Step 5 - Rank Results
|
||||
|
||||
Rank candidates by:
|
||||
|
||||
1. Exact keyword match in the skill name.
|
||||
2. Keyword or synonym match in description.
|
||||
3. Local installed or marketplace source.
|
||||
4. Maintained GitHub source with recent activity.
|
||||
5. Web-only mention.
|
||||
|
||||
Cap the final list at 10 results.
|
||||
|
||||
### Step 6 - Present Decision Options
|
||||
|
||||
Give the user a short table:
|
||||
|
||||
| Option | Meaning |
|
||||
| --- | --- |
|
||||
| Use existing | Invoke or install a matching skill as-is. |
|
||||
| Fork or extend | Copy the closest skill and modify it. |
|
||||
| Create fresh | Build a new skill after confirming no close match exists. |
|
||||
|
||||
Only create a new skill after the user chooses that path or after the search
|
||||
finds no close match.
|
||||
|
||||
## Examples
|
||||
|
||||
### Result Table
|
||||
|
||||
```markdown
|
||||
| # | Skill | Source | Why it matches | Gap |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| 1 | article-writing | Local ECC | Drafts articles and guides | Not focused on release notes |
|
||||
| 2 | content-engine | Local ECC | Multi-format content workflow | Heavier than needed |
|
||||
| 3 | blog-writer | GitHub | Blog writing skill with recent commits | Needs security review |
|
||||
```
|
||||
|
||||
### User-Facing Summary
|
||||
|
||||
```markdown
|
||||
I found two close local matches and one external candidate. The closest fit is
|
||||
`article-writing`; it covers drafting and revision, but it does not include the
|
||||
release-note checklist you asked for. I can either use it as-is, fork it into a
|
||||
release-note variant, or create a fresh skill.
|
||||
```
|
||||
|
||||
## Anti-Patterns
|
||||
|
||||
- Do not jump directly to new skill creation when a search is reasonable.
|
||||
- Do not install external skills without reading them first.
|
||||
- Do not present a long unranked list of weak matches.
|
||||
- Do not treat web-only mentions as trusted sources.
|
||||
- Do not edit installed marketplace originals in place.
|
||||
|
||||
## Related
|
||||
|
||||
- `search-first` - General search-before-building workflow.
|
||||
- `skill-stocktake` - Audit installed skills for health, duplicates, and gaps.
|
||||
- `agent-sort` - Categorize and organize existing agents and skills.
|
||||
82
tests/ci/code-reviewer-false-positive-guard.test.js
Normal file
82
tests/ci/code-reviewer-false-positive-guard.test.js
Normal file
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env node
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const repoRoot = path.resolve(__dirname, '..', '..');
|
||||
const reviewerPath = path.join(repoRoot, 'agents', 'code-reviewer.md');
|
||||
|
||||
const requiredHeadings = [
|
||||
'## Confidence-Based Filtering',
|
||||
'### Pre-Report Gate',
|
||||
'### HIGH / CRITICAL Require Proof',
|
||||
'### It Is Acceptable And Expected To Return Zero Findings',
|
||||
'## Common False Positives - Skip These',
|
||||
];
|
||||
|
||||
const requiredPatterns = [
|
||||
/Can I cite the exact line/i,
|
||||
/concrete failure mode/i,
|
||||
/Have I read the surrounding context/i,
|
||||
/Severity inflation/i,
|
||||
/exact snippet and line number/i,
|
||||
/specific failure scenario/i,
|
||||
/demote to MEDIUM or drop/i,
|
||||
/clean review is a valid review/i,
|
||||
/Manufactured findings/i,
|
||||
/Common False Positives/i,
|
||||
/Consider adding error handling/i,
|
||||
/Missing input validation/i,
|
||||
/Magic number/i,
|
||||
/Would a senior engineer on this\s+team actually change this in review/i,
|
||||
/Do not withhold approval to appear rigorous/i,
|
||||
];
|
||||
|
||||
let passed = 0;
|
||||
let failed = 0;
|
||||
|
||||
/**
 * Runs a single named check synchronously and records its outcome.
 *
 * A check passes when `fn` returns without throwing. The result is printed to
 * stdout and the module-level `passed` / `failed` counters are updated.
 *
 * @param {string} name - Label printed next to the PASS/FAIL marker.
 * @param {Function} fn - Check body; throwing marks the check as failed.
 */
function test(name, fn) {
  let outcome;
  try {
    fn();
    outcome = { ok: true };
  } catch (error) {
    outcome = { ok: false, error };
  }

  if (outcome.ok) {
    console.log(` PASS ${name}`);
    passed++;
    return;
  }

  console.log(` FAIL ${name}`);
  console.log(` Error: ${outcome.error.message}`);
  failed++;
}
|
||||
|
||||
/**
 * Loads the file at `reviewerPath` from disk.
 *
 * @returns {string} The file contents decoded as UTF-8.
 */
function readReviewer() {
  const markdown = fs.readFileSync(reviewerPath, { encoding: 'utf8' });
  return markdown;
}
|
||||
|
||||
console.log('\n=== Testing code-reviewer false-positive guardrails ===\n');

// One check per required heading: the heading text must appear verbatim.
requiredHeadings.forEach((heading) => {
  test(`code-reviewer.md contains heading: ${heading}`, () => {
    const reviewerText = readReviewer();
    const found = reviewerText.includes(heading);
    assert.ok(found, `code-reviewer.md missing required heading "${heading}"`);
  });
});

// One check per required phrase, matched with the regexes declared above.
requiredPatterns.forEach((pattern) => {
  test(`code-reviewer.md matches ${pattern}`, () => {
    const reviewerText = readReviewer();
    const found = pattern.test(reviewerText);
    assert.ok(found, `code-reviewer.md missing required pattern ${pattern}`);
  });
});

test('code-reviewer.md retains the >80% confidence threshold', () => {
  const reviewerText = readReviewer();
  const thresholdPattern = />\s*80%\s*confident/i;
  assert.ok(thresholdPattern.test(reviewerText), 'code-reviewer.md missing >80% confidence threshold');
});

// Exit non-zero on any failure; otherwise print the pass count.
if (failed > 0) {
  console.log(`\nFailed: ${failed}`);
  process.exit(1);
} else {
  console.log(`\nPassed: ${passed}`);
}
|
||||
417
tests/docs/evaluator-rag-prototype.test.js
Normal file
417
tests/docs/evaluator-rag-prototype.test.js
Normal file
@@ -0,0 +1,417 @@
|
||||
'use strict';
|
||||
|
||||
const assert = require('assert');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const repoRoot = path.resolve(__dirname, '..', '..');
|
||||
const fixtureRoot = path.join(repoRoot, 'examples', 'evaluator-rag-prototype');
|
||||
|
||||
let passed = 0;
|
||||
let failed = 0;
|
||||
|
||||
/**
 * Executes one named check and tallies the result.
 *
 * `fn` is invoked synchronously. If it throws, the check is reported as
 * failed together with the thrown error's message; otherwise it is reported
 * as passed. The module-level `passed` / `failed` counters are incremented
 * accordingly.
 *
 * @param {string} name - Description printed beside the result marker.
 * @param {Function} fn - Check body.
 */
function test(name, fn) {
  let didThrow = false;
  let caught;

  try {
    fn();
  } catch (error) {
    didThrow = true;
    caught = error;
  }

  if (!didThrow) {
    console.log(` ✓ ${name}`);
    passed++;
    return;
  }

  console.log(` ✗ ${name}`);
  console.log(` Error: ${caught.message}`);
  failed++;
}
|
||||
|
||||
/**
 * Reads a text file addressed relative to `repoRoot`.
 *
 * @param {string} relativePath - Path joined onto `repoRoot`.
 * @returns {string} File contents decoded as UTF-8.
 */
function read(relativePath) {
  const absolutePath = path.join(repoRoot, relativePath);
  return fs.readFileSync(absolutePath, 'utf8');
}
|
||||
|
||||
/**
 * Reads and parses a JSON fixture that sits directly under `fixtureRoot`.
 *
 * This previously duplicated `readFixtureJson` line for line; it now
 * delegates to it so both helpers share one implementation.
 *
 * @param {string} fileName - File name joined onto `fixtureRoot`.
 * @returns {*} The parsed JSON value.
 */
function readJson(fileName) {
  return readFixtureJson(fileName);
}
|
||||
|
||||
/**
 * Reads and parses a JSON file addressed relative to `fixtureRoot`.
 *
 * @param {string} relativePath - Path joined onto `fixtureRoot`.
 * @returns {*} The parsed JSON value.
 */
function readFixtureJson(relativePath) {
  const fixturePath = path.join(fixtureRoot, relativePath);
  const rawText = fs.readFileSync(fixturePath, 'utf8');
  return JSON.parse(rawText);
}
|
||||
|
||||
console.log('\n=== Testing evaluator RAG prototype ===\n');
|
||||
|
||||
// The architecture doc must mention every artifact type and reference system.
test('architecture doc records the artifact contract and reference pressure', () => {
  const docText = read('docs/architecture/evaluator-rag-prototype.md');
  const requiredMentions = [
    'Scenario spec',
    'Trace',
    'Report',
    'Candidate playbook',
    'Verifier result',
    'Meta-Harness',
    'Autocontext',
    'Claude HUD',
    'Hermes Agent',
    'dmux, Orca, Superset, and Ghast',
    'ECC Tools'
  ];

  requiredMentions.forEach((mention) => {
    assert.ok(docText.includes(mention), `Missing doc requirement: ${mention}`);
  });
});
|
||||
|
||||
// Root fixtures: schema versions are pinned, and every derived artifact must
// share the scenario's id and declare read_only: true.
test('fixtures use one scenario id and declare read-only behavior', () => {
  const [scenario, trace, report, verifier] = [
    'scenario.json',
    'trace.json',
    'report.json',
    'verifier-result.json'
  ].map(fileName => readJson(fileName));

  const expectedSchemas = [
    [scenario, 'ecc.evaluator-rag.scenario.v1'],
    [trace, 'ecc.evaluator-rag.trace.v1'],
    [report, 'ecc.evaluator-rag.report.v1'],
    [verifier, 'ecc.evaluator-rag.verifier.v1']
  ];
  expectedSchemas.forEach(([artifact, schemaVersion]) => {
    assert.strictEqual(artifact.schema_version, schemaVersion);
  });

  [trace, report, verifier].forEach((artifact) => {
    assert.strictEqual(artifact.scenario_id, scenario.scenario_id);
    assert.strictEqual(artifact.read_only, true);
  });
});
|
||||
|
||||
// The trace must contain at least one event for each loop phase, and one
// event that promotes the maintainer salvage candidate.
test('trace covers the full self-improving harness loop', () => {
  const trace = readJson('trace.json');
  const recordedPhases = trace.events.map(({ phase }) => phase);
  const expectedPhases = ['observation', 'retrieval', 'proposal', 'verification', 'promotion'];

  expectedPhases.forEach((phase) => {
    assert.ok(recordedPhases.includes(phase), `Missing trace phase ${phase}`);
  });

  assert.ok(trace.events.some(event => event.promoted_candidate_id === 'maintainer-salvage-branch'));
});
|
||||
|
||||
// Forbidden actions are joined into one text blob and matched by substring,
// so each expected phrase may be part of a longer forbidden-action entry.
test('scenario blocks unsafe write actions and release actions', () => {
  const scenario = readJson('scenario.json');
  const forbiddenText = scenario.forbidden_actions.join('\n');
  const mustBeBlocked = [
    'closing, reopening, or commenting on PRs',
    'merging PRs',
    'creating release tags',
    'publishing packages or plugins',
    'copying private paths, secrets, or raw personal context',
    'blindly cherry-picking bulk localization'
  ];

  mustBeBlocked.forEach((action) => {
    assert.ok(forbiddenText.includes(action), `Missing forbidden action: ${action}`);
  });
});
|
||||
|
||||
// The verifier result must accept and promote the maintainer salvage
// candidate, reject the blind cherry-pick candidate, and score it lower.
test('verifier accepts maintainer salvage and rejects blind translation imports', () => {
  const verifier = readJson('verifier-result.json');
  const candidateById = id => verifier.candidates.find(candidate => candidate.candidate_id === id);

  const accepted = candidateById('maintainer-salvage-branch');
  const rejected = candidateById('blind-cherry-pick-translations');

  assert.ok(accepted, 'Missing accepted maintainer salvage candidate');
  assert.ok(rejected, 'Missing rejected blind cherry-pick candidate');

  assert.strictEqual(accepted.decision, 'accepted');
  assert.strictEqual(rejected.decision, 'rejected');
  assert.strictEqual(verifier.promoted_candidate_id, accepted.candidate_id);

  assert.ok(accepted.score > rejected.score);
  assert.ok(rejected.reasons.join('\n').includes('translator/manual review'));
});
|
||||
|
||||
// The root candidate playbook must keep each stale-salvage rule phrase.
test('candidate playbook preserves stale-salvage operating rules', () => {
  const playbookText = read('examples/evaluator-rag-prototype/candidate-playbook.md');
  const requiredRules = [
    'docs/stale-pr-salvage-ledger.md',
    'source PR',
    'maintainer-owned branch',
    'Preserve attribution',
    'translator/manual review',
    'private operator context',
    'git diff --check'
  ];

  requiredRules.forEach((rule) => {
    assert.ok(playbookText.includes(rule), `Missing playbook rule: ${rule}`);
  });
});
|
||||
|
||||
// The GA roadmap must link both the architecture doc and the fixture
// directory, and carry the "hosted integration remains future" status line.
test('roadmap points to the evaluator RAG prototype and keeps hosted integration open', () => {
  const roadmapText = read('docs/ECC-2.0-GA-ROADMAP.md');
  const mentions = needle => roadmapText.includes(needle);

  assert.ok(mentions('docs/architecture/evaluator-rag-prototype.md'));
  assert.ok(mentions('examples/evaluator-rag-prototype/'));
  assert.ok(mentions('Local corpus complete; hosted integration remains future'));
});
|
||||
|
||||
/**
 * Declarative description of each scenario fixture directory under
 * `examples/evaluator-rag-prototype/`.
 *
 * Fields:
 * - `name`: test name passed to `test()`.
 * - `scenarioId`: fixture sub-directory and expected `scenario_id`.
 * - `label`: short tag used in assertion messages.
 * - `forbiddenActions`: entries that must be present in `scenario.forbidden_actions`.
 * - `acceptanceGates`: entries that must be present in `scenario.acceptance_gates`
 *   (empty when the scenario's gates are not checked).
 * - `accepted` / `rejected`: candidate ids with their "missing" messages;
 *   `rejected.reason` must appear in the rejected candidate's reasons.
 * - `playbookSnippets`: strings that must appear in the candidate playbook.
 */
const scenarioCases = [
  {
    name: 'billing readiness scenario rejects launch copy overclaims',
    scenarioId: 'billing-marketplace-readiness',
    label: 'billing',
    forbiddenActions: [
      'creating or editing GitHub Marketplace listings',
      'changing plan limits, subscriptions, seats, or entitlements',
      'posting announcement copy',
      'claiming live billing readiness from dry-run evidence alone'
    ],
    acceptanceGates: [],
    accepted: {
      id: 'evidence-backed-billing-check',
      missing: 'Missing accepted billing evidence candidate'
    },
    rejected: {
      id: 'announcement-first-billing-copy',
      missing: 'Missing rejected announcement-overclaim candidate',
      reason: 'roadmap acceptance criteria'
    },
    playbookSnippets: [
      'remove-before-publication',
      'https://github.com/marketplace/ecc-tools'
    ]
  },
  {
    name: 'ci failure diagnosis scenario rejects rerun-only fixes',
    scenarioId: 'ci-failure-diagnosis',
    label: 'CI',
    forbiddenActions: [
      'rerunning CI until it passes without diagnosing the failure',
      'pushing speculative fixes without a captured failing log excerpt',
      'weakening or deleting tests to silence a failure',
      'merging or publishing while required checks are red'
    ],
    acceptanceGates: [
      'failing job and step are named',
      'captured log excerpt is linked or summarized',
      'changed-file context is compared to the failing step',
      'local reproduction or regression command is named'
    ],
    accepted: {
      id: 'log-backed-minimal-fix',
      missing: 'Missing accepted log-backed CI candidate'
    },
    rejected: {
      id: 'rerun-only-green-wait',
      missing: 'Missing rejected rerun-only CI candidate',
      reason: 'failing log excerpt'
    },
    playbookSnippets: [
      'gh run view <run-id> --log-failed',
      'Full required GitHub Actions matrix before merge'
    ]
  },
  {
    name: 'harness config quality scenario rejects unsupported parity claims',
    scenarioId: 'harness-config-quality',
    label: 'harness',
    forbiddenActions: [
      'claiming native support for instruction-backed or reference-only harnesses',
      'copying Claude hook semantics into Codex, Gemini, Zed, or OpenCode without adapter evidence',
      'silently overwriting existing user MCP, hook, plugin, command, or rule config',
      'publishing packages or plugins from this evaluator run'
    ],
    acceptanceGates: [
      'adapter state is retrieved from the matrix',
      'install or onramp path is named',
      'verification command is named',
      'config-preservation behavior is explicit'
    ],
    accepted: {
      id: 'adapter-matrix-backed-drift-check',
      missing: 'Missing accepted adapter-matrix candidate'
    },
    rejected: {
      id: 'unsupported-hook-parity-claim',
      missing: 'Missing rejected unsupported parity candidate',
      reason: 'native support'
    },
    playbookSnippets: [
      'npm run harness:adapters -- --check',
      'node tests/docs/mcp-management-docs.test.js'
    ]
  },
  {
    name: 'AgentShield policy exception scenario rejects blanket suppression',
    scenarioId: 'agentshield-policy-exception',
    label: 'AgentShield',
    forbiddenActions: [
      'approving policy exceptions without SARIF or report evidence',
      'treating expired exceptions as active',
      'blanket-suppressing AgentShield policy packs or organization-policy gates',
      'editing AgentShield code or policy files from this ECC evaluator run'
    ],
    acceptanceGates: [
      'SARIF or report evidence is named',
      'owner, ticket, scope, and expiry state are recorded',
      'expired exceptions stay rejected or enforced',
      'remediation versus time-boxed exception decision is explicit'
    ],
    accepted: {
      id: 'sarif-backed-timeboxed-exception-review',
      missing: 'Missing accepted AgentShield exception candidate'
    },
    rejected: {
      id: 'blanket-policy-suppression',
      missing: 'Missing rejected blanket suppression candidate',
      reason: 'blanket-suppresses'
    },
    playbookSnippets: [
      'agentshield-policy/*',
      'owner, ticket, scope, expiry',
      'npx ecc-agentshield scan --format json'
    ]
  },
  {
    name: 'skill quality evidence scenario rejects vague rewrites',
    scenarioId: 'skill-quality-evidence',
    label: 'skill-quality',
    forbiddenActions: [
      'promoting a skill rewrite without examples, validation, or observed failure evidence',
      'adding broad multi-domain skills that duplicate existing focused skills',
      'copying private operator context, secrets, tokens, or personal paths into skills',
      'claiming a skill-quality improvement without a reference set or regression command'
    ],
    acceptanceGates: [
      'changed skill or guidance surface is named',
      'observed failure, user feedback, or reference-set gap is recorded',
      'validation command is named',
      'example or regression evidence is attached'
    ],
    accepted: {
      id: 'evidence-backed-skill-amendment',
      missing: 'Missing accepted skill-quality candidate'
    },
    rejected: {
      id: 'vague-skill-rewrite',
      missing: 'Missing rejected vague rewrite candidate',
      reason: 'does not include working examples'
    },
    playbookSnippets: [
      'docs/SKILL-DEVELOPMENT-GUIDE.md',
      'node scripts/ci/validate-skills.js',
      'observed skill-run failure'
    ]
  },
  {
    name: 'deep analyzer evidence scenario rejects no-corpus analyzer changes',
    scenarioId: 'deep-analyzer-evidence',
    label: 'deep-analyzer',
    forbiddenActions: [
      'promoting repository, commit, architecture, or deep-analysis changes without analyzer corpus evidence',
      'suppressing the Deep Analyzer Evidence risk bucket without co-located corpus, snapshot, fixture, or benchmark evidence',
      'changing analyzer thresholds or classifications without expected-output comparison',
      'posting PR comments, check runs, or Linear sync updates from this read-only evaluator run'
    ],
    acceptanceGates: [
      'changed analyzer surface is named',
      'maintained corpus or reference-set path is included',
      'expected analyzer outputs are compared',
      'representative repository shape or commit history is described',
      'regression command is named'
    ],
    accepted: {
      id: 'corpus-backed-analyzer-change',
      missing: 'Missing accepted deep-analyzer candidate'
    },
    rejected: {
      id: 'threshold-only-analyzer-rewrite',
      missing: 'Missing rejected threshold-only analyzer candidate',
      reason: 'does not compare expected outputs'
    },
    playbookSnippets: [
      '../ECC-Tools/src/analyzers/fixtures/deep-analyzer-corpus.ts',
      'npm test -- src/analyzers/deep-analyzer-corpus.test.ts src/lib/analyzer.compare.test.ts',
      'Deep Analyzer Evidence'
    ]
  }
];

/**
 * Runs every assertion for one scenario fixture directory.
 *
 * Loads the scenario, trace, report, verifier result and candidate playbook,
 * then checks, in order: matching scenario ids, `read_only: true` on the
 * derived artifacts, required forbidden actions and acceptance gates, the
 * accepted/rejected/promoted candidates, the rejection reason, and the
 * playbook snippets. Throws an assertion error on the first failed check.
 *
 * @param {object} spec - One entry of `scenarioCases`.
 */
function runScenarioCase(spec) {
  const { scenarioId, label } = spec;

  const scenario = readFixtureJson(`${scenarioId}/scenario.json`);
  const trace = readFixtureJson(`${scenarioId}/trace.json`);
  const report = readFixtureJson(`${scenarioId}/report.json`);
  const verifier = readFixtureJson(`${scenarioId}/verifier-result.json`);
  const playbook = read(`examples/evaluator-rag-prototype/${scenarioId}/candidate-playbook.md`);

  assert.strictEqual(scenario.scenario_id, scenarioId);

  const derivedArtifacts = [
    ['trace', trace],
    ['report', report],
    ['verifier', verifier]
  ];
  for (const [artifactName, artifact] of derivedArtifacts) {
    assert.strictEqual(
      artifact.scenario_id,
      scenario.scenario_id,
      `${label} ${artifactName} scenario_id "${artifact.scenario_id}" does not match "${scenario.scenario_id}"`
    );
  }
  for (const [artifactName, artifact] of derivedArtifacts) {
    assert.strictEqual(artifact.read_only, true, `${label} ${artifactName} must declare read_only: true`);
  }

  // Exact-entry membership: each expected string must be a whole array element.
  for (const blocked of spec.forbiddenActions) {
    assert.ok(scenario.forbidden_actions.includes(blocked), `Missing ${label} forbidden action: ${blocked}`);
  }
  for (const required of spec.acceptanceGates) {
    assert.ok(scenario.acceptance_gates.includes(required), `Missing ${label} acceptance gate: ${required}`);
  }

  const accepted = verifier.candidates.find(candidate => candidate.candidate_id === spec.accepted.id);
  const rejected = verifier.candidates.find(candidate => candidate.candidate_id === spec.rejected.id);

  assert.ok(accepted, spec.accepted.missing);
  assert.ok(rejected, spec.rejected.missing);
  assert.strictEqual(accepted.decision, 'accepted');
  assert.strictEqual(rejected.decision, 'rejected');
  assert.strictEqual(verifier.promoted_candidate_id, accepted.candidate_id);
  assert.ok(
    rejected.reasons.join('\n').includes(spec.rejected.reason),
    `${label} rejected candidate reasons do not mention "${spec.rejected.reason}"`
  );

  for (const snippet of spec.playbookSnippets) {
    assert.ok(playbook.includes(snippet), `${label} playbook is missing "${snippet}"`);
  }
}

// Register one test per scenario, in the same order as before.
for (const spec of scenarioCases) {
  test(spec.name, () => runScenarioCase(spec));
}
|
||||
|
||||
// Exit non-zero on any failure; otherwise print the pass count.
if (failed > 0) {
  console.log(`\nFailed: ${failed}`);
  process.exit(1);
} else {
  console.log(`\nPassed: ${passed}`);
}
|
||||
@@ -46,11 +46,20 @@ test('stale PR salvage ledger preserves representative source attribution', () =
|
||||
|
||||
for (const pr of [
|
||||
'#1309',
|
||||
'#1232',
|
||||
'#1304',
|
||||
'#1322',
|
||||
'#1326',
|
||||
'#1310',
|
||||
'#1325',
|
||||
'#1413',
|
||||
'#1414',
|
||||
'#1478',
|
||||
'#1493',
|
||||
'#1528/#1529/#1547',
|
||||
'#1603',
|
||||
'#1658',
|
||||
'#1659',
|
||||
'#1674',
|
||||
'#1687',
|
||||
'#1705/#1780',
|
||||
@@ -71,10 +80,13 @@ test('stale PR salvage ledger records skipped junk and superseded work', () => {
|
||||
assert.ok(source.includes('too low-signal'));
|
||||
});
|
||||
|
||||
test('stale PR salvage ledger keeps the zh-CN tail manual-review only', () => {
|
||||
test('stale PR salvage ledger keeps localization tails manual-review only', () => {
|
||||
const source = read('docs/stale-pr-salvage-ledger.md');
|
||||
|
||||
assert.ok(source.includes('Only the #1687 localization tail remains'));
|
||||
assert.ok(source.includes('The remaining plausibly useful backlog is translation/localization work'));
|
||||
assert.ok(source.includes('#1687 zh-CN localization tail'));
|
||||
assert.ok(source.includes('#1609 Persian README translation'));
|
||||
assert.ok(source.includes('#1563 zh-TW README sync'));
|
||||
assert.ok(source.includes('translator/manual review'));
|
||||
assert.ok(source.includes('Do not import stale top-level docs'));
|
||||
});
|
||||
@@ -88,10 +100,54 @@ test('legacy inventory and roadmap link to the durable salvage ledger', () => {
|
||||
assert.ok(roadmap.includes('#1687 translator/manual'));
|
||||
});
|
||||
|
||||
// The salvage ledger must reference every PR from the May 12 gap pass and
// keep the disposition phrases listed below.
test('stale PR salvage ledger records the May 12 gap pass', () => {
  const ledger = read('docs/stale-pr-salvage-ledger.md');
  const gapPassPrs = [
    '#1310',
    '#1325',
    '#1360',
    '#1414',
    '#1415',
    '#1478',
    '#1438',
    '#1504',
    '#1508',
    '#1563/#1564/#1565',
    '#1567',
    '#1570',
    '#1584',
    '#1589',
    '#1594',
    '#1597',
    '#1602',
    '#1603',
    '#1604',
    '#1609',
    '#1613',
    '#1631',
    '#1648',
    '#1658',
    '#1693',
  ];

  gapPassPrs.forEach((pr) => {
    assert.ok(ledger.includes(pr), `Missing May 12 gap-pass PR ${pr}`);
  });

  assert.ok(ledger.includes('Django/Celery maintainer branch'));
  assert.ok(ledger.includes('already preserved in #1770'));
  assert.ok(ledger.includes('already preserved in #1769'));
  assert.ok(ledger.includes('already preserved in #1766'));
  assert.ok(ledger.includes('GateGuard subagent file-gate bypass'));
  assert.ok(ledger.includes('HTTP MCP reachability handling'));
  assert.ok(ledger.includes('current managed installer/profile flow'));
  assert.ok(ledger.includes('false-positive proof gate'));
  assert.ok(ledger.includes('session_id` from stdin JSON'));
  assert.ok(ledger.includes('Already present as `skills/redis-patterns/`'));
});
|
||||
|
||||
if (failed > 0) {
|
||||
console.log(`\nFailed: ${failed}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`\nPassed: ${passed}`);
|
||||
|
||||
|
||||
@@ -71,9 +71,16 @@ function buildExpectedPublishPaths(repoRoot) {
|
||||
"agent.yaml",
|
||||
"VERSION",
|
||||
]
|
||||
const exclusionPaths = [
|
||||
"!**/__pycache__/**",
|
||||
"!**/*.pyc",
|
||||
"!**/*.pyo",
|
||||
"!**/*.pyd",
|
||||
"!**/.pytest_cache/**",
|
||||
]
|
||||
|
||||
const combined = new Set(
|
||||
[...modules.flatMap((module) => module.paths || []), ...extraPaths].map(normalizePublishPath)
|
||||
[...modules.flatMap((module) => module.paths || []), ...extraPaths, ...exclusionPaths].map(normalizePublishPath)
|
||||
)
|
||||
|
||||
return [...combined]
|
||||
@@ -139,6 +146,17 @@ function main() {
|
||||
`npm pack should not include ${excludedPath}`
|
||||
)
|
||||
}
|
||||
|
||||
for (const packagedPath of packagedPaths) {
|
||||
assert.ok(
|
||||
!packagedPath.includes("__pycache__/"),
|
||||
`npm pack should not include Python bytecode cache path ${packagedPath}`
|
||||
)
|
||||
assert.ok(
|
||||
!/\.py[cod]$/.test(packagedPath),
|
||||
`npm pack should not include Python bytecode file ${packagedPath}`
|
||||
)
|
||||
}
|
||||
}],
|
||||
]
|
||||
|
||||
|
||||
@@ -62,6 +62,24 @@ function seedMinimalRepo(rootDir, overrides = {}) {
|
||||
'ecc2/src/session/store.rs': 'insert_tool_log query_tool_logs',
|
||||
'ecc2/src/session/manager.rs': 'sync_tool_activity_metrics tool-usage.jsonl',
|
||||
'docs/architecture/observability-readiness.md': 'node scripts/observability-readiness.js --format json',
|
||||
'docs/architecture/hud-status-session-control.md': [
|
||||
'context toolCalls activeAgents todos checks cost risk queueState',
|
||||
'create resume status stop diff pr mergeQueue conflictQueue',
|
||||
'Linear GitHub handoff'
|
||||
].join('\n'),
|
||||
'examples/hud-status-contract.json': JSON.stringify({
|
||||
schema_version: 'ecc.hud-status.v1',
|
||||
context: {},
|
||||
toolCalls: {},
|
||||
activeAgents: [],
|
||||
todos: {},
|
||||
checks: {},
|
||||
cost: {},
|
||||
risk: {},
|
||||
queueState: {},
|
||||
sessionControls: {},
|
||||
sync: {}
|
||||
}, null, 2),
|
||||
'docs/releases/2.0.0-rc.1/quickstart.md': 'observability-readiness.md',
|
||||
'docs/releases/2.0.0-rc.1/release-notes.md': 'observability-readiness.md'
|
||||
};
|
||||
@@ -195,6 +213,23 @@ function runTests() {
|
||||
}
|
||||
})) passed++; else failed++;
|
||||
|
||||
// Seeds a temp repo with the HUD status contract override set to null. The
// report must be not-ready, with the HUD contract check failing and the
// loop-status check still passing.
if (test('missing HUD status contract fails without disturbing core tool checks', () => {
  const tempRoot = createTempDir('observability-readiness-hud-fail-');

  try {
    const overrides = { 'examples/hud-status-contract.json': null };
    seedMinimalRepo(tempRoot, overrides);

    const { ready, checks } = buildReport(tempRoot);

    assert.strictEqual(ready, false);
    assert.ok(checks.some(check => check.id === 'hud-status-control-contract' && !check.pass));
    assert.ok(checks.some(check => check.id === 'loop-status-live-signal' && check.pass));
  } finally {
    // Always remove the temp directory, even when an assertion throws.
    cleanup(tempRoot);
  }
})) {
  passed++;
} else {
  failed++;
}
|
||||
|
||||
console.log('\nResults:');
|
||||
console.log(` Passed: ${passed}`);
|
||||
console.log(` Failed: ${failed}`);
|
||||
|
||||
Reference in New Issue
Block a user