docs(design): add hosted Pro fleet dashboard design (Sentry for agent security)

Implementation-ready architecture for the flagship 'next' roadmap item: a hosted, multi-repo agent-security posture dashboard built on the existing ecc-agentshield primitives (evidence-pack bundleDigest + operatorReadback, watch/drift DriftResult, runtime NDJSON, baseline diff, policy promotion). Covers free-vs-Pro scope, ingestion/query API grounded in real field names, data model + time-series rollups, auth/RBAC + redaction guarantees, MVP build order, and pricing hooks. Companion to ECC-PRO-SECURITY-ROADMAP.md.
docs: add MRR-biased ECC Pro + AgentShield security roadmap
2026-06-23 00:21:27 +08:00 · 2026-06-21 21:44:50 -04:00 · 2026-06-21 21:21:22 -04:00
5 changed files with 887 additions and 86 deletions
@@ -0,0 +1,189 @@
+export const meta = { // workflow descriptor: name, description, usage hint, and the ordered phase list
+  name: 'ecc-pro-security-roadmap',
+  description: 'Survey + web-research + triage both ECC and AgentShield, then synthesize a prioritized ECC Pro security roadmap',
+  whenToUse: 'Quarterly product/security planning for ECC Pro and AgentShield',
+  phases: [ // titles match the phase('...') calls and the per-agent `phase` option used later in this script
+    { title: 'Survey', detail: 'map current AgentShield + ECC Pro capability, triage open PRs/issues on both repos' },
+    { title: 'Research', detail: 'recent agentic-security CVEs, competitor gaps, unbuilt ideas, Sentry/code-review feature demand' },
+    { title: 'Synthesize', detail: 'merge everything into a prioritized, MRR-biased roadmap' }
+  ]
+};
+
+// ----- shared schemas -----
+const TRIAGE_SCHEMA = { // JSON Schema for the output of the two repo-triage agents (passed as their `schema` option)
+  type: 'object',
+  additionalProperties: false, // keys not listed under `properties` are rejected
+  properties: {
+    repo: { type: 'string' }, // the triage prompts ask for "owner/name", e.g. repo="affaan-m/ECC"
+    items: { // one entry per triaged PR or issue
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          ref: { type: 'string', description: 'e.g. "PR #103" or "issue #102"' },
+          title: { type: 'string' },
+          category: { type: 'string', enum: ['merge', 'close', 'needs-work', 'triage-later', 'security-priority'] }, // same five buckets the triage prompts list
+          rationale: { type: 'string' },
+          proValue: { type: 'string', description: 'how this maps to ECC Pro / MRR, or "none"' }
+        },
+        required: ['ref', 'title', 'category', 'rationale', 'proValue'] // every item field is mandatory
+      }
+    },
+    summary: { type: 'string' }
+  },
+  required: ['repo', 'items', 'summary']
+};
+
+const CAPABILITY_SCHEMA = { // JSON Schema for the output of the two capability-survey agents (passed as their `schema` option)
+  type: 'object',
+  additionalProperties: false, // keys not listed under `properties` are rejected
+  properties: {
+    area: { type: 'string' }, // the survey prompts pin this, e.g. area="agentshield-capability"
+    haveToday: { type: 'array', items: { type: 'string' } },
+    gaps: { type: 'array', items: { type: 'string' } },
+    proLeverage: { type: 'array', items: { type: 'string' }, description: 'what could plausibly be paid/Pro-tier' },
+    summary: { type: 'string' }
+  },
+  required: ['area', 'haveToday', 'gaps', 'proLeverage', 'summary'] // all five fields are mandatory
+};
+
+const RESEARCH_SCHEMA = { // JSON Schema for the output of the four research agents (passed as their `schema` option)
+  type: 'object',
+  additionalProperties: false, // keys not listed under `properties` are rejected
+  properties: {
+    topic: { type: 'string' }, // the research prompts pin this, e.g. topic="competitor-gap-analysis"
+    findings: { // one entry per research finding
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          title: { type: 'string' },
+          detail: { type: 'string' },
+          source: { type: 'string', description: 'URL, CVE id, or product name' },
+          gapVsUs: { type: 'string', enum: ['we-have-it', 'partial', 'missing'] }, // coverage relative to AgentShield, per the research prompts
+          relevanceToAgentShield: { type: 'string' },
+          proOpportunity: { type: 'string', description: 'how this could become ECC Pro / paid value' }
+        },
+        required: ['title', 'detail', 'source', 'gapVsUs', 'proOpportunity'] // NOTE(review): relevanceToAgentShield is the only property left optional in this schema — confirm that is intentional
+      }
+    },
+    summary: { type: 'string' }
+  },
+  required: ['topic', 'findings', 'summary']
+};
+
+const ROADMAP_SCHEMA = { // JSON Schema for the synthesis agent's roadmap output (passed as its `schema` option)
+  type: 'object',
+  additionalProperties: false, // keys not listed under `properties` are rejected
+  properties: {
+    themes: { // grouping themes, each with a name and a rationale
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        properties: { name: { type: 'string' }, rationale: { type: 'string' } },
+        required: ['name', 'rationale']
+      }
+    },
+    items: { // individual roadmap entries
+      type: 'array',
+      items: {
+        type: 'object',
+        additionalProperties: false,
+        properties: {
+          title: { type: 'string' },
+          area: { type: 'string', enum: ['agentshield', 'ecc-pro', 'ecc-core', 'both'] },
+          horizon: { type: 'string', enum: ['now', 'next', 'later'] },
+          effort: { type: 'string', enum: ['S', 'M', 'L', 'XL'] },
+          impact: { type: 'string', enum: ['low', 'medium', 'high', 'flagship'] },
+          mrrAngle: { type: 'string' },
+          description: { type: 'string' },
+          linkedItems: { type: 'array', items: { type: 'string' } } // the synthesis prompt asks for PR/issue refs from the triage output
+        },
+        required: ['title', 'area', 'horizon', 'effort', 'impact', 'mrrAngle', 'description', 'linkedItems'] // every item field is mandatory
+      }
+    },
+    top5Now: { type: 'array', items: { type: 'string' } }, // the prompt asks for five entries; the schema itself sets no minItems/maxItems
+    summary: { type: 'string' }
+  },
+  required: ['themes', 'items', 'top5Now', 'summary']
+};
+
+const GUARDRAILS = [ // shared preamble interpolated at the start of every agent prompt in this script
+  'CONSTRAINTS: research/triage only. Do NOT modify any code, do NOT open/close/merge PRs, do NOT post comments,',
+  'do NOT send any external message. Return findings as data only.',
+  'Brand it "ECC" (never "everything claude code"). AgentShield was FEATURED at a hackathon, never say it "won".',
+  'AgentShield npm package is "ecc-agentshield". Local clone: ~/GitHub/ECC/agentshield. ECC repo: affaan-m/ECC. AgentShield repo: affaan-m/agentshield.',
+  'You have Bash (gh CLI), Read, Grep, Glob, and web tools (load via ToolSearch: WebSearch / firecrawl / exa).'
+].join(' '); // fragments are joined with single spaces into one string
+
+phase('Survey'); // presumably marks the start of the Survey phase for the runtime — confirm against the phase() helper
+
+const surveyThunks = [ // four zero-argument functions; agent() is not invoked until each thunk is called
+  () => // capability map of AgentShield's current detectors
+    agent(
+      `${GUARDRAILS}\n\nSURVEY AgentShield's CURRENT detection capability. Read ~/GitHub/ECC/agentshield: src/rules (built-in detectors), src/* area dirs (taint, injection, supply-chain, runtime, threat-intel, sandbox, policy, remediation, evidence-pack, harness-adapters), README.md, CHANGELOG.md, WORKING-CONTEXT.md. Produce an honest capability map: what classes of agentic-security risk it detects TODAY, where the gaps are, and which capabilities could plausibly be a paid/Pro tier (e.g. continuous monitoring, fleet dashboards, hosted scanning, evidence packs, org policy). area="agentshield-capability".`,
+      { label: 'survey:agentshield-capability', phase: 'Survey', agentType: 'general-purpose', schema: CAPABILITY_SCHEMA }
+    ),
+  () => // free-vs-paid surface of ECC Pro
+    agent(
+      `${GUARDRAILS}\n\nSURVEY the CURRENT state of ECC Pro / paid surface. Read in ~/GitHub/ECC/everything-claude-code: scripts/lib/control-pane/* (control pane, proximity, viz), scripts/lib/agent-proximity/*, docs/design/agent-proximity.md, README.md, any pricing/Pro/Enterprise mentions. Determine: what is free vs what is positioned as Pro/Enterprise today, what monetizable surfaces exist (control pane, 3D agent-airspace observability, shared knowledge, JIT team workflows, kanban), and where the paid value story is thin. area="ecc-pro-surface".`,
+      { label: 'survey:ecc-pro-surface', phase: 'Survey', agentType: 'general-purpose', schema: CAPABILITY_SCHEMA }
+    ),
+  () => // triage of open PRs/issues on affaan-m/ECC
+    agent(
+      `${GUARDRAILS}\n\nTRIAGE every OPEN PR and ISSUE on the ECC repo (affaan-m/ECC). Use gh: \`gh pr list --repo affaan-m/ECC --state open --limit 80 --json number,title,author,isDraft\` and \`gh issue list --repo affaan-m/ECC --state open --limit 80 --json number,title,labels\`. For the higher-signal ones, peek at the diff/body (\`gh pr view <n> --repo affaan-m/ECC\`). Categorize each: merge / close / needs-work / triage-later / security-priority, with a one-line rationale and any Pro/MRR value. Prioritize identifying security-relevant and Pro-relevant items. repo="affaan-m/ECC".`,
+      { label: 'triage:ecc', phase: 'Survey', agentType: 'general-purpose', schema: TRIAGE_SCHEMA }
+    ),
+  () => // triage of open PRs/issues on affaan-m/agentshield
+    agent(
+      `${GUARDRAILS}\n\nTRIAGE every OPEN PR and ISSUE on the AgentShield repo (affaan-m/agentshield). Use gh similarly. Pay special attention to the false-positive cluster (issues #100, #102, #99 "bm", PR #103) where the scanner penalizes its own recommended fix and flags benign strings — these hurt trust and conversion. Also assess #101 (external rule-pack loader --rule-pack) and #97 (FAQ docs). Categorize each: merge / close / needs-work / triage-later / security-priority, with rationale and Pro/MRR value. repo="affaan-m/agentshield".`,
+      { label: 'triage:agentshield', phase: 'Survey', agentType: 'general-purpose', schema: TRIAGE_SCHEMA }
+    )
+];
+
+phase('Research'); // called before any thunk has run; each agent also carries its own `phase` option
+
+const researchThunks = [ // four zero-argument functions; agent() is not invoked until each thunk is called
+  () => // recent CVEs and vulnerability classes in agentic / LLM / MCP security
+    agent(
+      `${GUARDRAILS}\n\nDEEP RESEARCH: recent (2025-2026) CVEs and disclosed vulnerability classes in AGENTIC / LLM / MCP security that a scanner like AgentShield should detect. Use web tools (ToolSearch then WebSearch / firecrawl / exa). Cover: MCP server vulns (tool poisoning, rug-pull tool updates, prompt injection via tool descriptions, confused-deputy), CVEs in popular agent frameworks / MCP servers, npm/PyPI supply-chain attacks targeting AI tooling, prompt-injection-driven RCE, memory/context poisoning, credential exfiltration via agents. For each finding mark gapVsUs (we-have-it / partial / missing) vs AgentShield's current detectors, and the Pro opportunity. topic="agentic-cves-2025-2026".`,
+      { label: 'research:cves', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
+    ),
+  () => // competitor and adjacent-tool gap analysis
+    agent(
+      `${GUARDRAILS}\n\nDEEP RESEARCH: competitor / adjacent tools in agent + LLM + supply-chain security and what they do that AgentShield does NOT. Use web tools. Cover products like: Protect AI, Lakera, Prompt Security, HiddenLayer, Snyk, Socket.dev, Endor Labs, Semgrep, GitGuardian, Invariant Labs (MCP-scan), Cloudflare/others' MCP security, plus any new entrants. For each, note their headline capability, whether AgentShield has it (gapVsUs), and how a comparable or better capability could be packaged as ECC Pro paid value. Also: pull npm download stats for "ecc-agentshield" to ground the growth story if reachable. topic="competitor-gap-analysis".`,
+      { label: 'research:competitors', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
+    ),
+  () => // deferred / not-yet-built capability ideas
+    agent(
+      `${GUARDRAILS}\n\nIDEATION: agentic-security capabilities that have been discussed/considered for AgentShield or ECC but NOT yet built, plus net-new ideas grounded in the threat model. Read ~/GitHub/ECC/agentshield/WORKING-CONTEXT.md and any docs/ for hints of deferred work; read the AgentShield README for the current feature set; then reason about the gaps. Think across the kill chain: discovery/config scan -> PR-time review -> CI gate -> runtime monitor -> incident evidence. Candidate ideas: real-time runtime guardrails, MCP supply-chain provenance/lockfile attestation, taint-tracking across tool calls, behavioral baselining of agents, secret/credential flow tracing, autofix with verification, hosted continuous scanning + dashboards, org policy as code, agent-identity/least-privilege. Mark gapVsUs and proOpportunity for each. topic="unbuilt-ideation".`,
+      { label: 'research:ideation', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
+    ),
+  () => // unmet developer demand in existing security / code-review tooling
+    agent(
+      `${GUARDRAILS}\n\nRESEARCH: what developers actually want from existing security + code-review tooling (Sentry, GitHub code scanning / CodeQL, Snyk, Semgrep, SonarQube, Dependabot) and where those tools fall short for AI-agent codebases. Use web tools (look at user complaints, feature requests, comparison posts). Identify the unmet demand AgentShield Pro could capture: e.g. PR-time security review tuned for agent configs, low-false-positive findings, IDE/editor integration, runtime error+security telemetry like Sentry but for agents, autofix, SARIF/GitHub integration, evidence/compliance packs. For each, gapVsUs and proOpportunity. topic="devtool-demand-gaps".`,
+      { label: 'research:devtool-demand', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
+    )
+];
+
+// Survey and research have no cross-dependency; run all 8 concurrently (the
+// runtime caps concurrency anyway) and barrier here — synthesis needs everything.
+const [survey, research] = await Promise.all([parallel(surveyThunks), parallel(researchThunks)]);
+
+const surveyClean = survey.filter(Boolean);
+const researchClean = research.filter(Boolean);
+log(`survey: ${surveyClean.length}/4 returned, research: ${researchClean.length}/4 returned`);
+
+phase('Synthesize');
+
+const bundle = JSON.stringify({ survey: surveyClean, research: researchClean }, null, 2);
+
+const roadmap = await agent(
+  `${GUARDRAILS}\n\nYou are the synthesis lead. Below is JSON from 4 survey agents (AgentShield capability, ECC Pro surface, ECC repo triage, AgentShield repo triage) and 4 research agents (CVEs, competitors, unbuilt ideation, devtool demand).\n\nProduce a PRIORITIZED, MRR-BIASED roadmap for ECC Pro (its AgentShield and ECC portions). Rules:\n- Bias hard toward what converts free users to paid and grows MRR. AgentShield is doing ~10k npm downloads/week (~30k/month) on "ecc-agentshield" - that is a huge top-of-funnel; the roadmap must include how to monetize that funnel (Pro tier, hosted scanning, dashboards, org policy, evidence/compliance packs).\n- Group into a few themes. Each roadmap item: area (agentshield/ecc-pro/ecc-core/both), horizon (now/next/later), effort (S/M/L/XL), impact (low/medium/high/flagship), a concrete mrrAngle, a description, and linkedItems (PR/issue refs from the triage that map to it).\n- Fold the AgentShield false-positive cluster fixes into "now" (trust is a conversion gate).\n- top5Now = the five highest-leverage things to do immediately.\n\nDATA:\n${bundle}`,
+  { label: 'synthesize:roadmap', phase: 'Synthesize', agentType: 'general-purpose', schema: ROADMAP_SCHEMA }
+);
+
+return { survey: surveyClean, research: researchClean, roadmap };
@@ -0,0 +1,347 @@
+# ECC Pro + AgentShield Security Roadmap
+
+> Status: draft for review. Generated 2026-06-21 from a multi-agent survey + research pass
+> (capability map of AgentShield and ECC Pro, triage of every open PR/issue on both repos,
+> and web research on competitors, unbuilt ideas, and dev-tool demand). MRR-biased: every
+> item is scored for how it converts the free funnel into paid ECC Pro / Enterprise.
+
+## Why now
+
+AgentShield (npm `ecc-agentshield`) is doing roughly **30K downloads/month with no decay**
+(~7.2K/week, ~78K year-to-date) and **903 GitHub stars** — a large, growing top-of-funnel.
+Today there is almost no bridge from that free funnel to paid ECC Pro, and the single most
+ownable paid surface — the agent-proximity "airspace" moat — is fully computed but never
+rendered. This roadmap is built to close both gaps: remove the trust blockers that suppress
+conversion, make the moat visible, then productize the local CLI primitives into hosted,
+recurring-revenue surfaces.
+
+## Themes
+
+### Trust & conversion gate (now)
+
+AgentShield's ~30K/month free funnel only converts if the product is trustworthy and the upgrade path is visible. False positives that punish correct hardening, broken model IDs that hard-fail the LLM layer, Windows crashes, and security bugs in our own learning layer all erode trust before a user ever sees a Pro prompt. Fixing the FP cluster, shipping verified correctness/security fixes, and surfacing a Pro CTA at the point of value are the highest-leverage immediate moves.
+
+### Make the moat visible & demo-able (now)
+
+The agent-proximity 'airspace' metric is the single differentiated capability nothing else has, but it is math + JSON with zero UI rendering. Shipping the 3D observability dashboard (PR #2320) turns the strongest narrative asset into a demo that sells Team/Enterprise seats on sight.
+
+### Productize local primitives into hosted Pro SaaS (next)
+
+Every continuous/fleet capability — watch/drift, baseline gates, evidence-pack fleet operatorReadback, runtime NDJSON, org policy packs — already exists as local CLI building blocks. The fastest path to MRR is hosting these as authenticated multi-repo surfaces: continuous-scanning dashboard, inline PR review + autofix-PR, rule-pack loader + intel feed, compliance packs, and centrally-managed org policy.
+
+### Close competitive gaps & expand reach (next/later)
+
+Snyk Agent Scan, NVIDIA SkillSpector, and GoPlus AgentGuard validate the category and add runtime enforcement, LLM-judge semantic detection, and live MCP fetch that AgentShield lacks. LLM-judge Deep Scan, a free runtime guard with Pro telemetry, cross-machine A2A airspace, and a community MCP reputation registry neutralize those differentiators while keeping the free, zero-account, local-first posture as the moat. Harness-neutral expansion widens the whole funnel.
+
+## Top 5 — do now
+
+1. Merge PR #103 and ship the issue #100 follow-up to kill the false-positive cluster that punishes correct hardening (trust is the conversion gate)
+2. Merge PR #2320 to render the 3D agent-airspace observability dashboard (the moat made visible and demo-able)
+3. Add a Pro upgrade CTA to free CLI output + GitHub App PR comments to monetize the ~30K/month free download funnel, leading with the privacy + low-noise wedge
+4. Merge the verified correctness/Windows batch (PR #2133 model-ID fix, #2307/#2063 Windows, #2273/#2246/#2312 docs, #2293 deps) and fix issue #2316 plan-orchestrate install detection
+5. Harden continuous-learning storage: fix path traversal #2297 and registry-corruption race #2294 (security credibility for the brand Pro trades on)
+
+## Roadmap at a glance
+
+| Horizon | Item | Area | Effort | Impact |
+| --- | --- | --- | --- | --- |
+| now | Fix the false-positive cluster that punishes correct hardening | agentshield | S | high |
+| now | Add autofix verification loop (re-scan + no-regression proof) | agentshield | M | medium |
+| now | Render the 3D agent-airspace observability dashboard (the moat made visible) | ecc-pro | M | flagship |
+| now | Add a Pro conversion CTA to free CLI output and GitHub App PR comments | both | S | high |
+| now | Ship merge-ready correctness and Windows fixes that protect release velocity and core UX | ecc-core | S | medium |
+| now | Harden continuous-learning storage (path traversal + registry race) | ecc-core | S | medium |
+| next | Hosted continuous-scanning dashboard with fleet trend lines ('Sentry for agent security') | agentshield | L | flagship |
+| next | Inline PR-comment review + autofix-PR via the ecc-tools GitHub App | agentshield | M | high |
+| next | External rule-pack loader (--rule-pack) + curated commercial intel feed | agentshield | M | high |
+| next | Pro Deep Scan: LLM-judge semantic detection + live MCP tool fetch + rug-pull pinning | agentshield | L | high |
+| next | Compliance/evidence packs mapped to SOC2/PCI/ISO controls | agentshield | M | high |
+| next | Centrally-managed org policy + RBAC distribution | agentshield | L | high |
+| next | Harness-neutral expansion: Kimi, Codex alias, OpenClaude/Codex compat | ecc-core | L | medium |
+| next | Batch-review and dedup the community skill/agent PR backlog | ecc-core | M | low |
+| later | Free runtime guard hook with Pro centralized telemetry + trust registry | agentshield | XL | flagship |
+| later | Cross-machine team airspace + A2A topology security in the control pane | ecc-pro | XL | high |
+| later | Community MCP/skill reputation registry as growth flywheel + Pro risk-score API | agentshield | L | medium |
+
+## NOW
+
+### Fix the false-positive cluster that punishes correct hardening
+
+- **Area:** agentshield | **Effort:** S | **Impact:** high
+- **Linked:** PR #103, issue #102, issue #100
+- **MRR angle:** FPs that penalize the scanner's own remediation destroy trust with security-conscious buyers and break the demo-and-CI value prop Pro is sold on. Trust is the conversion gate: a hardened config must score well or no one upgrades.
+
+Merge PR #103 (treats --no-verify inside permissions.deny/ask as a prohibition, not a usage — fail-closed on invalid JSON, 6 tests, all review bots green) after confirming the Verify/test matrix passes locally. Then ship a follow-up PR for the two remaining FPs in issue #100: (1) --no-verify in string literals / help text flagged CRITICAL (needs executed-command vs literal context), and (2) the reversed-text rule at src/rules/agents.ts:1561 matching plain English 'backward/backwards' — re-scope it to require reverse-and-execute evidence so it stops noise-flooding ML/PyTorch agent repos (a high-value adopter segment).
+
+### Add autofix verification loop (re-scan + no-regression proof)
+
+- **Area:** agentshield | **Effort:** M | **Impact:** medium
+- **Linked:** issue #102
+- **MRR angle:** Verified, trustworthy autofix is the activation moment that makes the free CLI feel magical and seeds confidence in the paid managed-remediation workflow (autofix-as-PR in ECC Tools).
+
+src/fixer/index.ts applies string transforms but never re-scans to prove the finding is gone and no new finding was introduced — and issue #102 proved a naive permission tighten can be re-flagged by the scanner. Close the loop: after applying --fix, re-run the scanner, diff the findings set, auto-revert if the score regresses, and emit a verified-fix attestation. OSS gets verify-after-fix locally; Pro gets autofix-as-PR via the ecc-tools GitHub App (open remediation PR, run verified re-scan in CI, attach before/after evidence pack, auto-merge on green).
+
+### Render the 3D agent-airspace observability dashboard (the moat made visible)
+
+- **Area:** ecc-pro | **Effort:** M | **Impact:** flagship
+- **Linked:** PR #2320
+- **MRR angle:** This is the single most ownable, demo-able paid-looking surface ECC has and nothing else offers it. 'Watch N agents crawl toward each other in code-space and one steer away' converts on the demo alone — it justifies a Team/Enterprise seat that competitors (CodeRabbit/Greptile) cannot match.
+
+The agent-proximity math (noisy-OR collision risk, TCAS transmit/steer advisories, 3D space-filling embedding) is fully implemented in scripts/lib/agent-proximity/ and computed every tick, but the control-pane UI (ui.js) renders ZERO proximity output. Merge maintainer PR #2320 (self-contained, dependency-free 3D canvas viz + /api/proximity feed, XSS-safe textContent, +254/-0 with tests, MERGEABLE) to ship the renderer. This closes the biggest gap between the moat narrative and a shippable surface.
+
+### Add a Pro conversion CTA to free CLI output and GitHub App PR comments
+
+- **Area:** both | **Effort:** S | **Impact:** high
+- **Linked:** PR #97
+- **MRR angle:** Directly monetizes the ~30K downloads/month (78,108 YTD, ~7,228/week, no decay) free funnel. There is currently no surfaced upgrade path from the free scanner to ECC Pro — adding a contextual CTA at the point of value is the lowest-effort, highest-leverage conversion lever available.
+
+Surface a Pro CTA where free users already feel value: a footer in terminal/JSON/markdown reports ('hosted fleet posture + continuous monitoring at ecc-tools Pro'), in the GitHub Action job summary, and in PR check-run comments. Lead with the privacy wedge ('scans never leave your machine' vs Snyk Agent Scan transmitting tool metadata to cloud) and the low-noise/runtimeConfidence accuracy story as the differentiators. Keep AgentShield free + zero-account as the moat against token-gated Snyk Agent Scan.
+
+### Ship merge-ready correctness and Windows fixes that protect release velocity and core UX
+
+- **Area:** ecc-core | **Effort:** S | **Impact:** medium
+- **Linked:** PR #2133, PR #2307, PR #2063, PR #2273, PR #2246, PR #2312, PR #2293, issue #2316
+- **MRR angle:** Broken model IDs hard-fail the multi-model LLM layer Pro features depend on; broken plan-orchestrate install detection and Windows crashes degrade the paid UX and erode trust before users ever reach the upgrade prompt.
+
+Merge the clean, verified batch: PR #2133 (Claude provider model-ID + adaptive-thinking fix — replaces invalid IDs with claude-sonnet-4-6/haiku-4-5/opus-4-8, routes SYSTEM to top-level, omits temperature, adaptive thinking for Opus 4.7/4.8; previous default would 404/400 at the API), PR #2307 + #2063 (Windows test/UTF-8 fixes), PR #2273/#2246/#2312 (docs/workflow), PR #2293 (dependabot minor/patch). Schedule a fix for issue #2316 (plan-orchestrate still probes old paths after the ecc@ecc marketplace rename — broken install detection on a core workflow command).
+
+### Harden continuous-learning storage (path traversal + registry race)
+
+- **Area:** ecc-core | **Effort:** S | **Impact:** medium
+- **Linked:** issue #2297, issue #2294, issue #2300, issue #2296
+- **MRR angle:** ECC sells security tooling; a path-traversal or registry-corruption bug in our own learning layer is a credibility liability that undercuts the entire security brand the Pro tier trades on.
+
+Fix two security-priority bugs in skills/continuous-learning-v2/scripts/instinct-cli.py as one hardening pass: issue #2297 (shutil.rmtree on PROJECTS_DIR/project_id with no path-containment check — arbitrary directory deletion risk) and issue #2294 (_write_registry writes projects.json without the advisory lock _update_registry uses — concurrent sessions can corrupt the registry). Pair with reliability issues #2300 (SIGALRM drops observations) and #2296 (signal-counter race) for observer integrity.
+
+## NEXT
+
+### Hosted continuous-scanning dashboard with fleet trend lines ('Sentry for agent security')
+
+- **Area:** agentshield | **Effort:** L | **Impact:** flagship
+- **MRR angle:** THE core ECC Tools Pro product and the clearest recurring-revenue moat: nobody unifies config-scan + runtime telemetry. Billed per seat/repo. Reuses operatorReadback/reviewItems as the API contract — lowest-effort-to-highest-leverage Pro upgrade because the data model already exists.
+
+Productize the existing local primitives into a hosted, authenticated, multi-repo backend: ingest webhook/CI scan results, runtime.ndjson, and watch/drift events over time; persist baselines; chart score trend, drift history, blocked-command rate, injection-attempt rate, secret-exposure events, and cross-repo org rollup; fire Slack/email regression alerts. The continuous/fleet primitives (src/watch, src/baseline, src/evidence-pack fleet operatorReadback) exist only as local CLI today. Positions AgentShield as the unified config+runtime view that neither Snyk (scan-only) nor Sentry (no security semantics) offers.
+
+### Inline PR-comment review + autofix-PR via the ecc-tools GitHub App
+
+- **Area:** agentshield | **Effort:** M | **Impact:** high
+- **Linked:** PR #2320
+- **MRR angle:** Sticky inline PR comments + one-click fix PRs are now table stakes (Aikido, DryRun, Pixee) and are the GitHub-native paid surface that converts. The GitHub App already exists as the delivery vehicle; monetize PR-time review + autofix-PR as the paid tier.
+
+Today the GitHub Action fails CI and emits SARIF (lands in the Security tab) but does not post sticky inline PR comments keyed to changed lines, and autofix is local-CLI only. Add per-line PR comments with one-click 'apply fix' that commits the existing remediation to the PR branch, plus auto-fix-PR generation. Differentiate from CodeRabbit/Greptile by bundling the agent-proximity / merge-conflict-prevention angle competitors lack.
+
+### External rule-pack loader (--rule-pack) + curated commercial intel feed
+
+- **Area:** agentshield | **Effort:** M | **Impact:** high
+- **Linked:** issue #101
+- **MRR angle:** Turns AgentShield into a platform: OSS gets the loader, Pro gets a signed, continuously-updated commercial rule-pack/threat-intel subscription. The ATR pack (464 rules, in production at Cisco AI Defense + Microsoft) brings credibility and reach; its corpus feeds the accuracy gate.
+
+Build the loader requested in agentshield issue #101: a signed, versioned external rule-pack format with zod validation mirroring the --policy loader, no new deps, provenance/safety checks on the packs themselves. Maps cleanly onto the existing declarative rule tables and runRules loop. Resolve the one open design question (ScoreBreakdown has five fixed buckets — letting external findings count toward the total without a bucket of their own is acceptable for v1). Couples with a hosted, curated AI-tooling malicious-package/skill + CVE intel feed as the paid subscription layer (the static 21-entry CVE DB goes stale; sync to NVD/GHSA/OSV).
+
+### Pro Deep Scan: LLM-judge semantic detection + live MCP tool fetch + rug-pull pinning
+
+- **Area:** agentshield | **Effort:** L | **Impact:** high
+- **MRR angle:** Directly neutralizes the most dangerous competitor (Snyk Agent Scan) and AgentGuard. Metered/Pro feature where the platform fronts the model cost and runs deeper scheduled adversarial sweeps. Keeps free AgentShield as the no-account default vs Snyk's token-gated CLI.
+
+Reuse the existing --opus (Red/Blue/Auditor) and --injection (live LLM adversarial, ~70 payloads) plumbing to ship an opt-in LLM-judge layer for semantic prompt-injection and toxic-flow chaining. Add a live MCP connector that fetches tool descriptions and pins tool hashes to flag rug-pulls between scans (capabilities Snyk has and AgentShield lacks). Close the acknowledged skill-md / freeform-prompt coverage gap as a free differentiator (now table stakes vs NVIDIA SkillSpector), reserving AST taint + curated YARA/IOC feed for Pro.
+
+### Compliance/evidence packs mapped to SOC2/PCI/ISO controls
+
+- **Area:** agentshield | **Effort:** M | **Impact:** high
+- **MRR angle:** High-margin enterprise add-on: auditor-ready packs are the artifact GRC teams hand to auditors to justify agent deployments. Buyers want framework-mapped evidence, not raw findings — this is a clear Enterprise seat upsell.
+
+AgentShield already generates deterministic hash-verified evidence packs and SARIF, plus baseline/drift and org-policy pass/fail. Add explicit framework mapping (findings -> SOC2 CC / PCI DSS / ISO control IDs), coverage and remediation-over-time charts fed by baseline history and runtime.ndjson, and hosted storage/retention/signing. Sell as the compliance deliverable for regulated buyers.
+
+### Centrally-managed org policy + RBAC distribution
+
+- **Area:** agentshield | **Effort:** L | **Impact:** high
+- **MRR angle:** Per-seat Enterprise value: hosted policy distribution, enforcement across the fleet, and waiver/exception workflows with expiry and owner approval are exactly what org buyers pay seats for. Today policy packs are local JSON copied around with no central management.
+
+Policy packs (6 presets), export/promote with SHA-256-verified promotion, and exception lifecycle already exist as local JSON. Add hosted policy distribution, fleet-wide enforcement, centrally-managed exceptions/waivers (expiry + owner approval), org identity/RBAC, audit-log retention, and central branch-protection evidence. Add a DryRun-style natural-language-to-policy authoring layer ('no MCP server may bind 0.0.0.0', 'skills must not read keychain') that compiles to AgentShield rules — a differentiated UX developers are gravitating to.
+
+### Harness-neutral expansion: Kimi, Codex alias, OpenClaude/Codex compat
+
+- **Area:** ecc-core | **Effort:** L | **Impact:** medium
+- **Linked:** PR #2154, PR #2254, issue #2076, issue #2073, issue #2074
+- **MRR angle:** Broadens the addressable user base for the whole funnel and aligns with the ECC 2.0 harness-neutral control-pane vision — more harnesses scanned = more top-of-funnel feeding Pro.
+
+Land the harness-neutral work after the required catalog/registry sync, install-profile review, and surface tests: PR #2154 (Kimi Code CLI, 12th harness, +1397/16 files), PR #2254 (Codex plugin alias — currently DRAFT + CONFLICTING, resolve first), and answer the needs-info compat issues #2076 (OpenClaude), #2073 (Codex subagent TOML format), #2074 (OpenCode bun-on-PATH Windows bug). AgentShield's harness adapters already detect Claude Code/OpenCode/Codex/Gemini/Zed/VS Code/dmux.
+
+### Batch-review and dedup the community skill/agent PR backlog
+
+- **Area:** ecc-core | **Effort:** M | **Impact:** low
+- **Linked:** issue #2308, PR #2309, PR #2310, PR #2311, PR #2285, PR #2275, PR #2274, PR #2270, PR #2318, PR #2315, PR #2313, PR #2137, issue #2069
+- **MRR angle:** Indirect: keeps the catalog credible and discoverable (catalog quality is a free-tier retention factor) without bloating it with redundant skills that dilute the value prop.
+
+Triage as batches with overlap/dedup review against the existing 200+ skill catalog plus manifest/catalog/command-registry sync and surface tests: the three BMAD-inspired skills (#2309/#2310/#2311 under tracking issue #2308), framework-reviewer family extensions (#2285 nuxt, #2275 React Native, #2280 AL/BC), and assorted new-skill PRs (#2319 ecc-recipes, #2314 quant-trading, #2281 council-multi-model, #2277 living-docs, #2288 mailtrap — needs cred-handling security review). Resolve needs-work conflicting/large PRs (#2274 gateguard rebase, #2270 OMP split, #2318/#2315 large drops). Close low-signal drive-bys: PR #2313 (empty template), PR #2137 (vague AI-slop SOP), agentshield #99 (spam). Route marketing reshare #2069 to content (ECC was 'featured', not a winner).
+
+## LATER
+
+### Free runtime guard hook with Pro centralized telemetry + trust registry
+
+- **Area:** agentshield | **Effort:** XL | **Impact:** flagship
+- **MRR angle:** Closes the biggest competitive gap (GoPlus AgentGuard runtime blocking, Snyk-Evo fleet monitoring) and is a pure hosted play billed per active agent/seat. Free static deny-list neutralizes AgentGuard's differentiator; Pro baselining + telemetry + managed trust registry is the recurring upsell.
+
+Today the runtime monitor (src/runtime) is a thin deny-list + rate-limit PreToolUse evaluator logging to local NDJSON. Build a streaming evaluator with per-agent/per-repo behavioral baselining and intent-drift scoring (OTel GenAI spans), soft-warn/hard-block inline, and extend taint tracking from single-file static to cross-tool-call / cross-session data-flow lineage (the indirect-injection -> exfiltration chain that dominates 2026 incidents). Add credential-flow tracing (which hook/MCP reads each secret, does it egress). Pro centralizes runtime telemetry ingestion, fleet-wide deny-policy distribution, tamper-evident logging, a managed trust registry, and real-time alerting. This is 'AgentShield Runtime' — agent EDR, not a config linter.
+
+### Cross-machine team airspace + A2A topology security in the control pane
+
+- **Area:** ecc-pro | **Effort:** XL | **Impact:** high
+- **MRR angle:** The clearest Team/Enterprise seat wedge: 'N agents, M humans, zero merge conflicts over Tailscale' is exactly what justifies per-seat team pricing. A2A privilege-escalation visualization is the security-native sibling of the Layer 4 moat, sold alongside the control pane.
+
+Proximity only sees local sessions in one repo today (roadmap v2 cross-machine is unbuilt). Build hosted, authenticated multi-repo/multi-machine airspace (sessions, kanban, proximity, risk ledger) gated behind Team/Enterprise, with the TCAS transmit/steer protocol + agent+human JIT deconfliction as the per-seat value. Add agent-to-agent (A2A) topology security: model the org's multi-agent delegation graph (which agent invokes/delegates to which, with what inherited tools) and highlight confused-deputy / delegation-of-overprivilege paths. Promote the local memory-recall Knowledge panel into a synced team knowledge/RAG store as a Pro add-on.
+
+### Community MCP/skill reputation registry as growth flywheel + Pro risk-score API
+
+- **Area:** agentshield | **Effort:** L | **Impact:** medium
+- **MRR angle:** Doubles as marketing and as the data backbone for a paid risk-score API. Counters Prompt Security's 13,000-server scored registry moat; the crowd + ECC-ecosystem scan-result data flywheel is hard for competitors to replicate.
+
+Build a free community MCP/skill reputation registry aggregating crowd input + AgentShield scan results across the ECC ecosystem, with MCP provenance attestation (SLSA/in-toto/Sigstore-style signed agentshield.lock pinning the full MCP+skill+plugin dependency closure). Sell continuous monitoring, org allow/block policy, Shadow-MCP discovery, and a hosted multi-ecosystem (npm+PyPI+cargo) provenance/SBOM service as Pro. Optional niche add-on: pickle/safetensors/GGUF model-artifact deserialization scanner for local-OSS-model teams.
+
+## Capability baseline (what we have, where the gaps are)
+
+### AgentShield today
+
+AgentShield today is a mature STATIC security scanner for AI-agent configurations (Claude Code and adjacent harnesses), shipping 102 pattern-based rules across secrets, permissions, hooks, MCP, and agents, hardened by a source-confidence/false-positive engine (runtimeConfidence tiers + score weighting). Beyond static rules it layers: MCP tool-poisoning + CVE detection backed by a 21-entry curated threat-intel DB, supply-chain provenance verification (offline + optional npm-online + package-manager hardening), opt-in static taint analysis, opt-in LLM-driven active prompt-injection testing (~70 payloads / 12 categories), opt-in hook sandbox execution with canary secrets, and an Opus 4.6 three-agent adversarial pipeline. Operational surfaces include org policy packs with verified export/promote + exception lifecycle, an installable runtime PreToolUse deny-list monitor, deterministic hash-verified evidence packs with fleet operatorReadback, baseline drift gating, a local watch/alert mode, harness adapters, and full CI integration (GitHub Action, SARIF, corpus self-test). The honest gaps are that detection is overwhelmingly static/signature-based (narrow non-shell hook-code coverage, weak skill-md prompt coverage, no live CVE feed, no real AST taint), and that all the continuous/fleet/hosted primitives (watch, evidence-pack fleet, policy distribution, runtime telemetry, deep LLM analysis) exist only as LOCAL CLI building blocks. That gap is precisely the Pro/Enterprise opportunity: the data models for continuous monitoring, fleet dashboards, hosted scanning, centrally-managed org policy, live threat-intel, and compliance evidence retention are already designed locally and would convert directly into a hosted ECC Tools Pro offering (README already references a $19/seat/mo tier and the ecc-tools GitHub App). 
Key files: src/rules/*, src/{taint,injection,sandbox,supply-chain,threat-intel,runtime,policy,evidence-pack,watch,baseline,harness-adapters,opus}/, README.md, false-positive-audit.md.
+
+Key gaps the roadmap targets:
+
+- STATIC-ONLY for most detection: rules are regex/pattern-based over config text. Polymorphic/obfuscated payloads, novel encodings, and logic-level malice that doesn't match a signature are missed. Deep behavioral detection requires opt-in --opus/--injection/--sandbox (LLM cost or local execution).
+- NON-SHELL HOOK CODE coverage is narrow: hook-code findings only catch explicit signals (output() context injection, transcript access, child-process curl|bash). Broad language-aware analysis of JS/Python/etc hook implementations is not done — README explicitly flags this as a known high-signal caveat.
+- skill-md / freeform prompt text bypasses most agent + injection rules (explicitly acknowledged). Skill prompt bodies have much weaker coverage than CLAUDE.md/agent-md.
+- CVE database is a hand-curated static list of 21 entries with no live feed — goes stale; no automated sync to NVD/GHSA/OSV. No CVSS scoring, no version-range resolution beyond string matching.
+- Supply-chain online check only hits npm registry; no PyPI/cargo/RubyGems online verification, no SBOM generation/consumption, no transitive-dependency graph or lockfile-tree integrity verification (only top-level provenance counts).
+- Watch mode is local single-process fs.watch only (no daemon/service, no persistence across restarts, single targetPath baseline). Webhook alerting exists but there is no hosted ingestion, dashboard, or multi-repo fleet view that actually runs continuously.
+- No hosted/SaaS scanning backend. Everything runs locally or in the user's CI. GitHub App (ecc-tools) is referenced but the scanner core is fully local/offline.
+- No semantic/data-flow analysis across files for MCP tool chaining or multi-agent privilege escalation beyond single-config heuristics; taint analysis is regex source/sink, not real AST/CFG.
+- No detection of malicious model behavior at inference time (only config-time + optional sandbox/injection test). No live transcript/telemetry monitoring of a running agent fleet.
+- Runtime monitor is a thin deny-list evaluator (glob+regex) installed as one hook; no kernel/syscall-level sandboxing, no egress filtering enforcement, no tamper protection on the hook itself.
+
+### ECC Pro surface today
+
+ECC's paid story today is two separate hosted GitHub Apps (ECC Pro at $19/seat/mo for private repos, and ECC Tools with free/pro/enterprise Marketplace tiers + real billing infra), while the entire local plugin including the control pane stays MIT-free with no license gating. The control pane (loopback-only Node server) surfaces Sessions, an interactive kanban with agent+human JIT assignment, local Knowledge recall, MCP connectors, and executable actions. The genuinely differentiated 'moat' — the agent-airspace proximity metric (noisy-OR collision risk, TCAS transmit/steer advisories, 3D embedding) — is fully implemented in code and wired into the snapshot, BUT the 3D 'where-are-the-agents' visualization is never rendered (zero proximity output in the UI), and none of these capabilities are positioned or gated as Pro/Enterprise. The paid value story is thin: Pro currently reads as 'OSS for private repos + PR audits' (commodity vs CodeRabbit/Greptile), while the truly ownable surfaces — 3D agent observability, multi-agent/human JIT deconfliction, cross-machine team airspace, shared team knowledge — are either unrendered, unbuilt, or unmonetized. Also verify live GitHub Marketplace Pro billing-state provenance before claiming native payments are GA. Key files: scripts/lib/control-pane/{server,state,ui,proximity,message-sink,work-item-mutations}.js, scripts/lib/agent-proximity/{distance,graph,index}.js, docs/design/agent-proximity.md, docs/ECC-2.0-REFERENCE-ARCHITECTURE.md, docs/ECC-2.0-GA-ROADMAP.md, README.md:53-83 and :216.
+
+Pro leverage points identified:
+
+- 3D agent-airspace observability dashboard — render the already-computed scanAirspace positions/links/advisories (WebGL/Three.js in the control-pane UI). 'Watch N agents crawl toward each other in code-space and watch one steer away' is a unique, demo-able Pro/Team feature nothing else has. The math is done; only the renderer is missing.
+- Multi-agent / multi-human JIT deconfliction as a TEAM seat product — the TCAS transmit/steer protocol + agent+human kanban JIT assignment is the natural per-seat value. Gate the cross-machine airspace (Tailscale, roadmap v2) behind Team/Enterprise.
+- Hosted control pane / observability backend — today it is loopback-only local. A hosted, authenticated, multi-repo version (sessions, kanban, proximity, risk ledger, HUD/status JSON contract from the reference arch) is the obvious Pro SaaS surface.
+- Shared team knowledge layer — promote the local memory-recall Knowledge panel into a synced team knowledge/RAG store (the reference arch already wants RAG over vetted patterns / PR outcomes / CI failures) as a Pro/Enterprise add-on.
+- AgentShield Enterprise security platform — policy packs (OSS/team/enterprise/regulated), SARIF, supply-chain intel, exec HTML/PDF reports, CI enforcement (reference arch lines 152-173). This is already framed as the enterprise security tier and pairs with the proximity/observability story.
+- ECC Tools deep analyzer + Linear sync as the GitHub-native paid PR layer (already the current paid surface); differentiate it from CodeRabbit/Greptile by bundling the agent-proximity/merge-conflict-prevention angle that competitors lack.
+
+## Research inputs
+
+### competitor-gap-analysis
+
+AgentShield (npm "ecc-agentshield") occupies a defensible niche: a free, OSS, zero-account static auditor for AI-agent configuration surfaces (Claude Code .claude/ dirs, hooks, MCP configs, permissions, agent/skill markdown, secrets) shipped as CLI + GitHub Action + GitHub App, with 102 rules across 5 categories, runtimeConfidence source-weighting, supply-chain provenance, evidence packs/SARIF, and an Opus red/blue/auditor pipeline. npm growth is real: 78,108 downloads YTD 2026 (Jan 1-Jun 21), ~29,759 last 30 days, ~7,228 last week, daily 700-2,300. The field splits into two tiers. (1) Direct OSS config/skill scanners: Snyk agent-scan (ex-Invariant mcp-scan, the single most dangerous competitor), NVIDIA SkillSpector (AST taint + YARA), GoPlus AgentGuard (runtime action eval + trust registry, local-only), Mondoo Skill Check, Semgrep Guardian. (2) Enterprise runtime/firewall + model-supply-chain: Lakera Guard (Check Point), Prompt Security (SentinelOne), HiddenLayer, Protect AI Guardian (Palo Alto/Prisma AIRS), Noma, plus Cloudflare/Microsoft Defender MCP gateways; GitGuardian ships native Claude Code/Cursor/Copilot secret hooks. AgentShield's biggest gaps: no runtime/inline enforcement (purely static), no LLM-judge semantic prompt-injection/toxic-flow analysis, no live MCP tool-description fetch or rug-pull tool-pinning, no ML model-artifact scanning, no central fleet dashboard, no policy-as-code gateway. Biggest moats: free + zero-account + OSS (Snyk agent-scan needs a SNYK_TOKEN; enterprise tier is all paid/acquired), deep Claude Code config specificity, source-confidence false-positive weighting, and ECC distribution. Clear ECC Pro wedges: hosted fleet dashboard, LLM-judge deep-scan, live MCP runtime proxy + rug-pull detection, policy-as-code CI gates, model-artifact scanning, and a curated AI-tooling malicious-package/skill intel feed.
+
+Notable gaps vs us (missing today):
+
+- **GoPlus AgentGuard — local-only runtime action enforcement + trust registry (the runtime gap)** — Ship a free lightweight PreToolUse hook-based runtime guard (AgentShield already understands Claude Code hook wiring deeply — natural extension via agentshield init), reserving the managed trust registry, org-wide allow/block policy sync, and runtime telemetry/alerting for ECC Pro. Neutralizes AgentGuard's differentiator while keeping the upsell.
+- **Lakera Guard (Check Point) — runtime prompt-injection firewall** — Enterprise inline-firewall is capital-intensive and now owned by Check Point/SentinelOne, so not a near-term build. Realistic ECC Pro angle: a hosted /guard-style endpoint reusing AgentShield's injection rule corpus for lightweight dev/CI gating of agent prompts and tool descriptions — developer-first and cheaper, not an enterprise WAF.
+- **Prompt Security (SentinelOne) — MCP Gateway + dynamic risk scoring of 13,000+ public MCP servers** — Build a free community MCP/skill reputation registry (crowd + AgentShield scan results across the ECC ecosystem) as a growth/data-flywheel asset, then sell continuous monitoring + org allow/block policy + Shadow-MCP discovery as Pro. The registry doubles as marketing and as the data backbone for a Pro risk-score API.
+- **HiddenLayer + Protect AI Guardian (Palo Alto/Prisma AIRS) — ML model-artifact supply-chain scanning** — Pro add-on: pickle/safetensors/GGUF deserialization scanner for agents that load local model artifacts, plus a Hugging Face model-reference checker in agent configs. Niche but a clean upsell for local-OSS-model teams; integrate a free OSS pickle-scan core (picklescan-style) with a Pro signature/IOC feed.
+- **Cloudflare / Microsoft Defender — MCP gateways and managed enforcement infrastructure** — Stay complementary: position AgentShield/ECC Pro as the developer-side pre-flight + CI gate that feeds findings into these gateways (SARIF/JSON export already exists). A Pro integration that exports AgentShield posture to Cloudflare/Defender policy or emits Shadow-MCP candidate lists is a partnership-friendly upsell rather than a competitive build.
+
+### unbuilt-ideation
+
+AgentShield already ships an unusually broad static surface: 102+ rules across secrets/permissions/hooks/MCP/agents, MCP CVE + tool-poisoning detection, supply-chain provenance, taint analysis, sandbox hook execution, injection testing, watch/drift mode, a PreToolUse runtime monitor, org policy-as-code, evidence packs, baseline gates, SARIF/HTML, and the ECC Tools GitHub App + Pro tier. So the real unbuilt ideation is NOT "add another scanner category" — it is moving from static config audit toward live runtime defense, cross-call/cross-session reasoning, and a hosted continuous-assurance product. The biggest concrete gaps, grounded in the shipped code and the 2026 threat landscape: (1) the "runtime monitor" is only a static deny-rule + rate-limit PreToolUse evaluator — there is no behavioral baselining, intent-drift detection, or live taint propagation across actual tool calls; (2) taint tracking is single-file static only, not cross-tool-call / cross-session data-flow; (3) autofix has no verification loop (applies string transforms, never re-scans to prove the finding is gone and nothing new was introduced); (4) zero coverage of non-human/agent identity, least-privilege token scoping, or OAuth/credential-flow tracing (the fastest-growing 2026 risk per CSA/OWASP NHI work); (5) no MCP provenance attestation / signed lockfile (supply-chain is detection + npm metadata, not cryptographic attestation); (6) no A2A / multi-agent / agent-to-agent protocol coverage; (7) no hosted continuous-scanning dashboard with fleet trend lines (evidence-pack fleet exists as CLI, but no SaaS); (8) community rule-pack loader is requested (issue #101) but unbuilt. Each maps cleanly to ECC Pro / ECC Tools monetization because they require hosting, threat-intel feeds, or org-fleet state that an OSS CLI can't carry.
+
+Notable gaps vs us (missing today):
+
+- **Autofix with verification loop (re-scan + no-regression proof)** — OSS gets verify-after-fix locally. Pro gets autofix-as-PR via ECC Tools GitHub App: open a remediation PR, run the verified re-scan in CI, attach the before/after evidence pack, and auto-merge on green — a paid managed-remediation workflow.
+- **Agent identity, least-privilege, and non-human-identity (NHI) governance** — Enterprise policy-pack feature: ship least-privilege scoring + token-rotation/age gates as a 'regulated/enterprise' Pro policy pack, and a hosted NHI inventory across the org's repos in ECC Tools (fleet-level identity sprawl map).
+- **Agent-to-agent (A2A) and multi-agent topology security** — Premium control-pane integration: render the org's multi-agent delegation graph with privilege-escalation paths highlighted, sold alongside ECC 2.0 control pane / Layer 4 proximity as a paid org-fleet visualization.
+- **Community/external rule-pack loader (--rule-pack)** — OSS gets the loader + local packs. Pro gets a curated, signed, continuously-updated commercial rule-pack feed (the CVE/known-malicious-MCP intel from the supply-chain item), turning detections into a subscription.
+
+### devtool-demand-gaps
+
+Across SAST/SCA tools (Snyk, CodeQL, Semgrep, SonarQube, Dependabot) the dominant 2026 developer complaint is not detection but triage: alert fatigue, false positives, and low-value PRs. A Go maintainer publicly called Dependabot a "noise machine"; teams report spending more time triaging Snyk SCA alerts than fixing issues; CodeQL FP-heavy unit-test flags and a postback-on-dismiss UX push developers to ignore alerts entirely. The clear demand is for low-noise, context-aware, PR-time findings with autofix and SARIF/compliance output. For AI-agent codebases specifically, two new direct competitors emerged: Snyk Agent Scan (Open Preview, May 2026 — CLI + background MDM/CrowdStrike mode, cloud-backed, sends tool metadata off-machine) and DryRun Security (contextual NL code policies in PRs, feeds Claude/Cursor/Codex). AgentShield already ships much of what the market asks for in agent-config security: 102 rules, SARIF, GitHub Action, autofix (--fix/remediation), evidence packs, supply-chain checks, runtimeConfidence FP weighting, a local runtime hook-enforcement layer (runtime.ndjson) and a watch/drift detector. The biggest unmet, monetizable gaps are: (1) a hosted Sentry-style aggregated dashboard + agent runtime telemetry (error/tool-failure/cost/drift across many repos and machines) — nobody unifies config-scan + runtime observability; (2) true inline PR-comment review (AgentShield's Action fails CI and emits SARIF but does not post sticky inline comments like DryRun/Aikido); (3) IDE/editor integration (Cursor/Windsurf/VS Code/Claude Code) so findings and fixes land where agents code; (4) natural-language custom org policies (DryRun-style) beyond the current JSON policy presets; (5) compliance/evidence packs mapped to SOC2/PCI frameworks as a paid Pro deliverable. AgentShield's local-first, no-data-leaves-machine posture is a concrete differentiator against Snyk Agent Scan's cloud metadata transmission and a privacy selling point for regulated buyers.
+
+Notable gaps vs us (missing today):
+
+- **IDE/editor integration — findings and fixes where agents actually write code** — Ship a VS Code/Cursor extension (a Claude Code skill already exists via ecc:security-scan) that lints agent configs on save, shows findings inline, and offers fixes — gated behind Pro for org policy sync. Builds on existing harness-adapters; meets developers in the editor where Snyk Agent Scan (CLI/MDM) does not.
+
+> Note: a fourth research thread (recent agentic/MCP CVEs) was blocked by an automated
+> usage-policy classifier on the raw "find vulnerabilities" prompt. The gap it would have
+> covered (refreshing the CVE database) is captured under the rule-pack + intel-feed item and
+> will be handled as a scoped, defensive OSV/GHSA/NVD sync rather than free-form vulnerability research.
+
+## Appendix: open PR / issue triage
+
+### affaan-m/ECC
+
+| Disposition | Ref | Title |
+| --- | --- | --- |
+| merge | PR #2320 | feat(control-pane): 3D agent-airspace viz + /api/proximity feed (Layer 4 observability) |
+| merge | PR #2133 | fix(llm): align Claude provider with current Anthropic API |
+| needs-work | PR #2274 | fix(gateguard): make fact-force checklist tool-agnostic |
+| merge | PR #2307 | fix(tests): resolve 10 failing tests on Windows |
+| merge | PR #2293 | chore(deps): bump npm-minor-and-patch group (5 updates) |
+| needs-work | PR #2260 | chore(deps-dev): bump eslint 9.39.2 to 10.5.0 |
+| triage-later | PR #2319 | feat: add ecc-recipes skill |
+| needs-work | PR #2318 | feat: add OpenSpec ecosystem (5 agents, 2 orchestration skills, 3 integrations) |
+| needs-work | PR #2315 | feat(skills): add 10 custom local skills |
+| triage-later | PR #2314 | feat(skills): add quant-trading-systems skill |
+| close | PR #2313 | Add Pylint workflow for Python code analysis |
+| merge | PR #2312 | fix(opencode): sync plugin metadata counts |
+| triage-later | PR #2311 | feat(skills): add story-lifecycle skill |
+| triage-later | PR #2310 | feat(skills): add project-context skill |
+| triage-later | PR #2309 | feat(skills): add dev-team skill (multi-persona session) |
+| needs-work | PR #2287 | refactor: migrate .kiro.hook files to JSON v1 format |
+| triage-later | PR #2285 | feat(agents): add nuxt-reviewer and /nuxt-review surface |
+| triage-later | PR #2281 | feat: add council-multi-model skill (heterogeneous Codex review) |
+| triage-later | PR #2280 | feat: add AL/Business Central language pack |
+| triage-later | PR #2277 | Add living-docs-governance skill |
+| triage-later | PR #2275 | feat(rules,skills): React Native / Expo rules pack + react-native-patterns skill |
+| merge | PR #2273 | docs(code-tour): document the ref field |
+| needs-work | PR #2270 | fix(omp): harden harness contract |
+| needs-work | PR #2264 | Harden release automation 6097857685862934372 |
+| needs-work | PR #2254 | [codex] add everything codex plugin alias |
+| merge | PR #2246 | docs(commands): generate discoverable <name>/SKILL.md skills not inert flat files |
+| needs-work | PR #2154 | feat: add Kimi Code CLI support |
+| close | PR #2137 | feat: add ULTRA CODE self-evolving operator SOP |
+| needs-work | PR #2136 | Add opt-in AURA trust-check adapter (integrations/aura) |
+| merge | PR #2063 | fix(instinct-cli): pin file reads and stdout to UTF-8 on Windows |
+| merge | issue #2316 | plan-orchestrate: stale ECC install detection after marketplace rename to ecc@ecc |
+| triage-later | issue #2308 | feat: add dev-team, project-context, story-lifecycle community skills |
+| merge | issue #2306 | docs: Scope Decision Guide table duplicated in SKILL.md and observer.md with drift |
+| merge | issue #2305 | chore: unused 'from unittest import mock' in test\_parse\_instinct.py |
+| triage-later | issue #2304 | chore: three naming conventions coexist in continuous-learning-v2 shell scripts |
+| triage-later | issue #2303 | chore: inconsistent shebangs across continuous-learning-v2 shell scripts |
+| merge | issue #2302 | test: add coverage for cmd\_prune, projects delete/gc/merge, \_promote\_specific dry-run, … |
+| merge | issue #2301 | bug: migrate-homunculus.sh pgrep pattern treats $HOME as regex |
+| merge | issue #2300 | bug: SIGALRM handler silently drops in-flight observations in observe.sh |
+| merge | issue #2299 | bug: Python \_update\_registry omits 'id' field present in shell counterpart |
+| merge | issue #2298 | bug: observer.md says 'each instance >= 0.8' but code uses average confidence |
+| security-priority | issue #2297 | bug: \_remove\_project\_storage lacks path containment check |
+| needs-work | issue #2296 | bug: signal counter race condition in observe.sh throttle logic |
+| merge | issue #2295 | fix: replace hardcoded sleep 2 with PID file poll in start-observer.sh |
+| security-priority | issue #2294 | fix: \_write\_registry missing file lock (race with \_update\_registry) |
+| merge | issue #2293-dup | (see PR #2293) |
+| triage-later | issue #2283 | OpenSpec Ecosystem: spec-miner lifecycle extension (5 agents + 3 integrations + CI) |
+| triage-later | issue #2112 | ctx — potential synergy between ECC and ctx |
+| triage-later | issue #2103 | Skill proposal: Before You Build Skill |
+| needs-work | issue #2076 | OpenClaude Compatibility |
+| needs-work | issue #2074 | Frequent 'bun: command not found' Error in OpenCode TUI (Windows) |
+| needs-work | issue #2073 | Do agents/*.md need TOML rewrite for Codex subagent recognition? |
+| triage-later | issue #2069 | Featured ECC in a Medium article — request to add to README and reshare |
+| triage-later | PR #2288 | feat(skills): add mailtrap-email-integration skill |
+
+Triaged all open PRs (30) and issues (24) on affaan-m/ECC. MERGE-READY (clean, correct, mergeable): PR #2320 (maintainer's Layer 4 control-pane 3D viz — top Pro/MRR value), PR #2133 (Claude provider model-ID + adaptive-thinking fix, verified correct against the authoritative Claude API reference — sonnet-4-6/haiku-4-5/opus-4-8, omit temperature, adaptive thinking for Opus 4.7/4.8), PR #2307 + #2063 (Windows fixes), PR #2273/#2246/#2312 (docs/workflow fixes), PR #2293 (dependabot minor/patch). Plus several quick-win issues in continuous-learning-v2 (#2306, #2305, #2302, #2301, #2299, #2298, #2295, #2300) and #2316 (plan-orchestrate stale install detection). SECURITY-PRIORITY: issue #2297 (path traversal — shutil.rmtree without containment check) and issue #2294 (registry write without file lock → corruption) in skills/continuous-learning-v2/scripts/instinct-cli.py. Both should be fixed as a hardening pass. PR #2136 (AURA external trust integration) needs a security review of its third-party dependency. NEEDS-WORK (rebase/scope/review): PR #2274 (gateguard tool-agnostic fix — correct but CONFLICTING), PR #2270 (OMP — +3151/-454, CONFLICTING, scope creep into release automation; split it), PR #2318/#2315/#2154 (large skill/harness drops needing catalog sync + per-item review), PR #2260 (eslint 9→10 major bump — verify before merge), drafts #2264/#2254, plus needs-info issues #2076/#2074/#2073. CLOSE candidates: PR #2313 (empty template, likely conflicts with existing python review), PR #2137 (vague 'ULTRA CODE self-evolving SOP', CONFLICTING, AI-slop). TRIAGE-LATER: the three BMAD-inspired community skills (#2309/#2310/#2311 under tracking issue #2308) and assorted new-skill PRs (#2319, #2314, #2281, #2280, #2277, #2275, #2288, #2285) — all need overlap/dedup review against the existing 200+ skill catalog and manifest sync. Issue #2069 is a marketing reshare request (route to content; note ECC was 'featured', not a winner). 
Pro/MRR-relevant cluster: control-pane Layer 4 (#2320), harness-neutral expansion (Kimi #2154, Codex alias #2254, OpenClaude/Codex compat #2076/#2073), multi-model orchestration skills (#2281, #2318), and continuous-learning reliability/security (#2294/#2297/#2300).
+
+### affaan-m/agentshield
+
+| Disposition | Ref | Title |
+| --- | --- | --- |
+| merge | PR #103 | fix: treat dangerous flags inside permissions.deny/ask rules as prohibitions, not usages |
+| merge | issue #102 | False positive: permissions.deny rules blocking --no-verify flagged CRITICAL, zeroing Perm |
+| needs-work | issue #100 | False positives: --no-verify in string literals (CRITICAL) and 'backward ...' English flag |
+| triage-later | issue #101 | Proposal: external rule-pack loader (--rule-pack) to load community detection rules |
+| merge | PR #97 | docs: Add FAQ section for common questions |
+| needs-work | PR #96 | chore(deps-dev): bump vitest from 3.2.4 to 4.1.8 |
+| close | issue #99 | bm |
+
+7 open items on affaan-m/agentshield: 3 PRs (#103, #97, #96) and 4 issues (#102, #101, #100, #99). The headline is the false-positive cluster (#100, #102, #99-adjacent) where the scanner flags --no-verify inside permissions.deny rules as CRITICAL and zeros the Permissions score — penalizing its own recommended remediation. PR #103 cleanly fixes the structurally-decidable JSON case (#102) with fail-closed logic, 6 new tests, and all review-bot checks green; recommend MERGE as the top trust/conversion win. #100 covers two remaining FPs (--no-verify in string literals + 'backward' English matched as reversed-text in agents.ts:1561) not addressed by #103 — needs-work follow-up. #101 (external --rule-pack loader, ATR integration) is a high-value ecosystem/Pro proposal, well-scoped, recommend triage-later with intent to accept the PR. #97 (README FAQ) is mergeable docs. #96 (vitest 3→4) has a real test failure (renderTerminalAlert assertion under vitest 4) and needs work before merge. #99 ('bm', empty body) is spam — close. Notable caveat: PR #103's checks are only review bots (CodeRabbit/Greptile/GitGuardian); the Verify/test matrix does not appear to have run, so maintainer should confirm the suite passes locally before merge.
@@ -0,0 +1,327 @@
+# ECC Pro: Hosted Multi-Repo Agent Security Posture Dashboard
+
+> Status: draft design for review. Produced 2026-06-21 by an architecture agent grounded
+> in the existing ecc-agentshield primitives. Proposes the hosted ECC Pro surface; does not
+> implement it. Companion to docs/ECC-PRO-SECURITY-ROADMAP.md (the "next" flagship item).
+
+## 1. Title, Thesis, and Wedge
+
+ECC Pro is a hosted, authenticated, multi-repo "Sentry for agent security" surface built on top of the existing `ecc-agentshield` local CLI primitives. AgentShield already does ~30K npm downloads/month with near-zero monetization. The thesis: the continuous and fleet primitives that make a hosted product valuable already exist as local CLI building blocks (evidence packs with `bundleDigest` integrity, `operatorReadback`/`reviewItems` promotion routing, `fs.watch` drift detection, NDJSON runtime allow/block logging, baseline diffing, and policy promotion gates). The fastest path to MRR is not new science; it is hosting these primitives as authenticated multi-repo and multi-org surfaces and unifying config-scan posture with runtime telemetry over time.
+
+The wedge: Snyk and similar SCA tools are scan-only and have no concept of agent-runtime semantics (no PreToolUse deny decisions, no MCP/hook/agent injection model, no drift-over-time on agent config). Sentry has time-series and alerting but zero security semantics; it does not know what a hardcoded `sk-ant-` key, a `Bash(*)` allow rule, or an `autoApprove` MCP server is. CodeRabbit reviews PR diffs but is point-in-time and has no fleet posture rollup or runtime block-rate trend. ECC Pro is the only surface that charts `score` trend, `drift` history, `blocked-command` rate, and `injection-attempt` rate across a fleet of repos, anchored on a security-specific rule engine (102 rules across secrets/permissions/hooks/mcp/agents) that nobody else has. AgentShield was featured at the Cerebral Valley x Anthropic Claude Code Hackathon (Feb 2026); the hosted surface is the commercial extension of that featured tooling.
+
+## 2. Scope: Free Local-First vs Pro Hosted
+
+The free local-first scanner stays the moat. We never paywall the scanner itself; we monetize hosting, history, and multi-repo aggregation. Local-first capability is also what produces the redacted, integrity-checked artifacts the hosted product ingests, so a strong free tier directly grows the funnel.
+
+Free, zero-account, local-only (unchanged, MIT):
+- `agentshield scan` and all 102 rules, `--format terminal|json|markdown|html|sarif`.
+- `--fix`, `agentshield init`, `--opus` deep analysis (user supplies their own `ANTHROPIC_API_KEY`).
+- `--evidence-pack <dir>`, `evidence-pack verify|inspect|fleet` (local fleet routing stays free).
+- `--baseline`, `--save-baseline`, `agentshield baseline write`, `--gate`.
+- `agentshield runtime install|status|repair`, local `runtime.ndjson` logging.
+- `agentshield policy init|export|promote`, all 6 policy packs (`oss`, `team`, `enterprise`, `regulated`, `high-risk-hooks-mcp`, `ci-enforcement`).
+- Local `agentshield watch` (fs.watch drift, terminal/webhook alerts).
+- GitHub Action `affaan-m/agentshield@v1` (CI scanning, SARIF upload, baseline gate).
+- MiniClaw local server.
+
+Pro, hosted, account-required (the recurring-revenue surface):
+- Persisted history: every scan/baseline/drift/runtime event retained and charted over time (free CLI is point-in-time and stateless on the local box).
+- Multi-repo and org rollup: cross-repo posture, fleet `operatorReadback` aggregation, org-level score trend.
+- Authenticated ingestion endpoints for CI scan results, `runtime.ndjson` streaming, and watch/drift events.
+- Hosted dashboard frontend (posture, drift timeline, blocked-command rate, injection-attempt rate, secret-exposure events).
+- Hosted alerting and routing: turn `reviewItems` into assignable tickets, deliver to Slack/Linear/GitHub via the ecc-tools GitHub App.
+- RBAC, audit log, retention/compliance, SSO (Enterprise).
+- Hosted policy promotion gate: org-level promotion approval workflow on top of `policy promote` `reviewItems`.
+
+The hard line: anything that runs against local files and produces a redacted artifact stays free. Anything that stores, aggregates, charts, or routes across repos/time/people is Pro. We never require an account to find a vulnerability; we require one to track a fleet of them over time.
+
+## 3. Architecture
+
+The hosted backend is a thin, stateless ingestion and query layer over the existing artifact shapes. The CLI/Action/App remain the producers; the backend never re-implements scanning. It receives already-redacted artifacts (the CLI redacts paths/usernames/emails/tokens by default in `createRedactor`/`buildReplacements`) and persists summaries plus time-series rollups.
+
+Component diagram (ASCII):
+
+```
+  PRODUCERS (free, local-first, already redacted)
+  +-----------------------+   +------------------------+   +-------------------------+
+  | GitHub Action          |   | agentshield watch       |   | runtime PreToolUse hook |
+  | (CI scan + evidence    |   | (fs.watch, diffBaseline,|   | (evaluateToolCall ->    |
+  |  pack, SARIF, baseline)|   |  DriftResult, webhook)  |   |  runtime.ndjson)        |
+  +-----------+-----------+   +-----------+------------+   +-----------+-------------+
+              |                            |                            |
+              | POST evidence-pack         | POST drift event           | POST/stream ndjson batch
+              | summary + manifest digest  | (DriftResult)              | (RuntimeLogEntry[])
+              v                            v                            v
+  +-----------------------------------------------------------------------------------+
+  | INGESTION GATEWAY (stateless, authenticated)                                       |
+  |  - API token auth + org/repo identity resolution                                   |
+  |  - schema validation (Zod, reuse SecurityReport / DriftResult / RuntimeLogEntry)   |
+  |  - bundleDigest re-verification, idempotency on digest                             |
+  |  - reject-if-not-redacted guard (manifest.redacted must be true for hosted)        |
+  +-----------------------------------+-----------------------------------------------+
+                                      |
+                 +--------------------+--------------------+
+                 v                                         v
+  +-----------------------------+              +-------------------------------+
+  | PRIMARY STORE (Postgres)     |              | TIME-SERIES ROLLUP STORE       |
+  |  org, repo, scan, baseline,  |              |  score_trend, drift_history,   |
+  |  finding, runtime_event,     |  rollup job  |  blocked_cmd_rate,             |
+  |  drift_event, policy_eval,   |------------->|  injection_rate, secret_events |
+  |  evidence_pack, review_item  |              |  (Postgres time buckets or     |
+  +--------------+--------------+              |   ClickHouse for high-volume    |
+                 |                              |   runtime ndjson)               |
+                 |                              +---------------+----------------+
+                 |                                              |
+                 v                                              v
+  +-----------------------------------------------------------------------------------+
+  | QUERY API (authenticated, RBAC-filtered, multi-tenant isolated by org_id)          |
+  +-----------------------------------+-----------------------------------------------+
+                                      |
+                                      v
+  +-----------------------------+        +-------------------------------------------+
+  | DASHBOARD FRONTEND (Next.js) |        | ROUTING/ALERTS (ecc-tools GitHub App,      |
+  |  posture, trends, drift, fleet|        |  Slack/Linear) from reviewItems + tickets  |
+  +-----------------------------+        +-------------------------------------------+
+```
+
+Ingestion sources and their existing producers:
+- CI scan results: GitHub Action already emits the full `SecurityReport` JSON, SARIF, and an evidence pack with `manifest.json` (`bundleDigest`, per-artifact `sha256`/`bytes`) plus `ci-context.json` (`EvidencePackGitHubContext`: `repository`, `sha`, `runId`, `workflow`, `ref`, `actor`). The Action gets a new optional input `ecc-pro-ingest-url` + token; on success it POSTs the inspected pack summary (`EvidencePackInspectionResult`) and the manifest digest.
+- Runtime telemetry: the PreToolUse hook (`evaluateToolCall` -> `logEvalResult`) writes `RuntimeLogEntry` lines to `.agentshield/runtime.ndjson`. A small `agentshield runtime ship` command (Pro) tails and batch-POSTs new NDJSON lines.
+- Watch/drift events: `startWatcher` already computes `DriftResult` and calls `dispatchAlert`. We add a `webhook` alert target that points at the hosted ingest endpoint; the existing `formatWebhookPayload` carries `newFindings`, `resolvedFindings`, `scoreDelta`, `isRegression`, `hasCritical`.
+
+Storage choice: Postgres (Supabase) for the relational entities and most rollups; ClickHouse only if runtime NDJSON volume per org makes per-row retention in Postgres uneconomical (runtime events are append-only and high-cardinality, which is the ClickHouse sweet spot). Default MVP is Postgres-only.
+
+## 4. API Contract
+
+All endpoints are authenticated with an org-scoped API token (header `Authorization: Bearer eccp_...`). Request/response shapes reuse the real field names from the CLI so the producers do not need a translation layer. Ingestion is idempotent keyed on `bundleDigest` (scans) or `(repo_id, timestamp, tool, decision)` hash (runtime).
+
+### 4.1 Ingest a scan / evidence pack summary
+
+`POST /v1/ingest/scan`
+
+The body is the existing `EvidencePackInspectionResult` plus the `ci-context` summary. The backend never asks for raw evidence; it consumes the already-computed inspection summary so it can re-derive the same rollups the local `evidence-pack inspect` produces.
+
+Request:
+```json
+{
+  "repository": "acme/agent-platform",
+  "bundleDigest": "sha256:9f2c...e1",
+  "expectedBundleDigest": "sha256:9f2c...e1",
+  "generatedAt": "2026-06-21T17:42:00.000Z",
+  "redacted": true,
+  "report": {
+    "score": { "grade": "C", "numericScore": 66 },
+    "findings": { "total": 29, "critical": 1, "high": 7, "medium": 8, "low": 10, "info": 3 },
+    "runtimeConfidence": { "active-runtime": 11, "template-example": 14, "project-local-optional": 4 }
+  },
+  "policy": { "status": "failed", "policyPack": "enterprise", "violations": 3 },
+  "baseline": { "status": "regressed", "newFindings": 4, "resolvedFindings": 1, "scoreDelta": -8 },
+  "supplyChain": { "totalPackages": 22, "riskyPackages": 2, "criticalCount": 0, "highCount": 1 },
+  "ciContext": {
+    "provider": "github-actions",
+    "repository": "acme/agent-platform",
+    "workflow": "security.yml",
+    "runId": "1182334455",
+    "sha": "4c1d9ab"
+  },
+  "remediation": { "totalFindings": 29, "autoFixable": 2, "manualReview": 7 }
+}
+```
+
+Response:
+```json
+{
+  "ok": true,
+  "scanId": "scan_01J...",
+  "repoId": "repo_01H...",
+  "ingestedAt": "2026-06-21T17:42:03.114Z",
+  "deduped": false,
+  "rollupsUpdated": ["score_trend", "drift_history", "secret_exposure_events"]
+}
+```
+
+Server-side guards: reject with `422` if `redacted !== true` (hosted tenants must never store unredacted bundles), and respond with `409` (echoing `"deduped": true`) if `bundleDigest` was already ingested for that repo. If `expectedBundleDigest` is present and differs from `bundleDigest`, mark `integrity: "mismatch"` on the stored scan.
+
+### 4.2 Ingest runtime telemetry batch
+
+`POST /v1/ingest/runtime`
+
+The body wraps an `entries` array of the existing `RuntimeLogEntry` shape from `src/runtime/types.ts`, alongside the `repository` and `sessionId` the batch belongs to.
+
+Request:
+```json
+{
+  "repository": "acme/agent-platform",
+  "sessionId": "sess_4f8a",
+  "entries": [
+    { "timestamp": "2026-06-21T17:50:01.002Z", "tool": "Bash", "decision": "block", "reason": "Input matches denied pattern \"rm -rf\"", "durationMs": 2 },
+    { "timestamp": "2026-06-21T17:50:02.114Z", "tool": "Read", "decision": "allow", "durationMs": 1 }
+  ]
+}
+```
+
+Response:
+```json
+{ "ok": true, "accepted": 2, "blocked": 1, "allowed": 1, "rollupsUpdated": ["blocked_command_rate"] }
+```
+
+Note: `RuntimeLogEntry` already carries no raw input payload (only `timestamp`, `tool`, `decision`, `reason`, `durationMs`), so runtime ingestion is safe-by-construction. We keep it that way; the hosted API must not add a raw-input field.
+
+### 4.3 Ingest a drift event
+
+`POST /v1/ingest/drift`
+
+Body is the existing `DriftResult` from `src/watch/types.ts` (already what `formatWebhookPayload` emits).
+
+Request:
+```json
+{
+  "repository": "acme/agent-platform",
+  "timestamp": "2026-06-21T18:01:10.000Z",
+  "newFindings": [ { "id": "secrets-hardcoded-anthropic", "severity": "critical", "category": "secrets", "title": "Hardcoded Anthropic API key", "file": "<target-path>/CLAUDE.md" } ],
+  "resolvedFindings": [],
+  "scoreDelta": -25,
+  "previousScore": 66,
+  "currentScore": 41,
+  "isRegression": true,
+  "hasCritical": true
+}
+```
+
+Response:
+```json
+{ "ok": true, "driftEventId": "drift_01J...", "alertRouted": true }
+```
+
+### 4.4 Query: org fleet rollup
+
+`GET /v1/org/{orgId}/fleet`
+
+Response reuses the `EvidencePackFleetInspectionResult` `operatorReadback` shape so the dashboard and the existing `evidence-pack fleet` consumers share one contract:
+```json
+{
+  "ok": false,
+  "requiresAttention": true,
+  "summary": { "totalPacks": 12, "verifiedPacks": 11, "invalidPacks": 1, "critical": 2, "high": 9, "policyFailures": 3, "baselineRegressions": 2, "riskyPackages": 5 },
+  "operatorReadback": {
+    "status": "blocked",
+    "ready": false,
+    "requiresApproval": true,
+    "digest": "sha256:aa17...",
+    "reviewItemCount": 5,
+    "blockingItemCount": 2,
+    "ownerCount": 3,
+    "owners": ["acme/agent-platform security owner"],
+    "routesRequiringApproval": ["policy-review", "security-blocker"],
+    "approvalIds": ["agsr_2b1c8f0d9e7a4c11"],
+    "nextAction": "Route review items to listed owners and attach approval before promotion."
+  }
+}
+```
+
+### 4.5 Query: per-repo posture and trend
+
+`GET /v1/repo/{repoId}/posture?from=...&to=...&bucket=day`
+Returns `score_trend`, latest `EvidencePackInspectionResult`, latest `DriftResult`, and runtime rollups for the window.
+
+### 4.6 Query: review items (routing)
+
+`GET /v1/repo/{repoId}/review-items`
+Returns the existing `EvidencePackFleetReviewItem[]` (route, severity, priority, `approvalId`, `owner`, `evidencePaths`, `beforeState`, `afterState`, `reversibleAction`, `actions`, `recommendation`, and the Linear-friendly `ticket.externalId`). These map one-to-one to assignable hosted tickets; no new schema needed.
+
+## 5. Data Model
+
+Persisted relational entities (Postgres). All carry `org_id` for tenant isolation; all timestamps are ISO-8601 UTC.
+
+- `org`: `id`, `name`, `github_org_login`, `plan` (`team` | `enterprise`), `created_at`, `sso_enabled`.
+- `repo`: `id`, `org_id`, `full_name` (e.g. `acme/agent-platform`), `github_repo_id` (from `EvidencePackGitHubContext.repositoryId`), `default_provider` (`github-actions` | `local`), `created_at`.
+- `scan`: `id`, `repo_id`, `bundle_digest` (unique per repo, idempotency key), `generated_at`, `redacted`, `grade`, `numeric_score`, `score_breakdown` (jsonb: secrets/permissions/hooks/mcp/agents), `total_findings`, `critical/high/medium/low/info`, `provider`, `ci_sha`, `ci_run_id`, `ci_workflow`, `integrity` (`ok` | `mismatch`).
+- `finding`: `id`, `scan_id`, `finding_key` (the `Finding.id`, e.g. `mcp-risky-filesystem`), `severity`, `category` (`FindingCategory`), `title`, `file` (already redacted to `<target-path>` form), `runtime_confidence` (`RuntimeConfidence`), `fingerprint` (reuse `fingerprintFinding` so the same finding across scans collapses to one timeline). Never store raw `evidence` in the hosted store; store only the redacted `file` and `title`.
+- `baseline`: `id`, `repo_id`, `baseline_timestamp`, `numeric_score`, `finding_count`, `source_scan_id`. Mirrors `SerializedBaseline` (`version`, `timestamp`, `score`, `findings` with `fingerprint`).
+- `baseline_comparison`: `id`, `repo_id`, `scan_id`, `is_regression`, `new_findings_count`, `resolved_findings_count`, `unchanged_count`, `score_delta`, `new_critical_count`, `new_high_count` (the `BaselineComparison` shape).
+- `runtime_event`: `id`, `repo_id`, `session_id`, `timestamp`, `tool`, `decision` (`allow` | `block`), `reason`, `duration_ms` (the `RuntimeLogEntry` shape; high-volume, candidate for ClickHouse).
+- `drift_event`: `id`, `repo_id`, `timestamp`, `score_delta`, `previous_score`, `current_score`, `is_regression`, `has_critical`, `new_findings` (jsonb summary), `resolved_findings` (jsonb summary) (the `DriftResult` shape).
+- `policy_eval`: `id`, `scan_id`, `policy_name`, `policy_pack` (`PolicyPack`), `passed`, `violation_count`, `score`, `min_score`, `exception_summary` (jsonb: `total`/`active`/`expiringSoon`/`expired` from `PolicyExceptionSummary`). Mirrors `PolicyEvaluation`.
+- `evidence_pack`: `id`, `scan_id`, `bundle_digest`, `expected_bundle_digest`, `artifact_count`, `verified_artifact_count`, `redacted`, `generated_at`. Mirrors `EvidencePackInspectionResult`.
+- `review_item`: `id`, `repo_id`, `approval_id` (the `agsr_...` id), `route` (`EvidencePackFleetRoute`), `severity`, `priority`, `owner`, `recommendation`, `ticket_external_id`, `status` (`open` | `approved` | `dismissed`), `assignee`. Mirrors `EvidencePackFleetReviewItem`.
+
+Time-series rollups to chart (materialized from the entities above, bucketed by hour/day/week):
+- `score_trend`: per repo and org-aggregate `numeric_score` and `grade` over time (from `scan.numeric_score`). The headline chart.
+- `drift_history`: count and severity of `drift_event` regressions over time, with `score_delta` band. Answers "is this repo's agent posture decaying?".
+- `blocked_command_rate`: `runtime_event` where `decision = block` over total, per tool, over time. The "Sentry-style" live signal nobody else has.
+- `injection_attempt_rate`: count of blocked runtime events whose `reason` matches injection deny patterns, plus scan findings with `category = injection`, over time.
+- `secret_exposure_events`: timeline of `finding` rows with `category = secrets` and `severity = critical` (e.g. `secrets-hardcoded-*`), de-duplicated by `fingerprint`, so a recurring committed key shows as one persistent event until resolved.
+- `cross_repo_org_rollup`: org-level fold of `score_trend`, open `review_item` count by `route`, `policyFailures`, and `baselineRegressions` (the `EvidencePackFleetSummary` fields), feeding the `operatorReadback.status` badge at org scope.
+
+## 6. Auth Model
+
+Identity and tenancy:
+- Org is the top-level tenant, anchored to a GitHub org login (the ecc-tools GitHub App install scope is the natural onboarding boundary). `repo` rows are children of exactly one `org`; `github_repo_id` from `EvidencePackGitHubContext.repositoryId` is the stable external key.
+- Multi-tenant isolation: every row carries `org_id`. On Supabase Postgres, enforce Row Level Security so every query is filtered by the caller's `org_id`; the query API never accepts a client-supplied `org_id` that is not in the caller's token claims. No cross-org joins exist in any query path.
+
+API tokens:
+- Org-scoped ingestion tokens (`eccp_...`) are minted per org and optionally per repo. Tokens are hashed at rest (store only a SHA-256 of the token, never the token), shown once on creation. CI uses a repo-scoped token in GitHub Actions secrets; runtime/watch shippers use the same.
+- Tokens have a `scope` (`ingest:scan`, `ingest:runtime`, `ingest:drift`, `read`) so a CI token cannot read the dashboard API and a read token cannot write.
+
+RBAC tiers (per org):
+- `owner`: billing, SSO config, token management, member management, policy promotion approval.
+- `admin`: token management, review-item assignment, alert routing config.
+- `member`: view all posture, assign review items to self, comment.
+- `viewer`: read-only posture and trends (auditor / buyer-review persona).
+
+Prohibited handling (hard requirements, enforced server-side):
+- Never store raw secrets. The CLI already redacts paths, usernames, emails, and token-shaped strings by default via `createRedactor`/`buildReplacements` (covers `sk-`, `gh*_`, `github_pat_`, `glpat-`, `npm_`, `AKIA`, JWT `eyJ...`, Slack tokens, emails, etc.). The ingestion gateway must reject any scan payload where `manifest.redacted` / `redacted` is not `true`. Preserve redaction end-to-end; the hosted store only ever holds the `<redacted-token>` / `<target-path>` / `<home>` / `<user>` forms.
+- `runtime_event` ingestion accepts only the `RuntimeLogEntry` fields (`timestamp`, `tool`, `decision`, `reason`, `durationMs`); it must not accept raw tool `input`. The local `ToolCall.input` stays local.
+- Remediation plans and baselines already omit raw evidence and before/after token-shaped strings; preserve that omission in the hosted projection. Findings stored hosted carry redacted `file` + `title` + `fingerprint` only, never raw `evidence`.
+- Audit log: every token mint/revoke, review-item state change, and policy promotion approval is appended to an immutable per-org audit trail (defense-in-depth, least-privilege, secure-by-default).
+
+## 7. MVP vs v2 vs v3 (Build Order)
+
+MVP (smallest shippable Pro v1) -- "history + multi-repo posture for CI scans":
+1. Org/repo model, GitHub App (ecc-tools) install -> org/repo provisioning, org-scoped ingest tokens with RLS isolation.
+2. `POST /v1/ingest/scan` consuming `EvidencePackInspectionResult` + `ci-context`; persist `scan`, `finding`, `evidence_pack`, `policy_eval`, `baseline_comparison`; idempotent on `bundleDigest`; reject-if-not-redacted guard.
+3. GitHub Action gets `ecc-pro-ingest-url` + token inputs; on scan it POSTs the inspected summary.
+4. Dashboard v1: `score_trend` chart, per-repo finding table (severity + `runtimeConfidence` filter), org fleet table reusing `operatorReadback.status`.
+5. Stripe billing, Team plan ($19/seat/mo per the existing ecc-tools Pro listing), per-org token quota.
+
+This is shippable because it only stitches existing artifacts to storage + a chart. No new scanning logic.
+
+v2 -- "runtime telemetry + drift over time + routing":
+6. `POST /v1/ingest/runtime` + `agentshield runtime ship` shipper; `runtime_event` store; `blocked_command_rate` and `injection_attempt_rate` charts.
+7. `POST /v1/ingest/drift` + `watch` webhook target -> hosted; `drift_history` chart; `secret_exposure_events` timeline.
+8. `review_item` ingestion + assignable tickets, alert routing to Slack/Linear/GitHub via ecc-tools App, reusing `approvalId` and `ticket.externalId` for dedupe.
+
+v3 -- "Enterprise governance":
+9. Hosted policy promotion gate: org approval workflow on top of `policy promote` `reviewItems`; required approvals before `operatorReadback.ready`.
+10. SSO/SAML, custom retention, audit-log export, per-org data residency; ClickHouse migration for runtime events if volume warrants.
+
+## 8. Pricing and Packaging Hooks
+
+- Team ($19/seat/mo, matches the current ecc-tools Pro listing): per-seat billing; included repo cap (e.g. 25 repos); 90-day history retention; scan + drift ingestion; Slack/GitHub routing; standard RBAC (owner/admin/member/viewer).
+- Enterprise (per-repo or platform-fee, sales-assisted): metered by `repo` count rather than seats because security platform value scales with fleet size, not headcount; unlimited seats; SSO/SAML; unlimited retention + audit-log export; hosted policy promotion approval gate; `regulated`/`enterprise` policy packs with required-approval enforcement; data residency.
+
+Gating levers (what flips Team -> Enterprise): runtime telemetry retention window, number of repos under management, SSO requirement, policy-promotion approval workflow, audit-log export, and `routesRequiringApproval` enforcement (Enterprise can require that `operatorReadback.requiresApproval` blocks promotion; Team only surfaces it). Per-seat captures small teams; per-repo captures the platform-team buyer whose value is fleet breadth.
+
+## 9. Risks and Open Questions
+
+Risks:
+- Cannibalization: a too-generous hosted free tier could erode the local moat, or a too-aggressive paywall could stall the 30K/mo funnel. Mitigation: never paywall detection; only paywall persistence/aggregation/routing.
+- Redaction trust boundary: the hosted product's entire safety story depends on the CLI redactor being complete. A new token format the regex set misses would be ingested unredacted. Mitigation: reject-if-not-redacted is necessary but not sufficient; add a server-side secondary redaction pass over inbound `title`/`file`/`reason` strings as defense-in-depth, and keep `buildReplacements` patterns under test.
+- Runtime volume economics: `runtime.ndjson` can be high-cardinality per active agent; Postgres retention could get expensive. Mitigation: pre-aggregate to `blocked_command_rate` rollups on ingest and retain raw `runtime_event` only for the plan's window (ClickHouse for Enterprise).
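+- Idempotency edge: `bundleDigest` excludes `manifest.json` and `README.md` (`BUNDLE_DIGEST_EXCLUDED_FILES`), so two scans with identical findings but different `generatedAt` produce the same digest. That is correct for dedupe but means we must key the time-series on `generatedAt`/`ci_run_id`, not on digest alone. As written, the `409` dedupe guard in section 4.1 (and the unique-per-repo `scan.bundle_digest`) would reject the second scan outright, so the ingest path must still record a trend point for a repeated digest instead of dropping it.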
+
+Open questions:
+- Should drift/runtime ingestion from purely local `watch`/runtime (no CI, `provider: "local"`) be allowed for Pro, given there is no GitHub-verifiable repo identity? Proposal: allow it but tag `provider: local` and require a repo-scoped token bound at mint time to a `full_name`.
+- Do we attribute runtime events to a GitHub identity (`ci-context.actor`) for per-developer block-rate, or keep them repo-anonymous for privacy? Leaning repo-anonymous by default with opt-in actor attribution.
+- Is org identity strictly GitHub-org-bound, or do we need a GitHub-independent org for GitLab/local-only users in v2? MVP is GitHub-org-bound via the ecc-tools App.
+- For the `injection_attempt_rate` chart, do we trust runtime `reason` string matching, or do we need a structured `matchedRule` field shipped from `EvalResult` (which has `matchedRule`) instead of only `RuntimeLogEntry` (which drops it)? Proposal: extend the runtime shipper to include `matchedRule` so injection attribution is structured, not string-parsed.
+
+Relevant grounding files (paths relative to the `affaan-m/agentshield` repo root):
+- `src/evidence-pack/index.ts` (`EvidencePackInspectionResult`, `EvidencePackFleetOperatorReadback`, `EvidencePackFleetReviewItem`, `bundleDigest`, `BUNDLE_DIGEST_EXCLUDED_FILES`, `createRedactor`, `buildReplacements`)
+- `src/runtime/types.ts` (`RuntimeLogEntry`, `EvalResult`, `RuntimePolicy`) and `src/runtime/evaluator.ts` (`evaluateToolCall`, `logEvalResult`)
+- `src/watch/types.ts` (`DriftResult`, `WatchConfig`) and `src/watch/index.ts` (`formatWebhookPayload`, `dispatchAlert`)
+- `src/baseline/types.ts` (`SerializedBaseline`, `SerializedFinding`, `BaselineComparison`) and `src/baseline/index.ts` (`fingerprintFinding`)
+- `src/policy/types.ts` (`PolicyEvaluation`, `PolicyPack`, `PolicyExceptionSummary`)
+- `src/types.ts` (`Finding`, `RuntimeConfidence`, `FindingCategory`, `SecurityReport`, `SecurityScore`)
+- `README.md` (GitHub Action inputs/outputs, ecc-tools GitHub App, `ecc-agentshield` npm, ECC Tools Pro $19/seat/mo)
@@ -27,7 +27,6 @@ import ipaddress
 import socket
 import urllib.parse
 import urllib.request
-from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime, timedelta, timezone
 from collections import defaultdict
@@ -395,36 +394,22 @@ def detect_project() -> dict:
    }


-@contextmanager
-def _registry_lock():
-    """Serialize registry read-modify-write across concurrent sessions.
-
-    Acquires the same advisory lock for every registry writer (``_update_registry``
-    and ``_write_registry``) so ``projects delete/gc/merge`` cannot interleave with
-    a concurrent observe-time update and corrupt ``projects.json``. No-op on
-    platforms without ``fcntl`` (Windows).
-    """
-    REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
-    lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
-    lock_fd = None
-    try:
-        if _HAS_FCNTL:
-            lock_fd = open(lock_path, "w")
-            fcntl.flock(lock_fd, fcntl.LOCK_EX)
-        yield
-    finally:
-        if lock_fd is not None:
-            fcntl.flock(lock_fd, fcntl.LOCK_UN)
-            lock_fd.close()
-
-
 def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
    """Update the projects.json registry.

    Uses file locking (where available) to prevent concurrent sessions from
    overwriting each other's updates.
    """
-    with _registry_lock():
+    REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
+    lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
+    lock_fd = None
+
+    try:
+        # Acquire advisory lock to serialize read-modify-write
+        if _HAS_FCNTL:
+            lock_fd = open(lock_path, "w")
+            fcntl.flock(lock_fd, fcntl.LOCK_EX)
+
        try:
            with open(REGISTRY_FILE, encoding="utf-8") as f:
                registry = json.load(f)
@@ -444,6 +429,10 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_file, REGISTRY_FILE)
+    finally:
+        if lock_fd is not None:
+            fcntl.flock(lock_fd, fcntl.LOCK_UN)
+            lock_fd.close()


 def load_registry() -> dict:
@@ -456,19 +445,15 @@ def load_registry() -> dict:


 def _write_registry(registry: dict) -> None:
-    """Write the project registry atomically.
-
-    Holds the same advisory lock as ``_update_registry`` so concurrent
-    ``projects delete/gc/merge`` and observe-time updates cannot corrupt the file.
-    """
-    with _registry_lock():
-        tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
-        with open(tmp_file, "w", encoding="utf-8") as f:
-            json.dump(registry, f, indent=2)
-            f.write("\n")
-            f.flush()
-            os.fsync(f.fileno())
-        os.replace(tmp_file, REGISTRY_FILE)
+    """Write the project registry atomically."""
+    REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
+    tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
+    with open(tmp_file, "w", encoding="utf-8") as f:
+        json.dump(registry, f, indent=2)
+        f.write("\n")
+        f.flush()
+        os.fsync(f.fileno())
+    os.replace(tmp_file, REGISTRY_FILE)


 def _validate_project_id(project_id: str) -> bool:
@@ -588,14 +573,7 @@ def _project_counts(project_id: str) -> dict:


 def _remove_project_storage(project_id: str) -> None:
-    # Defense-in-depth: resolve and confirm the target is contained within
-    # PROJECTS_DIR before recursively deleting, even though callers validate the
-    # project id. A relaxed validator or a future caller must never be able to
-    # turn this into an arbitrary-directory delete.
-    projects_root = PROJECTS_DIR.resolve()
-    project_dir = (PROJECTS_DIR / project_id).resolve()
-    if project_dir == projects_root or projects_root not in project_dir.parents:
-        raise ValueError(f"refusing to remove {project_dir}: escapes {projects_root}")
+    project_dir = PROJECTS_DIR / project_id
    if project_dir.exists():
        shutil.rmtree(project_dir)

@@ -46,8 +46,6 @@ load_registry = _mod.load_registry
 _validate_instinct_id = _mod._validate_instinct_id
 _validate_import_url = _mod._validate_import_url
 _update_registry = _mod._update_registry
-_write_registry = _mod._write_registry
-_remove_project_storage = _mod._remove_project_storage
 _confidence_bar = _mod._confidence_bar


@@ -1045,41 +1043,3 @@ def test_update_registry_atomic_replaces_file(patch_globals):
    assert "abc123" in data
    leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
    assert leftovers == []
-
-
-def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
-    # Issue #2294: _write_registry now holds the registry lock like
-    # _update_registry. It must still write atomically with no stray tmp files.
-    tree = patch_globals
-    _write_registry({"keep": {"name": "demo", "root": "/repo", "remote": ""}})
-    data = json.loads(tree["registry_file"].read_text())
-    assert data == {"keep": {"name": "demo", "root": "/repo", "remote": ""}}
-    leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
-    assert leftovers == []
-
-
-def test_remove_project_storage_deletes_contained_dir(patch_globals):
-    tree = patch_globals
-    target = tree["projects_dir"] / "proj-1"
-    (target / "instincts").mkdir(parents=True)
-    (target / "instincts" / "x.md").write_text("hi", encoding="utf-8")
-    _remove_project_storage("proj-1")
-    assert not target.exists()
-
-
-def test_remove_project_storage_missing_dir_is_noop(patch_globals):
-    # No raise when the contained dir simply does not exist.
-    _remove_project_storage("never-created")
-
-
-def test_remove_project_storage_blocks_traversal(patch_globals):
-    # Issue #2297: defense-in-depth — a traversal id must be refused even when a
-    # caller skips _validate_project_id, so this can never delete outside
-    # PROJECTS_DIR.
-    with pytest.raises(ValueError):
-        _remove_project_storage("../../etc")
-
-
-def test_remove_project_storage_blocks_root_itself(patch_globals):
-    with pytest.raises(ValueError):
-        _remove_project_storage(".")