mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-06-23 00:21:27 +08:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a26a0d57f5 | |||
| f698e3edfd |
@@ -0,0 +1,189 @@
|
||||
/**
 * Workflow metadata.
 *
 * `phases` lists the three stages of this workflow; their titles match the
 * `phase(...)` calls made further down in this script (Survey, Research,
 * Synthesize).
 */
export const meta = {
  name: 'ecc-pro-security-roadmap',
  description: 'Survey + web-research + triage both ECC and AgentShield, then synthesize a prioritized ECC Pro security roadmap',
  whenToUse: 'Quarterly product/security planning for ECC Pro and AgentShield',
  phases: [
    { title: 'Survey', detail: 'map current AgentShield + ECC Pro capability, triage open PRs/issues on both repos' },
    { title: 'Research', detail: 'recent agentic-security CVEs, competitor gaps, unbuilt ideas, Sentry/code-review feature demand' },
    { title: 'Synthesize', detail: 'merge everything into a prioritized, MRR-biased roadmap' }
  ]
};
|
||||
|
||||
// ----- shared schemas -----
|
||||
/**
 * Output schema for the two repo-triage agents (passed as their `schema` option).
 *
 * Shape: `{ repo, items: [{ ref, title, category, rationale, proValue }], summary }`.
 * Every property is required and extra properties are rejected at both levels.
 */
const TRIAGE_SCHEMA = {
  type: 'object',
  additionalProperties: false,
  properties: {
    repo: { type: 'string' },
    // One entry per open PR / issue that was triaged.
    items: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          ref: { type: 'string', description: 'e.g. "PR #103" or "issue #102"' },
          title: { type: 'string' },
          category: { type: 'string', enum: ['merge', 'close', 'needs-work', 'triage-later', 'security-priority'] },
          rationale: { type: 'string' },
          proValue: { type: 'string', description: 'how this maps to ECC Pro / MRR, or "none"' }
        },
        required: ['ref', 'title', 'category', 'rationale', 'proValue']
      }
    },
    summary: { type: 'string' }
  },
  required: ['repo', 'items', 'summary']
};
|
||||
|
||||
/**
 * Output schema for the two capability-survey agents (passed as their `schema`
 * option).
 *
 * Shape: `{ area, haveToday[], gaps[], proLeverage[], summary }`. All five
 * properties are required and extra properties are rejected.
 */
const CAPABILITY_SCHEMA = {
  type: 'object',
  additionalProperties: false,
  properties: {
    area: { type: 'string' },
    haveToday: { type: 'array', items: { type: 'string' } },
    gaps: { type: 'array', items: { type: 'string' } },
    proLeverage: { type: 'array', items: { type: 'string' }, description: 'what could plausibly be paid/Pro-tier' },
    summary: { type: 'string' }
  },
  required: ['area', 'haveToday', 'gaps', 'proLeverage', 'summary']
};
|
||||
|
||||
/**
 * Output schema for the four research agents (passed as their `schema` option).
 *
 * Shape: `{ topic, findings: [{ title, detail, source, gapVsUs,
 * relevanceToAgentShield?, proOpportunity }], summary }`. Extra properties are
 * rejected at both levels.
 */
const RESEARCH_SCHEMA = {
  type: 'object',
  additionalProperties: false,
  properties: {
    topic: { type: 'string' },
    findings: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          title: { type: 'string' },
          detail: { type: 'string' },
          source: { type: 'string', description: 'URL, CVE id, or product name' },
          gapVsUs: { type: 'string', enum: ['we-have-it', 'partial', 'missing'] },
          relevanceToAgentShield: { type: 'string' },
          proOpportunity: { type: 'string', description: 'how this could become ECC Pro / paid value' }
        },
        // NOTE(review): `relevanceToAgentShield` is the only declared property
        // that is not required — confirm that leaving it optional is intentional.
        required: ['title', 'detail', 'source', 'gapVsUs', 'proOpportunity']
      }
    },
    summary: { type: 'string' }
  },
  required: ['topic', 'findings', 'summary']
};
|
||||
|
||||
/**
 * Output schema for the synthesis agent (passed as its `schema` option).
 *
 * Shape: `{ themes: [{ name, rationale }], items: [roadmap item], top5Now[],
 * summary }`. Every property is required and extra properties are rejected at
 * each object level.
 */
const ROADMAP_SCHEMA = {
  type: 'object',
  additionalProperties: false,
  properties: {
    themes: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: { name: { type: 'string' }, rationale: { type: 'string' } },
        required: ['name', 'rationale']
      }
    },
    // Property named `items` (the roadmap entries); the nested `items` key is
    // the JSON Schema keyword describing each array element.
    items: {
      type: 'array',
      items: {
        type: 'object',
        additionalProperties: false,
        properties: {
          title: { type: 'string' },
          area: { type: 'string', enum: ['agentshield', 'ecc-pro', 'ecc-core', 'both'] },
          horizon: { type: 'string', enum: ['now', 'next', 'later'] },
          effort: { type: 'string', enum: ['S', 'M', 'L', 'XL'] },
          impact: { type: 'string', enum: ['low', 'medium', 'high', 'flagship'] },
          mrrAngle: { type: 'string' },
          description: { type: 'string' },
          linkedItems: { type: 'array', items: { type: 'string' } }
        },
        required: ['title', 'area', 'horizon', 'effort', 'impact', 'mrrAngle', 'description', 'linkedItems']
      }
    },
    top5Now: { type: 'array', items: { type: 'string' } },
    summary: { type: 'string' }
  },
  required: ['themes', 'items', 'top5Now', 'summary']
};
|
||||
|
||||
/**
 * Shared preamble prepended to every agent prompt in this workflow.
 *
 * The fragments are joined with single spaces into one line: read-only
 * constraints, branding rules, package/repo locations, and the tools the
 * prompt tells the agent it has.
 */
const GUARDRAILS = [
  'CONSTRAINTS: research/triage only. Do NOT modify any code, do NOT open/close/merge PRs, do NOT post comments,',
  'do NOT send any external message. Return findings as data only.',
  'Brand it "ECC" (never "everything claude code"). AgentShield was FEATURED at a hackathon, never say it "won".',
  'AgentShield npm package is "ecc-agentshield". Local clone: ~/GitHub/ECC/agentshield. ECC repo: affaan-m/ECC. AgentShield repo: affaan-m/agentshield.',
  'You have Bash (gh CLI), Read, Grep, Glob, and web tools (load via ToolSearch: WebSearch / firecrawl / exa).'
].join(' ');
|
||||
|
||||
phase('Survey');

/**
 * Survey-phase work items: two capability maps (AgentShield, ECC Pro surface)
 * and two repo triages (ECC, AgentShield).
 *
 * Each entry is a zero-argument function that calls `agent(...)` when invoked,
 * so nothing starts until the array is handed to `parallel(...)`.
 */
const surveyThunks = [
  () =>
    agent(
      `${GUARDRAILS}\n\nSURVEY AgentShield's CURRENT detection capability. Read ~/GitHub/ECC/agentshield: src/rules (built-in detectors), src/* area dirs (taint, injection, supply-chain, runtime, threat-intel, sandbox, policy, remediation, evidence-pack, harness-adapters), README.md, CHANGELOG.md, WORKING-CONTEXT.md. Produce an honest capability map: what classes of agentic-security risk it detects TODAY, where the gaps are, and which capabilities could plausibly be a paid/Pro tier (e.g. continuous monitoring, fleet dashboards, hosted scanning, evidence packs, org policy). area="agentshield-capability".`,
      { label: 'survey:agentshield-capability', phase: 'Survey', agentType: 'general-purpose', schema: CAPABILITY_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nSURVEY the CURRENT state of ECC Pro / paid surface. Read in ~/GitHub/ECC/everything-claude-code: scripts/lib/control-pane/* (control pane, proximity, viz), scripts/lib/agent-proximity/*, docs/design/agent-proximity.md, README.md, any pricing/Pro/Enterprise mentions. Determine: what is free vs what is positioned as Pro/Enterprise today, what monetizable surfaces exist (control pane, 3D agent-airspace observability, shared knowledge, JIT team workflows, kanban), and where the paid value story is thin. area="ecc-pro-surface".`,
      { label: 'survey:ecc-pro-surface', phase: 'Survey', agentType: 'general-purpose', schema: CAPABILITY_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nTRIAGE every OPEN PR and ISSUE on the ECC repo (affaan-m/ECC). Use gh: \`gh pr list --repo affaan-m/ECC --state open --limit 80 --json number,title,author,isDraft\` and \`gh issue list --repo affaan-m/ECC --state open --limit 80 --json number,title,labels\`. For the higher-signal ones, peek at the diff/body (\`gh pr view <n> --repo affaan-m/ECC\`). Categorize each: merge / close / needs-work / triage-later / security-priority, with a one-line rationale and any Pro/MRR value. Prioritize identifying security-relevant and Pro-relevant items. repo="affaan-m/ECC".`,
      { label: 'triage:ecc', phase: 'Survey', agentType: 'general-purpose', schema: TRIAGE_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nTRIAGE every OPEN PR and ISSUE on the AgentShield repo (affaan-m/agentshield). Use gh similarly. Pay special attention to the false-positive cluster (issues #100, #102, #99 "bm", PR #103) where the scanner penalizes its own recommended fix and flags benign strings — these hurt trust and conversion. Also assess #101 (external rule-pack loader --rule-pack) and #97 (FAQ docs). Categorize each: merge / close / needs-work / triage-later / security-priority, with rationale and Pro/MRR value. repo="affaan-m/agentshield".`,
      { label: 'triage:agentshield', phase: 'Survey', agentType: 'general-purpose', schema: TRIAGE_SCHEMA }
    )
];
|
||||
|
||||
phase('Research');

/**
 * Research-phase work items: CVEs, competitor gaps, unbuilt-idea ideation, and
 * dev-tool demand. All four return data shaped by RESEARCH_SCHEMA.
 *
 * Each entry is a zero-argument function that calls `agent(...)` when invoked,
 * so nothing starts until the array is handed to `parallel(...)`.
 */
const researchThunks = [
  () =>
    agent(
      `${GUARDRAILS}\n\nDEEP RESEARCH: recent (2025-2026) CVEs and disclosed vulnerability classes in AGENTIC / LLM / MCP security that a scanner like AgentShield should detect. Use web tools (ToolSearch then WebSearch / firecrawl / exa). Cover: MCP server vulns (tool poisoning, rug-pull tool updates, prompt injection via tool descriptions, confused-deputy), CVEs in popular agent frameworks / MCP servers, npm/PyPI supply-chain attacks targeting AI tooling, prompt-injection-driven RCE, memory/context poisoning, credential exfiltration via agents. For each finding mark gapVsUs (we-have-it / partial / missing) vs AgentShield's current detectors, and the Pro opportunity. topic="agentic-cves-2025-2026".`,
      { label: 'research:cves', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nDEEP RESEARCH: competitor / adjacent tools in agent + LLM + supply-chain security and what they do that AgentShield does NOT. Use web tools. Cover products like: Protect AI, Lakera, Prompt Security, HiddenLayer, Snyk, Socket.dev, Endor Labs, Semgrep, GitGuardian, Invariant Labs (MCP-scan), Cloudflare/others' MCP security, plus any new entrants. For each, note their headline capability, whether AgentShield has it (gapVsUs), and how a comparable or better capability could be packaged as ECC Pro paid value. Also: pull npm download stats for "ecc-agentshield" to ground the growth story if reachable. topic="competitor-gap-analysis".`,
      { label: 'research:competitors', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nIDEATION: agentic-security capabilities that have been discussed/considered for AgentShield or ECC but NOT yet built, plus net-new ideas grounded in the threat model. Read ~/GitHub/ECC/agentshield/WORKING-CONTEXT.md and any docs/ for hints of deferred work; read the AgentShield README for the current feature set; then reason about the gaps. Think across the kill chain: discovery/config scan -> PR-time review -> CI gate -> runtime monitor -> incident evidence. Candidate ideas: real-time runtime guardrails, MCP supply-chain provenance/lockfile attestation, taint-tracking across tool calls, behavioral baselining of agents, secret/credential flow tracing, autofix with verification, hosted continuous scanning + dashboards, org policy as code, agent-identity/least-privilege. Mark gapVsUs and proOpportunity for each. topic="unbuilt-ideation".`,
      { label: 'research:ideation', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
    ),
  () =>
    agent(
      `${GUARDRAILS}\n\nRESEARCH: what developers actually want from existing security + code-review tooling (Sentry, GitHub code scanning / CodeQL, Snyk, Semgrep, SonarQube, Dependabot) and where those tools fall short for AI-agent codebases. Use web tools (look at user complaints, feature requests, comparison posts). Identify the unmet demand AgentShield Pro could capture: e.g. PR-time security review tuned for agent configs, low-false-positive findings, IDE/editor integration, runtime error+security telemetry like Sentry but for agents, autofix, SARIF/GitHub integration, evidence/compliance packs. For each, gapVsUs and proOpportunity. topic="devtool-demand-gaps".`,
      { label: 'research:devtool-demand', phase: 'Research', agentType: 'general-purpose', schema: RESEARCH_SCHEMA }
    )
];
|
||||
|
||||
// Survey and research have no cross-dependency; run all 8 concurrently (the
// runtime caps concurrency anyway) and barrier here — synthesis needs everything.
const [survey, research] = await Promise.all([parallel(surveyThunks), parallel(researchThunks)]);

// Drop falsy slots so only agents that actually returned data reach synthesis.
const surveyClean = survey.filter(Boolean);
const researchClean = research.filter(Boolean);
// Denominators come from the thunk arrays so the log stays correct if agents
// are added or removed.
log(`survey: ${surveyClean.length}/${surveyThunks.length} returned, research: ${researchClean.length}/${researchThunks.length} returned`);

phase('Synthesize');

// Everything the synthesis agent sees is embedded in its prompt as pretty-printed JSON.
const bundle = JSON.stringify({ survey: surveyClean, research: researchClean }, null, 2);

// The weekly download figure is kept consistent with the monthly one
// (~30k/month is roughly 7k/week, not 10k/week).
const roadmap = await agent(
  `${GUARDRAILS}\n\nYou are the synthesis lead. Below is JSON from 4 survey agents (AgentShield capability, ECC Pro surface, ECC repo triage, AgentShield repo triage) and 4 research agents (CVEs, competitors, unbuilt ideation, devtool demand).\n\nProduce a PRIORITIZED, MRR-BIASED roadmap for ECC Pro (its AgentShield and ECC portions). Rules:\n- Bias hard toward what converts free users to paid and grows MRR. AgentShield is doing ~7k npm downloads/week (~30k/month) on "ecc-agentshield" - that is a huge top-of-funnel; the roadmap must include how to monetize that funnel (Pro tier, hosted scanning, dashboards, org policy, evidence/compliance packs).\n- Group into a few themes. Each roadmap item: area (agentshield/ecc-pro/ecc-core/both), horizon (now/next/later), effort (S/M/L/XL), impact (low/medium/high/flagship), a concrete mrrAngle, a description, and linkedItems (PR/issue refs from the triage that map to it).\n- Fold the AgentShield false-positive cluster fixes into "now" (trust is a conversion gate).\n- top5Now = the five highest-leverage things to do immediately.\n\nDATA:\n${bundle}`,
  { label: 'synthesize:roadmap', phase: 'Synthesize', agentType: 'general-purpose', schema: ROADMAP_SCHEMA }
);
|
||||
|
||||
// Workflow result: the filtered survey and research outputs plus the synthesized roadmap.
return { survey: surveyClean, research: researchClean, roadmap };
|
||||
@@ -0,0 +1,347 @@
|
||||
# ECC Pro + AgentShield Security Roadmap
|
||||
|
||||
> Status: draft for review. Generated 2026-06-21 from a multi-agent survey + research pass
> (capability map of AgentShield and ECC Pro, triage of every open PR/issue on both repos,
> and web research on competitors, unbuilt ideas, and dev-tool demand). MRR-biased: every
> item is scored for how it converts the free funnel into paid ECC Pro / Enterprise.
|
||||
|
||||
## Why now
|
||||
|
||||
AgentShield (npm `ecc-agentshield`) is doing roughly **30K downloads/month with no decay**
|
||||
(~7.2K/week, ~78K year-to-date) and **903 GitHub stars** — a large, growing top-of-funnel.
|
||||
Today there is almost no bridge from that free funnel to paid ECC Pro, and the single most
|
||||
ownable paid surface — the agent-proximity "airspace" moat — is fully computed but never
|
||||
rendered. This roadmap is built to close both gaps: remove the trust blockers that suppress
|
||||
conversion, make the moat visible, then productize the local CLI primitives into hosted,
|
||||
recurring-revenue surfaces.
|
||||
|
||||
## Themes
|
||||
|
||||
### Trust & conversion gate (now)
|
||||
|
||||
AgentShield's ~30K/month free funnel only converts if the product is trustworthy and the upgrade path is visible. False positives that punish correct hardening, broken model IDs that hard-fail the LLM layer, Windows crashes, and security bugs in our own learning layer all erode trust before a user ever sees a Pro prompt. Fixing the FP cluster, shipping verified correctness/security fixes, and surfacing a Pro CTA at the point of value are the highest-leverage immediate moves.
|
||||
|
||||
### Make the moat visible & demo-able (now)
|
||||
|
||||
The agent-proximity 'airspace' metric is the single differentiated capability nothing else has, but it is math + JSON with zero UI rendering. Shipping the 3D observability dashboard (PR #2320) turns the strongest narrative asset into a demo that sells Team/Enterprise seats on sight.
|
||||
|
||||
### Productize local primitives into hosted Pro SaaS (next)
|
||||
|
||||
Every continuous/fleet capability — watch/drift, baseline gates, evidence-pack fleet operatorReadback, runtime NDJSON, org policy packs — already exists as local CLI building blocks. The fastest path to MRR is hosting these as authenticated multi-repo surfaces: continuous-scanning dashboard, inline PR review + autofix-PR, rule-pack loader + intel feed, compliance packs, and centrally-managed org policy.
|
||||
|
||||
### Close competitive gaps & expand reach (next/later)
|
||||
|
||||
Snyk Agent Scan, NVIDIA SkillSpector, and GoPlus AgentGuard validate the category and add runtime enforcement, LLM-judge semantic detection, and live MCP fetch that AgentShield lacks. LLM-judge Deep Scan, a free runtime guard with Pro telemetry, cross-machine A2A airspace, and a community MCP reputation registry neutralize those differentiators while keeping the free, zero-account, local-first posture as the moat. Harness-neutral expansion widens the whole funnel.
|
||||
|
||||
## Top 5 — do now
|
||||
|
||||
1. Merge PR #103 and ship the issue #100 follow-up to kill the false-positive cluster that punishes correct hardening (trust is the conversion gate)
|
||||
2. Merge PR #2320 to render the 3D agent-airspace observability dashboard (the moat made visible and demo-able)
|
||||
3. Add a Pro upgrade CTA to free CLI output + GitHub App PR comments to monetize the ~30K/month free download funnel, leading with the privacy + low-noise wedge
|
||||
4. Merge the verified correctness/Windows batch (PR #2133 model-ID fix, #2307/#2063 Windows, #2273/#2246/#2312 docs, #2293 deps) and fix issue #2316 plan-orchestrate install detection
|
||||
5. Harden continuous-learning storage: fix path traversal #2297 and registry-corruption race #2294 (security credibility for the brand Pro trades on)
|
||||
|
||||
## Roadmap at a glance
|
||||
|
||||
| Horizon | Item | Area | Effort | Impact |
| --- | --- | --- | --- | --- |
| now | Fix the false-positive cluster that punishes correct hardening | agentshield | S | high |
| now | Add autofix verification loop (re-scan + no-regression proof) | agentshield | M | medium |
| now | Render the 3D agent-airspace observability dashboard (the moat made visible) | ecc-pro | M | flagship |
| now | Add a Pro conversion CTA to free CLI output and GitHub App PR comments | both | S | high |
| now | Ship merge-ready correctness and Windows fixes that protect release velocity and core UX | ecc-core | S | medium |
| now | Harden continuous-learning storage (path traversal + registry race) | ecc-core | S | medium |
| next | Hosted continuous-scanning dashboard with fleet trend lines ('Sentry for agent security') | agentshield | L | flagship |
| next | Inline PR-comment review + autofix-PR via the ecc-tools GitHub App | agentshield | M | high |
| next | External rule-pack loader (--rule-pack) + curated commercial intel feed | agentshield | M | high |
| next | Pro Deep Scan: LLM-judge semantic detection + live MCP tool fetch + rug-pull pinning | agentshield | L | high |
| next | Compliance/evidence packs mapped to SOC2/PCI/ISO controls | agentshield | M | high |
| next | Centrally-managed org policy + RBAC distribution | agentshield | L | high |
| next | Harness-neutral expansion: Kimi, Codex alias, OpenClaude/Codex compat | ecc-core | L | medium |
| next | Batch-review and dedup the community skill/agent PR backlog | ecc-core | M | low |
| later | Free runtime guard hook with Pro centralized telemetry + trust registry | agentshield | XL | flagship |
| later | Cross-machine team airspace + A2A topology security in the control pane | ecc-pro | XL | high |
| later | Community MCP/skill reputation registry as growth flywheel + Pro risk-score API | agentshield | L | medium |
|
||||
|
||||
## NOW
|
||||
|
||||
### Fix the false-positive cluster that punishes correct hardening
|
||||
|
||||
- **Area:** agentshield | **Effort:** S | **Impact:** high
|
||||
- **Linked:** PR #103, issue #102, issue #100
|
||||
- **MRR angle:** FPs that penalize the scanner's own remediation destroy trust with security-conscious buyers and break the demo-and-CI value prop Pro is sold on. Trust is the conversion gate: a hardened config must score well or no one upgrades.
|
||||
|
||||
Merge PR #103 (treats --no-verify inside permissions.deny/ask as a prohibition, not a usage — fail-closed on invalid JSON, 6 tests, all review bots green) after confirming the Verify/test matrix passes locally. Then ship a follow-up PR for the two remaining FPs in issue #100: (1) --no-verify in string literals / help text flagged CRITICAL (needs executed-command vs literal context), and (2) the reversed-text rule at src/rules/agents.ts:1561 matching plain English 'backward/backwards' — re-scope it to require reverse-and-execute evidence so it stops noise-flooding ML/PyTorch agent repos (a high-value adopter segment).
|
||||
|
||||
### Add autofix verification loop (re-scan + no-regression proof)
|
||||
|
||||
- **Area:** agentshield | **Effort:** M | **Impact:** medium
|
||||
- **Linked:** issue #102
|
||||
- **MRR angle:** Verified, trustworthy autofix is the activation moment that makes the free CLI feel magical and seeds confidence in the paid managed-remediation workflow (autofix-as-PR in ECC Tools).
|
||||
|
||||
src/fixer/index.ts applies string transforms but never re-scans to prove the finding is gone and no new finding was introduced — and issue #102 proved that a naive permission tightening can be re-flagged by the scanner. Close the loop: after applying --fix, re-run the scanner, diff the findings set, auto-revert if the score regresses, and emit a verified-fix attestation. OSS gets verify-after-fix locally; Pro gets autofix-as-PR via the ecc-tools GitHub App (open remediation PR, run verified re-scan in CI, attach before/after evidence pack, auto-merge on green).
|
||||
|
||||
### Render the 3D agent-airspace observability dashboard (the moat made visible)
|
||||
|
||||
- **Area:** ecc-pro | **Effort:** M | **Impact:** flagship
|
||||
- **Linked:** PR #2320
|
||||
- **MRR angle:** This is the single most ownable, demo-able paid-looking surface ECC has and nothing else offers it. 'Watch N agents crawl toward each other in code-space and one steer away' converts on the demo alone — it justifies a Team/Enterprise seat that competitors (CodeRabbit/Greptile) cannot match.
|
||||
|
||||
The agent-proximity math (noisy-OR collision risk, TCAS transmit/steer advisories, 3D space-filling embedding) is fully implemented in scripts/lib/agent-proximity/ and computed every tick, but the control-pane UI (ui.js) renders ZERO proximity output. Merge maintainer PR #2320 (self-contained, dependency-free 3D canvas viz + /api/proximity feed, XSS-safe textContent, +254/-0 with tests, MERGEABLE) to ship the renderer. This closes the biggest gap between the moat narrative and a shippable surface.
|
||||
|
||||
### Add a Pro conversion CTA to free CLI output and GitHub App PR comments
|
||||
|
||||
- **Area:** both | **Effort:** S | **Impact:** high
|
||||
- **Linked:** PR #97
|
||||
- **MRR angle:** Directly monetizes the ~30K downloads/month (78,108 YTD, ~7,228/week, no decay) free funnel. There is currently no surfaced upgrade path from the free scanner to ECC Pro — adding a contextual CTA at the point of value is the lowest-effort, highest-leverage conversion lever available.
|
||||
|
||||
Surface a Pro CTA where free users already feel value: a footer in terminal/JSON/markdown reports ('hosted fleet posture + continuous monitoring at ecc-tools Pro'), in the GitHub Action job summary, and in PR check-run comments. Lead with the privacy wedge ('scans never leave your machine' vs Snyk Agent Scan transmitting tool metadata to cloud) and the low-noise/runtimeConfidence accuracy story as the differentiators. Keep AgentShield free + zero-account as the moat against token-gated Snyk Agent Scan.
|
||||
|
||||
### Ship merge-ready correctness and Windows fixes that protect release velocity and core UX
|
||||
|
||||
- **Area:** ecc-core | **Effort:** S | **Impact:** medium
|
||||
- **Linked:** PR #2133, PR #2307, PR #2063, PR #2273, PR #2246, PR #2312, PR #2293, issue #2316
|
||||
- **MRR angle:** Broken model IDs hard-fail the multi-model LLM layer Pro features depend on; broken plan-orchestrate install detection and Windows crashes degrade the paid UX and erode trust before users ever reach the upgrade prompt.
|
||||
|
||||
Merge the clean, verified batch: PR #2133 (Claude provider model-ID + adaptive-thinking fix — replaces invalid IDs with claude-sonnet-4-6/haiku-4-5/opus-4-8, routes SYSTEM to top-level, omits temperature, adaptive thinking for Opus 4.7/4.8; previous default would 404/400 at the API), PR #2307 + #2063 (Windows test/UTF-8 fixes), PR #2273/#2246/#2312 (docs/workflow), PR #2293 (dependabot minor/patch). Schedule a fix for issue #2316 (plan-orchestrate still probes old paths after the ecc@ecc marketplace rename — broken install detection on a core workflow command).
|
||||
|
||||
### Harden continuous-learning storage (path traversal + registry race)
|
||||
|
||||
- **Area:** ecc-core | **Effort:** S | **Impact:** medium
|
||||
- **Linked:** issue #2297, issue #2294, issue #2300, issue #2296
|
||||
- **MRR angle:** ECC sells security tooling; a path-traversal or registry-corruption bug in our own learning layer is a credibility liability that undercuts the entire security brand the Pro tier trades on.
|
||||
|
||||
Fix two security-priority bugs in skills/continuous-learning-v2/scripts/instinct-cli.py as one hardening pass: issue #2297 (shutil.rmtree on PROJECTS_DIR/project_id with no path-containment check — arbitrary directory deletion risk) and issue #2294 (_write_registry writes projects.json without the advisory lock _update_registry uses — concurrent sessions can corrupt the registry). Pair with reliability issues #2300 (SIGALRM drops observations) and #2296 (signal-counter race) for observer integrity.
|
||||
|
||||
## NEXT
|
||||
|
||||
### Hosted continuous-scanning dashboard with fleet trend lines ('Sentry for agent security')
|
||||
|
||||
- **Area:** agentshield | **Effort:** L | **Impact:** flagship
|
||||
- **MRR angle:** THE core ECC Tools Pro product and the clearest recurring-revenue moat: nobody unifies config-scan + runtime telemetry. Billed per seat/repo. Reuses operatorReadback/reviewItems as the API contract — lowest-effort-to-highest-leverage Pro upgrade because the data model already exists.
|
||||
|
||||
Productize the existing local primitives into a hosted, authenticated, multi-repo backend: ingest webhook/CI scan results, runtime.ndjson, and watch/drift events over time; persist baselines; chart score trend, drift history, blocked-command rate, injection-attempt rate, secret-exposure events, and cross-repo org rollup; fire Slack/email regression alerts. The continuous/fleet primitives (src/watch, src/baseline, src/evidence-pack fleet operatorReadback) exist only as local CLI today. Positions AgentShield as the unified config+runtime view that neither Snyk (scan-only) nor Sentry (no security semantics) offers.
|
||||
|
||||
### Inline PR-comment review + autofix-PR via the ecc-tools GitHub App
|
||||
|
||||
- **Area:** agentshield | **Effort:** M | **Impact:** high
|
||||
- **Linked:** PR #2320
|
||||
- **MRR angle:** Sticky inline PR comments + one-click fix PRs are now table stakes (Aikido, DryRun, Pixee) and are the GitHub-native paid surface that converts. The GitHub App already exists as the delivery vehicle; monetize PR-time review + autofix-PR as the paid tier.
|
||||
|
||||
Today the GitHub Action fails CI and emits SARIF (lands in the Security tab) but does not post sticky inline PR comments keyed to changed lines, and autofix is local-CLI only. Add per-line PR comments with one-click 'apply fix' that commits the existing remediation to the PR branch, plus auto-fix-PR generation. Differentiate from CodeRabbit/Greptile by bundling the agent-proximity / merge-conflict-prevention angle competitors lack.
|
||||
|
||||
### External rule-pack loader (--rule-pack) + curated commercial intel feed
|
||||
|
||||
- **Area:** agentshield | **Effort:** M | **Impact:** high
|
||||
- **Linked:** issue #101
|
||||
- **MRR angle:** Turns AgentShield into a platform: OSS gets the loader, Pro gets a signed, continuously-updated commercial rule-pack/threat-intel subscription. The ATR pack (464 rules, in production at Cisco AI Defense + Microsoft) brings credibility and reach; its corpus feeds the accuracy gate.
|
||||
|
||||
Build the loader requested in agentshield issue #101: a signed, versioned external rule-pack format with zod validation mirroring the --policy loader, no new deps, provenance/safety checks on the packs themselves. Maps cleanly onto the existing declarative rule tables and runRules loop. Resolve the one open design question (ScoreBreakdown has five fixed buckets — letting external findings count toward the total without a bucket of their own is acceptable for v1). Couples with a hosted, curated AI-tooling malicious-package/skill + CVE intel feed as the paid subscription layer (the static 21-entry CVE DB goes stale; sync to NVD/GHSA/OSV).
|
||||
|
||||
### Pro Deep Scan: LLM-judge semantic detection + live MCP tool fetch + rug-pull pinning
|
||||
|
||||
- **Area:** agentshield | **Effort:** L | **Impact:** high
|
||||
- **MRR angle:** Directly neutralizes the most dangerous competitor (Snyk Agent Scan) and AgentGuard. Metered/Pro feature where the platform fronts the model cost and runs deeper scheduled adversarial sweeps. Keeps free AgentShield as the no-account default vs Snyk's token-gated CLI.
|
||||
|
||||
Reuse the existing --opus (Red/Blue/Auditor) and --injection (live LLM adversarial, ~70 payloads) plumbing to ship an opt-in LLM-judge layer for semantic prompt-injection and toxic-flow chaining. Add a live MCP connector that fetches tool descriptions and pins tool hashes to flag rug-pulls between scans (capabilities Snyk has and AgentShield lacks). Close the acknowledged skill-md / freeform-prompt coverage gap as a free differentiator (now table stakes vs NVIDIA SkillSpector), reserving AST taint + curated YARA/IOC feed for Pro.
|
||||
|
||||
### Compliance/evidence packs mapped to SOC2/PCI/ISO controls
|
||||
|
||||
- **Area:** agentshield | **Effort:** M | **Impact:** high
|
||||
- **MRR angle:** High-margin enterprise add-on: auditor-ready packs are the artifact GRC teams hand to auditors to justify agent deployments. Buyers want framework-mapped evidence, not raw findings — this is a clear Enterprise seat upsell.
|
||||
|
||||
AgentShield already generates deterministic hash-verified evidence packs and SARIF, plus baseline/drift and org-policy pass/fail. Add explicit framework mapping (findings -> SOC2 CC / PCI DSS / ISO control IDs), coverage and remediation-over-time charts fed by baseline history and runtime.ndjson, and hosted storage/retention/signing. Sell as the compliance deliverable for regulated buyers.
|
||||
|
||||
### Centrally-managed org policy + RBAC distribution
|
||||
|
||||
- **Area:** agentshield | **Effort:** L | **Impact:** high
|
||||
- **MRR angle:** Per-seat Enterprise value: hosted policy distribution, enforcement across the fleet, and waiver/exception workflows with expiry and owner approval are exactly what org buyers pay seats for. Today policy packs are local JSON copied around with no central management.
|
||||
|
||||
Policy packs (6 presets), export/promote with SHA-256-verified promotion, and exception lifecycle already exist as local JSON. Add hosted policy distribution, fleet-wide enforcement, centrally-managed exceptions/waivers (expiry + owner approval), org identity/RBAC, audit-log retention, and central branch-protection evidence. Add a DryRun-style natural-language-to-policy authoring layer ('no MCP server may bind 0.0.0.0', 'skills must not read keychain') that compiles to AgentShield rules — a differentiated UX developers are gravitating to.
|
||||
|
||||
### Harness-neutral expansion: Kimi, Codex alias, OpenClaude/Codex compat
|
||||
|
||||
- **Area:** ecc-core | **Effort:** L | **Impact:** medium
|
||||
- **Linked:** PR #2154, PR #2254, issue #2076, issue #2073, issue #2074
|
||||
- **MRR angle:** Broadens the addressable user base for the whole funnel and aligns with the ECC 2.0 harness-neutral control-pane vision — more harnesses scanned = more top-of-funnel feeding Pro.
|
||||
|
||||
Land the harness-neutral work after the required catalog/registry sync, install-profile review, and surface tests: PR #2154 (Kimi Code CLI, 12th harness, +1397/16 files), PR #2254 (Codex plugin alias — currently DRAFT + CONFLICTING, resolve first), and answer the needs-info compat issues #2076 (OpenClaude), #2073 (Codex subagent TOML format), #2074 (OpenCode bun-on-PATH Windows bug). AgentShield's harness adapters already detect Claude Code/OpenCode/Codex/Gemini/Zed/VS Code/dmux.
|
||||
|
||||
### Batch-review and dedup the community skill/agent PR backlog
|
||||
|
||||
- **Area:** ecc-core | **Effort:** M | **Impact:** low
|
||||
- **Linked:** issue #2308, PR #2309, PR #2310, PR #2311, PR #2285, PR #2275, PR #2274, PR #2270, PR #2318, PR #2315, PR #2313, PR #2137, issue #2069
|
||||
- **MRR angle:** Indirect: keeps the catalog credible and discoverable (catalog quality is a free-tier retention factor) without bloating it with redundant skills that dilute the value prop.
|
||||
|
||||
Triage as batches with overlap/dedup review against the existing 200+ skill catalog plus manifest/catalog/command-registry sync and surface tests: the three BMAD-inspired skills (#2309/#2310/#2311 under tracking issue #2308), framework-reviewer family extensions (#2285 nuxt, #2275 React Native, #2280 AL/BC), and assorted new-skill PRs (#2319 ecc-recipes, #2314 quant-trading, #2281 council-multi-model, #2277 living-docs, #2288 mailtrap — needs cred-handling security review). Resolve needs-work conflicting/large PRs (#2274 gateguard rebase, #2270 OMP split, #2318/#2315 large drops). Close low-signal drive-bys: PR #2313 (empty template), PR #2137 (vague AI-slop SOP), agentshield #99 (spam). Route marketing reshare #2069 to content (ECC was 'featured', not a winner).
|
||||
|
||||
## LATER
|
||||
|
||||
### Free runtime guard hook with Pro centralized telemetry + trust registry
|
||||
|
||||
- **Area:** agentshield | **Effort:** XL | **Impact:** flagship
|
||||
- **MRR angle:** Closes the biggest competitive gap (GoPlus AgentGuard runtime blocking, Snyk-Evo fleet monitoring) and is a pure hosted play billed per active agent/seat. Free static deny-list neutralizes AgentGuard's differentiator; Pro baselining + telemetry + managed trust registry is the recurring upsell.
|
||||
|
||||
Today the runtime monitor (src/runtime) is a thin deny-list + rate-limit PreToolUse evaluator logging to local NDJSON. Build a streaming evaluator with per-agent/per-repo behavioral baselining and intent-drift scoring (OTel GenAI spans), soft-warn/hard-block inline, and extend taint tracking from single-file static to cross-tool-call / cross-session data-flow lineage (the indirect-injection -> exfiltration chain that dominates 2026 incidents). Add credential-flow tracing (which hook/MCP reads each secret, does it egress). Pro centralizes runtime telemetry ingestion, fleet-wide deny-policy distribution, tamper-evident logging, a managed trust registry, and real-time alerting. This is 'AgentShield Runtime' — agent EDR, not a config linter.
|
||||
|
||||
### Cross-machine team airspace + A2A topology security in the control pane
|
||||
|
||||
- **Area:** ecc-pro | **Effort:** XL | **Impact:** high
|
||||
- **MRR angle:** The clearest Team/Enterprise seat wedge: 'N agents, M humans, zero merge conflicts over Tailscale' is exactly what justifies per-seat team pricing. A2A privilege-escalation visualization is the security-native sibling of the Layer 4 moat, sold alongside the control pane.
|
||||
|
||||
Proximity only sees local sessions in one repo today (roadmap v2 cross-machine is unbuilt). Build hosted, authenticated multi-repo/multi-machine airspace (sessions, kanban, proximity, risk ledger) gated behind Team/Enterprise, with the TCAS transmit/steer protocol + agent+human JIT deconfliction as the per-seat value. Add agent-to-agent (A2A) topology security: model the org's multi-agent delegation graph (which agent invokes/delegates to which, with what inherited tools) and highlight confused-deputy / delegation-of-overprivilege paths. Promote the local memory-recall Knowledge panel into a synced team knowledge/RAG store as a Pro add-on.
|
||||
|
||||
### Community MCP/skill reputation registry as growth flywheel + Pro risk-score API
|
||||
|
||||
- **Area:** agentshield | **Effort:** L | **Impact:** medium
|
||||
- **MRR angle:** Doubles as marketing and as the data backbone for a paid risk-score API. Counters Prompt Security's 13,000-server scored registry moat; the crowd + ECC-ecosystem scan-result data flywheel is hard for competitors to replicate.
|
||||
|
||||
Build a free community MCP/skill reputation registry aggregating crowd input + AgentShield scan results across the ECC ecosystem, with MCP provenance attestation (SLSA/in-toto/Sigstore-style signed agentshield.lock pinning the full MCP+skill+plugin dependency closure). Sell continuous monitoring, org allow/block policy, Shadow-MCP discovery, and a hosted multi-ecosystem (npm+PyPI+cargo) provenance/SBOM service as Pro. Optional niche add-on: pickle/safetensors/GGUF model-artifact deserialization scanner for local-OSS-model teams.
|
||||
|
||||
## Capability baseline (what we have, where the gaps are)
|
||||
|
||||
### AgentShield today
|
||||
|
||||
AgentShield today is a mature STATIC security scanner for AI-agent configurations (Claude Code and adjacent harnesses), shipping 102 pattern-based rules across secrets, permissions, hooks, MCP, and agents, hardened by a source-confidence/false-positive engine (runtimeConfidence tiers + score weighting). Beyond static rules it layers: MCP tool-poisoning + CVE detection backed by a 21-entry curated threat-intel DB, supply-chain provenance verification (offline + optional npm-online + package-manager hardening), opt-in static taint analysis, opt-in LLM-driven active prompt-injection testing (~70 payloads / 12 categories), opt-in hook sandbox execution with canary secrets, and an Opus 4.6 three-agent adversarial pipeline. Operational surfaces include org policy packs with verified export/promote + exception lifecycle, an installable runtime PreToolUse deny-list monitor, deterministic hash-verified evidence packs with fleet operatorReadback, baseline drift gating, a local watch/alert mode, harness adapters, and full CI integration (GitHub Action, SARIF, corpus self-test). The honest gaps are that detection is overwhelmingly static/signature-based (narrow non-shell hook-code coverage, weak skill-md prompt coverage, no live CVE feed, no real AST taint), and that all the continuous/fleet/hosted primitives (watch, evidence-pack fleet, policy distribution, runtime telemetry, deep LLM analysis) exist only as LOCAL CLI building blocks. That gap is precisely the Pro/Enterprise opportunity: the data models for continuous monitoring, fleet dashboards, hosted scanning, centrally-managed org policy, live threat-intel, and compliance evidence retention are already designed locally and would convert directly into a hosted ECC Tools Pro offering (README already references a $19/seat/mo tier and the ecc-tools GitHub App). 
Key files: src/rules/*, src/{taint,injection,sandbox,supply-chain,threat-intel,runtime,policy,evidence-pack,watch,baseline,harness-adapters,opus}/, README.md, false-positive-audit.md.
|
||||
|
||||
Key gaps the roadmap targets:
|
||||
|
||||
- STATIC-ONLY for most detection: rules are regex/pattern-based over config text. Polymorphic/obfuscated payloads, novel encodings, and logic-level malice that doesn't match a signature are missed. Deep behavioral detection requires opt-in --opus/--injection/--sandbox (LLM cost or local execution).
|
||||
- NON-SHELL HOOK CODE coverage is narrow: hook-code findings only catch explicit signals (output() context injection, transcript access, child-process curl|bash). Broad language-aware analysis of JS/Python/etc hook implementations is not done — README explicitly flags this as a known high-signal caveat.
|
||||
- skill-md / freeform prompt text bypasses most agent + injection rules (explicitly acknowledged). Skill prompt bodies have much weaker coverage than CLAUDE.md/agent-md.
|
||||
- CVE database is a hand-curated static list of 21 entries with no live feed — goes stale; no automated sync to NVD/GHSA/OSV. No CVSS scoring, no version-range resolution beyond string matching.
|
||||
- Supply-chain online check only hits npm registry; no PyPI/cargo/RubyGems online verification, no SBOM generation/consumption, no transitive-dependency graph or lockfile-tree integrity verification (only top-level provenance counts).
|
||||
- Watch mode is local single-process fs.watch only (no daemon/service, no persistence across restarts, single targetPath baseline). Webhook alerting exists but there is no hosted ingestion, dashboard, or multi-repo fleet view that actually runs continuously.
|
||||
- No hosted/SaaS scanning backend. Everything runs locally or in the user's CI. GitHub App (ecc-tools) is referenced but the scanner core is fully local/offline.
|
||||
- No semantic/data-flow analysis across files for MCP tool chaining or multi-agent privilege escalation beyond single-config heuristics; taint analysis is regex source/sink, not real AST/CFG.
|
||||
- No detection of malicious model behavior at inference time (only config-time + optional sandbox/injection test). No live transcript/telemetry monitoring of a running agent fleet.
|
||||
- Runtime monitor is a thin deny-list evaluator (glob+regex) installed as one hook; no kernel/syscall-level sandboxing, no egress filtering enforcement, no tamper protection on the hook itself.
|
||||
|
||||
### ECC Pro surface today
|
||||
|
||||
ECC's paid story today is two separate hosted GitHub Apps (ECC Pro at $19/seat/mo for private repos, and ECC Tools with free/pro/enterprise Marketplace tiers + real billing infra), while the entire local plugin including the control pane stays MIT-free with no license gating. The control pane (loopback-only Node server) surfaces Sessions, an interactive kanban with agent+human JIT assignment, local Knowledge recall, MCP connectors, and executable actions. The genuinely differentiated 'moat' — the agent-airspace proximity metric (noisy-OR collision risk, TCAS transmit/steer advisories, 3D embedding) — is fully implemented in code and wired into the snapshot, BUT the 3D 'where-are-the-agents' visualization is never rendered (zero proximity output in the UI), and none of these capabilities are positioned or gated as Pro/Enterprise. The paid value story is thin: Pro currently reads as 'OSS for private repos + PR audits' (commodity vs CodeRabbit/Greptile), while the truly ownable surfaces — 3D agent observability, multi-agent/human JIT deconfliction, cross-machine team airspace, shared team knowledge — are either unrendered, unbuilt, or unmonetized. Also verify live GitHub Marketplace Pro billing-state provenance before claiming native payments are GA. Key files: scripts/lib/control-pane/{server,state,ui,proximity,message-sink,work-item-mutations}.js, scripts/lib/agent-proximity/{distance,graph,index}.js, docs/design/agent-proximity.md, docs/ECC-2.0-REFERENCE-ARCHITECTURE.md, docs/ECC-2.0-GA-ROADMAP.md, README.md:53-83 and :216.
|
||||
|
||||
Pro leverage points identified:
|
||||
|
||||
- 3D agent-airspace observability dashboard — render the already-computed scanAirspace positions/links/advisories (WebGL/Three.js in the control-pane UI). 'Watch N agents crawl toward each other in code-space and watch one steer away' is a unique, demo-able Pro/Team feature nothing else has. The math is done; only the renderer is missing.
|
||||
- Multi-agent / multi-human JIT deconfliction as a TEAM seat product — the TCAS transmit/steer protocol + agent+human kanban JIT assignment is the natural per-seat value. Gate the cross-machine airspace (Tailscale, roadmap v2) behind Team/Enterprise.
|
||||
- Hosted control pane / observability backend — today it is loopback-only local. A hosted, authenticated, multi-repo version (sessions, kanban, proximity, risk ledger, HUD/status JSON contract from the reference arch) is the obvious Pro SaaS surface.
|
||||
- Shared team knowledge layer — promote the local memory-recall Knowledge panel into a synced team knowledge/RAG store (the reference arch already wants RAG over vetted patterns / PR outcomes / CI failures) as a Pro/Enterprise add-on.
|
||||
- AgentShield Enterprise security platform — policy packs (OSS/team/enterprise/regulated), SARIF, supply-chain intel, exec HTML/PDF reports, CI enforcement (reference arch lines 152-173). This is already framed as the enterprise security tier and pairs with the proximity/observability story.
|
||||
- ECC Tools deep analyzer + Linear sync as the GitHub-native paid PR layer (already the current paid surface); differentiate it from CodeRabbit/Greptile by bundling the agent-proximity/merge-conflict-prevention angle that competitors lack.
|
||||
|
||||
## Research inputs
|
||||
|
||||
### competitor-gap-analysis
|
||||
|
||||
AgentShield (npm "ecc-agentshield") occupies a defensible niche: a free, OSS, zero-account static auditor for AI-agent configuration surfaces (Claude Code .claude/ dirs, hooks, MCP configs, permissions, agent/skill markdown, secrets) shipped as CLI + GitHub Action + GitHub App, with 102 rules across 5 categories, runtimeConfidence source-weighting, supply-chain provenance, evidence packs/SARIF, and an Opus red/blue/auditor pipeline. npm growth is real: 78,108 downloads YTD 2026 (Jan 1-Jun 21), ~29,759 last 30 days, ~7,228 last week, daily 700-2,300. The field splits into two tiers. (1) Direct OSS config/skill scanners: Snyk agent-scan (ex-Invariant mcp-scan, the single most dangerous competitor), NVIDIA SkillSpector (AST taint + YARA), GoPlus AgentGuard (runtime action eval + trust registry, local-only), Mondoo Skill Check, Semgrep Guardian. (2) Enterprise runtime/firewall + model-supply-chain: Lakera Guard (Check Point), Prompt Security (SentinelOne), HiddenLayer, Protect AI Guardian (Palo Alto/Prisma AIRS), Noma, plus Cloudflare/Microsoft Defender MCP gateways; GitGuardian ships native Claude Code/Cursor/Copilot secret hooks. AgentShield's biggest gaps: no runtime/inline enforcement (purely static), no LLM-judge semantic prompt-injection/toxic-flow analysis, no live MCP tool-description fetch or rug-pull tool-pinning, no ML model-artifact scanning, no central fleet dashboard, no policy-as-code gateway. Biggest moats: free + zero-account + OSS (Snyk agent-scan needs a SNYK_TOKEN; enterprise tier is all paid/acquired), deep Claude Code config specificity, source-confidence false-positive weighting, and ECC distribution. Clear ECC Pro wedges: hosted fleet dashboard, LLM-judge deep-scan, live MCP runtime proxy + rug-pull detection, policy-as-code CI gates, model-artifact scanning, and a curated AI-tooling malicious-package/skill intel feed.
|
||||
|
||||
Notable gaps vs us (missing today):
|
||||
|
||||
- **GoPlus AgentGuard — local-only runtime action enforcement + trust registry (the runtime gap)** — Ship a free lightweight PreToolUse hook-based runtime guard (AgentShield already understands Claude Code hook wiring deeply — natural extension via agentshield init), reserving the managed trust registry, org-wide allow/block policy sync, and runtime telemetry/alerting for ECC Pro. Neutralizes AgentGuard's differentiator while keeping the upsell.
|
||||
- **Lakera Guard (Check Point) — runtime prompt-injection firewall** — Enterprise inline-firewall is capital-intensive and now owned by Check Point/SentinelOne, so not a near-term build. Realistic ECC Pro angle: a hosted /guard-style endpoint reusing AgentShield's injection rule corpus for lightweight dev/CI gating of agent prompts and tool descriptions — developer-first and cheaper, not an enterprise WAF.
|
||||
- **Prompt Security (SentinelOne) — MCP Gateway + dynamic risk scoring of 13,000+ public MCP servers** — Build a free community MCP/skill reputation registry (crowd + AgentShield scan results across the ECC ecosystem) as a growth/data-flywheel asset, then sell continuous monitoring + org allow/block policy + Shadow-MCP discovery as Pro. The registry doubles as marketing and as the data backbone for a Pro risk-score API.
|
||||
- **HiddenLayer + Protect AI Guardian (Palo Alto/Prisma AIRS) — ML model-artifact supply-chain scanning** — Pro add-on: pickle/safetensors/GGUF deserialization scanner for agents that load local model artifacts, plus a Hugging Face model-reference checker in agent configs. Niche but a clean upsell for local-OSS-model teams; integrate a free OSS pickle-scan core (picklescan-style) with a Pro signature/IOC feed.
|
||||
- **Cloudflare / Microsoft Defender — MCP gateways and managed enforcement infrastructure** — Stay complementary: position AgentShield/ECC Pro as the developer-side pre-flight + CI gate that feeds findings into these gateways (SARIF/JSON export already exists). A Pro integration that exports AgentShield posture to Cloudflare/Defender policy or emits Shadow-MCP candidate lists is a partnership-friendly upsell rather than a competitive build.
|
||||
|
||||
### unbuilt-ideation
|
||||
|
||||
AgentShield already ships an unusually broad static surface: 102+ rules across secrets/permissions/hooks/MCP/agents, MCP CVE + tool-poisoning detection, supply-chain provenance, taint analysis, sandbox hook execution, injection testing, watch/drift mode, a PreToolUse runtime monitor, org policy-as-code, evidence packs, baseline gates, SARIF/HTML, and the ECC Tools GitHub App + Pro tier. So the real unbuilt ideation is NOT "add another scanner category" — it is moving from static config audit toward live runtime defense, cross-call/cross-session reasoning, and a hosted continuous-assurance product. The biggest concrete gaps, grounded in the shipped code and the 2026 threat landscape: (1) the "runtime monitor" is only a static deny-rule + rate-limit PreToolUse evaluator — there is no behavioral baselining, intent-drift detection, or live taint propagation across actual tool calls; (2) taint tracking is single-file static only, not cross-tool-call / cross-session data-flow; (3) autofix has no verification loop (applies string transforms, never re-scans to prove the finding is gone and nothing new was introduced); (4) zero coverage of non-human/agent identity, least-privilege token scoping, or OAuth/credential-flow tracing (the fastest-growing 2026 risk per CSA/OWASP NHI work); (5) no MCP provenance attestation / signed lockfile (supply-chain is detection + npm metadata, not cryptographic attestation); (6) no A2A / multi-agent / agent-to-agent protocol coverage; (7) no hosted continuous-scanning dashboard with fleet trend lines (evidence-pack fleet exists as CLI, but no SaaS); (8) community rule-pack loader is requested (issue #101) but unbuilt. Each maps cleanly to ECC Pro / ECC Tools monetization because they require hosting, threat-intel feeds, or org-fleet state that an OSS CLI can't carry.
|
||||
|
||||
Notable gaps vs us (missing today):
|
||||
|
||||
- **Autofix with verification loop (re-scan + no-regression proof)** — OSS gets verify-after-fix locally. Pro gets autofix-as-PR via ECC Tools GitHub App: open a remediation PR, run the verified re-scan in CI, attach the before/after evidence pack, and auto-merge on green — a paid managed-remediation workflow.
|
||||
- **Agent identity, least-privilege, and non-human-identity (NHI) governance** — Enterprise policy-pack feature: ship least-privilege scoring + token-rotation/age gates as a 'regulated/enterprise' Pro policy pack, and a hosted NHI inventory across the org's repos in ECC Tools (fleet-level identity sprawl map).
|
||||
- **Agent-to-agent (A2A) and multi-agent topology security** — Premium control-pane integration: render the org's multi-agent delegation graph with privilege-escalation paths highlighted, sold alongside ECC 2.0 control pane / Layer 4 proximity as a paid org-fleet visualization.
|
||||
- **Community/external rule-pack loader (--rule-pack)** — OSS gets the loader + local packs. Pro gets a curated, signed, continuously-updated commercial rule-pack feed (the CVE/known-malicious-MCP intel from the supply-chain item), turning detections into a subscription.
|
||||
|
||||
### devtool-demand-gaps
|
||||
|
||||
Across SAST/SCA tools (Snyk, CodeQL, Semgrep, SonarQube, Dependabot) the dominant 2026 developer complaint is not detection but triage: alert fatigue, false positives, and low-value PRs. A Go maintainer publicly called Dependabot a "noise machine"; teams report spending more time triaging Snyk SCA alerts than fixing issues; CodeQL FP-heavy unit-test flags and a postback-on-dismiss UX push developers to ignore alerts entirely. The clear demand is for low-noise, context-aware, PR-time findings with autofix and SARIF/compliance output. For AI-agent codebases specifically, two new direct competitors emerged: Snyk Agent Scan (Open Preview, May 2026 — CLI + background MDM/CrowdStrike mode, cloud-backed, sends tool metadata off-machine) and DryRun Security (contextual NL code policies in PRs, feeds Claude/Cursor/Codex). AgentShield already ships much of what the market asks for in agent-config security: 102 rules, SARIF, GitHub Action, autofix (--fix/remediation), evidence packs, supply-chain checks, runtimeConfidence FP weighting, a local runtime hook-enforcement layer (runtime.ndjson) and a watch/drift detector. The biggest unmet, monetizable gaps are: (1) a hosted Sentry-style aggregated dashboard + agent runtime telemetry (error/tool-failure/cost/drift across many repos and machines) — nobody unifies config-scan + runtime observability; (2) true inline PR-comment review (AgentShield's Action fails CI and emits SARIF but does not post sticky inline comments like DryRun/Aikido); (3) IDE/editor integration (Cursor/Windsurf/VS Code/Claude Code) so findings and fixes land where agents code; (4) natural-language custom org policies (DryRun-style) beyond the current JSON policy presets; (5) compliance/evidence packs mapped to SOC2/PCI frameworks as a paid Pro deliverable. AgentShield's local-first, no-data-leaves-machine posture is a concrete differentiator against Snyk Agent Scan's cloud metadata transmission and a privacy selling point for regulated buyers.
|
||||
|
||||
Notable gaps vs us (missing today):
|
||||
|
||||
- **IDE/editor integration — findings and fixes where agents actually write code** — Ship a VS Code/Cursor extension (a Claude Code skill already exists via ecc:security-scan) that lints agent configs on save, shows findings inline, and offers fixes, with org policy sync gated behind Pro. Builds on existing harness-adapters; meets developers in the editor where Snyk Agent Scan (CLI/MDM) does not.
|
||||
|
||||
> Note: a fourth research thread (recent agentic/MCP CVEs) was blocked by an automated
|
||||
> usage-policy classifier on the raw "find vulnerabilities" prompt. The CVE-database refresh
|
||||
> need it would have covered is captured under the rule-pack + intel-feed item, and will be
|
||||
> handled as a scoped, defensive OSV/GHSA/NVD sync rather than free-form vulnerability research.
|
||||
|
||||
## Appendix: open PR / issue triage
|
||||
|
||||
### affaan-m/ECC
|
||||
|
||||
| Disposition | Ref | Title |
|
||||
| --- | --- | --- |
|
||||
| merge | PR #2320 | feat(control-pane): 3D agent-airspace viz + /api/proximity feed (Layer 4 observability) |
|
||||
| merge | PR #2133 | fix(llm): align Claude provider with current Anthropic API |
|
||||
| needs-work | PR #2274 | fix(gateguard): make fact-force checklist tool-agnostic |
|
||||
| merge | PR #2307 | fix(tests): resolve 10 failing tests on Windows |
|
||||
| merge | PR #2293 | chore(deps): bump npm-minor-and-patch group (5 updates) |
|
||||
| needs-work | PR #2260 | chore(deps-dev): bump eslint 9.39.2 to 10.5.0 |
|
||||
| triage-later | PR #2319 | feat: add ecc-recipes skill |
|
||||
| needs-work | PR #2318 | feat: add OpenSpec ecosystem (5 agents, 2 orchestration skills, 3 integrations) |
|
||||
| needs-work | PR #2315 | feat(skills): add 10 custom local skills |
|
||||
| triage-later | PR #2314 | feat(skills): add quant-trading-systems skill |
|
||||
| close | PR #2313 | Add Pylint workflow for Python code analysis |
|
||||
| merge | PR #2312 | fix(opencode): sync plugin metadata counts |
|
||||
| triage-later | PR #2311 | feat(skills): add story-lifecycle skill |
|
||||
| triage-later | PR #2310 | feat(skills): add project-context skill |
|
||||
| triage-later | PR #2309 | feat(skills): add dev-team skill (multi-persona session) |
|
||||
| needs-work | PR #2287 | refactor: migrate .kiro.hook files to JSON v1 format |
|
||||
| triage-later | PR #2285 | feat(agents): add nuxt-reviewer and /nuxt-review surface |
|
||||
| triage-later | PR #2281 | feat: add council-multi-model skill (heterogeneous Codex review) |
|
||||
| triage-later | PR #2280 | feat: add AL/Business Central language pack |
|
||||
| triage-later | PR #2277 | Add living-docs-governance skill |
|
||||
| triage-later | PR #2275 | feat(rules,skills): React Native / Expo rules pack + react-native-patterns skill |
|
||||
| merge | PR #2273 | docs(code-tour): document the ref field |
|
||||
| needs-work | PR #2270 | fix(omp): harden harness contract |
|
||||
| needs-work | PR #2264 | Harden release automation 6097857685862934372 |
|
||||
| needs-work | PR #2254 | [codex] add everything codex plugin alias |
|
||||
| merge | PR #2246 | docs(commands): generate discoverable <name>/SKILL.md skills not inert flat files |
|
||||
| needs-work | PR #2154 | feat: add Kimi Code CLI support |
|
||||
| close | PR #2137 | feat: add ULTRA CODE self-evolving operator SOP |
|
||||
| needs-work | PR #2136 | Add opt-in AURA trust-check adapter (integrations/aura) |
|
||||
| merge | PR #2063 | fix(instinct-cli): pin file reads and stdout to UTF-8 on Windows |
|
||||
| merge | issue #2316 | plan-orchestrate: stale ECC install detection after marketplace rename to ecc@ecc |
|
||||
| triage-later | issue #2308 | feat: add dev-team, project-context, story-lifecycle community skills |
|
||||
| merge | issue #2306 | docs: Scope Decision Guide table duplicated in SKILL.md and observer.md with drift |
|
||||
| merge | issue #2305 | chore: unused 'from unittest import mock' in test\_parse\_instinct.py |
|
||||
| triage-later | issue #2304 | chore: three naming conventions coexist in continuous-learning-v2 shell scripts |
|
||||
| triage-later | issue #2303 | chore: inconsistent shebangs across continuous-learning-v2 shell scripts |
|
||||
| merge | issue #2302 | test: add coverage for cmd\_prune, projects delete/gc/merge, \_promote\_specific dry-run, |
|
||||
| merge | issue #2301 | bug: migrate-homunculus.sh pgrep pattern treats $HOME as regex |
|
||||
| merge | issue #2300 | bug: SIGALRM handler silently drops in-flight observations in observe.sh |
|
||||
| merge | issue #2299 | bug: Python \_update\_registry omits 'id' field present in shell counterpart |
|
||||
| merge | issue #2298 | bug: observer.md says 'each instance >= 0.8' but code uses average confidence |
|
||||
| security-priority | issue #2297 | bug: \_remove\_project\_storage lacks path containment check |
|
||||
| needs-work | issue #2296 | bug: signal counter race condition in observe.sh throttle logic |
|
||||
| merge | issue #2295 | fix: replace hardcoded sleep 2 with PID file poll in start-observer.sh |
|
||||
| security-priority | issue #2294 | fix: \_write\_registry missing file lock (race with \_update\_registry) |
|
||||
| merge | issue #2293-dup | (see PR #2293) |
|
||||
| triage-later | issue #2283 | OpenSpec Ecosystem: spec-miner lifecycle extension (5 agents + 3 integrations + CI) |
|
||||
| triage-later | issue #2112 | ctx — potential synergy between ECC and ctx |
|
||||
| triage-later | issue #2103 | Skill proposal: Before You Build Skill |
|
||||
| needs-work | issue #2076 | OpenClaude Compatibility |
|
||||
| needs-work | issue #2074 | Frequent 'bun: command not found' Error in OpenCode TUI (Windows) |
|
||||
| needs-work | issue #2073 | Do agents/*.md need TOML rewrite for Codex subagent recognition? |
|
||||
| triage-later | issue #2069 | Featured ECC in a Medium article — request to add to README and reshare |
|
||||
| triage-later | PR #2288 | feat(skills): add mailtrap-email-integration skill |
|
||||
|
||||
Triaged all open PRs (30) and issues (24) on affaan-m/ECC. MERGE-READY (clean, correct, mergeable): PR #2320 (maintainer's Layer 4 control-pane 3D viz — top Pro/MRR value), PR #2133 (Claude provider model-ID + adaptive-thinking fix, verified correct against the authoritative Claude API reference — sonnet-4-6/haiku-4-5/opus-4-8, omit temperature, adaptive thinking for Opus 4.7/4.8), PR #2307 + #2063 (Windows fixes), PR #2273/#2246/#2312 (docs/workflow fixes), PR #2293 (dependabot minor/patch). Plus several quick-win issues in continuous-learning-v2 (#2306, #2305, #2302, #2301, #2299, #2298, #2295, #2300) and #2316 (plan-orchestrate stale install detection). SECURITY-PRIORITY: issue #2297 (path traversal — shutil.rmtree without containment check) and issue #2294 (registry write without file lock → corruption) in skills/continuous-learning-v2/scripts/instinct-cli.py. Both should be fixed as a hardening pass. PR #2136 (AURA external trust integration) needs a security review of its third-party dependency. NEEDS-WORK (rebase/scope/review): PR #2274 (gateguard tool-agnostic fix — correct but CONFLICTING), PR #2270 (OMP — +3151/-454, CONFLICTING, scope creep into release automation; split it), PR #2318/#2315/#2154 (large skill/harness drops needing catalog sync + per-item review), PR #2260 (eslint 9→10 major bump — verify before merge), drafts #2264/#2254, plus needs-info issues #2076/#2074/#2073. CLOSE candidates: PR #2313 (empty template, likely conflicts with existing python review), PR #2137 (vague 'ULTRA CODE self-evolving SOP', CONFLICTING, AI-slop). TRIAGE-LATER: the three BMAD-inspired community skills (#2309/#2310/#2311 under tracking issue #2308) and assorted new-skill PRs (#2319, #2314, #2281, #2280, #2277, #2275, #2288, #2285) — all need overlap/dedup review against the existing 200+ skill catalog and manifest sync. Issue #2069 is a marketing reshare request (route to content; note ECC was 'featured', not a winner). 
Pro/MRR-relevant cluster: control-pane Layer 4 (#2320), harness-neutral expansion (Kimi #2154, Codex alias #2254, OpenClaude/Codex compat #2076/#2073), multi-model orchestration skills (#2281, #2318), and continuous-learning reliability/security (#2294/#2297/#2300).
|
||||
|
||||
### affaan-m/agentshield
|
||||
|
||||
| Disposition | Ref | Title |
|
||||
| --- | --- | --- |
|
||||
| merge | PR #103 | fix: treat dangerous flags inside permissions.deny/ask rules as prohibitions, not usages |
|
||||
| merge | issue #102 | False positive: permissions.deny rules blocking --no-verify flagged CRITICAL, zeroing Perm |
|
||||
| needs-work | issue #100 | False positives: --no-verify in string literals (CRITICAL) and 'backward ...' English flag |
|
||||
| triage-later | issue #101 | Proposal: external rule-pack loader (--rule-pack) to load community detection rules |
|
||||
| merge | PR #97 | docs: Add FAQ section for common questions |
|
||||
| needs-work | PR #96 | chore(deps-dev): bump vitest from 3.2.4 to 4.1.8 |
|
||||
| close | issue #99 | bm |
|
||||
|
||||
7 open items on affaan-m/agentshield: 3 PRs (#103, #97, #96) and 4 issues (#102, #101, #100, #99). The headline is the false-positive cluster (#100, #102, #99-adjacent) where the scanner flags --no-verify inside permissions.deny rules as CRITICAL and zeros the Permissions score — penalizing its own recommended remediation. PR #103 cleanly fixes the structurally-decidable JSON case (#102) with fail-closed logic, 6 new tests, and all review-bot checks green; recommend MERGE as the top trust/conversion win. #100 covers two remaining FPs (--no-verify in string literals + 'backward' English matched as reversed-text in agents.ts:1561) not addressed by #103 — needs-work follow-up. #101 (external --rule-pack loader, ATR integration) is a high-value ecosystem/Pro proposal, well-scoped, recommend triage-later with intent to accept the PR. #97 (README FAQ) is mergeable docs. #96 (vitest 3→4) has a real test failure (renderTerminalAlert assertion under vitest 4) and needs work before merge. #99 ('bm', empty body) is spam — close. Notable caveat: PR #103's checks are only review bots (CodeRabbit/Greptile/GitGuardian); the Verify/test matrix does not appear to have run, so maintainer should confirm the suite passes locally before merge.
|
||||
@@ -0,0 +1,327 @@
|
||||
# ECC Pro: Hosted Multi-Repo Agent Security Posture Dashboard
|
||||
|
||||
> Status: draft design for review. Produced 2026-06-21 by an architecture agent grounded
|
||||
> in the existing ecc-agentshield primitives. Proposes the hosted ECC Pro surface; does not
|
||||
> implement it. Companion to docs/ECC-PRO-SECURITY-ROADMAP.md (the "next" flagship item).
|
||||
|
||||
## 1. Title, Thesis, and Wedge
|
||||
|
||||
ECC Pro is a hosted, authenticated, multi-repo "Sentry for agent security" surface built on top of the existing `ecc-agentshield` local CLI primitives. AgentShield already does ~30K npm downloads/month with near-zero monetization. The thesis: the continuous and fleet primitives that make a hosted product valuable already exist as local CLI building blocks (evidence packs with `bundleDigest` integrity, `operatorReadback`/`reviewItems` promotion routing, `fs.watch` drift detection, NDJSON runtime allow/block logging, baseline diffing, and policy promotion gates). The fastest path to MRR is not new science; it is hosting these primitives as authenticated multi-repo and multi-org surfaces and unifying config-scan posture with runtime telemetry over time.
|
||||
|
||||
The wedge: Snyk and similar SCA tools are scan-only and have no concept of agent-runtime semantics (no PreToolUse deny decisions, no MCP/hook/agent injection model, no drift-over-time on agent config). Sentry has time-series and alerting but zero security semantics; it does not know what a hardcoded `sk-ant-` key, a `Bash(*)` allow rule, or an `autoApprove` MCP server is. CodeRabbit reviews PR diffs but is point-in-time and has no fleet posture rollup or runtime block-rate trend. ECC Pro is the only surface that charts `score` trend, `drift` history, `blocked-command` rate, and `injection-attempt` rate across a fleet of repos, anchored on a security-specific rule engine (102 rules across secrets/permissions/hooks/mcp/agents) that nobody else has. AgentShield was featured at the Cerebral Valley x Anthropic Claude Code Hackathon (Feb 2026); the hosted surface is the commercial extension of that featured tooling.
|
||||
|
||||
## 2. Scope: Free Local-First vs Pro Hosted
|
||||
|
||||
The free local-first scanner stays the moat. We never paywall the scanner itself; we monetize hosting, history, and multi-repo aggregation. Local-first capability is also what produces the redacted, integrity-checked artifacts the hosted product ingests, so a strong free tier directly grows the funnel.
|
||||
|
||||
Free, zero-account, local-only (unchanged, MIT):
|
||||
- `agentshield scan` and all 102 rules, `--format terminal|json|markdown|html|sarif`.
|
||||
- `--fix`, `agentshield init`, `--opus` deep analysis (user supplies their own `ANTHROPIC_API_KEY`).
|
||||
- `--evidence-pack <dir>`, `evidence-pack verify|inspect|fleet` (local fleet routing stays free).
|
||||
- `--baseline`, `--save-baseline`, `agentshield baseline write`, `--gate`.
|
||||
- `agentshield runtime install|status|repair`, local `runtime.ndjson` logging.
|
||||
- `agentshield policy init|export|promote`, all 6 policy packs (`oss`, `team`, `enterprise`, `regulated`, `high-risk-hooks-mcp`, `ci-enforcement`).
|
||||
- Local `agentshield watch` (fs.watch drift, terminal/webhook alerts).
|
||||
- GitHub Action `affaan-m/agentshield@v1` (CI scanning, SARIF upload, baseline gate).
|
||||
- MiniClaw local server.
|
||||
|
||||
Pro, hosted, account-required (the recurring-revenue surface):
|
||||
- Persisted history: every scan/baseline/drift/runtime event retained and charted over time (free CLI is point-in-time and stateless on the local box).
|
||||
- Multi-repo and org rollup: cross-repo posture, fleet `operatorReadback` aggregation, org-level score trend.
|
||||
- Authenticated ingestion endpoints for CI scan results, `runtime.ndjson` streaming, and watch/drift events.
|
||||
- Hosted dashboard frontend (posture, drift timeline, blocked-command rate, injection-attempt rate, secret-exposure events).
|
||||
- Hosted alerting and routing: turn `reviewItems` into assignable tickets, deliver to Slack/Linear/GitHub via the ecc-tools GitHub App.
|
||||
- RBAC, audit log, retention/compliance, SSO (Enterprise).
|
||||
- Hosted policy promotion gate: org-level promotion approval workflow on top of `policy promote` `reviewItems`.
|
||||
|
||||
The hard line: anything that runs against local files and produces a redacted artifact stays free. Anything that stores, aggregates, charts, or routes across repos/time/people is Pro. We never require an account to find a vulnerability; we require one to track a fleet of them over time.
|
||||
|
||||
## 3. Architecture
|
||||
|
||||
The hosted backend is a thin, stateless ingestion and query layer over the existing artifact shapes. The CLI/Action/App remain the producers; the backend never re-implements scanning. It receives already-redacted artifacts (the CLI redacts paths/usernames/emails/tokens by default in `createRedactor`/`buildReplacements`) and persists summaries plus time-series rollups.
|
||||
|
||||
Component diagram (ASCII):
|
||||
|
||||
```
|
||||
PRODUCERS (free, local-first, already redacted)
|
||||
+-----------------------+ +------------------------+ +-------------------------+
|
||||
| GitHub Action | | agentshield watch | | runtime PreToolUse hook |
|
||||
| (CI scan + evidence | | (fs.watch, diffBaseline,| | (evaluateToolCall -> |
|
||||
| pack, SARIF, baseline)| | DriftResult, webhook) | | runtime.ndjson) |
|
||||
+-----------+-----------+ +-----------+------------+ +-----------+-------------+
|
||||
| | |
|
||||
| POST evidence-pack | POST drift event | POST/stream ndjson batch
|
||||
| summary + manifest digest | (DriftResult) | (RuntimeLogEntry[])
|
||||
v v v
|
||||
+-----------------------------------------------------------------------------------+
|
||||
| INGESTION GATEWAY (stateless, authenticated) |
|
||||
| - API token auth + org/repo identity resolution |
|
||||
| - schema validation (Zod, reuse SecurityReport / DriftResult / RuntimeLogEntry) |
|
||||
| - bundleDigest re-verification, idempotency on digest |
|
||||
| - reject-if-not-redacted guard (manifest.redacted must be true for hosted) |
|
||||
+-----------------------------------+-----------------------------------------------+
|
||||
|
|
||||
+--------------------+--------------------+
|
||||
v v
|
||||
+-----------------------------+ +-------------------------------+
|
||||
| PRIMARY STORE (Postgres) | | TIME-SERIES ROLLUP STORE |
|
||||
| org, repo, scan, baseline, | | score_trend, drift_history, |
|
||||
| finding, runtime_event, | rollup job | blocked_cmd_rate, |
|
||||
| drift_event, policy_eval, |------------->| injection_rate, secret_events |
|
||||
| evidence_pack, review_item | | (Postgres time buckets or |
|
||||
+--------------+--------------+ | ClickHouse for high-volume |
|
||||
| | runtime ndjson) |
|
||||
| +---------------+----------------+
|
||||
| |
|
||||
v v
|
||||
+-----------------------------------------------------------------------------------+
|
||||
| QUERY API (authenticated, RBAC-filtered, multi-tenant isolated by org_id) |
|
||||
+-----------------------------------+-----------------------------------------------+
|
||||
|
|
||||
v
|
||||
+-----------------------------+ +-------------------------------------------+
|
||||
| DASHBOARD FRONTEND (Next.js) | | ROUTING/ALERTS (ecc-tools GitHub App, |
|
||||
| posture, trends, drift, fleet| | Slack/Linear) from reviewItems + tickets |
|
||||
+-----------------------------+ +-------------------------------------------+
|
||||
```
|
||||
|
||||
Ingestion sources and their existing producers:
|
||||
- CI scan results: GitHub Action already emits the full `SecurityReport` JSON, SARIF, and an evidence pack with `manifest.json` (`bundleDigest`, per-artifact `sha256`/`bytes`) plus `ci-context.json` (`EvidencePackGitHubContext`: `repository`, `sha`, `runId`, `workflow`, `ref`, `actor`). The Action gets a new optional input `ecc-pro-ingest-url` + token; on success it POSTs the inspected pack summary (`EvidencePackInspectionResult`) and the manifest digest.
|
||||
- Runtime telemetry: the PreToolUse hook (`evaluateToolCall` -> `logEvalResult`) writes `RuntimeLogEntry` lines to `.agentshield/runtime.ndjson`. A small `agentshield runtime ship` command (Pro) tails and batch-POSTs new NDJSON lines.
|
||||
- Watch/drift events: `startWatcher` already computes `DriftResult` and calls `dispatchAlert`. We add a `webhook` alert target that points at the hosted ingest endpoint; the existing `formatWebhookPayload` carries `newFindings`, `resolvedFindings`, `scoreDelta`, `isRegression`, `hasCritical`.
|
||||
|
||||
Storage choice: Postgres (Supabase) for the relational entities and most rollups; ClickHouse only if runtime NDJSON volume per org makes per-row retention in Postgres uneconomical (runtime events are append-only and high-cardinality, which is the ClickHouse sweet spot). Default MVP is Postgres-only.
|
||||
|
||||
## 4. API Contract
|
||||
|
||||
All endpoints are authenticated with an org-scoped API token (header `Authorization: Bearer eccp_...`). Request/response shapes reuse the real field names from the CLI so the producers do not need a translation layer. Ingestion is idempotent, keyed on `bundleDigest` (scans) or on a hash of `(repo_id, timestamp, tool, decision)` (runtime).
|
||||
|
||||
### 4.1 Ingest a scan / evidence pack summary
|
||||
|
||||
`POST /v1/ingest/scan`
|
||||
|
||||
The body is the existing `EvidencePackInspectionResult` plus the `ci-context` summary. The backend never asks for raw evidence; it consumes the already-computed inspection summary so it can re-derive the same rollups the local `evidence-pack inspect` produces.
|
||||
|
||||
Request:
|
||||
```json
|
||||
{
|
||||
"repository": "acme/agent-platform",
|
||||
"bundleDigest": "sha256:9f2c...e1",
|
||||
"expectedBundleDigest": "sha256:9f2c...e1",
|
||||
"generatedAt": "2026-06-21T17:42:00.000Z",
|
||||
"redacted": true,
|
||||
"report": {
|
||||
"score": { "grade": "C", "numericScore": 66 },
|
||||
"findings": { "total": 29, "critical": 1, "high": 7, "medium": 8, "low": 10, "info": 3 },
|
||||
"runtimeConfidence": { "active-runtime": 11, "template-example": 14, "project-local-optional": 4 }
|
||||
},
|
||||
"policy": { "status": "failed", "policyPack": "enterprise", "violations": 3 },
|
||||
"baseline": { "status": "regressed", "newFindings": 4, "resolvedFindings": 1, "scoreDelta": -8 },
|
||||
"supplyChain": { "totalPackages": 22, "riskyPackages": 2, "criticalCount": 0, "highCount": 1 },
|
||||
"ciContext": {
|
||||
"provider": "github-actions",
|
||||
"repository": "acme/agent-platform",
|
||||
"workflow": "security.yml",
|
||||
"runId": "1182334455",
|
||||
"sha": "4c1d9ab"
|
||||
},
|
||||
"remediation": { "totalFindings": 29, "autoFixable": 2, "manualReview": 7 }
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"ok": true,
|
||||
"scanId": "scan_01J...",
|
||||
"repoId": "repo_01H...",
|
||||
"ingestedAt": "2026-06-21T17:42:03.114Z",
|
||||
"deduped": false,
|
||||
"rollupsUpdated": ["score_trend", "drift_history", "secret_exposure_events"]
|
||||
}
|
||||
```
|
||||
|
||||
Server-side guards: reject with `422` if `redacted !== true` (hosted tenants must never store unredacted bundles), and respond with `409` and a `"deduped": true` echo if that `bundleDigest` has already been ingested for the repo. If `expectedBundleDigest` is present and differs from `bundleDigest`, mark `integrity: "mismatch"` on the stored scan.
|
||||
|
||||
### 4.2 Ingest runtime telemetry batch
|
||||
|
||||
`POST /v1/ingest/runtime`
|
||||
|
||||
Body is an object carrying the `repository`, the `sessionId`, and an `entries` array of the existing `RuntimeLogEntry` shape from `src/runtime/types.ts`.
|
||||
|
||||
Request:
|
||||
```json
|
||||
{
|
||||
"repository": "acme/agent-platform",
|
||||
"sessionId": "sess_4f8a",
|
||||
"entries": [
|
||||
{ "timestamp": "2026-06-21T17:50:01.002Z", "tool": "Bash", "decision": "block", "reason": "Input matches denied pattern \"rm -rf\"", "durationMs": 2 },
|
||||
{ "timestamp": "2026-06-21T17:50:02.114Z", "tool": "Read", "decision": "allow", "durationMs": 1 }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{ "ok": true, "accepted": 2, "blocked": 1, "allowed": 1, "rollupsUpdated": ["blocked_command_rate"] }
|
||||
```
|
||||
|
||||
Note: `RuntimeLogEntry` already carries no raw input payload (only `timestamp`, `tool`, `decision`, `reason`, `durationMs`), so runtime ingestion is safe-by-construction. We keep it that way; the hosted API must not add a raw-input field.
|
||||
|
||||
### 4.3 Ingest a drift event
|
||||
|
||||
`POST /v1/ingest/drift`
|
||||
|
||||
Body is the existing `DriftResult` from `src/watch/types.ts` (already what `formatWebhookPayload` emits).
|
||||
|
||||
Request:
|
||||
```json
|
||||
{
|
||||
"repository": "acme/agent-platform",
|
||||
"timestamp": "2026-06-21T18:01:10.000Z",
|
||||
"newFindings": [ { "id": "secrets-hardcoded-anthropic", "severity": "critical", "category": "secrets", "title": "Hardcoded Anthropic API key", "file": "<target-path>/CLAUDE.md" } ],
|
||||
"resolvedFindings": [],
|
||||
"scoreDelta": -25,
|
||||
"previousScore": 66,
|
||||
"currentScore": 41,
|
||||
"isRegression": true,
|
||||
"hasCritical": true
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{ "ok": true, "driftEventId": "drift_01J...", "alertRouted": true }
|
||||
```
|
||||
|
||||
### 4.4 Query: org fleet rollup
|
||||
|
||||
`GET /v1/org/{orgId}/fleet`
|
||||
|
||||
Response reuses the `EvidencePackFleetInspectionResult` `operatorReadback` shape so the dashboard and the existing `evidence-pack fleet` consumers share one contract:
|
||||
```json
|
||||
{
|
||||
"ok": false,
|
||||
"requiresAttention": true,
|
||||
"summary": { "totalPacks": 12, "verifiedPacks": 11, "invalidPacks": 1, "critical": 2, "high": 9, "policyFailures": 3, "baselineRegressions": 2, "riskyPackages": 5 },
|
||||
"operatorReadback": {
|
||||
"status": "blocked",
|
||||
"ready": false,
|
||||
"requiresApproval": true,
|
||||
"digest": "sha256:aa17...",
|
||||
"reviewItemCount": 5,
|
||||
"blockingItemCount": 2,
|
||||
"ownerCount": 3,
|
||||
"owners": ["acme/agent-platform security owner"],
|
||||
"routesRequiringApproval": ["policy-review", "security-blocker"],
|
||||
"approvalIds": ["agsr_2b1c8f0d9e7a4c11"],
|
||||
"nextAction": "Route review items to listed owners and attach approval before promotion."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 4.5 Query: per-repo posture and trend
|
||||
|
||||
`GET /v1/repo/{repoId}/posture?from=...&to=...&bucket=day`
|
||||
Returns `score_trend`, latest `EvidencePackInspectionResult`, latest `DriftResult`, and runtime rollups for the window.
|
||||
|
||||
### 4.6 Query: review items (routing)
|
||||
|
||||
`GET /v1/repo/{repoId}/review-items`
|
||||
Returns the existing `EvidencePackFleetReviewItem[]` (route, severity, priority, `approvalId`, `owner`, `evidencePaths`, `beforeState`, `afterState`, `reversibleAction`, `actions`, `recommendation`, and the Linear-friendly `ticket.externalId`). These map one-to-one to assignable hosted tickets; no new schema needed.
|
||||
|
||||
## 5. Data Model
|
||||
|
||||
Persisted relational entities (Postgres). All carry `org_id` for tenant isolation; all timestamps are ISO-8601 UTC.
|
||||
|
||||
- `org`: `id`, `name`, `github_org_login`, `plan` (`team` | `enterprise`), `created_at`, `sso_enabled`.
|
||||
- `repo`: `id`, `org_id`, `full_name` (e.g. `acme/agent-platform`), `github_repo_id` (from `EvidencePackGitHubContext.repositoryId`), `default_provider` (`github-actions` | `local`), `created_at`.
|
||||
- `scan`: `id`, `repo_id`, `bundle_digest` (unique per repo, idempotency key), `generated_at`, `redacted`, `grade`, `numeric_score`, `score_breakdown` (jsonb: secrets/permissions/hooks/mcp/agents), `total_findings`, `critical`/`high`/`medium`/`low`/`info` (per-severity finding counts), `provider`, `ci_sha`, `ci_run_id`, `ci_workflow`, `integrity` (`ok` | `mismatch`).
|
||||
- `finding`: `id`, `scan_id`, `finding_key` (the `Finding.id`, e.g. `mcp-risky-filesystem`), `severity`, `category` (`FindingCategory`), `title`, `file` (already redacted to `<target-path>` form), `runtime_confidence` (`RuntimeConfidence`), `fingerprint` (reuse `fingerprintFinding` so the same finding across scans collapses to one timeline). Never store `evidence` raw for hosted; store only the redacted `file` and `title`.
|
||||
- `baseline`: `id`, `repo_id`, `baseline_timestamp`, `numeric_score`, `finding_count`, `source_scan_id`. Mirrors `SerializedBaseline` (`version`, `timestamp`, `score`, `findings` with `fingerprint`).
|
||||
- `baseline_comparison`: `id`, `repo_id`, `scan_id`, `is_regression`, `new_findings_count`, `resolved_findings_count`, `unchanged_count`, `score_delta`, `new_critical_count`, `new_high_count` (the `BaselineComparison` shape).
|
||||
- `runtime_event`: `id`, `repo_id`, `session_id`, `timestamp`, `tool`, `decision` (`allow` | `block`), `reason`, `duration_ms` (the `RuntimeLogEntry` shape; high-volume, candidate for ClickHouse).
|
||||
- `drift_event`: `id`, `repo_id`, `timestamp`, `score_delta`, `previous_score`, `current_score`, `is_regression`, `has_critical`, `new_findings` (jsonb summary), `resolved_findings` (jsonb summary) (the `DriftResult` shape).
|
||||
- `policy_eval`: `id`, `scan_id`, `policy_name`, `policy_pack` (`PolicyPack`), `passed`, `violation_count`, `score`, `min_score`, `exception_summary` (jsonb: `total`/`active`/`expiringSoon`/`expired` from `PolicyExceptionSummary`). Mirrors `PolicyEvaluation`.
|
||||
- `evidence_pack`: `id`, `scan_id`, `bundle_digest`, `expected_bundle_digest`, `artifact_count`, `verified_artifact_count`, `redacted`, `generated_at`. Mirrors `EvidencePackInspectionResult`.
|
||||
- `review_item`: `id`, `repo_id`, `approval_id` (the `agsr_...` id), `route` (`EvidencePackFleetRoute`), `severity`, `priority`, `owner`, `recommendation`, `ticket_external_id`, `status` (`open` | `approved` | `dismissed`), `assignee`. Mirrors `EvidencePackFleetReviewItem`.
|
||||
|
||||
Time-series rollups to chart (materialized from the entities above, bucketed by hour/day/week):
|
||||
- `score_trend`: per repo and org-aggregate `numeric_score` and `grade` over time (from `scan.numeric_score`). The headline chart.
|
||||
- `drift_history`: count and severity of `drift_event` regressions over time, with `score_delta` band. Answers "is this repo's agent posture decaying?".
|
||||
- `blocked_command_rate`: `runtime_event` where `decision = block` over total, per tool, over time. The "Sentry-style" live signal nobody else has.
|
||||
- `injection_attempt_rate`: count of blocked runtime events whose `reason` matches injection deny patterns, plus scan findings with `category = injection`, over time.
|
||||
- `secret_exposure_events`: timeline of `finding` rows with `category = secrets` and `severity = critical` (e.g. `secrets-hardcoded-*`), de-duplicated by `fingerprint`, so a recurring committed key shows as one persistent event until resolved.
|
||||
- `cross_repo_org_rollup`: org-level fold of `score_trend`, open `review_item` count by `route`, `policyFailures`, and `baselineRegressions` (the `EvidencePackFleetSummary` fields), feeding the `operatorReadback.status` badge at org scope.
|
||||
|
||||
## 6. Auth Model
|
||||
|
||||
Identity and tenancy:
|
||||
- Org is the top-level tenant, anchored to a GitHub org login (the ecc-tools GitHub App install scope is the natural onboarding boundary). `repo` rows are children of exactly one `org`; `github_repo_id` from `EvidencePackGitHubContext.repositoryId` is the stable external key.
|
||||
- Multi-tenant isolation: every row carries `org_id`. On Supabase Postgres, enforce Row Level Security so every query is filtered by the caller's `org_id`; the query API never accepts a client-supplied `org_id` that is not in the caller's token claims. No cross-org joins exist in any query path.
|
||||
|
||||
API tokens:
|
||||
- Org-scoped ingestion tokens (`eccp_...`) are minted per org and optionally per repo. Tokens are hashed at rest (store only a SHA-256 of the token, never the token itself) and are shown only once, at creation. CI uses a repo-scoped token in GitHub Actions secrets; runtime/watch shippers use the same.
|
||||
- Tokens have a `scope` (`ingest:scan`, `ingest:runtime`, `ingest:drift`, `read`) so a CI token cannot read the dashboard API and a read token cannot write.
|
||||
|
||||
RBAC tiers (per org):
|
||||
- `owner`: billing, SSO config, token management, member management, policy promotion approval.
|
||||
- `admin`: token management, review-item assignment, alert routing config.
|
||||
- `member`: view all posture, assign review items to self, comment.
|
||||
- `viewer`: read-only posture and trends (auditor / buyer-review persona).
|
||||
|
||||
Prohibited handling (hard requirements, enforced server-side):
|
||||
- Never store raw secrets. The CLI already redacts paths, usernames, emails, and token-shaped strings by default via `createRedactor`/`buildReplacements` (covers `sk-`, `gh*_`, `github_pat_`, `glpat-`, `npm_`, `AKIA`, JWT `eyJ...`, Slack tokens, emails, etc.). The ingestion gateway must reject any scan payload where `manifest.redacted` / `redacted` is not `true`. Preserve redaction end-to-end; the hosted store only ever holds the `<redacted-token>` / `<target-path>` / `<home>` / `<user>` forms.
|
||||
- `runtime_event` ingestion accepts only the `RuntimeLogEntry` fields (`timestamp`, `tool`, `decision`, `reason`, `durationMs`); it must not accept raw tool `input`. The local `ToolCall.input` stays local.
|
||||
- Remediation plans and baselines already omit raw evidence and before/after token-shaped strings; preserve that omission in the hosted projection. Findings stored hosted carry redacted `file` + `title` + `fingerprint` only, never raw `evidence`.
|
||||
- Audit log: every token mint/revoke, review-item state change, and policy promotion approval is appended to an immutable per-org audit trail (defense-in-depth, least-privilege, secure-by-default).
|
||||
|
||||
## 7. MVP vs v2 vs v3 (Build Order)
|
||||
|
||||
MVP (smallest shippable Pro v1) -- "history + multi-repo posture for CI scans":
|
||||
1. Org/repo model, GitHub App (ecc-tools) install -> org/repo provisioning, org-scoped ingest tokens with RLS isolation.
|
||||
2. `POST /v1/ingest/scan` consuming `EvidencePackInspectionResult` + `ci-context`; persist `scan`, `finding`, `evidence_pack`, `policy_eval`, `baseline_comparison`; idempotent on `bundleDigest`; reject-if-not-redacted guard.
|
||||
3. GitHub Action gets `ecc-pro-ingest-url` + token inputs; on scan it POSTs the inspected summary.
|
||||
4. Dashboard v1: `score_trend` chart, per-repo finding table (severity + `runtimeConfidence` filter), org fleet table reusing `operatorReadback.status`.
|
||||
5. Stripe billing, Team plan ($19/seat/mo per the existing ecc-tools Pro listing), per-org token quota.
|
||||
|
||||
This is shippable because it only stitches existing artifacts to storage + a chart. No new scanning logic.
|
||||
|
||||
v2 -- "runtime telemetry + drift over time + routing":
|
||||
6. `POST /v1/ingest/runtime` + `agentshield runtime ship` shipper; `runtime_event` store; `blocked_command_rate` and `injection_attempt_rate` charts.
|
||||
7. `POST /v1/ingest/drift` + `watch` webhook target -> hosted; `drift_history` chart; `secret_exposure_events` timeline.
|
||||
8. `review_item` ingestion + assignable tickets, alert routing to Slack/Linear/GitHub via ecc-tools App, reusing `approvalId` and `ticket.externalId` for dedupe.
|
||||
|
||||
v3 -- "Enterprise governance":
|
||||
9. Hosted policy promotion gate: org approval workflow on top of `policy promote` `reviewItems`; required approvals before `operatorReadback.ready`.
|
||||
10. SSO/SAML, custom retention, audit-log export, per-org data residency; ClickHouse migration for runtime events if volume warrants.
|
||||
|
||||
## 8. Pricing and Packaging Hooks
|
||||
|
||||
- Team ($19/seat/mo, matches the current ecc-tools Pro listing): per-seat billing; included repo cap (e.g. 25 repos); 90-day history retention; scan + drift ingestion; Slack/GitHub routing; standard RBAC (owner/admin/member/viewer).
|
||||
- Enterprise (per-repo or platform-fee, sales-assisted): metered by `repo` count rather than seats because security platform value scales with fleet size, not headcount; unlimited seats; SSO/SAML; unlimited retention + audit-log export; hosted policy promotion approval gate; `regulated`/`enterprise` policy packs with required-approval enforcement; data residency.
|
||||
|
||||
Gating levers (what flips Team -> Enterprise): runtime telemetry retention window, number of repos under management, SSO requirement, policy-promotion approval workflow, audit-log export, and `routesRequiringApproval` enforcement (Enterprise can require that `operatorReadback.requiresApproval` blocks promotion; Team only surfaces it). Per-seat captures small teams; per-repo captures the platform-team buyer whose value is fleet breadth.
|
||||
|
||||
## 9. Risks and Open Questions
|
||||
|
||||
Risks:
|
||||
- Cannibalization: a too-generous hosted free tier could erode the local moat, or a too-aggressive paywall could stall the 30K/mo funnel. Mitigation: never paywall detection; only paywall persistence/aggregation/routing.
|
||||
- Redaction trust boundary: the hosted product's entire safety story depends on the CLI redactor being complete. A new token format the regex set misses would be ingested unredacted. Mitigation: reject-if-not-redacted is necessary but not sufficient; add a server-side secondary redaction pass over inbound `title`/`file`/`reason` strings as defense-in-depth, and keep `buildReplacements` patterns under test.
|
||||
- Runtime volume economics: `runtime.ndjson` can be high-cardinality per active agent; Postgres retention could get expensive. Mitigation: pre-aggregate to `blocked_command_rate` rollups on ingest and retain raw `runtime_event` only for the plan's window (ClickHouse for Enterprise).
|
||||
- Idempotency edge: `bundleDigest` excludes `manifest.json` and `README.md` (`BUNDLE_DIGEST_EXCLUDED_FILES`), so two scans with identical findings but different `generatedAt` produce the same digest. That is correct for dedupe but means we must key the time-series on `generatedAt`/`ci_run_id`, not on digest alone.
|
||||
|
||||
Open questions:
|
||||
- Should drift/runtime ingestion from purely local `watch`/runtime (no CI, `provider: "local"`) be allowed for Pro, given there is no GitHub-verifiable repo identity? Proposal: allow it but tag `provider: local` and require a repo-scoped token bound at mint time to a `full_name`.
|
||||
- Do we attribute runtime events to a GitHub identity (`ci-context.actor`) for per-developer block-rate, or keep them repo-anonymous for privacy? Leaning repo-anonymous by default with opt-in actor attribution.
|
||||
- Is org identity strictly GitHub-org-bound, or do we need a GitHub-independent org for GitLab/local-only users in v2? MVP is GitHub-org-bound via the ecc-tools App.
|
||||
- For the `injection_attempt_rate` chart, do we trust runtime `reason` string matching, or do we need a structured `matchedRule` field shipped from `EvalResult` (which has `matchedRule`) instead of only `RuntimeLogEntry` (which drops it)? Proposal: extend the runtime shipper to include `matchedRule` so injection attribution is structured, not string-parsed.
|
||||
|
||||
Relevant grounding files (all absolute):
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/evidence-pack/index.ts` (`EvidencePackInspectionResult`, `EvidencePackFleetOperatorReadback`, `EvidencePackFleetReviewItem`, `bundleDigest`, `BUNDLE_DIGEST_EXCLUDED_FILES`, `createRedactor`, `buildReplacements`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/runtime/types.ts` (`RuntimeLogEntry`, `EvalResult`, `RuntimePolicy`) and `/Users/affoon/GitHub/ECC/agentshield/src/runtime/evaluator.ts` (`evaluateToolCall`, `logEvalResult`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/watch/types.ts` (`DriftResult`, `WatchConfig`) and `/Users/affoon/GitHub/ECC/agentshield/src/watch/index.ts` (`formatWebhookPayload`, `dispatchAlert`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/baseline/types.ts` (`SerializedBaseline`, `SerializedFinding`, `BaselineComparison`) and `/Users/affoon/GitHub/ECC/agentshield/src/baseline/index.ts` (`fingerprintFinding`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/policy/types.ts` (`PolicyEvaluation`, `PolicyPack`, `PolicyExceptionSummary`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/src/types.ts` (`Finding`, `RuntimeConfidence`, `FindingCategory`, `SecurityReport`, `SecurityScore`)
|
||||
- `/Users/affoon/GitHub/ECC/agentshield/README.md` (GitHub Action inputs/outputs, ecc-tools GitHub App, `ecc-agentshield` npm, ECC Tools Pro $19/seat/mo)
|
||||
@@ -27,7 +27,6 @@ import ipaddress
|
||||
import socket
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from collections import defaultdict
|
||||
@@ -395,36 +394,22 @@ def detect_project() -> dict:
|
||||
}
|
||||
|
||||
|
||||
@contextmanager
|
||||
def _registry_lock():
|
||||
"""Serialize registry read-modify-write across concurrent sessions.
|
||||
|
||||
Acquires the same advisory lock for every registry writer (``_update_registry``
|
||||
and ``_write_registry``) so ``projects delete/gc/merge`` cannot interleave with
|
||||
a concurrent observe-time update and corrupt ``projects.json``. No-op on
|
||||
platforms without ``fcntl`` (Windows).
|
||||
"""
|
||||
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
|
||||
lock_fd = None
|
||||
try:
|
||||
if _HAS_FCNTL:
|
||||
lock_fd = open(lock_path, "w")
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX)
|
||||
yield
|
||||
finally:
|
||||
if lock_fd is not None:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
lock_fd.close()
|
||||
|
||||
|
||||
def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
|
||||
"""Update the projects.json registry.
|
||||
|
||||
Uses file locking (where available) to prevent concurrent sessions from
|
||||
overwriting each other's updates.
|
||||
"""
|
||||
with _registry_lock():
|
||||
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
lock_path = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.lock"
|
||||
lock_fd = None
|
||||
|
||||
try:
|
||||
# Acquire advisory lock to serialize read-modify-write
|
||||
if _HAS_FCNTL:
|
||||
lock_fd = open(lock_path, "w")
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_EX)
|
||||
|
||||
try:
|
||||
with open(REGISTRY_FILE, encoding="utf-8") as f:
|
||||
registry = json.load(f)
|
||||
@@ -444,6 +429,10 @@ def _update_registry(pid: str, pname: str, proot: str, premote: str) -> None:
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_file, REGISTRY_FILE)
|
||||
finally:
|
||||
if lock_fd is not None:
|
||||
fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
||||
lock_fd.close()
|
||||
|
||||
|
||||
def load_registry() -> dict:
|
||||
@@ -456,19 +445,15 @@ def load_registry() -> dict:
|
||||
|
||||
|
||||
def _write_registry(registry: dict) -> None:
|
||||
"""Write the project registry atomically.
|
||||
|
||||
Holds the same advisory lock as ``_update_registry`` so concurrent
|
||||
``projects delete/gc/merge`` and observe-time updates cannot corrupt the file.
|
||||
"""
|
||||
with _registry_lock():
|
||||
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
|
||||
with open(tmp_file, "w", encoding="utf-8") as f:
|
||||
json.dump(registry, f, indent=2)
|
||||
f.write("\n")
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_file, REGISTRY_FILE)
|
||||
"""Write the project registry atomically."""
|
||||
REGISTRY_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_file = REGISTRY_FILE.parent / f".{REGISTRY_FILE.name}.tmp.{os.getpid()}"
|
||||
with open(tmp_file, "w", encoding="utf-8") as f:
|
||||
json.dump(registry, f, indent=2)
|
||||
f.write("\n")
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_file, REGISTRY_FILE)
|
||||
|
||||
|
||||
def _validate_project_id(project_id: str) -> bool:
|
||||
@@ -588,14 +573,7 @@ def _project_counts(project_id: str) -> dict:
|
||||
|
||||
|
||||
def _remove_project_storage(project_id: str) -> None:
    """Recursively delete the storage directory of ``project_id``.

    Defense-in-depth: the target is resolved and confirmed to be contained
    within ``PROJECTS_DIR`` before recursively deleting, even though callers
    validate the project id. A relaxed validator or a future caller must never
    be able to turn this into an arbitrary-directory delete.

    Args:
        project_id: Name of the project directory under ``PROJECTS_DIR``.

    Raises:
        ValueError: If the resolved target is ``PROJECTS_DIR`` itself or is
            not located underneath it.
    """
    projects_root = PROJECTS_DIR.resolve()
    project_dir = (PROJECTS_DIR / project_id).resolve()
    if project_dir == projects_root or projects_root not in project_dir.parents:
        raise ValueError(f"refusing to remove {project_dir}: escapes {projects_root}")
    # Operate on the resolved path that was just validated; re-deriving the
    # path from the raw id would delete something other than what was checked.
    if project_dir.exists():
        shutil.rmtree(project_dir)
|
||||
|
||||
|
||||
@@ -46,8 +46,6 @@ load_registry = _mod.load_registry
|
||||
_validate_instinct_id = _mod._validate_instinct_id
|
||||
_validate_import_url = _mod._validate_import_url
|
||||
_update_registry = _mod._update_registry
|
||||
_write_registry = _mod._write_registry
|
||||
_remove_project_storage = _mod._remove_project_storage
|
||||
_confidence_bar = _mod._confidence_bar
|
||||
|
||||
|
||||
@@ -1045,41 +1043,3 @@ def test_update_registry_atomic_replaces_file(patch_globals):
|
||||
assert "abc123" in data
|
||||
leftovers = list(tree["registry_file"].parent.glob(".projects.json.tmp.*"))
|
||||
assert leftovers == []
|
||||
|
||||
|
||||
def test_write_registry_atomic_no_tmp_leftovers(patch_globals):
    """Issue #2294: ``_write_registry`` writes atomically and leaves no tmp files.

    ``_write_registry`` now holds the registry lock like ``_update_registry``;
    the written content must round-trip and no ``.projects.json.tmp.*`` file
    may remain next to the registry.
    """
    registry_path = patch_globals["registry_file"]

    _write_registry({"keep": {"name": "demo", "root": "/repo", "remote": ""}})

    on_disk = json.loads(registry_path.read_text())
    assert on_disk == {"keep": {"name": "demo", "root": "/repo", "remote": ""}}

    stray_tmp = [path for path in registry_path.parent.glob(".projects.json.tmp.*")]
    assert stray_tmp == []
|
||||
|
||||
|
||||
def test_remove_project_storage_deletes_contained_dir(patch_globals):
    """A populated project directory under the projects dir is removed entirely."""
    project_dir = patch_globals["projects_dir"] / "proj-1"
    instincts_dir = project_dir / "instincts"
    instincts_dir.mkdir(parents=True)
    # Put a file inside so the removal has to be recursive.
    (instincts_dir / "x.md").write_text("hi", encoding="utf-8")

    _remove_project_storage("proj-1")

    assert project_dir.exists() is False
|
||||
|
||||
|
||||
def test_remove_project_storage_missing_dir_is_noop(patch_globals):
    """Removing a contained project dir that does not exist must not raise.

    The ``patch_globals`` fixture is requested but not referenced in the body.
    """
    missing_id = "never-created"
    _remove_project_storage(missing_id)
|
||||
|
||||
|
||||
def test_remove_project_storage_blocks_traversal(patch_globals):
    """Issue #2297: a traversal id is refused with ``ValueError``.

    Defense-in-depth: the refusal must hold even when a caller skips
    ``_validate_project_id``, so this can never delete outside PROJECTS_DIR.
    """
    traversal_id = "../../etc"
    with pytest.raises(ValueError):
        _remove_project_storage(traversal_id)
|
||||
|
||||
|
||||
def test_remove_project_storage_blocks_root_itself(patch_globals):
    """The id ``"."`` must be refused with ``ValueError``."""
    root_alias = "."
    with pytest.raises(ValueError):
        _remove_project_storage(root_alias)
|
||||
|
||||
Reference in New Issue
Block a user