Compare commits

..

5 Commits

Author SHA1 Message Date
Affaan Mustafa
8e55f4f117 feat(ecc2): implement agent status panel with Table widget (#773)
- Table widget with columns: ID, Agent, State, Branch, Tokens, Duration
- Color-coded states: green=Running, yellow=Idle, red=Failed, gray=Stopped, blue=Completed
- Summary bar with running/completed/failed counts
- Row selection highlighting
2026-03-24 03:54:15 -07:00
Affaan Mustafa
67306c22cd test: align antigravity manifest expectations 2026-03-24 03:50:46 -07:00
Affaan Mustafa
b2407ab3f5 fix(ecc2): sync catalog counts for scaffold CI 2026-03-24 03:43:48 -07:00
Affaan Mustafa
00dce30d3b feat: scaffold ECC 2.0 Rust TUI — agentic IDE control plane
Initial scaffold for ECC 2.0, a terminal-native agentic IDE built with
Ratatui. Compiles to a 3.4MB single binary.

Core modules:
- Session manager with SQLite-backed state store
- TUI dashboard with split-pane layout (sessions, output, metrics)
- Worktree orchestration (auto-create per agent session)
- Observability with tool call risk scoring
- Inter-agent communication via SQLite mailbox
- Background daemon with heartbeat monitoring
- CLI with start/stop/sessions/status/daemon subcommands

Tech stack: Rust + Ratatui + Crossterm + Tokio + rusqlite + git2 + clap
2026-03-24 03:43:05 -07:00
Affaan Mustafa
df4f2df297 feat: add 6 gap-closing skills — browser QA, design system, product lens, canary watch, benchmark, safety guard
Closes competitive gaps with gstack:
- browser-qa: automated visual testing via browser MCP
- design-system: generate, audit, and detect AI slop in UI
- product-lens: product diagnostic, founder review, feature prioritization
- canary-watch: post-deploy monitoring with alert thresholds
- benchmark: performance baseline and regression detection
- safety-guard: prevent destructive operations in autonomous sessions
2026-03-23 04:31:17 -07:00
36 changed files with 3948 additions and 471 deletions

View File

@@ -13,7 +13,7 @@
{
"name": "everything-claude-code",
"source": "./",
"description": "The most comprehensive Claude Code plugin — 28+ agents, 116+ skills, 57+ commands, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
"description": "The most comprehensive Claude Code plugin — 14+ agents, 56+ skills, 33+ commands, and production-ready hooks for TDD, security scanning, code review, and continuous learning",
"version": "1.9.0",
"author": {
"name": "Affaan Mustafa",

3
.gitignore vendored
View File

@@ -83,6 +83,9 @@ temp/
*.bak
*.backup
# Rust build artifacts
ecc2/target/
# Bootstrap pipeline outputs
# Generated lock files in tool subdirectories
.opencode/package-lock.json

View File

@@ -1,6 +1,6 @@
# Everything Claude Code (ECC) — Agent Instructions
This is a **production-ready AI coding plugin** providing 28 specialized agents, 119 skills, 60 commands, and automated hook workflows for software development.
This is a **production-ready AI coding plugin** providing 28 specialized agents, 125 skills, 60 commands, and automated hook workflows for software development.
**Version:** 1.9.0

View File

@@ -212,7 +212,7 @@ For manual install instructions see the README in the `rules/` folder.
/plugin list everything-claude-code@everything-claude-code
```
**That's it!** You now have access to 28 agents, 119 skills, and 60 commands.
**That's it!** You now have access to 28 agents, 125 skills, and 60 commands.
---
@@ -1085,7 +1085,7 @@ The configuration is automatically detected from `.opencode/opencode.json`.
|---------|-------------|----------|--------|
| Agents | ✅ 28 agents | ✅ 12 agents | **Claude Code leads** |
| Commands | ✅ 60 commands | ✅ 31 commands | **Claude Code leads** |
| Skills | ✅ 119 skills | ✅ 37 skills | **Claude Code leads** |
| Skills | ✅ 125 skills | ✅ 37 skills | **Claude Code leads** |
| Hooks | ✅ 8 event types | ✅ 11 events | **OpenCode has more!** |
| Rules | ✅ 29 rules | ✅ 13 instructions | **Claude Code leads** |
| MCP Servers | ✅ 14 servers | ✅ Full | **Full parity** |

2016
ecc2/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

52
ecc2/Cargo.toml Normal file
View File

@@ -0,0 +1,52 @@
[package]
name = "ecc-tui"
version = "0.1.0"
edition = "2021"
description = "ECC 2.0 — Agentic IDE control plane with TUI dashboard"
license = "MIT"
authors = ["Affaan Mustafa <me@affaanmustafa.com>"]
repository = "https://github.com/affaan-m/everything-claude-code"
[dependencies]
# TUI
ratatui = "0.29"
crossterm = "0.28"
# Async runtime
tokio = { version = "1", features = ["full"] }
# State store
rusqlite = { version = "0.32", features = ["bundled"] }
# Git integration
git2 = "0.19"
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
toml = "0.8"
# CLI
clap = { version = "4", features = ["derive"] }
# Logging & tracing
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
# Error handling
anyhow = "1"
thiserror = "2"
# Time
chrono = { version = "0.4", features = ["serde"] }
# UUID for session IDs
uuid = { version = "1", features = ["v4"] }
# Directory paths
dirs = "6"
[profile.release]
lto = true
codegen-units = 1
strip = true

36
ecc2/src/comms/mod.rs Normal file
View File

@@ -0,0 +1,36 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use crate::session::store::StateStore;
/// Message types for inter-agent communication.
///
/// Values are serialized to JSON with `serde_json` by `send`, which also
/// stores a short snake_case tag naming the variant alongside the payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MessageType {
/// Task handoff from one agent to another
TaskHandoff { task: String, context: String },
/// Agent requesting information from another
Query { question: String },
/// Response to a query
Response { answer: String },
/// Notification of completion
Completed {
// Free-form summary of the completed work.
summary: String,
// NOTE(review): presumably paths of the files touched — confirm with producers.
files_changed: Vec<String>,
},
/// Conflict detected (e.g., two agents editing the same file)
Conflict { file: String, description: String },
}
/// Deliver a structured message from one session to another.
///
/// The message is stored through [`StateStore::send_message`] as a JSON
/// payload together with a short tag identifying the variant.
///
/// # Errors
/// Fails if the message cannot be serialized or the store rejects the insert.
pub fn send(db: &StateStore, from: &str, to: &str, msg: &MessageType) -> Result<()> {
    // Tag stored next to the payload so readers can filter without parsing JSON.
    let kind = match msg {
        MessageType::Conflict { .. } => "conflict",
        MessageType::Completed { .. } => "completed",
        MessageType::Response { .. } => "response",
        MessageType::Query { .. } => "query",
        MessageType::TaskHandoff { .. } => "task_handoff",
    };
    let payload = serde_json::to_string(msg)?;
    db.send_message(from, to, &payload, kind)
}

54
ecc2/src/config/mod.rs Normal file
View File

@@ -0,0 +1,54 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
pub db_path: PathBuf,
pub worktree_root: PathBuf,
pub max_parallel_sessions: usize,
pub max_parallel_worktrees: usize,
pub session_timeout_secs: u64,
pub heartbeat_interval_secs: u64,
pub default_agent: String,
pub theme: Theme,
}
/// Colour theme selection stored in `Config`; the default configuration uses `Dark`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Theme {
Dark,
Light,
}
impl Default for Config {
fn default() -> Self {
let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
Self {
db_path: home.join(".claude").join("ecc2.db"),
worktree_root: PathBuf::from("/tmp/ecc-worktrees"),
max_parallel_sessions: 8,
max_parallel_worktrees: 6,
session_timeout_secs: 3600,
heartbeat_interval_secs: 30,
default_agent: "claude".to_string(),
theme: Theme::Dark,
}
}
}
impl Config {
pub fn load() -> Result<Self> {
let config_path = dirs::home_dir()
.unwrap_or_else(|| PathBuf::from("."))
.join(".claude")
.join("ecc2.toml");
if config_path.exists() {
let content = std::fs::read_to_string(&config_path)?;
let config: Config = toml::from_str(&content)?;
Ok(config)
} else {
Ok(Config::default())
}
}
}

97
ecc2/src/main.rs Normal file
View File

@@ -0,0 +1,97 @@
mod comms;
mod config;
mod observability;
mod session;
mod tui;
mod worktree;
use anyhow::Result;
use clap::Parser;
use tracing_subscriber::EnvFilter;
// Top-level command-line interface, parsed with clap's derive API.
// Plain `//` comments are used here on purpose: `///` doc comments on clap
// derive types are picked up as help text.
#[derive(Parser, Debug)]
#[command(name = "ecc", version, about = "ECC 2.0 — Agentic IDE control plane")]
struct Cli {
// Optional so that a bare `ecc` invocation parses; `main` treats a missing
// subcommand the same as `dashboard`.
#[command(subcommand)]
command: Option<Commands>,
}
// Subcommands of the `ecc` binary. The `///` comments below double as the
// help text clap shows for each subcommand and argument, so edit them with
// the CLI output in mind.
#[derive(clap::Subcommand, Debug)]
enum Commands {
/// Launch the TUI dashboard
Dashboard,
/// Start a new agent session
Start {
/// Task description for the agent
#[arg(short, long)]
task: String,
/// Agent type (claude, codex, custom)
#[arg(short, long, default_value = "claude")]
agent: String,
/// Create a dedicated worktree for this session
#[arg(short, long)]
worktree: bool,
},
/// List active sessions
Sessions,
/// Show session details
Status {
/// Session ID or alias
// When omitted, `main` substitutes the alias "latest".
session_id: Option<String>,
},
/// Stop a running session
Stop {
/// Session ID or alias
session_id: String,
},
/// Run as background daemon
Daemon,
}
/// Entry point: sets up tracing, loads the configuration, opens the state
/// store and dispatches to the requested subcommand. Running without a
/// subcommand launches the dashboard.
#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();

    let args = Cli::parse();
    let config = config::Config::load()?;
    let store = session::store::StateStore::open(&config.db_path)?;

    match args.command.unwrap_or(Commands::Dashboard) {
        Commands::Dashboard => tui::app::run(store, config).await?,
        Commands::Start {
            task,
            agent,
            worktree: with_worktree,
        } => {
            let session_id =
                session::manager::create_session(&store, &config, &task, &agent, with_worktree)
                    .await?;
            println!("Session started: {session_id}");
        }
        Commands::Sessions => {
            for entry in session::manager::list_sessions(&store)? {
                println!("{} [{}] {}", entry.id, entry.state, entry.task);
            }
        }
        Commands::Status { session_id } => {
            // No explicit id means "show the latest session".
            let wanted = session_id.as_deref().unwrap_or("latest");
            let status = session::manager::get_status(&store, wanted)?;
            println!("{status}");
        }
        Commands::Stop { session_id } => {
            session::manager::stop_session(&store, &session_id).await?;
            println!("Session stopped: {session_id}");
        }
        Commands::Daemon => {
            println!("Starting ECC daemon...");
            session::daemon::run(store, config).await?;
        }
    }

    Ok(())
}

View File

@@ -0,0 +1,54 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use crate::session::store::StateStore;
/// Record of a single tool invocation made within a session.
///
/// `log_tool_call` serializes the whole struct to JSON and stores it through
/// the state store's message table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallEvent {
/// Id of the session that made the call.
pub session_id: String,
/// Name of the invoked tool (e.g. "Bash", "Write", "Edit").
pub tool_name: String,
pub input_summary: String,
pub output_summary: String,
/// Duration of the call in milliseconds.
pub duration_ms: u64,
// NOTE(review): presumably the value produced by `compute_risk` (capped at 1.0) — confirm with callers.
pub risk_score: f64,
}
impl ToolCallEvent {
    /// Scores how risky a tool call looks, capped at `1.0`.
    ///
    /// Every tool starts from a base score ("Bash" 0.3, "Write" 0.2, "Edit"
    /// 0.1, anything else 0.05). For "Bash" calls, each group of dangerous
    /// substrings found in `input` adds its penalty once, regardless of how
    /// many patterns of that group match.
    pub fn compute_risk(tool_name: &str, input: &str) -> f64 {
        // Pattern groups and their penalties, applied in this order.
        const BASH_PENALTIES: [(&[&str], f64); 3] = [
            (&["rm -rf", "--force"], 0.4),
            (&["git push", "git reset"], 0.3),
            (&["sudo", "chmod 777"], 0.5),
        ];

        let base: f64 = match tool_name {
            "Bash" => 0.3,
            "Write" => 0.2,
            "Edit" => 0.1,
            _ => 0.05,
        };

        let risk = if tool_name == "Bash" {
            BASH_PENALTIES
                .iter()
                .filter(|(patterns, _)| patterns.iter().any(|p| input.contains(*p)))
                .fold(base, |total, (_, penalty)| total + penalty)
        } else {
            base
        };

        risk.min(1.0)
    }
}
/// Records a tool call by storing its JSON form as a message.
///
/// The message is sent from the event's session to the fixed recipient
/// `"observability"` with message type `"tool_call"`.
///
/// # Errors
/// Fails if the event cannot be serialized or the store rejects the insert.
pub fn log_tool_call(db: &StateStore, event: &ToolCallEvent) -> Result<()> {
    let payload = serde_json::to_string(event)?;
    db.send_message(&event.session_id, "observability", &payload, "tool_call")
}

View File

@@ -0,0 +1,46 @@
use anyhow::Result;
use std::time::Duration;
use tokio::time;
use super::store::StateStore;
use super::SessionState;
use crate::config::Config;
/// Background daemon that monitors sessions, handles heartbeats,
/// and cleans up stale resources.
pub async fn run(db: StateStore, cfg: Config) -> Result<()> {
tracing::info!("ECC daemon started");
let heartbeat_interval = Duration::from_secs(cfg.heartbeat_interval_secs);
let timeout = Duration::from_secs(cfg.session_timeout_secs);
loop {
if let Err(e) = check_sessions(&db, timeout) {
tracing::error!("Session check failed: {e}");
}
time::sleep(heartbeat_interval).await;
}
}
/// Marks every running session that has not been updated within `timeout`
/// as failed.
///
/// A session whose `updated_at` lies in the future counts as freshly updated
/// (elapsed time of zero).
fn check_sessions(db: &StateStore, timeout: Duration) -> Result<()> {
    let running = db
        .list_sessions()?
        .into_iter()
        .filter(|candidate| candidate.state == SessionState::Running);

    for session in running {
        let elapsed = match chrono::Utc::now()
            .signed_duration_since(session.updated_at)
            .to_std()
        {
            Ok(age) => age,
            // Negative durations cannot be represented by `std::time::Duration`.
            Err(_) => Duration::ZERO,
        };

        if elapsed <= timeout {
            continue;
        }

        tracing::warn!("Session {} timed out after {:?}", session.id, elapsed);
        db.update_state(&session.id, &SessionState::Failed)?;
    }

    Ok(())
}

View File

@@ -0,0 +1,76 @@
use anyhow::Result;
use std::fmt;
use super::store::StateStore;
use super::{Session, SessionMetrics, SessionState};
use crate::config::Config;
use crate::worktree;
/// Registers a new session in the `Pending` state and returns its id.
///
/// The id is the first eight characters of a freshly generated UUID v4.
/// When `use_worktree` is set, a worktree is created for the session before
/// the record is inserted.
///
/// # Errors
/// Fails if the worktree cannot be created or the session cannot be stored.
pub async fn create_session(
    db: &StateStore,
    cfg: &Config,
    task: &str,
    agent_type: &str,
    use_worktree: bool,
) -> Result<String> {
    let id: String = uuid::Uuid::new_v4().to_string().chars().take(8).collect();
    let timestamp = chrono::Utc::now();

    let worktree_info = use_worktree
        .then(|| worktree::create_for_session(&id, cfg))
        .transpose()?;

    db.insert_session(&Session {
        id: id.clone(),
        task: task.to_owned(),
        agent_type: agent_type.to_owned(),
        state: SessionState::Pending,
        worktree: worktree_info,
        created_at: timestamp,
        updated_at: timestamp,
        metrics: SessionMetrics::default(),
    })?;

    Ok(id)
}
pub fn list_sessions(db: &StateStore) -> Result<Vec<Session>> {
db.list_sessions()
}
pub fn get_status(db: &StateStore, id: &str) -> Result<SessionStatus> {
let session = db
.get_session(id)?
.ok_or_else(|| anyhow::anyhow!("Session not found: {id}"))?;
Ok(SessionStatus(session))
}
pub async fn stop_session(db: &StateStore, id: &str) -> Result<()> {
db.update_state(id, &SessionState::Stopped)?;
Ok(())
}
/// Wrapper around a `Session` whose `Display` impl prints a multi-line,
/// human-readable status report.
pub struct SessionStatus(Session);
impl fmt::Display for SessionStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = &self.0;
writeln!(f, "Session: {}", s.id)?;
writeln!(f, "Task: {}", s.task)?;
writeln!(f, "Agent: {}", s.agent_type)?;
writeln!(f, "State: {}", s.state)?;
if let Some(ref wt) = s.worktree {
writeln!(f, "Branch: {}", wt.branch)?;
writeln!(f, "Worktree: {}", wt.path.display())?;
}
writeln!(f, "Tokens: {}", s.metrics.tokens_used)?;
writeln!(f, "Tools: {}", s.metrics.tool_calls)?;
writeln!(f, "Files: {}", s.metrics.files_changed)?;
writeln!(f, "Cost: ${:.4}", s.metrics.cost_usd)?;
writeln!(f, "Created: {}", s.created_at)?;
write!(f, "Updated: {}", s.updated_at)
}
}

59
ecc2/src/session/mod.rs Normal file
View File

@@ -0,0 +1,59 @@
pub mod daemon;
pub mod manager;
pub mod store;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::fmt;
use std::path::PathBuf;
/// A single agent session as tracked by the state store.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Session {
/// Session identifier; `manager::create_session` uses the first eight
/// characters of a UUID v4.
pub id: String,
/// Task description given when the session was started.
pub task: String,
/// Agent type the session runs with (e.g. "claude").
pub agent_type: String,
/// Current lifecycle state.
pub state: SessionState,
/// Worktree details, present only when the session was created with one.
pub worktree: Option<WorktreeInfo>,
pub created_at: DateTime<Utc>,
/// Refreshed by the store whenever the state or metrics are updated.
pub updated_at: DateTime<Utc>,
pub metrics: SessionMetrics,
}
/// Lifecycle state of a session.
///
/// The store persists the lowercase `Display` form of each variant and maps
/// any unrecognised stored value back to `Pending` when reading.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum SessionState {
/// Initial state assigned by `manager::create_session`.
Pending,
/// The only state the daemon checks for timeouts.
Running,
Idle,
Completed,
/// Set by the daemon when a running session times out.
Failed,
/// Set when a session is stopped via the CLI or the dashboard.
Stopped,
}
impl fmt::Display for SessionState {
    /// Writes the lowercase name of the state (e.g. `running`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            SessionState::Stopped => "stopped",
            SessionState::Failed => "failed",
            SessionState::Completed => "completed",
            SessionState::Idle => "idle",
            SessionState::Running => "running",
            SessionState::Pending => "pending",
        };
        f.write_str(name)
    }
}
/// Git worktree associated with a session; persisted by the store in the
/// `worktree_path`, `worktree_branch` and `worktree_base` columns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorktreeInfo {
/// Filesystem location of the worktree.
pub path: PathBuf,
/// Branch shown for the session in the status output and the dashboard.
pub branch: String,
// NOTE(review): presumably the branch the worktree was created from — confirm in the worktree module.
pub base_branch: String,
}
/// Usage counters for a session; every field defaults to zero and is
/// written back by `StateStore::update_metrics`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SessionMetrics {
pub tokens_used: u64,
pub tool_calls: u64,
pub files_changed: u32,
/// Session duration in seconds (rendered as HH:MM:SS by the dashboard).
pub duration_secs: u64,
/// Cost in US dollars (printed with a `$` prefix and four decimals).
pub cost_usd: f64,
}

186
ecc2/src/session/store.rs Normal file
View File

@@ -0,0 +1,186 @@
use anyhow::Result;
use rusqlite::Connection;
use std::path::Path;
use super::{Session, SessionMetrics, SessionState};
/// SQLite-backed store for sessions, the tool log and inter-session messages.
pub struct StateStore {
// Connection opened by `StateStore::open`; all queries go through it.
conn: Connection,
}
impl StateStore {
    /// Opens (or creates) the database at `path` and ensures the schema exists.
    ///
    /// The parent directory is created first when it is missing: SQLite
    /// creates the database file but not its directory, so opening the
    /// default `<home>/.claude/ecc2.db` would otherwise fail on a machine
    /// without a `.claude` directory.
    ///
    /// # Errors
    /// Fails if the directory cannot be created, the database cannot be
    /// opened, or the schema statements fail.
    pub fn open(path: &Path) -> Result<Self> {
        // An empty parent (bare file name, or `:memory:`) needs no directory.
        if let Some(parent) = path.parent().filter(|dir| !dir.as_os_str().is_empty()) {
            std::fs::create_dir_all(parent)?;
        }

        let conn = Connection::open(path)?;
        let store = Self { conn };
        store.init_schema()?;
        Ok(store)
    }

    /// Creates the `sessions`, `tool_log` and `messages` tables and their
    /// indexes if they do not exist yet.
    fn init_schema(&self) -> Result<()> {
        self.conn.execute_batch(
            "
            CREATE TABLE IF NOT EXISTS sessions (
                id TEXT PRIMARY KEY,
                task TEXT NOT NULL,
                agent_type TEXT NOT NULL,
                state TEXT NOT NULL DEFAULT 'pending',
                worktree_path TEXT,
                worktree_branch TEXT,
                worktree_base TEXT,
                tokens_used INTEGER DEFAULT 0,
                tool_calls INTEGER DEFAULT 0,
                files_changed INTEGER DEFAULT 0,
                duration_secs INTEGER DEFAULT 0,
                cost_usd REAL DEFAULT 0.0,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS tool_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT NOT NULL REFERENCES sessions(id),
                tool_name TEXT NOT NULL,
                input_summary TEXT,
                output_summary TEXT,
                duration_ms INTEGER,
                risk_score REAL DEFAULT 0.0,
                timestamp TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS messages (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                from_session TEXT NOT NULL,
                to_session TEXT NOT NULL,
                content TEXT NOT NULL,
                msg_type TEXT NOT NULL DEFAULT 'info',
                read INTEGER DEFAULT 0,
                timestamp TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_sessions_state ON sessions(state);
            CREATE INDEX IF NOT EXISTS idx_tool_log_session ON tool_log(session_id);
            CREATE INDEX IF NOT EXISTS idx_messages_to ON messages(to_session, read);
            ",
        )?;
        Ok(())
    }

    /// Inserts a new session row.
    ///
    /// Metrics are not written here; the metric columns start at their
    /// schema defaults of zero.
    pub fn insert_session(&self, session: &Session) -> Result<()> {
        let worktree = session.worktree.as_ref();
        self.conn.execute(
            "INSERT INTO sessions (id, task, agent_type, state, worktree_path, worktree_branch, worktree_base, created_at, updated_at)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
            rusqlite::params![
                session.id,
                session.task,
                session.agent_type,
                session.state.to_string(),
                worktree.map(|w| w.path.to_string_lossy().to_string()),
                worktree.map(|w| w.branch.clone()),
                worktree.map(|w| w.base_branch.clone()),
                session.created_at.to_rfc3339(),
                session.updated_at.to_rfc3339(),
            ],
        )?;
        Ok(())
    }

    /// Sets the state of the session with exactly this id and refreshes its
    /// `updated_at` timestamp. Succeeds without effect when no row matches.
    pub fn update_state(&self, session_id: &str, state: &SessionState) -> Result<()> {
        self.conn.execute(
            "UPDATE sessions SET state = ?1, updated_at = ?2 WHERE id = ?3",
            rusqlite::params![
                state.to_string(),
                chrono::Utc::now().to_rfc3339(),
                session_id,
            ],
        )?;
        Ok(())
    }

    /// Overwrites the metric columns of the session with exactly this id and
    /// refreshes its `updated_at` timestamp.
    pub fn update_metrics(&self, session_id: &str, metrics: &SessionMetrics) -> Result<()> {
        self.conn.execute(
            "UPDATE sessions SET tokens_used = ?1, tool_calls = ?2, files_changed = ?3, duration_secs = ?4, cost_usd = ?5, updated_at = ?6 WHERE id = ?7",
            rusqlite::params![
                metrics.tokens_used,
                metrics.tool_calls,
                metrics.files_changed,
                metrics.duration_secs,
                metrics.cost_usd,
                chrono::Utc::now().to_rfc3339(),
                session_id,
            ],
        )?;
        Ok(())
    }

    /// Returns all sessions, most recently updated first.
    pub fn list_sessions(&self) -> Result<Vec<Session>> {
        let mut stmt = self.conn.prepare(
            "SELECT id, task, agent_type, state, worktree_path, worktree_branch, worktree_base,
                    tokens_used, tool_calls, files_changed, duration_secs, cost_usd,
                    created_at, updated_at
             FROM sessions ORDER BY updated_at DESC",
        )?;
        let sessions = stmt
            .query_map([], Self::session_from_row)?
            .collect::<Result<Vec<_>, _>>()?;
        Ok(sessions)
    }

    /// Maps a stored state string back to a [`SessionState`]; anything
    /// unrecognised is treated as `Pending`.
    fn parse_state(raw: &str) -> SessionState {
        match raw {
            "running" => SessionState::Running,
            "idle" => SessionState::Idle,
            "completed" => SessionState::Completed,
            "failed" => SessionState::Failed,
            "stopped" => SessionState::Stopped,
            _ => SessionState::Pending,
        }
    }

    /// Parses an RFC 3339 timestamp, falling back to the default timestamp
    /// when the stored text is malformed.
    fn parse_timestamp(raw: &str) -> chrono::DateTime<chrono::Utc> {
        chrono::DateTime::parse_from_rfc3339(raw)
            .unwrap_or_default()
            .with_timezone(&chrono::Utc)
    }

    /// Builds a [`Session`] from a row selected by `list_sessions`; the column
    /// indexes follow that query's SELECT list.
    fn session_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<Session> {
        let state_str: String = row.get(3)?;

        // A session has a worktree exactly when `worktree_path` is non-NULL;
        // a missing branch or base column falls back to an empty string.
        let worktree_path: Option<String> = row.get(4)?;
        let worktree = worktree_path.map(|p| super::WorktreeInfo {
            path: std::path::PathBuf::from(p),
            branch: row.get::<_, String>(5).unwrap_or_default(),
            base_branch: row.get::<_, String>(6).unwrap_or_default(),
        });

        let created_str: String = row.get(12)?;
        let updated_str: String = row.get(13)?;

        Ok(Session {
            id: row.get(0)?,
            task: row.get(1)?,
            agent_type: row.get(2)?,
            state: Self::parse_state(&state_str),
            worktree,
            created_at: Self::parse_timestamp(&created_str),
            updated_at: Self::parse_timestamp(&updated_str),
            metrics: SessionMetrics {
                tokens_used: row.get(7)?,
                tool_calls: row.get(8)?,
                files_changed: row.get(9)?,
                duration_secs: row.get(10)?,
                cost_usd: row.get(11)?,
            },
        })
    }

    /// Finds a session by exact id or, failing that, by id prefix.
    ///
    /// An exact match always wins. Among several prefix matches the most
    /// recently updated session is returned. An empty `id` matches nothing;
    /// previously it matched every session, because every string starts with
    /// the empty prefix.
    pub fn get_session(&self, id: &str) -> Result<Option<Session>> {
        if id.is_empty() {
            return Ok(None);
        }

        let mut sessions = self.list_sessions()?;
        let index = sessions
            .iter()
            .position(|s| s.id == id)
            .or_else(|| sessions.iter().position(|s| s.id.starts_with(id)));

        Ok(index.map(|i| sessions.swap_remove(i)))
    }

    /// Appends a message addressed from one session to another, stamped with
    /// the current UTC time.
    pub fn send_message(&self, from: &str, to: &str, content: &str, msg_type: &str) -> Result<()> {
        self.conn.execute(
            "INSERT INTO messages (from_session, to_session, content, msg_type, timestamp)
             VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![from, to, content, msg_type, chrono::Utc::now().to_rfc3339()],
        )?;
        Ok(())
    }
}

52
ecc2/src/tui/app.rs Normal file
View File

@@ -0,0 +1,52 @@
use anyhow::Result;
use crossterm::{
event::{self, Event, KeyCode, KeyModifiers},
execute,
terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen},
};
use ratatui::prelude::*;
use std::io;
use std::time::Duration;
use super::dashboard::Dashboard;
use crate::config::Config;
use crate::session::store::StateStore;
pub async fn run(db: StateStore, cfg: Config) -> Result<()> {
enable_raw_mode()?;
let mut stdout = io::stdout();
execute!(stdout, EnterAlternateScreen)?;
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend)?;
let mut dashboard = Dashboard::new(db, cfg);
loop {
terminal.draw(|frame| dashboard.render(frame))?;
if event::poll(Duration::from_millis(250))? {
if let Event::Key(key) = event::read()? {
match (key.modifiers, key.code) {
(KeyModifiers::CONTROL, KeyCode::Char('c')) => break,
(_, KeyCode::Char('q')) => break,
(_, KeyCode::Tab) => dashboard.next_pane(),
(KeyModifiers::SHIFT, KeyCode::BackTab) => dashboard.prev_pane(),
(_, KeyCode::Char('j')) | (_, KeyCode::Down) => dashboard.scroll_down(),
(_, KeyCode::Char('k')) | (_, KeyCode::Up) => dashboard.scroll_up(),
(_, KeyCode::Char('n')) => dashboard.new_session(),
(_, KeyCode::Char('s')) => dashboard.stop_selected(),
(_, KeyCode::Char('r')) => dashboard.refresh(),
(_, KeyCode::Char('?')) => dashboard.toggle_help(),
_ => {}
}
}
}
dashboard.tick().await;
}
disable_raw_mode()?;
execute!(terminal.backend_mut(), LeaveAlternateScreen)?;
Ok(())
}

614
ecc2/src/tui/dashboard.rs Normal file
View File

@@ -0,0 +1,614 @@
use ratatui::{
prelude::*,
widgets::{Block, Borders, Cell, HighlightSpacing, Paragraph, Row, Table, TableState, Tabs},
};
use crate::config::Config;
use crate::session::store::StateStore;
use crate::session::{Session, SessionState};
/// State of the TUI dashboard: the store it reads from, the cached session
/// list and the current selection.
pub struct Dashboard {
// Store queried on every refresh/tick and used to stop sessions.
db: StateStore,
// NOTE(review): not read by any method visible in this file — confirm whether it is still needed.
cfg: Config,
// Cached result of the last `list_sessions` call.
sessions: Vec<Session>,
// Pane that currently has focus (highlighted border, receives scrolling).
selected_pane: Pane,
// Index into `sessions` of the highlighted row; clamped after each refresh.
selected_session: usize,
// When true the main area shows the help screen instead of the panes.
show_help: bool,
// Changed by scrolling in the non-session panes.
// NOTE(review): no visible render method reads it yet — confirm intended use.
scroll_offset: usize,
}
/// Per-state session counts shown in the summary line above the session table.
#[derive(Debug, Default, PartialEq, Eq)]
struct SessionSummary {
// Number of sessions overall; the remaining fields count one state each.
total: usize,
pending: usize,
running: usize,
idle: usize,
completed: usize,
failed: usize,
stopped: usize,
}
/// Focusable panes of the dashboard, in Tab-cycling order.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Pane {
Sessions,
Output,
Metrics,
}
impl Dashboard {
pub fn new(db: StateStore, cfg: Config) -> Self {
let sessions = db.list_sessions().unwrap_or_default();
Self {
db,
cfg,
sessions,
selected_pane: Pane::Sessions,
selected_session: 0,
show_help: false,
scroll_offset: 0,
}
}
pub fn render(&self, frame: &mut Frame) {
let chunks = Layout::default()
.direction(Direction::Vertical)
.constraints([
Constraint::Length(3), // Header
Constraint::Min(10), // Main content
Constraint::Length(3), // Status bar
])
.split(frame.area());
self.render_header(frame, chunks[0]);
if self.show_help {
self.render_help(frame, chunks[1]);
} else {
let main_chunks = Layout::default()
.direction(Direction::Horizontal)
.constraints([
Constraint::Percentage(50), // Session table
Constraint::Percentage(50), // Output/details
])
.split(chunks[1]);
self.render_sessions(frame, main_chunks[0]);
let right_chunks = Layout::default()
.direction(Direction::Vertical)
.constraints([
Constraint::Percentage(70), // Output
Constraint::Percentage(30), // Metrics
])
.split(main_chunks[1]);
self.render_output(frame, right_chunks[0]);
self.render_metrics(frame, right_chunks[1]);
}
self.render_status_bar(frame, chunks[2]);
}
fn render_header(&self, frame: &mut Frame, area: Rect) {
let running = self
.sessions
.iter()
.filter(|s| s.state == SessionState::Running)
.count();
let total = self.sessions.len();
let title = format!(" ECC 2.0 | {running} running / {total} total ");
let tabs = Tabs::new(vec!["Sessions", "Output", "Metrics"])
.block(Block::default().borders(Borders::ALL).title(title))
.select(match self.selected_pane {
Pane::Sessions => 0,
Pane::Output => 1,
Pane::Metrics => 2,
})
.highlight_style(
Style::default()
.fg(Color::Cyan)
.add_modifier(Modifier::BOLD),
);
frame.render_widget(tabs, area);
}
fn render_sessions(&self, frame: &mut Frame, area: Rect) {
let border_style = if self.selected_pane == Pane::Sessions {
Style::default().fg(Color::Cyan)
} else {
Style::default()
};
let block = Block::default()
.borders(Borders::ALL)
.title(" Sessions ")
.border_style(border_style);
let inner_area = block.inner(area);
frame.render_widget(block, area);
if inner_area.is_empty() {
return;
}
let summary = SessionSummary::from_sessions(&self.sessions);
let chunks = Layout::default()
.direction(Direction::Vertical)
.constraints([Constraint::Length(1), Constraint::Min(3)])
.split(inner_area);
frame.render_widget(Paragraph::new(summary_line(&summary)), chunks[0]);
let rows = self.sessions.iter().map(session_row);
let header = Row::new(["ID", "Agent", "State", "Branch", "Tokens", "Duration"])
.style(Style::default().add_modifier(Modifier::BOLD));
let widths = [
Constraint::Length(8),
Constraint::Length(10),
Constraint::Length(10),
Constraint::Min(12),
Constraint::Length(8),
Constraint::Length(8),
];
let table = Table::new(rows, widths)
.header(header)
.column_spacing(1)
.highlight_symbol(">> ")
.highlight_spacing(HighlightSpacing::Always)
.row_highlight_style(
Style::default()
.bg(Color::DarkGray)
.add_modifier(Modifier::BOLD),
);
let selected = if self.sessions.is_empty() {
None
} else {
Some(self.selected_session.min(self.sessions.len() - 1))
};
let mut table_state = TableState::default().with_selected(selected);
frame.render_stateful_widget(table, chunks[1], &mut table_state);
}
fn render_output(&self, frame: &mut Frame, area: Rect) {
let content = if let Some(session) = self.sessions.get(self.selected_session) {
format!(
"Agent output for session {}...\n\n(Live streaming coming soon)",
session.id
)
} else {
"No sessions. Press 'n' to start one.".to_string()
};
let border_style = if self.selected_pane == Pane::Output {
Style::default().fg(Color::Cyan)
} else {
Style::default()
};
let paragraph = Paragraph::new(content).block(
Block::default()
.borders(Borders::ALL)
.title(" Output ")
.border_style(border_style),
);
frame.render_widget(paragraph, area);
}
fn render_metrics(&self, frame: &mut Frame, area: Rect) {
let content = if let Some(session) = self.sessions.get(self.selected_session) {
let m = &session.metrics;
format!(
"Tokens: {} | Tools: {} | Files: {} | Cost: ${:.4} | Duration: {}s",
m.tokens_used, m.tool_calls, m.files_changed, m.cost_usd, m.duration_secs
)
} else {
"No metrics available".to_string()
};
let border_style = if self.selected_pane == Pane::Metrics {
Style::default().fg(Color::Cyan)
} else {
Style::default()
};
let paragraph = Paragraph::new(content).block(
Block::default()
.borders(Borders::ALL)
.title(" Metrics ")
.border_style(border_style),
);
frame.render_widget(paragraph, area);
}
fn render_status_bar(&self, frame: &mut Frame, area: Rect) {
let text = " [n]ew session [s]top [Tab] switch pane [j/k] scroll [?] help [q]uit ";
let paragraph = Paragraph::new(text)
.style(Style::default().fg(Color::DarkGray))
.block(Block::default().borders(Borders::ALL));
frame.render_widget(paragraph, area);
}
fn render_help(&self, frame: &mut Frame, area: Rect) {
let help = vec![
"Keyboard Shortcuts:",
"",
" n New session",
" s Stop selected session",
" Tab Next pane",
" S-Tab Previous pane",
" j/↓ Scroll down",
" k/↑ Scroll up",
" r Refresh",
" ? Toggle help",
" q/C-c Quit",
];
let paragraph = Paragraph::new(help.join("\n")).block(
Block::default()
.borders(Borders::ALL)
.title(" Help ")
.border_style(Style::default().fg(Color::Yellow)),
);
frame.render_widget(paragraph, area);
}
pub fn next_pane(&mut self) {
self.selected_pane = match self.selected_pane {
Pane::Sessions => Pane::Output,
Pane::Output => Pane::Metrics,
Pane::Metrics => Pane::Sessions,
};
}
pub fn prev_pane(&mut self) {
self.selected_pane = match self.selected_pane {
Pane::Sessions => Pane::Metrics,
Pane::Output => Pane::Sessions,
Pane::Metrics => Pane::Output,
};
}
pub fn scroll_down(&mut self) {
if self.selected_pane == Pane::Sessions && !self.sessions.is_empty() {
self.selected_session = (self.selected_session + 1).min(self.sessions.len() - 1);
} else {
self.scroll_offset = self.scroll_offset.saturating_add(1);
}
}
pub fn scroll_up(&mut self) {
if self.selected_pane == Pane::Sessions {
self.selected_session = self.selected_session.saturating_sub(1);
} else {
self.scroll_offset = self.scroll_offset.saturating_sub(1);
}
}
pub fn new_session(&mut self) {
// TODO: Open a dialog to create a new session
tracing::info!("New session dialog requested");
}
pub fn stop_selected(&mut self) {
if let Some(session) = self.sessions.get(self.selected_session) {
let _ = self.db.update_state(&session.id, &SessionState::Stopped);
self.refresh();
}
}
pub fn refresh(&mut self) {
self.sessions = self.db.list_sessions().unwrap_or_default();
self.sync_selection();
}
pub fn toggle_help(&mut self) {
self.show_help = !self.show_help;
}
pub async fn tick(&mut self) {
// Periodic refresh every few ticks
self.sessions = self.db.list_sessions().unwrap_or_default();
self.sync_selection();
}
fn sync_selection(&mut self) {
if self.sessions.is_empty() {
self.selected_session = 0;
} else {
self.selected_session = self.selected_session.min(self.sessions.len() - 1);
}
}
}
impl SessionSummary {
    /// Counts the given sessions by state; `total` is the slice length.
    fn from_sessions(sessions: &[Session]) -> Self {
        let mut tally = Self {
            total: sessions.len(),
            ..Self::default()
        };

        for session in sessions {
            // Pick the counter for this session's state, then bump it.
            let counter = match session.state {
                SessionState::Stopped => &mut tally.stopped,
                SessionState::Failed => &mut tally.failed,
                SessionState::Completed => &mut tally.completed,
                SessionState::Idle => &mut tally.idle,
                SessionState::Running => &mut tally.running,
                SessionState::Pending => &mut tally.pending,
            };
            *counter += 1;
        }

        tally
    }
}
/// Builds the table row for one session: short id, agent, colour-coded state,
/// branch, token count and formatted duration.
fn session_row(session: &Session) -> Row<'static> {
    let state_style = Style::default()
        .fg(session_state_color(&session.state))
        .add_modifier(Modifier::BOLD);
    let state_cell = Cell::from(session_state_label(&session.state)).style(state_style);

    let cells = [
        Cell::from(format_session_id(&session.id)),
        Cell::from(session.agent_type.clone()),
        state_cell,
        Cell::from(session_branch(session)),
        Cell::from(session.metrics.tokens_used.to_string()),
        Cell::from(format_duration(session.metrics.duration_secs)),
    ];
    Row::new(cells)
}
fn summary_line(summary: &SessionSummary) -> Line<'static> {
Line::from(vec![
Span::styled(
format!("Total {} ", summary.total),
Style::default().add_modifier(Modifier::BOLD),
),
summary_span("Running", summary.running, Color::Green),
summary_span("Idle", summary.idle, Color::Yellow),
summary_span("Completed", summary.completed, Color::Blue),
summary_span("Failed", summary.failed, Color::Red),
summary_span("Stopped", summary.stopped, Color::DarkGray),
summary_span("Pending", summary.pending, Color::Reset),
])
}
/// Formats one `label value ` entry of the summary line in bold `color`.
fn summary_span(label: &str, value: usize, color: Color) -> Span<'static> {
    let style = Style::default().fg(color).add_modifier(Modifier::BOLD);
    let text = format!("{label} {value} ");
    Span::styled(text, style)
}
/// Capitalised display name of a session state, as shown in the table.
fn session_state_label(state: &SessionState) -> &'static str {
    match *state {
        SessionState::Running => "Running",
        SessionState::Idle => "Idle",
        SessionState::Completed => "Completed",
        SessionState::Failed => "Failed",
        SessionState::Stopped => "Stopped",
        SessionState::Pending => "Pending",
    }
}
/// Colour used for a session state in the table and summary line.
/// `Pending` uses the terminal's default colour (`Color::Reset`).
fn session_state_color(state: &SessionState) -> Color {
    match *state {
        SessionState::Pending => Color::Reset,
        SessionState::Completed => Color::Blue,
        SessionState::Stopped => Color::DarkGray,
        SessionState::Failed => Color::Red,
        SessionState::Idle => Color::Yellow,
        SessionState::Running => Color::Green,
    }
}
/// Shortens a session id to at most its first eight characters.
fn format_session_id(id: &str) -> String {
    // Byte offset of the ninth character, if there is one.
    match id.char_indices().nth(8) {
        Some((cut, _)) => id[..cut].to_string(),
        None => id.to_string(),
    }
}
/// Branch name of the session's worktree, or `-` when it has none.
fn session_branch(session: &Session) -> String {
    match &session.worktree {
        Some(info) => info.branch.clone(),
        None => "-".to_string(),
    }
}
/// Formats a duration in seconds as `HH:MM:SS`; the hours field grows beyond
/// two digits when needed.
fn format_duration(duration_secs: u64) -> String {
    let (hours, remainder) = (duration_secs / 3600, duration_secs % 3600);
    let (minutes, seconds) = (remainder / 60, remainder % 60);
    format!("{hours:02}:{minutes:02}:{seconds:02}")
}
// Unit tests for the session table: state colors, summary counting, and a full render
// of the dashboard into an in-memory terminal buffer.
#[cfg(test)]
mod tests {
use super::*;
use crate::session::{SessionMetrics, WorktreeInfo};
use chrono::Utc;
use ratatui::{backend::TestBackend, Terminal};
use std::path::PathBuf;
use uuid::Uuid;
// Pins the color used for Running, Idle, Failed, Stopped and Completed.
// The Pending color is not asserted here.
#[test]
fn session_state_color_matches_requested_palette() {
assert_eq!(session_state_color(&SessionState::Running), Color::Green);
assert_eq!(session_state_color(&SessionState::Idle), Color::Yellow);
assert_eq!(session_state_color(&SessionState::Failed), Color::Red);
assert_eq!(session_state_color(&SessionState::Stopped), Color::DarkGray);
assert_eq!(session_state_color(&SessionState::Completed), Color::Blue);
}
// Builds exactly one session per state and checks that the summary reports a count of 1
// for every state and a total of 6.
#[test]
fn session_summary_counts_each_state() {
let sessions = vec![
sample_session(
"run-12345678",
"planner",
SessionState::Running,
Some("feat/run"),
128,
15,
),
sample_session(
"idle-12345678",
"reviewer",
SessionState::Idle,
Some("feat/idle"),
256,
30,
),
sample_session(
"done-12345678",
"architect",
SessionState::Completed,
Some("feat/done"),
512,
45,
),
sample_session(
"fail-12345678",
"worker",
SessionState::Failed,
Some("feat/fail"),
1024,
60,
),
// The only sample without a worktree/branch.
sample_session(
"stop-12345678",
"security",
SessionState::Stopped,
None,
64,
10,
),
sample_session(
"pend-12345678",
"tdd",
SessionState::Pending,
Some("feat/pending"),
32,
5,
),
];
let summary = SessionSummary::from_sessions(&sessions);
assert_eq!(summary.total, 6);
assert_eq!(summary.running, 1);
assert_eq!(summary.idle, 1);
assert_eq!(summary.completed, 1);
assert_eq!(summary.failed, 1);
assert_eq!(summary.stopped, 1);
assert_eq!(summary.pending, 1);
}
// Renders a two-session dashboard with the second session selected (index 1) and checks
// the rendered text for the column headers, the summary counters and the selected row.
#[test]
fn render_sessions_shows_summary_headers_and_selected_row() {
let dashboard = test_dashboard(
vec![
sample_session(
"run-12345678",
"planner",
SessionState::Running,
Some("feat/run"),
128,
15,
),
sample_session(
"done-87654321",
"reviewer",
SessionState::Completed,
Some("release/v1"),
2048,
125,
),
],
1,
);
// 150x24 cells; presumably wide enough that no column is truncated — confirm against
// the table's column constraints if this test starts failing after layout changes.
let rendered = render_dashboard_text(&dashboard, 150, 24);
assert!(rendered.contains("ID"));
assert!(rendered.contains("Agent"));
assert!(rendered.contains("State"));
assert!(rendered.contains("Branch"));
assert!(rendered.contains("Tokens"));
assert!(rendered.contains("Duration"));
assert!(rendered.contains("Total 2"));
assert!(rendered.contains("Running 1"));
assert!(rendered.contains("Completed 1"));
// ">> " is presumably the table's highlight symbol for the selected row; "done-876" is
// the session ID truncated to 8 characters.
assert!(rendered.contains(">> done-876"));
assert!(rendered.contains("reviewer"));
assert!(rendered.contains("release/v1"));
// 125 seconds formatted as HH:MM:SS.
assert!(rendered.contains("00:02:05"));
}
// Builds a Dashboard over the given sessions with the Sessions pane focused, help hidden
// and no scroll offset.
fn test_dashboard(sessions: Vec<Session>, selected_session: usize) -> Dashboard {
Dashboard {
db: test_store(),
cfg: Config::default(),
sessions,
selected_pane: Pane::Sessions,
selected_session,
show_help: false,
scroll_offset: 0,
}
}
// Opens a StateStore at a uniquely named (UUID-suffixed) path inside the system temp dir.
// NOTE(review): nothing in this helper removes the file afterwards — confirm whether
// leftover test databases in the temp dir are acceptable.
fn test_store() -> StateStore {
let db_path =
std::env::temp_dir().join(format!("ecc-dashboard-test-{}.db", Uuid::new_v4()));
StateStore::open(&db_path).expect("open test db")
}
// Builds a Session fixture. Only the id, agent type, state, branch, token count and
// duration vary; the task text and the remaining metrics are fixed values.
fn sample_session(
id: &str,
agent_type: &str,
state: SessionState,
branch: Option<&str>,
tokens_used: u64,
duration_secs: u64,
) -> Session {
Session {
id: id.to_string(),
task: "Render dashboard rows".to_string(),
agent_type: agent_type.to_string(),
state,
// A worktree is attached only when a branch name is supplied.
worktree: branch.map(|branch| WorktreeInfo {
path: PathBuf::from(format!("/tmp/{branch}")),
branch: branch.to_string(),
base_branch: "main".to_string(),
}),
created_at: Utc::now(),
updated_at: Utc::now(),
metrics: SessionMetrics {
tokens_used,
tool_calls: 4,
files_changed: 2,
duration_secs,
cost_usd: 0.42,
},
}
}
// Draws the dashboard into a TestBackend of the given size and returns the buffer as
// text: one line per terminal row, rows joined with '\n'.
fn render_dashboard_text(dashboard: &Dashboard, width: u16, height: u16) -> String {
let backend = TestBackend::new(width, height);
let mut terminal = Terminal::new(backend).expect("create terminal");
terminal
.draw(|frame| dashboard.render(frame))
.expect("render dashboard");
let buffer = terminal.backend().buffer();
// The buffer content is a flat cell list; split it into rows of `width` cells.
buffer
.content
.chunks(buffer.area.width as usize)
.map(|cells| cells.iter().map(|cell| cell.symbol()).collect::<String>())
.collect::<Vec<_>>()
.join("\n")
}
}

3
ecc2/src/tui/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
pub mod app;
mod dashboard;
mod widgets;

6
ecc2/src/tui/widgets.rs Normal file
View File

@@ -0,0 +1,6 @@
// Custom TUI widgets for ECC 2.0
// TODO: Implement custom widgets:
// - TokenMeter: visual token usage bar with budget threshold
// - DiffViewer: side-by-side syntax-highlighted diff display
// - ProgressTimeline: session timeline with tool call markers
// - AgentTree: hierarchical view of parent/child agent sessions

84
ecc2/src/worktree/mod.rs Normal file
View File

@@ -0,0 +1,84 @@
use anyhow::{Context, Result};
use std::path::PathBuf;
use std::process::Command;
use crate::config::Config;
use crate::session::WorktreeInfo;
/// Create a new git worktree for an agent session.
pub fn create_for_session(session_id: &str, cfg: &Config) -> Result<WorktreeInfo> {
let branch = format!("ecc/{session_id}");
let path = cfg.worktree_root.join(session_id);
// Get current branch as base
let base = get_current_branch()?;
std::fs::create_dir_all(&cfg.worktree_root)
.context("Failed to create worktree root directory")?;
let output = Command::new("git")
.args(["worktree", "add", "-b", &branch])
.arg(&path)
.arg("HEAD")
.output()
.context("Failed to run git worktree add")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
anyhow::bail!("git worktree add failed: {stderr}");
}
tracing::info!(
"Created worktree at {} on branch {}",
path.display(),
branch
);
Ok(WorktreeInfo {
path,
branch,
base_branch: base,
})
}
/// Remove a worktree and its branch.
pub fn remove(path: &PathBuf) -> Result<()> {
let output = Command::new("git")
.args(["worktree", "remove", "--force"])
.arg(path)
.output()
.context("Failed to remove worktree")?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
tracing::warn!("Worktree removal warning: {stderr}");
}
Ok(())
}
/// List all active worktrees.
pub fn list() -> Result<Vec<String>> {
let output = Command::new("git")
.args(["worktree", "list", "--porcelain"])
.output()
.context("Failed to list worktrees")?;
let stdout = String::from_utf8_lossy(&output.stdout);
let worktrees: Vec<String> = stdout
.lines()
.filter(|l| l.starts_with("worktree "))
.map(|l| l.trim_start_matches("worktree ").to_string())
.collect();
Ok(worktrees)
}
fn get_current_branch() -> Result<String> {
let output = Command::new("git")
.args(["rev-parse", "--abbrev-ref", "HEAD"])
.output()
.context("Failed to get current branch")?;
Ok(String::from_utf8_lossy(&output.stdout).trim().to_string())
}

View File

@@ -12,7 +12,6 @@ CONFIG_FILE="$CODEX_HOME/config.toml"
AGENTS_FILE="$CODEX_HOME/AGENTS.md"
PROMPTS_DIR="$CODEX_HOME/prompts"
SKILLS_DIR="$CODEX_HOME/skills"
ROLE_DIR="$CODEX_HOME/agents"
HOOKS_DIR_EXPECT="${ECC_GLOBAL_HOOKS_DIR:-$CODEX_HOME/git-hooks}"
failures=0
@@ -90,14 +89,12 @@ fi
if [[ -f "$CONFIG_FILE" ]]; then
check_config_pattern '^multi_agent\s*=\s*true' "multi_agent is enabled"
check_config_absent '^\s*collab\s*=' "deprecated collab flag is absent"
check_config_pattern '^profile\s*=\s*"full-access"' "default profile is full-access"
check_config_pattern '^\[profiles\.full-access\]' "profiles.full-access exists"
check_config_pattern '^persistent_instructions\s*=' "persistent_instructions is configured"
check_config_pattern '^\[profiles\.strict\]' "profiles.strict exists"
check_config_pattern '^\[profiles\.yolo\]' "profiles.yolo exists"
for section in \
'mcp_servers.github' \
'mcp_servers.exa' \
'mcp_servers.memory' \
'mcp_servers.sequential-thinking' \
'mcp_servers.context7-mcp'
@@ -155,26 +152,6 @@ else
fail "Skills directory missing ($SKILLS_DIR)"
fi
if [[ -d "$ROLE_DIR" ]]; then
missing_roles=0
for role_file in explorer.toml reviewer.toml docs-researcher.toml; do
if [[ -f "$ROLE_DIR/$role_file" ]]; then
:
else
printf ' - missing agent role config: %s\n' "$role_file"
missing_roles=$((missing_roles + 1))
fi
done
if [[ "$missing_roles" -eq 0 ]]; then
ok "Global Codex agent role configs are present"
else
fail "$missing_roles required agent role configs are missing"
fi
else
fail "Agent role config directory missing ($ROLE_DIR)"
fi
if [[ -f "$PROMPTS_DIR/ecc-prompts-manifest.txt" ]]; then
ok "Command prompts manifest exists"
else

View File

@@ -28,8 +28,6 @@ CONFIG_FILE="$CODEX_HOME/config.toml"
AGENTS_FILE="$CODEX_HOME/AGENTS.md"
AGENTS_ROOT_SRC="$REPO_ROOT/AGENTS.md"
AGENTS_CODEX_SUPP_SRC="$REPO_ROOT/.codex/AGENTS.md"
ROLE_CONFIG_SRC="$REPO_ROOT/.codex/agents"
ROLE_CONFIG_DEST="$CODEX_HOME/agents"
SKILLS_SRC="$REPO_ROOT/.agents/skills"
SKILLS_DEST="$CODEX_HOME/skills"
PROMPTS_SRC="$REPO_ROOT/commands"
@@ -133,7 +131,6 @@ MCP_MERGE_SCRIPT="$REPO_ROOT/scripts/codex/merge-mcp-config.js"
require_path "$REPO_ROOT/AGENTS.md" "ECC AGENTS.md"
require_path "$AGENTS_CODEX_SUPP_SRC" "ECC Codex AGENTS supplement"
require_path "$ROLE_CONFIG_SRC" "ECC Codex agent config directory"
require_path "$SKILLS_SRC" "ECC skills directory"
require_path "$PROMPTS_SRC" "ECC commands directory"
require_path "$HOOKS_INSTALLER" "ECC global git hooks installer"
@@ -248,17 +245,6 @@ for skill_dir in "$SKILLS_SRC"/*; do
skills_count=$((skills_count + 1))
done
log "Syncing ECC Codex agent role configs"
run_or_echo "mkdir -p \"$ROLE_CONFIG_DEST\""
role_count=0
for role_file in "$ROLE_CONFIG_SRC"/*.toml; do
[[ -f "$role_file" ]] || continue
role_name="$(basename "$role_file")"
dest="$ROLE_CONFIG_DEST/$role_name"
run_or_echo "cp \"$role_file\" \"$dest\""
role_count=$((role_count + 1))
done
log "Generating prompt files from ECC commands"
run_or_echo "mkdir -p \"$PROMPTS_DEST\""
manifest="$PROMPTS_DEST/ecc-prompts-manifest.txt"
@@ -484,22 +470,21 @@ fi
log "Installing global git safety hooks"
if [[ "$MODE" == "dry-run" ]]; then
bash "$HOOKS_INSTALLER" --dry-run
"$HOOKS_INSTALLER" --dry-run
else
bash "$HOOKS_INSTALLER"
"$HOOKS_INSTALLER"
fi
log "Running global regression sanity check"
if [[ "$MODE" == "dry-run" ]]; then
printf '[dry-run] bash %s\n' "$SANITY_CHECKER"
printf '[dry-run] %s\n' "$SANITY_CHECKER"
else
bash "$SANITY_CHECKER"
"$SANITY_CHECKER"
fi
log "Sync complete"
log "Backup saved at: $BACKUP_DIR"
log "Skills synced: $skills_count"
log "Agent role configs synced: $role_count"
log "Prompts generated: $((prompt_count + extension_count)) (commands: $prompt_count, extensions: $extension_count)"
if [[ "$MODE" == "apply" ]]; then

87
skills/benchmark/SKILL.md Normal file
View File

@@ -0,0 +1,87 @@
# Benchmark — Performance Baseline & Regression Detection
## When to Use
- Before and after a PR to measure performance impact
- Setting up performance baselines for a project
- When users report "it feels slow"
- Before a launch — ensure you meet performance targets
- Comparing your stack against alternatives
## How It Works
### Mode 1: Page Performance
Measures real browser metrics via browser MCP:
```
1. Navigate to each target URL
2. Measure Core Web Vitals:
- LCP (Largest Contentful Paint) — target < 2.5s
- CLS (Cumulative Layout Shift) — target < 0.1
- INP (Interaction to Next Paint) — target < 200ms
- FCP (First Contentful Paint) — target < 1.8s
- TTFB (Time to First Byte) — target < 800ms
3. Measure resource sizes:
- Total page weight (target < 1MB)
- JS bundle size (target < 200KB gzipped)
- CSS size
- Image weight
- Third-party script weight
4. Count network requests
5. Check for render-blocking resources
```
### Mode 2: API Performance
Benchmarks API endpoints:
```
1. Hit each endpoint 100 times
2. Measure: p50, p95, p99 latency
3. Track: response size, status codes
4. Test under load: 10 concurrent requests
5. Compare against SLA targets
```
### Mode 3: Build Performance
Measures development feedback loop:
```
1. Cold build time
2. Hot reload time (HMR)
3. Test suite duration
4. TypeScript check time
5. Lint time
6. Docker build time
```
### Mode 4: Before/After Comparison
Run before and after a change to measure impact:
```
/benchmark baseline # saves current metrics
# ... make changes ...
/benchmark compare # compares against baseline
```
Output:
```
| Metric | Before | After | Delta | Verdict |
|--------|--------|-------|-------|---------|
| LCP | 1.2s | 1.4s | +200ms | ⚠ WARN |
| Bundle | 180KB | 175KB | -5KB | ✓ BETTER |
| Build | 12s | 14s | +2s | ⚠ WARN |
```
## Output
Baselines are stored as JSON files in `.ecc/benchmarks/`. The directory is git-tracked so the whole team shares the same baselines.
## Integration
- CI: run `/benchmark compare` on every PR
- Pair with `/canary-watch` for post-deploy monitoring
- Pair with `/browser-qa` for full pre-ship checklist

View File

@@ -0,0 +1,81 @@
# Browser QA — Automated Visual Testing & Interaction
## When to Use
- After deploying a feature to staging/preview
- When you need to verify UI behavior across pages
- Before shipping — confirm layouts, forms, interactions actually work
- When reviewing PRs that touch frontend code
- Accessibility audits and responsive testing
## How It Works
Uses the browser automation MCP (claude-in-chrome, Playwright, or Puppeteer) to interact with live pages like a real user.
### Phase 1: Smoke Test
```
1. Navigate to target URL
2. Check for console errors (filter noise: analytics, third-party)
3. Verify no 4xx/5xx in network requests
4. Screenshot above-the-fold on desktop + mobile viewport
5. Check Core Web Vitals: LCP < 2.5s, CLS < 0.1, INP < 200ms
```
### Phase 2: Interaction Test
```
1. Click every nav link — verify no dead links
2. Submit forms with valid data — verify success state
3. Submit forms with invalid data — verify error state
4. Test auth flow: login → protected page → logout
5. Test critical user journeys (checkout, onboarding, search)
```
### Phase 3: Visual Regression
```
1. Screenshot key pages at 3 breakpoints (375px, 768px, 1440px)
2. Compare against baseline screenshots (if stored)
3. Flag layout shifts > 5px, missing elements, overflow
4. Check dark mode if applicable
```
### Phase 4: Accessibility
```
1. Run axe-core or equivalent on each page
2. Flag WCAG AA violations (contrast, labels, focus order)
3. Verify keyboard navigation works end-to-end
4. Check screen reader landmarks
```
## Output Format
```markdown
## QA Report — [URL] — [timestamp]
### Smoke Test
- Console errors: 0 critical, 2 warnings (analytics noise)
- Network: all 200/304, no failures
- Core Web Vitals: LCP 1.2s ✓, CLS 0.02 ✓, INP 89ms ✓
### Interactions
- [✓] Nav links: 12/12 working
- [✗] Contact form: missing error state for invalid email
- [✓] Auth flow: login/logout working
### Visual
- [✗] Hero section overflows on 375px viewport
- [✓] Dark mode: all pages consistent
### Accessibility
- 2 AA violations: missing alt text on hero image, low contrast on footer links
### Verdict: SHIP WITH FIXES (2 issues, 0 blockers)
```
## Integration
Works with any browser MCP:
- `mcp__claude-in-chrome__*` tools (preferred — uses your actual Chrome)
- Playwright via `mcp__browserbase__*`
- Direct Puppeteer scripts
Pair with `/canary-watch` for post-deploy monitoring.

View File

@@ -0,0 +1,93 @@
# Canary Watch — Post-Deploy Monitoring
## When to Use
- After deploying to production or staging
- After merging a risky PR
- When you want to verify that a fix actually resolved the issue in the deployed environment
- Continuous monitoring during a launch window
- After dependency upgrades
## How It Works
Monitors a deployed URL for regressions. Runs in a loop until stopped or until the watch window expires.
### What It Watches
```
1. HTTP Status — is the page returning 200?
2. Console Errors — new errors that weren't there before?
3. Network Failures — failed API calls, 5xx responses?
4. Performance — LCP/CLS/INP regression vs baseline?
5. Content — did key elements disappear? (h1, nav, footer, CTA)
6. API Health — are critical endpoints responding within SLA?
```
### Watch Modes
**Quick check** (default): single pass, report results
```
/canary-watch https://myapp.com
```
**Sustained watch**: check every N minutes for M hours
```
/canary-watch https://myapp.com --interval 5m --duration 2h
```
**Diff mode**: compare staging vs production
```
/canary-watch --compare https://staging.myapp.com https://myapp.com
```
### Alert Thresholds
```yaml
critical: # immediate alert
- HTTP status != 200
- Console error count > 5 (new errors only)
- LCP > 4s
- API endpoint returns 5xx
warning: # flag in report
- LCP increased > 500ms from baseline
- CLS > 0.1
- New console warnings
- Response time > 2x baseline
info: # log only
- Minor performance variance
- New network requests (third-party scripts added?)
```
### Notifications
When a critical threshold is crossed:
- Desktop notification (macOS/Linux)
- Optional: Slack/Discord webhook
- Log to `~/.claude/canary-watch.log`
## Output
```markdown
## Canary Report — myapp.com — 2026-03-23 03:15 PST
### Status: HEALTHY ✓
| Check | Result | Baseline | Delta |
|-------|--------|----------|-------|
| HTTP | 200 ✓ | 200 | — |
| Console errors | 0 ✓ | 0 | — |
| LCP | 1.8s ✓ | 1.6s | +200ms |
| CLS | 0.01 ✓ | 0.01 | — |
| API /health | 145ms ✓ | 120ms | +25ms |
### No regressions detected. Deploy is clean.
```
## Integration
Pair with:
- `/browser-qa` for pre-deploy verification
- Hooks: add as a PostToolUse hook on `git push` to auto-check after deploys
- CI: run in GitHub Actions after deploy step

View File

@@ -0,0 +1,76 @@
# Design System — Generate & Audit Visual Systems
## When to Use
- Starting a new project that needs a design system
- Auditing an existing codebase for visual consistency
- Before a redesign — understand what you have
- When the UI looks "off" but you can't pinpoint why
- Reviewing PRs that touch styling
## How It Works
### Mode 1: Generate Design System
Analyzes your codebase and generates a cohesive design system:
```
1. Scan CSS/Tailwind/styled-components for existing patterns
2. Extract: colors, typography, spacing, border-radius, shadows, breakpoints
3. Research 3 competitor sites for inspiration (via browser MCP)
4. Propose a design token set (JSON + CSS custom properties)
5. Generate DESIGN.md with rationale for each decision
6. Create an interactive HTML preview page (self-contained, no deps)
```
Output: `DESIGN.md` + `design-tokens.json` + `design-preview.html`
### Mode 2: Visual Audit
Scores your UI across 10 dimensions (0-10 each):
```
1. Color consistency — are you using your palette or random hex values?
2. Typography hierarchy — clear h1 > h2 > h3 > body > caption?
3. Spacing rhythm — consistent scale (4px/8px/16px) or arbitrary?
4. Component consistency — do similar elements look similar?
5. Responsive behavior — fluid or broken at breakpoints?
6. Dark mode — complete or half-done?
7. Animation — purposeful or gratuitous?
8. Accessibility — contrast ratios, focus states, touch targets
9. Information density — cluttered or clean?
10. Polish — hover states, transitions, loading states, empty states
```
Each dimension gets a score, specific examples, and a fix with exact file:line.
### Mode 3: AI Slop Detection
Identifies generic AI-generated design patterns:
```
- Gratuitous gradients on everything
- Purple-to-blue defaults
- "Glass morphism" cards with no purpose
- Rounded corners on things that shouldn't be rounded
- Excessive animations on scroll
- Generic hero with centered text over stock gradient
- Sans-serif font stack with no personality
```
## Examples
**Generate for a SaaS app:**
```
/design-system generate --style minimal --palette earth-tones
```
**Audit existing UI:**
```
/design-system audit --url http://localhost:3000 --pages / /pricing /docs
```
**Check for AI slop:**
```
/design-system slop-check
```

View File

@@ -1,9 +0,0 @@
# Hermes Generated Skills
This directory is reserved for skills distilled from Hermes session data, repeated Telegram asks, and self-improvement runs.
Rules:
- keep skills specific and evidence-backed
- prefer reusable operational patterns over one-off tasks
- mirror from `~/.hermes/skills/generated/` only after the pattern is stable
- do not overwrite unrelated ECC skills

View File

@@ -1,80 +0,0 @@
---
name: content-crosspost-ops
description: Evidence-first crossposting workflow for Hermes. Use when adapting posts, threads, demos, videos, or articles across LinkedIn, Threads, Bluesky, Farcaster, and YouTube Community while keeping per-platform copy distinct and verified.
metadata:
hermes:
tags: [generated, content, crosspost, workflow, verification]
---
# Content Crosspost Ops
Use this when the user wants Hermes to crosspost or repurpose content across multiple platforms, especially from Telegram-driven publishing requests.
## Skill Stack
Pull these imported skills into the workflow when relevant:
- `content-engine` for platform-native rewrites
- `crosspost` for sequencing and destination-specific adaptation
- `article-writing` when the source asset is long-form
- `video-editing` or `fal-ai-media` when the post should lead with a clip, frame, or visual
- `search-first` before claiming a platform or API supports a format
- `eval-harness` mindset for publish verification and status reporting
## When To Use
- user says `crosspost`, `post everywhere`, `put this on linkedin too`, or similar
- the source asset is an X post/thread, quote tweet, article, demo video, screenshot, or YouTube post
- the destination is a community thread or showcase channel like Discord's `built-with-claude`
- the user asks whether a new destination or post type is supported
## Workflow
1. Read the real source asset and any destination rules first. Do not draft from memory.
- if the user pasted thread requirements, comply with those requirements before drafting
2. If the request depends on platform capability, API support, or quota behavior, verify it before answering.
- if the user asks whether PostBridge can handle a destination or format, inspect the real wrapper, configs, or recent publish logs before promising support
- if the destination is unsupported, say `blocked by unsupported capability` and give the next viable path
3. Extract one core idea and a few specifics. Split multiple ideas into separate posts.
4. Write native variants instead of reusing the same copy:
- X: fast hook, minimal framing
- LinkedIn: strong first line, short paragraphs, explicit lesson or takeaway
- Threads, Bluesky, Farcaster: shorter, conversational, clearly distinct wording
- YouTube Community: lead with the result or takeaway, keep it media-friendly
5. Prefer native media when the user wants engagement:
- for quote tweets, articles, or external links, prefer screenshots or media over a bare outbound link when the platform rewards native assets
- if the user says the demo itself should lead, use the video or a frame from it instead of a generic screenshot
- for community showcase threads, prefer the strongest demo clip or screenshot pair the user explicitly pointed to
6. Use link placement intentionally:
- put external links in comments or replies when engagement is the goal and the platform supports it
- otherwise use a platform-native CTA such as `comment for link` only when it matches the user's instruction
7. Resolve account and auth blockers early for browser-only destinations:
- for Discord or other browser-only community shares, verify the active account and whether the destination is reachable before spending more turns on extra asset hunting or copy polish
- verify the active account before typing into a community or social composer
- if login is blocked by MFA or a missing verification code, use the checked-in helper path instead of ad hoc inline scripting and do at most one focused resend plus one fresh helper check
- if that still returns no matching code, stop and report `blocked on missing MFA code`
8. Execute in order:
- post the primary platform first
- stagger secondary destinations when requested, defaulting to 4 hours apart unless the user overrides it
- prefer PostBridge for supported platforms, browser flows only when required
9. Verify before claiming completion:
- capture a returned post ID, URL, API response, or an updated verification log
- when the user asks `did you do it?`, answer with the exact status for each platform: posted, queued, drafted, uploaded-only, blocked, or awaiting verification
- record every attempt with `/Users/affoon/.hermes/workspace/content/log_crosspost.py` or `/Users/affoon/.hermes/workspace/content/postbridge_publish.py`
- if the state is only drafted, uploaded-only, queued, blocked, or pending manual action, report that exact status
## Pitfalls
- do not post identical copy cross-platform
- do not assume platform support without checking
- do not ignore thread rules or platform-specific showcase requirements
- do not call a draft, composer state, or upload step `posted`
- do not keep searching unrelated systems after a login or MFA blocker is already the limiting step
- do not keep refining copy or looking for better assets once auth is the only blocker on a browser-only publish
- do not answer a support question with a guess when the wrapper, logs, or API response can settle it
- do not ignore the user's preference for screenshots or native media over raw links
## Verification
- `/Users/affoon/.hermes/workspace/content/crosspost-verification-latest.md` reflects the latest attempts
- each destination has an ID, URL, or explicit failure reason
- the copy and media logged match what was actually sent

View File

@@ -1,70 +0,0 @@
---
name: email-ops
description: Evidence-first mailbox triage and sent-mail-safe reply workflow for Hermes. Use when organizing folders, drafting or sending through Himalaya, or verifying a message landed in Sent.
origin: Hermes
---
# Email Ops
Use this when the user wants Hermes to clean a mailbox, move messages between folders, draft or send replies, or prove a message landed in Sent.
## Prerequisites
Before using this workflow:
- install and configure the Himalaya CLI for the target mailbox accounts
- confirm the account's Sent folder name if it differs from `Sent`
## Skill Stack
Pull these companion skills into the workflow when relevant:
- `investor-outreach` when the email is investor, partner, or sponsor facing
- `search-first` before assuming a mail API, folder name, or CLI flag works
- `eval-harness` mindset for Sent-folder verification and exact status reporting
## When To Use
- user asks to triage inbox or trash, rescue important mail, or delete only obvious spam
- user asks to draft or send email and wants the message to appear in the mailbox's Sent folder
- user wants proof of which account, folder, or message id was used
## Workflow
1. Read the exact mailbox constraint first. If the user says `himalaya only` or forbids Apple Mail or `osascript`, stay inside Himalaya.
2. Resolve account and folder explicitly:
- check `himalaya account list`
- use `himalaya envelope list -a <account> -f <folder> ...`
- never misuse `-s INBOX` as a folder selector
3. For triage, classify before acting:
- preserve investor, partner, scheduling, and user-sent threads
- move only after the folder and account are confirmed
- permanently delete only obvious spam or messages the user explicitly authorized
4. For replies or new mail:
- read the full thread first
- choose the sender account that matches the project or recipient
- compose non-interactively with piped `himalaya template send` or `message write`
- avoid editor-driven flows unless required
5. If the request mentions attachments or images:
- resolve the exact absolute file path before broad mailbox searching
- keep the task on the local send-and-verify path instead of branching into unrelated web or repo exploration
- if Mail.app fallback is needed, pass the attachment paths after the body: `osascript /Users/affoon/.hermes/scripts/send_mail.applescript "<sender>" "<recipient>" "<subject>" "<body>" "/absolute/file1" ...`
6. If the user wants an actual send and Himalaya fails with an IMAP append or save-copy error, fall back to `/Users/affoon/.hermes/scripts/send_mail.applescript` only when the user did not forbid Apple Mail or `osascript`, then verify Sent. If the user constrained the method to Himalaya only, report the exact blocked state instead of silently switching tools.
7. During long-running mailbox work, send a short progress update before more searching. If a budget warning says 3 or fewer tool calls remain, stop broad exploration and spend the remaining calls on the highest-confidence execution or verification step, or report exact status and next action.
8. If the user wants sent-mail evidence:
- verify via `himalaya envelope list -a <account> -f Sent ...` or the account's actual sent folder
- report the subject, recipient, account, and message id or date if available
9. Report exact status words: drafted, sent, moved, flagged, deleted, blocked, awaiting verification.
## Pitfalls
- do not claim a message was sent without Sent-folder verification
- do not use the wrong account just because it is default
- do not delete uncertain business mail during cleanup
- do not switch tools after the user constrained the method
- do not wander into unrelated searches while an attachment path or Sent verification is unresolved
- do not keep searching through the budget warning while the user is asking for a status update
## Verification
- the requested messages are present in the expected folder after the move
- sent mail appears in Sent for the correct account
- the final report includes counts or concrete message identifiers, not vague completion language

View File

@@ -1,71 +0,0 @@
---
name: finance-billing-ops
description: Evidence-first Stripe sales, billing incident, and team-pricing workflow for Hermes. Use when pulling sales, investigating duplicate charges or failed payments, checking whether team billing is real in code, or benchmarking pricing.
metadata:
hermes:
tags: [generated, finance, billing, stripe, pricing, workflow, verification]
---
# Finance Billing Ops
Use this when the user asks about Stripe sales, refunds, failed payments, duplicate charges, org or team billing behavior, pricing strategy, or whether the product logic matches the marketing copy.
## Skill Stack
Pull these imported skills into the workflow when relevant:
- `market-research` for competitor pricing, billing models, and sourced market context
- `deep-research` or `exa-search` when the answer depends on current public pricing or enforcement behavior
- `search-first` before inventing a Stripe, billing, or entitlement path
- `eval-harness` mindset for exact status reporting and separating proof from inference
- `agentic-engineering` and `plankton-code-quality` when the answer depends on checked-in ECC billing or entitlement code
## When To Use
- user says `pull in stripe data`, `any new sales`, `why was he charged`, `refund`, `duplicate charge`, `team billing`, `per seat`, or similar
- the question mixes revenue facts with product truth, for example whether team or org billing is actually implemented
- the user wants a pricing comparison against Greptile or similar competitors
## Workflow
1. Start with the freshest revenue evidence available:
- if a live Stripe pull exists, refresh it first
- otherwise read `/Users/affoon/.hermes/workspace/business/stripe-sales.md` and `/Users/affoon/.hermes/workspace/business/financial-status.md`
- always report the snapshot timestamp if the data is not live
2. Normalize the revenue picture before answering:
- separate paid sales, failed attempts, successful retries, `$0` invoices, refunds, disputes, and active subscriptions
- do not treat a transient decline as lost revenue if the same checkout later succeeded
- flag any duplicate subscriptions or repeated checkouts with exact timestamps
3. For a customer billing incident:
- identify the customer email, account login, subscription ids, checkout sessions, payment intents, and timing
- determine whether extra charges are duplicates, retries, or real extra entitlements
- if recommending refunds or consolidation, explain what product value the extra charges did or did not unlock
4. For org, seat, quota, or activation questions:
- inspect the checked-in billing and usage code before making claims
- verify checkout quantity handling, installation vs user usage keys, unit-count handling, seat registry or member sync, and quota stacking
- inspect the live pricing copy too, so you can call out mismatches between marketing and implementation
5. For pricing and competitor questions:
- use `market-research`, `deep-research`, or `exa-search` for current public evidence
- separate sourced facts from inference, and call out stale or incomplete pricing signals
6. Report in layers:
- current sales snapshot
- customer-impact diagnosis
- code-backed product truth
- recommendation or next action
7. If the user wants fixes after diagnosis:
- hand the implementation path to `agentic-engineering` and `plankton-code-quality`
- keep the evidence trail so copy changes, refunds, and code changes stay aligned
## Pitfalls
- do not claim `new sales` without saying whether the data is live or a saved snapshot
- do not mix failed attempts into net revenue if the payment later succeeded
- do not say `per seat` unless the code actually enforces seat behavior
- do not assume extra subscriptions increase quotas without verifying the entitlement path
- do not compare competitor pricing from memory when current public sources are available
## Verification
- the answer includes a snapshot timestamp or an explicit live-pull statement
- the answer separates fact, inference, and recommendation
- code-backed claims cite file paths or code areas
- customer-impact statements name the exact payment or subscription evidence they rely on

View File

@@ -1,57 +0,0 @@
---
name: knowledge-ops
description: Evidence-first memory and context retrieval workflow for Hermes. Use when the user asks what Hermes remembers, points to OpenClaw or Hermes memory, or wants context recovered from a compacted session without re-reading already loaded files.
origin: Hermes
---
# Knowledge Ops
Use this when the user asks Hermes to remember something, recover an older conversation, pull context from a compacted session, or find information that "should be in memory somewhere."
## Skill Stack
Pull these companion skills into the workflow when relevant:
- `continuous-learning-v2` for evidence-backed pattern capture and cross-session learning
- `search-first` before inventing a new lookup path or assuming a store is empty
- `eval-harness` mindset for exact source attribution and negative-search reporting
## When To Use
- user says `do you remember`, `it was in memory`, `it was in openclaw`, `find the old session`, or similar
- the prompt contains a compaction summary or `[Files already read ... do NOT re-read these]`
- the answer depends on Hermes workspace memory, Supermemory, session logs, or the historical knowledge base
## Workflow
1. Start from the evidence already in the prompt:
- treat compaction summaries and `do NOT re-read` markers as usable context
- do not waste turns re-reading the same files unless the summary is clearly insufficient
2. Search in a fixed order before saying `not found`:
- `mcp_supermemory_recall` with a targeted query
- grep `/Users/affoon/.hermes/workspace/memory/`
- grep `/Users/affoon/.hermes/workspace/` more broadly
- `session_search` for recent Hermes conversations
- grep `/Users/affoon/GitHub/affaans_knowledge_base/` or the OpenClaw archive for historical context
3. If the user says the answer is in a specific memory store, pivot there immediately:
- `openclaw memory` means favor the historical knowledge base or OpenClaw archive
- `not in this session` means stop digging through the current thread and move to persistent stores
4. Keep the search narrow and evidence-led:
- reuse names, dates, channels, account names, or quoted phrases from the user
- search the most likely store first instead of spraying generic queries everywhere
5. Report findings with source evidence:
- give the file path, session id, date, or memory store
- distinguish between a direct hit, a likely match, and an inference
6. If nothing turns up, say which sources were checked and what to try next. Do not say `not found` after a single failed search.
## Pitfalls
- do not ignore a compaction summary and start over from zero
- do not keep re-reading files the prompt says are already loaded
- do not answer from vague memory without a source path, date, or session reference
- do not stop after one failed memory source when others remain
## Verification
- the response names the source store or file
- the response separates direct evidence from inference
- failed lookups list the sources checked, not just a bare `not found`

View File

@@ -1,64 +0,0 @@
---
name: research-ops
description: Evidence-first research workflow for Hermes. Use when answering current questions, evaluating a market or tool, enriching leads, or deciding whether a request should become ongoing monitored data collection.
metadata:
hermes:
tags: [generated, research, market, discovery, monitoring, workflow, verification]
---
# Research Ops
Use this when the user asks Hermes to research something current, compare options, enrich people or companies, or turn repeated lookups into an ongoing monitoring workflow.
## Skill Stack
Pull these imported skills into the workflow when relevant:
- `deep-research` for multi-source cited synthesis
- `market-research` for decision-oriented framing
- `exa-search` for first-pass discovery and current-web retrieval
- `data-scraper-agent` when the user really needs recurring collection or monitoring
- `search-first` before building new scraping or enrichment logic
- `eval-harness` mindset for claim quality, freshness, and explicit uncertainty
## When To Use
- user says `research`, `look up`, `find`, `who should i talk to`, `what's the latest`, or similar
- the answer depends on current public information, external sources, or a ranked set of candidates
- the task sounds recurring enough that a scraper or scheduled monitor may be better than a one-off search
## Workflow
1. Classify the ask before searching:
- quick factual answer
- decision memo or comparison
- lead list or enrichment
- recurring monitoring request
2. Start with the fastest evidence path:
- use `exa-search` first for broad current-web discovery
- if the question is about a local wrapper, config, or checked-in code path, inspect the live local source before making any web claim
3. Deepen only where the evidence justifies it:
- use `deep-research` when the user needs synthesis, citations, or multiple angles
- use `market-research` when the result should end in a recommendation, ranking, or go/no-go call
4. Separate fact from inference:
- label sourced facts clearly
- label inferred fit, ranking, or recommendation as inference
- include dates when freshness matters
5. Decide whether this should stay manual:
- if the user will likely ask for the same scan repeatedly, use `data-scraper-agent` patterns or propose a monitored collection path instead of repeating the same manual research forever
6. Report with evidence:
- cite the source or local file behind each important claim
- if evidence is thin or conflicting, say so directly
## Pitfalls
- do not answer current questions from stale memory when a fresh search is cheap
- do not conflate local code-backed behavior with market or web evidence
- do not present unsourced numbers or rankings as facts
- do not spin up a heavy deep-research pass for a quick capability check that local code can answer
- do not keep running one-off research for a repeated monitoring ask when automation is the better fit
## Verification
- important claims have a source, file path, or explicit inference label
- freshness-sensitive answers include concrete dates when relevant
- recurring-monitoring recommendations state whether the task should remain manual or graduate to a scraper/workflow

View File

@@ -1,64 +0,0 @@
---
name: terminal-ops
description: Evidence-first terminal and repo execution workflow for Hermes. Use when fixing CI or build failures, running commands in a repo, applying code changes, or proving what was actually executed, verified, and pushed.
metadata:
hermes:
tags: [generated, terminal, coding, ci, repo, workflow, verification]
---
# Terminal Ops
Use this when the user asks Hermes to fix code, resolve CI failures, run terminal commands in a repo, inspect git state, or push verified changes.
## Skill Stack
Pull these imported skills into the workflow when relevant:
- `agentic-engineering` for scoped decomposition and explicit done conditions
- `plankton-code-quality` for write-time quality expectations and linter discipline
- `eval-harness` for pass/fail verification after each change
- `search-first` before inventing a new helper, dependency, or abstraction
- `security-review` when secrets, auth, external inputs, or privileged operations are touched
## When To Use
- user says `fix`, `debug`, `run this`, `check the repo`, `push it`, or similar
- the task references CI failures, lint errors, build errors, tests, scripts, or a local repo path
- the answer depends on what a command, diff, branch, or verification step actually shows
## Workflow
1. Resolve the exact working surface first:
- use the user-provided absolute repo path when given
- if the target is not a git repo, do not reach for git-only steps
- prefer `/Users/affoon/GitHub/...` over any iCloud or Documents mirror
2. Inspect before editing:
- read the failing command, file, test, or CI error first
- check current branch and local state before changing or pushing anything
- if the prompt already includes loaded-file markers or a compaction summary, use that evidence instead of re-reading blindly
3. Keep fixes narrow and evidence-led:
- solve one dominant failure at a time
- prefer repo-local scripts, package scripts, and checked-in helpers over ad hoc one-liners
- if a dependency or helper is needed, use `search-first` before writing custom glue
4. Verify after each meaningful change:
- rerun the smallest command that proves the fix
- escalate to the broader build, lint, or test only after the local failure is addressed
- review the diff before any commit or push
5. Push only when the requested state is real:
- distinguish `changed locally`, `verified locally`, `committed`, and `pushed`
- if push is requested, use a non-interactive git flow and report the branch and result
6. Report exact status words:
- drafted, changed locally, verified locally, committed, pushed, blocked, awaiting verification
## Pitfalls
- do not guess the failure from memory when logs or tests can settle it
- do not work in `/Users/affoon/Documents/...` clones when `/Users/affoon/GitHub/...` exists
- do not use destructive git commands or revert unrelated local work
- do not claim `fixed` if the proving command was not rerun
- do not claim `pushed` if the change only exists locally
## Verification
- the response names the proving command or test and its result
- the response names the repo path and branch when git was involved
- any push claim includes the target branch and exact status

View File

@@ -0,0 +1,79 @@
# Product Lens — Think Before You Build
## When to Use
- Before starting any feature — validate the "why"
- Weekly product review — are we building the right thing?
- When stuck choosing between features
- Before a launch — sanity check the user journey
- When converting a vague idea into a spec
## How It Works
### Mode 1: Product Diagnostic
Like YC office hours but automated. Asks the hard questions:
```
1. Who is this for? (specific person, not "developers")
2. What's the pain? (quantify: how often, how bad, what do they do today?)
3. Why now? (what changed that makes this possible/necessary?)
4. What's the 10-star version? (if money/time were unlimited)
5. What's the MVP? (smallest thing that proves the thesis)
6. What's the anti-goal? (what are you explicitly NOT building?)
7. How do you know it's working? (metric, not vibes)
```
Output: a `PRODUCT-BRIEF.md` with answers, risks, and a go/no-go recommendation.
### Mode 2: Founder Review
Reviews your current project through a founder lens:
```
1. Read README, CLAUDE.md, package.json, recent commits
2. Infer: what is this trying to be?
3. Score: product-market fit signals (0-10)
- Usage growth trajectory
- Retention indicators (repeat contributors, return users)
- Revenue signals (pricing page, billing code, Stripe integration)
- Competitive moat (what's hard to copy?)
4. Identify: the one thing that would 10x this
5. Flag: things you're building that don't matter
```
### Mode 3: User Journey Audit
Maps the actual user experience:
```
1. Clone/install the product as a new user
2. Document every friction point (confusing steps, errors, missing docs)
3. Time each step
4. Compare to competitor onboarding
5. Score: time-to-value (how long until the user gets their first win?)
6. Recommend: top 3 fixes for onboarding
```
### Mode 4: Feature Prioritization
When you have 10 ideas and need to pick 2:
```
1. List all candidate features
2. Score each on: impact (1-5) × confidence (1-5) ÷ effort (1-5)
3. Rank by the resulting ICE-style score (impact × confidence ÷ effort), highest first
4. Apply constraints: runway, team size, dependencies
5. Output: prioritized roadmap with rationale
```
## Output
All modes output actionable docs, not essays. Every recommendation has a specific next step.
## Integration
Pair with:
- `/browser-qa` to verify the user journey audit findings
- `/design-system audit` for visual polish assessment
- `/canary-watch` for post-launch monitoring

View File

@@ -0,0 +1,69 @@
# Safety Guard — Prevent Destructive Operations
## When to Use
- When working on production systems
- When agents are running autonomously (full-auto mode)
- When you want to restrict edits to a specific directory
- During sensitive operations (migrations, deploys, data changes)
## How It Works
Three modes of protection:
### Mode 1: Careful Mode
Intercepts destructive commands before execution and warns:
```
Watched patterns:
- rm -rf (especially /, ~, or project root)
- git push --force
- git reset --hard
- git checkout . (discards all unstaged changes)
- DROP TABLE / DROP DATABASE
- docker system prune
- kubectl delete
- chmod 777
- sudo rm
- npm publish (accidental publishes)
- Any command with --no-verify
```
When detected: shows what the command does, asks for confirmation, and suggests a safer alternative.
### Mode 2: Freeze Mode
Locks file edits to a specific directory tree:
```
/safety-guard freeze src/components/
```
Any Write/Edit outside `src/components/` is blocked with an explanation. Useful when you want an agent to focus on one area without touching unrelated code.
### Mode 3: Guard Mode (Careful + Freeze combined)
Both protections active. Maximum safety for autonomous agents.
```
/safety-guard guard --dir src/api/ --allow-read-all
```
Agents can read anything but only write to `src/api/`. Destructive commands are blocked everywhere.
### Unlock
```
/safety-guard off
```
## Implementation
Uses PreToolUse hooks to intercept Bash, Write, Edit, and MultiEdit tool calls. Checks the command/path against the active rules before allowing execution.
## Integration
- Enable by default for `codex -a never` sessions
- Pair with observability risk scoring in ECC 2.0
- Logs all blocked actions to `~/.claude/safety-guard.log`

View File

@@ -112,14 +112,17 @@ function runTests() {
);
})) passed++; else failed++;
if (test('resolves antigravity profiles by skipping incompatible dependency trees', () => {
if (test('resolves antigravity profiles while skipping only unsupported modules', () => {
const projectRoot = '/workspace/app';
const plan = resolveInstallPlan({ profileId: 'core', target: 'antigravity', projectRoot });
assert.deepStrictEqual(plan.selectedModuleIds, ['rules-core', 'agents-core', 'commands-core']);
assert.deepStrictEqual(
plan.selectedModuleIds,
['rules-core', 'agents-core', 'commands-core', 'platform-configs', 'workflow-quality']
);
assert.ok(plan.skippedModuleIds.includes('hooks-runtime'));
assert.ok(plan.skippedModuleIds.includes('platform-configs'));
assert.ok(plan.skippedModuleIds.includes('workflow-quality'));
assert.ok(!plan.skippedModuleIds.includes('platform-configs'));
assert.ok(!plan.skippedModuleIds.includes('workflow-quality'));
assert.strictEqual(plan.targetAdapterId, 'antigravity-project');
assert.strictEqual(plan.targetRoot, path.join(projectRoot, '.agent'));
})) passed++; else failed++;

View File

@@ -258,7 +258,7 @@ function runTests() {
}
})) passed++; else failed++;
if (test('installs antigravity manifest profiles while skipping incompatible modules', () => {
if (test('installs antigravity manifest profiles while skipping only unsupported modules', () => {
const homeDir = createTempDir('install-apply-home-');
const projectDir = createTempDir('install-apply-project-');
@@ -269,14 +269,18 @@ function runTests() {
assert.ok(fs.existsSync(path.join(projectDir, '.agent', 'rules', 'common-coding-style.md')));
assert.ok(fs.existsSync(path.join(projectDir, '.agent', 'skills', 'architect.md')));
assert.ok(fs.existsSync(path.join(projectDir, '.agent', 'workflows', 'plan.md')));
assert.ok(!fs.existsSync(path.join(projectDir, '.agent', 'skills', 'tdd-workflow', 'SKILL.md')));
assert.ok(fs.existsSync(path.join(projectDir, '.agent', 'skills', 'tdd-workflow', 'SKILL.md')));
const state = readJson(path.join(projectDir, '.agent', 'ecc-install-state.json'));
assert.strictEqual(state.request.profile, 'core');
assert.strictEqual(state.request.legacyMode, false);
assert.deepStrictEqual(state.resolution.selectedModules, ['rules-core', 'agents-core', 'commands-core']);
assert.ok(state.resolution.skippedModules.includes('workflow-quality'));
assert.ok(state.resolution.skippedModules.includes('platform-configs'));
assert.deepStrictEqual(
state.resolution.selectedModules,
['rules-core', 'agents-core', 'commands-core', 'platform-configs', 'workflow-quality']
);
assert.ok(state.resolution.skippedModules.includes('hooks-runtime'));
assert.ok(!state.resolution.skippedModules.includes('workflow-quality'));
assert.ok(!state.resolution.skippedModules.includes('platform-configs'));
} finally {
cleanup(homeDir);
cleanup(projectDir);