mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-04-09 10:53:34 +08:00
feat(skills): add skill-comply — automated behavioral compliance measurement (#724)
* feat(skills): add skill-comply — automated behavioral compliance measurement Automated compliance measurement for skills, rules, and agent definitions. Generates behavioral specs, runs scenarios at 3 strictness levels, classifies tool calls via LLM, and produces self-contained reports. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(skill-comply): address bot review feedback - AGENTS.md: fix stale skill count (115 → 117) in project structure - run.py: replace remaining print() with logger, add zero-division guard, create parent dirs for --output path - runner.py: add returncode check for claude subprocess, clarify relative_to path traversal validation - parser.py: use is_file() instead of exists(), catch KeyError for missing trace fields, add file check in parse_spec - classifier.py: log warnings on malformed classification output, guard against non-dict JSON responses - grader.py: filter negative indices from LLM classification Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
70
skills/skill-comply/scripts/scenario_generator.py
Normal file
70
skills/skill-comply/scripts/scenario_generator.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Generate pressure scenarios from skill + spec using LLM."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from scripts.utils import extract_yaml
|
||||
|
||||
# Directory containing the LLM prompt templates, resolved relative to this
# file: scripts/ -> skill-comply/ -> prompts/.
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Scenario:
    """One pressure scenario produced by the LLM scenario generator.

    Instances are immutable and built from the generator's YAML output in
    :func:`generate_scenarios`.
    """

    # Unique scenario identifier from the generator's YAML.
    id: str
    # Numeric strictness level; scenarios are returned sorted ascending by it.
    level: int
    # Human-readable name for the strictness level.
    level_name: str
    # Short description of what the scenario exercises.
    description: str
    # Prompt text sent to the agent (stripped of surrounding whitespace).
    prompt: str
    # Shell commands to run before the scenario; may be empty.
    setup_commands: tuple[str, ...]
|
||||
|
||||
|
||||
def generate_scenarios(
    skill_path: Path,
    spec_yaml: str,
    model: str = "haiku",
) -> list[Scenario]:
    """Generate 3 scenarios with decreasing prompt strictness.

    Calls ``claude -p`` with the scenario_generator prompt and parses the
    YAML the model emits.

    Args:
        skill_path: Path to the skill file whose content is embedded in
            the prompt.
        spec_yaml: Behavioral spec YAML embedded in the prompt.
        model: Model alias passed to ``claude --model``.

    Returns:
        Scenarios sorted ascending by strictness level.

    Raises:
        RuntimeError: If the claude CLI exits non-zero, returns empty
            output, or emits YAML without a valid ``scenarios`` list.
        subprocess.TimeoutExpired: If the CLI exceeds the 120s timeout.
    """
    # Explicit encoding: read_text() without it depends on the locale.
    skill_content = skill_path.read_text(encoding="utf-8")
    prompt_template = (PROMPTS_DIR / "scenario_generator.md").read_text(encoding="utf-8")
    prompt = (
        prompt_template
        .replace("{skill_content}", skill_content)
        .replace("{spec_yaml}", spec_yaml)
    )

    result = subprocess.run(
        ["claude", "-p", prompt, "--model", model, "--output-format", "text"],
        capture_output=True,
        text=True,
        timeout=120,
    )

    if result.returncode != 0:
        raise RuntimeError(f"claude -p failed: {result.stderr}")

    if not result.stdout.strip():
        raise RuntimeError("claude -p returned empty output")

    raw_yaml = extract_yaml(result.stdout)
    parsed = yaml.safe_load(raw_yaml)

    # Guard against malformed LLM output: safe_load can yield None or a
    # non-dict, and the dict may lack a "scenarios" list. Without this
    # check the subscript below raises an opaque TypeError/KeyError.
    if not isinstance(parsed, dict) or not isinstance(parsed.get("scenarios"), list):
        raise RuntimeError("scenario generator output missing 'scenarios' list")

    scenarios: list[Scenario] = []
    for s in parsed["scenarios"]:
        try:
            scenarios.append(Scenario(
                id=s["id"],
                level=s["level"],
                level_name=s["level_name"],
                description=s["description"],
                prompt=s["prompt"].strip(),
                setup_commands=tuple(s.get("setup_commands", [])),
            ))
        except (KeyError, TypeError, AttributeError) as err:
            # Surface which entry was malformed instead of a bare traceback.
            raise RuntimeError(f"malformed scenario entry: {s!r}") from err

    return sorted(scenarios, key=lambda sc: sc.level)
|
||||
Reference in New Issue
Block a user