docs: salvage focused stale PR contributions

- add Vite and Redis pattern skills from closed stale PRs

- add frontend-slides support assets

- port skill-comply runner fixes and LLM prompt/provider regressions

- harden agent frontmatter validation and sync catalog counts
This commit is contained in:
Affaan Mustafa
2026-05-11 05:18:18 -04:00
committed by Affaan Mustafa
parent d8f879e671
commit b39d2244cf
28 changed files with 2653 additions and 59 deletions

View File

@@ -15,6 +15,10 @@ from scripts.scenario_generator import Scenario
SANDBOX_BASE = Path("/tmp/skill-comply-sandbox")
ALLOWED_MODELS = frozenset(("haiku", "sonnet", "opus"))

# Commands such as cd/pushd/popd exist only inside a shell, so
# subprocess.run cannot execute them as programs. The working directory is
# already set through the cwd= keyword, which means scenarios that list
# them in setup_commands (a common shell-style habit) have to be tolerated.
SHELL_BUILTINS = frozenset(("cd", "pushd", "popd"))
@dataclass(frozen=True)
@@ -53,9 +57,22 @@ def run_scenario(
cwd=sandbox_dir,
)
if result.returncode != 0:
# claude -p returns rc=1 when --max-turns is reached, but the stream-json
# output is still complete and parseable. Treat this graceful termination
# as non-fatal so scenarios that hit the turn cap still produce usable
# observations.
nonfatal_max_turns = (
result.returncode == 1
and '"terminal_reason":"max_turns"' in result.stdout
)
if result.returncode != 0 and not nonfatal_max_turns:
# Include both stderr and stdout tails. claude -p often surfaces the
# actual failure context (model error JSON, partial stream-json) on
# stdout, while stderr carries generic transport / auth messages.
# Showing both dramatically reduces "rc=N: <empty>" debugging dead-ends.
raise RuntimeError(
f"claude -p failed (rc={result.returncode}): {result.stderr[:500]}"
f"claude -p failed (rc={result.returncode}): "
f"stderr={result.stderr[:500]!r} stdout_tail={result.stdout[-500:]!r}"
)
observations = _parse_stream_json(result.stdout)
@@ -86,7 +103,15 @@ def _setup_sandbox(sandbox_dir: Path, scenario: Scenario) -> None:
for cmd in scenario.setup_commands:
parts = shlex.split(cmd)
subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
if not parts or parts[0] in SHELL_BUILTINS:
# Shell builtins (cd/pushd/popd) cannot run as a subprocess; skip.
continue
try:
subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
except FileNotFoundError:
# Setup tool not installed in this environment; skip rather than
# crash the whole scenario. The compliance run continues.
continue
def _parse_stream_json(stdout: str) -> list[ObservationEvent]:

View File

@@ -0,0 +1,172 @@
"""Tests for runner module — scenario execution + subprocess error handling."""
from __future__ import annotations
import subprocess
from dataclasses import dataclass
from unittest.mock import MagicMock, patch
import pytest
from scripts.runner import _setup_sandbox, run_scenario
@dataclass(frozen=True)
class _FakeScenario:
    """Lightweight stand-in for a Scenario, used only by the runner tests.

    It carries just the three fields declared below, so the tests do not
    have to pull in the scenario generator's dependencies.
    """

    # Scenario identifier; the only field without a default.
    id: str
    # Prompt text for the scenario.
    prompt: str = "do nothing"
    # Command strings executed during sandbox setup; empty by default.
    setup_commands: tuple[str, ...] = ()
class TestSetupSandboxSkipsShellBuiltins:
    """Setup commands that are shell builtins (cd/pushd/popd) must be skipped.

    Regression: subprocess.run(["cd", ...]) raises FileNotFoundError because
    cd is a shell builtin rather than an external binary. Scenarios commonly
    put "cd subdir" in setup_commands on the assumption of shell semantics,
    so the runner has to tolerate that instead of aborting the scenario.
    """

    @staticmethod
    def _setup_and_record(sandbox_dir, scenario):
        """Run ``_setup_sandbox`` with ``subprocess.run`` replaced by a stub.

        The stub never spawns a process; it reports success for every call.
        Returns the list of argv lists the stub received, in call order.
        """
        invoked: list[list[str]] = []

        def fake_run(args, **kwargs):
            invoked.append(args)
            return subprocess.CompletedProcess(args=args, returncode=0)

        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
            _setup_sandbox(sandbox_dir, scenario)
        return invoked

    def test_skips_cd(self, tmp_path):
        scenario = _FakeScenario(id="t1", setup_commands=("cd subdir",))

        invoked = self._setup_and_record(tmp_path, scenario)

        # Setup still issues git init, but "cd subdir" never reaches subprocess.
        assert ["git", "init"] in invoked
        assert ["cd", "subdir"] not in invoked

    def test_skips_pushd_popd(self, tmp_path):
        scenario = _FakeScenario(id="t2", setup_commands=("pushd dir", "popd"))

        invoked = self._setup_and_record(tmp_path, scenario)

        assert ["pushd", "dir"] not in invoked
        assert ["popd"] not in invoked

    def test_tolerates_missing_executable(self, tmp_path):
        """A scenario referencing an unavailable tool must not crash setup."""
        scenario = _FakeScenario(
            id="t3",
            setup_commands=("nonexistent-tool-xyz arg",),
        )

        def fake_run(args, **kwargs):
            # Behave like the OS does for an executable that is not installed.
            if args[0] == "nonexistent-tool-xyz":
                raise FileNotFoundError(2, "No such file or directory")
            return subprocess.CompletedProcess(args=args, returncode=0)

        # The call completing without an exception is the assertion:
        # a missing tool is skipped rather than treated as fatal.
        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
            _setup_sandbox(tmp_path, scenario)

    def test_real_commands_still_run(self, tmp_path):
        """Skip logic must not break legitimate setup commands."""
        scenario = _FakeScenario(
            id="t4",
            setup_commands=("touch file.txt", "cd ignored", "echo hi"),
        )

        invoked = self._setup_and_record(tmp_path, scenario)

        # The real commands are forwarded; the builtin between them is not.
        assert ["touch", "file.txt"] in invoked
        assert ["echo", "hi"] in invoked
        assert ["cd", "ignored"] not in invoked
class TestRunScenarioMaxTurnsTermination:
    """rc=1 together with terminal_reason=max_turns is a graceful stop.

    claude -p exits with rc=1 once --max-turns is reached, yet its
    stream-json output is still valid. Raising RuntimeError in that case
    would throw away scenarios that produced useful observations, so the
    runner looks for the marker in stdout and treats rc=1 + max_turns as
    non-fatal.
    """

    def test_rc1_with_max_turns_marker_returns_normally(self, tmp_path, monkeypatch):
        # Replace sandbox setup with a no-op so the test has no side effects.
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
        scenario = _FakeScenario(id="mt1", prompt="long task", setup_commands=())

        max_turns_stdout = (
            '{"type":"system","subtype":"init","session_id":"s1"}\n'
            '{"type":"result","terminal_reason":"max_turns"}\n'
        )
        completed = subprocess.CompletedProcess(
            args=["claude"],
            returncode=1,
            stdout=max_turns_stdout,
            stderr="",
        )

        # Returning without an exception is the assertion here.
        with patch("scripts.runner.subprocess.run", return_value=completed):
            run_scenario(scenario, model="haiku")

    def test_rc1_without_max_turns_marker_still_raises(self, tmp_path, monkeypatch):
        """Real failures (rc≠0 with no max_turns marker) must still raise."""
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
        scenario = _FakeScenario(id="mt2", prompt="oops", setup_commands=())

        completed = subprocess.CompletedProcess(
            args=["claude"],
            returncode=1,
            stdout="",
            stderr="auth error",
        )

        with patch("scripts.runner.subprocess.run", return_value=completed), \
                pytest.raises(RuntimeError, match="claude -p failed"):
            run_scenario(scenario, model="haiku")
class TestRunScenarioErrorIncludesStdoutTail:
    """The failure message must carry the stdout tail as well as stderr.

    When claude -p fails during an LLM call, the useful diagnostics (partial
    stream-json events, model error JSON) often land on stdout rather than
    stderr. Putting the stdout tail into the RuntimeError message makes such
    failures much easier to debug and needs no additional dependency.
    """

    def test_error_message_contains_stdout_tail(self, tmp_path, monkeypatch):
        # Replace sandbox setup with a no-op so only the error path is exercised.
        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
        scenario = _FakeScenario(id="e1", prompt="x", setup_commands=())

        diagnostic_marker = "DIAG_STDOUT_MARKER_xyz123"
        completed = subprocess.CompletedProcess(
            args=["claude"],
            returncode=2,
            stdout=f"some context {diagnostic_marker} more text",
            stderr="generic error",
        )

        with patch("scripts.runner.subprocess.run", return_value=completed), \
                pytest.raises(RuntimeError) as excinfo:
            run_scenario(scenario, model="haiku")

        # The marker only ever appeared on stdout, so finding it in the
        # message shows that stdout was included.
        message = str(excinfo.value)
        assert diagnostic_marker in message