docs: salvage focused stale PR contributions

- add Vite and Redis pattern skills from closed stale PRs
- add frontend-slides support assets
- port skill-comply runner fixes and LLM prompt/provider regressions
- harden agent frontmatter validation and sync catalog counts
2026-05-18 06:43:05 +08:00 · 2026-05-11 05:18:18 -04:00
parent d8f879e671
commit b39d2244cf
28 changed files with 2653 additions and 59 deletions
--- a/skills/skill-comply/scripts/runner.py
+++ b/skills/skill-comply/scripts/runner.py
@@ -15,6 +15,10 @@ from scripts.scenario_generator import Scenario

 SANDBOX_BASE = Path("/tmp/skill-comply-sandbox")
 ALLOWED_MODELS = frozenset({"haiku", "sonnet", "opus"})
+# Shell builtins cannot usefully be run via subprocess.run (no shell is
+# involved, and cwd is already fixed by the cwd= keyword). Scenarios that
+# list them in setup_commands, a common shell-style habit, are tolerated.
+SHELL_BUILTINS = frozenset({"cd", "pushd", "popd"})


@dataclass(frozen=True)
@@ -53,9 +57,22 @@ def run_scenario(
        cwd=sandbox_dir,
    )

-    if result.returncode != 0:
+    # claude -p returns rc=1 when --max-turns is reached, but the stream-json
+    # output is still complete and parseable. Treat this graceful termination
+    # as non-fatal so scenarios that hit the turn cap still produce usable
+    # observations.
+    nonfatal_max_turns = (
+        result.returncode == 1
+        and '"terminal_reason":"max_turns"' in result.stdout
+    )
+    if result.returncode != 0 and not nonfatal_max_turns:
+        # Include both stderr and stdout tails. claude -p often surfaces the
+        # actual failure context (model error JSON, partial stream-json) on
+        # stdout, while stderr carries generic transport / auth messages.
+        # Showing both dramatically reduces "rc=N: <empty>" debugging dead-ends.
        raise RuntimeError(
-            f"claude -p failed (rc={result.returncode}): {result.stderr[:500]}"
+            f"claude -p failed (rc={result.returncode}): "
+            f"stderr={result.stderr[:500]!r} stdout_tail={result.stdout[-500:]!r}"
        )

    observations = _parse_stream_json(result.stdout)
@@ -86,7 +103,15 @@ def _setup_sandbox(sandbox_dir: Path, scenario: Scenario) -> None:

    for cmd in scenario.setup_commands:
        parts = shlex.split(cmd)
-        subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
+        if not parts or parts[0] in SHELL_BUILTINS:
+            # Empty command or shell builtin (cd/pushd/popd): nothing to run.
+            continue
+        try:
+            subprocess.run(parts, cwd=sandbox_dir, capture_output=True)
+        except FileNotFoundError:
+            # Setup tool not installed in this environment; skip rather than
+            # crash the whole scenario. The compliance run continues.
+            continue


 def _parse_stream_json(stdout: str) -> list[ObservationEvent]:
--- a/skills/skill-comply/tests/test_runner.py
+++ b/skills/skill-comply/tests/test_runner.py
@@ -0,0 +1,172 @@
+"""Tests for runner module — scenario execution + subprocess error handling."""
+
+from __future__ import annotations
+
+import subprocess
+from dataclasses import dataclass
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from scripts.runner import _setup_sandbox, run_scenario
+
+
+@dataclass(frozen=True)
+class _FakeScenario:
+    """Minimal Scenario-like object for runner tests (avoids generator deps)."""
+
+    id: str
+    prompt: str = "do nothing"
+    setup_commands: tuple[str, ...] = ()
+
+
+class TestSetupSandboxSkipsShellBuiltins:
+    """Setup commands containing shell builtins (cd/pushd/popd) must be skipped.
+
+    Regression: subprocess.run(["cd", ...]) raises FileNotFoundError because
+    cd is a shell builtin, not an external binary. Real-world scenarios often
+    include "cd subdir" in setup_commands assuming shell semantics, so the
+    runner must tolerate this rather than crashing the whole scenario.
+    """
+
+    def test_skips_cd(self, tmp_path):
+        scenario = _FakeScenario(
+            id="t1",
+            setup_commands=("cd subdir",),
+        )
+        called_args: list[list[str]] = []
+
+        def fake_run(args, **kwargs):
+            called_args.append(args)
+            return subprocess.CompletedProcess(args=args, returncode=0)
+
+        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
+            _setup_sandbox(tmp_path, scenario)
+
+        # git init must still run; "cd subdir" must NOT reach subprocess.run
+        assert ["git", "init"] in called_args
+        assert ["cd", "subdir"] not in called_args
+
+    def test_skips_pushd_popd(self, tmp_path):
+        scenario = _FakeScenario(
+            id="t2",
+            setup_commands=("pushd dir", "popd"),
+        )
+        called_args: list[list[str]] = []
+
+        def fake_run(args, **kwargs):
+            called_args.append(args)
+            return subprocess.CompletedProcess(args=args, returncode=0)
+
+        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
+            _setup_sandbox(tmp_path, scenario)
+
+        assert ["pushd", "dir"] not in called_args
+        assert ["popd"] not in called_args
+
+    def test_tolerates_missing_executable(self, tmp_path):
+        """A scenario referencing an unavailable tool must not crash setup."""
+        scenario = _FakeScenario(
+            id="t3",
+            setup_commands=("nonexistent-tool-xyz arg",),
+        )
+
+        def fake_run(args, **kwargs):
+            if args[0] == "nonexistent-tool-xyz":
+                raise FileNotFoundError(2, "No such file or directory")
+            return subprocess.CompletedProcess(args=args, returncode=0)
+
+        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
+            # Must NOT raise — missing tools are skipped, not fatal
+            _setup_sandbox(tmp_path, scenario)
+
+    def test_real_commands_still_run(self, tmp_path):
+        """Skip logic must not break legitimate setup commands."""
+        scenario = _FakeScenario(
+            id="t4",
+            setup_commands=("touch file.txt", "cd ignored", "echo hi"),
+        )
+        called_args: list[list[str]] = []
+
+        def fake_run(args, **kwargs):
+            called_args.append(args)
+            return subprocess.CompletedProcess(args=args, returncode=0)
+
+        with patch("scripts.runner.subprocess.run", side_effect=fake_run):
+            _setup_sandbox(tmp_path, scenario)
+
+        # Real commands present, cd absent
+        assert ["touch", "file.txt"] in called_args
+        assert ["echo", "hi"] in called_args
+        assert ["cd", "ignored"] not in called_args
+
+
+class TestRunScenarioMaxTurnsTermination:
+    """rc=1 with terminal_reason=max_turns is graceful termination, not failure.
+
+    claude -p returns rc=1 when --max-turns is reached, but the stream-json
+    output is still valid. Treating this as RuntimeError aborts scenarios
+    that would have produced useful observations. Detect the marker in stdout
+    and downgrade rc=1 + max_turns to non-fatal.
+    """
+
+    def test_rc1_with_max_turns_marker_returns_normally(self, tmp_path, monkeypatch):
+        scenario = _FakeScenario(id="mt1", prompt="long task", setup_commands=())
+
+        # Skip sandbox setup side effects
+        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
+
+        max_turns_stdout = (
+            '{"type":"system","subtype":"init","session_id":"s1"}\n'
+            '{"type":"result","terminal_reason":"max_turns"}\n'
+        )
+
+        fake_result = subprocess.CompletedProcess(
+            args=["claude"], returncode=1, stdout=max_turns_stdout, stderr=""
+        )
+
+        with patch("scripts.runner.subprocess.run", return_value=fake_result):
+            # Must NOT raise — max_turns is graceful termination
+            run_scenario(scenario, model="haiku")
+
+    def test_rc1_without_max_turns_marker_still_raises(self, tmp_path, monkeypatch):
+        """Real failures (rc≠0 with no max_turns marker) must still raise."""
+        scenario = _FakeScenario(id="mt2", prompt="oops", setup_commands=())
+        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
+
+        fake_result = subprocess.CompletedProcess(
+            args=["claude"], returncode=1, stdout="", stderr="auth error"
+        )
+
+        with patch("scripts.runner.subprocess.run", return_value=fake_result):
+            with pytest.raises(RuntimeError, match="claude -p failed"):
+                run_scenario(scenario, model="haiku")
+
+
+class TestRunScenarioErrorIncludesStdoutTail:
+    """Error messages must include stdout tail, not only stderr.
+
+    When claude -p fails inside an LLM call, useful diagnostic context often
+    appears in stdout (partial stream-json events, model error JSON), not
+    stderr. Including stdout tail in the RuntimeError message dramatically
+    improves debug-ability without adding any new dependency.
+    """
+
+    def test_error_message_contains_stdout_tail(self, tmp_path, monkeypatch):
+        scenario = _FakeScenario(id="e1", prompt="x", setup_commands=())
+        monkeypatch.setattr("scripts.runner._setup_sandbox", lambda *a, **kw: None)
+
+        diagnostic_marker = "DIAG_STDOUT_MARKER_xyz123"
+        fake_result = subprocess.CompletedProcess(
+            args=["claude"],
+            returncode=2,
+            stdout=f"some context {diagnostic_marker} more text",
+            stderr="generic error",
+        )
+
+        with patch("scripts.runner.subprocess.run", return_value=fake_result):
+            with pytest.raises(RuntimeError) as excinfo:
+                run_scenario(scenario, model="haiku")
+
+        # Stdout marker MUST appear in the error message
+        assert diagnostic_marker in str(excinfo.value)