fix: greedy regex in validate-commands captures all refs per line, add 18 tests

The command cross-reference regex /^.*`\/(...)`.*$/gm only captured the LAST command ref per line due to greedy .* consuming earlier refs. Replaced with line-by-line processing using non-anchored regex to capture ALL command references. New tests: - 4 validate-commands multi-ref-per-line tests (regression) - 8 evaluate-session threshold boundary tests (new file) - 6 session-aliases edge case tests (cleanup, rename, path matching)
2026-05-15 21:33:04 +08:00 · 2026-02-13 01:52:30 -08:00
parent 6dcb5daa5c
commit c1919bb879
5 changed files with 339 additions and 7 deletions
--- a/scripts/ci/validate-commands.js
+++ b/scripts/ci/validate-commands.js
@@ -74,14 +74,17 @@ function validateCommands() {

    // Check cross-references to other commands (e.g., `/build-fix`)
    // Skip lines that describe hypothetical output (e.g., "→ Creates: `/new-table`")
-    const cmdRefs = contentNoCodeBlocks.matchAll(/^.*`\/([a-z][-a-z0-9]*)`.*$/gm);
-    for (const match of cmdRefs) {
-      const line = match[0];
+    // Process line-by-line so ALL command refs per line are captured
+    // (previous anchored regex /^.*`\/...`.*$/gm only matched the last ref per line)
+    for (const line of contentNoCodeBlocks.split('\n')) {
      if (/creates:|would create:/i.test(line)) continue;
-      const refName = match[1];
-      if (!validCommands.has(refName)) {
-        console.error(`ERROR: ${file} - references non-existent command /${refName}`);
-        hasErrors = true;
+      const lineRefs = line.matchAll(/`\/([a-z][-a-z0-9]*)`/g);
+      for (const match of lineRefs) {
+        const refName = match[1];
+        if (!validCommands.has(refName)) {
+          console.error(`ERROR: ${file} - references non-existent command /${refName}`);
+          hasErrors = true;
+        }
      }
    }

--- a/tests/ci/validators.test.js
+++ b/tests/ci/validators.test.js
@@ -634,6 +634,75 @@ function runTests() {
    cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir);
  })) passed++; else failed++;

+  if (test('captures ALL command references on a single line (multi-ref)', () => {
+    const testDir = createTestDir();
+    const agentsDir = createTestDir();
+    const skillsDir = createTestDir();
+    // Line with two command references — both should be detected
+    fs.writeFileSync(path.join(testDir, 'multi.md'),
+      '# Multi\nUse `/ghost-a` and `/ghost-b` together.');
+
+    const result = runValidatorWithDirs('validate-commands', {
+      COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir
+    });
+    assert.strictEqual(result.code, 1, 'Should fail on broken refs');
+    // BOTH ghost-a AND ghost-b must be reported (this was the greedy regex bug)
+    assert.ok(result.stderr.includes('ghost-a'), 'Should report first ref /ghost-a');
+    assert.ok(result.stderr.includes('ghost-b'), 'Should report second ref /ghost-b');
+    cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir);
+  })) passed++; else failed++;
+
+  if (test('captures three command refs on one line', () => {
+    const testDir = createTestDir();
+    const agentsDir = createTestDir();
+    const skillsDir = createTestDir();
+    fs.writeFileSync(path.join(testDir, 'triple.md'),
+      '# Triple\nChain `/alpha`, `/beta`, and `/gamma` in order.');
+
+    const result = runValidatorWithDirs('validate-commands', {
+      COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir
+    });
+    assert.strictEqual(result.code, 1, 'Should fail on all three broken refs');
+    assert.ok(result.stderr.includes('alpha'), 'Should report /alpha');
+    assert.ok(result.stderr.includes('beta'), 'Should report /beta');
+    assert.ok(result.stderr.includes('gamma'), 'Should report /gamma');
+    cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir);
+  })) passed++; else failed++;
+
+  if (test('multi-ref line with one valid and one invalid ref', () => {
+    const testDir = createTestDir();
+    const agentsDir = createTestDir();
+    const skillsDir = createTestDir();
+    // "real-cmd" exists, "fake-cmd" does not
+    fs.writeFileSync(path.join(testDir, 'real-cmd.md'), '# Real\nA real command.');
+    fs.writeFileSync(path.join(testDir, 'mixed.md'),
+      '# Mixed\nRun `/real-cmd` then `/fake-cmd`.');
+
+    const result = runValidatorWithDirs('validate-commands', {
+      COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir
+    });
+    assert.strictEqual(result.code, 1, 'Should fail for the fake ref');
+    assert.ok(result.stderr.includes('fake-cmd'), 'Should report /fake-cmd');
+    // real-cmd should NOT appear in errors
+    assert.ok(!result.stderr.includes('real-cmd'), 'Should not report valid /real-cmd');
+    cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir);
+  })) passed++; else failed++;
+
+  if (test('creates: line with multiple refs skips entire line', () => {
+    const testDir = createTestDir();
+    const agentsDir = createTestDir();
+    const skillsDir = createTestDir();
+    // Both refs on a "Creates:" line should be skipped entirely
+    fs.writeFileSync(path.join(testDir, 'gen.md'),
+      '# Generator\nCreates: `/new-a` and `/new-b`');
+
+    const result = runValidatorWithDirs('validate-commands', {
+      COMMANDS_DIR: testDir, AGENTS_DIR: agentsDir, SKILLS_DIR: skillsDir
+    });
+    assert.strictEqual(result.code, 0, 'Should skip all refs on creates: line');
+    cleanupTestDir(testDir); cleanupTestDir(agentsDir); cleanupTestDir(skillsDir);
+  })) passed++; else failed++;
+
  if (test('validates valid workflow diagram with known agents', () => {
    const testDir = createTestDir();
    const agentsDir = createTestDir();
--- a/tests/hooks/evaluate-session.test.js
+++ b/tests/hooks/evaluate-session.test.js
@@ -0,0 +1,183 @@
+/**
+ * Tests for scripts/hooks/evaluate-session.js
+ *
+ * Tests the session evaluation threshold logic, config loading,
+ * and stdin parsing. Uses temporary JSONL transcript files.
+ *
+ * Run with: node tests/hooks/evaluate-session.test.js
+ */
+
+const assert = require('assert');
+const path = require('path');
+const fs = require('fs');
+const os = require('os');
+const { spawnSync, execFileSync } = require('child_process');
+
+const evaluateScript = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'evaluate-session.js');
+
+// Test helpers
+function test(name, fn) {
+  try {
+    fn();
+    console.log(`  \u2713 ${name}`);
+    return true;
+  } catch (err) {
+    console.log(`  \u2717 ${name}`);
+    console.log(`    Error: ${err.message}`);
+    return false;
+  }
+}
+
+function createTestDir() {
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'eval-session-test-'));
+}
+
+function cleanupTestDir(testDir) {
+  fs.rmSync(testDir, { recursive: true, force: true });
+}
+
+/**
+ * Create a JSONL transcript file with N user messages.
+ * Each line is a JSON object with `"type":"user"`.
+ */
+function createTranscript(dir, messageCount) {
+  const filePath = path.join(dir, 'transcript.jsonl');
+  const lines = [];
+  for (let i = 0; i < messageCount; i++) {
+    lines.push(JSON.stringify({ type: 'user', content: `Message ${i + 1}` }));
+    // Intersperse assistant messages to be realistic
+    lines.push(JSON.stringify({ type: 'assistant', content: `Response ${i + 1}` }));
+  }
+  fs.writeFileSync(filePath, lines.join('\n') + '\n');
+  return filePath;
+}
+
+/**
+ * Run evaluate-session.js with stdin providing the transcript_path.
+ * Uses spawnSync to capture both stdout and stderr regardless of exit code.
+ * Returns { code, stdout, stderr }.
+ */
+function runEvaluate(stdinJson) {
+  const result = spawnSync('node', [evaluateScript], {
+    encoding: 'utf8',
+    input: JSON.stringify(stdinJson),
+    timeout: 10000,
+  });
+  return {
+    code: result.status || 0,
+    stdout: result.stdout || '',
+    stderr: result.stderr || '',
+  };
+}
+
+function runTests() {
+  console.log('\n=== Testing evaluate-session.js ===\n');
+
+  let passed = 0;
+  let failed = 0;
+
+  // Threshold boundary tests (default minSessionLength = 10)
+  console.log('Threshold boundary (default min=10):');
+
+  if (test('skips session with 9 user messages (below threshold)', () => {
+    const testDir = createTestDir();
+    const transcript = createTranscript(testDir, 9);
+    const result = runEvaluate({ transcript_path: transcript });
+    assert.strictEqual(result.code, 0, 'Should exit 0');
+    // "too short" message should appear in stderr (log goes to stderr)
+    assert.ok(
+      result.stderr.includes('too short') || result.stderr.includes('9 messages'),
+      'Should indicate session too short'
+    );
+    cleanupTestDir(testDir);
+  })) passed++; else failed++;
+
+  if (test('evaluates session with exactly 10 user messages (at threshold)', () => {
+    const testDir = createTestDir();
+    const transcript = createTranscript(testDir, 10);
+    const result = runEvaluate({ transcript_path: transcript });
+    assert.strictEqual(result.code, 0, 'Should exit 0');
+    // Should NOT say "too short" — should say "evaluate for extractable patterns"
+    assert.ok(!result.stderr.includes('too short'), 'Should NOT say too short at threshold');
+    assert.ok(
+      result.stderr.includes('10 messages') || result.stderr.includes('evaluate'),
+      'Should indicate evaluation'
+    );
+    cleanupTestDir(testDir);
+  })) passed++; else failed++;
+
+  if (test('evaluates session with 11 user messages (above threshold)', () => {
+    const testDir = createTestDir();
+    const transcript = createTranscript(testDir, 11);
+    const result = runEvaluate({ transcript_path: transcript });
+    assert.strictEqual(result.code, 0);
+    assert.ok(!result.stderr.includes('too short'), 'Should NOT say too short');
+    assert.ok(result.stderr.includes('evaluate'), 'Should trigger evaluation');
+    cleanupTestDir(testDir);
+  })) passed++; else failed++;
+
+  // Edge cases
+  console.log('\nEdge cases:');
+
+  if (test('exits 0 with missing transcript_path', () => {
+    const result = runEvaluate({});
+    assert.strictEqual(result.code, 0, 'Should exit 0 gracefully');
+  })) passed++; else failed++;
+
+  if (test('exits 0 with non-existent transcript file', () => {
+    const result = runEvaluate({ transcript_path: '/nonexistent/path/transcript.jsonl' });
+    assert.strictEqual(result.code, 0, 'Should exit 0 gracefully');
+  })) passed++; else failed++;
+
+  if (test('exits 0 with invalid stdin JSON', () => {
+    // Pass raw string instead of JSON
+    const result = spawnSync('node', [evaluateScript], {
+      encoding: 'utf8',
+      input: 'not valid json at all',
+      timeout: 10000,
+    });
+    assert.strictEqual(result.status, 0, 'Should exit 0 even on bad stdin');
+  })) passed++; else failed++;
+
+  if (test('skips empty transcript file (0 user messages)', () => {
+    const testDir = createTestDir();
+    const filePath = path.join(testDir, 'empty.jsonl');
+    fs.writeFileSync(filePath, '');
+    const result = runEvaluate({ transcript_path: filePath });
+    assert.strictEqual(result.code, 0);
+    // 0 < 10, so should be "too short"
+    assert.ok(
+      result.stderr.includes('too short') || result.stderr.includes('0 messages'),
+      'Empty transcript should be too short'
+    );
+    cleanupTestDir(testDir);
+  })) passed++; else failed++;
+
+  if (test('counts only user messages (ignores assistant messages)', () => {
+    const testDir = createTestDir();
+    const filePath = path.join(testDir, 'mixed.jsonl');
+    // 5 user messages + 50 assistant messages — should still be "too short"
+    const lines = [];
+    for (let i = 0; i < 5; i++) {
+      lines.push(JSON.stringify({ type: 'user', content: `msg ${i}` }));
+    }
+    for (let i = 0; i < 50; i++) {
+      lines.push(JSON.stringify({ type: 'assistant', content: `resp ${i}` }));
+    }
+    fs.writeFileSync(filePath, lines.join('\n') + '\n');
+
+    const result = runEvaluate({ transcript_path: filePath });
+    assert.strictEqual(result.code, 0);
+    assert.ok(
+      result.stderr.includes('too short') || result.stderr.includes('5 messages'),
+      'Should count only user messages'
+    );
+    cleanupTestDir(testDir);
+  })) passed++; else failed++;
+
+  // Summary
+  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
+  process.exit(failed > 0 ? 1 : 0);
+}
+
+runTests();
--- a/tests/lib/session-aliases.test.js
+++ b/tests/lib/session-aliases.test.js
@@ -543,6 +543,82 @@ function runTests() {
    assert.ok(data.metadata.lastUpdated);
  })) passed++; else failed++;

+  // cleanupAliases additional edge cases
+  console.log('\ncleanupAliases (edge cases):');
+
+  if (test('returns correct totalChecked when all removed', () => {
+    resetAliases();
+    aliases.setAlias('dead-1', '/dead/1');
+    aliases.setAlias('dead-2', '/dead/2');
+    aliases.setAlias('dead-3', '/dead/3');
+
+    const result = aliases.cleanupAliases(() => false); // none exist
+    assert.strictEqual(result.removed, 3);
+    assert.strictEqual(result.totalChecked, 3); // 0 remaining + 3 removed
+    assert.strictEqual(result.removedAliases.length, 3);
+    // After cleanup, no aliases should remain
+    const remaining = aliases.listAliases();
+    assert.strictEqual(remaining.length, 0);
+  })) passed++; else failed++;
+
+  if (test('cleanupAliases with empty aliases file does nothing', () => {
+    resetAliases();
+    const result = aliases.cleanupAliases(() => true);
+    assert.strictEqual(result.removed, 0);
+    assert.strictEqual(result.totalChecked, 0);
+    assert.strictEqual(result.removedAliases.length, 0);
+  })) passed++; else failed++;
+
+  if (test('cleanupAliases preserves aliases where sessionExists returns true', () => {
+    resetAliases();
+    aliases.setAlias('keep-me', '/sessions/real');
+    aliases.setAlias('remove-me', '/sessions/gone');
+
+    const result = aliases.cleanupAliases((p) => p === '/sessions/real');
+    assert.strictEqual(result.removed, 1);
+    assert.strictEqual(result.removedAliases[0].name, 'remove-me');
+    // keep-me should survive
+    const kept = aliases.resolveAlias('keep-me');
+    assert.ok(kept, 'keep-me should still exist');
+    assert.strictEqual(kept.sessionPath, '/sessions/real');
+  })) passed++; else failed++;
+
+  // renameAlias edge cases
+  console.log('\nrenameAlias (edge cases):');
+
+  if (test('rename preserves session path and title', () => {
+    resetAliases();
+    aliases.setAlias('src', '/my/session', 'My Feature');
+    const result = aliases.renameAlias('src', 'dst');
+    assert.strictEqual(result.success, true);
+    const resolved = aliases.resolveAlias('dst');
+    assert.ok(resolved);
+    assert.strictEqual(resolved.sessionPath, '/my/session');
+    assert.strictEqual(resolved.title, 'My Feature');
+  })) passed++; else failed++;
+
+  if (test('rename preserves original createdAt timestamp', () => {
+    resetAliases();
+    aliases.setAlias('orig', '/path', 'T');
+    const before = aliases.loadAliases().aliases['orig'].createdAt;
+    aliases.renameAlias('orig', 'renamed');
+    const after = aliases.loadAliases().aliases['renamed'].createdAt;
+    assert.strictEqual(after, before, 'createdAt should be preserved across rename');
+  })) passed++; else failed++;
+
+  // getAliasesForSession edge cases
+  console.log('\ngetAliasesForSession (edge cases):');
+
+  if (test('does not match partial session paths', () => {
+    resetAliases();
+    aliases.setAlias('full', '/sessions/abc123');
+    aliases.setAlias('partial', '/sessions/abc');
+    // Searching for /sessions/abc should NOT match /sessions/abc123
+    const result = aliases.getAliasesForSession('/sessions/abc');
+    assert.strictEqual(result.length, 1);
+    assert.strictEqual(result[0].name, 'partial');
+  })) passed++; else failed++;
+
  // Cleanup — restore both HOME and USERPROFILE (Windows)
  process.env.HOME = origHome;
  if (origUserProfile !== undefined) {
--- a/tests/run-all.js
+++ b/tests/run-all.js
@@ -16,6 +16,7 @@ const testFiles = [
  'lib/session-manager.test.js',
  'lib/session-aliases.test.js',
  'hooks/hooks.test.js',
+  'hooks/evaluate-session.test.js',
  'integration/hooks.test.js',
  'ci/validators.test.js',
  'scripts/setup-package-manager.test.js',