From e8495aa3fc3833697b208511c3edbe87fe731c90 Mon Sep 17 00:00:00 2001
From: Affaan Mustafa <me@affaanmustafa.com>
Date: Fri, 20 Mar 2026 05:56:21 -0700
Subject: [PATCH] feat: add MCP health-check hook (#711)

---
 hooks/hooks.json                     |  22 +
 scripts/hooks/mcp-health-check.js    | 588 +++++++++++++++++++++++++++
 tests/hooks/mcp-health-check.test.js | 266 ++++++++++++
 tests/integration/hooks.test.js      |  44 ++
 4 files changed, 920 insertions(+)
 create mode 100644 scripts/hooks/mcp-health-check.js
 create mode 100644 tests/hooks/mcp-health-check.test.js

diff --git a/hooks/hooks.json b/hooks/hooks.json
index d3432a2f..2efd0bdb 100644
--- a/hooks/hooks.json
+++ b/hooks/hooks.json
@@ -95,6 +95,16 @@
           }
         ],
         "description": "Capture governance events (secrets, policy violations, approval requests). Enable with ECC_GOVERNANCE_CAPTURE=1"
+      },
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:mcp-health-check\" \"scripts/hooks/mcp-health-check.js\" \"standard,strict\""
+          }
+        ],
+        "description": "Check MCP server health before MCP tool execution and block unhealthy MCP calls"
       }
     ],
     "PreCompact": [
@@ -210,6 +220,18 @@
         "description": "Capture tool use results for continuous learning"
       }
     ],
+    "PostToolUseFailure": [
+      {
+        "matcher": "*",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"post:mcp-health-check\" \"scripts/hooks/mcp-health-check.js\" \"standard,strict\""
+          }
+        ],
+        "description": "Track failed MCP tool calls, mark unhealthy servers, and attempt reconnect"
+      }
+    ],
     "Stop": [
       {
         "matcher": "*",
diff --git a/scripts/hooks/mcp-health-check.js b/scripts/hooks/mcp-health-check.js
new file mode 100644
index 00000000..22213418
--- /dev/null
+++ b/scripts/hooks/mcp-health-check.js
@@ -0,0 +1,588 @@
+#!/usr/bin/env node
+'use strict';
+
+/**
+ * MCP health-check hook.
+ *
+ * Compatible with Claude Code's existing hook events:
+ * - PreToolUse: probe MCP server health before MCP tool execution
+ * - PostToolUseFailure: mark unhealthy servers, attempt reconnect, and re-probe
+ *
+ * The hook persists health state outside the conversation context so it
+ * survives compaction and later turns.
+ */
+
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const http = require('http');
+const https = require('https');
+const { spawn, spawnSync } = require('child_process');
+
+const MAX_STDIN = 1024 * 1024; // cap, in characters, on how much stdin is buffered
+const DEFAULT_TTL_MS = 2 * 60 * 1000; // how long a healthy result is trusted before re-probing
+const DEFAULT_TIMEOUT_MS = 5000; // default limit for probes and for the reconnect command
+const DEFAULT_BACKOFF_MS = 30 * 1000; // base delay before an unhealthy server is retried
+const MAX_BACKOFF_MS = 10 * 60 * 1000; // ceiling for the exponential retry delay
+const HEALTHY_HTTP_CODES = new Set([200, 201, 202, 204, 301, 302, 303, 304, 307, 308, 405]); // statuses that count as a reachable HTTP server
+const RECONNECT_STATUS_CODES = new Set([401, 403, 429, 503]); // statuses surfaced as a probe failureCode
+const FAILURE_PATTERNS = [ // tried in order; the first matching entry classifies free-form failure text
+  { code: 401, pattern: /\b401\b|unauthori[sz]ed|auth(?:entication)?\s+(?:failed|expired|invalid)/i },
+  { code: 403, pattern: /\b403\b|forbidden|permission denied/i },
+  { code: 429, pattern: /\b429\b|rate limit|too many requests/i },
+  { code: 503, pattern: /\b503\b|service unavailable|overloaded|temporarily unavailable/i },
+  { code: 'transport', pattern: /ECONNREFUSED|ENOTFOUND|EAI_AGAIN|timed? out|socket hang up|connection (?:failed|lost|reset|closed)/i }
+];
+
+function envNumber(name, fallback) { // Read env var `name` as a finite number >= 0; anything else yields `fallback`.
+  const raw = String(process.env[name] ?? '').trim(); // blank must be rejected explicitly: Number('') is 0
+  return raw !== '' && Number.isFinite(Number(raw)) && Number(raw) >= 0 ? Number(raw) : fallback;
+}
+
+function stateFilePath() {
+  // An explicit override wins; otherwise the cache lives under ~/.claude.
+  const override = process.env.ECC_MCP_HEALTH_STATE_PATH;
+  const fallback = () => path.join(os.homedir(), '.claude', 'mcp-health-cache.json');
+  return override ? path.resolve(override) : fallback();
+}
+
+function configPaths() {
+  // Candidate MCP config files, highest priority first.
+  const override = process.env.ECC_MCP_CONFIG_PATH;
+  if (override) {
+    // The override is a PATH-style list; blank entries are dropped.
+    const resolved = [];
+    for (const piece of override.split(path.delimiter)) {
+      const trimmed = piece.trim();
+      if (trimmed) {
+        resolved.push(path.resolve(trimmed));
+      }
+    }
+    return resolved;
+  }
+
+  // Files in the working directory come before the ones in the home directory.
+  const roots = [process.cwd(), os.homedir()];
+  return roots.flatMap(root => [path.join(root, '.claude.json'), path.join(root, '.claude', 'settings.json')]);
+}
+
+function readJsonFile(filePath) {
+  // Missing, unreadable, and malformed files all collapse to null.
+  try {
+    const text = fs.readFileSync(filePath, 'utf8');
+    return JSON.parse(text);
+  } catch { return null; }
+}
+
+function loadState(filePath) {
+  // Plain (non-array, non-null) objects are the only acceptable shape.
+  const isRecord = value => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
+
+  const loaded = readJsonFile(filePath);
+  if (!isRecord(loaded)) {
+    return { version: 1, servers: {} }; // nothing usable on disk: start fresh
+  }
+
+  if (!isRecord(loaded.servers)) loaded.servers = {}; // repair an unusable servers map in place
+  return loaded;
+}
+
+function saveState(filePath, state) {
+  // Best-effort persistence: a failed write must never block the hook.
+  try {
+    fs.mkdirSync(path.dirname(filePath), { recursive: true });
+    const serialized = JSON.stringify(state, null, 2);
+    fs.writeFileSync(filePath, serialized);
+  } catch { /* intentionally ignored */ }
+}
+
+function readRawStdin() {
+  // Collect stdin as UTF-8 text, silently truncating at MAX_STDIN characters.
+  return new Promise(resolve => {
+    const { stdin } = process;
+    let buffered = '';
+    const settle = () => resolve(buffered);
+    stdin.setEncoding('utf8');
+    stdin.on('data', chunk => {
+      const room = MAX_STDIN - buffered.length;
+      if (room > 0) buffered += chunk.substring(0, room);
+    });
+    stdin.on('end', settle).on('error', settle);
+  });
+}
+
+function safeParse(raw) {
+  // Blank, malformed, and non-string input all yield an empty object.
+  try {
+    if (!raw.trim()) return {};
+    return JSON.parse(raw);
+  } catch { return {}; }
+}
+
+function extractMcpTarget(input) {
+  // Work out which MCP server/tool a hook payload refers to, or null for non-MCP tools.
+  const toolInput = input.tool_input;
+  const toolName = String(input.tool_name || input.name || '');
+
+  // Explicit server fields take precedence over the mcp__<server>__<tool> naming scheme.
+  const serverCandidates = [
+    input.server,
+    input.mcp_server,
+    toolInput?.server,
+    toolInput?.mcp_server,
+    toolInput?.connector
+  ];
+  const toolCandidates = [
+    input.tool,
+    input.mcp_tool,
+    toolInput?.tool,
+    toolInput?.mcp_tool
+  ];
+  const explicitServer = serverCandidates.find(Boolean) || null;
+  const explicitTool = toolCandidates.find(Boolean) || null;
+
+  if (explicitServer) {
+    const tool = explicitTool ? String(explicitTool) : toolName;
+    return { server: String(explicitServer), tool };
+  }
+
+  const PREFIX = 'mcp__';
+  if (!toolName.startsWith(PREFIX)) return null;
+
+  // Everything after the first segment is the tool name, which may itself contain "__".
+  const [server, ...rest] = toolName.slice(PREFIX.length).split('__');
+  if (rest.length === 0 || !server) return null;
+  return { server, tool: rest.join('__') };
+}
+
+function resolveServerConfig(serverName) {
+  // First config file that defines this server as a plain object wins.
+  const isRecord = value => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
+
+  for (const source of configPaths()) {
+    const parsed = readJsonFile(source);
+    // Both camelCase and snake_case section names are accepted.
+    const camel = parsed?.mcpServers?.[serverName];
+    const snake = parsed?.mcp_servers?.[serverName];
+    const config = camel || snake || null;
+
+    if (isRecord(config)) {
+      return { config, source };
+    }
+  }
+  return null;
+}
+
+function markHealthy(state, serverName, now, details = {}) {
+  // Overwrite the server's record with a clean, healthy entry valid for one TTL.
+  const ttlMs = envNumber('ECC_MCP_HEALTH_TTL_MS', DEFAULT_TTL_MS);
+  const record = {
+    status: 'healthy',
+    checkedAt: now,
+    expiresAt: now + ttlMs,
+    failureCount: 0,
+    lastError: null, lastFailureCode: null,
+    nextRetryAt: now, lastRestoredAt: now
+  };
+  state.servers[serverName] = Object.assign(record, details); // caller-supplied details win
+}
+
+function markUnhealthy(state, serverName, now, failureCode, errorMessage) {
+  // Record one more consecutive failure and push the next retry out exponentially.
+  const prior = state.servers[serverName] || {};
+  const failureCount = Number(prior.failureCount || 0) + 1;
+
+  // Delay doubles per failure (base, 2x, 4x, ...) and is capped at MAX_BACKOFF_MS.
+  const baseDelay = envNumber('ECC_MCP_HEALTH_BACKOFF_MS', DEFAULT_BACKOFF_MS);
+  const exponent = Math.max(failureCount - 1, 0);
+  const retryDelay = Math.min(baseDelay * Math.pow(2, exponent), MAX_BACKOFF_MS);
+
+  state.servers[serverName] = {
+    status: 'unhealthy',
+    checkedAt: now, expiresAt: now, failureCount,
+    lastError: errorMessage || null, lastFailureCode: failureCode || null,
+    nextRetryAt: now + retryDelay, lastRestoredAt: prior.lastRestoredAt || null
+  };
+}
+
+function failureSummary(input) {
+  // Join every non-empty, string-valued error field of the payload, one per line.
+  const output = input.tool_output;
+  const candidates = [
+    input.error,
+    input.message,
+    input.tool_response,
+    output,
+    output?.output,
+    output?.stderr,
+    input.tool_input?.error
+  ];
+  return candidates.filter(value => typeof value === 'string' && value).join('\n');
+}
+
+function detectFailureCode(text) {
+  // Classify free-form failure text as 401/403/429/503/'transport', or null when
+  // nothing in FAILURE_PATTERNS matches. Patterns are tried in table order, so an
+  // earlier entry wins when several would match.
+  const summary = String(text || '');
+  const hit = FAILURE_PATTERNS.find(({ pattern }) => pattern.test(summary));
+
+  return hit ? hit.code : null;
+}
+
+function requestHttp(urlString, headers, timeoutMs) {
+  // Issue one GET against an HTTP(S) MCP endpoint. Always resolves (never rejects)
+  // with { ok, statusCode, reason }; statusCode is null when no response arrived.
+  return new Promise(resolve => {
+    let settled = false;
+    let timedOut = false;
+
+    function finish(result) {
+      if (settled) return;
+      settled = true;
+      resolve(result);
+    }
+
+    try {
+      // new URL() and client.request() throw synchronously on a missing or malformed
+      // URL, an unsupported protocol, or invalid headers. Report those as a failed
+      // probe instead of letting the promise reject and abort the whole hook.
+      const url = new URL(urlString);
+      const client = url.protocol === 'https:' ? https : http;
+
+      const req = client.request(url, { method: 'GET', headers }, res => {
+        res.resume(); // discard the body; only the status code matters
+        finish({
+          ok: HEALTHY_HTTP_CODES.has(res.statusCode),
+          statusCode: res.statusCode,
+          reason: `HTTP ${res.statusCode}`
+        });
+      });
+
+      req.setTimeout(timeoutMs, () => {
+        timedOut = true;
+        req.destroy(new Error('timeout'));
+      });
+
+      req.on('error', error => {
+        finish({ ok: false, statusCode: null, reason: timedOut ? 'request timed out' : error.message });
+      });
+
+      req.end();
+    } catch (error) {
+      finish({ ok: false, statusCode: null, reason: `invalid MCP endpoint: ${error.message}` });
+    }
+  });
+}
+
+function probeCommandServer(serverName, config) {
+  // Probe a stdio MCP server by spawning its command and watching it for a short
+  // window. A process that is still running when the window closes is treated as
+  // healthy; one that exits or fails to spawn first is unhealthy. Always resolves
+  // with { ok, statusCode, reason }, where statusCode is the exit code or null.
+  return new Promise(resolve => {
+    const command = config.command;
+    const args = Array.isArray(config.args) ? config.args.map(arg => String(arg)) : [];
+    const timeoutMs = envNumber('ECC_MCP_HEALTH_TIMEOUT_MS', DEFAULT_TIMEOUT_MS);
+    const mergedEnv = {
+      ...process.env,
+      ...(config.env && typeof config.env === 'object' && !Array.isArray(config.env) ? config.env : {})
+    };
+    const aliveResult = { ok: true, statusCode: null, reason: `${serverName} accepted a new stdio process` };
+
+    let stderr = '';
+    let done = false;
+    let survived = false; // true once the probe window elapsed with the child still running
+    let probeTimer = null;
+    let killTimer = null;
+
+    function finish(result) {
+      // First outcome wins; later events are ignored and pending timers are cancelled.
+      if (done) return;
+      done = true;
+      clearTimeout(probeTimer);
+      clearTimeout(killTimer);
+      resolve(result);
+    }
+
+    let child;
+    try {
+      child = spawn(command, args, {
+        env: mergedEnv,
+        cwd: process.cwd(),
+        stdio: ['pipe', 'ignore', 'pipe']
+      });
+    } catch (error) {
+      finish({ ok: false, statusCode: null, reason: error.message });
+      return;
+    }
+
+    child.stderr.on('data', chunk => {
+      // Keep at most 4000 characters of stderr for the failure reason.
+      if (stderr.length < 4000) {
+        const remaining = 4000 - stderr.length;
+        stderr += String(chunk).slice(0, remaining);
+      }
+    });
+
+    child.on('error', error => {
+      finish({ ok: false, statusCode: null, reason: error.message });
+    });
+
+    child.on('exit', (code, signal) => {
+      if (survived) {
+        finish(aliveResult); // exit caused by our own shutdown signal: the probe passed
+        return;
+      }
+      finish({
+        ok: false,
+        statusCode: code,
+        reason: stderr.trim() || `process exited before handshake (${signal || code || 'unknown'})`
+      });
+    });
+
+    probeTimer = setTimeout(() => {
+      survived = true;
+      try {
+        child.kill('SIGTERM');
+      } catch { /* child already gone */ }
+
+      // Do not resolve yet: the caller exits the process as soon as the probe
+      // settles, which would cancel the SIGKILL escalation and leak a child that
+      // ignores SIGTERM. Wait for 'exit', or force-kill after a short grace period.
+      killTimer = setTimeout(() => {
+        try {
+          child.kill('SIGKILL');
+        } catch { /* child already gone */ }
+
+        // Grace period over: the child has been force-killed, report the probe as passed.
+        finish(aliveResult);
+      }, 200);
+    }, timeoutMs);
+  });
+}
+
+async function probeServer(serverName, resolvedConfig) {
+  // Run the probe that matches the server's transport and normalise the result to
+  // { ok, failureCode, reason, source }. failureCode is 401/403/429/503/'transport'
+  // when the failure looks reconnect-worthy, otherwise null.
+  const { config, source } = resolvedConfig;
+  const timeoutMs = envNumber('ECC_MCP_HEALTH_TIMEOUT_MS', DEFAULT_TIMEOUT_MS);
+
+  if (config.type === 'http' || config.url) {
+    if (typeof config.url !== 'string' || !config.url.trim()) {
+      // type "http" without a usable url: fail the probe rather than throw.
+      return { ok: false, failureCode: null, reason: 'HTTP MCP server config has no url', source };
+    }
+
+    const result = await requestHttp(config.url, config.headers || {}, timeoutMs);
+    // Prefer a reconnect-worthy HTTP status; otherwise classify the error text
+    // (for example ECONNREFUSED or a timeout maps to 'transport').
+    const failureCode = RECONNECT_STATUS_CODES.has(result.statusCode)
+      ? result.statusCode
+      : (result.ok ? null : detectFailureCode(result.reason));
+    return { ok: result.ok, failureCode, reason: result.reason, source };
+  }
+
+  if (config.command) {
+    const result = await probeCommandServer(serverName, config);
+    // statusCode here is a process exit code, never an HTTP status, so classify
+    // the failure from the captured stderr / reason text instead.
+    const failureCode = result.ok ? null : detectFailureCode(result.reason);
+    return { ok: result.ok, failureCode, reason: result.reason, source };
+  }
+
+  return { ok: false, failureCode: null, reason: 'unsupported MCP server config', source };
+}
+
+function reconnectCommand(serverName) { // Shell command that reconnects `serverName`, or null when none applies.
+  const name = String(serverName);
+  const key = `ECC_MCP_RECONNECT_${name.toUpperCase().replace(/[^A-Z0-9]/g, '_')}`; // per-server override variable
+  const command = process.env[key] || process.env.ECC_MCP_RECONNECT_COMMAND || '';
+  if (!command.trim()) return null;
+  if (!command.includes('{server}')) return command;
+
+  // The command is run through a shell and the server name comes from the hook
+  // payload, so only substitute names that cannot carry shell metacharacters.
+  return /^[A-Za-z0-9_.@:-]+$/.test(name) ? command.split('{server}').join(name) : null;
+}
+
+function attemptReconnect(serverName) {
+  // Run the configured reconnect command synchronously and summarise the outcome
+  // as { attempted, success, reason }.
+  const report = (attempted, success, reason) => ({ attempted, success, reason });
+
+  const shellCommand = reconnectCommand(serverName);
+  if (!shellCommand) {
+    return report(false, false, 'no reconnect command configured');
+  }
+
+  const options = {
+    shell: true,
+    env: process.env,
+    cwd: process.cwd(),
+    encoding: 'utf8',
+    timeout: envNumber('ECC_MCP_RECONNECT_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
+  };
+  const outcome = spawnSync(shellCommand, options);
+
+  // spawnSync reported an error object: the attempt counts as failed.
+  if (outcome.error) {
+    return report(true, false, outcome.error.message);
+  }
+  if (outcome.status === 0) {
+    return report(true, true, 'reconnect command completed');
+  }
+  return report(true, false, (outcome.stderr || outcome.stdout || `reconnect exited ${outcome.status}`).trim());
+}
+
+function shouldFailOpen() { // opt-in via ECC_MCP_HEALTH_FAIL_OPEN=1|true|yes (any letter case)
+  return ['1', 'true', 'yes'].includes(String(process.env.ECC_MCP_HEALTH_FAIL_OPEN || '').toLowerCase());
+}
+
+function emitLogs(logs) {
+  // Diagnostics are written to stderr, one entry per line.
+  const writeLine = line => process.stderr.write(`${line}\n`);
+  for (const line of logs) writeLine(line);
+}
+
+async function handlePreToolUse(rawInput, input, target, statePathValue, now) { // returns { rawInput, exitCode, logs }; exitCode 2 blocks the call
+  const logs = [];
+  const state = loadState(statePathValue);
+  const previous = state.servers[target.server] || {};
+  // Fast path: a cached healthy result that has not expired needs no probe.
+  if (previous.status === 'healthy' && Number(previous.expiresAt || 0) > now) {
+    return { rawInput, exitCode: 0, logs };
+  }
+  // Still inside the backoff window of a known-bad server: block (or only warn when failing open) without probing.
+  if (previous.status === 'unhealthy' && Number(previous.nextRetryAt || 0) > now) {
+    logs.push(
+      `[MCPHealthCheck] ${target.server} is marked unhealthy until ${new Date(previous.nextRetryAt).toISOString()}; skipping ${target.tool || 'tool'}`
+    );
+    return { rawInput, exitCode: shouldFailOpen() ? 0 : 2, logs };
+  }
+  // Without a config entry there is nothing to probe, so the call is allowed through.
+  const resolvedConfig = resolveServerConfig(target.server);
+  if (!resolvedConfig) {
+    logs.push(`[MCPHealthCheck] No MCP config found for ${target.server}; skipping preflight probe`);
+    return { rawInput, exitCode: 0, logs };
+  }
+
+  const probe = await probeServer(target.server, resolvedConfig);
+  if (probe.ok) {
+    markHealthy(state, target.server, now, { source: resolvedConfig.source });
+    saveState(statePathValue, state);
+
+    if (previous.status === 'unhealthy') {
+      logs.push(`[MCPHealthCheck] ${target.server} connection restored`);
+    }
+
+    return { rawInput, exitCode: 0, logs };
+  }
+  // Probe failed: try the reconnect command only when the probe produced a failureCode or the server was already unhealthy.
+  let reconnect = { attempted: false, success: false, reason: 'probe failed' };
+  if (probe.failureCode || previous.status === 'unhealthy') {
+    reconnect = attemptReconnect(target.server);
+    if (reconnect.success) {
+      const reprobe = await probeServer(target.server, resolvedConfig);
+      if (reprobe.ok) {
+        markHealthy(state, target.server, now, {
+          source: resolvedConfig.source,
+          restoredBy: 'reconnect-command'
+        });
+        saveState(statePathValue, state);
+        logs.push(`[MCPHealthCheck] ${target.server} connection restored after reconnect`);
+        return { rawInput, exitCode: 0, logs };
+      }
+      probe.reason = `${probe.reason}; reconnect reprobe failed: ${reprobe.reason}`;
+    }
+  }
+  // Persist the failure; markUnhealthy also computes the next retry time.
+  markUnhealthy(state, target.server, now, probe.failureCode, probe.reason);
+  saveState(statePathValue, state);
+
+  const reconnectSuffix = reconnect.attempted
+    ? ` Reconnect attempt: ${reconnect.success ? 'ok' : reconnect.reason}.`
+    : '';
+  logs.push(
+    `[MCPHealthCheck] ${target.server} is unavailable (${probe.reason}). Blocking ${target.tool || 'tool'} so Claude can fall back to non-MCP tools.${reconnectSuffix}`
+  );
+
+  return { rawInput, exitCode: shouldFailOpen() ? 0 : 2, logs }; // fail-open mode logs the warning but still allows the call
+}
+
+async function handlePostToolUseFailure(rawInput, input, target, statePathValue, now) { // never blocks: every path returns exitCode 0
+  const logs = [];
+  const summary = failureSummary(input);
+  const failureCode = detectFailureCode(summary);
+  // Failures that match no known pattern leave the stored health state untouched.
+  if (!failureCode) {
+    return { rawInput, exitCode: 0, logs };
+  }
+
+  const state = loadState(statePathValue);
+  markUnhealthy(state, target.server, now, failureCode, summary.slice(0, 500)); // store at most 500 characters of the error text
+  saveState(statePathValue, state);
+
+  logs.push(`[MCPHealthCheck] ${target.server} reported ${failureCode}; marking server unhealthy and attempting reconnect`);
+
+  const reconnect = attemptReconnect(target.server);
+  if (!reconnect.attempted) {
+    logs.push(`[MCPHealthCheck] ${target.server} reconnect skipped: ${reconnect.reason}`);
+    return { rawInput, exitCode: 0, logs };
+  }
+
+  if (!reconnect.success) {
+    logs.push(`[MCPHealthCheck] ${target.server} reconnect failed: ${reconnect.reason}`);
+    return { rawInput, exitCode: 0, logs };
+  }
+  // Reconnect command succeeded: confirm with a fresh probe before marking the server healthy again.
+  const resolvedConfig = resolveServerConfig(target.server);
+  if (!resolvedConfig) {
+    logs.push(`[MCPHealthCheck] ${target.server} reconnect completed but no config was available for a follow-up probe`);
+    return { rawInput, exitCode: 0, logs };
+  }
+
+  const reprobe = await probeServer(target.server, resolvedConfig);
+  if (!reprobe.ok) {
+    logs.push(`[MCPHealthCheck] ${target.server} reconnect command ran, but health probe still failed: ${reprobe.reason}`);
+    return { rawInput, exitCode: 0, logs };
+  }
+  // Re-read the state file so the healthy mark is applied on top of what is on disk now.
+  const refreshed = loadState(statePathValue);
+  markHealthy(refreshed, target.server, now, {
+    source: resolvedConfig.source,
+    restoredBy: 'post-failure-reconnect'
+  });
+  saveState(statePathValue, refreshed);
+  logs.push(`[MCPHealthCheck] ${target.server} connection restored`);
+  return { rawInput, exitCode: 0, logs };
+}
+
+async function main() {
+  // Entry point: read the hook payload, route it by event name, echo the payload
+  // back on stdout unchanged, and exit with the handler's code (2 blocks the tool).
+  const rawInput = await readRawStdin();
+  const input = safeParse(rawInput);
+  const target = extractMcpTarget(input);
+
+  // process.exit() does not wait for pending stdout writes, so a large payload on a
+  // piped stdout could be cut short. Exit from the write callback instead, and set
+  // exitCode first so the code is still right if the callback never fires.
+  const passThroughAndExit = (payload, exitCode) => {
+    process.exitCode = exitCode;
+    process.stdout.write(payload, () => process.exit(exitCode));
+  };
+
+  if (!target) return passThroughAndExit(rawInput, 0); // not an MCP tool call: nothing to check
+
+  const eventName = process.env.CLAUDE_HOOK_EVENT_NAME || 'PreToolUse';
+  const handler = eventName === 'PostToolUseFailure' ? handlePostToolUseFailure : handlePreToolUse;
+  const result = await handler(rawInput, input, target, stateFilePath(), Date.now());
+  emitLogs(result.logs);
+  passThroughAndExit(result.rawInput, result.exitCode);
+}
+
+main().catch(error => { // fail open: an unexpected hook error is logged and the process exits 0
+  process.stderr.write(`[MCPHealthCheck] Unexpected error: ${error.message}\n`);
+  process.exit(0);
+});
diff --git a/tests/hooks/mcp-health-check.test.js b/tests/hooks/mcp-health-check.test.js
new file mode 100644
index 00000000..1d12da33
--- /dev/null
+++ b/tests/hooks/mcp-health-check.test.js
@@ -0,0 +1,266 @@
+/**
+ * Tests for scripts/hooks/mcp-health-check.js
+ *
+ * Run with: node tests/hooks/mcp-health-check.test.js
+ */
+
+const assert = require('assert');
+const fs = require('fs');
+const os = require('os');
+const path = require('path');
+const { spawnSync } = require('child_process');
+
+const script = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'mcp-health-check.js');
+
+function test(name, fn) { // run one synchronous case, print its outcome, return whether it passed
+  try {
+    fn();
+  } catch (err) {
+    console.log(`  ✗ ${name}`);
+    console.log(`    Error: ${err.message}`);
+    return false;
+  }
+  console.log(`  ✓ ${name}`);
+  return true;
+}
+
+async function asyncTest(name, fn) { // awaits one case, prints its outcome, resolves to whether it passed
+  try {
+    await fn();
+  } catch (err) {
+    console.log(`  ✗ ${name}`);
+    console.log(`    Error: ${err.message}`);
+    return false;
+  }
+  console.log(`  ✓ ${name}`);
+  return true;
+}
+
+function createTempDir() { // make a scratch directory under os.tmpdir() with the ecc-mcp-health- prefix and return its path
+  return fs.mkdtempSync(path.join(os.tmpdir(), 'ecc-mcp-health-'));
+}
+
+function cleanupTempDir(dirPath) { // remove the scratch directory and everything in it (recursive + force)
+  fs.rmSync(dirPath, { recursive: true, force: true });
+}
+
+function writeConfig(configPath, body) { // write `body` to configPath as 2-space-indented JSON
+  fs.writeFileSync(configPath, JSON.stringify(body, null, 2));
+}
+
+function readState(statePath) { // parse the state file the hook wrote; throws if it is missing or not valid JSON
+  return JSON.parse(fs.readFileSync(statePath, 'utf8'));
+}
+
+function createCommandConfig(scriptPath) {
+  // Command-style server entry: run the given script with the current Node binary.
+  const command = process.execPath;
+  const args = [scriptPath];
+  return { command, args };
+}
+
+function runHook(input, env = {}) {
+  // Run the hook script in a child Node process with `input` as JSON on stdin and
+  // return { code, stdout, stderr }. `env` entries override the inherited environment.
+  const result = spawnSync(process.execPath, [script], { // same Node binary as the test run, not whatever `node` is on PATH
+    input: JSON.stringify(input),
+    encoding: 'utf8',
+    env: { ...process.env, ECC_HOOK_PROFILE: 'standard', ...env },
+    timeout: 15000,
+    stdio: ['pipe', 'pipe', 'pipe']
+  });
+
+  if (result.error) {
+    throw result.error; // a spawn failure or timeout must fail the case, not look like exit code 0
+  }
+
+  // status is null when the child was killed by a signal; never report that as 0.
+  const code = result.status === null ? -1 : result.status;
+  return { code, stdout: result.stdout || '', stderr: result.stderr || '' };
+}
+
+async function runTests() { // runs every case in order, prints a summary, and exits 1 if any case failed
+  console.log('\n=== Testing mcp-health-check.js ===\n');
+
+  let passed = 0;
+  let failed = 0;
+  // Cases run the hook in a child process via runHook; most use their own temp config and state files.
+  if (test('passes through non-MCP tools untouched', () => {
+    const result = runHook(
+      { tool_name: 'Read', tool_input: { file_path: 'README.md' } },
+      { CLAUDE_HOOK_EVENT_NAME: 'PreToolUse' }
+    );
+
+    assert.strictEqual(result.code, 0, 'Expected non-MCP tool to pass through');
+    assert.strictEqual(result.stderr, '', 'Expected no stderr for non-MCP tool');
+  })) passed++; else failed++;
+
+  if (await asyncTest('marks healthy command MCP servers and allows the tool call', async () => {
+    const tempDir = createTempDir();
+    const configPath = path.join(tempDir, 'claude.json');
+    const statePath = path.join(tempDir, 'mcp-health.json');
+    const serverScript = path.join(tempDir, 'healthy-server.js');
+
+    try {
+      fs.writeFileSync(serverScript, "setInterval(() => {}, 1000);\n"); // stays alive, so the probe window elapses
+      writeConfig(configPath, {
+        mcpServers: {
+          mock: createCommandConfig(serverScript)
+        }
+      });
+
+      const input = { tool_name: 'mcp__mock__list_items', tool_input: {} };
+      const result = runHook(input, {
+        CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
+        ECC_MCP_CONFIG_PATH: configPath,
+        ECC_MCP_HEALTH_STATE_PATH: statePath,
+        ECC_MCP_HEALTH_TIMEOUT_MS: '100'
+      });
+
+      assert.strictEqual(result.code, 0, `Expected healthy server to pass, got ${result.code}`);
+      assert.strictEqual(result.stdout.trim(), JSON.stringify(input), 'Expected original JSON on stdout');
+
+      const state = readState(statePath);
+      assert.strictEqual(state.servers.mock.status, 'healthy', 'Expected mock server to be marked healthy');
+    } finally {
+      cleanupTempDir(tempDir);
+    }
+  })) passed++; else failed++;
+
+  if (await asyncTest('blocks unhealthy command MCP servers and records backoff state', async () => {
+    const tempDir = createTempDir();
+    const configPath = path.join(tempDir, 'claude.json');
+    const statePath = path.join(tempDir, 'mcp-health.json');
+    const serverScript = path.join(tempDir, 'unhealthy-server.js');
+
+    try {
+      fs.writeFileSync(serverScript, "process.exit(1);\n"); // exits immediately, so the probe fails
+      writeConfig(configPath, {
+        mcpServers: {
+          flaky: createCommandConfig(serverScript)
+        }
+      });
+
+      const result = runHook(
+        { tool_name: 'mcp__flaky__search', tool_input: {} },
+        {
+          CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
+          ECC_MCP_CONFIG_PATH: configPath,
+          ECC_MCP_HEALTH_STATE_PATH: statePath,
+          ECC_MCP_HEALTH_TIMEOUT_MS: '100'
+        }
+      );
+
+      assert.strictEqual(result.code, 2, 'Expected unhealthy server to block the MCP tool');
+      assert.ok(result.stderr.includes('Blocking search'), `Expected blocking message, got: ${result.stderr}`);
+
+      const state = readState(statePath);
+      assert.strictEqual(state.servers.flaky.status, 'unhealthy', 'Expected flaky server to be marked unhealthy');
+      assert.ok(state.servers.flaky.nextRetryAt > state.servers.flaky.checkedAt, 'Expected retry backoff to be recorded');
+    } finally {
+      cleanupTempDir(tempDir);
+    }
+  })) passed++; else failed++;
+
+  if (await asyncTest('fail-open mode warns but does not block unhealthy MCP servers', async () => {
+    const tempDir = createTempDir();
+    const configPath = path.join(tempDir, 'claude.json');
+    const statePath = path.join(tempDir, 'mcp-health.json');
+    const serverScript = path.join(tempDir, 'relaxed-server.js');
+
+    try {
+      fs.writeFileSync(serverScript, "process.exit(1);\n"); // exits immediately, so the probe fails
+      writeConfig(configPath, {
+        mcpServers: {
+          relaxed: createCommandConfig(serverScript)
+        }
+      });
+
+      const result = runHook(
+        { tool_name: 'mcp__relaxed__list', tool_input: {} },
+        {
+          CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
+          ECC_MCP_CONFIG_PATH: configPath,
+          ECC_MCP_HEALTH_STATE_PATH: statePath,
+          ECC_MCP_HEALTH_FAIL_OPEN: '1',
+          ECC_MCP_HEALTH_TIMEOUT_MS: '100'
+        }
+      );
+
+      assert.strictEqual(result.code, 0, 'Expected fail-open mode to allow execution');
+      assert.ok(result.stderr.includes('Blocking list') || result.stderr.includes('fall back'), 'Expected warning output in fail-open mode');
+    } finally {
+      cleanupTempDir(tempDir);
+    }
+  })) passed++; else failed++;
+
+  if (await asyncTest('post-failure reconnect command restores server health when a reprobe succeeds', async () => {
+    const tempDir = createTempDir();
+    const configPath = path.join(tempDir, 'claude.json');
+    const statePath = path.join(tempDir, 'mcp-health.json');
+    const switchFile = path.join(tempDir, 'server-mode.txt');
+    const reconnectFile = path.join(tempDir, 'reconnected.txt');
+    const probeScript = path.join(tempDir, 'probe-server.js');
+    // The probe script reports 401 and exits until the reconnect script flips the switch file to "up".
+    fs.writeFileSync(switchFile, 'down');
+    fs.writeFileSync(
+      probeScript,
+      [
+        "const fs = require('fs');",
+        `const mode = fs.readFileSync(${JSON.stringify(switchFile)}, 'utf8').trim();`,
+        "if (mode === 'up') { setInterval(() => {}, 1000); } else { console.error('401 Unauthorized'); process.exit(1); }"
+      ].join('\n')
+    );
+
+    const reconnectScript = path.join(tempDir, 'reconnect.js');
+    fs.writeFileSync(
+      reconnectScript,
+      [
+        "const fs = require('fs');",
+        `fs.writeFileSync(${JSON.stringify(switchFile)}, 'up');`,
+        `fs.writeFileSync(${JSON.stringify(reconnectFile)}, 'done');`
+      ].join('\n')
+    );
+
+    try {
+      writeConfig(configPath, {
+        mcpServers: {
+          authy: createCommandConfig(probeScript)
+        }
+      });
+
+      const result = runHook(
+        {
+          tool_name: 'mcp__authy__messages',
+          tool_input: {},
+          error: '401 Unauthorized'
+        },
+        {
+          CLAUDE_HOOK_EVENT_NAME: 'PostToolUseFailure',
+          ECC_MCP_CONFIG_PATH: configPath,
+          ECC_MCP_HEALTH_STATE_PATH: statePath,
+          ECC_MCP_RECONNECT_COMMAND: `node ${JSON.stringify(reconnectScript)}`,
+          ECC_MCP_HEALTH_TIMEOUT_MS: '100'
+        }
+      );
+
+      assert.strictEqual(result.code, 0, 'Expected failure hook to remain non-blocking');
+      assert.ok(result.stderr.includes('reported 401'), `Expected reconnect log, got: ${result.stderr}`);
+      assert.ok(result.stderr.includes('connection restored'), `Expected restored log, got: ${result.stderr}`);
+      assert.ok(fs.existsSync(reconnectFile), 'Expected reconnect command to run');
+
+      const state = readState(statePath);
+      assert.strictEqual(state.servers.authy.status, 'healthy', 'Expected authy server to be restored after reconnect');
+    } finally {
+      cleanupTempDir(tempDir);
+    }
+  })) passed++; else failed++;
+
+  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
+  process.exit(failed > 0 ? 1 : 0);
+}
+
+runTests().catch(error => { // an error escaping the runner itself is printed and fails the run
+  console.error(error);
+  process.exit(1);
+});
diff --git a/tests/integration/hooks.test.js b/tests/integration/hooks.test.js
index df47e213..180b9e0e 100644
--- a/tests/integration/hooks.test.js
+++ b/tests/integration/hooks.test.js
@@ -313,6 +313,50 @@ async function runTests() {
     }
   })) passed++; else failed++;
 
+  if (await asyncTest('MCP health hook blocks unhealthy MCP tool calls through hooks.json', async () => {
+    const hookCommand = getHookCommandByDescription(
+      hooks,
+      'PreToolUse',
+      'Check MCP server health before MCP tool execution'
+    );
+
+    const testDir = createTestDir();
+    const configPath = path.join(testDir, 'claude.json');
+    const statePath = path.join(testDir, 'mcp-health.json');
+    const serverScript = path.join(testDir, 'broken-mcp.js');
+
+    try {
+      fs.writeFileSync(serverScript, 'process.exit(1);\n');
+      fs.writeFileSync(
+        configPath,
+        JSON.stringify({
+          mcpServers: {
+            broken: {
+              command: process.execPath,
+              args: [serverScript]
+            }
+          }
+        })
+      );
+
+      const result = await runHookCommand(
+        hookCommand,
+        { tool_name: 'mcp__broken__search', tool_input: {} },
+        {
+          CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
+          ECC_MCP_CONFIG_PATH: configPath,
+          ECC_MCP_HEALTH_STATE_PATH: statePath,
+          ECC_MCP_HEALTH_TIMEOUT_MS: '100'
+        }
+      );
+
+      assert.strictEqual(result.code, 2, 'Expected unhealthy MCP preflight to block');
+      assert.ok(result.stderr.includes('broken is unavailable'), `Expected health warning, got: ${result.stderr}`);
+    } finally {
+      cleanupTestDir(testDir);
+    }
+  })) passed++; else failed++;
+
   if (await asyncTest('hooks handle missing files gracefully', async () => {
     const testDir = createTestDir();
     const transcriptPath = path.join(testDir, 'nonexistent.jsonl');