mirror of
https://github.com/affaan-m/everything-claude-code.git
synced 2026-03-30 13:43:26 +08:00
feat: add MCP health-check hook (#711)
This commit is contained in:
@@ -95,6 +95,16 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Capture governance events (secrets, policy violations, approval requests). Enable with ECC_GOVERNANCE_CAPTURE=1"
|
"description": "Capture governance events (secrets, policy violations, approval requests). Enable with ECC_GOVERNANCE_CAPTURE=1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": "*",
|
||||||
|
"hooks": [
|
||||||
|
{
|
||||||
|
"type": "command",
|
||||||
|
"command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"pre:mcp-health-check\" \"scripts/hooks/mcp-health-check.js\" \"standard,strict\""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Check MCP server health before MCP tool execution and block unhealthy MCP calls"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"PreCompact": [
|
"PreCompact": [
|
||||||
@@ -210,6 +220,18 @@
|
|||||||
"description": "Capture tool use results for continuous learning"
|
"description": "Capture tool use results for continuous learning"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"PostToolUseFailure": [
|
||||||
|
{
|
||||||
|
"matcher": "*",
|
||||||
|
"hooks": [
|
||||||
|
{
|
||||||
|
"type": "command",
|
||||||
|
"command": "node \"${CLAUDE_PLUGIN_ROOT}/scripts/hooks/run-with-flags.js\" \"post:mcp-health-check\" \"scripts/hooks/mcp-health-check.js\" \"standard,strict\""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Track failed MCP tool calls, mark unhealthy servers, and attempt reconnect"
|
||||||
|
}
|
||||||
|
],
|
||||||
"Stop": [
|
"Stop": [
|
||||||
{
|
{
|
||||||
"matcher": "*",
|
"matcher": "*",
|
||||||
|
|||||||
588
scripts/hooks/mcp-health-check.js
Normal file
588
scripts/hooks/mcp-health-check.js
Normal file
@@ -0,0 +1,588 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MCP health-check hook.
|
||||||
|
*
|
||||||
|
* Compatible with Claude Code's existing hook events:
|
||||||
|
* - PreToolUse: probe MCP server health before MCP tool execution
|
||||||
|
* - PostToolUseFailure: mark unhealthy servers, attempt reconnect, and re-probe
|
||||||
|
*
|
||||||
|
* The hook persists health state outside the conversation context so it
|
||||||
|
* survives compaction and later turns.
|
||||||
|
*/
|
||||||
|
|
||||||
|
const fs = require('fs');
|
||||||
|
const os = require('os');
|
||||||
|
const path = require('path');
|
||||||
|
const http = require('http');
|
||||||
|
const https = require('https');
|
||||||
|
const { spawn, spawnSync } = require('child_process');
|
||||||
|
|
||||||
|
// Upper bound on how many characters of hook input are buffered from stdin.
const MAX_STDIN = 2 ** 20;
// How long a "healthy" verdict is trusted before the server is probed again.
const DEFAULT_TTL_MS = 120 * 1000;
// Default time budget for a single probe (and for the reconnect command).
const DEFAULT_TIMEOUT_MS = 5 * 1000;
// First retry delay after a failure; markUnhealthy doubles it per consecutive failure.
const DEFAULT_BACKOFF_MS = 30000;
// Ceiling applied to the exponential retry delay.
const MAX_BACKOFF_MS = 600 * 1000;
// HTTP statuses that requestHttp reports as a healthy endpoint.
const HEALTHY_HTTP_CODES = new Set([
  200, 201, 202, 204,
  301, 302, 303, 304, 307, 308,
  405
]);
// HTTP statuses that probeServer surfaces as a failure code.
const RECONNECT_STATUS_CODES = new Set([401, 403, 429, 503]);
// Ordered text classifiers used by detectFailureCode; the first matching entry wins.
const FAILURE_PATTERNS = [
  {
    code: 401,
    pattern: /\b401\b|unauthori[sz]ed|auth(?:entication)?\s+(?:failed|expired|invalid)/i
  },
  {
    code: 403,
    pattern: /\b403\b|forbidden|permission denied/i
  },
  {
    code: 429,
    pattern: /\b429\b|rate limit|too many requests/i
  },
  {
    code: 503,
    pattern: /\b503\b|service unavailable|overloaded|temporarily unavailable/i
  },
  {
    code: 'transport',
    pattern: /ECONNREFUSED|ENOTFOUND|EAI_AGAIN|timed? out|socket hang up|connection (?:failed|lost|reset|closed)/i
  }
];
|
||||||
|
|
||||||
|
/**
 * Read a non-negative, finite number from an environment variable.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value returned when the variable is unset,
 *   blank, not numeric, not finite, or negative.
 * @returns {number} The parsed value or `fallback`.
 */
function envNumber(name, fallback) {
  const raw = process.env[name];

  // Number('') and Number('   ') both evaluate to 0, so a blank variable
  // would otherwise silently configure a zero timeout/TTL instead of the default.
  if (typeof raw !== 'string' || raw.trim() === '') {
    return fallback;
  }

  const value = Number(raw);
  return Number.isFinite(value) && value >= 0 ? value : fallback;
}
|
||||||
|
|
||||||
|
/**
 * Location of the persisted health-state file.
 *
 * @returns {string} The absolute form of ECC_MCP_HEALTH_STATE_PATH when that
 *   variable is set, otherwise `mcp-health-cache.json` under `~/.claude`.
 */
function stateFilePath() {
  const override = process.env.ECC_MCP_HEALTH_STATE_PATH;
  return override
    ? path.resolve(override)
    : path.join(os.homedir(), '.claude', 'mcp-health-cache.json');
}
|
||||||
|
|
||||||
|
/**
 * List the config files to search for MCP server definitions, in priority order.
 *
 * When ECC_MCP_CONFIG_PATH is set it is treated as a path.delimiter-separated
 * list; blank entries are dropped and the rest are made absolute. Otherwise the
 * project-level files under the current directory come first, followed by the
 * same two files under the home directory.
 *
 * @returns {string[]} Absolute candidate paths.
 */
function configPaths() {
  const override = process.env.ECC_MCP_CONFIG_PATH;

  if (override) {
    const resolved = [];
    for (const piece of override.split(path.delimiter)) {
      const trimmed = piece.trim();
      if (trimmed) {
        resolved.push(path.resolve(trimmed));
      }
    }
    return resolved;
  }

  const roots = [process.cwd(), os.homedir()];
  return roots.flatMap(root => [
    path.join(root, '.claude.json'),
    path.join(root, '.claude', 'settings.json')
  ]);
}
|
||||||
|
|
||||||
|
/**
 * Read and parse a JSON file without ever throwing.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @returns {*} The parsed JSON value, or null when the file cannot be read
 *   or does not contain valid JSON.
 */
function readJsonFile(filePath) {
  let text;

  try {
    text = fs.readFileSync(filePath, 'utf8');
  } catch {
    return null;
  }

  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Load the persisted health state, normalising anything malformed.
 *
 * @param {string} filePath - Path of the state file.
 * @returns {object} The parsed state with a guaranteed object-valued `servers`
 *   map; a fresh `{ version: 1, servers: {} }` when the file is missing,
 *   unparsable, or not a plain object.
 */
function loadState(filePath) {
  const isPlainObject = value => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  const parsed = readJsonFile(filePath);

  if (!isPlainObject(parsed)) {
    return { version: 1, servers: {} };
  }

  // Keep any other top-level fields, but make sure `servers` is usable as a map.
  if (!isPlainObject(parsed.servers)) {
    parsed.servers = {};
  }

  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Persist the health state as pretty-printed JSON (best effort).
 *
 * The data is written to a per-process temporary file next to the target and
 * then renamed into place, so another hook process reading the state at the
 * same moment sees either the old or the new file, never a truncated one
 * (a truncated file would fail to parse and reset every server's state).
 *
 * @param {string} filePath - Destination path; parent directories are created.
 * @param {object} state - State object to serialise.
 * @returns {void} Errors are swallowed on purpose: persistence problems must
 *   never block the hook.
 */
function saveState(filePath, state) {
  const tempPath = `${filePath}.${process.pid}.tmp`;

  try {
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(tempPath, JSON.stringify(state, null, 2));
    fs.renameSync(tempPath, filePath);
  } catch {
    // Never block the hook on state persistence errors; just avoid leaving
    // a stray temporary file behind.
    try {
      fs.unlinkSync(tempPath);
    } catch {
      // The temporary file was never created or is already gone.
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Collect stdin as a UTF-8 string, keeping at most MAX_STDIN characters.
 *
 * @returns {Promise<string>} Resolves with whatever was buffered once stdin
 *   ends or errors; it never rejects.
 */
function readRawStdin() {
  return new Promise(resolve => {
    const { stdin } = process;
    let buffered = '';
    const finish = () => resolve(buffered);

    stdin.setEncoding('utf8');
    stdin.on('data', chunk => {
      // Anything beyond the cap is dropped rather than buffered.
      const room = MAX_STDIN - buffered.length;
      if (room > 0) {
        buffered += chunk.substring(0, room);
      }
    });
    stdin.on('end', finish);
    stdin.on('error', finish);
  });
}
|
||||||
|
|
||||||
|
/**
 * Parse the raw hook payload into an object without ever throwing.
 *
 * @param {string} raw - Raw stdin contents.
 * @returns {object} The parsed payload when it is a JSON object; an empty
 *   object for blank input, invalid JSON, or JSON that is not an object.
 */
function safeParse(raw) {
  if (typeof raw !== 'string' || !raw.trim()) {
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    // "null", numbers, strings and arrays are valid JSON but not valid hook
    // payloads; callers read properties off the result, which throws on null.
    const isObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isObject ? parsed : {};
  } catch {
    return {};
  }
}
|
||||||
|
|
||||||
|
/**
 * Work out which MCP server (and tool) a hook payload refers to.
 *
 * An explicit server field on the payload or on `tool_input` takes precedence.
 * Otherwise the tool name must have the form `mcp__<server>__<tool>`.
 *
 * @param {object} input - Parsed hook payload.
 * @returns {{server: string, tool: string}|null} The target, or null when the
 *   payload does not refer to an MCP tool.
 */
function extractMcpTarget(input) {
  const PREFIX = 'mcp__';
  const firstTruthy = candidates => candidates.find(Boolean) || null;

  const toolArgs = input.tool_input;
  const name = String(input.tool_name || input.name || '');

  const namedServer = firstTruthy([
    input.server,
    input.mcp_server,
    toolArgs?.server,
    toolArgs?.mcp_server,
    toolArgs?.connector
  ]);

  if (namedServer) {
    const namedTool = firstTruthy([
      input.tool,
      input.mcp_tool,
      toolArgs?.tool,
      toolArgs?.mcp_tool
    ]);
    return {
      server: String(namedServer),
      tool: namedTool ? String(namedTool) : name
    };
  }

  if (!name.startsWith(PREFIX)) {
    return null;
  }

  // The tool part may itself contain "__", so everything after the server
  // segment is glued back together.
  const [serverSegment, ...toolSegments] = name.slice(PREFIX.length).split('__');
  if (toolSegments.length === 0 || !serverSegment) {
    return null;
  }

  return {
    server: serverSegment,
    tool: toolSegments.join('__')
  };
}
|
||||||
|
|
||||||
|
/**
 * Find the configuration entry for an MCP server.
 *
 * Each file from configPaths() is checked in order; within a file the
 * `mcpServers` table is consulted before `mcp_servers`. The first entry that
 * is a plain object wins.
 *
 * @param {string} serverName - Server key to look up.
 * @returns {{config: object, source: string}|null} The entry plus the file it
 *   came from, or null when no file defines the server.
 */
function resolveServerConfig(serverName) {
  for (const source of configPaths()) {
    const parsed = readJsonFile(source);
    const candidate = [parsed?.mcpServers, parsed?.mcp_servers]
      .map(table => table?.[serverName])
      .find(Boolean);

    const usable = Boolean(candidate) && typeof candidate === 'object' && !Array.isArray(candidate);
    if (usable) {
      return { config: candidate, source };
    }
  }

  return null;
}
|
||||||
|
|
||||||
|
/**
 * Record a server as healthy, replacing any previous record for it.
 *
 * @param {object} state - State object; `state.servers` is mutated.
 * @param {string} serverName - Server key.
 * @param {number} now - Current time in epoch milliseconds.
 * @param {object} [details={}] - Extra fields merged over the record.
 * @returns {void}
 */
function markHealthy(state, serverName, now, details = {}) {
  const ttlMs = envNumber('ECC_MCP_HEALTH_TTL_MS', DEFAULT_TTL_MS);

  const record = {
    status: 'healthy',
    checkedAt: now,
    // The verdict is trusted until this moment; afterwards the server is probed again.
    expiresAt: now + ttlMs,
    failureCount: 0,
    lastError: null,
    lastFailureCode: null,
    nextRetryAt: now,
    lastRestoredAt: now
  };

  state.servers[serverName] = { ...record, ...details };
}
|
||||||
|
|
||||||
|
/**
 * Record a server as unhealthy and schedule the next retry.
 *
 * The retry delay starts at ECC_MCP_HEALTH_BACKOFF_MS (or the default) and
 * doubles with every consecutive failure, capped at MAX_BACKOFF_MS.
 *
 * @param {object} state - State object; `state.servers` is mutated.
 * @param {string} serverName - Server key.
 * @param {number} now - Current time in epoch milliseconds.
 * @param {number|string|null} failureCode - Classified failure, if any.
 * @param {string} errorMessage - Human-readable failure description.
 * @returns {void}
 */
function markUnhealthy(state, serverName, now, failureCode, errorMessage) {
  const prior = state.servers[serverName] || {};
  const failures = Number(prior.failureCount || 0) + 1;
  const baseDelay = envNumber('ECC_MCP_HEALTH_BACKOFF_MS', DEFAULT_BACKOFF_MS);
  const doublings = Math.max(failures - 1, 0);
  const retryDelay = Math.min(baseDelay * Math.pow(2, doublings), MAX_BACKOFF_MS);

  state.servers[serverName] = {
    status: 'unhealthy',
    checkedAt: now,
    expiresAt: now,
    failureCount: failures,
    lastError: errorMessage || null,
    lastFailureCode: failureCode || null,
    nextRetryAt: now + retryDelay,
    // Carried over so the time of the last recovery survives repeated failures.
    lastRestoredAt: prior.lastRestoredAt || null
  };
}
|
||||||
|
|
||||||
|
/**
 * Gather every textual failure hint from a hook payload.
 *
 * @param {object} input - Parsed hook payload.
 * @returns {string} The non-empty string values of `error`, `message`,
 *   `tool_response`, `tool_output` (or its `output` / `stderr` fields) and
 *   `tool_input.error`, in that order, joined by newlines.
 */
function failureSummary(input) {
  const toolOutput = input.tool_output;

  const candidates = [
    input.error,
    input.message,
    input.tool_response,
    toolOutput,
    toolOutput?.output,
    toolOutput?.stderr,
    input.tool_input?.error
  ];

  return candidates
    .filter(value => typeof value === 'string' && value !== '')
    .join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Classify failure text against FAILURE_PATTERNS.
 *
 * @param {*} text - Failure text; falsy values are treated as an empty string.
 * @returns {number|string|null} The code of the first matching pattern, or
 *   null when nothing matches.
 */
function detectFailureCode(text) {
  const haystack = String(text || '');
  const hit = FAILURE_PATTERNS.find(({ pattern }) => pattern.test(haystack));
  return hit ? hit.code : null;
}
|
||||||
|
|
||||||
|
/**
 * Probe an HTTP(S) endpoint with a GET request.
 *
 * The returned promise never rejects: an unparsable URL, an unsupported
 * protocol, invalid headers, network errors and timeouts are all reported as
 * a resolved `{ ok: false }` result so callers can treat every outcome
 * uniformly.
 *
 * @param {string} urlString - Endpoint URL.
 * @param {object} headers - Request headers.
 * @param {number} timeoutMs - Socket timeout in milliseconds.
 * @returns {Promise<{ok: boolean, statusCode: number|null, reason: string}>}
 *   `ok` is true when the response status is in HEALTHY_HTTP_CODES.
 */
function requestHttp(urlString, headers, timeoutMs) {
  return new Promise(resolve => {
    let settled = false;
    let timedOut = false;

    const settle = result => {
      if (settled) return;
      settled = true;
      resolve(result);
    };
    const fail = reason => settle({ ok: false, statusCode: null, reason });

    let req;
    try {
      // Both the URL constructor and client.request() throw synchronously on
      // bad input; inside a Promise executor that would surface as a rejection.
      const url = new URL(urlString);
      const client = url.protocol === 'https:' ? https : http;

      req = client.request(
        url,
        {
          method: 'GET',
          headers
        },
        res => {
          // Drain the body so the socket is released; only the status matters.
          res.resume();
          settle({
            ok: HEALTHY_HTTP_CODES.has(res.statusCode),
            statusCode: res.statusCode,
            reason: `HTTP ${res.statusCode}`
          });
        }
      );
    } catch (error) {
      fail(error.message);
      return;
    }

    req.setTimeout(timeoutMs, () => {
      timedOut = true;
      req.destroy(new Error('timeout'));
    });

    req.on('error', error => {
      fail(timedOut ? 'request timed out' : error.message);
    });

    req.end();
  });
}
|
||||||
|
|
||||||
|
/**
 * Probe a stdio (command-based) MCP server by starting a fresh process.
 *
 * The server counts as healthy when the spawned process is still running once
 * the timeout elapses; it is then terminated. A spawn failure or an exit
 * before the timeout counts as unhealthy.
 *
 * @param {string} serverName - Server key, used in the success message.
 * @param {object} config - Server config; `command`, `args` and `env` are read.
 * @returns {Promise<{ok: boolean, statusCode: number|null, reason: string}>}
 *   Never rejects. For an early exit `statusCode` is the process exit code.
 */
function probeCommandServer(serverName, config) {
  return new Promise(resolve => {
    const command = config.command;
    const args = Array.isArray(config.args) ? config.args.map(arg => String(arg)) : [];
    const timeoutMs = envNumber('ECC_MCP_HEALTH_TIMEOUT_MS', DEFAULT_TIMEOUT_MS);
    const mergedEnv = {
      ...process.env,
      ...(config.env && typeof config.env === 'object' && !Array.isArray(config.env) ? config.env : {})
    };

    let stderr = '';
    let done = false;
    let timer = null;

    function finish(result) {
      if (done) return;
      done = true;
      // Once the probe has an answer the liveness timer has nothing left to do.
      if (timer !== null) {
        clearTimeout(timer);
      }
      resolve(result);
    }

    let child;
    try {
      child = spawn(command, args, {
        env: mergedEnv,
        cwd: process.cwd(),
        stdio: ['pipe', 'ignore', 'pipe']
      });
    } catch (error) {
      finish({
        ok: false,
        statusCode: null,
        reason: error.message
      });
      return;
    }

    // Keep at most 4000 characters of stderr to use as the failure reason.
    child.stderr.on('data', chunk => {
      if (stderr.length < 4000) {
        const remaining = 4000 - stderr.length;
        stderr += String(chunk).slice(0, remaining);
      }
    });

    child.on('error', error => {
      finish({
        ok: false,
        statusCode: null,
        reason: error.message
      });
    });

    child.on('exit', (code, signal) => {
      finish({
        ok: false,
        statusCode: code,
        // `??` rather than `||`: a clean exit has code 0, which must be shown
        // as "0" instead of falling through to "unknown".
        reason: stderr.trim() || `process exited before handshake (${signal ?? code ?? 'unknown'})`
      });
    });

    timer = setTimeout(() => {
      try {
        child.kill('SIGTERM');
      } catch {
        // ignore
      }

      // Escalate shortly afterwards in case SIGTERM is ignored.
      setTimeout(() => {
        try {
          child.kill('SIGKILL');
        } catch {
          // ignore
        }
      }, 200).unref?.();

      finish({
        ok: true,
        statusCode: null,
        reason: `${serverName} accepted a new stdio process`
      });
    }, timeoutMs);

    if (typeof timer.unref === 'function') {
      timer.unref();
    }
  });
}
|
||||||
|
|
||||||
|
/**
 * Probe one MCP server according to its config type.
 *
 * - Configs with `type: 'http'` or a `url` are probed over HTTP(S).
 * - Configs with a `command` are probed by starting the process.
 * - Anything else is reported as unsupported.
 *
 * @param {string} serverName - Server key.
 * @param {{config: object, source: string}} resolvedConfig - Result of
 *   resolveServerConfig().
 * @returns {Promise<{ok: boolean, failureCode: number|string|null, reason: string, source: string}>}
 *   `failureCode` is set only for failures classified as reconnect-worthy.
 */
async function probeServer(serverName, resolvedConfig) {
  const { config, source } = resolvedConfig;

  if (config.type === 'http' || config.url) {
    // `type: 'http'` without a usable url must not reach the URL parser.
    if (typeof config.url !== 'string' || !config.url.trim()) {
      return {
        ok: false,
        failureCode: null,
        reason: 'MCP server config is missing a url',
        source
      };
    }

    let result;
    try {
      result = await requestHttp(
        config.url,
        config.headers || {},
        envNumber('ECC_MCP_HEALTH_TIMEOUT_MS', DEFAULT_TIMEOUT_MS)
      );
    } catch (error) {
      // A probe that blows up is a failed probe, not a crashed hook.
      result = { ok: false, statusCode: null, reason: error.message };
    }

    return {
      ok: result.ok,
      failureCode: RECONNECT_STATUS_CODES.has(result.statusCode) ? result.statusCode : null,
      reason: result.reason,
      source
    };
  }

  if (config.command) {
    const result = await probeCommandServer(serverName, config);

    return {
      ok: result.ok,
      // For command servers `statusCode` is a process exit code, which can
      // never equal an HTTP status; classify the failure text instead.
      failureCode: result.ok ? null : detectFailureCode(result.reason),
      reason: result.reason,
      source
    };
  }

  return {
    ok: false,
    failureCode: null,
    reason: 'unsupported MCP server config',
    source
  };
}
|
||||||
|
|
||||||
|
/**
 * Build the shell command used to reconnect a server.
 *
 * A per-server variable `ECC_MCP_RECONNECT_<NAME>` (name upper-cased, every
 * character outside A-Z/0-9 replaced by `_`) takes precedence over the generic
 * `ECC_MCP_RECONNECT_COMMAND`. Every `{server}` placeholder in the command is
 * replaced by the server name.
 *
 * The result is executed through a shell and the server name can originate
 * from tool input, so a name containing anything other than letters, digits
 * and `_ . @ : -` is never substituted into the command.
 *
 * @param {string} serverName - Server key.
 * @returns {string|null} The command to run, or null when none is configured
 *   or the name is not safe to place in a `{server}` placeholder.
 */
function reconnectCommand(serverName) {
  const name = String(serverName);
  const key = `ECC_MCP_RECONNECT_${name.toUpperCase().replace(/[^A-Z0-9]/g, '_')}`;
  const command = process.env[key] || process.env.ECC_MCP_RECONNECT_COMMAND || '';
  if (!command.trim()) {
    return null;
  }

  if (!command.includes('{server}')) {
    return command;
  }

  // Refuse shell metacharacters instead of attempting shell-specific quoting.
  if (!/^[A-Za-z0-9_.@:-]+$/.test(name)) {
    return null;
  }

  // A replacer function keeps `$&`, `$1`, ... in the name from being
  // interpreted as replacement patterns.
  return command.replace(/\{server\}/g, () => name);
}
|
||||||
|
|
||||||
|
/**
 * Run the configured reconnect command for a server, synchronously.
 *
 * @param {string} serverName - Server key.
 * @returns {{attempted: boolean, success: boolean, reason: string}}
 *   `attempted` is false when no command is configured; `success` is true
 *   only when the command exits with status 0.
 */
function attemptReconnect(serverName) {
  const command = reconnectCommand(serverName);
  if (!command) {
    return { attempted: false, success: false, reason: 'no reconnect command configured' };
  }

  const failed = reason => ({ attempted: true, success: false, reason });
  const timeout = envNumber('ECC_MCP_RECONNECT_TIMEOUT_MS', DEFAULT_TIMEOUT_MS);

  const outcome = spawnSync(command, {
    shell: true,
    env: process.env,
    cwd: process.cwd(),
    encoding: 'utf8',
    timeout
  });

  if (outcome.error) {
    return failed(outcome.error.message);
  }

  if (outcome.status === 0) {
    return { attempted: true, success: true, reason: 'reconnect command completed' };
  }

  // Prefer the command's own output as the explanation for a non-zero exit.
  return failed((outcome.stderr || outcome.stdout || `reconnect exited ${outcome.status}`).trim());
}
|
||||||
|
|
||||||
|
/**
 * Whether unhealthy servers should only produce a warning instead of a block.
 *
 * @returns {boolean} True when ECC_MCP_HEALTH_FAIL_OPEN is `1`, `true` or
 *   `yes` (case-insensitive, exact match).
 */
function shouldFailOpen() {
  const flag = process.env.ECC_MCP_HEALTH_FAIL_OPEN;
  if (!flag) {
    return false;
  }
  return ['1', 'true', 'yes'].includes(String(flag).toLowerCase());
}
|
||||||
|
|
||||||
|
/**
 * Write each log message to stderr on its own line.
 *
 * @param {Iterable<string>} logs - Messages to emit, in order.
 * @returns {void}
 */
function emitLogs(logs) {
  const sink = process.stderr;
  for (const message of logs) {
    sink.write(`${message}\n`);
  }
}
|
||||||
|
|
||||||
|
/**
 * PreToolUse handler: decide whether an MCP tool call may proceed.
 *
 * Order of checks:
 *   1. A cached, unexpired "healthy" verdict allows the call immediately.
 *   2. A cached "unhealthy" verdict still inside its backoff window blocks it.
 *   3. Without a config entry for the server the call is allowed unprobed.
 *   4. Otherwise the server is probed; on failure a reconnect plus re-probe is
 *      tried when the failure was classified or the server was already unhealthy.
 *
 * @param {string} rawInput - Raw hook payload, echoed back unchanged.
 * @param {object} input - Parsed payload (not used by this handler).
 * @param {{server: string, tool: string}} target - Target from extractMcpTarget().
 * @param {string} statePathValue - Path of the health-state file.
 * @param {number} now - Current time in epoch milliseconds.
 * @returns {Promise<{rawInput: string, exitCode: number, logs: string[]}>}
 *   `exitCode` is 2 when the call is blocked, or 0 in fail-open mode.
 */
async function handlePreToolUse(rawInput, input, target, statePathValue, now) {
  const logs = [];
  const allow = () => ({ rawInput, exitCode: 0, logs });
  const deny = () => ({ rawInput, exitCode: shouldFailOpen() ? 0 : 2, logs });

  const state = loadState(statePathValue);
  const previous = state.servers[target.server] || {};
  // Captured up front: the record in `state` is replaced by the mark* helpers below.
  const wasUnhealthy = previous.status === 'unhealthy';

  const cachedHealthy = previous.status === 'healthy' && Number(previous.expiresAt || 0) > now;
  if (cachedHealthy) {
    return allow();
  }

  const insideBackoff = wasUnhealthy && Number(previous.nextRetryAt || 0) > now;
  if (insideBackoff) {
    logs.push(
      `[MCPHealthCheck] ${target.server} is marked unhealthy until ${new Date(previous.nextRetryAt).toISOString()}; skipping ${target.tool || 'tool'}`
    );
    return deny();
  }

  const resolvedConfig = resolveServerConfig(target.server);
  if (!resolvedConfig) {
    logs.push(`[MCPHealthCheck] No MCP config found for ${target.server}; skipping preflight probe`);
    return allow();
  }

  const probe = await probeServer(target.server, resolvedConfig);
  if (probe.ok) {
    markHealthy(state, target.server, now, { source: resolvedConfig.source });
    saveState(statePathValue, state);

    if (wasUnhealthy) {
      logs.push(`[MCPHealthCheck] ${target.server} connection restored`);
    }

    return allow();
  }

  let reconnect = { attempted: false, success: false, reason: 'probe failed' };
  const worthReconnecting = Boolean(probe.failureCode) || wasUnhealthy;

  if (worthReconnecting) {
    reconnect = attemptReconnect(target.server);

    if (reconnect.success) {
      const reprobe = await probeServer(target.server, resolvedConfig);

      if (reprobe.ok) {
        markHealthy(state, target.server, now, {
          source: resolvedConfig.source,
          restoredBy: 'reconnect-command'
        });
        saveState(statePathValue, state);
        logs.push(`[MCPHealthCheck] ${target.server} connection restored after reconnect`);
        return allow();
      }

      probe.reason = `${probe.reason}; reconnect reprobe failed: ${reprobe.reason}`;
    }
  }

  markUnhealthy(state, target.server, now, probe.failureCode, probe.reason);
  saveState(statePathValue, state);

  let reconnectSuffix = '';
  if (reconnect.attempted) {
    reconnectSuffix = ` Reconnect attempt: ${reconnect.success ? 'ok' : reconnect.reason}.`;
  }

  logs.push(
    `[MCPHealthCheck] ${target.server} is unavailable (${probe.reason}). Blocking ${target.tool || 'tool'} so Claude can fall back to non-MCP tools.${reconnectSuffix}`
  );

  return deny();
}
|
||||||
|
|
||||||
|
/**
 * PostToolUseFailure handler: react to a failed MCP tool call.
 *
 * When the failure text matches a known pattern the server is marked
 * unhealthy, the reconnect command (if any) is run, and a successful
 * re-probe marks the server healthy again. This handler never blocks.
 *
 * @param {string} rawInput - Raw hook payload, echoed back unchanged.
 * @param {object} input - Parsed payload; failure text is extracted from it.
 * @param {{server: string, tool: string}} target - Target from extractMcpTarget().
 * @param {string} statePathValue - Path of the health-state file.
 * @param {number} now - Current time in epoch milliseconds.
 * @returns {Promise<{rawInput: string, exitCode: number, logs: string[]}>}
 *   `exitCode` is always 0.
 */
async function handlePostToolUseFailure(rawInput, input, target, statePathValue, now) {
  const logs = [];
  const finish = message => {
    if (message !== undefined) {
      logs.push(message);
    }
    return { rawInput, exitCode: 0, logs };
  };

  const summary = failureSummary(input);
  const failureCode = detectFailureCode(summary);

  // Failures that do not look like auth/rate-limit/transport problems are ignored.
  if (!failureCode) {
    return finish();
  }

  const state = loadState(statePathValue);
  markUnhealthy(state, target.server, now, failureCode, summary.slice(0, 500));
  saveState(statePathValue, state);

  logs.push(`[MCPHealthCheck] ${target.server} reported ${failureCode}; marking server unhealthy and attempting reconnect`);

  const reconnect = attemptReconnect(target.server);

  if (!reconnect.attempted) {
    return finish(`[MCPHealthCheck] ${target.server} reconnect skipped: ${reconnect.reason}`);
  }

  if (!reconnect.success) {
    return finish(`[MCPHealthCheck] ${target.server} reconnect failed: ${reconnect.reason}`);
  }

  const resolvedConfig = resolveServerConfig(target.server);

  if (!resolvedConfig) {
    return finish(`[MCPHealthCheck] ${target.server} reconnect completed but no config was available for a follow-up probe`);
  }

  const reprobe = await probeServer(target.server, resolvedConfig);

  if (!reprobe.ok) {
    return finish(`[MCPHealthCheck] ${target.server} reconnect command ran, but health probe still failed: ${reprobe.reason}`);
  }

  // Reload before writing: the state file may have changed while the
  // reconnect command and the probe were running.
  const refreshed = loadState(statePathValue);
  markHealthy(refreshed, target.server, now, {
    source: resolvedConfig.source,
    restoredBy: 'post-failure-reconnect'
  });
  saveState(statePathValue, refreshed);

  return finish(`[MCPHealthCheck] ${target.server} connection restored`);
}
|
||||||
|
|
||||||
|
/**
 * Hook entry point.
 *
 * Reads the hook payload from stdin, dispatches to the PreToolUse or
 * PostToolUseFailure handler, writes the handler's log lines to stderr,
 * echoes the raw payload to stdout and exits with the handler's exit code.
 * Payloads that do not target an MCP tool are echoed back with exit code 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // process.exit() does not wait for pending stdout writes, so exiting right
  // after write() can truncate the echoed payload when stdout is asynchronous.
  // Exit from the write callback instead.
  const echoAndExit = (payload, exitCode) => {
    if (!payload) {
      process.exit(exitCode);
      return;
    }
    process.stdout.write(payload, () => process.exit(exitCode));
  };

  const rawInput = await readRawStdin();
  const input = safeParse(rawInput);
  const target = extractMcpTarget(input);

  if (!target) {
    echoAndExit(rawInput, 0);
    return;
  }

  // The environment variable wins; otherwise fall back to the event name
  // carried in the hook payload itself before defaulting to PreToolUse.
  const eventName = process.env.CLAUDE_HOOK_EVENT_NAME
    || (typeof input.hook_event_name === 'string' ? input.hook_event_name : '')
    || 'PreToolUse';
  const now = Date.now();
  const statePathValue = stateFilePath();

  const result = eventName === 'PostToolUseFailure'
    ? await handlePostToolUseFailure(rawInput, input, target, statePathValue, now)
    : await handlePreToolUse(rawInput, input, target, statePathValue, now);

  emitLogs(result.logs);
  echoAndExit(result.rawInput, result.exitCode);
}
|
||||||
|
|
||||||
|
// Run the hook. Any unexpected failure is reported on stderr and the process
// still exits 0, so a broken health check never blocks a tool call.
main().catch(error => {
  const detail = error.message;
  process.stderr.write(`[MCPHealthCheck] Unexpected error: ${detail}\n`);
  process.exit(0);
});
|
||||||
266
tests/hooks/mcp-health-check.test.js
Normal file
266
tests/hooks/mcp-health-check.test.js
Normal file
@@ -0,0 +1,266 @@
|
|||||||
|
/**
|
||||||
|
* Tests for scripts/hooks/mcp-health-check.js
|
||||||
|
*
|
||||||
|
* Run with: node tests/hooks/mcp-health-check.test.js
|
||||||
|
*/
|
||||||
|
|
||||||
|
const assert = require('assert');
|
||||||
|
const fs = require('fs');
|
||||||
|
const os = require('os');
|
||||||
|
const path = require('path');
|
||||||
|
const { spawnSync } = require('child_process');
|
||||||
|
|
||||||
|
const script = path.join(__dirname, '..', '..', 'scripts', 'hooks', 'mcp-health-check.js');
|
||||||
|
|
||||||
|
/**
 * Run one synchronous test case and print its outcome.
 *
 * @param {string} name - Test description.
 * @param {Function} fn - Test body; a thrown error marks the test as failed.
 * @returns {boolean} True when the body completed without throwing.
 */
function test(name, fn) {
  let threw = false;
  let caught;

  try {
    fn();
  } catch (err) {
    threw = true;
    caught = err;
  }

  if (!threw) {
    console.log(` ✓ ${name}`);
    return true;
  }

  console.log(` ✗ ${name}`);
  console.log(` Error: ${caught.message}`);
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Run one asynchronous test case and print its outcome.
 *
 * @param {string} name - Test description.
 * @param {Function} fn - Test body; its result is awaited, and a throw or
 *   rejection marks the test as failed.
 * @returns {Promise<boolean>} True when the body settled without an error.
 */
async function asyncTest(name, fn) {
  let threw = false;
  let caught;

  try {
    await fn();
  } catch (err) {
    threw = true;
    caught = err;
  }

  if (!threw) {
    console.log(` ✓ ${name}`);
    return true;
  }

  console.log(` ✗ ${name}`);
  console.log(` Error: ${caught.message}`);
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Create a fresh scratch directory for one test.
 *
 * @returns {string} Path of a new directory under the OS temp dir whose name
 *   starts with `ecc-mcp-health-`.
 */
function createTempDir() {
  const prefix = path.join(os.tmpdir(), 'ecc-mcp-health-');
  return fs.mkdtempSync(prefix);
}
|
||||||
|
|
||||||
|
/**
 * Remove a scratch directory and everything in it.
 *
 * @param {string} dirPath - Directory to delete; a missing path is not an error.
 * @returns {void}
 */
function cleanupTempDir(dirPath) {
  const removal = { force: true, recursive: true };
  fs.rmSync(dirPath, removal);
}
|
||||||
|
|
||||||
|
/**
 * Write a config object to disk as two-space-indented JSON.
 *
 * @param {string} configPath - Destination file.
 * @param {*} body - Value to serialise.
 * @returns {void}
 */
function writeConfig(configPath, body) {
  const serialized = JSON.stringify(body, null, 2);
  fs.writeFileSync(configPath, serialized);
}
|
||||||
|
|
||||||
|
/**
 * Read the hook's persisted state file.
 *
 * @param {string} statePath - State file path.
 * @returns {*} The parsed JSON; throws when the file is missing or invalid.
 */
function readState(statePath) {
  const contents = fs.readFileSync(statePath, 'utf8');
  return JSON.parse(contents);
}
|
||||||
|
|
||||||
|
/**
 * Build a command-type MCP server config that runs a script with the same
 * Node.js binary that is running the tests.
 *
 * @param {string} scriptPath - Script the fake server should execute.
 * @returns {{command: string, args: string[]}}
 */
function createCommandConfig(scriptPath) {
  const { execPath: command } = process;
  return { command, args: [scriptPath] };
}
|
||||||
|
|
||||||
|
/**
 * Run the hook script in a child process with the given payload on stdin.
 *
 * @param {object} input - Hook payload; sent as JSON.
 * @param {object} [env={}] - Extra environment variables, applied on top of
 *   the current environment and `ECC_HOOK_PROFILE=standard`.
 * @returns {{code: number, stdout: string, stderr: string}} Exit code and
 *   captured output of the hook.
 * @throws {Error} When the hook could not be started, timed out, or was
 *   terminated by a signal. Reporting such a run as exit code 0 would let
 *   "expected to pass" assertions succeed on a hook that never finished.
 */
function runHook(input, env = {}) {
  // Use the running Node.js binary (as createCommandConfig does) rather than
  // whatever `node` happens to be first on PATH.
  const result = spawnSync(process.execPath, [script], {
    input: JSON.stringify(input),
    encoding: 'utf8',
    env: {
      ...process.env,
      ECC_HOOK_PROFILE: 'standard',
      ...env
    },
    timeout: 15000,
    stdio: ['pipe', 'pipe', 'pipe']
  });

  if (result.error) {
    throw result.error;
  }

  if (result.status === null) {
    throw new Error(`hook terminated by signal ${result.signal || 'unknown'}`);
  }

  return {
    code: result.status,
    stdout: result.stdout || '',
    stderr: result.stderr || ''
  };
}
|
||||||
|
|
||||||
|
/**
 * Run every hook scenario, print a summary and exit with status 1 when any
 * scenario failed (0 otherwise).
 *
 * Each scenario that needs files gets its own temp directory, and all fixture
 * writes happen inside the guarded region so the directory is removed even
 * when setup itself fails.
 *
 * @returns {Promise<void>}
 */
async function runTests() {
  console.log('\n=== Testing mcp-health-check.js ===\n');

  let passed = 0;
  let failed = 0;

  const record = ok => {
    if (ok) {
      passed++;
    } else {
      failed++;
    }
  };

  // Run `body` with a scratch directory that is always cleaned up afterwards.
  const withTempDir = async body => {
    const tempDir = createTempDir();
    try {
      await body(tempDir);
    } finally {
      cleanupTempDir(tempDir);
    }
  };

  record(test('passes through non-MCP tools untouched', () => {
    const result = runHook(
      { tool_name: 'Read', tool_input: { file_path: 'README.md' } },
      { CLAUDE_HOOK_EVENT_NAME: 'PreToolUse' }
    );

    assert.strictEqual(result.code, 0, 'Expected non-MCP tool to pass through');
    assert.strictEqual(result.stderr, '', 'Expected no stderr for non-MCP tool');
  }));

  record(await asyncTest('marks healthy command MCP servers and allows the tool call', () => withTempDir(tempDir => {
    const configPath = path.join(tempDir, 'claude.json');
    const statePath = path.join(tempDir, 'mcp-health.json');
    const serverScript = path.join(tempDir, 'healthy-server.js');

    // A server that simply stays alive counts as healthy for the probe.
    fs.writeFileSync(serverScript, "setInterval(() => {}, 1000);\n");
    writeConfig(configPath, {
      mcpServers: {
        mock: createCommandConfig(serverScript)
      }
    });

    const input = { tool_name: 'mcp__mock__list_items', tool_input: {} };
    const result = runHook(input, {
      CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
      ECC_MCP_CONFIG_PATH: configPath,
      ECC_MCP_HEALTH_STATE_PATH: statePath,
      ECC_MCP_HEALTH_TIMEOUT_MS: '100'
    });

    assert.strictEqual(result.code, 0, `Expected healthy server to pass, got ${result.code}`);
    assert.strictEqual(result.stdout.trim(), JSON.stringify(input), 'Expected original JSON on stdout');

    const state = readState(statePath);
    assert.strictEqual(state.servers.mock.status, 'healthy', 'Expected mock server to be marked healthy');
  })));

  record(await asyncTest('blocks unhealthy command MCP servers and records backoff state', () => withTempDir(tempDir => {
    const configPath = path.join(tempDir, 'claude.json');
    const statePath = path.join(tempDir, 'mcp-health.json');
    const serverScript = path.join(tempDir, 'unhealthy-server.js');

    fs.writeFileSync(serverScript, "process.exit(1);\n");
    writeConfig(configPath, {
      mcpServers: {
        flaky: createCommandConfig(serverScript)
      }
    });

    const result = runHook(
      { tool_name: 'mcp__flaky__search', tool_input: {} },
      {
        CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
        ECC_MCP_CONFIG_PATH: configPath,
        ECC_MCP_HEALTH_STATE_PATH: statePath,
        ECC_MCP_HEALTH_TIMEOUT_MS: '100'
      }
    );

    assert.strictEqual(result.code, 2, 'Expected unhealthy server to block the MCP tool');
    assert.ok(result.stderr.includes('Blocking search'), `Expected blocking message, got: ${result.stderr}`);

    const state = readState(statePath);
    assert.strictEqual(state.servers.flaky.status, 'unhealthy', 'Expected flaky server to be marked unhealthy');
    assert.ok(state.servers.flaky.nextRetryAt > state.servers.flaky.checkedAt, 'Expected retry backoff to be recorded');
  })));

  record(await asyncTest('fail-open mode warns but does not block unhealthy MCP servers', () => withTempDir(tempDir => {
    const configPath = path.join(tempDir, 'claude.json');
    const statePath = path.join(tempDir, 'mcp-health.json');
    const serverScript = path.join(tempDir, 'relaxed-server.js');

    fs.writeFileSync(serverScript, "process.exit(1);\n");
    writeConfig(configPath, {
      mcpServers: {
        relaxed: createCommandConfig(serverScript)
      }
    });

    const result = runHook(
      { tool_name: 'mcp__relaxed__list', tool_input: {} },
      {
        CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
        ECC_MCP_CONFIG_PATH: configPath,
        ECC_MCP_HEALTH_STATE_PATH: statePath,
        ECC_MCP_HEALTH_FAIL_OPEN: '1',
        ECC_MCP_HEALTH_TIMEOUT_MS: '100'
      }
    );

    assert.strictEqual(result.code, 0, 'Expected fail-open mode to allow execution');
    assert.ok(result.stderr.includes('Blocking list') || result.stderr.includes('fall back'), 'Expected warning output in fail-open mode');
  })));

  record(await asyncTest('post-failure reconnect command restores server health when a reprobe succeeds', () => withTempDir(tempDir => {
    const configPath = path.join(tempDir, 'claude.json');
    const statePath = path.join(tempDir, 'mcp-health.json');
    const switchFile = path.join(tempDir, 'server-mode.txt');
    const reconnectFile = path.join(tempDir, 'reconnected.txt');
    const probeScript = path.join(tempDir, 'probe-server.js');
    const reconnectScript = path.join(tempDir, 'reconnect.js');

    // The fake server is "down" until the reconnect script flips the switch file.
    fs.writeFileSync(switchFile, 'down');
    fs.writeFileSync(
      probeScript,
      [
        "const fs = require('fs');",
        `const mode = fs.readFileSync(${JSON.stringify(switchFile)}, 'utf8').trim();`,
        "if (mode === 'up') { setInterval(() => {}, 1000); } else { console.error('401 Unauthorized'); process.exit(1); }"
      ].join('\n')
    );
    fs.writeFileSync(
      reconnectScript,
      [
        "const fs = require('fs');",
        `fs.writeFileSync(${JSON.stringify(switchFile)}, 'up');`,
        `fs.writeFileSync(${JSON.stringify(reconnectFile)}, 'done');`
      ].join('\n')
    );

    writeConfig(configPath, {
      mcpServers: {
        authy: createCommandConfig(probeScript)
      }
    });

    const result = runHook(
      {
        tool_name: 'mcp__authy__messages',
        tool_input: {},
        error: '401 Unauthorized'
      },
      {
        CLAUDE_HOOK_EVENT_NAME: 'PostToolUseFailure',
        ECC_MCP_CONFIG_PATH: configPath,
        ECC_MCP_HEALTH_STATE_PATH: statePath,
        ECC_MCP_RECONNECT_COMMAND: `node ${JSON.stringify(reconnectScript)}`,
        ECC_MCP_HEALTH_TIMEOUT_MS: '100'
      }
    );

    assert.strictEqual(result.code, 0, 'Expected failure hook to remain non-blocking');
    assert.ok(result.stderr.includes('reported 401'), `Expected reconnect log, got: ${result.stderr}`);
    assert.ok(result.stderr.includes('connection restored'), `Expected restored log, got: ${result.stderr}`);
    assert.ok(fs.existsSync(reconnectFile), 'Expected reconnect command to run');

    const state = readState(statePath);
    assert.strictEqual(state.servers.authy.status, 'healthy', 'Expected authy server to be restored after reconnect');
  })));

  console.log(`\nResults: Passed: ${passed}, Failed: ${failed}`);
  process.exit(failed > 0 ? 1 : 0);
}
|
||||||
|
|
||||||
|
// Script entry point: start the suite. A rejection that escapes runTests is
// printed and converted into a non-zero exit code so the run is reported as failed.
runTests().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||||
@@ -313,6 +313,50 @@ async function runTests() {
|
|||||||
}
|
}
|
||||||
})) passed++; else failed++;
|
})) passed++; else failed++;
|
||||||
|
|
||||||
|
// Integration check: look up the PreToolUse hook command by its description,
// run it against an MCP server whose process exits with code 1 immediately,
// and expect the tool call to be blocked (exit code 2) with a warning on stderr.
if (await asyncTest('MCP health hook blocks unhealthy MCP tool calls through hooks.json', async () => {
  const hookCommand = getHookCommandByDescription(hooks, 'PreToolUse', 'Check MCP server health before MCP tool execution');

  const testDir = createTestDir();
  const configPath = path.join(testDir, 'claude.json');
  const statePath = path.join(testDir, 'mcp-health.json');
  const serverScript = path.join(testDir, 'broken-mcp.js');

  try {
    // A "server" that dies on startup.
    fs.writeFileSync(serverScript, 'process.exit(1);\n');

    // MCP config declaring a single server, "broken", launched with the current Node binary.
    const brokenServer = { command: process.execPath, args: [serverScript] };
    const mcpConfig = { mcpServers: { broken: brokenServer } };
    fs.writeFileSync(configPath, JSON.stringify(mcpConfig));

    const hookInput = { tool_name: 'mcp__broken__search', tool_input: {} };
    const hookEnv = {
      CLAUDE_HOOK_EVENT_NAME: 'PreToolUse',
      ECC_MCP_CONFIG_PATH: configPath,
      ECC_MCP_HEALTH_STATE_PATH: statePath,
      ECC_MCP_HEALTH_TIMEOUT_MS: '100'
    };

    const result = await runHookCommand(hookCommand, hookInput, hookEnv);

    assert.strictEqual(result.code, 2, 'Expected unhealthy MCP preflight to block');
    assert.ok(result.stderr.includes('broken is unavailable'), `Expected health warning, got: ${result.stderr}`);
  } finally {
    cleanupTestDir(testDir);
  }
})) passed++; else failed++;
|
||||||
|
|
||||||
if (await asyncTest('hooks handle missing files gracefully', async () => {
|
if (await asyncTest('hooks handle missing files gracefully', async () => {
|
||||||
const testDir = createTestDir();
|
const testDir = createTestDir();
|
||||||
const transcriptPath = path.join(testDir, 'nonexistent.jsonl');
|
const transcriptPath = path.join(testDir, 'nonexistent.jsonl');
|
||||||
|
|||||||
Reference in New Issue
Block a user