feat(ecc2): add session heartbeat stale detection

This commit is contained in:
Affaan Mustafa
2026-04-09 07:20:40 -07:00
parent 48fd68115e
commit 24a3ffa234
9 changed files with 520 additions and 47 deletions

View File

@@ -22,10 +22,8 @@ pub async fn run(db: StateStore, cfg: Config) -> Result<()> {
resume_crashed_sessions(&db)?;
let heartbeat_interval = Duration::from_secs(cfg.heartbeat_interval_secs);
let timeout = Duration::from_secs(cfg.session_timeout_secs);
loop {
if let Err(e) = check_sessions(&db, timeout) {
if let Err(e) = check_sessions(&db, &cfg) {
tracing::error!("Session check failed: {e}");
}
@@ -82,25 +80,8 @@ where
Ok(failed_sessions)
}
fn check_sessions(db: &StateStore, timeout: Duration) -> Result<()> {
let sessions = db.list_sessions()?;
for session in sessions {
if session.state != SessionState::Running {
continue;
}
let elapsed = chrono::Utc::now()
.signed_duration_since(session.updated_at)
.to_std()
.unwrap_or(Duration::ZERO);
if elapsed > timeout {
tracing::warn!("Session {} timed out after {:?}", session.id, elapsed);
db.update_state_and_pid(&session.id, &SessionState::Failed, None)?;
}
}
/// Runs one heartbeat enforcement pass on behalf of the daemon loop.
///
/// Delegates to `manager::enforce_session_heartbeats` and propagates its
/// error. The returned outcome is deliberately dropped.
fn check_sessions(db: &StateStore, cfg: &Config) -> Result<()> {
    manager::enforce_session_heartbeats(db, cfg)?;
    Ok(())
}
@@ -498,6 +479,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
}
}

View File

@@ -68,6 +68,58 @@ pub fn get_team_status(db: &StateStore, id: &str, depth: usize) -> Result<TeamSt
})
}
/// Summary of a single heartbeat enforcement pass over the stored sessions.
#[derive(Debug, Clone, Default, Serialize)]
pub struct HeartbeatEnforcementOutcome {
/// Ids of sessions that were moved to `SessionState::Stale` during the pass.
pub stale_sessions: Vec<String>,
/// Ids of sessions that were marked `SessionState::Failed` during the pass
/// because auto-termination of stale sessions was enabled.
pub auto_terminated_sessions: Vec<String>,
}
pub fn enforce_session_heartbeats(
db: &StateStore,
cfg: &Config,
) -> Result<HeartbeatEnforcementOutcome> {
enforce_session_heartbeats_with(db, cfg, kill_process)
}
fn enforce_session_heartbeats_with<F>(
db: &StateStore,
cfg: &Config,
terminate_pid: F,
) -> Result<HeartbeatEnforcementOutcome>
where
F: Fn(u32) -> Result<()>,
{
let timeout = chrono::Duration::seconds(cfg.session_timeout_secs as i64);
let now = chrono::Utc::now();
let mut outcome = HeartbeatEnforcementOutcome::default();
for session in db.list_sessions()? {
if !matches!(session.state, SessionState::Running | SessionState::Stale) {
continue;
}
if now.signed_duration_since(session.last_heartbeat_at) <= timeout {
continue;
}
if cfg.auto_terminate_stale_sessions {
if let Some(pid) = session.pid {
let _ = terminate_pid(pid);
}
db.update_state_and_pid(&session.id, &SessionState::Failed, None)?;
outcome.auto_terminated_sessions.push(session.id);
continue;
}
if session.state != SessionState::Stale {
db.update_state(&session.id, &SessionState::Stale)?;
outcome.stale_sessions.push(session.id);
}
}
Ok(outcome)
}
pub async fn assign_session(
db: &StateStore,
cfg: &Config,
@@ -685,7 +737,7 @@ pub async fn merge_session_worktree(
if matches!(
session.state,
SessionState::Pending | SessionState::Running | SessionState::Idle
SessionState::Pending | SessionState::Running | SessionState::Idle | SessionState::Stale
) {
anyhow::bail!(
"Cannot merge active session {} while it is {}",
@@ -747,7 +799,10 @@ pub async fn merge_ready_worktrees(
if matches!(
session.state,
SessionState::Pending | SessionState::Running | SessionState::Idle
SessionState::Pending
| SessionState::Running
| SessionState::Idle
| SessionState::Stale
) {
active_with_worktree_ids.push(session.id);
continue;
@@ -902,6 +957,7 @@ pub async fn run_session(
session_id.to_string(),
command,
SessionOutputStore::default(),
std::time::Duration::from_secs(cfg.heartbeat_interval_secs),
)
.await?;
Ok(())
@@ -997,6 +1053,7 @@ fn build_session_record(
worktree,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})
}
@@ -1488,6 +1545,15 @@ impl fmt::Display for SessionStatus {
writeln!(f, "Tools: {}", s.metrics.tool_calls)?;
writeln!(f, "Files: {}", s.metrics.files_changed)?;
writeln!(f, "Cost: ${:.4}", s.metrics.cost_usd)?;
writeln!(
f,
"Heartbeat: {} ({}s ago)",
s.last_heartbeat_at,
chrono::Utc::now()
.signed_duration_since(s.last_heartbeat_at)
.num_seconds()
.max(0)
)?;
if !self.delegated_children.is_empty() {
writeln!(f, "Children: {}", self.delegated_children.join(", "))?;
}
@@ -1528,6 +1594,7 @@ impl fmt::Display for TeamStatus {
for lane in [
"Running",
"Idle",
"Stale",
"Pending",
"Failed",
"Stopped",
@@ -1676,6 +1743,7 @@ fn session_state_label(state: &SessionState) -> &'static str {
SessionState::Pending => "Pending",
SessionState::Running => "Running",
SessionState::Idle => "Idle",
SessionState::Stale => "Stale",
SessionState::Completed => "Completed",
SessionState::Failed => "Failed",
SessionState::Stopped => "Stopped",
@@ -1727,6 +1795,7 @@ mod tests {
max_parallel_worktrees: 4,
session_timeout_secs: 60,
heartbeat_interval_secs: 5,
auto_terminate_stale_sessions: false,
default_agent: "claude".to_string(),
auto_dispatch_unread_handoffs: false,
auto_dispatch_limit_per_session: 5,
@@ -1755,10 +1824,85 @@ mod tests {
worktree: None,
created_at: updated_at - Duration::minutes(1),
updated_at,
last_heartbeat_at: updated_at,
metrics: SessionMetrics::default(),
}
}
/// With auto-termination left disabled, an overdue running session is only
/// flagged as stale and keeps its pid.
#[test]
fn enforce_session_heartbeats_marks_overdue_running_sessions_stale() -> Result<()> {
    let scratch = TestDir::new("manager-heartbeat-stale")?;
    let cfg = build_config(scratch.path());
    let db = StateStore::open(&cfg.db_path)?;

    // Every timestamp on the record, heartbeat included, is five minutes old.
    let five_minutes_ago = Utc::now() - Duration::minutes(5);
    let overdue = Session {
        id: "stale-1".to_string(),
        task: "heartbeat overdue".to_string(),
        agent_type: "claude".to_string(),
        working_dir: PathBuf::from("/tmp"),
        state: SessionState::Running,
        pid: Some(4242),
        worktree: None,
        created_at: five_minutes_ago,
        updated_at: five_minutes_ago,
        last_heartbeat_at: five_minutes_ago,
        metrics: SessionMetrics::default(),
    };
    db.insert_session(&overdue)?;

    let outcome = enforce_session_heartbeats(&db, &cfg)?;
    let stored = db.get_session("stale-1")?.expect("session should exist");

    assert_eq!(outcome.stale_sessions, vec![String::from("stale-1")]);
    assert!(outcome.auto_terminated_sessions.is_empty());
    assert_eq!(stored.state, SessionState::Stale);
    assert_eq!(stored.pid, Some(4242));
    Ok(())
}
/// With auto-termination enabled, an overdue session has its pid handed to
/// the termination callback and is recorded as failed with no pid.
#[test]
fn enforce_session_heartbeats_auto_terminates_when_enabled() -> Result<()> {
    let scratch = TestDir::new("manager-heartbeat-terminate")?;
    let cfg = {
        let mut cfg = build_config(scratch.path());
        cfg.auto_terminate_stale_sessions = true;
        cfg
    };
    let db = StateStore::open(&cfg.db_path)?;

    // Shared log of the pids passed to the termination callback.
    let killed = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
    let recorder = std::sync::Arc::clone(&killed);

    let five_minutes_ago = Utc::now() - Duration::minutes(5);
    let overdue = Session {
        id: "stale-2".to_string(),
        task: "terminate overdue".to_string(),
        agent_type: "claude".to_string(),
        working_dir: PathBuf::from("/tmp"),
        state: SessionState::Running,
        pid: Some(7777),
        worktree: None,
        created_at: five_minutes_ago,
        updated_at: five_minutes_ago,
        last_heartbeat_at: five_minutes_ago,
        metrics: SessionMetrics::default(),
    };
    db.insert_session(&overdue)?;

    let record_kill = move |pid| {
        recorder.lock().unwrap().push(pid);
        Ok(())
    };
    let outcome = enforce_session_heartbeats_with(&db, &cfg, record_kill)?;
    let stored = db.get_session("stale-2")?.expect("session should exist");

    assert!(outcome.stale_sessions.is_empty());
    assert_eq!(
        outcome.auto_terminated_sessions,
        vec![String::from("stale-2")]
    );
    assert_eq!(*killed.lock().unwrap(), vec![7777]);
    assert_eq!(stored.state, SessionState::Failed);
    assert_eq!(stored.pid, None);
    Ok(())
}
fn build_daemon_activity() -> super::super::store::DaemonActivity {
let now = Utc::now();
super::super::store::DaemonActivity {
@@ -1976,6 +2120,7 @@ mod tests {
}),
created_at: now - Duration::minutes(1),
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
db.update_metrics(
@@ -2032,6 +2177,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(1),
last_heartbeat_at: now - Duration::minutes(1),
metrics: SessionMetrics::default(),
})?;
db.update_metrics(
@@ -2076,6 +2222,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(1),
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -2328,6 +2475,7 @@ mod tests {
worktree: Some(merged_worktree.clone()),
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -2343,6 +2491,7 @@ mod tests {
worktree: Some(active_worktree.clone()),
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -2359,6 +2508,7 @@ mod tests {
worktree: Some(dirty_worktree.clone()),
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -2584,6 +2734,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2596,6 +2747,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(1),
updated_at: now - Duration::minutes(1),
last_heartbeat_at: now - Duration::minutes(1),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -2651,6 +2803,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2663,6 +2816,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -2727,6 +2881,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2739,6 +2894,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -2794,6 +2950,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2806,6 +2963,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -2865,6 +3023,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2877,6 +3036,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -2930,6 +3090,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
@@ -2977,6 +3138,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -2989,6 +3151,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
db.send_message(
@@ -3044,6 +3207,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
}
@@ -3103,6 +3267,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
}
@@ -3154,6 +3319,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
@@ -3167,6 +3333,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
@@ -3222,6 +3389,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(4),
updated_at: now - Duration::minutes(4),
last_heartbeat_at: now - Duration::minutes(4),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -3234,6 +3402,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -3246,6 +3415,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(2),
updated_at: now - Duration::minutes(2),
last_heartbeat_at: now - Duration::minutes(2),
metrics: SessionMetrics::default(),
})?;
@@ -3307,6 +3477,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(4),
updated_at: now - Duration::minutes(4),
last_heartbeat_at: now - Duration::minutes(4),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -3319,6 +3490,7 @@ mod tests {
worktree: None,
created_at: now - Duration::minutes(3),
updated_at: now - Duration::minutes(3),
last_heartbeat_at: now - Duration::minutes(3),
metrics: SessionMetrics::default(),
})?;

View File

@@ -20,6 +20,7 @@ pub struct Session {
pub worktree: Option<WorktreeInfo>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub last_heartbeat_at: DateTime<Utc>,
pub metrics: SessionMetrics,
}
@@ -28,6 +29,7 @@ pub enum SessionState {
Pending,
Running,
Idle,
Stale,
Completed,
Failed,
Stopped,
@@ -39,6 +41,7 @@ impl fmt::Display for SessionState {
SessionState::Pending => write!(f, "pending"),
SessionState::Running => write!(f, "running"),
SessionState::Idle => write!(f, "idle"),
SessionState::Stale => write!(f, "stale"),
SessionState::Completed => write!(f, "completed"),
SessionState::Failed => write!(f, "failed"),
SessionState::Stopped => write!(f, "stopped"),
@@ -60,12 +63,21 @@ impl SessionState {
) | (
SessionState::Running,
SessionState::Idle
| SessionState::Stale
| SessionState::Completed
| SessionState::Failed
| SessionState::Stopped
) | (
SessionState::Idle,
SessionState::Running
| SessionState::Stale
| SessionState::Completed
| SessionState::Failed
| SessionState::Stopped
) | (
SessionState::Stale,
SessionState::Running
| SessionState::Idle
| SessionState::Completed
| SessionState::Failed
| SessionState::Stopped
@@ -78,6 +90,7 @@ impl SessionState {
match value {
"running" => SessionState::Running,
"idle" => SessionState::Idle,
"stale" => SessionState::Stale,
"completed" => SessionState::Completed,
"failed" => SessionState::Failed,
"stopped" => SessionState::Stopped,

View File

@@ -5,6 +5,7 @@ use anyhow::{Context, Result};
use tokio::io::{AsyncBufReadExt, AsyncRead, BufReader};
use tokio::process::Command;
use tokio::sync::{mpsc, oneshot};
use tokio::time::{self, MissedTickBehavior};
use super::output::{OutputStream, SessionOutputStore};
use super::store::StateStore;
@@ -26,6 +27,9 @@ enum DbMessage {
line: String,
ack: oneshot::Sender<DbAck>,
},
TouchHeartbeat {
ack: oneshot::Sender<DbAck>,
},
}
#[derive(Clone)]
@@ -53,6 +57,10 @@ impl DbWriter {
.await
}
/// Submits a `DbMessage::TouchHeartbeat` request through `send` and returns
/// whatever result `send` reports.
async fn touch_heartbeat(&self) -> Result<()> {
    let build_request = |ack| DbMessage::TouchHeartbeat { ack };
    self.send(build_request).await
}
async fn send<F>(&self, build: F) -> Result<()>
where
F: FnOnce(oneshot::Sender<DbAck>) -> DbMessage,
@@ -111,6 +119,17 @@ fn run_db_writer(db_path: PathBuf, session_id: String, mut rx: mpsc::UnboundedRe
};
let _ = ack.send(result);
}
DbMessage::TouchHeartbeat { ack } => {
let result = match opened.as_ref() {
Some(db) => db
.touch_heartbeat(&session_id)
.map_err(|error| error.to_string()),
None => Err(open_error
.clone()
.unwrap_or_else(|| "Failed to open state store".to_string())),
};
let _ = ack.send(result);
}
}
}
}
@@ -120,6 +139,7 @@ pub async fn capture_command_output(
session_id: String,
mut command: Command,
output_store: SessionOutputStore,
heartbeat_interval: std::time::Duration,
) -> Result<ExitStatus> {
let db_writer = DbWriter::start(db_path, session_id.clone());
@@ -152,6 +172,19 @@ pub async fn capture_command_output(
.ok_or_else(|| anyhow::anyhow!("Spawned process did not expose a process id"))?;
db_writer.update_pid(Some(pid)).await?;
db_writer.update_state(SessionState::Running).await?;
db_writer.touch_heartbeat().await?;
let heartbeat_writer = db_writer.clone();
let heartbeat_task = tokio::spawn(async move {
let mut ticker = time::interval(heartbeat_interval);
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
loop {
ticker.tick().await;
if heartbeat_writer.touch_heartbeat().await.is_err() {
break;
}
}
});
let stdout_task = tokio::spawn(capture_stream(
session_id.clone(),
@@ -169,6 +202,8 @@ pub async fn capture_command_output(
));
let status = child.wait().await?;
heartbeat_task.abort();
let _ = heartbeat_task.await;
stdout_task.await??;
stderr_task.await??;
@@ -244,6 +279,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -254,9 +290,14 @@ mod tests {
.arg("-c")
.arg("printf 'alpha\\n'; printf 'beta\\n' >&2");
let status =
capture_command_output(db_path.clone(), session_id.clone(), command, output_store)
.await?;
let status = capture_command_output(
db_path.clone(),
session_id.clone(),
command,
output_store,
std::time::Duration::from_millis(10),
)
.await?;
assert!(status.success());
@@ -286,4 +327,49 @@ mod tests {
Ok(())
}
/// A process that prints nothing must still have its heartbeat advanced while
/// it runs, and must end up in the `Completed` state.
#[tokio::test]
async fn capture_command_output_updates_heartbeat_for_quiet_processes() -> Result<()> {
    let db_path = env::temp_dir().join(format!("ecc2-runtime-heartbeat-{}.db", Uuid::new_v4()));
    let seed_db = StateStore::open(&db_path)?;
    let session_id = "session-heartbeat".to_string();
    let now = Utc::now();

    let pending = Session {
        id: session_id.clone(),
        task: "quiet process".to_string(),
        agent_type: "test".to_string(),
        working_dir: env::temp_dir(),
        state: SessionState::Pending,
        pid: None,
        worktree: None,
        created_at: now,
        updated_at: now,
        last_heartbeat_at: now,
        metrics: SessionMetrics::default(),
    };
    seed_db.insert_session(&pending)?;

    // The command sleeps briefly without writing any output; the heartbeat
    // interval passed below is 10ms.
    let mut command = Command::new("/bin/sh");
    command.arg("-c").arg("sleep 0.05");
    let heartbeat_every = std::time::Duration::from_millis(10);

    let _ = capture_command_output(
        db_path.clone(),
        session_id.clone(),
        command,
        SessionOutputStore::default(),
        heartbeat_every,
    )
    .await?;

    // Re-open the store to read back what was persisted.
    let verify_db = StateStore::open(&db_path)?;
    let stored = verify_db
        .get_session(&session_id)?
        .expect("session should still exist");

    assert!(stored.last_heartbeat_at > now);
    assert_eq!(stored.state, SessionState::Completed);

    let _ = std::fs::remove_file(db_path);
    Ok(())
}
}

View File

@@ -132,7 +132,8 @@ impl StateStore {
duration_secs INTEGER DEFAULT 0,
cost_usd REAL DEFAULT 0.0,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL
updated_at TEXT NOT NULL,
last_heartbeat_at TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS tool_log (
@@ -240,6 +241,20 @@ impl StateStore {
.context("Failed to add output_tokens column to sessions table")?;
}
if !self.has_column("sessions", "last_heartbeat_at")? {
self.conn
.execute("ALTER TABLE sessions ADD COLUMN last_heartbeat_at TEXT", [])
.context("Failed to add last_heartbeat_at column to sessions table")?;
self.conn
.execute(
"UPDATE sessions
SET last_heartbeat_at = updated_at
WHERE last_heartbeat_at IS NULL",
[],
)
.context("Failed to backfill last_heartbeat_at column")?;
}
if !self.has_column("tool_log", "hook_event_id")? {
self.conn
.execute("ALTER TABLE tool_log ADD COLUMN hook_event_id TEXT", [])
@@ -404,8 +419,8 @@ impl StateStore {
pub fn insert_session(&self, session: &Session) -> Result<()> {
self.conn.execute(
"INSERT INTO sessions (id, task, agent_type, working_dir, state, pid, worktree_path, worktree_branch, worktree_base, created_at, updated_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
"INSERT INTO sessions (id, task, agent_type, working_dir, state, pid, worktree_path, worktree_branch, worktree_base, created_at, updated_at, last_heartbeat_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
rusqlite::params![
session.id,
session.task,
@@ -421,6 +436,7 @@ impl StateStore {
session.worktree.as_ref().map(|w| w.base_branch.clone()),
session.created_at.to_rfc3339(),
session.updated_at.to_rfc3339(),
session.last_heartbeat_at.to_rfc3339(),
],
)?;
Ok(())
@@ -433,7 +449,12 @@ impl StateStore {
pid: Option<u32>,
) -> Result<()> {
let updated = self.conn.execute(
"UPDATE sessions SET state = ?1, pid = ?2, updated_at = ?3 WHERE id = ?4",
"UPDATE sessions
SET state = ?1,
pid = ?2,
updated_at = ?3,
last_heartbeat_at = ?3
WHERE id = ?4",
rusqlite::params![
state.to_string(),
pid.map(i64::from),
@@ -470,7 +491,11 @@ impl StateStore {
}
let updated = self.conn.execute(
"UPDATE sessions SET state = ?1, updated_at = ?2 WHERE id = ?3",
"UPDATE sessions
SET state = ?1,
updated_at = ?2,
last_heartbeat_at = ?2
WHERE id = ?3",
rusqlite::params![
state.to_string(),
chrono::Utc::now().to_rfc3339(),
@@ -487,7 +512,11 @@ impl StateStore {
pub fn update_pid(&self, session_id: &str, pid: Option<u32>) -> Result<()> {
let updated = self.conn.execute(
"UPDATE sessions SET pid = ?1, updated_at = ?2 WHERE id = ?3",
"UPDATE sessions
SET pid = ?1,
updated_at = ?2,
last_heartbeat_at = ?2
WHERE id = ?3",
rusqlite::params![
pid.map(i64::from),
chrono::Utc::now().to_rfc3339(),
@@ -505,7 +534,11 @@ impl StateStore {
pub fn clear_worktree(&self, session_id: &str) -> Result<()> {
let updated = self.conn.execute(
"UPDATE sessions
SET worktree_path = NULL, worktree_branch = NULL, worktree_base = NULL, updated_at = ?1
SET worktree_path = NULL,
worktree_branch = NULL,
worktree_base = NULL,
updated_at = ?1,
last_heartbeat_at = ?1
WHERE id = ?2",
rusqlite::params![chrono::Utc::now().to_rfc3339(), session_id],
)?;
@@ -571,7 +604,10 @@ impl StateStore {
.unwrap_or_default()
.with_timezone(&chrono::Utc);
let effective_end = match state {
SessionState::Pending | SessionState::Running | SessionState::Idle => now,
SessionState::Pending
| SessionState::Running
| SessionState::Idle
| SessionState::Stale => now,
SessionState::Completed | SessionState::Failed | SessionState::Stopped => {
updated_at
}
@@ -592,6 +628,20 @@ impl StateStore {
Ok(())
}
/// Stamps the session's `last_heartbeat_at` column with the current UTC time.
///
/// Only the heartbeat column is written by this statement; `updated_at` is
/// not part of the update.
///
/// # Errors
///
/// Fails if the statement cannot be executed, or with
/// `Session not found: <id>` when no row matches `session_id`.
pub fn touch_heartbeat(&self, session_id: &str) -> Result<()> {
    let stamp = chrono::Utc::now().to_rfc3339();
    let rows_changed = self.conn.execute(
        "UPDATE sessions SET last_heartbeat_at = ?1 WHERE id = ?2",
        rusqlite::params![stamp, session_id],
    )?;

    match rows_changed {
        0 => anyhow::bail!("Session not found: {session_id}"),
        _ => Ok(()),
    }
}
pub fn sync_cost_tracker_metrics(&self, metrics_path: &Path) -> Result<()> {
if !metrics_path.exists() {
return Ok(());
@@ -786,7 +836,11 @@ impl StateStore {
pub fn increment_tool_calls(&self, session_id: &str) -> Result<()> {
self.conn.execute(
"UPDATE sessions SET tool_calls = tool_calls + 1, updated_at = ?1 WHERE id = ?2",
"UPDATE sessions
SET tool_calls = tool_calls + 1,
updated_at = ?1,
last_heartbeat_at = ?1
WHERE id = ?2",
rusqlite::params![chrono::Utc::now().to_rfc3339(), session_id],
)?;
Ok(())
@@ -796,7 +850,7 @@ impl StateStore {
let mut stmt = self.conn.prepare(
"SELECT id, task, agent_type, working_dir, state, pid, worktree_path, worktree_branch, worktree_base,
input_tokens, output_tokens, tokens_used, tool_calls, files_changed, duration_secs, cost_usd,
created_at, updated_at
created_at, updated_at, last_heartbeat_at
FROM sessions ORDER BY updated_at DESC",
)?;
@@ -814,6 +868,7 @@ impl StateStore {
let created_str: String = row.get(16)?;
let updated_str: String = row.get(17)?;
let heartbeat_str: String = row.get(18)?;
Ok(Session {
id: row.get(0)?,
@@ -829,6 +884,11 @@ impl StateStore {
updated_at: chrono::DateTime::parse_from_rfc3339(&updated_str)
.unwrap_or_default()
.with_timezone(&chrono::Utc),
last_heartbeat_at: chrono::DateTime::parse_from_rfc3339(&heartbeat_str)
.unwrap_or_else(|_| {
chrono::DateTime::parse_from_rfc3339(&updated_str).unwrap_or_default()
})
.with_timezone(&chrono::Utc),
metrics: SessionMetrics {
input_tokens: row.get(9)?,
output_tokens: row.get(10)?,
@@ -1299,7 +1359,10 @@ impl StateStore {
)?;
self.conn.execute(
"UPDATE sessions SET updated_at = ?1 WHERE id = ?2",
"UPDATE sessions
SET updated_at = ?1,
last_heartbeat_at = ?1
WHERE id = ?2",
rusqlite::params![chrono::Utc::now().to_rfc3339(), session_id],
)?;
@@ -1460,6 +1523,7 @@ mod tests {
worktree: None,
created_at: now - ChronoDuration::minutes(1),
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
}
}
@@ -1520,6 +1584,9 @@ mod tests {
assert!(column_names.iter().any(|column| column == "pid"));
assert!(column_names.iter().any(|column| column == "input_tokens"));
assert!(column_names.iter().any(|column| column == "output_tokens"));
assert!(column_names
.iter()
.any(|column| column == "last_heartbeat_at"));
Ok(())
}
@@ -1539,6 +1606,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -1583,6 +1651,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -1595,6 +1664,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;
@@ -1648,6 +1718,7 @@ mod tests {
worktree: None,
created_at: now - ChronoDuration::seconds(95),
updated_at: now - ChronoDuration::seconds(1),
last_heartbeat_at: now - ChronoDuration::seconds(1),
metrics: SessionMetrics::default(),
})?;
db.insert_session(&Session {
@@ -1660,6 +1731,7 @@ mod tests {
worktree: None,
created_at: now - ChronoDuration::seconds(80),
updated_at: now - ChronoDuration::seconds(5),
last_heartbeat_at: now - ChronoDuration::seconds(5),
metrics: SessionMetrics::default(),
})?;
@@ -1678,6 +1750,36 @@ mod tests {
Ok(())
}
/// `touch_heartbeat` must move `last_heartbeat_at` forward from the value the
/// session was inserted with.
#[test]
fn touch_heartbeat_updates_last_heartbeat_timestamp() -> Result<()> {
    let scratch = TestDir::new("store-touch-heartbeat")?;
    let db = StateStore::open(&scratch.path().join("state.db"))?;

    // Insert with a heartbeat 30 seconds in the past so a fresh touch is
    // strictly later.
    let inserted_at = Utc::now() - ChronoDuration::seconds(30);
    let record = Session {
        id: "session-1".to_string(),
        task: "heartbeat".to_string(),
        agent_type: "claude".to_string(),
        working_dir: PathBuf::from("/tmp"),
        state: SessionState::Running,
        pid: Some(1234),
        worktree: None,
        created_at: inserted_at,
        updated_at: inserted_at,
        last_heartbeat_at: inserted_at,
        metrics: SessionMetrics::default(),
    };
    db.insert_session(&record)?;

    db.touch_heartbeat("session-1")?;

    let stored = db
        .get_session("session-1")?
        .expect("session should still exist");
    assert!(stored.last_heartbeat_at > inserted_at);
    Ok(())
}
#[test]
fn append_output_line_keeps_latest_buffer_window() -> Result<()> {
let tempdir = TestDir::new("store-output")?;
@@ -1694,6 +1796,7 @@ mod tests {
worktree: None,
created_at: now,
updated_at: now,
last_heartbeat_at: now,
metrics: SessionMetrics::default(),
})?;