From 70449a1cd7e6e2719cbd5e6b6e69e98aecfb3c20 Mon Sep 17 00:00:00 2001 From: Affaan Mustafa Date: Tue, 10 Mar 2026 21:22:35 -0700 Subject: [PATCH] docs: tighten videodb listener guidance --- skills/videodb/SKILL.md | 4 +-- skills/videodb/reference/capture-reference.md | 16 +++++------ skills/videodb/reference/generative.md | 12 +++++++- skills/videodb/scripts/ws_listener.py | 28 +++++++++++++++++-- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/skills/videodb/SKILL.md b/skills/videodb/SKILL.md index e9195d7d..02c417d9 100644 --- a/skills/videodb/SKILL.md +++ b/skills/videodb/SKILL.md @@ -10,7 +10,7 @@ argument-hint: "[task description]" **Perception + memory + actions for video, live streams, and desktop sessions.** -## When to Use +## When to use ### Desktop Perception - Start/stop a **desktop session** capturing **screen, mic, and system audio** @@ -37,7 +37,7 @@ argument-hint: "[task description]" - Connect **RTSP/live feeds** - Run **real-time visual and spoken understanding** and emit **events/alerts** for monitoring workflows -## How It Works +## How it works ### Common inputs - Local **file path**, public **URL**, or **RTSP URL** diff --git a/skills/videodb/reference/capture-reference.md b/skills/videodb/reference/capture-reference.md index 302e0ef1..125653ea 100644 --- a/skills/videodb/reference/capture-reference.md +++ b/skills/videodb/reference/capture-reference.md @@ -383,8 +383,13 @@ For RTStream methods (indexing, transcription, alerts, batch config), see [rtstr │ active │ ──> Start AI pipelines └───────┬──────────────┐ │ │ - │ └──────────────┐ - │ client.stop_capture() │ unrecoverable capture error + │ v + │ ┌───────────────┐ WebSocket: capture_session.failed + │ │ failed │ ──> Inspect error payload and retry setup + │ └───────────────┘ + │ unrecoverable capture error + │ + │ client.stop_capture() v ┌───────────────┐ WebSocket: capture_session.stopping │ stopping │ ──> Finalize streams @@ -399,11 +404,4 @@ For RTStream methods (indexing, transcription, alerts, batch config), see [rtstr ┌───────────────┐ WebSocket: capture_session.exported │ exported │ ──> Access video_id, stream_url, player_url └───────────────┘ - - unrecoverable capture error - │ - v - ┌───────────────┐ WebSocket: capture_session.failed - │ failed │ ──> Inspect error payload and retry setup - └───────────────┘ ``` diff --git a/skills/videodb/reference/generative.md b/skills/videodb/reference/generative.md index e71f31c2..8b36c524 100644 --- a/skills/videodb/reference/generative.md +++ b/skills/videodb/reference/generative.md @@ -165,7 +165,7 @@ Combine scene extraction with text generation: from videodb import SceneExtractionType # First index scenes -video.index_scenes( +scenes = video.index_scenes( extraction_type=SceneExtractionType.time_based, extraction_config={"time": 10}, prompt="Describe the visual content in this scene.", @@ -173,11 +173,21 @@ video.index_scenes( # Get transcript for spoken context transcript_text = video.get_transcript_text() +scene_descriptions = [] +for scene in scenes: + if isinstance(scene, dict): + description = scene.get("description") or scene.get("summary") + else: + description = getattr(scene, "description", None) or getattr(scene, "summary", None) + scene_descriptions.append(description or str(scene)) + +scenes_text = "\n".join(scene_descriptions) # Analyze with collection LLM result = coll.generate_text( prompt=( f"Given this video transcript:\n{transcript_text}\n\n" + f"And these visual scene descriptions:\n{scenes_text}\n\n" "Based on the spoken and visual content, describe the main topics covered." ), model_name="pro", diff --git a/skills/videodb/scripts/ws_listener.py b/skills/videodb/scripts/ws_listener.py index 456bfb20..105aabc7 100644 --- a/skills/videodb/scripts/ws_listener.py +++ b/skills/videodb/scripts/ws_listener.py @@ -39,6 +39,7 @@ from dotenv import load_dotenv load_dotenv() import videodb +from videodb.exceptions import AuthenticationError # Retry config MAX_RETRIES = 10 @@ -82,6 +83,8 @@ def parse_args() -> tuple[bool, Path]: for arg in args: if arg == "--clear": clear = True + elif arg.startswith("-"): + raise SystemExit(f"Unknown flag: {arg}") elif not arg.startswith("-"): output_dir = arg @@ -127,6 +130,17 @@ def cleanup_pid(): LOGGER.debug("Failed to remove PID file %s: %s", PID_FILE, exc) +def is_fatal_error(exc: Exception) -> bool: + """Return True when retrying would hide a permanent configuration error.""" + if isinstance(exc, (AuthenticationError, PermissionError)): + return True + status = getattr(exc, "status_code", None) + if status in {401, 403}: + return True + message = str(exc).lower() + return "401" in message or "403" in message or "auth" in message + + async def listen_with_retry(): """Main listen loop with auto-reconnect and exponential backoff.""" global _first_connection @@ -143,7 +157,12 @@ async def listen_with_retry(): except asyncio.CancelledError: log("Shutdown requested") raise - except RETRYABLE_ERRORS as e: + except Exception as e: + if is_fatal_error(e): + log(f"Fatal configuration error: {e}") + raise + if not isinstance(e, RETRYABLE_ERRORS): + raise retry_count += 1 log(f"Connection error: {e}") @@ -182,7 +201,12 @@ async def listen_with_retry(): except asyncio.CancelledError: log("Shutdown requested") raise - except RETRYABLE_ERRORS as e: + except Exception as e: + if is_fatal_error(e): + log(f"Fatal configuration error: {e}") + raise + if not isinstance(e, RETRYABLE_ERRORS): + raise retry_count += 1 log(f"Connection error: {e}")