docs: harden videodb skill examples

2026-07-01 04:21:27 +08:00 · 2026-03-10 21:03:32 -07:00
parent 9dfe149310
commit b8ab34e362
7 changed files with 173 additions and 61 deletions
@@ -328,7 +328,18 @@ Use `ws_listener.py` to capture WebSocket events during recording sessions. Desk

 ```python
 import json
-events = [json.loads(l) for l in open("/tmp/videodb_events.jsonl")]
+from pathlib import Path
+
+events_file = Path("/tmp/videodb_events.jsonl")
+events = []
+
+if events_file.exists():
+    with events_file.open(encoding="utf-8") as handle:
+        for line in handle:
+            try:
+                events.append(json.loads(line))
+            except json.JSONDecodeError:
+                continue

 # Get all transcripts
 transcripts = [e["data"]["text"] for e in events if e.get("channel") == "transcript"]
@@ -361,8 +372,9 @@ For complete capture workflow, see [reference/capture.md](reference/capture.md).
 | Need to combine/trim clips | `VideoAsset` on a `Timeline` |
 | Need to generate voiceover, music, or SFX | `coll.generate_voice()`, `generate_music()`, `generate_sound_effect()` |

-## Repository
+## Provenance

-https://github.com/video-db/skills
+Reference material for this skill is vendored locally under `skills/videodb/reference/`.
+Use those local copies instead of following external repository links at runtime.

-**Maintained By:** [VideoDB](https://github.com/video-db)
+**Maintained By:** [VideoDB](https://www.videodb.io/)
@@ -168,8 +168,8 @@ kill $(cat /tmp/videodb_ws_pid)
 Each line is a JSON object with added timestamps:

 ```json
-{"ts": "2026-03-02T10:15:30.123Z", "unix_ts": 1709374530.12, "channel": "visual_index", "data": {"text": "..."}}
-{"ts": "2026-03-02T10:15:31.456Z", "unix_ts": 1709374531.45, "event": "capture_session.active", "capture_session_id": "cap-xxx"}
+{"ts": "2026-03-02T10:15:30.123Z", "unix_ts": 1772446530.123, "channel": "visual_index", "data": {"text": "..."}}
+{"ts": "2026-03-02T10:15:31.456Z", "unix_ts": 1772446531.456, "event": "capture_session.active", "capture_session_id": "cap-xxx"}
 ```

 ### Reading Events
@@ -365,10 +365,17 @@ For RTStream methods (indexing, transcription, alerts, batch config), see [rtstr
  └───────┬───────┘
          │  client.start_capture_session()
          v
+  ┌───────────────┐     WebSocket: capture_session.starting
+  │   starting     │ ──> Capture channels connect
+  └───────┬───────┘
+          │
+          v
  ┌───────────────┐     WebSocket: capture_session.active
  │    active      │ ──> Start AI pipelines
-  └───────┬───────┘
-          │  client.stop_capture()
+  └───────┬──────────────┐
+          │              │
+          │              └──────────────┐
+          │  client.stop_capture()      │ unrecoverable capture error
          v
  ┌───────────────┐     WebSocket: capture_session.stopping
  │   stopping     │ ──> Finalize streams
@@ -383,4 +390,8 @@ For RTStream methods (indexing, transcription, alerts, batch config), see [rtstr
  ┌───────────────┐     WebSocket: capture_session.exported
  │   exported     │ ──> Access video_id, stream_url, player_url
  └───────────────┘
+
+  ┌───────────────┐     WebSocket: capture_session.failed
+  │    failed      │ ──> Inspect error payload and retry setup
+  └───────────────┘
 ```
@@ -313,7 +313,7 @@ stream_url = timeline.generate_stream()
 print(f"Highlight reel: {stream_url}")
 ```

-### Picture-in-Picture with Background Music
+### Logo Overlay with Background Music

 ```python
 import videodb
@@ -365,6 +365,7 @@ clips = [
 ]

 timeline = Timeline(conn)
+timeline_offset = 0.0

 for clip in clips:
    # Add a label as an overlay on each clip
@@ -376,7 +377,8 @@ for clip in clips:
    timeline.add_inline(
        VideoAsset(asset_id=clip["video_id"], start=clip["start"], end=clip["end"])
    )
-    timeline.add_overlay(0, label)
+    timeline.add_overlay(timeline_offset, label)
+    timeline_offset += clip["end"] - clip["start"]

 stream_url = timeline.generate_stream()
 print(f"Montage: {stream_url}")
@@ -59,7 +59,7 @@ video.play()
 | Parameter | Type | Default | Description |
 |-----------|------|---------|-------------|
 | `prompt` | `str` | required | Text description of the video to generate |
-| `duration` | `float` | `5` | Duration in seconds (must be integer value, 5-8) |
+| `duration` | `int` | `5` | Duration in whole seconds (5-8) |
 | `callback_url` | `str\|None` | `None` | URL to receive async callback |

 Returns a `Video` object. Generated videos are automatically added to the collection and can be used in timelines, searches, and compilations like any uploaded video.
@@ -519,6 +519,7 @@ For WebSocket event structures and ws_listener usage, see [capture-reference.md]
 ```python
 import time
 import videodb
+from videodb.exceptions import InvalidRequestError

 conn = videodb.connect()
 coll = conn.get_collection()
@@ -527,6 +528,7 @@ coll = conn.get_collection()
 rtstream = coll.connect_rtstream(
    url="rtmp://your-stream-server/live/stream-key",
    name="Weekly Standup",
+    store=True,
 )
 rtstream.start()

@@ -536,6 +538,10 @@ time.sleep(1800)  # 30 minutes
 end_ts = time.time()
 rtstream.stop()

+# Generate an immediate playback URL for the captured window
+stream_url = rtstream.generate_stream(start=start_ts, end=end_ts)
+print(f"Recorded stream: {stream_url}")
+
 # 3. Export to a permanent video
 export_result = rtstream.export(name="Weekly Standup Recording")
 print(f"Exported video: {export_result.video_id}")
@@ -545,7 +551,13 @@ video = coll.get_video(export_result.video_id)
 video.index_spoken_words(force=True)

 # 5. Search for action items
-results = video.search("action items and next steps")
-stream_url = results.compile()
-print(f"Action items clip: {stream_url}")
+try:
+    results = video.search("action items and next steps")
+    stream_url = results.compile()
+    print(f"Action items clip: {stream_url}")
+except InvalidRequestError as exc:
+    if "No results found" in str(exc):
+        print("No action items were detected in the recording.")
+    else:
+        raise
 ```
@@ -108,26 +108,40 @@ Compile search results into a single stream of all matching segments:

 ```python
 from videodb import SearchType
+from videodb.exceptions import InvalidRequestError

 video.index_spoken_words(force=True)
-results = video.search("key announcement", search_type=SearchType.semantic)
+try:
+    results = video.search("key announcement", search_type=SearchType.semantic)

-# Compile all matching shots into one stream
-stream_url = results.compile()
-print(f"Search results stream: {stream_url}")
+    # Compile all matching shots into one stream
+    stream_url = results.compile()
+    print(f"Search results stream: {stream_url}")

-# Or play directly
-results.play()
+    # Or play directly
+    results.play()
+except InvalidRequestError as exc:
+    if "No results found" in str(exc):
+        print("No matching announcement segments were found.")
+    else:
+        raise
 ```

 ### Stream Individual Search Hits

 ```python
-results = video.search("product demo", search_type=SearchType.semantic)
+from videodb.exceptions import InvalidRequestError

-for i, shot in enumerate(results.get_shots()):
-    stream_url = shot.generate_stream()
-    print(f"Hit {i+1} [{shot.start:.1f}s-{shot.end:.1f}s]: {stream_url}")
+try:
+    results = video.search("product demo", search_type=SearchType.semantic)
+    for i, shot in enumerate(results.get_shots()):
+        stream_url = shot.generate_stream()
+        print(f"Hit {i+1} [{shot.start:.1f}s-{shot.end:.1f}s]: {stream_url}")
+except InvalidRequestError as exc:
+    if "No results found" in str(exc):
+        print("No product demo segments matched the query.")
+    else:
+        raise
 ```

 ## Audio Playback
@@ -149,6 +163,7 @@ Combine search, timeline composition, and streaming in one workflow:
 ```python
 import videodb
 from videodb import SearchType
+from videodb.exceptions import InvalidRequestError
 from videodb.timeline import Timeline
 from videodb.asset import VideoAsset, TextAsset, TextStyle

@@ -161,22 +176,34 @@ video.index_spoken_words(force=True)
 # Search for key moments
 queries = ["introduction", "main demo", "Q&A"]
 timeline = Timeline(conn)
+timeline_offset = 0.0

 for query in queries:
-    # Find matching segments
-    results = video.search(query, search_type=SearchType.semantic)
-    for shot in results.get_shots():
-        timeline.add_inline(
-            VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
-        )
+    try:
+        results = video.search(query, search_type=SearchType.semantic)
+        shots = results.get_shots()
+    except InvalidRequestError as exc:
+        if "No results found" in str(exc):
+            shots = []
+        else:
+            raise

-    # Add section label as overlay on the first shot
-    timeline.add_overlay(0, TextAsset(
+    if not shots:
+        continue
+
+    # Add the section label where this batch starts in the compiled timeline
+    timeline.add_overlay(timeline_offset, TextAsset(
        text=query.title(),
        duration=2,
        style=TextStyle(fontsize=36, fontcolor="white", boxcolor="#222222"),
    ))

+    for shot in shots:
+        timeline.add_inline(
+            VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
+        )
+        timeline_offset += shot.end - shot.start
+
 stream_url = timeline.generate_stream()
 print(f"Dynamic compilation: {stream_url}")
 ```
@@ -216,6 +243,7 @@ Build a stream dynamically based on search availability:
 ```python
 import videodb
 from videodb import SearchType
+from videodb.exceptions import InvalidRequestError
 from videodb.timeline import Timeline
 from videodb.asset import VideoAsset, TextAsset, TextStyle

@@ -231,21 +259,29 @@ timeline = Timeline(conn)
 topics = ["opening remarks", "technical deep dive", "closing"]

 found_any = False
+timeline_offset = 0.0
 for topic in topics:
-    results = video.search(topic, search_type=SearchType.semantic)
-    shots = results.get_shots()
+    try:
+        results = video.search(topic, search_type=SearchType.semantic)
+        shots = results.get_shots()
+    except InvalidRequestError as exc:
+        if "No results found" in str(exc):
+            shots = []
+        else:
+            raise
+
    if shots:
        found_any = True
-        for shot in shots:
-            timeline.add_inline(
-                VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
-            )
-        # Add a label overlay for the section
-        timeline.add_overlay(0, TextAsset(
+        timeline.add_overlay(timeline_offset, TextAsset(
            text=topic.title(),
            duration=2,
            style=TextStyle(fontsize=32, fontcolor="white", boxcolor="#1a1a2e"),
        ))
+        for shot in shots:
+            timeline.add_inline(
+                VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
+            )
+            timeline_offset += shot.end - shot.start

 if found_any:
    stream_url = timeline.generate_stream()
@@ -263,6 +299,7 @@ Process an event recording into a streamable recap with multiple sections:
 ```python
 import videodb
 from videodb import SearchType
+from videodb.exceptions import InvalidRequestError
 from videodb.timeline import Timeline
 from videodb.asset import VideoAsset, AudioAsset, ImageAsset, TextAsset, TextStyle

@@ -287,33 +324,63 @@ title_img = coll.generate_image(

 # Build the recap timeline
 timeline = Timeline(conn)
+timeline_offset = 0.0

 # Main video segments from search
-keynote = event.search("keynote announcement", search_type=SearchType.semantic)
-if keynote.get_shots():
-    for shot in keynote.get_shots()[:5]:
+try:
+    keynote = event.search("keynote announcement", search_type=SearchType.semantic)
+    keynote_shots = keynote.get_shots()[:5]
+except InvalidRequestError as exc:
+    if "No results found" in str(exc):
+        keynote_shots = []
+    else:
+        raise
+if keynote_shots:
+    keynote_start = timeline_offset
+    for shot in keynote_shots:
        timeline.add_inline(
            VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
        )
+        timeline_offset += shot.end - shot.start
+else:
+    keynote_start = None

-demo = event.search("product demo", search_type=SearchType.semantic)
-if demo.get_shots():
-    for shot in demo.get_shots()[:5]:
+try:
+    demo = event.search("product demo", search_type=SearchType.semantic)
+    demo_shots = demo.get_shots()[:5]
+except InvalidRequestError as exc:
+    if "No results found" in str(exc):
+        demo_shots = []
+    else:
+        raise
+if demo_shots:
+    demo_start = timeline_offset
+    for shot in demo_shots:
        timeline.add_inline(
            VideoAsset(asset_id=shot.video_id, start=shot.start, end=shot.end)
        )
+        timeline_offset += shot.end - shot.start
+else:
+    demo_start = None

 # Overlay title card image
 timeline.add_overlay(0, ImageAsset(
    asset_id=title_img.id, width=100, height=100, x=80, y=20, duration=5
 ))

-# Overlay section labels
-timeline.add_overlay(5, TextAsset(
-    text="Keynote Highlights",
-    duration=3,
-    style=TextStyle(fontsize=40, fontcolor="white", boxcolor="#0d1117"),
-))
+# Overlay section labels at the correct timeline offsets
+if keynote_start is not None:
+    timeline.add_overlay(max(5, keynote_start), TextAsset(
+        text="Keynote Highlights",
+        duration=3,
+        style=TextStyle(fontsize=40, fontcolor="white", boxcolor="#0d1117"),
+    ))
+if demo_start is not None:
+    timeline.add_overlay(max(5, demo_start), TextAsset(
+        text="Demo Highlights",
+        duration=3,
+        style=TextStyle(fontsize=36, fontcolor="white", boxcolor="#0d1117"),
+    ))

 # Overlay background music
 timeline.add_overlay(0, AudioAsset(
@@ -30,6 +30,7 @@ import sys
 import json
 import signal
 import asyncio
+import logging
 from datetime import datetime, timezone
 from pathlib import Path

@@ -43,10 +44,17 @@ MAX_RETRIES = 10
 INITIAL_BACKOFF = 1  # seconds
 MAX_BACKOFF = 60     # seconds

+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] %(message)s",
+    datefmt="%H:%M:%S",
+)
+LOGGER = logging.getLogger(__name__)
+
 # Parse arguments
-def parse_args():
+def parse_args() -> tuple[bool, Path]:
    clear = False
-    output_dir = None
+    output_dir: str | None = None
    
    args = sys.argv[1:]
    for arg in args:
@@ -71,15 +79,15 @@ _first_connection = True

 def log(msg: str):
    """Log with timestamp."""
-    ts = datetime.now().strftime("%H:%M:%S")
-    print(f"[{ts}] {msg}", flush=True)
+    LOGGER.info(msg)


 def append_event(event: dict):
    """Append event to JSONL file with timestamps."""
-    event["ts"] = datetime.now(timezone.utc).isoformat()
-    event["unix_ts"] = datetime.now(timezone.utc).timestamp()
-    with open(EVENTS_FILE, "a") as f:
+    now = datetime.now(timezone.utc)
+    event["ts"] = now.isoformat()
+    event["unix_ts"] = now.timestamp()
+    with EVENTS_FILE.open("a", encoding="utf-8") as f:
        f.write(json.dumps(event) + "\n")


@@ -93,8 +101,8 @@ def cleanup_pid():
    """Remove PID file on exit."""
    try:
        PID_FILE.unlink(missing_ok=True)
-    except Exception:
-        pass
+    except OSError as exc:
+        LOGGER.debug("Failed to remove PID file %s: %s", PID_FILE, exc)


 async def listen_with_retry():