docs: tighten videodb listener guidance

2026-06-14 20:21:23 +08:00 · 2026-03-10 21:22:35 -07:00
parent db2bf16427
commit 70449a1cd7
4 changed files with 46 additions and 14 deletions
@@ -165,7 +165,7 @@ Combine scene extraction with text generation:
 from videodb import SceneExtractionType

 # First index scenes
-video.index_scenes(
+scenes = video.index_scenes(
    extraction_type=SceneExtractionType.time_based,
    extraction_config={"time": 10},
    prompt="Describe the visual content in this scene.",
@@ -173,11 +173,21 @@ video.index_scenes(

 # Get transcript for spoken context
 transcript_text = video.get_transcript_text()
+scene_descriptions = []
+for scene in scenes:
+    if isinstance(scene, dict):
+        description = scene.get("description") or scene.get("summary")
+    else:
+        description = getattr(scene, "description", None) or getattr(scene, "summary", None)
+    scene_descriptions.append(description or str(scene))
+
+scenes_text = "\n".join(scene_descriptions)

 # Analyze with collection LLM
 result = coll.generate_text(
    prompt=(
        f"Given this video transcript:\n{transcript_text}\n\n"
+        f"And these visual scene descriptions:\n{scenes_text}\n\n"
        "Based on the spoken and visual content, describe the main topics covered."
    ),
    model_name="pro",