docs: tighten videodb listener guidance

This commit is contained in:
Affaan Mustafa
2026-03-10 21:22:35 -07:00
parent db2bf16427
commit 70449a1cd7
4 changed files with 46 additions and 14 deletions

View File

@@ -165,7 +165,7 @@ Combine scene extraction with text generation:
from videodb import SceneExtractionType
# First index scenes
video.index_scenes(
scenes = video.index_scenes(
extraction_type=SceneExtractionType.time_based,
extraction_config={"time": 10},
prompt="Describe the visual content in this scene.",
@@ -173,11 +173,21 @@ video.index_scenes(
# Get transcript for spoken context
transcript_text = video.get_transcript_text()


def _scene_text(scene):
    """Return one scene's text: description, else summary, else str(scene)."""
    if isinstance(scene, dict):
        text = scene.get("description") or scene.get("summary")
    else:
        text = getattr(scene, "description", None) or getattr(scene, "summary", None)
    # Fall back to the raw string form so every scene contributes a line.
    return text or str(scene)


# NOTE(review): assumes `scenes` is an iterable of scene records (dicts or
# objects); confirm what the indexing call above actually returns.
scene_descriptions = [_scene_text(scene) for scene in scenes]

# One description per line, ready to embed in the LLM prompt below.
scenes_text = "\n".join(scene_descriptions)
# Analyze with collection LLM
result = coll.generate_text(
prompt=(
f"Given this video transcript:\n{transcript_text}\n\n"
f"And these visual scene descriptions:\n{scenes_text}\n\n"
"Based on the spoken and visual content, describe the main topics covered."
),
model_name="pro",