import json
from collections import Counter

import chromadb

# Name of the ChromaDB collection for the indexed video analysis documents;
# same value as the default `collection_name` of index_analysis_data().
COLLECTION_NAME = 'video_analysis_data'


def generate_text_summary(record):
    """Describe one detection record as a natural-language sentence.

    Args:
        record: Mapping with the keys ``video_id``, ``timestamp_sec`` and
            ``detections``. ``detections`` is a sequence of mappings that
            each carry a ``label`` key.

    Returns:
        A string that names the video and timestamp, followed either by a
        per-label count of the detected objects or by a note that no
        objects were detected.

    Raises:
        KeyError: If ``record`` lacks one of the keys above, or a detection
            has no ``label``.
    """
    video_id = record['video_id']
    timestamp = record['timestamp_sec']
    detections = record['detections']

    header = f"Analysis of video '{video_id}' at {timestamp} seconds:"

    if not detections:
        return f"{header} No objects were detected in this frame."

    # Counter preserves first-seen order, so labels are listed in the order
    # they first appear among the detections.
    label_counts = Counter(det['label'] for det in detections)

    # Use the singular noun for a count of one ("1 instance", not "1 instances").
    descriptions = [
        f"{count} {'instance' if count == 1 else 'instances'} of '{label}'"
        for label, count in label_counts.items()
    ]

    return f"{header} Detected objects include: {', '.join(descriptions)}."


def index_analysis_data(json_file='raw_analysis.json', collection_name='video_analysis_data'):
    """Load raw analysis records, summarize them, and index them in ChromaDB.

    Each record is turned into a text document by generate_text_summary()
    and stored with its ``video_id``, ``timestamp_sec`` and ``frame_id`` as
    metadata in a persistent ChromaDB store at ``./chroma_db``.

    Args:
        json_file: Path to the JSON file to load. It is expected to hold a
            JSON array of record objects (the code takes its length and
            enumerates it).
        collection_name: Name of the ChromaDB collection to write to; the
            collection is created when it does not exist yet.

    Returns:
        None. Progress and the missing-file error are printed to stdout.

    Raises:
        KeyError: If a record lacks ``video_id``, ``timestamp_sec`` or
            ``frame_id``.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(json_file, 'r', encoding='utf-8') as f:
            raw_data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {json_file} not found. Run 'video_analyzer.py' first.")
        return

    client = chromadb.PersistentClient(path="./chroma_db")
    collection = client.get_or_create_collection(name=collection_name)

    documents = []
    metadatas = []
    ids = []

    print(f"Indexing {len(raw_data)} analysis records...")

    for i, record in enumerate(raw_data):
        doc_text = generate_text_summary(record)
        if not doc_text:
            continue

        documents.append(doc_text)
        metadatas.append({
            'video_id': record['video_id'],
            'timestamp_sec': record['timestamp_sec'],
            'frame_id': record['frame_id'],
        })
        # Ids are derived from the record's position in the input file.
        ids.append(f"doc_{i}")

    if not documents:
        print("No valid documents generated for indexing.")
        return

    # Upsert rather than add: the ids are position-based, so a re-run reuses
    # them. Upsert replaces the stored entries instead of leaving stale ones
    # in place or rejecting the duplicates.
    collection.upsert(
        documents=documents,
        metadatas=metadatas,
        ids=ids,
    )
    print(f"Successfully indexed {len(documents)} documents into ChromaDB collection '{collection_name}'.")


if __name__ == '__main__':
    # Run as a script: index the default analysis file into the default collection.
    index_analysis_data()