File size: 2,873 Bytes
fea499e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3
"""
🎬 Video Intelligence Platform
Akinator-style video search with RAG, boolean queries, and tree-based refinement.

Usage:
    python app.py                          # Launch Gradio UI
    python app.py --index video.mp4        # Index a video from CLI
    python app.py --search "red car"       # Search from CLI
"""
import argparse
import os
import sys

# Put the parent of this script's directory on sys.path so the
# `video_intelligence` imports below can be resolved.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from video_intelligence.config import Config
from video_intelligence.app import main as launch_ui


def cli_index(video_path: str, config: Config):
    """Run the indexing pipeline over one video and print its stats.

    Args:
        video_path: Path of the video, handed unchanged to
            ``IndexingPipeline.index_video``.
        config: Configuration object used to construct the pipeline.
    """
    # Imported at call time rather than at module top — presumably so that
    # launching the UI does not pay for loading the pipeline (confirm).
    from video_intelligence.pipeline import IndexingPipeline

    indexing_stats = IndexingPipeline(config).index_video(video_path)
    print(f"\n📊 Stats: {indexing_stats}")


def cli_search(query: str, config: Config):
    """Search a previously built video index and print the hits.

    Opens the index at ``config.db_path``, loads the two FAISS files named in
    ``config``, builds the Gemini client and SigLIP encoder, runs ``query``
    through ``QueryEngine.search`` and prints one numbered entry per result.
    When the engine yields no results, an explicit "no matches" line is
    printed instead of leaving the header on its own.

    Args:
        query: Search text, passed unchanged to ``QueryEngine.search``.
        config: Supplies ``db_path``, ``faiss_visual_path``,
            ``faiss_caption_path``, ``gemini_api_key``, ``siglip_model`` and
            ``device``.
    """
    # NOTE(review): IndexingPipeline is not referenced below. The import is
    # kept in case loading the module has side effects — confirm and remove.
    from video_intelligence.pipeline import IndexingPipeline
    from video_intelligence.query_engine import QueryEngine
    from video_intelligence.visual_encoders import SigLIPEncoder
    from video_intelligence.gemini_client import GeminiClient
    from video_intelligence.index_store import VideoIndex

    # Longest caption prefix shown per result before it is cut with "...".
    max_caption_chars = 120

    # Load existing index
    index = VideoIndex(db_path=config.db_path)
    index.load_faiss(config.faiss_visual_path, config.faiss_caption_path)

    gemini = GeminiClient(api_key=config.gemini_api_key)
    siglip = SigLIPEncoder(model_name=config.siglip_model, device=config.device)

    engine = QueryEngine(index=index, gemini=gemini, siglip=siglip)
    results = engine.search(query)

    print(f"\n🔍 Results for: '{query}'")
    print(f"{'=' * 60}")

    # `rank` stays 0 only if the loop body never runs, which also works when
    # `results` is a one-shot iterable that cannot be truth-tested.
    rank = 0
    for rank, r in enumerate(results, 1):
        print(f"  {rank}. [{r.time_str}] score={r.score:.3f} | {r.match_source}")
        if r.caption:
            caption = r.caption
            # Only mark the caption as truncated when text was actually cut.
            if len(caption) > max_caption_chars:
                caption = caption[:max_caption_chars] + "..."
            print(f"     Caption: {caption}")
        if r.detections:
            print(f"     Objects: {', '.join(r.detections)}")
        print()

    if rank == 0:
        print("  (no matches found)")


if __name__ == "__main__":
    # Script entry point: parse the flags, build a Config, then either run
    # one CLI action (--index takes precedence over --search) or start the UI.
    cli = argparse.ArgumentParser(description="Video Intelligence Platform")
    flag_specs = (
        ("--index", {"help": "Path to video file to index"}),
        ("--search", {"help": "Search query"}),
        ("--api-key", {"help": "Gemini API key (or set GEMINI_API_KEY env)"}),
        ("--device", {"default": "cpu", "help": "Device (cpu/cuda)"}),
    )
    for flag, extra in flag_specs:
        cli.add_argument(flag, type=str, **extra)
    opts = cli.parse_args()

    # A key given on the command line replaces whatever the environment held,
    # so the lookup below always sees the effective value.
    if opts.api_key:
        os.environ["GEMINI_API_KEY"] = opts.api_key

    settings = Config(
        gemini_api_key=os.environ.get("GEMINI_API_KEY", ""),
        device=opts.device,
    )

    # Choose the CLI action, if any; an empty flag value counts as "not given".
    if opts.index:
        action, argument = cli_index, opts.index
    elif opts.search:
        action, argument = cli_search, opts.search
    else:
        action, argument = None, None

    if action is None:
        # No CLI action requested: start the UI (the config is not validated
        # on this path).
        launch_ui()
    else:
        settings.validate()
        action(argument, settings)