Spaces:

kishanAmaliya
/

database

Running

App Files Files Community

kishanAmaliya committed about 6 hours ago

Commit

4dff442

0 Parent(s):

Deployment: Complete optimized multi-modal search engine

Browse files

Files changed (39) hide show

.DS_Store +0 -0
.cph/.B_Simons_and_Cakes_for_Success.cpp_86f1157f283c801d24529d81b96f40c8.prob +1 -0
Dockerfile +21 -0
README.md +1 -0
app/.DS_Store +0 -0
app/LLD/__init__.py +0 -0
app/LLD/__pycache__/__init__.cpython-313.pyc +0 -0
app/LLD/__pycache__/ffmpeg_strategy.cpython-313.pyc +0 -0
app/LLD/__pycache__/interfaces.cpython-313.pyc +0 -0
app/LLD/__pycache__/qdrant_strategy.cpython-313.pyc +0 -0
app/LLD/ffmpeg_strategy.py +55 -0
app/LLD/interfaces.py +33 -0
app/LLD/qdrant_strategy.py +103 -0
app/__init__.py +0 -0
app/__pycache__/__init__.cpython-313.pyc +0 -0
app/__pycache__/main.cpython-313.pyc +0 -0
app/api/__init__.py +0 -0
app/api/__pycache__/__init__.cpython-313.pyc +0 -0
app/api/v1/__init__.py +0 -0
app/api/v1/__pycache__/__init__.cpython-313.pyc +0 -0
app/api/v1/__pycache__/videos.cpython-313.pyc +0 -0
app/api/v1/videos.py +215 -0
app/core/__init__.py +0 -0
app/core/__pycache__/__init__.cpython-313.pyc +0 -0
app/core/__pycache__/config.cpython-313.pyc +0 -0
app/core/config.py +17 -0
app/main.py +38 -0
docker-compose.yml +20 -0
frontend/index.html +429 -0
requirements.txt +12 -0
start.sh +6 -0
workers/__init__.py +0 -0
workers/__pycache__/__init__.cpython-313.pyc +0 -0
workers/__pycache__/celery_app.cpython-313.pyc +0 -0
workers/__pycache__/ml_pipeline.cpython-313.pyc +0 -0
workers/__pycache__/tasks.cpython-313.pyc +0 -0
workers/celery_app.py +19 -0
workers/ml_pipeline.py +63 -0
workers/tasks.py +80 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.cph/.B_Simons_and_Cakes_for_Success.cpp_86f1157f283c801d24529d81b96f40c8.prob ADDED Viewed

	@@ -0,0 +1 @@

+ {"name":"B. Simons and Cakes for Success","group":"Codeforces - Codeforces Round 1083 (Div. 2)","url":"https://codeforces.com/contest/2205/problem/B","interactive":false,"memoryLimit":256,"timeLimit":1000,"tests":[{"id":1780024223141,"input":"4\n8\n12\n369\n55635800\n","output":"2\n6\n123\n2090\n"}],"testType":"single","input":{"type":"stdin"},"output":{"type":"stdout"},"languages":{"java":{"mainClass":"Main","taskClass":"BSimonsAndCakesForSuccess"}},"batch":{"id":"58721974-54fc-4479-b9d8-ababbbac7016","size":1},"srcPath":"/Users/kishanamaliya/Documents/videostream_ai/B_Simons_and_Cakes_for_Success.cpp"}

Dockerfile ADDED Viewed

	@@ -0,0 +1,21 @@

+# Single-image build for the whole stack: the container entry point is start.sh.
+# NOTE(review): the committed __pycache__ files are cpython-313 while this image is 3.11 - confirm the intended interpreter version.
+FROM python:3.11-slim
+# Install system media dependencies for video analysis and frame extraction
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /code
+# Copy and install requirements before the rest of the sources so the
+# dependency layer is only rebuilt when requirements.txt changes.
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Copies the entire build context into /code (including any files not excluded by a .dockerignore).
+COPY . .
+# Grant execution permissions to our orchestration script
+RUN chmod +x start.sh
+# Run the unified stack
+CMD ["./start.sh"]

README.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ # videostream-AI

app/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app/LLD/__init__.py ADDED Viewed

File without changes

app/LLD/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (162 Bytes). View file

app/LLD/__pycache__/ffmpeg_strategy.cpython-313.pyc ADDED Viewed

Binary file (2.88 kB). View file

app/LLD/__pycache__/interfaces.cpython-313.pyc ADDED Viewed

Binary file (2.57 kB). View file

app/LLD/__pycache__/qdrant_strategy.cpython-313.pyc ADDED Viewed

Binary file (5.09 kB). View file

app/LLD/ffmpeg_strategy.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import ffmpeg
+from typing import List, Dict, Any
+from app.LLD.interfaces import VideoProcessingStrategy
+class LocalFFmpegStrategy(VideoProcessingStrategy):
+    """
+    Concrete system execution using native FFmpeg pipelines.
+    """
+    def transcode_to_hls(self, input_path: str, output_dir: str) -> str:
+        os.makedirs(output_dir, exist_ok=True)
+        output_playlist = os.path.join(output_dir, "playlist.m3u8")
+        # Executes scalable stream segmentation into 4-second chunks
+        (
+            ffmpeg
+            .input(input_path)
+            .output(output_playlist,
+                    format='hls',
+                    hls_time=4,
+                    hls_playlist_type='vod',
+                    hls_segment_filename=os.path.join(output_dir, "file%03d.ts"))
+            .overwrite_output()
+            .run(quiet=True)
+        )
+        return output_playlist
+    def extract_keyframes(self, input_path: str, output_dir: str, interval_seconds: int) -> List[Dict[str, Any]]:
+        frames_dir = os.path.join(output_dir, "frames")
+        os.makedirs(frames_dir, exist_ok=True)
+        output_pattern = os.path.join(frames_dir, "frame_%04d.jpg")
+        # Forces extraction of 1 frame per specified interval duration safely
+        (
+            ffmpeg
+            .input(input_path)
+            .filter('fps', fps=f"1/{interval_seconds}")
+            .output(output_pattern, qscale=2)
+            .overwrite_output()
+            .run(quiet=True)
+        )
+        # Map out extracted physical assets into clear metadata dictionaries
+        extracted_metadata = []
+        generated_files = sorted(os.listdir(frames_dir))
+        for idx, filename in enumerate(generated_files):
+            timestamp = idx * interval_seconds
+            extracted_metadata.append({
+                "timestamp_seconds": timestamp,
+                "file_path": os.path.join(frames_dir, filename)
+            })
+        return extracted_metadata

app/LLD/interfaces.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from abc import ABC, abstractmethod
+from typing import List, Dict, Any
+class VideoProcessingStrategy(ABC):
+    """
+    Abstract Strategy for processing raw videos (Transcoding + Frame Extraction).
+    Enforces clean LLD decoupled from specific system binaries.
+    """
+    @abstractmethod
+    def transcode_to_hls(self, input_path: str, output_dir: str) -> str:
+        """Converts raw video (.mp4) into an HLS playlist (.m3u8)"""
+        pass
+    @abstractmethod
+    def extract_keyframes(self, input_path: str, output_dir: str, interval_seconds: int) -> List[Dict[str, Any]]:
+        """Extracts periodic frame assets along with their exact timestamps"""
+        pass
+class VectorStoreInterface(ABC):
+    """
+    Abstract Storage Interface for handling high-dimensional semantic search indexing.
+    Decouples application logic from specific Vector DB clients (Qdrant/Milvus).
+    """
+    @abstractmethod
+    def upsert_embeddings(self, video_id: str, embeddings: List[List[float]], metadata: List[Dict[str, Any]]) -> bool:
+        """Pushes batch frame vectors down into the vector space"""
+        pass
+    @abstractmethod
+    def search_similarity(self, query_vector: List[float], top_k: int) -> List[Dict[str, Any]]:
+        """Executes a high-dimensional nearest-neighbor lookup matching the query text vector"""
+        pass

app/LLD/qdrant_strategy.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import uuid
+from typing import List, Dict, Any
+from qdrant_client import QdrantClient
+from qdrant_client.models import Distance, VectorParams, PointStruct
+from app.LLD.interfaces import VectorStoreInterface
+from app.core.config import settings
+class QdrantVectorStoreStrategy(VectorStoreInterface):
+    """
+    Concrete implementation of VectorStoreInterface leveraging Qdrant DB.
+    Encapsulates schema enforcement, item upsert workflows, and multi-modal calculations.
+    """
+    def __init__(self) -> None:
+        # Establish client connection targeting the docker-compose service network
+        self.client = QdrantClient(host=settings.QDRANT_HOST, port=settings.QDRANT_PORT)
+        self.collection_name = "video_frames"
+        self._ensure_collection_exists()
+    def _ensure_collection_exists(self) -> None:
+        """
+        Idempotent schema controller checking for collection persistence
+        and initializing vector parameters if absent.
+        """
+        try:
+            if not self.client.collection_exists(collection_name=self.collection_name):
+                self.client.create_collection(
+                    collection_name=self.collection_name,
+                    vectors_config=VectorParams(
+                        size=settings.VECTOR_DIMENSION,
+                        distance=Distance.COSINE
+                    )
+                )
+        except Exception as e:
+            # Crucial LLD practice: fail fast during object construction if state is compromised
+            raise RuntimeError(f"Failed initializing Qdrant collection layer: {str(e)}")
+    def upsert_embeddings(self, video_id: str, embeddings: List[List[float]], metadata: List[Dict[str, Any]]) -> bool:
+        """
+        Transforms raw embeddings into structurally validated Point payloads
+        and updates the database via high-throughput vector chunk batching.
+        """
+        try:
+            points = []
+            for idx, (vector, meta) in enumerate(zip(embeddings, metadata)):
+                # Inject parent relational identifier directly into metadata payload
+                payload = {
+                    "video_id": video_id,
+                    "timestamp_seconds": meta.get("timestamp_seconds"),
+                    "file_path": meta.get("file_path")
+                }
+                # Use deterministic UUID generation based on the namespace
+                # to prevent document duplication during re-processing jobs
+                point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{video_id}_{idx}"))
+                points.append(
+                    PointStruct(
+                        id=point_id,
+                        vector=vector,
+                        payload=payload
+                    )
+                )
+            # Execute transactional batch upsert operation
+            self.client.upsert(
+                collection_name=self.collection_name,
+                wait=True,
+                points=points
+            )
+            return True
+        except Exception as e:
+            # Structured application reporting block
+            print(f"[ERROR] Vector storage execution fault for Video {video_id}: {str(e)}")
+            return False
+    def search_similarity(self, query_vector: List[float], top_k: int) -> List[Dict[str, Any]]:
+        """
+        Performs an approximate nearest neighbor (ANN) search inside the multi-dimensional
+        vector space matching against text embeddings.
+        """
+        try:
+            search_results = self.client.search(
+                collection_name=self.collection_name,
+                query_vector=query_vector,
+                limit=top_k
+            )
+            # Translate raw database structures into standard clean payloads
+            formatted_results = []
+            for hit in search_results:
+                formatted_results.append({
+                    "score": hit.score,
+                    "video_id": hit.payload.get("video_id"),
+                    "timestamp_seconds": hit.payload.get("timestamp_seconds"),
+                    "file_path": hit.payload.get("file_path")
+                })
+            return formatted_results
+        except Exception as e:
+            print(f"[ERROR] Vector distance analysis failed: {str(e)}")
+            return []

app/__init__.py ADDED Viewed

File without changes

app/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (158 Bytes). View file

app/__pycache__/main.cpython-313.pyc ADDED Viewed

Binary file (1.86 kB). View file

app/api/__init__.py ADDED Viewed

File without changes

app/api/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (162 Bytes). View file

app/api/v1/__init__.py ADDED Viewed

File without changes

app/api/v1/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (165 Bytes). View file

app/api/v1/__pycache__/videos.cpython-313.pyc ADDED Viewed

Binary file (10.4 kB). View file

app/api/v1/videos.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import os
+import json
+import shutil
+import uuid
+import traceback
+import redis
+from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Query
+from typing import List, Dict, Any
+from celery import Celery
+from qdrant_client.http import exceptions as qdrant_exceptions
+from app.core.config import settings
+from app.LLD.qdrant_strategy import QdrantVectorStoreStrategy
+# Router holding every /videos endpoint defined in this module.
+router = APIRouter()
+# Redis client used as the metadata store (keys "video:metadata:<id>");
+# decode_responses=True so values come back as str rather than bytes.
+redis_client = redis.Redis.from_url(settings.REDIS_URL, decode_responses=True)
+# Celery app used here only to enqueue tasks by name (send_task) and wait for
+# their results; broker and result backend both point at REDIS_URL.
+celery_client = Celery("video_tasks", broker=settings.REDIS_URL, backend=settings.REDIS_URL)
+# Built at import time: the constructor checks/creates the Qdrant collection and
+# raises RuntimeError on failure, which would abort importing this module.
+vector_store = QdrantVectorStoreStrategy()
+# --- HELPER STORAGE UTILITIES ---
+def fetch_all_cloud_metadata() -> List[Dict[str, Any]]:
+    """Retrieves all permanently saved metadata objects from Upstash Redis."""
+    try:
+        keys = redis_client.keys("video:metadata:*")
+        if not keys:
+            return []
+        values = redis_client.mget(keys)
+        return [json.loads(v) for v in values if v]
+    except Exception as e:
+        print(f"[REDIS STORAGE ERROR] Fetch failed: {str(e)}")
+        return []
+# --- 1. STATIC EXPLICIT GET/POST ROUTES ---
+@router.get("/", response_model=List[Dict[str, Any]])
+async def get_landing_page_feed():
+    """Returns all permanently registered video cards from the Redis cloud database layer."""
+    return fetch_all_cloud_metadata()
+@router.post("/upload", status_code=202)
+async def upload_video(
+    file: UploadFile = File(...),
+    title: str = Form(...),
+    description: str = Form(""),
+    tags: str = Form("")
+):
+    """Saves raw files locally, indexes vectors, and saves state to persistent cloud storage."""
+    if not file.filename.endswith(('.mp4', '.mkv', '.avi')):
+        raise HTTPException(status_code=400, detail="Invalid video format codec structure.")
+    video_id = str(uuid.uuid4())
+    file_extension = os.path.splitext(file.filename)[1]
+    saved_filename = f"{video_id}{file_extension}"
+    raw_file_path = os.path.join(settings.UPLOAD_DIR, saved_filename)
+    try:
+        with open(raw_file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"File system failure during ingestion: {str(e)}")
+    video_metadata = {
+        "id": video_id,
+        "title": title,
+        "description": description,
+        "tags": [tag.strip() for tag in tags.split(",") if tag.strip()],
+        "hls_playlist_url": f"/shared_storage/processed/{video_id}/playlist.m3u8",
+        "status": "processing"
+    }
+    # Commit directly to your permanent Upstash Redis cluster
+    try:
+        redis_client.set(f"video:metadata:{video_id}", json.dumps(video_metadata))
+    except Exception as e:
+        print(f"[REDIS WRITE ERROR] Critical persistence failure: {str(e)}")
+    celery_client.send_task(
+        "workers.tasks.process_video_pipeline",
+        args=[video_id, raw_file_path]
+    )
+    return {
+        "message": "Video ingestion accepted. Saved to permanent cloud records.",
+        "video_id": video_id,
+        "status": "processing"
+    }
+@router.get("/search")
+async def execute_multimodal_search(query: str = Query(...), top_k: int = 20):
+    """Granular inside-video highlight matching using modern query_points."""
+    if not query.strip():
+        raise HTTPException(status_code=400, detail="Search text cannot be blank.")
+    try:
+        task = celery_client.send_task("workers.tasks.generate_text_embedding", args=[query])
+        text_vector = task.get(timeout=10)
+        if not text_vector:
+            raise HTTPException(status_code=502, detail="Neural embedding task failed.")
+        response = vector_store.client.query_points(
+            collection_name=vector_store.collection_name,
+            query=text_vector,
+            limit=top_k
+        )
+        search_results = response.points
+        formatted_results = [
+            {
+                "video_id": hit.payload.get("video_id"),
+                "timestamp_seconds": hit.payload.get("timestamp_seconds"),
+                "score": hit.score
+            }
+            for hit in search_results
+        ]
+        return {"results": formatted_results}
+    except qdrant_exceptions.UnexpectedResponse as q_err:
+        print(f"[QDRANT DATABASE WARN] Collection not initialized yet: {str(q_err)}")
+        return {"results": []}
+    except Exception as e:
+        print("\n💥!!! CRITICAL SEARCH EXCEPTION CAUGHT !!!💥")
+        traceback.print_exc()
+        print("💥!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!💥\n")
+        raise HTTPException(status_code=500, detail=f"Internal vector processing fault: {str(e)}")
+@router.get("/global-search", response_model=List[Dict[str, Any]])
+async def execute_global_platform_search(query: str = Query(...), top_k: int = 20):
+    """Global hybrid search engine reading cross-references safely from cloud Redis clusters."""
+    if not query.strip():
+        raise HTTPException(status_code=400, detail="Search query cannot be blank.")
+    query_lower = query.lower()
+    discovered_video_map = {}
+    # 1. Fetch live metadata array from Redis cache
+    all_metadata = fetch_all_cloud_metadata()
+    # 2. Relational Text Scanning
+    for video in all_metadata:
+        video_id = video.get("id")
+        in_title = query_lower in video.get("title", "").lower()
+        in_desc = query_lower in video.get("description", "").lower()
+        in_tags = any(query_lower in tag.lower() for tag in video.get("tags", []))
+        if (in_title or in_desc or in_tags) and video_id:
+            discovered_video_map[video_id] = video
+    # 3. Multimodal Neural Frame Scanning
+    try:
+        task = celery_client.send_task("workers.tasks.generate_text_embedding", args=[query])
+        text_vector = task.get(timeout=5)
+        if text_vector:
+            response = vector_store.client.query_points(
+                collection_name=vector_store.collection_name,
+                query=text_vector,
+                limit=top_k
+            )
+            # Map out database item objects using the Redis dictionary cache
+            metadata_lookup = {v.get("id"): v for v in all_metadata if v.get("id")}
+            for hit in response.points:
+                v_id = hit.payload.get("video_id")
+                if v_id and v_id not in discovered_video_map:
+                    if v_id in metadata_lookup:
+                        discovered_video_map[v_id] = metadata_lookup[v_id]
+    except Exception as e:
+        print(f"[GLOBAL SEARCH WARN] Neural fallback active: {str(e)}")
+    return list(discovered_video_map.values())
+# --- 2. DYNAMIC WILDCARD ROUTES (BOTTOM ZONE) ---
+@router.delete("/{video_id}", status_code=200)
+async def delete_video(video_id: str):
+    """Removes files, deletes vectors, and drops records from the cloud Redis instance."""
+    try:
+        redis_client.delete(f"video:metadata:{video_id}")
+    except Exception as e:
+        print(f"[REDIS DELETE ERROR] Record cleanup failed: {str(e)}")
+    processed_dir = os.path.join(settings.OUTPUT_DIR, video_id)
+    if os.path.exists(processed_dir):
+        shutil.rmtree(processed_dir)
+    try:
+        vector_store.client.delete(
+            collection_name=vector_store.collection_name,
+            points_selector=vector_store.client.models.Filter(
+                must=[
+                    vector_store.client.models.FieldCondition(
+                        key="video_id",
+                        match=vector_store.client.models.MatchValue(value=video_id)
+                    )
+                ]
+            )
+        )
+    except Exception as e:
+        print(f"[WARNING] Vector cascade clearance mismatch: {str(e)}")
+    return {"message": f"Successfully purged assets for ID: {video_id}."}

app/core/__init__.py ADDED Viewed

File without changes

app/core/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (163 Bytes). View file

app/core/__pycache__/config.cpython-313.pyc ADDED Viewed

Binary file (1.25 kB). View file

app/core/config.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    """Application configuration; each field below is a default that the settings loader may override (an ``.env`` file is configured as a source)."""
+    PROJECT_NAME: str = "Upgraded Video Search Platform (SigLIP 2 SO400M)"
+    # Redis connection URL
+    REDIS_URL: str = "redis://localhost:6379/0"
+    # Location of the Qdrant server
+    QDRANT_HOST: str = "localhost"
+    QDRANT_PORT: int = 6333
+    # Identifier of the embedding model; presumably consumed by the worker pipeline - verify there
+    SIGLIP_MODEL_ID: str = "google/siglip2-so400m-patch16-256"
+    # Size of the vectors stored in the vector collection
+    # NOTE(review): presumably has to equal the model's embedding width - confirm against SIGLIP_MODEL_ID
+    VECTOR_DIMENSION: int = 1152
+    # Folder for raw uploads
+    UPLOAD_DIR: str = "./shared_storage/uploads"
+    # Folder for processed output
+    OUTPUT_DIR: str = "./shared_storage/processed"
+    class Config:
+        env_file = ".env"
+# Shared settings instance, created once when this module is imported
+settings = Settings()

app/main.py ADDED Viewed

	@@ -0,0 +1,38 @@

+import os
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles  # <-- 1. Add this critical import
+from app.api.v1.videos import router as video_router
+from app.core.config import settings
+app = FastAPI(
+    title=settings.PROJECT_NAME,
+    version="1.0.0",
+    description="LLD-compliant production-ready Video Search Platform"
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# <-- 2. Add this mount command right here!
+# This tells FastAPI to serve anything inside the physical 'shared_storage' folder over the network.
+app.mount("/shared_storage", StaticFiles(directory="shared_storage"), name="shared_storage")
+@app.on_event("startup")
+def configure_storage_directories():
+    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
+    os.makedirs(settings.OUTPUT_DIR, exist_ok=True)
+    print("[BOOTSTRAP] System shared storage directories verified successfully.")
+app.include_router(video_router, prefix="/api/v1/videos", tags=["Videos"])
+@app.get("/")
+async def health_check():
+    return {"status": "healthy", "service": settings.PROJECT_NAME}

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+# Local backing services only: this file defines Redis and Qdrant, not the API or worker processes.
+version: '3.8'
+services:
+  # Redis, published on host port 6379
+  redis:
+    image: redis:7-alpine
+    container_name: youtube_clone_redis
+    ports:
+      - "6379:6379"
+  # Qdrant vector database; 6333 is published for the client connection
+  # (6334 is presumably the gRPC port - confirm against the Qdrant docs)
+  qdrant:
+    image: qdrant/qdrant:latest
+    container_name: youtube_clone_vector_db
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      # Named volume so stored vectors survive container re-creation
+      - qdrant_storage:/qdrant/storage
+volumes:
+  qdrant_storage:

frontend/index.html ADDED Viewed

	@@ -0,0 +1,429 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Multimodal Video Semantic Analytics Platform</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700;800&display=swap" rel="stylesheet">
+    <link href="https://cdn.jsdelivr.net/npm/video.js@8.10.0/dist/video-js.min.css" rel="stylesheet" />
+    <style>
+        body {
+            font-family: 'Plus Jakarta Sans', sans-serif;
+        }
+        .instagram-gradient-text {
+            background: linear-gradient(45deg, #3b82f6, #8b5cf6, #ec4899);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+        }
+        .instagram-gradient-bg {
+            background: linear-gradient(135deg, #2563eb, #7c3aed, #db2777);
+        }
+        .instagram-gradient-border {
+            position: relative;
+        }
+        .instagram-gradient-border::before {
+            content: "";
+            position: absolute;
+            inset: 0;
+            border-radius: 1rem;
+            padding: 1.5px;
+            background: linear-gradient(135deg, #3b82f6, #8b5cf6, #ec4899);
+            -webkit-mask: linear-gradient(#fff 0 0) content-box, linear-gradient(#fff 0 0);
+            -webkit-mask-composite: xor;
+            mask-composite: exclude;
+            pointer-events: none;
+        }
+        .custom-scrollbar::-webkit-scrollbar {
+            width: 5px;
+        }
+        .custom-scrollbar::-webkit-scrollbar-track {
+            background: #09090b;
+        }
+        .custom-scrollbar::-webkit-scrollbar-thumb {
+            background: #27272a;
+            border-radius: 999px;
+        }
+        .custom-scrollbar::-webkit-scrollbar-thumb:hover {
+            background: #7c3aed;
+        }
+    </style>
+</head>
+<body class="bg-[#030303] text-zinc-100 min-h-screen selection:bg-purple-500/30 selection:text-purple-200">
+    <nav class="sticky top-0 bg-black/60 backdrop-blur-xl border-b border-zinc-900 z-50 px-8 py-4 flex items-center justify-between">
+        <div class="flex items-center space-x-3 cursor-pointer group" onclick="loadHomepageFeed()">
+            <div class="instagram-gradient-bg p-2.5 rounded-xl shadow-[0_0_20px_rgba(124,58,237,0.4)] group-hover:scale-105 transition duration-300">
+                <svg class="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z"/>
+                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
+                </svg>
+            </div>
+            <span class="font-extrabold text-2xl tracking-tight text-white group-hover:opacity-90 transition">
+                AI<span class="instagram-gradient-text">Stream</span>
+            </span>
+        </div>
+        <div class="w-1/2 flex max-w-xl bg-zinc-900/50 rounded-2xl border border-zinc-800/80 overflow-hidden focus-within:border-purple-500/50 shadow-inner transition duration-300 backdrop-blur-md">
+            <input type="text" id="globalSearchInput" placeholder="Search library using title, tags, or visual concepts..."
+                   class="w-full bg-transparent px-5 py-3 text-sm text-zinc-100 outline-none placeholder-zinc-500">
+            <button id="globalSearchButton" onclick="executeGlobalPlatformSearch()" class="instagram-gradient-bg px-6 text-sm font-semibold transition text-white hover:opacity-90 active:scale-95 duration-200">
+                Search Feed
+            </button>
+        </div>
+        <div class="flex items-center space-x-4">
+            <button onclick="toggleUploadStudio()" class="bg-zinc-900 hover:bg-zinc-800 border border-zinc-800 hover:border-zinc-700 px-4 py-2 rounded-xl text-xs font-bold tracking-wide transition flex items-center space-x-2">
+                <span class="text-base text-purple-400 font-normal">+</span>
+                <span>Creator Studio</span>
+            </button>
+        </div>
+    </nav>
+    <main class="p-8 max-w-7xl mx-auto space-y-12">
+        <div id="studioSection" class="hidden bg-zinc-950 rounded-2xl border border-zinc-900 shadow-2xl p-6 transition duration-300 animate-slideDown max-w-3xl mx-auto">
+            <div class="flex items-center justify-between border-b border-zinc-900 pb-4 mb-6">
+                <div>
+                    <h3 class="text-lg font-bold tracking-tight text-zinc-100">AI Ingestion Pipeline</h3>
+                    <p class="text-xs text-zinc-500 mt-0.5">Upload raw media streams to run real-time automated visual frame vector parsing.</p>
+                </div>
+                <button onclick="toggleUploadStudio()" class="text-zinc-500 hover:text-zinc-300 text-sm font-bold px-2">✕</button>
+            </div>
+            <form id="uploadForm" onsubmit="executeVideoUpload(event)" class="space-y-4">
+                <div class="grid grid-cols-2 gap-4">
+                    <div class="space-y-1.5">
+                        <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Video Title *</label>
+                        <input type="text" id="uploadTitle" required placeholder="e.g., Game of Thrones Scene" class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition">
+                    </div>
+                    <div class="space-y-1.5">
+                        <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Tags (Comma Separated)</label>
+                        <input type="text" id="uploadTags" placeholder="e.g., action, fantasy, dragon" class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition">
+                    </div>
+                </div>
+                <div class="space-y-1.5">
+                    <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Description</label>
+                    <textarea id="uploadDescription" rows="2" placeholder="Provide contextual metadata notes here..." class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition resize-none"></textarea>
+                </div>
+                <div class="space-y-1.5">
+                    <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Select Media Track *</label>
+                    <input type="file" id="uploadFile" required accept=".mp4,.mkv,.avi" class="w-full bg-zinc-900/30 border border-dashed border-zinc-800 rounded-xl px-4 py-4 text-xs text-zinc-400 file:mr-4 file:py-1.5 file:px-3 file:rounded-lg file:border-0 file:text-xs file:font-bold file:bg-purple-600 file:text-white hover:file:bg-purple-500 file:cursor-pointer cursor-pointer transition">
+                </div>
+                <button type="submit" id="uploadSubmitButton" class="w-full instagram-gradient-bg text-white py-3 rounded-xl text-xs font-bold tracking-wide shadow-lg hover:opacity-90 active:scale-[0.99] transition duration-150">
+                    Deploy to Processing Cluster
+                </button>
+            </form>
+        </div>
+        <div id="theaterSection" class="hidden bg-zinc-950 rounded-2xl border border-zinc-900 shadow-[0_24px_70px_rgba(0,0,0,0.7)] p-6 transition duration-500">
+            <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
+                <div class="lg:col-span-2 space-y-4">
+                    <div class="instagram-gradient-border p-[1px] rounded-2xl overflow-hidden bg-zinc-900 shadow-[0_0_40px_rgba(59,130,246,0.15)]">
+                        <video id="mainVideoPlayer" class="video-js vjs-default-skin vjs-big-play-centered w-full aspect-video rounded-2xl overflow-hidden" controls preload="auto"></video>
+                    </div>
+                    <h1 id="playerTitle" class="text-2xl font-bold tracking-tight text-zinc-100 px-1">Video Analysis Engine</h1>
+                </div>
+                <div class="bg-zinc-900/40 rounded-2xl p-5 border border-zinc-900 flex flex-col h-[460px] backdrop-blur-md">
+                    <div class="mb-4">
+                        <h3 class="font-bold text-xs text-zinc-400 tracking-widest uppercase mb-3">Deep Timeline Vector Finder</h3>
+                        <div class="flex bg-zinc-950/80 border border-zinc-800 rounded-xl overflow-hidden focus-within:border-pink-500/50 transition">
+                            <input type="text" id="insideSearchInput" placeholder="Find precise visual moment..." class="w-full bg-transparent px-4 py-2.5 text-xs text-zinc-200 outline-none placeholder-zinc-600">
+                            <button id="insideSearchButton" onclick="executeInsideVideoSearch()" class="bg-zinc-800 hover:bg-zinc-700 px-4 text-xs font-bold text-zinc-300 transition tracking-wide">Find</button>
+                        </div>
+                    </div>
+                    <div id="timelineMatches" class="space-y-2.5 overflow-y-auto flex-1 pr-1 custom-scrollbar">
+                        <p class="text-xs text-zinc-500 text-center py-12 px-4 leading-relaxed">Type an explicit object or concept above to scan frame coordinates inside this clip.</p>
+                    </div>
+                </div>
+            </div>
+        </div>
+        <div id="feedSection" class="space-y-6">
+            <h2 id="feedHeading" class="text-xl font-extrabold tracking-tight flex items-center space-x-2 text-zinc-200">
+                <span>Recommended Framework Feeds</span>
+            </h2>
+            <div id="videoGrid" class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-8"></div>
+        </div>
+    </main>
+    <script src="https://cdn.jsdelivr.net/npm/video.js@8.10.0/dist/video.min.js"></script>
+    <script>
+    const API_BASE = "https://kishanamaliya-database.hf.space/api/v1/videos";;
+    let player = null;
+    let currentVideo = null;
+    document.addEventListener("DOMContentLoaded", () => {
+        loadHomepageFeed();
+        // Keyboard search listeners
+        document.getElementById('globalSearchInput').addEventListener('keydown', (e) => {
+            if (e.key === 'Enter') {
+                e.preventDefault();
+                executeGlobalPlatformSearch();
+            }
+        });
+        document.getElementById('insideSearchInput').addEventListener('keydown', (e) => {
+            if (e.key === 'Enter') {
+                e.preventDefault();
+                executeInsideVideoSearch();
+            }
+        });
+    });
+    function toggleUploadStudio() {
+        const studio = document.getElementById('studioSection');
+        studio.classList.toggle('hidden');
+    }
+    async function loadHomepageFeed() {
+        document.getElementById('theaterSection').classList.add('hidden');
+        document.getElementById('feedHeading').innerText = "Recommended Videos";
+        currentVideo = null;
+        if (player) player.pause();
+        try {
+            const res = await fetch(`${API_BASE}/`);
+            if (!res.ok) return;
+            const data = await res.json();
+            renderVideoGrid(data);
+        } catch (err) {
+            console.error("Feed extraction failure:", err);
+        }
+    }
+    function renderVideoGrid(videos) {
+        const grid = document.getElementById('videoGrid');
+        grid.innerHTML = "";
+        if (videos.length === 0) {
+            grid.innerHTML = `
+                <div class="col-span-full border border-dashed border-zinc-800 rounded-2xl py-16 text-center text-sm text-zinc-500">
+                    No matching items discovered inside active cache indexes.
+                </div>`;
+            return;
+        }
+        videos.forEach(vid => {
+            const card = document.createElement('div');
+            card.className = "bg-zinc-900/40 rounded-2xl overflow-hidden border border-zinc-900 hover:border-zinc-800/80 transition duration-300 cursor-pointer group shadow-lg hover:-translate-y-1 hover:shadow-[0_12px_40px_rgba(0,0,0,0.5)] flex flex-col relative";
+            card.onclick = () => launchVideoTheater(vid, []);
+            card.innerHTML = `
+                <div class="aspect-video bg-zinc-950 w-full relative overflow-hidden">
+                    <button onclick="event.stopPropagation(); executeVideoPurge('${vid.id}')"
+                            class="absolute top-3 right-3 z-30 bg-black/60 hover:bg-red-600/90 border border-zinc-800 hover:border-red-500 text-zinc-400 hover:text-white w-8 h-8 rounded-xl flex items-center justify-center transition duration-200 opacity-0 group-hover:opacity-100 shadow-md backdrop-blur-md"
+                            title="Purge Video Assets">
+                        <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
+                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
+                        </svg>
+                    </button>
+                    <div class="absolute inset-0 bg-gradient-to-t from-black/80 via-black/20 to-transparent opacity-60 group-hover:opacity-40 transition z-10"></div>
+                    <div class="absolute inset-0 flex items-center justify-center z-20">
+                        <div class="w-12 h-12 rounded-full bg-white/10 backdrop-blur-md border border-white/20 flex items-center justify-center text-white opacity-0 group-hover:opacity-100 group-hover:scale-110 transition duration-300">
+                            <span class="text-xl ml-1">▶</span>
+                        </div>
+                    </div>
+                </div>
+                <div class="p-5 flex-1 flex flex-col justify-between space-y-2">
+                    <div>
+                        <h4 class="font-bold text-zinc-100 group-hover:text-purple-400 transition duration-200 line-clamp-2 leading-snug tracking-tight text-base">${vid.title || 'Untitled Video'}</h4>
+                        <p class="text-xs text-zinc-400 mt-1.5 line-clamp-2 leading-relaxed font-normal">${vid.description || 'No description payload configured.'}</p>
+                    </div>
+                </div>
+            `;
+            grid.appendChild(card);
+        });
+    }
+    // Natively handles the Multipart Binary Streaming Data ingestion Form
+    async function executeVideoUpload(event) {
+        event.preventDefault();
+        const fileInput = document.getElementById('uploadFile');
+        const titleInput = document.getElementById('uploadTitle');
+        const descInput = document.getElementById('uploadDescription');
+        const tagsInput = document.getElementById('uploadTags');
+        const submitBtn = document.getElementById('uploadSubmitButton');
+        if (!fileInput.files[0]) return;
+        const formData = new FormData();
+        formData.append("file", fileInput.files[0]);
+        formData.append("title", titleInput.value.trim());
+        formData.append("description", descInput.value.trim());
+        formData.append("tags", tagsInput.value.trim());
+        submitBtn.innerText = "Processing Neural Video Extraction (Running Celery Frame Pipeline)...";
+        submitBtn.disabled = true;
+        try {
+            const res = await fetch(`${API_BASE}/upload`, {
+                method: "POST",
+                body: formData
+            });
+            if (!res.ok) throw new Error("Backend reject pipeline format parameters.");
+            alert("Video successfully queued! The automated background task is now generating SigLIP 2 frame vectors. Refresh the home library in a few seconds.");
+            document.getElementById('uploadForm').reset();
+            toggleUploadStudio();
+            loadHomepageFeed();
+        } catch (err) {
+            console.error("Ingestion fault:", err);
+            alert("Processing failed. Verify server storage disk nodes and connection protocols.");
+        } finally {
+            submitBtn.innerText = "Deploy to Processing Cluster";
+            submitBtn.disabled = false;
+        }
+    }
+    // Connects seamlessly to the Cascading Erasure DELETE Endpoint
+    async function executeVideoPurge(videoId) {
+        if (!confirm("Are you absolute sure you want to permanently delete this video, erase its static transcoded disk assets, and clear its high-dimensional Qdrant records?")) {
+            return;
+        }
+        try {
+            const res = await fetch(`${API_BASE}/${videoId}`, {
+                method: "DELETE"
+            });
+            if (!res.ok) throw new Error("Delete requested aborted by database logic.");
+            alert("Video collection vectors and data successfully deleted from server.");
+            loadHomepageFeed(); // Instantly refresh layout feed matrix
+        } catch (err) {
+            console.error("Deletion fault:", err);
+            alert("Failed to wipe video assets cleanly from disk coordinates.");
+        }
+    }
+    function launchVideoTheater(video, matches) {
+        currentVideo = video;
+        document.getElementById('theaterSection').classList.remove('hidden');
+        document.getElementById('playerTitle').innerText = video.title;
+        document.getElementById('insideSearchInput').value = "";
+        window.scrollTo({ top: 0, behavior: 'smooth' });
+        setTimeout(() => {
+            if (!player) {
+                player = videojs('mainVideoPlayer', {
+                    controls: true,
+                    autoplay: true,
+                    preload: 'auto',
+                    fluid: true,
+                    responsive: true
+                });
+            }
+            player.src({
+                src: `http://127.0.0.1:8000${video.hls_playlist_url}`,
+                type: 'application/x-mpegURL'
+            });
+            player.load();
+            player.play().catch(err => console.log("Awaiting activation interaction flag.", err));
+        }, 50);
+        renderSidebarMatches(matches);
+    }
+    function renderSidebarMatches(matches) {
+        const sidebar = document.getElementById('timelineMatches');
+        sidebar.innerHTML = "";
+        if (!matches || matches.length === 0) {
+            sidebar.innerHTML = `
+                <div class="py-12 px-4 text-center">
+                    <p class="text-xs text-zinc-500 leading-relaxed">Regular streaming track. Use the search field above to cross-reference visual moments in real-time.</p>
+                </div>`;
+            return;
+        }
+        matches.forEach(match => {
+            const btn = document.createElement('div');
+            btn.className = "bg-zinc-950/60 hover:bg-zinc-900 p-3.5 rounded-xl border border-zinc-900/60 flex items-center justify-between cursor-pointer transition duration-200 group active:scale-[0.98]";
+            btn.onclick = () => player.currentTime(match.timestamp);
+            btn.innerHTML = `
+                <div class="flex flex-col space-y-0.5">
+                    <span class="text-xs font-bold text-purple-400 group-hover:text-pink-400 transition group-hover:underline">Moment @ ${formatTime(match.timestamp)}</span>
+                    <span class="text-[10px] text-zinc-500 font-medium">Metric Score: ${(match.score * 100).toFixed(1)}%</span>
+                </div>
+                <div class="w-7 h-7 rounded-lg bg-zinc-900 border border-zinc-800 group-hover:border-purple-500/40 flex items-center justify-center transition">
+                    <span class="text-zinc-500 group-hover:text-purple-400 text-xs transform translate-x-[1px] transition">➔</span>
+                </div>
+            `;
+            sidebar.appendChild(btn);
+        });
+    }
+    async function executeGlobalPlatformSearch() {
+        const query = document.getElementById('globalSearchInput').value.trim();
+        if (!query) return;
+        const btn = document.getElementById('globalSearchButton');
+        btn.innerText = "Searching...";
+        document.getElementById('feedHeading').innerText = `Search Engine Matches: "${query}"`;
+        try {
+            const res = await fetch(`${API_BASE}/global-search?query=${encodeURIComponent(query)}&top_k=20`);
+            const data = await res.json();
+            renderVideoGrid(data);
+        } catch (err) {
+            console.error("Global core query failure:", err);
+        } finally {
+            btn.innerText = "Search Feed";
+        }
+    }
+    async function executeInsideVideoSearch() {
+        if (!currentVideo) return;
+        const query = document.getElementById('insideSearchInput').value.trim();
+        if (!query) return;
+        const btn = document.getElementById('insideSearchButton');
+        btn.innerText = "...";
+        try {
+            const res = await fetch(`${API_BASE}/search?query=${encodeURIComponent(query)}&top_k=20`);
+            const data = await res.json();
+            const activeVideoMatches = data.results
+                .filter(hit => hit.video_id === currentVideo.id)
+                .map(hit => ({
+                    timestamp: hit.timestamp_seconds,
+                    score: hit.score
+                }))
+                .sort((a, b) => a.timestamp - b.timestamp);
+            renderSidebarMatches(activeVideoMatches);
+        } catch (err) {
+            console.error("Timeline vector resolution fault:", err);
+        } finally {
+            btn.innerText = "Find";
+        }
+    }
+    function formatTime(secs) {
+        const m = Math.floor(secs / 60);
+        const s = Math.floor(secs % 60);
+        return `${m}:${s < 10 ? '0' : ''}${s}`;
+    }
+</script>
+</body>
+</html>

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+fastapi>=0.110.0
+uvicorn>=0.28.0
+celery>=5.3.6
+redis>=5.0.3
+pydantic>=2.6.4
+pydantic-settings>=2.2.1
+qdrant-client>=1.8.0
+ffmpeg-python>=0.2.0
+torch>=2.6.0
+transformers>=4.40.0
+pillow>=10.2.0
+python-multipart>=0.0.9

start.sh ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/bin/bash
+# Launch the Celery worker in the background (trailing `&`) with a single worker process. NOTE(review): workers/celery_app.py defines the app as `workers.celery_app.celery_app` and registers `workers.tasks` there; confirm that `app.api.v1.videos.celery_client` resolves to an app that has those tasks registered.
+celery -A app.api.v1.videos.celery_client worker --loglevel=info --concurrency=1 &
+# Run the FastAPI app with uvicorn in the foreground on all interfaces, port 7860 (Hugging Face default)
+uvicorn app.main:app --host 0.0.0.0 --port 7860

workers/__init__.py ADDED Viewed

File without changes

workers/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (162 Bytes). View file

workers/__pycache__/celery_app.cpython-313.pyc ADDED Viewed

Binary file (663 Bytes). View file

workers/__pycache__/ml_pipeline.cpython-313.pyc ADDED Viewed

Binary file (4.65 kB). View file

workers/__pycache__/tasks.cpython-313.pyc ADDED Viewed

Binary file (3.51 kB). View file

workers/celery_app.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from celery import Celery
+from app.core.config import settings
+celery_app = Celery(
+    "video_tasks",
+    broker=settings.REDIS_URL,
+    backend=settings.REDIS_URL,
+    include=["workers.tasks"]
+)
+# Enterprise task processing configurations
+celery_app.conf.update(
+    task_serializer="json",
+    accept_content=["json"],
+    result_serializer="json",
+    timezone="UTC",
+    enable_utc=True,
+    worker_concurrency=1  # Recommended as 1 if running heavy ViT models locally to prevent memory thrashing
+)

workers/ml_pipeline.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import torch
+from PIL import Image
+from transformers import AutoProcessor, AutoModel
+from typing import List
+from app.core.config import settings
+class Siglip2EmbeddingPipeline:
+    """
+    High-performance Machine Learning inference pipeline for SigLIP 2 (1152-D).
+    Optimized dynamically for both CUDA environments and Apple Silicon Neural Cores.
+    """
+    def __init__(self) -> None:
+        if torch.cuda.is_available():
+            self.device = "cuda"
+        elif torch.backends.mps.is_available():
+            self.device = "mps"
+        else:
+            self.device = "cpu"
+        self.model_id = settings.SIGLIP_MODEL_ID
+        print(f"[AI ENGINE] Booting SigLIP 2 SO400M on device accelerator: {self.device}...")
+        self.processor = AutoProcessor.from_pretrained(self.model_id)
+        self.model = AutoModel.from_pretrained(self.model_id).to(self.device)
+        self.model.eval()
+        print("[AI ENGINE] Multimodal network parameters successfully mapped and frozen.")
+    def get_text_embedding(self, text: str) -> List[float]:
+        """Maps raw user text queries down into the shared 1152-D spatial map."""
+        with torch.no_grad():
+            inputs = self.processor(text=[text], padding="max_length", return_tensors="pt").to(self.device)
+            outputs = self.model.get_text_features(**inputs)
+            # --- LLD Safe Extraction Guard ---
+            # If the response is wrapped inside BaseModelOutputWithPooling, extract the core pooler tensor
+            text_features = outputs.pooler_output if hasattr(outputs, "pooler_output") else outputs
+            # Perform explicit L2 normalization to enable accurate Cosine Distance math inside Qdrant
+            text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+            return text_features.squeeze(0).cpu().tolist()
+    def get_image_batch_embeddings(self, image_paths: List[str]) -> List[List[float]]:
+        """Extracts dense visual embeddings across structural frame lists concurrently."""
+        images = []
+        for path in image_paths:
+            try:
+                images.append(Image.open(path).convert("RGB"))
+            except Exception as e:
+                print(f"[AI ENGINE] Error reading frame asset {path}: {str(e)}")
+        if not images:
+            return []
+        with torch.no_grad():
+            inputs = self.processor(images=images, return_tensors="pt").to(self.device)
+            outputs = self.model.get_image_features(**inputs)
+            # --- LLD Safe Extraction Guard ---
+            # Safely unpack the raw frame feature matrix tensor from the container wrapper
+            image_features = outputs.pooler_output if hasattr(outputs, "pooler_output") else outputs
+            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+            return image_features.cpu().tolist()

workers/tasks.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+from workers.celery_app import celery_app
+from workers.ml_pipeline import Siglip2EmbeddingPipeline
+from app.LLD.ffmpeg_strategy import LocalFFmpegStrategy
+from app.LLD.qdrant_strategy import QdrantVectorStoreStrategy
+from app.core.config import settings
+# Module-level caches, filled on first use by the accessor functions below so that importing this module constructs none of these objects
+ffmpeg_strategy = None
+vector_store = None
+ai_engine_pipeline = None
+def get_ffmpeg_strategy():
+    global ffmpeg_strategy
+    if ffmpeg_strategy is None:
+        ffmpeg_strategy = LocalFFmpegStrategy()
+    return ffmpeg_strategy
+def get_vector_store():
+    global vector_store
+    if vector_store is None:
+        vector_store = QdrantVectorStoreStrategy()
+    return vector_store
+def load_ai_engine():
+    global ai_engine_pipeline
+    if ai_engine_pipeline is None:
+        ai_engine_pipeline = Siglip2EmbeddingPipeline()
+    return ai_engine_pipeline
+@celery_app.task(name="workers.tasks.process_video_pipeline")
+def process_video_pipeline(video_id: str, raw_file_path: str) -> bool:
+    print(f"[WORKER CHOREOGRAPHER] Commencing processing pipeline layout for ID: {video_id}")
+    output_dir = os.path.join(settings.OUTPUT_DIR, video_id)
+    # Instantiate strategies inside the execution block instead of the import layer
+    ffmpeg_engine = get_ffmpeg_strategy()
+    db_vector_store = get_vector_store()
+    ai_model = load_ai_engine()
+    try:
+        # Step 1: HLS transcoding
+        playlist_path = ffmpeg_engine.transcode_to_hls(raw_file_path, output_dir)
+        print(f"[WORKER] Transcoding complete: {playlist_path}")
+        # Step 2: Keyframe extraction
+        frames_metadata = ffmpeg_engine.extract_keyframes(raw_file_path, output_dir, interval_seconds=1)
+        print(f"[WORKER] Extracted {len(frames_metadata)} frames.")
+        if not frames_metadata:
+            return False
+        # Step 3: SigLIP 2 Batch Matrix Encoding
+        frame_paths = [item["file_path"] for item in frames_metadata]
+        batch_size = 16
+        all_computed_vectors = []
+        for i in range(0, len(frame_paths), batch_size):
+            chunk_paths = frame_paths[i:i + batch_size]
+            chunk_vectors = ai_model.get_image_batch_embeddings(chunk_paths)
+            all_computed_vectors.extend(chunk_vectors)
+        # Step 4: Sync to Qdrant Space
+        return db_vector_store.upsert_embeddings(
+            video_id=video_id,
+            embeddings=all_computed_vectors,
+            metadata=frames_metadata
+        )
+    except Exception as e:
+        print(f"[WORKER CRITICAL SHUTDOWN] Ingestion routine dropped: {str(e)}")
+        return False
+@celery_app.task(name="workers.tasks.generate_text_embedding")
+def generate_text_embedding(query_text: str) -> list[float]:
+    # Dynamic instantiation on user search trigger call
+    ai_model = load_ai_engine()
+    return ai_model.get_text_embedding(query_text)