kishanAmaliya committed on
Commit
4dff442
·
0 Parent(s):

Deployment: Complete optimized multi-modal search engine

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.cph/.B_Simons_and_Cakes_for_Success.cpp_86f1157f283c801d24529d81b96f40c8.prob ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name":"B. Simons and Cakes for Success","group":"Codeforces - Codeforces Round 1083 (Div. 2)","url":"https://codeforces.com/contest/2205/problem/B","interactive":false,"memoryLimit":256,"timeLimit":1000,"tests":[{"id":1780024223141,"input":"4\n8\n12\n369\n55635800\n","output":"2\n6\n123\n2090\n"}],"testType":"single","input":{"type":"stdin"},"output":{"type":"stdout"},"languages":{"java":{"mainClass":"Main","taskClass":"BSimonsAndCakesForSuccess"}},"batch":{"id":"58721974-54fc-4479-b9d8-ababbbac7016","size":1},"srcPath":"/Users/kishanamaliya/Documents/videostream_ai/B_Simons_and_Cakes_for_Success.cpp"}
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# System media libraries used for video analysis and frame extraction.
# --no-install-recommends avoids pulling optional packages into the image, and
# the apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ffmpeg \
    libsm6 \
    libxext6 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

# Copy the dependency manifest on its own first so the pip layer stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY . .

# Grant execution permissions to the orchestration script
RUN chmod +x start.sh

# Run the unified stack
CMD ["./start.sh"]
README.md ADDED
@@ -0,0 +1 @@
 
 
1
+ # videostream-AI
app/.DS_Store ADDED
Binary file (6.15 kB). View file
 
app/LLD/__init__.py ADDED
File without changes
app/LLD/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (162 Bytes). View file
 
app/LLD/__pycache__/ffmpeg_strategy.cpython-313.pyc ADDED
Binary file (2.88 kB). View file
 
app/LLD/__pycache__/interfaces.cpython-313.pyc ADDED
Binary file (2.57 kB). View file
 
app/LLD/__pycache__/qdrant_strategy.cpython-313.pyc ADDED
Binary file (5.09 kB). View file
 
app/LLD/ffmpeg_strategy.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import ffmpeg
3
+ from typing import List, Dict, Any
4
+ from app.LLD.interfaces import VideoProcessingStrategy
5
+
6
class LocalFFmpegStrategy(VideoProcessingStrategy):
    """
    VideoProcessingStrategy implementation built on the ffmpeg-python bindings.

    Both public methods assemble an FFmpeg pipeline and run it synchronously
    with ``quiet=True``; any error raised by ``run`` propagates to the caller.
    """

    # Naming scheme of the extracted frames ("frame_0001.jpg", ...).
    _FRAME_PREFIX = "frame_"
    _FRAME_SUFFIX = ".jpg"

    def transcode_to_hls(self, input_path: str, output_dir: str) -> str:
        """
        Segment ``input_path`` into an HLS VOD playlist inside ``output_dir``.

        Args:
            input_path: Path of the source video.
            output_dir: Directory receiving the playlist and segments; created
                if it does not exist.

        Returns:
            Path of the generated ``playlist.m3u8``.
        """
        os.makedirs(output_dir, exist_ok=True)
        output_playlist = os.path.join(output_dir, "playlist.m3u8")

        # hls_time=4 requests 4-second segments, written as file000.ts, file001.ts, ...
        (
            ffmpeg
            .input(input_path)
            .output(output_playlist,
                    format='hls',
                    hls_time=4,
                    hls_playlist_type='vod',
                    hls_segment_filename=os.path.join(output_dir, "file%03d.ts"))
            .overwrite_output()
            .run(quiet=True)
        )
        return output_playlist

    def extract_keyframes(self, input_path: str, output_dir: str, interval_seconds: int) -> List[Dict[str, Any]]:
        """
        Extract one JPEG frame per ``interval_seconds`` of video.

        Frames are written to ``<output_dir>/frames/frame_NNNN.jpg``.

        Args:
            input_path: Path of the source video.
            output_dir: Parent directory; a ``frames`` sub-directory is created.
            interval_seconds: Spacing between extracted frames, in seconds.

        Returns:
            One dict per extracted frame, in frame order, with keys
            ``timestamp_seconds`` (frame index * ``interval_seconds``) and
            ``file_path``.
        """
        frames_dir = os.path.join(output_dir, "frames")
        os.makedirs(frames_dir, exist_ok=True)

        # Remove frames left by an earlier run: a shorter re-extraction would
        # otherwise leave stale high-numbered frames that get timestamps below.
        for _, stale_name in self._numbered_frames(frames_dir):
            os.remove(os.path.join(frames_dir, stale_name))

        output_pattern = os.path.join(frames_dir, "frame_%04d.jpg")

        # The fps filter is given the rate "1/<interval>", i.e. one frame per interval.
        (
            ffmpeg
            .input(input_path)
            .filter('fps', fps=f"1/{interval_seconds}")
            .output(output_pattern, qscale=2)
            .overwrite_output()
            .run(quiet=True)
        )

        # Only real frame files are mapped, ordered by their numeric suffix so
        # unrelated files are ignored and frame_10000 sorts after frame_9999.
        return [
            {
                "timestamp_seconds": idx * interval_seconds,
                "file_path": os.path.join(frames_dir, filename),
            }
            for idx, (_, filename) in enumerate(self._numbered_frames(frames_dir))
        ]

    @classmethod
    def _numbered_frames(cls, frames_dir: str) -> List[Any]:
        """Return ``(frame_number, filename)`` pairs for frame files, sorted by number."""
        numbered = []
        for name in os.listdir(frames_dir):
            if not (name.startswith(cls._FRAME_PREFIX) and name.endswith(cls._FRAME_SUFFIX)):
                continue
            stem = name[len(cls._FRAME_PREFIX):-len(cls._FRAME_SUFFIX)]
            if stem.isascii() and stem.isdigit():
                numbered.append((int(stem), name))
        numbered.sort()
        return numbered
app/LLD/interfaces.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Dict, Any
3
+
4
class VideoProcessingStrategy(ABC):
    """
    Contract for turning a raw video into streamable and searchable assets.

    Concrete strategies supply the transcoding and frame-extraction steps, so
    callers depend on this interface rather than on a particular tool.
    """

    @abstractmethod
    def transcode_to_hls(self, input_path: str, output_dir: str) -> str:
        """Transcode the raw video at ``input_path`` into an HLS playlist (.m3u8)."""

    @abstractmethod
    def extract_keyframes(self, input_path: str, output_dir: str, interval_seconds: int) -> List[Dict[str, Any]]:
        """Extract periodic frames and describe each one together with its timestamp."""
18
+
19
+
20
class VectorStoreInterface(ABC):
    """
    Contract for a vector database used for semantic frame search.

    Keeps application code independent of the concrete vector DB client
    (for example Qdrant or Milvus).
    """

    @abstractmethod
    def upsert_embeddings(self, video_id: str, embeddings: List[List[float]], metadata: List[Dict[str, Any]]) -> bool:
        """Store a batch of frame vectors, with their metadata, for ``video_id``."""

    @abstractmethod
    def search_similarity(self, query_vector: List[float], top_k: int) -> List[Dict[str, Any]]:
        """Return up to ``top_k`` nearest-neighbour matches for ``query_vector``."""
app/LLD/qdrant_strategy.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from typing import List, Dict, Any
3
+ from qdrant_client import QdrantClient
4
+ from qdrant_client.models import Distance, VectorParams, PointStruct
5
+ from app.LLD.interfaces import VectorStoreInterface
6
+ from app.core.config import settings
7
+
8
class QdrantVectorStoreStrategy(VectorStoreInterface):
    """
    VectorStoreInterface implementation backed by a Qdrant collection.

    All frame vectors live in the single ``video_frames`` collection; each
    point's payload carries ``video_id``, ``timestamp_seconds`` and ``file_path``.
    """

    def __init__(self) -> None:
        """Connect to Qdrant using the configured host/port and ensure the collection exists."""
        self.client = QdrantClient(host=settings.QDRANT_HOST, port=settings.QDRANT_PORT)
        self.collection_name = "video_frames"
        self._ensure_collection_exists()

    def _ensure_collection_exists(self) -> None:
        """
        Create the collection (cosine distance, ``settings.VECTOR_DIMENSION``
        dimensions) when it is missing.

        Raises:
            RuntimeError: if the existence check or the creation fails. The
                original exception is chained as ``__cause__``.
        """
        try:
            if not self.client.collection_exists(collection_name=self.collection_name):
                self.client.create_collection(
                    collection_name=self.collection_name,
                    vectors_config=VectorParams(
                        size=settings.VECTOR_DIMENSION,
                        distance=Distance.COSINE
                    )
                )
        except Exception as e:
            # Fail fast during construction instead of handing out a broken store.
            raise RuntimeError(f"Failed initializing Qdrant collection layer: {str(e)}") from e

    def upsert_embeddings(self, video_id: str, embeddings: List[List[float]], metadata: List[Dict[str, Any]]) -> bool:
        """
        Upsert one point per (embedding, metadata) pair for ``video_id``.

        Args:
            video_id: Identifier of the parent video, copied into every payload.
            embeddings: Frame vectors, positionally aligned with ``metadata``.
            metadata: Per-frame dicts; ``timestamp_seconds`` and ``file_path``
                are copied into the payload.

        Returns:
            True when the upsert call succeeded; False when the inputs are
            misaligned or the client raised (the error is printed).
        """
        # zip() would silently drop the unmatched tail and pair vectors with the
        # wrong timestamps, so misaligned input is rejected up front.
        if len(embeddings) != len(metadata):
            print(
                f"[ERROR] Vector storage execution fault for Video {video_id}: "
                f"{len(embeddings)} embeddings but {len(metadata)} metadata entries"
            )
            return False

        try:
            points = []
            for idx, (vector, meta) in enumerate(zip(embeddings, metadata)):
                payload = {
                    "video_id": video_id,
                    "timestamp_seconds": meta.get("timestamp_seconds"),
                    "file_path": meta.get("file_path")
                }

                # Deterministic id: re-processing the same video overwrites its
                # points instead of duplicating them.
                point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{video_id}_{idx}"))

                points.append(
                    PointStruct(
                        id=point_id,
                        vector=vector,
                        payload=payload
                    )
                )

            self.client.upsert(
                collection_name=self.collection_name,
                wait=True,
                points=points
            )
            return True

        except Exception as e:
            print(f"[ERROR] Vector storage execution fault for Video {video_id}: {str(e)}")
            return False

    def search_similarity(self, query_vector: List[float], top_k: int) -> List[Dict[str, Any]]:
        """
        Return up to ``top_k`` nearest frames for ``query_vector``.

        Uses ``query_points``, the same client call the API layer issues against
        this collection, rather than the legacy ``search`` method.

        Returns:
            Dicts with ``score``, ``video_id``, ``timestamp_seconds`` and
            ``file_path``; an empty list when the query fails (the error is
            printed).
        """
        try:
            response = self.client.query_points(
                collection_name=self.collection_name,
                query=query_vector,
                limit=top_k
            )

            formatted_results = []
            for hit in response.points:
                # A point stored without payload must not abort the whole result set.
                payload = hit.payload or {}
                formatted_results.append({
                    "score": hit.score,
                    "video_id": payload.get("video_id"),
                    "timestamp_seconds": payload.get("timestamp_seconds"),
                    "file_path": payload.get("file_path")
                })
            return formatted_results

        except Exception as e:
            print(f"[ERROR] Vector distance analysis failed: {str(e)}")
            return []
103
+
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (158 Bytes). View file
 
app/__pycache__/main.cpython-313.pyc ADDED
Binary file (1.86 kB). View file
 
app/api/__init__.py ADDED
File without changes
app/api/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (162 Bytes). View file
 
app/api/v1/__init__.py ADDED
File without changes
app/api/v1/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (165 Bytes). View file
 
app/api/v1/__pycache__/videos.cpython-313.pyc ADDED
Binary file (10.4 kB). View file
 
app/api/v1/videos.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import shutil
4
+ import uuid
5
+ import traceback
6
+ import redis
7
+ from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Query
8
+ from typing import List, Dict, Any
9
+ from celery import Celery
10
+ from qdrant_client.http import exceptions as qdrant_exceptions
11
+ from app.core.config import settings
12
+ from app.LLD.qdrant_strategy import QdrantVectorStoreStrategy
13
+
14
+ router = APIRouter()
15
+
16
+ # Initialize Permanent Cloud Redis Persistence Client
17
+ redis_client = redis.Redis.from_url(settings.REDIS_URL, decode_responses=True)
18
+
19
+ # Celery client used only to dispatch tasks by name; it is created eagerly at import time
20
+ celery_client = Celery("video_tasks", broker=settings.REDIS_URL, backend=settings.REDIS_URL)
21
+ vector_store = QdrantVectorStoreStrategy()
22
+
23
+
24
+ # --- HELPER STORAGE UTILITIES ---
25
def fetch_all_cloud_metadata() -> List[Dict[str, Any]]:
    """
    Load every stored video metadata record from Redis.

    Records live under keys matching ``video:metadata:*`` as JSON strings.

    Returns:
        The decoded records. An empty list is returned when there are no
        records or when Redis cannot be reached (the error is printed);
        individual records that are empty or not valid JSON are skipped.
    """
    try:
        # SCAN walks the keyspace incrementally instead of blocking the server
        # like KEYS does. SCAN may report a key more than once, so de-duplicate
        # while keeping first-seen order.
        keys = list(dict.fromkeys(redis_client.scan_iter(match="video:metadata:*")))
        if not keys:
            return []
        values = redis_client.mget(keys)
    except Exception as e:
        print(f"[REDIS STORAGE ERROR] Fetch failed: {str(e)}")
        return []

    records = []
    for key, raw in zip(keys, values):
        if not raw:
            # Key vanished between SCAN and MGET, or holds an empty value.
            continue
        try:
            records.append(json.loads(raw))
        except (TypeError, ValueError) as e:
            # One corrupt record must not hide the rest of the feed.
            print(f"[REDIS STORAGE ERROR] Skipping unreadable record {key}: {str(e)}")
    return records
36
+
37
+
38
+ # --- 1. STATIC EXPLICIT GET/POST ROUTES ---
39
+
40
@router.get("/", response_model=List[Dict[str, Any]])
async def get_landing_page_feed():
    """
    Return every stored video metadata record for the landing page.

    Delegates to ``fetch_all_cloud_metadata``, which yields an empty list when
    nothing is stored or the lookup fails.
    """
    # NOTE(review): the helper performs synchronous Redis calls inside an
    # async handler - confirm this is acceptable for the expected load.
    feed_items = fetch_all_cloud_metadata()
    return feed_items
44
+
45
+
46
@router.post("/upload", status_code=202)
async def upload_video(
    file: UploadFile = File(...),
    title: str = Form(...),
    description: str = Form(""),
    tags: str = Form("")
):
    """
    Accept a video upload, record its metadata and queue it for processing.

    Steps: validate the extension, store the raw file under
    ``settings.UPLOAD_DIR``, write the metadata record to Redis, then enqueue
    ``workers.tasks.process_video_pipeline``.

    Raises:
        HTTPException 400: the file name is missing or not .mp4/.mkv/.avi.
        HTTPException 500: the raw file could not be written.
        HTTPException 503: the metadata record could not be stored or the
            processing task could not be queued; partial state is rolled back.
    """
    # filename can be absent on a malformed multipart part, and the extension
    # check is case-insensitive so "CLIP.MP4" is accepted too.
    original_name = file.filename or ""
    if not original_name.lower().endswith(('.mp4', '.mkv', '.avi')):
        raise HTTPException(status_code=400, detail="Invalid video format codec structure.")

    video_id = str(uuid.uuid4())
    file_extension = os.path.splitext(original_name)[1].lower()
    saved_filename = f"{video_id}{file_extension}"
    raw_file_path = os.path.join(settings.UPLOAD_DIR, saved_filename)
    metadata_key = f"video:metadata:{video_id}"

    def _discard_raw_file() -> None:
        # Best-effort rollback; a missing file simply means nothing was written.
        try:
            os.remove(raw_file_path)
        except OSError:
            pass

    try:
        with open(raw_file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    except Exception as e:
        _discard_raw_file()  # do not leave a truncated upload behind
        raise HTTPException(status_code=500, detail=f"File system failure during ingestion: {str(e)}") from e

    video_metadata = {
        "id": video_id,
        "title": title,
        "description": description,
        "tags": [tag.strip() for tag in tags.split(",") if tag.strip()],
        "hls_playlist_url": f"/shared_storage/processed/{video_id}/playlist.m3u8",
        "status": "processing"
    }

    # Without the metadata record the video can never appear in the feed, so a
    # failed write aborts the upload instead of reporting success.
    try:
        redis_client.set(metadata_key, json.dumps(video_metadata))
    except Exception as e:
        print(f"[REDIS WRITE ERROR] Critical persistence failure: {str(e)}")
        _discard_raw_file()
        raise HTTPException(status_code=503, detail="Metadata store unavailable; upload was not accepted.") from e

    try:
        celery_client.send_task(
            "workers.tasks.process_video_pipeline",
            args=[video_id, raw_file_path]
        )
    except Exception as e:
        # Nothing will ever process this upload: undo the file and the record
        # so the feed does not show a video stuck in "processing".
        print(f"[QUEUE ERROR] Could not enqueue processing for Video {video_id}: {str(e)}")
        _discard_raw_file()
        try:
            redis_client.delete(metadata_key)
        except Exception as cleanup_err:
            print(f"[REDIS DELETE ERROR] Record cleanup failed: {str(cleanup_err)}")
        raise HTTPException(status_code=503, detail="Processing queue unavailable; upload was not accepted.") from e

    return {
        "message": "Video ingestion accepted. Saved to permanent cloud records.",
        "video_id": video_id,
        "status": "processing"
    }
93
+
94
+
95
@router.get("/search")
async def execute_multimodal_search(query: str = Query(...), top_k: int = 20):
    """
    Find the frames (video id + timestamp) that best match a text query.

    The query text is embedded by the ``workers.tasks.generate_text_embedding``
    Celery task and the resulting vector is looked up with ``query_points``.

    Returns:
        ``{"results": [...]}`` where each entry has ``video_id``,
        ``timestamp_seconds`` and ``score``. The list is empty when Qdrant
        answers with an unexpected response.

    Raises:
        HTTPException 400: blank query.
        HTTPException 502: the embedding task returned nothing.
        HTTPException 500: any other failure.
    """
    if not query.strip():
        raise HTTPException(status_code=400, detail="Search text cannot be blank.")

    try:
        task = celery_client.send_task("workers.tasks.generate_text_embedding", args=[query])
        # NOTE(review): this synchronous wait (up to 10 s) runs inside an async
        # handler - confirm that is acceptable for the expected load.
        text_vector = task.get(timeout=10)

        if not text_vector:
            raise HTTPException(status_code=502, detail="Neural embedding task failed.")

        response = vector_store.client.query_points(
            collection_name=vector_store.collection_name,
            query=text_vector,
            limit=top_k
        )

        formatted_results = []
        for hit in response.points:
            # A point stored without payload must not fail the whole search.
            payload = hit.payload or {}
            formatted_results.append({
                "video_id": payload.get("video_id"),
                "timestamp_seconds": payload.get("timestamp_seconds"),
                "score": hit.score
            })

        return {"results": formatted_results}

    except HTTPException:
        # HTTPException is an Exception subclass: without this clause the 502
        # above is caught by the generic handler and rewritten into a 500.
        raise

    except qdrant_exceptions.UnexpectedResponse as q_err:
        print(f"[QDRANT DATABASE WARN] Collection not initialized yet: {str(q_err)}")
        return {"results": []}

    except Exception as e:
        print("\n💥!!! CRITICAL SEARCH EXCEPTION CAUGHT !!!💥")
        traceback.print_exc()
        print("💥!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!💥\n")
        raise HTTPException(status_code=500, detail=f"Internal vector processing fault: {str(e)}") from e
136
+
137
+
138
@router.get("/global-search", response_model=List[Dict[str, Any]])
async def execute_global_platform_search(query: str = Query(...), top_k: int = 20):
    """
    Search the whole library by text metadata and by visual similarity.

    Videos whose title, description or tags contain the query
    (case-insensitive) come first; videos owning a frame returned by the vector
    lookup are appended after them. A failure in the vector stage is printed
    and the text matches are still returned.

    Raises:
        HTTPException 400: blank query.
    """
    if not query.strip():
        raise HTTPException(status_code=400, detail="Search query cannot be blank.")

    needle = query.lower()
    matched = {}

    # Stage 1: load every metadata record once; it feeds both stages.
    catalogue = fetch_all_cloud_metadata()

    # Stage 2: substring match against title, description and tags.
    for record in catalogue:
        record_id = record.get("id")
        text_hits = (
            needle in record.get("title", "").lower(),
            needle in record.get("description", "").lower(),
            any(needle in tag.lower() for tag in record.get("tags", [])),
        )
        if any(text_hits) and record_id:
            matched[record_id] = record

    # Stage 3: embed the query and add videos that own a matching frame.
    try:
        embedding_job = celery_client.send_task("workers.tasks.generate_text_embedding", args=[query])
        query_embedding = embedding_job.get(timeout=5)

        if query_embedding:
            neighbours = vector_store.client.query_points(
                collection_name=vector_store.collection_name,
                query=query_embedding,
                limit=top_k
            )

            # Index records by id so each hit resolves without rescanning the list.
            records_by_id = {record.get("id"): record for record in catalogue if record.get("id")}
            for point in neighbours.points:
                candidate_id = point.payload.get("video_id")
                if not candidate_id or candidate_id in matched:
                    continue
                if candidate_id in records_by_id:
                    matched[candidate_id] = records_by_id[candidate_id]

    except Exception as e:
        print(f"[GLOBAL SEARCH WARN] Neural fallback active: {str(e)}")

    return list(matched.values())
184
+
185
+
186
+ # --- 2. DYNAMIC WILDCARD ROUTES (BOTTOM ZONE) ---
187
+
188
@router.delete("/{video_id}", status_code=200)
async def delete_video(video_id: str):
    """
    Purge everything stored for one video.

    Removes the Redis metadata record, the processed output directory, the raw
    upload and all Qdrant points whose payload ``video_id`` matches. Each step
    is best-effort: a failure is printed and the remaining steps still run.

    Raises:
        HTTPException 400: ``video_id`` is not a valid UUID.
    """
    # Function-scope import keeps this block self-contained.
    from qdrant_client import models as qdrant_models

    # video_id is joined into filesystem paths below; a value such as ".."
    # would point rmtree at the parent storage directory. Uploads are keyed by
    # uuid4, so anything that is not a UUID is rejected.
    try:
        video_id = str(uuid.UUID(video_id))
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid video identifier.") from None

    try:
        redis_client.delete(f"video:metadata:{video_id}")
    except Exception as e:
        print(f"[REDIS DELETE ERROR] Record cleanup failed: {str(e)}")

    try:
        processed_dir = os.path.join(settings.OUTPUT_DIR, video_id)
        if os.path.exists(processed_dir):
            shutil.rmtree(processed_dir)

        # Raw uploads are stored as "<video_id><ext>" with one of the accepted
        # upload extensions.
        for extension in ('.mp4', '.mkv', '.avi'):
            raw_file_path = os.path.join(settings.UPLOAD_DIR, f"{video_id}{extension}")
            if os.path.exists(raw_file_path):
                os.remove(raw_file_path)
    except OSError as e:
        print(f"[WARNING] File cleanup failed for ID {video_id}: {str(e)}")

    try:
        # The filter models come from the qdrant_client.models module; they are
        # not attributes of the client instance.
        vector_store.client.delete(
            collection_name=vector_store.collection_name,
            points_selector=qdrant_models.FilterSelector(
                filter=qdrant_models.Filter(
                    must=[
                        qdrant_models.FieldCondition(
                            key="video_id",
                            match=qdrant_models.MatchValue(value=video_id)
                        )
                    ]
                )
            )
        )
    except Exception as e:
        print(f"[WARNING] Vector cascade clearance mismatch: {str(e)}")

    return {"message": f"Successfully purged assets for ID: {video_id}."}
app/core/__init__.py ADDED
File without changes
app/core/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (163 Bytes). View file
 
app/core/__pycache__/config.cpython-313.pyc ADDED
Binary file (1.25 kB). View file
 
app/core/config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+
3
class Settings(BaseSettings):
    """
    Application configuration.

    Every field can be overridden through an environment variable of the same
    name or through the ``.env`` file; the values below are the defaults.
    """

    PROJECT_NAME: str = "Upgraded Video Search Platform (SigLIP 2 SO400M)"
    REDIS_URL: str = "redis://localhost:6379/0"
    QDRANT_HOST: str = "localhost"
    QDRANT_PORT: int = 6333
    SIGLIP_MODEL_ID: str = "google/siglip2-so400m-patch16-256"
    # Size of the vectors stored in the Qdrant collection.
    # NOTE(review): presumably the embedding width of SIGLIP_MODEL_ID - confirm.
    VECTOR_DIMENSION: int = 1152

    UPLOAD_DIR: str = "./shared_storage/uploads"
    OUTPUT_DIR: str = "./shared_storage/processed"

    # Pydantic v2 configuration; replaces the deprecated inner ``class Config``.
    model_config = {"env_file": ".env"}


settings = Settings()
app/main.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.staticfiles import StaticFiles # <-- 1. Add this critical import
5
+ from app.api.v1.videos import router as video_router
6
+ from app.core.config import settings
7
+
8
app = FastAPI(
    title=settings.PROJECT_NAME,
    version="1.0.0",
    description="LLD-compliant production-ready Video Search Platform"
)

# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site issue credentialed requests - restrict allow_origins before relying on
# cookies or other browser credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# StaticFiles checks that its directory exists when it is constructed, which
# happens at import time - before the startup hook below runs. Create the
# storage tree first so a fresh deployment can import this module.
os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
os.makedirs(settings.OUTPUT_DIR, exist_ok=True)
os.makedirs("shared_storage", exist_ok=True)

# Serve everything under the local 'shared_storage' folder over HTTP.
# NOTE(review): with the default settings this also exposes the raw uploads
# directory - confirm that is intended.
app.mount("/shared_storage", StaticFiles(directory="shared_storage"), name="shared_storage")


@app.on_event("startup")
def configure_storage_directories():
    """Ensure the upload and output directories exist when the server starts."""
    os.makedirs(settings.UPLOAD_DIR, exist_ok=True)
    os.makedirs(settings.OUTPUT_DIR, exist_ok=True)
    print("[BOOTSTRAP] System shared storage directories verified successfully.")


app.include_router(video_router, prefix="/api/v1/videos", tags=["Videos"])


@app.get("/")
async def health_check():
    """Liveness probe: report the service name with a static healthy status."""
    return {"status": "healthy", "service": settings.PROJECT_NAME}
docker-compose.yml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ redis:
5
+ image: redis:7-alpine
6
+ container_name: youtube_clone_redis
7
+ ports:
8
+ - "6379:6379"
9
+
10
+ qdrant:
11
+ image: qdrant/qdrant:latest
12
+ container_name: youtube_clone_vector_db
13
+ ports:
14
+ - "6333:6333"
15
+ - "6334:6334"
16
+ volumes:
17
+ - qdrant_storage:/qdrant/storage
18
+
19
+ volumes:
20
+ qdrant_storage:
frontend/index.html ADDED
@@ -0,0 +1,429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Multimodal Video Semantic Analytics Platform</title>
7
+
8
+ <script src="https://cdn.tailwindcss.com"></script>
9
+
10
+ <link rel="preconnect" href="https://fonts.googleapis.com">
11
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
12
+ <link href="https://fonts.googleapis.com/css2?family=Plus+Jakarta+Sans:wght@400;500;600;700;800&display=swap" rel="stylesheet">
13
+
14
+ <link href="https://cdn.jsdelivr.net/npm/video.js@8.10.0/dist/video-js.min.css" rel="stylesheet" />
15
+
16
+ <style>
17
+ body {
18
+ font-family: 'Plus Jakarta Sans', sans-serif;
19
+ }
20
+ .instagram-gradient-text {
21
+ background: linear-gradient(45deg, #3b82f6, #8b5cf6, #ec4899);
22
+ -webkit-background-clip: text;
23
+ -webkit-text-fill-color: transparent;
24
+ }
25
+ .instagram-gradient-bg {
26
+ background: linear-gradient(135deg, #2563eb, #7c3aed, #db2777);
27
+ }
28
+ .instagram-gradient-border {
29
+ position: relative;
30
+ }
31
+ .instagram-gradient-border::before {
32
+ content: "";
33
+ position: absolute;
34
+ inset: 0;
35
+ border-radius: 1rem;
36
+ padding: 1.5px;
37
+ background: linear-gradient(135deg, #3b82f6, #8b5cf6, #ec4899);
38
+ -webkit-mask: linear-gradient(#fff 0 0) content-box, linear-gradient(#fff 0 0);
39
+ -webkit-mask-composite: xor;
40
+ mask-composite: exclude;
41
+ pointer-events: none;
42
+ }
43
+ .custom-scrollbar::-webkit-scrollbar {
44
+ width: 5px;
45
+ }
46
+ .custom-scrollbar::-webkit-scrollbar-track {
47
+ background: #09090b;
48
+ }
49
+ .custom-scrollbar::-webkit-scrollbar-thumb {
50
+ background: #27272a;
51
+ border-radius: 999px;
52
+ }
53
+ .custom-scrollbar::-webkit-scrollbar-thumb:hover {
54
+ background: #7c3aed;
55
+ }
56
+ </style>
57
+ </head>
58
+ <body class="bg-[#030303] text-zinc-100 min-h-screen selection:bg-purple-500/30 selection:text-purple-200">
59
+
60
+ <nav class="sticky top-0 bg-black/60 backdrop-blur-xl border-b border-zinc-900 z-50 px-8 py-4 flex items-center justify-between">
61
+ <div class="flex items-center space-x-3 cursor-pointer group" onclick="loadHomepageFeed()">
62
+ <div class="instagram-gradient-bg p-2.5 rounded-xl shadow-[0_0_20px_rgba(124,58,237,0.4)] group-hover:scale-105 transition duration-300">
63
+ <svg class="w-5 h-5 text-white" fill="none" stroke="currentColor" viewBox="0 0 24 24">
64
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M14.752 11.168l-3.197-2.132A1 1 0 0010 9.87v4.263a1 1 0 001.555.832l3.197-2.132a1 1 0 000-1.664z"/>
65
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 12a9 9 0 11-18 0 9 9 0 0118 0z"/>
66
+ </svg>
67
+ </div>
68
+ <span class="font-extrabold text-2xl tracking-tight text-white group-hover:opacity-90 transition">
69
+ AI<span class="instagram-gradient-text">Stream</span>
70
+ </span>
71
+ </div>
72
+
73
+ <div class="w-1/2 flex max-w-xl bg-zinc-900/50 rounded-2xl border border-zinc-800/80 overflow-hidden focus-within:border-purple-500/50 shadow-inner transition duration-300 backdrop-blur-md">
74
+ <input type="text" id="globalSearchInput" placeholder="Search library using title, tags, or visual concepts..."
75
+ class="w-full bg-transparent px-5 py-3 text-sm text-zinc-100 outline-none placeholder-zinc-500">
76
+ <button id="globalSearchButton" onclick="executeGlobalPlatformSearch()" class="instagram-gradient-bg px-6 text-sm font-semibold transition text-white hover:opacity-90 active:scale-95 duration-200">
77
+ Search Feed
78
+ </button>
79
+ </div>
80
+
81
+ <div class="flex items-center space-x-4">
82
+ <button onclick="toggleUploadStudio()" class="bg-zinc-900 hover:bg-zinc-800 border border-zinc-800 hover:border-zinc-700 px-4 py-2 rounded-xl text-xs font-bold tracking-wide transition flex items-center space-x-2">
83
+ <span class="text-base text-purple-400 font-normal">+</span>
84
+ <span>Creator Studio</span>
85
+ </button>
86
+ </div>
87
+ </nav>
88
+
89
+ <main class="p-8 max-w-7xl mx-auto space-y-12">
90
+
91
+ <div id="studioSection" class="hidden bg-zinc-950 rounded-2xl border border-zinc-900 shadow-2xl p-6 transition duration-300 animate-slideDown max-w-3xl mx-auto">
92
+ <div class="flex items-center justify-between border-b border-zinc-900 pb-4 mb-6">
93
+ <div>
94
+ <h3 class="text-lg font-bold tracking-tight text-zinc-100">AI Ingestion Pipeline</h3>
95
+ <p class="text-xs text-zinc-500 mt-0.5">Upload raw media streams to run real-time automated visual frame vector parsing.</p>
96
+ </div>
97
+ <button onclick="toggleUploadStudio()" class="text-zinc-500 hover:text-zinc-300 text-sm font-bold px-2">✕</button>
98
+ </div>
99
+
100
+ <form id="uploadForm" onsubmit="executeVideoUpload(event)" class="space-y-4">
101
+ <div class="grid grid-cols-2 gap-4">
102
+ <div class="space-y-1.5">
103
+ <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Video Title *</label>
104
+ <input type="text" id="uploadTitle" required placeholder="e.g., Game of Thrones Scene" class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition">
105
+ </div>
106
+ <div class="space-y-1.5">
107
+ <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Tags (Comma Separated)</label>
108
+ <input type="text" id="uploadTags" placeholder="e.g., action, fantasy, dragon" class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition">
109
+ </div>
110
+ </div>
111
+ <div class="space-y-1.5">
112
+ <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Description</label>
113
+ <textarea id="uploadDescription" rows="2" placeholder="Provide contextual metadata notes here..." class="w-full bg-zinc-900/60 border border-zinc-800 rounded-xl px-4 py-2.5 text-xs text-zinc-100 outline-none focus:border-purple-500/50 transition resize-none"></textarea>
114
+ </div>
115
+ <div class="space-y-1.5">
116
+ <label class="text-[11px] font-bold tracking-wider text-zinc-400 uppercase">Select Media Track *</label>
117
+ <input type="file" id="uploadFile" required accept=".mp4,.mkv,.avi" class="w-full bg-zinc-900/30 border border-dashed border-zinc-800 rounded-xl px-4 py-4 text-xs text-zinc-400 file:mr-4 file:py-1.5 file:px-3 file:rounded-lg file:border-0 file:text-xs file:font-bold file:bg-purple-600 file:text-white hover:file:bg-purple-500 file:cursor-pointer cursor-pointer transition">
118
+ </div>
119
+ <button type="submit" id="uploadSubmitButton" class="w-full instagram-gradient-bg text-white py-3 rounded-xl text-xs font-bold tracking-wide shadow-lg hover:opacity-90 active:scale-[0.99] transition duration-150">
120
+ Deploy to Processing Cluster
121
+ </button>
122
+ </form>
123
+ </div>
124
+
125
+ <div id="theaterSection" class="hidden bg-zinc-950 rounded-2xl border border-zinc-900 shadow-[0_24px_70px_rgba(0,0,0,0.7)] p-6 transition duration-500">
126
+ <div class="grid grid-cols-1 lg:grid-cols-3 gap-8">
127
+
128
+ <div class="lg:col-span-2 space-y-4">
129
+ <div class="instagram-gradient-border p-[1px] rounded-2xl overflow-hidden bg-zinc-900 shadow-[0_0_40px_rgba(59,130,246,0.15)]">
130
+ <video id="mainVideoPlayer" class="video-js vjs-default-skin vjs-big-play-centered w-full aspect-video rounded-2xl overflow-hidden" controls preload="auto"></video>
131
+ </div>
132
+ <h1 id="playerTitle" class="text-2xl font-bold tracking-tight text-zinc-100 px-1">Video Analysis Engine</h1>
133
+ </div>
134
+
135
+ <div class="bg-zinc-900/40 rounded-2xl p-5 border border-zinc-900 flex flex-col h-[460px] backdrop-blur-md">
136
+ <div class="mb-4">
137
+ <h3 class="font-bold text-xs text-zinc-400 tracking-widest uppercase mb-3">Deep Timeline Vector Finder</h3>
138
+ <div class="flex bg-zinc-950/80 border border-zinc-800 rounded-xl overflow-hidden focus-within:border-pink-500/50 transition">
139
+ <input type="text" id="insideSearchInput" placeholder="Find precise visual moment..." class="w-full bg-transparent px-4 py-2.5 text-xs text-zinc-200 outline-none placeholder-zinc-600">
140
+ <button id="insideSearchButton" onclick="executeInsideVideoSearch()" class="bg-zinc-800 hover:bg-zinc-700 px-4 text-xs font-bold text-zinc-300 transition tracking-wide">Find</button>
141
+ </div>
142
+ </div>
143
+
144
+ <div id="timelineMatches" class="space-y-2.5 overflow-y-auto flex-1 pr-1 custom-scrollbar">
145
+ <p class="text-xs text-zinc-500 text-center py-12 px-4 leading-relaxed">Type an explicit object or concept above to scan frame coordinates inside this clip.</p>
146
+ </div>
147
+ </div>
148
+ </div>
149
+ </div>
150
+
151
+ <div id="feedSection" class="space-y-6">
152
+ <h2 id="feedHeading" class="text-xl font-extrabold tracking-tight flex items-center space-x-2 text-zinc-200">
153
+ <span>Recommended Framework Feeds</span>
154
+ </h2>
155
+ <div id="videoGrid" class="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-8"></div>
156
+ </div>
157
+ </main>
158
+
159
+ <script src="https://cdn.jsdelivr.net/npm/video.js@8.10.0/dist/video.min.js"></script>
160
+ <script>
161
// Base URL of the videos REST API.
const API_BASE = "https://kishanamaliya-database.hf.space/api/v1/videos";
// Page-level state; both stay null until assigned elsewhere in this script.
let player = null;
let currentVideo = null;

document.addEventListener("DOMContentLoaded", () => {
    // Invoke `action` when Enter is pressed inside the input with the given id.
    const runOnEnter = (inputId, action) => {
        document.getElementById(inputId).addEventListener('keydown', (evt) => {
            if (evt.key !== 'Enter') return;
            evt.preventDefault();
            action();
        });
    };

    loadHomepageFeed();

    // Keyboard shortcuts for the two search boxes
    runOnEnter('globalSearchInput', () => executeGlobalPlatformSearch());
    runOnEnter('insideSearchInput', () => executeInsideVideoSearch());
});
183
+
184
// Show or hide the upload ("Creator Studio") panel.
function toggleUploadStudio() {
    document.getElementById('studioSection').classList.toggle('hidden');
}
188
+
189
// Return to the home view: hide the theater, pause playback, and re-render the
// grid from the API's video listing. A non-OK response leaves the grid as is.
async function loadHomepageFeed() {
    const theater = document.getElementById('theaterSection');
    const heading = document.getElementById('feedHeading');

    theater.classList.add('hidden');
    heading.innerText = "Recommended Videos";
    currentVideo = null;
    if (player) {
        player.pause();
    }

    try {
        const response = await fetch(`${API_BASE}/`);
        if (response.ok) {
            const listing = await response.json();
            renderVideoGrid(listing);
        }
    } catch (err) {
        console.error("Feed extraction failure:", err);
    }
}
204
+
205
// Render one card per video into #videoGrid, or an empty-state notice.
//
// `videos` is expected to be an array of objects with `id`, `title`,
// `description` (title/description optional). Anything that is not a non-empty
// array is treated as "no results" instead of throwing inside forEach.
//
// Security: title and description come from the upload form, so they are
// written with textContent rather than interpolated into innerHTML, and the
// purge handler is attached in JS rather than via an inline onclick string
// built from `vid.id`. The innerHTML template below contains only static markup.
function renderVideoGrid(videos) {
    const grid = document.getElementById('videoGrid');
    grid.innerHTML = "";

    if (!Array.isArray(videos) || videos.length === 0) {
        grid.innerHTML = `
            <div class="col-span-full border border-dashed border-zinc-800 rounded-2xl py-16 text-center text-sm text-zinc-500">
                No matching items discovered inside active cache indexes.
            </div>`;
        return;
    }

    videos.forEach(vid => {
        const card = document.createElement('div');
        card.className = "bg-zinc-900/40 rounded-2xl overflow-hidden border border-zinc-900 hover:border-zinc-800/80 transition duration-300 cursor-pointer group shadow-lg hover:-translate-y-1 hover:shadow-[0_12px_40px_rgba(0,0,0,0.5)] flex flex-col relative";
        card.onclick = () => launchVideoTheater(vid, []);

        card.innerHTML = `
            <div class="aspect-video bg-zinc-950 w-full relative overflow-hidden">
                <button data-role="purge"
                    class="absolute top-3 right-3 z-30 bg-black/60 hover:bg-red-600/90 border border-zinc-800 hover:border-red-500 text-zinc-400 hover:text-white w-8 h-8 rounded-xl flex items-center justify-center transition duration-200 opacity-0 group-hover:opacity-100 shadow-md backdrop-blur-md"
                    title="Purge Video Assets">
                    <svg class="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
                    </svg>
                </button>

                <div class="absolute inset-0 bg-gradient-to-t from-black/80 via-black/20 to-transparent opacity-60 group-hover:opacity-40 transition z-10"></div>
                <div class="absolute inset-0 flex items-center justify-center z-20">
                    <div class="w-12 h-12 rounded-full bg-white/10 backdrop-blur-md border border-white/20 flex items-center justify-center text-white opacity-0 group-hover:opacity-100 group-hover:scale-110 transition duration-300">
                        <span class="text-xl ml-1">▶</span>
                    </div>
                </div>
            </div>
            <div class="p-5 flex-1 flex flex-col justify-between space-y-2">
                <div>
                    <h4 data-role="title" class="font-bold text-zinc-100 group-hover:text-purple-400 transition duration-200 line-clamp-2 leading-snug tracking-tight text-base"></h4>
                    <p data-role="description" class="text-xs text-zinc-400 mt-1.5 line-clamp-2 leading-relaxed font-normal"></p>
                </div>
            </div>
        `;

        // Untrusted text goes in as text nodes, never as markup.
        card.querySelector('[data-role="title"]').textContent = vid.title || 'Untitled Video';
        card.querySelector('[data-role="description"]').textContent = vid.description || 'No description payload configured.';

        // Stop propagation so deleting a video does not also open the theater.
        card.querySelector('[data-role="purge"]').addEventListener('click', (evt) => {
            evt.stopPropagation();
            executeVideoPurge(vid.id);
        });

        grid.appendChild(card);
    });
}
249
+
250
// Submit handler for the upload form: posts the chosen file plus title,
// description and tags as multipart form data to `${API_BASE}/upload`.
// The submit button is disabled while the request is in flight and always
// restored afterwards. Does nothing when no file is selected.
async function executeVideoUpload(event) {
    event.preventDefault();

    const byId = (id) => document.getElementById(id);
    const selectedFile = byId('uploadFile').files[0];
    const submitButton = byId('uploadSubmitButton');

    if (!selectedFile) {
        return;
    }

    const payload = new FormData();
    payload.append("file", selectedFile);
    const textFields = [
        ["title", 'uploadTitle'],
        ["description", 'uploadDescription'],
        ["tags", 'uploadTags'],
    ];
    for (const [fieldName, inputId] of textFields) {
        payload.append(fieldName, byId(inputId).value.trim());
    }

    submitButton.innerText = "Processing Neural Video Extraction (Running Celery Frame Pipeline)...";
    submitButton.disabled = true;

    try {
        const response = await fetch(`${API_BASE}/upload`, { method: "POST", body: payload });
        if (!response.ok) {
            throw new Error("Backend reject pipeline format parameters.");
        }

        alert("Video successfully queued! The automated background task is now generating SigLIP 2 frame vectors. Refresh the home library in a few seconds.");
        byId('uploadForm').reset();
        toggleUploadStudio();
        loadHomepageFeed();
    } catch (err) {
        console.error("Ingestion fault:", err);
        alert("Processing failed. Verify server storage disk nodes and connection protocols.");
    } finally {
        submitButton.innerText = "Deploy to Processing Cluster";
        submitButton.disabled = false;
    }
}
292
+
293
// Ask for confirmation, then issue DELETE `${API_BASE}/{videoId}` and reload
// the feed on success. Failures are logged and reported with an alert.
async function executeVideoPurge(videoId) {
    const confirmed = confirm("Are you absolute sure you want to permanently delete this video, erase its static transcoded disk assets, and clear its high-dimensional Qdrant records?");
    if (!confirmed) return;

    try {
        const response = await fetch(`${API_BASE}/${videoId}`, { method: "DELETE" });
        if (!response.ok) {
            throw new Error("Delete requested aborted by database logic.");
        }

        alert("Video collection vectors and data successfully deleted from server.");
        // Re-fetch the listing so the removed card disappears.
        loadHomepageFeed();
    } catch (err) {
        console.error("Deletion fault:", err);
        alert("Failed to wipe video assets cleanly from disk coordinates.");
    }
}
314
+
315
// Open the theater view for `video`, start HLS playback, and list `matches`
// (array of {timestamp, score}; may be empty) in the sidebar.
//
// The playlist path is resolved against API_BASE's origin. It was previously
// hard-coded to http://127.0.0.1:8000, which only works on a developer machine
// and is blocked as mixed content when the page is served over https.
// NOTE(review): assumes `video.hls_playlist_url` is a path served by the same
// host as the API — confirm against the backend's static mount.
function launchVideoTheater(video, matches) {
    currentVideo = video;
    document.getElementById('theaterSection').classList.remove('hidden');
    // Same fallback the grid cards use, so an untitled video never shows "undefined".
    document.getElementById('playerTitle').innerText = video.title || 'Untitled Video';
    document.getElementById('insideSearchInput').value = "";
    window.scrollTo({ top: 0, behavior: 'smooth' });

    const playlistUrl = new URL(video.hls_playlist_url, API_BASE).href;

    // Short delay before touching the player, kept from the original code.
    setTimeout(() => {
        if (!player) {
            player = videojs('mainVideoPlayer', {
                controls: true,
                autoplay: true,
                preload: 'auto',
                fluid: true,
                responsive: true
            });
        }

        player.src({
            src: playlistUrl,
            type: 'application/x-mpegURL'
        });

        player.load();
        // Autoplay may be rejected until the user interacts with the page.
        player.play().catch(err => console.log("Awaiting activation interaction flag.", err));
    }, 50);

    renderSidebarMatches(matches);
}
344
+
345
// Fill the #timelineMatches sidebar with one clickable row per match
// ({timestamp, score}); clicking a row seeks the player to that timestamp.
// With no matches, a hint message is shown instead.
function renderSidebarMatches(matches) {
    const sidebar = document.getElementById('timelineMatches');
    sidebar.innerHTML = "";

    const hasMatches = Boolean(matches) && matches.length !== 0;
    if (!hasMatches) {
        sidebar.innerHTML = `
            <div class="py-12 px-4 text-center">
                <p class="text-xs text-zinc-500 leading-relaxed">Regular streaming track. Use the search field above to cross-reference visual moments in real-time.</p>
            </div>`;
        return;
    }

    // Build and append the row for a single match.
    const appendRow = (match) => {
        const row = document.createElement('div');
        row.className = "bg-zinc-950/60 hover:bg-zinc-900 p-3.5 rounded-xl border border-zinc-900/60 flex items-center justify-between cursor-pointer transition duration-200 group active:scale-[0.98]";
        row.onclick = () => player.currentTime(match.timestamp);

        const label = formatTime(match.timestamp);
        const percent = (match.score * 100).toFixed(1);
        row.innerHTML = `
            <div class="flex flex-col space-y-0.5">
                <span class="text-xs font-bold text-purple-400 group-hover:text-pink-400 transition group-hover:underline">Moment @ ${label}</span>
                <span class="text-[10px] text-zinc-500 font-medium">Metric Score: ${percent}%</span>
            </div>
            <div class="w-7 h-7 rounded-lg bg-zinc-900 border border-zinc-800 group-hover:border-purple-500/40 flex items-center justify-center transition">
                <span class="text-zinc-500 group-hover:text-purple-400 text-xs transform translate-x-[1px] transition">➔</span>
            </div>
        `;
        sidebar.appendChild(row);
    };

    matches.forEach((match) => appendRow(match));
}
374
+
375
// Run a platform-wide search for the text in #globalSearchInput and render the
// returned videos in the grid. Empty queries are ignored.
//
// A non-OK HTTP status is now treated as a failure. Previously the error body
// was parsed and handed to renderVideoGrid as if it were a result list.
async function executeGlobalPlatformSearch() {
    const query = document.getElementById('globalSearchInput').value.trim();
    if (!query) return;

    const btn = document.getElementById('globalSearchButton');
    btn.innerText = "Searching...";
    document.getElementById('feedHeading').innerText = `Search Engine Matches: "${query}"`;

    try {
        const res = await fetch(`${API_BASE}/global-search?query=${encodeURIComponent(query)}&top_k=20`);
        if (!res.ok) throw new Error(`Global search request failed with HTTP ${res.status}`);
        const data = await res.json();
        renderVideoGrid(data);
    } catch (err) {
        console.error("Global core query failure:", err);
    } finally {
        // Always restore the button label, whether the search succeeded or not.
        btn.innerText = "Search Feed";
    }
}
393
+
394
// Search for visual moments matching #insideSearchInput and list, in
// chronological order, the hits that belong to the video currently playing.
// Does nothing when no video is open or the query is empty.
//
// Fixes over the previous version:
//  - a non-OK HTTP status is treated as a failure instead of being parsed;
//  - a response without a `results` array no longer throws;
//  - the video id is captured before the request, and the response is dropped
//    if the user has closed or switched videos in the meantime.
async function executeInsideVideoSearch() {
    if (!currentVideo) return;
    const query = document.getElementById('insideSearchInput').value.trim();
    if (!query) return;

    const requestedVideoId = currentVideo.id;
    const btn = document.getElementById('insideSearchButton');
    btn.innerText = "...";

    try {
        const res = await fetch(`${API_BASE}/search?query=${encodeURIComponent(query)}&top_k=20`);
        if (!res.ok) throw new Error(`Timeline search request failed with HTTP ${res.status}`);
        const data = await res.json();

        // Stale response: the theater now shows a different video (or none).
        if (!currentVideo || currentVideo.id !== requestedVideoId) return;

        const hits = Array.isArray(data.results) ? data.results : [];
        const activeVideoMatches = hits
            .filter(hit => hit.video_id === requestedVideoId)
            .map(hit => ({
                timestamp: hit.timestamp_seconds,
                score: hit.score
            }))
            .sort((a, b) => a.timestamp - b.timestamp);

        renderSidebarMatches(activeVideoMatches);
    } catch (err) {
        console.error("Timeline vector resolution fault:", err);
    } finally {
        btn.innerText = "Find";
    }
}
421
+
422
// Format a duration in seconds as "m:ss" (fractions of a second are dropped,
// minutes are not capped at 59).
function formatTime(secs) {
    const minutes = Math.floor(secs / 60);
    const seconds = Math.floor(secs % 60);
    const pad = seconds < 10 ? '0' : '';
    return minutes + ':' + pad + seconds;
}
427
+ </script>
428
+ </body>
429
+ </html>
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110.0
2
+ uvicorn>=0.28.0
3
+ celery>=5.3.6
4
+ redis>=5.0.3
5
+ pydantic>=2.6.4
6
+ pydantic-settings>=2.2.1
7
+ qdrant-client>=1.8.0
8
+ ffmpeg-python>=0.2.0
9
+ torch>=2.6.0
10
+ transformers>=4.40.0
11
+ pillow>=10.2.0
12
+ python-multipart>=0.0.9
start.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entrypoint: runs the Celery worker and the FastAPI gateway together.

# Start the Celery background worker process concurrently.
# NOTE(review): the Celery app object visible in workers/celery_app.py is
# `workers.celery_app.celery_app`; confirm that `app.api.v1.videos.celery_client`
# is the intended -A target.
celery -A app.api.v1.videos.celery_client worker --loglevel=info --concurrency=1 &

# Start the primary FastAPI gateway app on port 7860 (Hugging Face default).
# `exec` replaces this shell with uvicorn so that it receives the container's
# stop signal directly; without it bash stays PID 1 and does not forward SIGTERM.
exec uvicorn app.main:app --host 0.0.0.0 --port 7860
workers/__init__.py ADDED
File without changes
workers/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (162 Bytes). View file
 
workers/__pycache__/celery_app.cpython-313.pyc ADDED
Binary file (663 Bytes). View file
 
workers/__pycache__/ml_pipeline.cpython-313.pyc ADDED
Binary file (4.65 kB). View file
 
workers/__pycache__/tasks.cpython-313.pyc ADDED
Binary file (3.51 kB). View file
 
workers/celery_app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from celery import Celery
2
+ from app.core.config import settings
3
+
4
# Celery application shared by the API and the worker. Redis (settings.REDIS_URL)
# serves as both broker and result backend; task definitions are loaded from
# the `workers.tasks` module.
_redis_url = settings.REDIS_URL
celery_app = Celery(
    "video_tasks",
    broker=_redis_url,
    backend=_redis_url,
    include=["workers.tasks"],
)

# Enterprise task processing configurations
_conf = celery_app.conf
_conf.task_serializer = "json"
_conf.accept_content = ["json"]
_conf.result_serializer = "json"
_conf.timezone = "UTC"
_conf.enable_utc = True
# Recommended as 1 if running heavy ViT models locally to prevent memory thrashing
_conf.worker_concurrency = 1
workers/ml_pipeline.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from PIL import Image
3
+ from transformers import AutoProcessor, AutoModel
4
+ from typing import List
5
+ from app.core.config import settings
6
+
7
class Siglip2EmbeddingPipeline:
    """
    Inference wrapper that embeds text and images with the model named by
    ``settings.SIGLIP_MODEL_ID`` (described by the original author as
    SigLIP 2, 1152-D).

    The model runs on CUDA when available, otherwise on Apple MPS, otherwise
    on CPU. All returned vectors are L2-normalized.
    """

    def __init__(self) -> None:
        """Pick a device, then load the processor and model in eval mode."""
        if torch.cuda.is_available():
            chosen_device = "cuda"
        else:
            chosen_device = "mps" if torch.backends.mps.is_available() else "cpu"
        self.device = chosen_device

        self.model_id = settings.SIGLIP_MODEL_ID

        print(f"[AI ENGINE] Booting SigLIP 2 SO400M on device accelerator: {self.device}...")
        self.processor = AutoProcessor.from_pretrained(self.model_id)
        loaded_model = AutoModel.from_pretrained(self.model_id)
        self.model = loaded_model.to(self.device)
        self.model.eval()
        print("[AI ENGINE] Multimodal network parameters successfully mapped and frozen.")

    def get_text_embedding(self, text: str) -> List[float]:
        """Return the L2-normalized embedding of a single text query."""
        with torch.no_grad():
            encoded = self.processor(text=[text], padding="max_length", return_tensors="pt")
            encoded = encoded.to(self.device)
            raw = self.model.get_text_features(**encoded)

            # The model may return either a bare tensor or an output object
            # carrying the tensor in `pooler_output`; accept both.
            features = getattr(raw, "pooler_output", raw)

            # Unit-length vectors, so cosine similarity works downstream.
            unit = features / features.norm(dim=-1, keepdim=True)
            return unit.squeeze(0).cpu().tolist()

    def get_image_batch_embeddings(self, image_paths: List[str]) -> List[List[float]]:
        """
        Return one L2-normalized embedding per readable image in ``image_paths``.

        Images that fail to open are logged and skipped, so the result may be
        shorter than the input. Returns ``[]`` when nothing could be read.
        """
        loaded = []
        for frame_path in image_paths:
            try:
                picture = Image.open(frame_path).convert("RGB")
            except Exception as e:
                print(f"[AI ENGINE] Error reading frame asset {frame_path}: {str(e)}")
            else:
                loaded.append(picture)

        if len(loaded) == 0:
            return []

        with torch.no_grad():
            encoded = self.processor(images=loaded, return_tensors="pt")
            encoded = encoded.to(self.device)
            raw = self.model.get_image_features(**encoded)

            # Same unwrapping as for text: tensor or `pooler_output` holder.
            features = getattr(raw, "pooler_output", raw)

            unit = features / features.norm(dim=-1, keepdim=True)
            return unit.cpu().tolist()
workers/tasks.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from workers.celery_app import celery_app
3
+ from workers.ml_pipeline import Siglip2EmbeddingPipeline
4
+ from app.LLD.ffmpeg_strategy import LocalFFmpegStrategy
5
+ from app.LLD.qdrant_strategy import QdrantVectorStoreStrategy
6
+ from app.core.config import settings
7
+
8
# Global placeholders for safe, lazy runtime evaluation.
# Each stays None at import time and is filled in on first use by the matching
# accessor below (get_ffmpeg_strategy / get_vector_store / load_ai_engine).
ffmpeg_strategy = None  # LocalFFmpegStrategy instance once created
vector_store = None  # QdrantVectorStoreStrategy instance once created
ai_engine_pipeline = None  # Siglip2EmbeddingPipeline instance once created
12
+
13
def get_ffmpeg_strategy():
    """Return the module-wide LocalFFmpegStrategy, creating it on first call."""
    global ffmpeg_strategy
    if ffmpeg_strategy is not None:
        return ffmpeg_strategy
    ffmpeg_strategy = LocalFFmpegStrategy()
    return ffmpeg_strategy
18
+
19
def get_vector_store():
    """Return the module-wide QdrantVectorStoreStrategy, creating it on first call."""
    global vector_store
    if vector_store is not None:
        return vector_store
    vector_store = QdrantVectorStoreStrategy()
    return vector_store
24
+
25
def load_ai_engine():
    """Return the module-wide Siglip2EmbeddingPipeline, loading it on first call."""
    global ai_engine_pipeline
    if ai_engine_pipeline is not None:
        return ai_engine_pipeline
    ai_engine_pipeline = Siglip2EmbeddingPipeline()
    return ai_engine_pipeline
30
+
31
+
32
def _embed_frames_aligned(ai_model, frames_metadata, batch_size=16):
    """Embed frames in batches and return ``(vectors, metadata)`` of equal length.

    ``ai_model.get_image_batch_embeddings`` skips frames it cannot read, so a
    batch may come back shorter than requested. When that happens the batch is
    re-embedded one frame at a time, and metadata entries with no vector are
    dropped so that ``vectors[i]`` always belongs to ``metadata[i]``.
    """
    vectors = []
    kept_metadata = []

    for start in range(0, len(frames_metadata), batch_size):
        chunk = frames_metadata[start:start + batch_size]
        chunk_vectors = ai_model.get_image_batch_embeddings(
            [item["file_path"] for item in chunk]
        )

        if len(chunk_vectors) == len(chunk):
            vectors.extend(chunk_vectors)
            kept_metadata.extend(chunk)
            continue

        # At least one frame was unreadable; find out which by going one by one.
        for item in chunk:
            single = ai_model.get_image_batch_embeddings([item["file_path"]])
            if single:
                vectors.append(single[0])
                kept_metadata.append(item)
            else:
                print(f"[WORKER] Skipping unreadable frame: {item['file_path']}")

    return vectors, kept_metadata


@celery_app.task(name="workers.tasks.process_video_pipeline")
def process_video_pipeline(video_id: str, raw_file_path: str) -> bool:
    """Transcode a video to HLS, embed its keyframes, and index them.

    Steps: HLS transcoding, keyframe extraction at 1-second intervals, batched
    image embedding, then an upsert of the vectors with their frame metadata.

    Args:
        video_id: Identifier of the video; also the output subdirectory name
            under ``settings.OUTPUT_DIR``.
        raw_file_path: Path of the uploaded source file.

    Returns:
        The result of ``upsert_embeddings`` on success. ``False`` when no
        frames were extracted, none could be embedded, or any step raised.
    """
    print(f"[WORKER CHOREOGRAPHER] Commencing processing pipeline layout for ID: {video_id}")
    output_dir = os.path.join(settings.OUTPUT_DIR, video_id)

    # Instantiate strategies inside the execution block instead of the import layer
    ffmpeg_engine = get_ffmpeg_strategy()
    db_vector_store = get_vector_store()
    ai_model = load_ai_engine()

    try:
        # Step 1: HLS transcoding
        playlist_path = ffmpeg_engine.transcode_to_hls(raw_file_path, output_dir)
        print(f"[WORKER] Transcoding complete: {playlist_path}")

        # Step 2: Keyframe extraction
        frames_metadata = ffmpeg_engine.extract_keyframes(raw_file_path, output_dir, interval_seconds=1)
        print(f"[WORKER] Extracted {len(frames_metadata)} frames.")

        if not frames_metadata:
            return False

        # Step 3: SigLIP 2 Batch Matrix Encoding, keeping vectors and metadata
        # index-aligned even if some frames cannot be read.
        all_computed_vectors, aligned_metadata = _embed_frames_aligned(
            ai_model, frames_metadata, batch_size=16
        )

        if not all_computed_vectors:
            print(f"[WORKER] No frame could be embedded for ID: {video_id}")
            return False

        # Step 4: Sync to Qdrant Space
        return db_vector_store.upsert_embeddings(
            video_id=video_id,
            embeddings=all_computed_vectors,
            metadata=aligned_metadata
        )

    except Exception as e:
        # Task boundary: report the failure to the caller as False instead of raising.
        print(f"[WORKER CRITICAL SHUTDOWN] Ingestion routine dropped: {str(e)}")
        return False
74
+
75
+
76
@celery_app.task(name="workers.tasks.generate_text_embedding")
def generate_text_embedding(query_text: str) -> list[float]:
    """Embed a search query with the lazily loaded embedding pipeline."""
    # The model is loaded on the first call and reused afterwards.
    return load_ai_engine().get_text_embedding(query_text)