Utkarshres32 commited on
Commit
2758540
·
1 Parent(s): e2b09b1

Deploy Sentinelai API backend

Browse files
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ .env*
4
+ __pycache__/
5
+ *.pyc
6
+ .venv/
7
+ venv/
8
+ env/
Dockerfile ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim
3
+
4
+ # Set environment variables for Python
5
+ ENV PYTHONDONTWRITEBYTECODE=1
6
+ ENV PYTHONUNBUFFERED=1
7
+
8
+ # Hugging Face Spaces uses port 7860 by default
9
+ ENV PORT=7860
10
+
11
+ # Install system dependencies required for OpenCV and AI models
12
+ RUN apt-get update && apt-get install -y \
13
+ libgl1-mesa-glx \
14
+ libglib2.0-0 \
15
+ libsm6 \
16
+ libxext6 \
17
+ libxrender-dev \
18
+ git \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Set the working directory in the container
22
+ WORKDIR /app
23
+
24
+ # Copy the requirements file into the container
25
+ COPY requirements.txt .
26
+
27
+ # Install Python dependencies
28
+ # We specifically install the CPU-only version of PyTorch to save space and avoid GPU driver issues on the free tier
29
+ RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
30
+ RUN pip install --no-cache-dir -r requirements.txt
31
+
32
+ # Copy the current directory contents into the container at /app
33
+ COPY . .
34
+
35
+ # Create directories for static files and databases
36
+ RUN mkdir -p static/thumbnails static/anomalies database/data
37
+ RUN chmod -R 777 static database
38
+
39
+ # Expose the port Hugging Face expects
40
+ EXPOSE 7860
41
+
42
+ # Command to run the FastAPI server
43
+ CMD uvicorn app:app --host 0.0.0.0 --port $PORT
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py - Main FastAPI Application Entry Point
3
+ Multimodal Surveillance Intelligence System
4
+ Run with: uvicorn app:app --host 0.0.0.0 --port 8000 --reload
5
+ """
6
+ import asyncio
7
+ import time
8
+ from contextlib import asynccontextmanager
9
+
10
+ from fastapi import FastAPI, Request
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ from fastapi.responses import JSONResponse
13
+ from fastapi.staticfiles import StaticFiles
14
+ from loguru import logger
15
+ import psutil
16
+ import os
17
+
18
+ from config import settings, DEVICE
19
+
20
+ # ── Global Singletons (initialized on startup) ────────────────────────────────
21
+ vision_pipeline = None
22
+ search_engine = None
23
+ qa_system = None
24
+ report_generator = None
25
+ summarizer = None
26
+ movement_graph = None
27
+ audio_asr = None
28
+ audio_classifier = None
29
+
30
+
31
+ @asynccontextmanager
32
+ async def lifespan(app: FastAPI):
33
+ """Application startup/shutdown lifecycle."""
34
+ global vision_pipeline, search_engine, qa_system, report_generator
35
+ global summarizer, movement_graph, audio_asr, audio_classifier
36
+
37
+ logger.info("=" * 60)
38
+ logger.info(f"🚀 Starting {settings.APP_NAME} v{settings.APP_VERSION}")
39
+ logger.info(f" Device: {DEVICE}")
40
+ logger.info("=" * 60)
41
+
42
+ # 1. Initialize database tables
43
+ logger.info("📦 Initializing database...")
44
+ from database.session import create_tables
45
+ await create_tables()
46
+
47
+ # 2. Load Vision Pipeline
48
+ logger.info("🎥 Loading Vision Pipeline...")
49
+ from vision.pipeline import VisionPipeline
50
+ vision_pipeline = VisionPipeline()
51
+
52
+ # 3. Load NLP Components
53
+ logger.info("💬 Loading NLP: Semantic Search...")
54
+ from nlp.search import SemanticSearchEngine
55
+ search_engine = SemanticSearchEngine()
56
+
57
+ logger.info("❓ Loading NLP: QA System...")
58
+ from nlp.qa import SurveillanceQA
59
+ qa_system = SurveillanceQA()
60
+
61
+ logger.info("📝 Loading NLP: Report Generator...")
62
+ from nlp.report import IncidentReportGenerator
63
+ report_generator = IncidentReportGenerator()
64
+
65
+ logger.info("📋 Loading NLP: Summarizer...")
66
+ from nlp.summarizer import SurveillanceSummarizer
67
+ summarizer = SurveillanceSummarizer()
68
+
69
+ # 4. Load Graph Module
70
+ logger.info("🕸️ Initializing Movement Graph...")
71
+ from graph.movement_graph import MovementGraph
72
+ movement_graph = MovementGraph()
73
+
74
+ # 5. Load Audio (optional)
75
+ if settings.ENABLE_AUDIO:
76
+ logger.info("🎙️ Loading Audio Module...")
77
+ from audio.audio_module import WhisperASR, AudioClassifier
78
+ audio_asr = WhisperASR()
79
+ audio_classifier = AudioClassifier()
80
+
81
+ logger.info("✅ All components loaded successfully!")
82
+ logger.info(f"📊 Memory usage: {psutil.Process().memory_info().rss / 1e6:.1f} MB")
83
+
84
+ yield # App is running
85
+
86
+ # Shutdown
87
+ logger.info("🛑 Shutting down Surveillance System...")
88
+ from vision.stream_manager import stream_manager
89
+ stream_manager.shutdown()
90
+
91
+
92
+ # ── FastAPI App ────────────────────────────────────────────────────────────────
93
+
94
+ app = FastAPI(
95
+ title=settings.APP_NAME,
96
+ version=settings.APP_VERSION,
97
+ description="""
98
+ **Multimodal Surveillance Intelligence System**
99
+
100
+ Capabilities:
101
+ - 🎥 Real-time multi-camera video processing
102
+ - 👤 Person detection (DETR) + Multi-object tracking (ByteTrack)
103
+ - 🔍 Cross-camera Re-Identification (ViT + FAISS)
104
+ - 👗 Clothing/attribute recognition (CLIP zero-shot)
105
+ - 💬 Semantic search over surveillance logs
106
+ - ❓ Natural language Q&A over events
107
+ - 📝 Automated incident report generation
108
+ - 🕸️ Movement graph anomaly detection
109
+ """,
110
+ lifespan=lifespan,
111
+ docs_url="/docs",
112
+ redoc_url="/redoc",
113
+ )
114
+
115
+ # ── CORS ───────────────────────────────────────────────────────────────────────
116
+
117
+ app.add_middleware(
118
+ CORSMiddleware,
119
+ allow_origins=settings.CORS_ORIGINS + ["*"],
120
+ allow_credentials=True,
121
+ allow_methods=["*"],
122
+ allow_headers=["*"],
123
+ )
124
+
125
+ # ── Request Latency Logging Middleware ─────────────────────────────────────────
126
+
127
+ @app.middleware("http")
128
+ async def log_requests(request: Request, call_next):
129
+ t0 = time.perf_counter()
130
+ response = await call_next(request)
131
+ latency = (time.perf_counter() - t0) * 1000
132
+ logger.debug(f"{request.method} {request.url.path} → {response.status_code} ({latency:.1f}ms)")
133
+ response.headers["X-Process-Time-Ms"] = f"{latency:.2f}"
134
+ return response
135
+
136
+ # ── Register Routers ────────────────────────────────────────────────────────��──
137
+
138
+ from routes.vision_routes import router as vision_router
139
+ from routes.nlp_routes import router as nlp_router
140
+ from routes.stream_routes import router as stream_router
141
+
142
+ app.include_router(vision_router)
143
+ app.include_router(nlp_router)
144
+ app.include_router(stream_router)
145
+
146
+ # ── Static Files ───────────────────────────────────────────────────────────────
147
+
148
+ # Create static directory if it doesn't exist
149
+ os.makedirs("static/thumbnails", exist_ok=True)
150
+ app.mount("/static", StaticFiles(directory="static"), name="static")
151
+
152
+
153
+ # ── Health & Status Routes ─────────────────────────────────────────────────────
154
+
155
+ @app.get("/", tags=["Health"])
156
+ async def root():
157
+ return {
158
+ "system": settings.APP_NAME,
159
+ "version": settings.APP_VERSION,
160
+ "status": "operational",
161
+ "device": str(DEVICE),
162
+ "docs": "/docs",
163
+ }
164
+
165
+
166
+ @app.get("/health", tags=["Health"])
167
+ async def health_check():
168
+ mem = psutil.Process().memory_info().rss / 1e6
169
+ cpu = psutil.cpu_percent(interval=0.1)
170
+ import torch
171
+ gpu_info = {}
172
+ if torch.cuda.is_available():
173
+ gpu_info = {
174
+ "name": torch.cuda.get_device_name(0),
175
+ "memory_allocated_mb": round(torch.cuda.memory_allocated(0) / 1e6, 1),
176
+ "memory_reserved_mb": round(torch.cuda.memory_reserved(0) / 1e6, 1),
177
+ }
178
+ return {
179
+ "status": "healthy",
180
+ "device": str(DEVICE),
181
+ "memory_mb": round(mem, 1),
182
+ "cpu_percent": cpu,
183
+ "gpu": gpu_info,
184
+ "models_loaded": {
185
+ "vision_pipeline": vision_pipeline is not None,
186
+ "search_engine": search_engine is not None,
187
+ "qa_system": qa_system is not None,
188
+ "report_generator": report_generator is not None,
189
+ "summarizer": summarizer is not None,
190
+ "movement_graph": movement_graph is not None,
191
+ "audio": audio_asr is not None,
192
+ },
193
+ }
194
+
195
+
196
+ @app.get("/metrics", tags=["Health"])
197
+ async def prometheus_metrics():
198
+ """Basic Prometheus-style metrics."""
199
+ from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
200
+ from starlette.responses import Response
201
+ return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
202
+
203
+
204
+ # ── Entry point ────────────────────────────────────────────────────────────────
205
+
206
+ if __name__ == "__main__":
207
+ import uvicorn
208
+ uvicorn.run(
209
+ "app:app",
210
+ host=settings.HOST,
211
+ port=settings.PORT,
212
+ reload=settings.DEBUG,
213
+ workers=1, # 1 worker required for shared model singletons
214
+ log_level="info",
215
+ )
audio/__init__.py ADDED
File without changes
audio/audio_module.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ audio/asr.py - Automatic Speech Recognition using openai/whisper-small
3
+ audio/classifier.py - Audio classification using wav2vec2
4
+ audio/tts.py - Text-to-Speech using microsoft/speecht5_tts
5
+ """
6
+ import time
7
+ import torch
8
+ import numpy as np
9
+ from typing import Dict, Optional, Union
10
+ from loguru import logger
11
+ from config import settings, DEVICE
12
+
13
+
14
+ # ═══════════════════════════════════════════════════════════════════════════
15
+ # ASR - Automatic Speech Recognition
16
+ # ═══════════════════════════════════════════════════════════════════════════
17
+
18
+ class WhisperASR:
19
+ """Transcribes audio using openai/whisper-small."""
20
+
21
+ def __init__(self):
22
+ if not settings.ENABLE_AUDIO:
23
+ logger.info("Audio module disabled. Set ENABLE_AUDIO=true to activate.")
24
+ self._ready = False
25
+ return
26
+ logger.info(f"Loading Whisper ASR model: {settings.WHISPER_MODEL}")
27
+ try:
28
+ import whisper
29
+ model_name = settings.WHISPER_MODEL.split("/")[-1] # "whisper-small" → "small"
30
+ self.model = whisper.load_model(model_name, device=str(DEVICE))
31
+ self._ready = True
32
+ logger.info("✅ WhisperASR ready.")
33
+ except ImportError:
34
+ logger.warning("openai-whisper not installed. ASR unavailable.")
35
+ self._ready = False
36
+
37
+ def transcribe(self, audio_path: str, language: Optional[str] = None) -> Dict:
38
+ if not self._ready:
39
+ return {"text": "", "error": "ASR not available"}
40
+ t0 = time.perf_counter()
41
+ opts = {}
42
+ if language:
43
+ opts["language"] = language
44
+ result = self.model.transcribe(audio_path, **opts)
45
+ latency_ms = (time.perf_counter() - t0) * 1000
46
+ return {
47
+ "text": result["text"],
48
+ "language": result.get("language", "unknown"),
49
+ "segments": result.get("segments", []),
50
+ "latency_ms": round(latency_ms, 2),
51
+ }
52
+
53
+ def transcribe_bytes(self, audio_bytes: bytes, sample_rate: int = 16000) -> Dict:
54
+ import tempfile, soundfile as sf, os
55
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
56
+ tmp_path = f.name
57
+ try:
58
+ audio_array = np.frombuffer(audio_bytes, dtype=np.float32)
59
+ sf.write(tmp_path, audio_array, sample_rate)
60
+ return self.transcribe(tmp_path)
61
+ finally:
62
+ os.unlink(tmp_path)
63
+
64
+
65
+ # ═══════════════════════════════════════════════════════════════════════════
66
+ # Audio Classifier
67
+ # ═══════════════════════════════════════════════════════════════════════════
68
+
69
+ class AudioClassifier:
70
+ """Classifies audio events (gunshot, scream, etc.) using wav2vec2."""
71
+
72
+ KEYWORDS = ["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go"]
73
+
74
+ def __init__(self):
75
+ if not settings.ENABLE_AUDIO:
76
+ self._ready = False
77
+ return
78
+ logger.info(f"Loading audio classifier: {settings.AUDIO_CLASS_MODEL}")
79
+ try:
80
+ from transformers import pipeline
81
+ device_id = 0 if str(DEVICE) == "cuda" else -1
82
+ self.classifier = pipeline(
83
+ "audio-classification",
84
+ model=settings.AUDIO_CLASS_MODEL,
85
+ device=device_id,
86
+ )
87
+ self._ready = True
88
+ logger.info("✅ AudioClassifier ready.")
89
+ except Exception as e:
90
+ logger.warning(f"AudioClassifier init failed: {e}")
91
+ self._ready = False
92
+
93
+ def classify(self, audio_path: str, top_k: int = 5) -> Dict:
94
+ if not self._ready:
95
+ return {"classes": [], "error": "Audio classifier not available"}
96
+ t0 = time.perf_counter()
97
+ results = self.classifier(audio_path, top_k=top_k)
98
+ latency_ms = (time.perf_counter() - t0) * 1000
99
+ return {
100
+ "classes": [{"label": r["label"], "score": round(r["score"], 4)} for r in results],
101
+ "latency_ms": round(latency_ms, 2),
102
+ }
103
+
104
+
105
+ # ═══════════════════════════════════════════════════════════════════════════
106
+ # TTS - Text to Speech
107
+ # ══════════════════════════════════════════════════════════��════════════════
108
+
109
+ class SpeechSynthesizer:
110
+ """Generates speech from text using microsoft/speecht5_tts."""
111
+
112
+ def __init__(self):
113
+ if not settings.ENABLE_AUDIO:
114
+ self._ready = False
115
+ return
116
+ logger.info(f"Loading TTS model: {settings.TTS_MODEL}")
117
+ try:
118
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
119
+ from datasets import load_dataset
120
+ self.processor = SpeechT5Processor.from_pretrained(settings.TTS_MODEL)
121
+ self.model = SpeechT5ForTextToSpeech.from_pretrained(settings.TTS_MODEL).to(DEVICE)
122
+ self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(DEVICE)
123
+ # Load speaker embeddings
124
+ ds = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
125
+ self.speaker_embeddings = torch.tensor(ds[7306]["xvector"]).unsqueeze(0).to(DEVICE)
126
+ self._ready = True
127
+ logger.info("✅ SpeechSynthesizer ready.")
128
+ except Exception as e:
129
+ logger.warning(f"TTS init failed: {e}")
130
+ self._ready = False
131
+
132
+ def synthesize(self, text: str) -> Optional[np.ndarray]:
133
+ """Synthesize text to audio. Returns numpy array (float32) or None."""
134
+ if not self._ready:
135
+ return None
136
+ inputs = self.processor(text=text, return_tensors="pt").to(DEVICE)
137
+ with torch.inference_mode():
138
+ speech = self.model.generate_speech(
139
+ inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder
140
+ )
141
+ return speech.cpu().numpy()
config.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ config.py - Central configuration for Multimodal Surveillance Intelligence System
3
+ """
4
+ import os
5
+ import torch
6
+ from pathlib import Path
7
+ from pydantic_settings import BaseSettings
8
+ from pydantic import Field
9
+ from loguru import logger
10
+
11
+
12
+ class Settings(BaseSettings):
13
+ # ── App ───────────────────────────────────────────────
14
+ APP_NAME: str = "Multimodal Surveillance Intelligence System"
15
+ APP_VERSION: str = "1.0.0"
16
+ DEBUG: bool = False
17
+ HOST: str = "0.0.0.0"
18
+ PORT: int = 8000
19
+
20
+ # ── Database (NeonDB PostgreSQL) ───────────────────────
21
+ DATABASE_URL: str = Field(
22
+ default="postgresql+asyncpg://user:password@hostname/dbname?ssl=require",
23
+ description="Database connection URL"
24
+ )
25
+ DB_ECHO: bool = False
26
+
27
+ # ── Security ───────────────────────────────────────────
28
+ SECRET_KEY: str = Field(default="change-in-production-super-secret-key", env="SECRET_KEY")
29
+ API_KEY: str = Field(default="surveillance-api-key-2024", env="API_KEY")
30
+
31
+ # ── Model Paths / Cache ───────────────────────────────
32
+ MODEL_CACHE_DIR: str = Field(default="./model_cache", env="HF_HOME")
33
+
34
+ # ── Vision Models ─────────────────────────────────────
35
+ DETECTION_MODEL: str = "facebook/detr-resnet-50"
36
+ REID_MODEL: str = "google/vit-base-patch16-224"
37
+ CLIP_MODEL: str = "openai/clip-vit-base-patch32"
38
+ DETECTION_CONFIDENCE: float = 0.7
39
+ DETECTION_BATCH_SIZE: int = 4
40
+
41
+ # ── NLP Models ────────────────────────────────────────
42
+ SEMANTIC_SEARCH_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
43
+ QA_MODEL: str = "deepset/roberta-base-squad2"
44
+ REPORT_MODEL: str = "google/flan-t5-base"
45
+ SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
46
+
47
+ # ── Audio Models (Optional) ───────────────────────────
48
+ WHISPER_MODEL: str = "openai/whisper-small"
49
+ AUDIO_CLASS_MODEL: str = "superb/wav2vec2-base-superb-ks"
50
+ TTS_MODEL: str = "microsoft/speecht5_tts"
51
+ ENABLE_AUDIO: bool = False
52
+
53
+ # ── FAISS ─────────────────────────────────────────────
54
+ FAISS_INDEX_PATH: str = "./faiss_indexes"
55
+ REID_EMBEDDING_DIM: int = 768 # ViT-base hidden size
56
+ CLIP_EMBEDDING_DIM: int = 512 # CLIP embedding size
57
+ SEARCH_EMBEDDING_DIM: int = 384 # MiniLM embedding size
58
+ FAISS_NPROBE: int = 10
59
+
60
+ # ── Tracking ──────────────────────────────────────────
61
+ TRACKER_TYPE: str = "bytetrack" # "bytetrack" or "deepsort"
62
+ TRACK_THRESH: float = 0.5
63
+ TRACK_BUFFER: int = 30
64
+ MATCH_THRESH: float = 0.8
65
+
66
+ # ── WebSocket / Cameras ───────────────────────────────
67
+ MAX_CAMERAS: int = 16
68
+ FRAME_QUEUE_SIZE: int = 30
69
+ FPS_TARGET: int = 30
70
+
71
+ # ── Anomaly Detection ─────────────────────────────────
72
+ ANOMALY_THRESHOLD: float = 0.75
73
+
74
+ # ── CORS ──────────────────────────────────────────────
75
+ CORS_ORIGINS: list = ["http://localhost:3000", "http://127.0.0.1:3000"]
76
+
77
+ class Config:
78
+ env_file = ".env"
79
+ env_file_encoding = "utf-8"
80
+ case_sensitive = False
81
+
82
+
83
+ settings = Settings()
84
+
85
+ # ── Device Detection ──────────────────────────────────────────
86
+ def get_device() -> torch.device:
87
+ if torch.cuda.is_available():
88
+ device = torch.device("cuda")
89
+ logger.info(f"🚀 GPU detected: {torch.cuda.get_device_name(0)} | VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
90
+ else:
91
+ device = torch.device("cpu")
92
+ logger.warning("⚠️ No GPU detected — running on CPU. Performance may be degraded.")
93
+ return device
94
+
95
+
96
+ DEVICE = get_device()
97
+
98
+ # ── Paths ─────────────────────────────────────────────────────
99
+ BASE_DIR = Path(__file__).resolve().parent
100
+ FAISS_DIR = BASE_DIR / settings.FAISS_INDEX_PATH
101
+ MODEL_CACHE = BASE_DIR / settings.MODEL_CACHE_DIR
102
+ FAISS_DIR.mkdir(parents=True, exist_ok=True)
103
+ MODEL_CACHE.mkdir(parents=True, exist_ok=True)
104
+
105
+ # Set HuggingFace cache
106
+ os.environ["HF_HOME"] = str(MODEL_CACHE)
107
+ os.environ["TRANSFORMERS_CACHE"] = str(MODEL_CACHE)
database/__init__.py ADDED
File without changes
database/crud.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ database/crud.py - CRUD operations for Persons, Events, and Incident Reports
3
+ """
4
+ import uuid
5
+ from datetime import datetime
6
+ from typing import List, Optional, Dict, Any
7
+ from sqlalchemy.ext.asyncio import AsyncSession
8
+ from sqlalchemy import select, update, and_, desc
9
+ from database.models import Person, Event, IncidentReport, ActivityType
10
+ from loguru import logger
11
+
12
+
13
+ # ─── Person Operations ────────────────────────────────────────────────────────
14
+
15
+ async def create_person(
16
+ db: AsyncSession,
17
+ faiss_id: Optional[int] = None,
18
+ attributes: Optional[Dict] = None,
19
+ thumbnail_path: Optional[str] = None,
20
+ ) -> Person:
21
+ person = Person(
22
+ faiss_id=faiss_id,
23
+ attributes=attributes or {},
24
+ thumbnail_path=thumbnail_path,
25
+ track_ids=[],
26
+ )
27
+ db.add(person)
28
+ await db.flush()
29
+ logger.info(f"Created person: {person.id}")
30
+ return person
31
+
32
+
33
+ async def get_person(db: AsyncSession, person_id: uuid.UUID) -> Optional[Person]:
34
+ result = await db.execute(select(Person).where(Person.id == person_id))
35
+ return result.scalar_one_or_none()
36
+
37
+
38
+ async def get_all_persons(db: AsyncSession, limit: int = 100, offset: int = 0) -> List[Person]:
39
+ result = await db.execute(
40
+ select(Person).order_by(desc(Person.last_seen)).limit(limit).offset(offset)
41
+ )
42
+ return result.scalars().all()
43
+
44
+
45
+ async def update_person_last_seen(db: AsyncSession, person_id: uuid.UUID) -> None:
46
+ await db.execute(
47
+ update(Person).where(Person.id == person_id).values(last_seen=datetime.utcnow())
48
+ )
49
+
50
+
51
+ async def update_person_faiss_id(db: AsyncSession, person_id: uuid.UUID, faiss_id: int) -> None:
52
+ await db.execute(
53
+ update(Person).where(Person.id == person_id).values(faiss_id=faiss_id)
54
+ )
55
+
56
+
57
+ async def update_person_attributes(db: AsyncSession, person_id: uuid.UUID, attributes: Dict) -> None:
58
+ await db.execute(
59
+ update(Person).where(Person.id == person_id).values(attributes=attributes)
60
+ )
61
+
62
+
63
+ # ─── Event Operations ─────────────────────────────────────────────────────────
64
+
65
+ async def create_event(
66
+ db: AsyncSession,
67
+ person_id: uuid.UUID,
68
+ camera_id: str,
69
+ activity_type: ActivityType = ActivityType.DETECTED,
70
+ bounding_box: Optional[Dict] = None,
71
+ confidence: Optional[float] = None,
72
+ track_id: Optional[int] = None,
73
+ location_zone: Optional[str] = None,
74
+ anomaly_score: float = 0.0,
75
+ description: Optional[str] = None,
76
+ raw_metadata: Optional[Dict] = None,
77
+ ) -> Event:
78
+ event = Event(
79
+ person_id=person_id,
80
+ camera_id=camera_id,
81
+ activity_type=activity_type,
82
+ bounding_box=bounding_box,
83
+ confidence=confidence,
84
+ track_id=track_id,
85
+ location_zone=location_zone,
86
+ anomaly_score=anomaly_score,
87
+ description=description,
88
+ raw_metadata=raw_metadata or {},
89
+ )
90
+ db.add(event)
91
+ await db.flush()
92
+ return event
93
+
94
+
95
+ async def get_events_for_person(
96
+ db: AsyncSession,
97
+ person_id: uuid.UUID,
98
+ limit: int = 50,
99
+ ) -> List[Event]:
100
+ result = await db.execute(
101
+ select(Event)
102
+ .where(Event.person_id == person_id)
103
+ .order_by(desc(Event.timestamp))
104
+ .limit(limit)
105
+ )
106
+ return result.scalars().all()
107
+
108
+
109
+ async def get_recent_events(
110
+ db: AsyncSession,
111
+ camera_id: Optional[str] = None,
112
+ limit: int = 100,
113
+ ) -> List[Event]:
114
+ query = select(Event).order_by(desc(Event.timestamp)).limit(limit)
115
+ if camera_id:
116
+ query = query.where(Event.camera_id == camera_id)
117
+ result = await db.execute(query)
118
+ return result.scalars().all()
119
+
120
+
121
+ async def get_anomaly_events(db: AsyncSession, threshold: float = 0.75, limit: int = 50) -> List[Event]:
122
+ result = await db.execute(
123
+ select(Event)
124
+ .where(Event.anomaly_score >= threshold)
125
+ .order_by(desc(Event.timestamp))
126
+ .limit(limit)
127
+ )
128
+ return result.scalars().all()
129
+
130
+
131
+ # ─── Incident Report Operations ───────────────────────────────────────────────
132
+
133
+ async def create_incident_report(
134
+ db: AsyncSession,
135
+ person_id: Optional[uuid.UUID],
136
+ report_text: str,
137
+ summary: Optional[str] = None,
138
+ severity: str = "medium",
139
+ camera_ids: Optional[List[str]] = None,
140
+ ) -> IncidentReport:
141
+ report = IncidentReport(
142
+ person_id=person_id,
143
+ report_text=report_text,
144
+ summary=summary,
145
+ severity=severity,
146
+ camera_ids=camera_ids or [],
147
+ )
148
+ db.add(report)
149
+ await db.flush()
150
+ return report
151
+
152
+
153
+ async def get_report(db: AsyncSession, report_id: uuid.UUID) -> Optional[IncidentReport]:
154
+ result = await db.execute(select(IncidentReport).where(IncidentReport.report_id == report_id))
155
+ return result.scalar_one_or_none()
156
+
157
+
158
+ async def get_reports_for_person(db: AsyncSession, person_id: uuid.UUID) -> List[IncidentReport]:
159
+ result = await db.execute(
160
+ select(IncidentReport)
161
+ .where(IncidentReport.person_id == person_id)
162
+ .order_by(desc(IncidentReport.generated_at))
163
+ )
164
+ return result.scalars().all()
database/models.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ database/models.py - SQLAlchemy ORM models for Surveillance System
3
+ """
4
+ import uuid
5
+ from datetime import datetime
6
+ from sqlalchemy import Column, String, DateTime, Float, Text, Integer, ForeignKey, JSON, Enum
7
+ from sqlalchemy.dialects.postgresql import UUID
8
+ from sqlalchemy.orm import relationship
9
+ from database.session import Base
10
+ import enum
11
+
12
+
13
+ class ActivityType(str, enum.Enum):
14
+ DETECTED = "detected"
15
+ TRACKED = "tracked"
16
+ REID_MATCH = "reid_match"
17
+ ANOMALY = "anomaly"
18
+ LOITERING = "loitering"
19
+ RUNNING = "running"
20
+ FIGHTING = "fighting"
21
+ TRESPASSING = "trespassing"
22
+
23
+
24
+ class AnalysisSession(Base):
25
+ __tablename__ = "analysis_sessions"
26
+
27
+ id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
28
+ timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
29
+ video_filename = Column(String(255), nullable=False)
30
+ thumbnail_path = Column(String(500), nullable=True)
31
+
32
+ # Analysis Summary Stats
33
+ duration_sec = Column(Float, nullable=False, default=0.0)
34
+ frames_processed = Column(Integer, nullable=False, default=0)
35
+ unique_persons = Column(Integer, nullable=False, default=0)
36
+ peak_count = Column(Integer, nullable=False, default=0)
37
+
38
+ def __repr__(self):
39
+ return f"<AnalysisSession id={self.id} video={self.video_filename}>"
40
+
41
+
42
+ class Person(Base):
43
+ __tablename__ = "persons"
44
+
45
+ id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
46
+ first_seen = Column(DateTime, default=datetime.utcnow, nullable=False)
47
+ last_seen = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
48
+
49
+ # ReID embedding reference
50
+ faiss_id = Column(Integer, nullable=True, unique=True)
51
+ embedding_version = Column(String(50), default="vit-base-patch16-224")
52
+
53
+ # Attributes from CLIP
54
+ attributes = Column(JSON, nullable=True) # {"clothing": [...], "colors": [...]}
55
+ thumbnail_path = Column(String(500), nullable=True)
56
+ track_ids = Column(JSON, default=list) # list of all track IDs assigned cross-camera
57
+
58
+ events = relationship("Event", back_populates="person", cascade="all, delete-orphan")
59
+
60
+ def __repr__(self):
61
+ return f"<Person id={self.id} first_seen={self.first_seen}>"
62
+
63
+
64
+ class Event(Base):
65
+ __tablename__ = "events"
66
+
67
+ event_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
68
+ person_id = Column(UUID(as_uuid=True), ForeignKey("persons.id", ondelete="CASCADE"), nullable=False, index=True)
69
+ camera_id = Column(String(64), nullable=False, index=True)
70
+ timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
71
+ activity_type = Column(Enum(ActivityType), default=ActivityType.DETECTED, nullable=False)
72
+
73
+ # Spatial & tracking info
74
+ bounding_box = Column(JSON, nullable=True) # {"x1": f, "y1": f, "x2": f, "y2": f}
75
+ confidence = Column(Float, nullable=True)
76
+ track_id = Column(Integer, nullable=True)
77
+ location_zone = Column(String(128), nullable=True)
78
+
79
+ # Additional metadata
80
+ anomaly_score = Column(Float, default=0.0)
81
+ raw_metadata = Column(JSON, nullable=True)
82
+ description = Column(Text, nullable=True) # NLP-generated description
83
+
84
+ person = relationship("Person", back_populates="events")
85
+
86
+ def __repr__(self):
87
+ return f"<Event {self.event_id} person={self.person_id} cam={self.camera_id} type={self.activity_type}>"
88
+
89
+
90
+ class IncidentReport(Base):
91
+ __tablename__ = "incident_reports"
92
+
93
+ report_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
94
+ person_id = Column(UUID(as_uuid=True), ForeignKey("persons.id", ondelete="SET NULL"), nullable=True)
95
+ generated_at = Column(DateTime, default=datetime.utcnow)
96
+ report_text = Column(Text, nullable=False)
97
+ summary = Column(Text, nullable=True)
98
+ severity = Column(String(20), default="medium") # low / medium / high / critical
99
+ camera_ids = Column(JSON, default=list)
100
+ model_version = Column(String(50), default="flan-t5-base")
database/session.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ database/session.py - Async SQLAlchemy session management for NeonDB PostgreSQL
3
+ """
4
+ from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
5
+ from sqlalchemy.orm import DeclarativeBase
6
+ from config import settings
7
+ from loguru import logger
8
+
9
+ engine = create_async_engine(
10
+ settings.DATABASE_URL,
11
+ echo=settings.DB_ECHO,
12
+ pool_size=10,
13
+ max_overflow=20,
14
+ pool_pre_ping=True,
15
+ connect_args={"ssl": "require"},
16
+ )
17
+
18
+ AsyncSessionLocal = async_sessionmaker(
19
+ engine,
20
+ class_=AsyncSession,
21
+ expire_on_commit=False,
22
+ )
23
+
24
+
25
+ class Base(DeclarativeBase):
26
+ pass
27
+
28
+
29
+ async def get_db() -> AsyncSession:
30
+ async with AsyncSessionLocal() as session:
31
+ try:
32
+ yield session
33
+ await session.commit()
34
+ except Exception:
35
+ await session.rollback()
36
+ raise
37
+ finally:
38
+ await session.close()
39
+
40
+
41
+ async def create_tables():
42
+ """Create all tables on startup."""
43
+ async with engine.begin() as conn:
44
+ from database.models import Person, Event # noqa: F401
45
+ await conn.run_sync(Base.metadata.create_all)
46
+ logger.info("✅ Database tables ensured.")
graph/__init__.py ADDED
File without changes
graph/movement_graph.py ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ graph/movement_graph.py - PyTorch Geometric Movement Graph & Anomaly Detection
3
+ Builds a directed graph of person movements across cameras and zones.
4
+ Detects abnormal route patterns using GNN-based anomaly scoring.
5
+ """
6
+ import time
7
+ import torch
8
+ import numpy as np
9
+ from typing import Dict, List, Optional, Tuple
10
+ from collections import defaultdict
11
+ from loguru import logger
12
+ from config import DEVICE, settings
13
+
14
+ try:
15
+ import torch_geometric
16
+ from torch_geometric.data import Data
17
+ from torch_geometric.nn import GCNConv, global_mean_pool
18
+ GEO_AVAILABLE = True
19
+ except ImportError:
20
+ GEO_AVAILABLE = False
21
+ logger.warning("torch-geometric not installed. Movement graph module will use fallback.")
22
+
23
+
24
+ # ── GNN Autoencoder for Anomaly Detection ──────────────────────────────────────
25
+
26
+ class MovementGNN(torch.nn.Module):
27
+ """
28
+ Simple GCN autoencoder to encode node features (camera visit patterns).
29
+ Reconstruction error → anomaly score.
30
+ """
31
+
32
+ def __init__(self, in_channels: int = 8, hidden: int = 16, out_channels: int = 8):
33
+ super().__init__()
34
+ if GEO_AVAILABLE:
35
+ self.enc1 = GCNConv(in_channels, hidden)
36
+ self.enc2 = GCNConv(hidden, out_channels)
37
+ self.dec1 = GCNConv(out_channels, hidden)
38
+ self.dec2 = GCNConv(hidden, in_channels)
39
+ self.relu = torch.nn.ReLU()
40
+
41
+ def forward(self, x, edge_index):
42
+ # Encode
43
+ z = self.relu(self.enc1(x, edge_index))
44
+ z = self.enc2(z, edge_index)
45
+ # Decode
46
+ x_hat = self.relu(self.dec1(z, edge_index))
47
+ x_hat = self.dec2(x_hat, edge_index)
48
+ return x_hat, z
49
+
50
+
51
+ # ── Movement Graph Builder ──────────────────────────────────────────────────────
52
+
53
+ class MovementGraph:
54
+ """
55
+ Maintains a person-level movement graph.
56
+ Nodes = cameras/zones; Edges = observed transitions.
57
+ """
58
+
59
+ def __init__(self):
60
+ # person_id → list of (camera_id, timestamp, zone)
61
+ self.person_trails: Dict[str, List[Dict]] = defaultdict(list)
62
+ # camera graph: edge_weights[cam_a][cam_b] = count
63
+ self.edge_weights: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
64
+ self.camera_ids: List[str] = []
65
+ self.cam_index: Dict[str, int] = {}
66
+
67
+ if GEO_AVAILABLE:
68
+ self.gnn = MovementGNN().to(DEVICE)
69
+ self.gnn.eval()
70
+ else:
71
+ self.gnn = None
72
+
73
+ logger.info(f"MovementGraph initialized. PyG available: {GEO_AVAILABLE}")
74
+
75
+ def register_camera(self, camera_id: str):
76
+ if camera_id not in self.cam_index:
77
+ self.cam_index[camera_id] = len(self.camera_ids)
78
+ self.camera_ids.append(camera_id)
79
+
80
+ def add_observation(
81
+ self,
82
+ person_id: str,
83
+ camera_id: str,
84
+ timestamp: float,
85
+ zone: Optional[str] = None,
86
+ ):
87
+ """Record that a person was observed at camera/zone at timestamp."""
88
+ self.register_camera(camera_id)
89
+ trail = self.person_trails[person_id]
90
+
91
+ # Add transition edge if person has prior observation
92
+ if trail:
93
+ last_cam = trail[-1]["camera_id"]
94
+ if last_cam != camera_id:
95
+ self.edge_weights[last_cam][camera_id] += 1
96
+
97
+ trail.append({"camera_id": camera_id, "timestamp": timestamp, "zone": zone})
98
+ # Keep last 50 observations per person
99
+ if len(trail) > 50:
100
+ self.person_trails[person_id] = trail[-50:]
101
+
102
+ def _build_graph(self) -> Optional["Data"]:
103
+ """Convert current camera graph to PyG Data object."""
104
+ if not GEO_AVAILABLE or len(self.camera_ids) == 0:
105
+ return None
106
+
107
+ n = len(self.camera_ids)
108
+ # Node features: [visit_count_normalized, in_degree, out_degree, ...]
109
+ node_features = np.zeros((n, 8), dtype=np.float32)
110
+ edge_src, edge_dst = [], []
111
+
112
+ # Count visits per camera
113
+ cam_visits = defaultdict(int)
114
+ for trails in self.person_trails.values():
115
+ for obs in trails:
116
+ cam_visits[obs["camera_id"]] += 1
117
+
118
+ max_visits = max(cam_visits.values()) if cam_visits else 1
119
+ for cam, idx in self.cam_index.items():
120
+ node_features[idx, 0] = cam_visits[cam] / max_visits
121
+
122
+ # Build edges and compute in/out degree
123
+ for src_cam, dst_dict in self.edge_weights.items():
124
+ for dst_cam, weight in dst_dict.items():
125
+ si = self.cam_index.get(src_cam)
126
+ di = self.cam_index.get(dst_cam)
127
+ if si is not None and di is not None:
128
+ edge_src.append(si)
129
+ edge_dst.append(di)
130
+ node_features[si, 1] += 1 # out-degree
131
+ node_features[di, 2] += 1 # in-degree
132
+
133
+ if not edge_src:
134
+ # Add self-loops to avoid empty graph
135
+ edge_src = list(range(n))
136
+ edge_dst = list(range(n))
137
+
138
+ x = torch.tensor(node_features, dtype=torch.float32).to(DEVICE)
139
+ edge_index = torch.tensor([edge_src, edge_dst], dtype=torch.long).to(DEVICE)
140
+ return Data(x=x, edge_index=edge_index)
141
+
142
+ @torch.inference_mode()
143
+ def compute_anomaly_score(self, person_id: str) -> Dict:
144
+ """
145
+ Compute anomaly score for a person's movement trail.
146
+
147
+ Returns:
148
+ {"person_id": str, "anomaly_score": float, "route": list, "verdict": str}
149
+ """
150
+ trail = self.person_trails.get(person_id, [])
151
+ if len(trail) < 2:
152
+ return {"person_id": person_id, "anomaly_score": 0.0, "verdict": "insufficient_data", "route": []}
153
+
154
+ t0 = time.perf_counter()
155
+
156
+ # Heuristic features for pattern scoring
157
+ cameras = [obs["camera_id"] for obs in trail]
158
+ timestamps = [obs["timestamp"] for obs in trail]
159
+ unique_cams = len(set(cameras))
160
+ total_obs = len(cameras)
161
+
162
+ # Time between observations
163
+ gaps = np.diff(timestamps)
164
+ avg_gap = float(np.mean(gaps)) if len(gaps) > 0 else 0
165
+ max_gap = float(np.max(gaps)) if len(gaps) > 0 else 0
166
+
167
+ # Suspicious patterns:
168
+ # 1. Too many unique cameras in short time → rapid movement
169
+ # 2. Very short dwell time per camera → running/fleeing behavior
170
+ # 3. Visiting same camera repeatedly in short time → loitering
171
+ rapid_movement = unique_cams / max(total_obs, 1) > 0.8
172
+ loitering = cameras.count(cameras[-1]) / total_obs > 0.6 if cameras else False
173
+ fast_dwell = avg_gap < 10 and unique_cams > 3 # under 10s per camera
174
+
175
+ heuristic_score = 0.0
176
+ if rapid_movement:
177
+ heuristic_score += 0.4
178
+ if loitering:
179
+ heuristic_score += 0.3
180
+ if fast_dwell:
181
+ heuristic_score += 0.3
182
+
183
+ # GNN-based score (if available)
184
+ gnn_score = 0.0
185
+ if GEO_AVAILABLE and self.gnn is not None:
186
+ graph = self._build_graph()
187
+ if graph is not None and graph.num_nodes > 0:
188
+ x_hat, _ = self.gnn(graph.x, graph.edge_index)
189
+ reconstruction_error = float(torch.mean((graph.x - x_hat) ** 2))
190
+ gnn_score = min(reconstruction_error * 5, 1.0)
191
+
192
+ # Combined score
193
+ anomaly_score = round(0.5 * heuristic_score + 0.5 * gnn_score, 4)
194
+ anomaly_score = min(anomaly_score, 1.0)
195
+
196
+ latency = (time.perf_counter() - t0) * 1000
197
+
198
+ if anomaly_score > settings.ANOMALY_THRESHOLD:
199
+ verdict = "anomalous"
200
+ elif anomaly_score > 0.4:
201
+ verdict = "suspicious"
202
+ else:
203
+ verdict = "normal"
204
+
205
+ return {
206
+ "person_id": person_id,
207
+ "anomaly_score": anomaly_score,
208
+ "verdict": verdict,
209
+ "route": [{"camera_id": obs["camera_id"], "timestamp": obs["timestamp"]} for obs in trail[-10:]],
210
+ "unique_cameras": unique_cams,
211
+ "total_observations": total_obs,
212
+ "avg_dwell_seconds": round(avg_gap, 2),
213
+ "flags": {
214
+ "rapid_movement": rapid_movement,
215
+ "loitering": loitering,
216
+ "fast_dwell": fast_dwell,
217
+ },
218
+ "latency_ms": round(latency, 2),
219
+ }
220
+
221
+ def get_all_anomalies(self, threshold: float = 0.75) -> List[Dict]:
222
+ """Compute anomaly scores for all tracked persons."""
223
+ results = []
224
+ for pid in self.person_trails:
225
+ score_data = self.compute_anomaly_score(pid)
226
+ if score_data["anomaly_score"] >= threshold:
227
+ results.append(score_data)
228
+ return sorted(results, key=lambda x: -x["anomaly_score"])
229
+
230
+ def get_movement_summary(self) -> Dict:
231
+ return {
232
+ "total_persons_tracked": len(self.person_trails),
233
+ "total_cameras": len(self.camera_ids),
234
+ "cameras": self.camera_ids,
235
+ "edge_count": sum(len(v) for v in self.edge_weights.values()),
236
+ }
nlp/__init__.py ADDED
File without changes
nlp/qa.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ nlp/qa.py - Question Answering over Surveillance Logs using deepset/roberta-base-squad2
3
+ """
4
+ import time
5
+ from typing import Optional, Dict, List
6
+ from transformers import pipeline, Pipeline
7
+ from loguru import logger
8
+ from config import settings, DEVICE
9
+
10
+
11
+ class SurveillanceQA:
12
+ """
13
+ Extractive QA system. Given a question and a context built from
14
+ surveillance logs/events, extracts the most relevant answer span.
15
+ """
16
+
17
+ def __init__(self):
18
+ logger.info(f"Loading QA model: {settings.QA_MODEL}")
19
+ device_id = 0 if str(DEVICE) == "cuda" else -1
20
+ self.qa_pipeline: Pipeline = pipeline(
21
+ "question-answering",
22
+ model=settings.QA_MODEL,
23
+ tokenizer=settings.QA_MODEL,
24
+ device=device_id,
25
+ )
26
+ logger.info("✅ SurveillanceQA ready.")
27
+
28
+ def _build_context(self, events: List[Dict]) -> str:
29
+ """Build a natural language context string from event records."""
30
+ lines = []
31
+ for e in events:
32
+ ts = e.get("timestamp", "unknown time")
33
+ cam = e.get("camera_id", "unknown camera")
34
+ activity = e.get("activity_type", "detected")
35
+ person_id = str(e.get("person_id", "unknown"))[:8]
36
+ attrs = e.get("attributes", {})
37
+ desc = e.get("description", "")
38
+ attr_str = ""
39
+ if attrs:
40
+ gender = attrs.get("gender", "")
41
+ color = attrs.get("color", "")
42
+ clothing = ", ".join([c.get("label", "") for c in attrs.get("clothing", [])[:2]])
43
+ attr_str = f"({gender}, {color} clothing, {clothing})"
44
+ line = f"At {ts}, camera {cam} detected person {person_id} {attr_str} with activity: {activity}."
45
+ if desc:
46
+ line += f" {desc}"
47
+ lines.append(line)
48
+ return " ".join(lines)
49
+
50
+ def answer(
51
+ self,
52
+ question: str,
53
+ events: Optional[List[Dict]] = None,
54
+ context: Optional[str] = None,
55
+ top_k: int = 3,
56
+ ) -> Dict:
57
+ """
58
+ Answer a natural language question about surveillance data.
59
+
60
+ Args:
61
+ question: User's question
62
+ events: List of event dicts (auto-builds context)
63
+ context: Pre-built context string
64
+ top_k: Number of answer candidates
65
+
66
+ Returns:
67
+ {"answer": str, "score": float, "start": int, "end": int, "context": str, "latency_ms": float}
68
+ """
69
+ if context is None:
70
+ if not events:
71
+ return {"answer": "No surveillance data available to answer from.", "score": 0.0}
72
+ context = self._build_context(events)
73
+
74
+ if not context.strip():
75
+ return {"answer": "No context available.", "score": 0.0}
76
+
77
+ # Truncate context to model max (512 tokens ≈ ~2000 chars)
78
+ context = context[:4000]
79
+
80
+ t0 = time.perf_counter()
81
+ result = self.qa_pipeline(
82
+ question=question,
83
+ context=context,
84
+ top_k=top_k,
85
+ handle_impossible_answer=True,
86
+ )
87
+ latency_ms = (time.perf_counter() - t0) * 1000
88
+
89
+ if isinstance(result, list):
90
+ best = result[0]
91
+ else:
92
+ best = result
93
+
94
+ logger.debug(f"QA answered '{question[:50]}' in {latency_ms:.1f}ms | score={best.get('score', 0):.3f}")
95
+ return {
96
+ "answer": best.get("answer", ""),
97
+ "score": round(best.get("score", 0.0), 4),
98
+ "start": best.get("start", 0),
99
+ "end": best.get("end", 0),
100
+ "context_used": context[:500] + "..." if len(context) > 500 else context,
101
+ "latency_ms": round(latency_ms, 2),
102
+ "all_answers": result if isinstance(result, list) else [result],
103
+ }
nlp/report.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ nlp/report.py - Incident Report Generation using google/flan-t5-base
3
+ """
4
+ import time
5
+ from typing import Dict, List, Optional
6
+ from transformers import pipeline, Pipeline
7
+ from loguru import logger
8
+ from config import settings, DEVICE
9
+
10
+
11
+ REPORT_PROMPT_TEMPLATE = """Generate a structured surveillance incident report based on the following events:
12
+
13
+ Events:
14
+ {events_summary}
15
+
16
+ Format the report as:
17
+ INCIDENT REPORT
18
+ Date/Time: [datetime]
19
+ Cameras Involved: [cameras]
20
+ Subject Description: [physical description]
21
+ Activity Observed: [description of events]
22
+ Anomaly Level: [low/medium/high]
23
+ Recommended Action: [action]
24
+ """
25
+
26
+
27
+ class IncidentReportGenerator:
28
+ """
29
+ Generates structured incident reports from surveillance events using Flan-T5.
30
+ """
31
+
32
+ def __init__(self):
33
+ logger.info(f"Loading report generation model: {settings.REPORT_MODEL}")
34
+ device_id = 0 if str(DEVICE) == "cuda" else -1
35
+ self.generator: Pipeline = pipeline(
36
+ "text2text-generation",
37
+ model=settings.REPORT_MODEL,
38
+ tokenizer=settings.REPORT_MODEL,
39
+ device=device_id,
40
+ )
41
+ logger.info("✅ IncidentReportGenerator ready.")
42
+
43
+ def _format_events(self, events: List[Dict]) -> str:
44
+ """Format events list into a readable string for the prompt."""
45
+ lines = []
46
+ for i, e in enumerate(events, 1):
47
+ ts = e.get("timestamp", "unknown time")
48
+ cam = e.get("camera_id", "unknown camera")
49
+ activity = e.get("activity_type", "detected")
50
+ attrs = e.get("attributes", {})
51
+ gender = attrs.get("gender", "") if attrs else ""
52
+ color = attrs.get("color", "") if attrs else ""
53
+ anomaly_score = e.get("anomaly_score", 0.0)
54
+ lines.append(
55
+ f"{i}. [{ts}] Camera {cam}: {gender} person in {color} clothing, "
56
+ f"activity={activity}, anomaly_score={anomaly_score:.2f}"
57
+ )
58
+ return "\n".join(lines)
59
+
60
+ def generate(
61
+ self,
62
+ events: List[Dict],
63
+ person_id: Optional[str] = None,
64
+ max_length: int = 512,
65
+ severity_hint: Optional[str] = None,
66
+ ) -> Dict:
67
+ """
68
+ Generate a structured incident report from a list of event records.
69
+
70
+ Returns:
71
+ {"report_text": str, "severity": str, "latency_ms": float}
72
+ """
73
+ if not events:
74
+ return {"report_text": "No events provided for report generation.", "severity": "low"}
75
+
76
+ events_summary = self._format_events(events[:20]) # limit for token budget
77
+ prompt = REPORT_PROMPT_TEMPLATE.format(events_summary=events_summary)
78
+ if person_id:
79
+ prompt = f"Person ID: {person_id[:8]}\n" + prompt
80
+
81
+ t0 = time.perf_counter()
82
+ outputs = self.generator(
83
+ prompt,
84
+ max_new_tokens=max_length,
85
+ num_beams=4,
86
+ early_stopping=True,
87
+ no_repeat_ngram_size=3,
88
+ )
89
+ latency_ms = (time.perf_counter() - t0) * 1000
90
+ report_text = outputs[0]["generated_text"]
91
+
92
+ # Determine severity from anomaly scores
93
+ scores = [e.get("anomaly_score", 0.0) for e in events]
94
+ avg_anomaly = sum(scores) / max(len(scores), 1)
95
+ if avg_anomaly > 0.8:
96
+ severity = "critical"
97
+ elif avg_anomaly > 0.6:
98
+ severity = "high"
99
+ elif avg_anomaly > 0.3:
100
+ severity = "medium"
101
+ else:
102
+ severity = "low"
103
+
104
+ severity = severity_hint or severity
105
+ logger.info(f"Report generated in {latency_ms:.1f}ms | severity={severity}")
106
+
107
+ return {
108
+ "report_text": report_text,
109
+ "severity": severity,
110
+ "event_count": len(events),
111
+ "avg_anomaly_score": round(avg_anomaly, 4),
112
+ "latency_ms": round(latency_ms, 2),
113
+ "person_id": person_id,
114
+ }
nlp/search.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ nlp/search.py - Semantic Search using sentence-transformers/all-MiniLM-L6-v2 + FAISS
3
+ Embeds natural language queries and matches against stored surveillance metadata.
4
+ """
5
+ import os
6
+ import time
7
+ import numpy as np
8
+ import faiss
9
+ import torch
10
+ from typing import List, Dict, Optional
11
+ from sentence_transformers import SentenceTransformer
12
+ from loguru import logger
13
+ from config import settings, DEVICE, FAISS_DIR
14
+
15
+
16
+ class SemanticSearchEngine:
17
+ """
18
+ Encodes surveillance metadata (event descriptions, attributes) into
19
+ sentence embeddings stored in FAISS. Supports natural-language querying.
20
+ """
21
+
22
+ INDEX_FILE = str(FAISS_DIR / "search_index.faiss")
23
+ META_FILE = str(FAISS_DIR / "search_meta.npy")
24
+
25
+ def __init__(self):
26
+ logger.info(f"Loading semantic search model: {settings.SEMANTIC_SEARCH_MODEL}")
27
+ self.model = SentenceTransformer(settings.SEMANTIC_SEARCH_MODEL, device=str(DEVICE))
28
+ self.dim = settings.SEARCH_EMBEDDING_DIM
29
+ self.index = self._load_or_create_index()
30
+ self.meta: List[Dict] = self._load_meta()
31
+ logger.info(f"✅ SemanticSearchEngine ready. Index size: {self.index.ntotal}")
32
+
33
+ def _load_or_create_index(self) -> faiss.IndexFlatIP:
34
+ if os.path.exists(self.INDEX_FILE):
35
+ logger.info("Loading existing FAISS search index.")
36
+ return faiss.read_index(self.INDEX_FILE)
37
+ return faiss.IndexFlatIP(self.dim)
38
+
39
+ def _load_meta(self) -> List[Dict]:
40
+ if os.path.exists(self.META_FILE):
41
+ return list(np.load(self.META_FILE, allow_pickle=True))
42
+ return []
43
+
44
+ def save(self):
45
+ faiss.write_index(self.index, self.INDEX_FILE)
46
+ np.save(self.META_FILE, np.array(self.meta, dtype=object))
47
+
48
+ def encode(self, texts: List[str]) -> np.ndarray:
49
+ """Encode texts to L2-normalized embeddings (batch)."""
50
+ embeddings = self.model.encode(
51
+ texts,
52
+ batch_size=32,
53
+ normalize_embeddings=True,
54
+ convert_to_numpy=True,
55
+ show_progress_bar=False,
56
+ )
57
+ return embeddings.astype(np.float32)
58
+
59
+ def index_event(self, text: str, metadata: Dict) -> int:
60
+ """
61
+ Add a single surveillance event description to the FAISS search index.
62
+
63
+ Args:
64
+ text: Natural language description of the event
65
+ metadata: {"event_id", "person_id", "camera_id", "timestamp", "activity_type", ...}
66
+
67
+ Returns:
68
+ faiss_id (row index)
69
+ """
70
+ embedding = self.encode([text])
71
+ faiss_id = self.index.ntotal
72
+ self.index.add(embedding)
73
+ self.meta.append({**metadata, "text": text, "faiss_id": faiss_id})
74
+ self.save()
75
+ return faiss_id
76
+
77
+ def index_batch(self, texts: List[str], metadatas: List[Dict]):
78
+ """Batch indexing for bulk ingestion."""
79
+ embeddings = self.encode(texts)
80
+ base_id = self.index.ntotal
81
+ self.index.add(embeddings)
82
+ for i, (text, meta) in enumerate(zip(texts, metadatas)):
83
+ self.meta.append({**meta, "text": text, "faiss_id": base_id + i})
84
+ self.save()
85
+ logger.info(f"Indexed {len(texts)} events into search index.")
86
+
87
+ def search(self, query: str, top_k: int = 10, score_threshold: float = 0.4) -> List[Dict]:
88
+ """
89
+ Search surveillance logs by natural language query.
90
+
91
+ Returns:
92
+ List of {"text": str, "score": float, ...metadata fields}
93
+ """
94
+ if self.index.ntotal == 0:
95
+ return []
96
+
97
+ t0 = time.perf_counter()
98
+ query_emb = self.encode([query])
99
+ k = min(top_k, self.index.ntotal)
100
+ distances, indices = self.index.search(query_emb, k)
101
+ latency = (time.perf_counter() - t0) * 1000
102
+
103
+ results = []
104
+ for dist, idx in zip(distances[0], indices[0]):
105
+ if idx == -1 or float(dist) < score_threshold:
106
+ continue
107
+ entry = dict(self.meta[idx])
108
+ entry["score"] = round(float(dist), 4)
109
+ results.append(entry)
110
+
111
+ logger.debug(f"Semantic search '{query[:40]}...' → {len(results)} results in {latency:.1f}ms")
112
+ return sorted(results, key=lambda x: -x["score"])
nlp/summarizer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ nlp/summarizer.py - Surveillance log summarization using facebook/bart-large-cnn
3
+ """
4
+ import time
5
+ from typing import List, Dict
6
+ from transformers import pipeline, Pipeline
7
+ from loguru import logger
8
+ from config import settings, DEVICE
9
+
10
+
11
+ class SurveillanceSummarizer:
12
+ """Abstractive summarization of surveillance event logs using BART."""
13
+
14
+ def __init__(self):
15
+ logger.info(f"Loading summarization model: {settings.SUMMARIZER_MODEL}")
16
+ device_id = 0 if str(DEVICE) == "cuda" else -1
17
+ self.summarizer: Pipeline = pipeline(
18
+ "summarization",
19
+ model=settings.SUMMARIZER_MODEL,
20
+ tokenizer=settings.SUMMARIZER_MODEL,
21
+ device=device_id,
22
+ )
23
+ logger.info("✅ SurveillanceSummarizer ready.")
24
+
25
+ def _events_to_text(self, events: List[Dict]) -> str:
26
+ parts = []
27
+ for e in events:
28
+ ts = e.get("timestamp", "")
29
+ cam = e.get("camera_id", "")
30
+ activity = e.get("activity_type", "")
31
+ anomaly = e.get("anomaly_score", 0.0)
32
+ attrs = e.get("attributes", {})
33
+ gender = attrs.get("gender", "") if attrs else ""
34
+ color = attrs.get("color", "") if attrs else ""
35
+ parts.append(
36
+ f"Camera {cam} at {ts}: {gender} person in {color} clothing observed {activity} "
37
+ f"with anomaly score {anomaly:.2f}."
38
+ )
39
+ return " ".join(parts)
40
+
41
+ def summarize(
42
+ self,
43
+ events: List[Dict],
44
+ min_length: int = 30,
45
+ max_length: int = 200,
46
+ ) -> Dict:
47
+ """Summarize a list of surveillance events."""
48
+ if not events:
49
+ return {"summary": "No events to summarize.", "latency_ms": 0}
50
+
51
+ text = self._events_to_text(events[:30])
52
+ # BART max input is ~1024 tokens
53
+ text = text[:3000]
54
+
55
+ t0 = time.perf_counter()
56
+ result = self.summarizer(
57
+ text,
58
+ min_length=min_length,
59
+ max_length=max_length,
60
+ do_sample=False,
61
+ )
62
+ latency_ms = (time.perf_counter() - t0) * 1000
63
+ summary = result[0]["summary_text"]
64
+ logger.debug(f"Summarized {len(events)} events in {latency_ms:.1f}ms")
65
+ return {
66
+ "summary": summary,
67
+ "event_count": len(events),
68
+ "latency_ms": round(latency_ms, 2),
69
+ }
70
+
71
+ def summarize_text(self, text: str, min_length: int = 30, max_length: int = 150) -> str:
72
+ """Summarize arbitrary text string."""
73
+ text = text[:3000]
74
+ result = self.summarizer(text, min_length=min_length, max_length=max_length, do_sample=False)
75
+ return result[0]["summary_text"]
requirements.txt ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================
2
+ # Multimodal Surveillance Intelligence System - Requirements
3
+ # Python 3.10+
4
+ # ============================================================
5
+
6
+ # --- Core API Framework ---
7
+ fastapi==0.115.0
8
+ uvicorn[standard]==0.30.6
9
+ python-multipart==0.0.9
10
+ websockets==12.0
11
+ httpx==0.27.2
12
+ pydantic==2.9.2
13
+ pydantic-settings==2.5.2
14
+
15
+ # --- Database ---
16
+ asyncpg==0.29.0
17
+ sqlalchemy[asyncio]==2.0.35
18
+
19
+ alembic==1.13.3
20
+ psycopg2-binary==2.9.9
21
+
22
+ # --- PyTorch (install separately for CUDA - see README) ---
23
+ # torch==2.4.1+cu124
24
+ # torchvision==0.19.1+cu124
25
+ # torchaudio==0.19.1+cu124
26
+
27
+ # --- HuggingFace Ecosystem ---
28
+ transformers==4.45.0
29
+ accelerate==0.34.2
30
+ datasets==3.0.1
31
+ tokenizers==0.20.0
32
+ safetensors==0.4.5
33
+ huggingface-hub==0.25.1
34
+ sentence-transformers==3.1.1
35
+
36
+ # --- Vision & Computer Vision ---
37
+ Pillow==10.4.0
38
+ opencv-python==4.10.0.84
39
+ numpy==1.26.4
40
+ scipy==1.14.1
41
+ scikit-image==0.24.0
42
+ torchvision
43
+ timm
44
+
45
+ # --- FAISS (install separately for GPU - see README) ---
46
+ # faiss-gpu -> pip install faiss-gpu (CUDA)
47
+ # faiss-cpu -> pip install faiss-cpu (CPU only)
48
+
49
+ # --- Object Detection & Tracking ---
50
+ # ByteTrack / DeepSORT dependencies
51
+ filterpy==1.4.5
52
+ lapx==0.5.9.post1
53
+
54
+ # --- PyTorch Geometric (Graph Module) ---
55
+ # Install separately:
56
+ # pip install torch-geometric
57
+ # pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.4.1+cu124.html
58
+ torch-geometric==2.6.1
59
+
60
+ # --- Audio Processing ---
61
+ librosa==0.10.2
62
+ soundfile==0.12.1
63
+ pyaudio==0.2.14
64
+ torchaudio
65
+ openai-whisper
66
+
67
+ # --- NLP & Text Processing ---
68
+ nltk==3.9.1
69
+ spacy==3.8.2
70
+ tiktoken==0.7.0
71
+
72
+ # --- Logging & Monitoring ---
73
+ loguru==0.7.2
74
+ prometheus-client==0.21.0
75
+ rich==13.8.1
76
+
77
+ # --- Utilities ---
78
+ python-dotenv==1.0.1
79
+ aiofiles==24.1.0
80
+ anyio==4.6.2
81
+ tenacity==9.0.0
82
+ cachetools==5.5.0
83
+ click==8.1.7
84
+ tqdm==4.66.5
85
+ psutil==6.0.0
86
+ GPUtil==1.4.0
87
+ PyYAML==6.0.2
88
+
89
+ # --- Testing & Benchmarks ---
90
+ pytest==8.3.3
91
+ pytest-asyncio==0.24.0
92
+ locust==2.31.8
93
+
94
+ # --- Image & Annotation ---
95
+ matplotlib==3.9.2
96
+ seaborn==0.13.2
routes/__init__.py ADDED
File without changes
routes/nlp_routes.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ routes/nlp_routes.py - NLP API endpoints:
3
+ GET /search (semantic search over surveillance logs)
4
+ POST /ask (QA over logs)
5
+ GET /report/{person_id} (generate incident report)
6
+ GET /anomalies (get anomalous persons)
7
+ """
8
+ import uuid
9
+ from typing import Optional, List
10
+
11
+ from fastapi import APIRouter, Depends, HTTPException, Query
12
+ from pydantic import BaseModel
13
+ from loguru import logger
14
+ from sqlalchemy.ext.asyncio import AsyncSession
15
+
16
+ from database.session import get_db
17
+ from database import crud
18
+
19
+ router = APIRouter(prefix="/api/v1", tags=["NLP & Intelligence"])
20
+
21
+
22
+ # ── Dependency helpers ─────────────────────────────────────────────────────────
23
+
24
+ def get_search_engine():
25
+ from app import search_engine
26
+ return search_engine
27
+
28
+
29
+ def get_qa_system():
30
+ from app import qa_system
31
+ return qa_system
32
+
33
+
34
+ def get_report_generator():
35
+ from app import report_generator
36
+ return report_generator
37
+
38
+
39
+ def get_summarizer():
40
+ from app import summarizer
41
+ return summarizer
42
+
43
+
44
+ def get_movement_graph():
45
+ from app import movement_graph
46
+ return movement_graph
47
+
48
+
49
+ # ── Request models ──────────────────────────────────────────────────────────────
50
+
51
+ class SearchRequest(BaseModel):
52
+ query: str
53
+ top_k: int = 10
54
+ score_threshold: float = 0.35
55
+
56
+
57
+ class AskRequest(BaseModel):
58
+ question: str
59
+ camera_id: Optional[str] = None
60
+ person_id: Optional[str] = None
61
+ context: Optional[str] = None
62
+
63
+
64
+ # ── GET /search ───────────────────────────────────────────────────────────────
65
+
66
+ @router.get("/search", summary="Semantic search over surveillance logs")
67
+ async def semantic_search(
68
+ q: str = Query(..., description="Natural language search query"),
69
+ top_k: int = Query(10, ge=1, le=50),
70
+ score_threshold: float = Query(0.35, ge=0.0, le=1.0),
71
+ search_engine=Depends(get_search_engine),
72
+ ):
73
+ """
74
+ Search across all indexed surveillance events using natural language.
75
+ Example: "person in red jacket near camera 3"
76
+ """
77
+ if not q.strip():
78
+ raise HTTPException(status_code=400, detail="Query cannot be empty")
79
+ results = search_engine.search(query=q, top_k=top_k, score_threshold=score_threshold)
80
+ return {
81
+ "query": q,
82
+ "results": results,
83
+ "total": len(results),
84
+ }
85
+
86
+
87
+ # ── POST /ask ─────────────────────────────────────────────────────────────────
88
+
89
+ @router.post("/ask", summary="Ask a natural language question about surveillance data")
90
+ async def ask_question(
91
+ request: AskRequest,
92
+ db: AsyncSession = Depends(get_db),
93
+ qa=Depends(get_qa_system),
94
+ ):
95
+ """
96
+ Extractive QA over recent surveillance events.
97
+ Example: "Which camera last detected person 123abc?"
98
+ """
99
+ events = []
100
+ if request.context:
101
+ # Use provided context directly
102
+ result = qa.answer(question=request.question, context=request.context)
103
+ else:
104
+ # Fetch recent events from DB
105
+ if request.person_id:
106
+ try:
107
+ pid = uuid.UUID(request.person_id)
108
+ except ValueError:
109
+ raise HTTPException(status_code=400, detail="Invalid person_id UUID")
110
+ db_events = await crud.get_events_for_person(db, pid, limit=30)
111
+ else:
112
+ db_events = await crud.get_recent_events(db, camera_id=request.camera_id, limit=50)
113
+
114
+ # Convert to dicts
115
+ events = [
116
+ {
117
+ "event_id": str(e.event_id),
118
+ "person_id": str(e.person_id),
119
+ "camera_id": e.camera_id,
120
+ "timestamp": str(e.timestamp),
121
+ "activity_type": e.activity_type.value,
122
+ "anomaly_score": e.anomaly_score or 0.0,
123
+ "bounding_box": e.bounding_box or {},
124
+ }
125
+ for e in db_events
126
+ ]
127
+ result = qa.answer(question=request.question, events=events)
128
+
129
+ return {
130
+ "question": request.question,
131
+ **result,
132
+ "events_used": len(events),
133
+ }
134
+
135
+
136
+ # ── GET /report/{person_id} ───────────────────────────────────────────────────
137
+
138
+ @router.get("/report/{person_id}", summary="Generate and store an incident report for a person")
139
+ async def generate_report(
140
+ person_id: str,
141
+ db: AsyncSession = Depends(get_db),
142
+ report_gen=Depends(get_report_generator),
143
+ _summarizer=Depends(get_summarizer),
144
+ ):
145
+ """Generate a structured incident report for a tracked person."""
146
+ try:
147
+ pid = uuid.UUID(person_id)
148
+ except ValueError:
149
+ raise HTTPException(status_code=400, detail="Invalid person_id UUID")
150
+
151
+ person = await crud.get_person(db, pid)
152
+ if not person:
153
+ raise HTTPException(status_code=404, detail=f"Person {person_id} not found")
154
+
155
+ events = await crud.get_events_for_person(db, pid, limit=30)
156
+ if not events:
157
+ raise HTTPException(status_code=404, detail="No events found for this person")
158
+
159
+ events_dicts = [
160
+ {
161
+ "event_id": str(e.event_id),
162
+ "camera_id": e.camera_id,
163
+ "timestamp": str(e.timestamp),
164
+ "activity_type": e.activity_type.value,
165
+ "anomaly_score": e.anomaly_score or 0.0,
166
+ "attributes": person.attributes or {},
167
+ }
168
+ for e in events
169
+ ]
170
+
171
+ # Generate report
172
+ report_result = report_gen.generate(events=events_dicts, person_id=person_id)
173
+ summary_result = _summarizer.summarize(events=events_dicts)
174
+
175
+ # Store report in DB
176
+ camera_ids = list({e["camera_id"] for e in events_dicts})
177
+ db_report = await crud.create_incident_report(
178
+ db,
179
+ person_id=pid,
180
+ report_text=report_result["report_text"],
181
+ summary=summary_result["summary"],
182
+ severity=report_result.get("severity", "medium"),
183
+ camera_ids=camera_ids,
184
+ )
185
+
186
+ return {
187
+ "report_id": str(db_report.report_id),
188
+ "person_id": person_id,
189
+ "generated_at": str(db_report.generated_at),
190
+ "severity": db_report.severity,
191
+ "summary": summary_result["summary"],
192
+ "report_text": report_result["report_text"],
193
+ "cameras_involved": camera_ids,
194
+ "event_count": len(events),
195
+ "latency_ms": report_result.get("latency_ms"),
196
+ }
197
+
198
+
199
+ # ── GET /anomalies ────────────────────────────────────────────────────────────
200
+
201
+ @router.get("/anomalies", summary="Get persons with anomalous movement patterns")
202
+ async def get_anomalies(
203
+ threshold: float = Query(0.75, ge=0.0, le=1.0),
204
+ graph=Depends(get_movement_graph),
205
+ ):
206
+ anomalies = graph.get_all_anomalies(threshold=threshold)
207
+ return {
208
+ "threshold": threshold,
209
+ "anomalous_persons": anomalies,
210
+ "count": len(anomalies),
211
+ }
212
+
213
+
214
+ # ── GET /persons ───────────────────────────────────────────────────────────────
215
+
216
+ @router.get("/persons", summary="List all tracked persons")
217
+ async def list_persons(
218
+ limit: int = Query(50, ge=1, le=200),
219
+ offset: int = Query(0, ge=0),
220
+ db: AsyncSession = Depends(get_db),
221
+ ):
222
+ persons = await crud.get_all_persons(db, limit=limit, offset=offset)
223
+ return {
224
+ "persons": [
225
+ {
226
+ "id": str(p.id),
227
+ "first_seen": str(p.first_seen),
228
+ "last_seen": str(p.last_seen),
229
+ "faiss_id": p.faiss_id,
230
+ "attributes": p.attributes,
231
+ }
232
+ for p in persons
233
+ ],
234
+ "total": len(persons),
235
+ }
routes/stream_routes.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Request
2
+ from fastapi.responses import StreamingResponse
3
+ import asyncio
4
+ from typing import List
5
+ from pydantic import BaseModel
6
+ from loguru import logger
7
+
8
+ from vision.stream_manager import stream_manager
9
+
10
+ router = APIRouter(prefix="/api/v1/streams", tags=["Streams"])
11
+
12
+ class StreamCreateRequest(BaseModel):
13
+ camera_id: str
14
+ source: str # e.g. '0', 'rtsp://...', 'http://...'
15
+
16
+ @router.post("/add")
17
+ async def add_stream(request: StreamCreateRequest):
18
+ """Start a new background stream"""
19
+ stream_manager.add_stream(request.camera_id, request.source)
20
+ return {"status": "added", "camera_id": request.camera_id}
21
+
22
+ @router.delete("/{camera_id}")
23
+ async def remove_stream(camera_id: str):
24
+ """Stop a background stream"""
25
+ stream_manager.remove_stream(camera_id)
26
+ return {"status": "removed", "camera_id": camera_id}
27
+
28
+ @router.get("/")
29
+ async def list_streams():
30
+ """Get active streams"""
31
+ return {"streams": list(stream_manager.streams.keys())}
32
+
33
+ @router.get("/results")
34
+ async def stream_results():
35
+ """Get the latest inference results for all active streams"""
36
+ return stream_manager.results
37
+
38
+ @router.get("/feed/{camera_id}")
39
+ async def video_feed(camera_id: str, request: Request):
40
+ """
41
+ Multipart MJPEG video feed for `<img>` tags.
42
+ """
43
+ # If a feed is requested but not added, try a local fallback
44
+ if camera_id not in stream_manager.streams:
45
+ # Fallback to local webcam '0' for demo purposes
46
+ stream_manager.add_stream(camera_id, "0")
47
+
48
+ async def frame_generator():
49
+ while True:
50
+ if await request.is_disconnected():
51
+ logger.info(f"Client disconnected from feed {camera_id}")
52
+ break
53
+
54
+ frame = stream_manager.get_frame(camera_id)
55
+ if frame is None:
56
+ await asyncio.sleep(0.1)
57
+ continue
58
+
59
+ # Yield multipart boundary + image bytes
60
+ yield (b'--frame\r\n'
61
+ b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
62
+
63
+ # Target ~30fps
64
+ await asyncio.sleep(0.033)
65
+
66
+ return StreamingResponse(
67
+ frame_generator(),
68
+ media_type="multipart/x-mixed-replace; boundary=frame"
69
+ )
routes/vision_routes.py ADDED
@@ -0,0 +1,421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ routes/vision_routes.py - Vision API endpoints:
3
+ POST /process-frame (single frame inference)
4
+ GET /live-feed (WebSocket stream)
5
+ GET /cameras (list active cameras)
6
+ """
7
+ import asyncio
8
+ import base64
9
+ import json
10
+ import os
11
+ import tempfile
12
+ import time
13
+ import uuid
14
+ from io import BytesIO
15
+ from typing import Optional
16
+
17
+ from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, WebSocket, WebSocketDisconnect
18
+ from fastapi.responses import JSONResponse, StreamingResponse
19
+ from pydantic import BaseModel
20
+ from PIL import Image
21
+ from loguru import logger
22
+ from sqlalchemy.ext.asyncio import AsyncSession
23
+
24
+ from database.session import get_db, AsyncSessionLocal
25
+ from database import crud
26
+ from database.models import ActivityType, AnalysisSession
27
+
28
+ router = APIRouter(prefix="/api/v1", tags=["Vision"])
29
+
30
+
31
+ # ── Request / Response Models ─────────────────────────────────────────────────
32
+
33
+ class FrameProcessRequest(BaseModel):
34
+ camera_id: str
35
+ image_b64: str # base64-encoded JPEG/PNG
36
+ run_attributes: bool = True
37
+ run_reid: bool = True
38
+ reid_threshold: float = 0.85
39
+
40
+
41
+ class FrameProcessResponse(BaseModel):
42
+ camera_id: str
43
+ frame_id: int
44
+ person_count: int
45
+ persons: list
46
+ latency: dict
47
+ fps: float
48
+ timestamp: float
49
+
50
+
51
+ # ── Dependency: get vision pipeline singleton ──────────────────────────────────
52
+
53
+ def get_pipeline():
54
+ from app import vision_pipeline
55
+ return vision_pipeline
56
+
57
+
58
+ def get_movement_graph():
59
+ from app import movement_graph
60
+ return movement_graph
61
+
62
+
63
+ # ── POST /process-frame ───────────────────────────────────────────────────────
64
+
65
+ @router.post("/process-frame", response_model=FrameProcessResponse, summary="Process a single camera frame")
66
+ async def process_frame(
67
+ request: FrameProcessRequest,
68
+ db: AsyncSession = Depends(get_db),
69
+ pipeline=Depends(get_pipeline),
70
+ graph=Depends(get_movement_graph),
71
+ ):
72
+ """
73
+ Submit a single camera frame for full vision pipeline processing.
74
+ Returns detected persons with tracking IDs, ReID matches, and attributes.
75
+ """
76
+ try:
77
+ result = pipeline.process_frame(
78
+ image_input=request.image_b64,
79
+ camera_id=request.camera_id,
80
+ run_attributes=request.run_attributes,
81
+ run_reid=request.run_reid,
82
+ reid_threshold=request.reid_threshold,
83
+ )
84
+ except Exception as e:
85
+ logger.error(f"Vision pipeline error: {e}")
86
+ raise HTTPException(status_code=500, detail=str(e))
87
+
88
+ # Persist events to database
89
+ for person in result.get("persons", []):
90
+ person_id_str = person.get("assigned_person_id")
91
+ if not person_id_str:
92
+ continue
93
+ try:
94
+ person_uuid = uuid.UUID(person_id_str)
95
+ except ValueError:
96
+ continue
97
+
98
+ # Upsert person record
99
+ db_person = await crud.get_person(db, person_uuid)
100
+ if db_person is None and person.get("is_new_person"):
101
+ db_person = await crud.create_person(
102
+ db,
103
+ faiss_id=person.get("faiss_id"),
104
+ attributes=person.get("attributes"),
105
+ )
106
+
107
+ if db_person:
108
+ await crud.update_person_last_seen(db, db_person.id)
109
+ await crud.create_event(
110
+ db,
111
+ person_id=db_person.id,
112
+ camera_id=request.camera_id,
113
+ activity_type=ActivityType.DETECTED,
114
+ bounding_box={"x1": person["bbox"][0], "y1": person["bbox"][1],
115
+ "x2": person["bbox"][2], "y2": person["bbox"][3]},
116
+ confidence=person.get("score"),
117
+ track_id=person.get("track_id"),
118
+ raw_metadata={"reid_matches": person.get("reid_matches", [])},
119
+ )
120
+
121
+ # Update movement graph
122
+ graph.add_observation(
123
+ person_id=str(db_person.id),
124
+ camera_id=request.camera_id,
125
+ timestamp=result["timestamp"],
126
+ )
127
+
128
+ return JSONResponse(content=result)
129
+
130
+
131
+ # ── POST /process-frame/upload (multipart form data) ─────────────────────────
132
+
133
+ @router.post("/process-frame/upload", summary="Upload image file for processing")
134
+ async def process_frame_upload(
135
+ camera_id: str = Form(...),
136
+ run_attributes: bool = Form(True),
137
+ run_reid: bool = Form(True),
138
+ image: UploadFile = File(...),
139
+ db: AsyncSession = Depends(get_db),
140
+ pipeline=Depends(get_pipeline),
141
+ graph=Depends(get_movement_graph),
142
+ ):
143
+ """Submit a frame via multipart file upload."""
144
+ content = await image.read()
145
+ b64 = base64.b64encode(content).decode()
146
+ # Reuse main endpoint logic via internal call
147
+ from routes.vision_routes import process_frame, FrameProcessRequest
148
+ req = FrameProcessRequest(
149
+ camera_id=camera_id,
150
+ image_b64=b64,
151
+ run_attributes=run_attributes,
152
+ run_reid=run_reid,
153
+ )
154
+ return await process_frame(req, db, pipeline, graph)
155
+
156
+
157
+ # ── POST /analyze-video ──────────────────────────────────────────────────────
158
+
159
+ @router.post("/analyze-video", summary="Upload a video file and stream per-frame detection results via SSE")
160
+ async def analyze_video(
161
+ camera_id: str = Form("VIDEO-UPLOAD"),
162
+ frame_interval: int = Form(5),
163
+ run_attributes: bool = Form(True),
164
+ run_reid: bool = Form(True),
165
+ video: UploadFile = File(...),
166
+ pipeline=Depends(get_pipeline),
167
+ ):
168
+ """
169
+ Upload a video file. Frames are sampled every `frame_interval` frames.
170
+ Results are streamed back as Server-Sent Events (SSE).
171
+
172
+ SSE event format:
173
+ data: {"type": "frame", "frame_id": int, "timestamp_sec": float, ...}
174
+ data: {"type": "summary", "total_frames": int, "unique_persons": int, ...}
175
+ data: {"type": "error", "message": str}
176
+ """
177
+ # Read video bytes into a temp file (cv2 needs a real path on Windows)
178
+ content = await video.read()
179
+ filename = video.filename or "uploaded_video.mp4"
180
+ suffix = os.path.splitext(filename)[1] or ".mp4"
181
+
182
+ # Pre-generate an ID and path for the thumbnail
183
+ session_id = uuid.uuid4()
184
+ thumb_filename = f"{session_id}.jpg"
185
+ thumb_path = os.path.join("static", "thumbnails", thumb_filename)
186
+
187
+ async def event_stream():
188
+ tmp_path = None
189
+ try:
190
+ import cv2
191
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
192
+ tmp.write(content)
193
+ tmp_path = tmp.name
194
+
195
+ cap = cv2.VideoCapture(tmp_path)
196
+ if not cap.isOpened():
197
+ yield f"data: {json.dumps({'type': 'error', 'message': 'Cannot open video file'})}\n\n"
198
+ return
199
+
200
+ fps_native = cap.get(cv2.CAP_PROP_FPS) or 25.0
201
+ total_raw = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
202
+ total_sampled = max(1, total_raw // max(1, frame_interval))
203
+
204
+ # Metadata handshake
205
+ yield f"data: {json.dumps({'type': 'meta', 'total_sampled': total_sampled, 'fps_native': fps_native, 'total_frames_raw': total_raw})}\n\n"
206
+
207
+ raw_idx = 0
208
+ processed_idx = 0
209
+ unique_ids = set()
210
+ peak_count = 0
211
+ t_video_start = time.time()
212
+
213
+ while True:
214
+ ret, frame = cap.read()
215
+ if not ret:
216
+ break
217
+
218
+ if raw_idx % max(1, frame_interval) == 0:
219
+ # Convert BGR → RGB → PIL
220
+ import numpy as np
221
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
222
+ pil_img = Image.fromarray(rgb)
223
+
224
+ # Save the very first processed frame as the session thumbnail
225
+ if processed_idx == 0:
226
+ pil_img.save(thumb_path, "JPEG", quality=85)
227
+
228
+ timestamp_sec = round(raw_idx / fps_native, 3)
229
+
230
+ try:
231
+ result = pipeline.process_frame(
232
+ image_input=pil_img,
233
+ camera_id=camera_id,
234
+ run_attributes=run_attributes,
235
+ run_reid=run_reid,
236
+ )
237
+ except Exception as e:
238
+ logger.warning(f"Frame {raw_idx} error: {e}")
239
+ raw_idx += 1
240
+ continue
241
+
242
+ persons = result.get("persons", [])
243
+ for p in persons:
244
+ pid = p.get("assigned_person_id")
245
+ if pid:
246
+ unique_ids.add(pid)
247
+ peak_count = max(peak_count, len(persons))
248
+
249
+ # Slim down persons payload for SSE
250
+ slim_persons = [
251
+ {
252
+ "track_id": p.get("track_id"),
253
+ "bbox": p.get("bbox"),
254
+ "score": round(p.get("score", 0), 3),
255
+ "is_new": p.get("is_new_person", False),
256
+ "attributes": p.get("attributes", {}),
257
+ "reid_sim": round(p["reid_matches"][0]["similarity"], 3)
258
+ if p.get("reid_matches") else None,
259
+ }
260
+ for p in persons
261
+ ]
262
+
263
+ event = {
264
+ "type": "frame",
265
+ "frame_id": processed_idx,
266
+ "raw_frame": raw_idx,
267
+ "timestamp_sec": timestamp_sec,
268
+ "person_count": len(persons),
269
+ "persons": slim_persons,
270
+ "latency_ms": round(result["latency"]["total_ms"], 1),
271
+ "progress": round((processed_idx + 1) / total_sampled * 100, 1),
272
+ }
273
+ yield f"data: {json.dumps(event)}\n\n"
274
+ processed_idx += 1
275
+
276
+ # Yield control so FastAPI can flush the buffer
277
+ await asyncio.sleep(0)
278
+
279
+ raw_idx += 1
280
+
281
+ cap.release()
282
+ duration_sec = round(time.time() - t_video_start, 2)
283
+
284
+ summary = {
285
+ "type": "summary",
286
+ "total_frames_processed": processed_idx,
287
+ "unique_person_count": len(unique_ids),
288
+ "peak_person_count": peak_count,
289
+ "duration_sec": duration_sec,
290
+ "video_duration_sec": round(total_raw / fps_native, 2),
291
+ }
292
+ yield f"data: {json.dumps(summary)}\n\n"
293
+ logger.info(f"Video analysis done — {processed_idx} frames, {len(unique_ids)} unique persons")
294
+
295
+ # Save the session to the database
296
+ try:
297
+ # We need a fresh session here inside the generator
298
+ async with AsyncSessionLocal() as db:
299
+ new_session = AnalysisSession(
300
+ id=session_id,
301
+ video_filename=filename,
302
+ thumbnail_path=f"/static/thumbnails/{thumb_filename}",
303
+ duration_sec=duration_sec,
304
+ frames_processed=processed_idx,
305
+ unique_persons=len(unique_ids),
306
+ peak_count=peak_count,
307
+ )
308
+ db.add(new_session)
309
+ await db.commit()
310
+ except Exception as e:
311
+ logger.error(f"Failed to save session to DB: {e}")
312
+
313
+ except Exception as e:
314
+ logger.error(f"analyze_video error: {e}")
315
+ yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
316
+ finally:
317
+ if tmp_path and os.path.exists(tmp_path):
318
+ os.unlink(tmp_path)
319
+
320
+ return StreamingResponse(
321
+ event_stream(),
322
+ media_type="text/event-stream",
323
+ headers={
324
+ "Cache-Control": "no-cache",
325
+ "X-Accel-Buffering": "no",
326
+ "Connection": "keep-alive",
327
+ },
328
+ )
329
+
330
+
331
+ # ── GET /sessions ─────────────────────────────────────────────────────────────
332
+
333
+ @router.get("/sessions", summary="List historical video analysis sessions")
334
+ async def list_sessions(db: AsyncSession = Depends(get_db)):
335
+ from sqlalchemy.future import select
336
+ query = select(AnalysisSession).order_by(AnalysisSession.timestamp.desc())
337
+ result = await db.execute(query)
338
+ sessions = result.scalars().all()
339
+
340
+ return [
341
+ {
342
+ "id": str(s.id),
343
+ "timestamp": s.timestamp.isoformat(),
344
+ "video_filename": s.video_filename,
345
+ "thumbnail_path": s.thumbnail_path,
346
+ "duration_sec": s.duration_sec,
347
+ "frames_processed": s.frames_processed,
348
+ "unique_persons": s.unique_persons,
349
+ "peak_count": s.peak_count,
350
+ }
351
+ for s in sessions
352
+ ]
353
+
354
+
355
+ # ── GET /cameras ──────────────────────────────────────────────────────────────
356
+
357
+ @router.get("/cameras", summary="List active cameras and their status")
358
+ async def list_cameras(pipeline=Depends(get_pipeline), graph=Depends(get_movement_graph)):
359
+ summary = graph.get_movement_summary()
360
+ trackers = list(pipeline.tracker_manager._trackers.keys())
361
+ fps_data = {cam: pipeline._compute_fps(cam) for cam in trackers}
362
+ return {
363
+ "active_cameras": trackers,
364
+ "fps_per_camera": fps_data,
365
+ "movement_graph_summary": summary,
366
+ }
367
+
368
+
369
+ # ── WebSocket /live-feed ──────────────────────────────────────────────────────
370
+
371
+ class ConnectionManager:
372
+ def __init__(self):
373
+ self.active_connections: list[WebSocket] = []
374
+
375
+ async def connect(self, ws: WebSocket):
376
+ await ws.accept()
377
+ self.active_connections.append(ws)
378
+ logger.info(f"WebSocket connected. Total: {len(self.active_connections)}")
379
+
380
+ def disconnect(self, ws: WebSocket):
381
+ self.active_connections.remove(ws)
382
+ logger.info(f"WebSocket disconnected. Total: {len(self.active_connections)}")
383
+
384
+ async def broadcast(self, data: dict):
385
+ import json
386
+ msg = json.dumps(data)
387
+ for conn in self.active_connections:
388
+ try:
389
+ await conn.send_text(msg)
390
+ except Exception:
391
+ pass
392
+
393
+
394
+ ws_manager = ConnectionManager()
395
+
396
+
397
+ @router.websocket("/live-feed")
398
+ async def live_feed_websocket(websocket: WebSocket, pipeline=Depends(get_pipeline)):
399
+ """
400
+ WebSocket endpoint for realtime surveillance feed.
401
+ Client sends: {"camera_id": str, "image_b64": str}
402
+ Server broadcasts processed results to all connected clients.
403
+ """
404
+ await ws_manager.connect(websocket)
405
+ try:
406
+ while True:
407
+ data = await websocket.receive_json()
408
+ camera_id = data.get("camera_id", "cam-0")
409
+ image_b64 = data.get("image_b64", "")
410
+ if not image_b64:
411
+ await websocket.send_json({"error": "No image provided"})
412
+ continue
413
+ result = pipeline.process_frame(
414
+ image_input=image_b64,
415
+ camera_id=camera_id,
416
+ run_attributes=data.get("run_attributes", False),
417
+ run_reid=data.get("run_reid", True),
418
+ )
419
+ await websocket.send_json(result)
420
+ except WebSocketDisconnect:
421
+ ws_manager.disconnect(websocket)
static/thumbnails/4f42294d-cdc1-4c64-abae-71a2891167b2.jpg ADDED
vision/__init__.py ADDED
File without changes
vision/attributes.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ vision/attributes.py - CLIP-based Zero-Shot Clothing & Attribute Recognition
3
+ """
4
+ import time
5
+ import torch
6
+ import numpy as np
7
+ import faiss
8
+ import os
9
+ from PIL import Image
10
+ from typing import List, Dict, Tuple, Optional
11
+ from transformers import CLIPProcessor, CLIPModel
12
+ from loguru import logger
13
+ from config import settings, DEVICE, FAISS_DIR
14
+
15
+
16
+ # Attribute taxonomies for zero-shot classification
17
+ CLOTHING_LABELS = [
18
+ "wearing a red shirt", "wearing a blue shirt", "wearing a white shirt",
19
+ "wearing a black shirt", "wearing a yellow jacket", "wearing a green jacket",
20
+ "wearing jeans", "wearing formal trousers", "wearing shorts", "wearing a dress",
21
+ "wearing a hoodie", "wearing a suit", "wearing a uniform", "wearing a coat",
22
+ ]
23
+
24
+ COLOR_LABELS = [
25
+ "person in red clothing", "person in blue clothing", "person in black clothing",
26
+ "person in white clothing", "person in gray clothing", "person in green clothing",
27
+ "person in yellow clothing", "person in orange clothing", "person in brown clothing",
28
+ ]
29
+
30
+ GENDER_LABELS = ["a male person", "a female person"]
31
+
32
+ ACCESSORY_LABELS = [
33
+ "wearing a backpack", "carrying a bag", "wearing a hat", "wearing sunglasses",
34
+ "carrying an umbrella", "wearing a mask", "no accessories",
35
+ ]
36
+
37
+ AGE_LABELS = [
38
+ "a child person", "a teenager person", "a young adult person",
39
+ "a middle-aged person", "an elderly person",
40
+ ]
41
+
42
+
43
+ class AttributeRecognizer:
44
+ """
45
+ Zero-shot attribute recognition using CLIP.
46
+ Generates structured attribute dict and CLIP visual embeddings per person.
47
+ """
48
+
49
+ ATTR_INDEX_FILE = str(FAISS_DIR / "attr_index.faiss")
50
+ ATTR_META_FILE = str(FAISS_DIR / "attr_meta.npy")
51
+
52
+ def __init__(self):
53
+ logger.info(f"Loading CLIP model: {settings.CLIP_MODEL}")
54
+ self.processor = CLIPProcessor.from_pretrained(settings.CLIP_MODEL)
55
+ self.model = CLIPModel.from_pretrained(settings.CLIP_MODEL)
56
+ self.model.to(DEVICE)
57
+ self.model.eval()
58
+
59
+ self.dim = settings.CLIP_EMBEDDING_DIM
60
+ self.index = self._load_or_create_index()
61
+ self.meta: List[Dict] = self._load_meta()
62
+ logger.info(f"✅ AttributeRecognizer ready. FAISS attr index size: {self.index.ntotal}")
63
+
64
+ def _load_or_create_index(self):
65
+ if os.path.exists(self.ATTR_INDEX_FILE):
66
+ return faiss.read_index(self.ATTR_INDEX_FILE)
67
+ return faiss.IndexFlatIP(self.dim)
68
+
69
+ def _load_meta(self) -> List[Dict]:
70
+ if os.path.exists(self.ATTR_META_FILE):
71
+ return list(np.load(self.ATTR_META_FILE, allow_pickle=True))
72
+ return []
73
+
74
+ def save(self):
75
+ faiss.write_index(self.index, self.ATTR_INDEX_FILE)
76
+ np.save(self.ATTR_META_FILE, np.array(self.meta, dtype=object))
77
+
78
+ @torch.inference_mode()
79
+ def _classify(self, image: Image.Image, labels: List[str]) -> List[Tuple[str, float]]:
80
+ """Run zero-shot CLIP classification. Returns sorted (label, prob) list."""
81
+ inputs = self.processor(
82
+ text=labels, images=image, return_tensors="pt", padding=True
83
+ )
84
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
85
+ outputs = self.model(**inputs)
86
+ logits = outputs.logits_per_image[0]
87
+ probs = torch.softmax(logits, dim=0).cpu().numpy()
88
+ return sorted(zip(labels, probs.tolist()), key=lambda x: -x[1])
89
+
90
+ @torch.inference_mode()
91
+ def extract_visual_embedding(self, image: Image.Image) -> np.ndarray:
92
+ """Extract L2-normalized CLIP visual embedding."""
93
+ inputs = self.processor(images=image, return_tensors="pt")
94
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
95
+ features = self.model.get_image_features(**inputs)
96
+ features = features / features.norm(dim=-1, keepdim=True)
97
+ return features.cpu().numpy().astype(np.float32)
98
+
99
+ def recognize(self, image: Image.Image) -> Dict:
100
+ """
101
+ Run all attribute classifiers on a cropped person image.
102
+
103
+ Returns:
104
+ {
105
+ "clothing": [{"label": str, "confidence": float}],
106
+ "color": str,
107
+ "gender": str,
108
+ "accessories": [str],
109
+ "age_group": str,
110
+ }
111
+ """
112
+ t0 = time.perf_counter()
113
+
114
+ clothing_results = self._classify(image, CLOTHING_LABELS)
115
+ color_results = self._classify(image, COLOR_LABELS)
116
+ gender_results = self._classify(image, GENDER_LABELS)
117
+ accessory_results = self._classify(image, ACCESSORY_LABELS)
118
+ age_results = self._classify(image, AGE_LABELS)
119
+
120
+ latency = (time.perf_counter() - t0) * 1000
121
+
122
+ attributes = {
123
+ "clothing": [
124
+ {"label": l, "confidence": round(p, 4)}
125
+ for l, p in clothing_results[:3]
126
+ if p > 0.1
127
+ ],
128
+ "color": color_results[0][0].replace("person in ", "").replace(" clothing", "") if color_results else "unknown",
129
+ "gender": gender_results[0][0].replace("a ", "").replace(" person", "") if gender_results else "unknown",
130
+ "accessories": [l for l, p in accessory_results if p > 0.3 and "no accessories" not in l],
131
+ "age_group": age_results[0][0].replace("a ", "").replace(" person", "") if age_results else "unknown",
132
+ "inference_ms": round(latency, 2),
133
+ }
134
+
135
+ logger.debug(f"Attributes recognized in {latency:.1f}ms")
136
+ return attributes
137
+
138
+ def add_to_gallery(self, image: Image.Image, person_id: str) -> int:
139
+ """Store CLIP visual embedding in FAISS for attribute-based search."""
140
+ embedding = self.extract_visual_embedding(image)
141
+ faiss_id = self.index.ntotal
142
+ self.index.add(embedding)
143
+ self.meta.append({"person_id": person_id, "faiss_id": faiss_id})
144
+ self.save()
145
+ return faiss_id
146
+
147
+ def search_by_attribute_query(self, text_query: str, top_k: int = 10) -> List[Dict]:
148
+ """Search gallery using a natural language attribute query."""
149
+ if self.index.ntotal == 0:
150
+ return []
151
+ inputs = self.processor(text=[text_query], return_tensors="pt", padding=True)
152
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
153
+ with torch.inference_mode():
154
+ text_features = self.model.get_text_features(**inputs)
155
+ text_features = text_features / text_features.norm(dim=-1, keepdim=True)
156
+ query = text_features.cpu().numpy().astype(np.float32)
157
+ k = min(top_k, self.index.ntotal)
158
+ distances, indices = self.index.search(query, k)
159
+ return [
160
+ {"person_id": self.meta[idx]["person_id"], "similarity": round(float(dist), 4)}
161
+ for dist, idx in zip(distances[0], indices[0])
162
+ if idx != -1
163
+ ]
vision/detector.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ vision/detector.py - Person Detection using facebook/detr-resnet-50
3
+ """
4
+ import time
5
+ import torch
6
+ import numpy as np
7
+ from PIL import Image
8
+ from typing import List, Dict, Tuple, Optional
9
+ from transformers import DetrImageProcessor, DetrForObjectDetection
10
+ from loguru import logger
11
+ from config import settings, DEVICE
12
+
13
+
14
+ class PersonDetector:
15
+ """
16
+ DETR-based person detector.
17
+ Returns bounding boxes, confidence scores, and processing latency.
18
+ """
19
+
20
+ PERSON_LABEL = "person"
21
+ COCO_LABEL_MAP = None # populated after model loads
22
+
23
+ def __init__(self):
24
+ logger.info(f"Loading detection model: {settings.DETECTION_MODEL}")
25
+ self.processor = DetrImageProcessor.from_pretrained(settings.DETECTION_MODEL)
26
+ self.model = DetrForObjectDetection.from_pretrained(settings.DETECTION_MODEL)
27
+ self.model.to(DEVICE)
28
+ self.model.eval()
29
+
30
+ # Build label → id map
31
+ self.id2label = self.model.config.id2label
32
+ self.person_label_ids = [
33
+ k for k, v in self.id2label.items() if v.lower() == self.PERSON_LABEL
34
+ ]
35
+ logger.info(f"✅ PersonDetector ready on {DEVICE}. Person class ids: {self.person_label_ids}")
36
+
37
+ @torch.inference_mode()
38
+ def detect(
39
+ self,
40
+ image: Image.Image,
41
+ confidence_threshold: Optional[float] = None,
42
+ ) -> Tuple[List[Dict], float]:
43
+ """
44
+ Detect persons in a PIL image.
45
+
46
+ Returns:
47
+ detections: list of {"bbox": [x1,y1,x2,y2], "score": float, "label": "person"}
48
+ latency_ms: inference time in milliseconds
49
+ """
50
+ threshold = confidence_threshold or settings.DETECTION_CONFIDENCE
51
+
52
+ t0 = time.perf_counter()
53
+ inputs = self.processor(images=image, return_tensors="pt")
54
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
55
+ outputs = self.model(**inputs)
56
+ t1 = time.perf_counter()
57
+ latency_ms = (t1 - t0) * 1000
58
+
59
+ # Post-process to original image size
60
+ target_sizes = torch.tensor([image.size[::-1]], device=DEVICE) # (H, W)
61
+ results = self.processor.post_process_object_detection(
62
+ outputs, threshold=threshold, target_sizes=target_sizes
63
+ )[0]
64
+
65
+ detections = []
66
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
67
+ label_id = label.item()
68
+ if label_id in self.person_label_ids:
69
+ x1, y1, x2, y2 = box.tolist()
70
+ detections.append({
71
+ "bbox": [round(x1, 2), round(y1, 2), round(x2, 2), round(y2, 2)],
72
+ "score": round(score.item(), 4),
73
+ "label": "person",
74
+ })
75
+
76
+ logger.debug(f"Detected {len(detections)} persons in {latency_ms:.1f}ms")
77
+ return detections, latency_ms
78
+
79
+ def detect_batch(
80
+ self,
81
+ images: List[Image.Image],
82
+ confidence_threshold: Optional[float] = None,
83
+ ) -> List[Tuple[List[Dict], float]]:
84
+ """Batch detection for multiple frames."""
85
+ return [self.detect(img, confidence_threshold) for img in images]
86
+
87
+ @staticmethod
88
+ def crop_person(image: Image.Image, bbox: List[float]) -> Image.Image:
89
+ """Crop a person region from image given bbox [x1, y1, x2, y2]."""
90
+ x1, y1, x2, y2 = [int(v) for v in bbox]
91
+ return image.crop((x1, y1, x2, y2))
vision/pipeline.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ vision/pipeline.py - Full Vision Inference Pipeline
3
+ Orchestrates detection → tracking → ReID → attribute recognition per frame
4
+ """
5
+ import time
6
+ import base64
7
+ import numpy as np
8
+ import uuid
9
+ from io import BytesIO
10
+ from PIL import Image
11
+ from typing import Dict, List, Optional, Any
12
+ from loguru import logger
13
+
14
+ from vision.detector import PersonDetector
15
+ from vision.tracker import TrackerManager
16
+ from vision.reid import PersonReID
17
+ from vision.attributes import AttributeRecognizer
18
+ from config import settings
19
+
20
+
21
+ class VisionPipeline:
22
+ """
23
+ End-to-end vision pipeline for a single frame from a camera.
24
+ Components initialized lazily (singletons shared across requests).
25
+ """
26
+
27
+ def __init__(self):
28
+ logger.info("Initializing VisionPipeline...")
29
+ self.detector = PersonDetector()
30
+ self.tracker_manager = TrackerManager()
31
+ self.reid = PersonReID()
32
+ self.attributes = AttributeRecognizer()
33
+ self._frame_counts: Dict[str, int] = {}
34
+ self._fps_timers: Dict[str, List[float]] = {}
35
+ logger.info("✅ VisionPipeline ready.")
36
+
37
+ def _decode_image(self, image_input: Any) -> Image.Image:
38
+ """Accept PIL Image, numpy array, bytes, or base64 string."""
39
+ if isinstance(image_input, Image.Image):
40
+ return image_input.convert("RGB")
41
+ if isinstance(image_input, np.ndarray):
42
+ return Image.fromarray(image_input).convert("RGB")
43
+ if isinstance(image_input, bytes):
44
+ return Image.open(BytesIO(image_input)).convert("RGB")
45
+ if isinstance(image_input, str):
46
+ # base64 encoded
47
+ data = base64.b64decode(image_input)
48
+ return Image.open(BytesIO(data)).convert("RGB")
49
+ raise ValueError(f"Unsupported image type: {type(image_input)}")
50
+
51
+ def _compute_fps(self, camera_id: str) -> float:
52
+ """Compute rolling FPS over last 30 frames."""
53
+ now = time.perf_counter()
54
+ if camera_id not in self._fps_timers:
55
+ self._fps_timers[camera_id] = []
56
+ self._fps_timers[camera_id].append(now)
57
+ if len(self._fps_timers[camera_id]) > 30:
58
+ self._fps_timers[camera_id].pop(0)
59
+ times = self._fps_timers[camera_id]
60
+ if len(times) < 2:
61
+ return 0.0
62
+ return round((len(times) - 1) / (times[-1] - times[0]), 2)
63
+
64
+ def process_frame(
65
+ self,
66
+ image_input: Any,
67
+ camera_id: str,
68
+ run_attributes: bool = True,
69
+ run_reid: bool = True,
70
+ reid_threshold: float = 0.85,
71
+ ) -> Dict:
72
+ """
73
+ Full pipeline for a single frame.
74
+
75
+ Args:
76
+ image_input: PIL Image | numpy array | bytes | base64 str
77
+ camera_id: unique camera identifier
78
+ run_attributes: whether to run CLIP attribute recognition
79
+ run_reid: whether to run ReID matching
80
+ reid_threshold: cosine similarity threshold for ReID
81
+
82
+ Returns:
83
+ Result dict with detections, tracks, reid matches, attributes, and latency breakdown.
84
+ """
85
+ t_start = time.perf_counter()
86
+
87
+ # 1. Decode image
88
+ image = self._decode_image(image_input)
89
+ w, h = image.size
90
+
91
+ # 2. Detection
92
+ detections, det_ms = self.detector.detect(image)
93
+
94
+ # 3. Tracking
95
+ t_track = time.perf_counter()
96
+ tracks = self.tracker_manager.update(camera_id, detections)
97
+ track_ms = (time.perf_counter() - t_track) * 1000
98
+
99
+ # 4. Per-person: ReID + Attributes
100
+ persons_data = []
101
+ for track in tracks:
102
+ bbox = track["bbox"]
103
+ # Crop person region
104
+ try:
105
+ crop = PersonDetector.crop_person(image, bbox)
106
+ if crop.width < 10 or crop.height < 10:
107
+ continue
108
+ except Exception:
109
+ continue
110
+
111
+ person_entry: Dict = {
112
+ "track_id": track["track_id"],
113
+ "bbox": bbox,
114
+ "score": track["score"],
115
+ "camera_id": camera_id,
116
+ "reid_matches": [],
117
+ "attributes": {},
118
+ "is_new_person": False,
119
+ "assigned_person_id": None,
120
+ }
121
+
122
+ # 4a. ReID — try to match against gallery
123
+ if run_reid:
124
+ t_reid = time.perf_counter()
125
+ reid_matches = self.reid.search(crop, top_k=3, similarity_threshold=reid_threshold)
126
+ person_entry["reid_matches"] = reid_matches
127
+ person_entry["reid_ms"] = round((time.perf_counter() - t_reid) * 1000, 2)
128
+
129
+ if reid_matches:
130
+ person_entry["assigned_person_id"] = reid_matches[0]["person_id"]
131
+ else:
132
+ # New person — register in gallery with temporary UUID
133
+ new_pid = str(uuid.uuid4())
134
+ faiss_id = self.reid.add_person(crop, new_pid, camera_id)
135
+ person_entry["assigned_person_id"] = new_pid
136
+ person_entry["is_new_person"] = True
137
+ person_entry["faiss_id"] = faiss_id
138
+
139
+ import os
140
+ os.makedirs("static/thumbnails", exist_ok=True)
141
+ try:
142
+ crop.save(f"static/thumbnails/{new_pid}.jpg", "JPEG", quality=85)
143
+ except Exception as e:
144
+ logger.warning(f"Failed to save thumbnail for {new_pid}: {e}")
145
+
146
+ # 4b. Attribute recognition
147
+ if run_attributes:
148
+ t_attr = time.perf_counter()
149
+ attrs = self.attributes.recognize(crop)
150
+ person_entry["attributes"] = attrs
151
+ person_entry["attr_ms"] = round((time.perf_counter() - t_attr) * 1000, 2)
152
+
153
+ # Also store visual embedding for attribute-based search
154
+ if run_reid:
155
+ self.attributes.add_to_gallery(crop, person_entry["assigned_person_id"])
156
+
157
+ persons_data.append(person_entry)
158
+
159
+ total_ms = (time.perf_counter() - t_start) * 1000
160
+ fps = self._compute_fps(camera_id)
161
+ self._frame_counts[camera_id] = self._frame_counts.get(camera_id, 0) + 1
162
+
163
+ return {
164
+ "camera_id": camera_id,
165
+ "frame_id": self._frame_counts[camera_id],
166
+ "image_size": {"width": w, "height": h},
167
+ "persons": persons_data,
168
+ "person_count": len(persons_data),
169
+ "detection_count": len(detections),
170
+ "latency": {
171
+ "detection_ms": round(det_ms, 2),
172
+ "tracking_ms": round(track_ms, 2),
173
+ "total_ms": round(total_ms, 2),
174
+ },
175
+ "fps": fps,
176
+ "timestamp": time.time(),
177
+ }
vision/reid.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ vision/reid.py - Cross-Camera Person Re-Identification using ViT + FAISS
3
+ """
4
+ import os
5
+ import time
6
+ import numpy as np
7
+ import faiss
8
+ import torch
9
+ import torch.nn.functional as F
10
+ from PIL import Image
11
+ from typing import List, Dict, Optional, Tuple
12
+ from transformers import ViTImageProcessor, ViTModel
13
+ from loguru import logger
14
+ from config import settings, DEVICE, FAISS_DIR
15
+
16
+
17
+ class PersonReID:
18
+ """
19
+ Person Re-Identification using google/vit-base-patch16-224 embeddings.
20
+ Embeddings are stored in a FAISS IndexFlatIP (inner product = cosine after normalization).
21
+ """
22
+
23
+ INDEX_FILE = str(FAISS_DIR / "reid_index.faiss")
24
+ META_FILE = str(FAISS_DIR / "reid_meta.npy")
25
+
26
+ def __init__(self):
27
+ logger.info(f"Loading ReID model: {settings.REID_MODEL}")
28
+ self.processor = ViTImageProcessor.from_pretrained(settings.REID_MODEL)
29
+ self.model = ViTModel.from_pretrained(settings.REID_MODEL)
30
+ self.model.to(DEVICE)
31
+ self.model.eval()
32
+
33
+ self.dim = settings.REID_EMBEDDING_DIM
34
+ self.index = self._load_or_create_index()
35
+ # meta list: maps faiss internal id (row index) → {"person_id": str, "camera_id": str}
36
+ self.meta: List[Dict] = self._load_meta()
37
+ logger.info(f"✅ ReID ready. FAISS index size: {self.index.ntotal}")
38
+
39
+ # ── Index Management ──────────────────────────────────────────────────────
40
+
41
+ def _load_or_create_index(self) -> faiss.IndexFlatIP:
42
+ if os.path.exists(self.INDEX_FILE):
43
+ logger.info("Loading existing FAISS ReID index.")
44
+ return faiss.read_index(self.INDEX_FILE)
45
+ logger.info("Creating new FAISS ReID index (IndexFlatIP).")
46
+ return faiss.IndexFlatIP(self.dim)
47
+
48
+ def _load_meta(self) -> List[Dict]:
49
+ if os.path.exists(self.META_FILE):
50
+ data = np.load(self.META_FILE, allow_pickle=True)
51
+ return list(data)
52
+ return []
53
+
54
+ def save(self):
55
+ faiss.write_index(self.index, self.INDEX_FILE)
56
+ np.save(self.META_FILE, np.array(self.meta, dtype=object))
57
+ logger.debug("FAISS ReID index saved.")
58
+
59
+ # ── Embedding Extraction ──────────────────────────────────────────────────
60
+
61
+ @torch.inference_mode()
62
+ def extract_embedding(self, image: Image.Image) -> np.ndarray:
63
+ """Extract L2-normalized ViT CLS token embedding from a cropped person image."""
64
+ inputs = self.processor(images=image, return_tensors="pt")
65
+ inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
66
+ outputs = self.model(**inputs)
67
+ # CLS token → (1, hidden_size)
68
+ cls = outputs.last_hidden_state[:, 0, :]
69
+ # L2 normalize for cosine similarity via inner product
70
+ embedding = F.normalize(cls, p=2, dim=-1).cpu().numpy().astype(np.float32)
71
+ return embedding # shape: (1, 768)
72
+
73
+ # ── Gallery Operations ────────────────────────────────────────────────────
74
+
75
+ def add_person(self, image: Image.Image, person_id: str, camera_id: str) -> int:
76
+ """Add a new person embedding to the FAISS gallery. Returns faiss_id."""
77
+ embedding = self.extract_embedding(image)
78
+ faiss_id = self.index.ntotal
79
+ self.index.add(embedding)
80
+ self.meta.append({"person_id": person_id, "camera_id": camera_id, "faiss_id": faiss_id})
81
+ self.save()
82
+ return faiss_id
83
+
84
+ def search(
85
+ self,
86
+ image: Image.Image,
87
+ top_k: int = 5,
88
+ similarity_threshold: float = 0.85,
89
+ ) -> List[Dict]:
90
+ """
91
+ Search gallery for matching persons.
92
+
93
+ Returns:
94
+ list of {"person_id": str, "camera_id": str, "similarity": float, "faiss_id": int}
95
+ """
96
+ if self.index.ntotal == 0:
97
+ return []
98
+
99
+ t0 = time.perf_counter()
100
+ query = self.extract_embedding(image)
101
+ k = min(top_k, self.index.ntotal)
102
+ distances, indices = self.index.search(query, k)
103
+ latency = (time.perf_counter() - t0) * 1000
104
+
105
+ results = []
106
+ for dist, idx in zip(distances[0], indices[0]):
107
+ if idx == -1:
108
+ continue
109
+ similarity = float(dist)
110
+ if similarity >= similarity_threshold:
111
+ meta = self.meta[idx]
112
+ results.append({
113
+ "person_id": meta["person_id"],
114
+ "camera_id": meta["camera_id"],
115
+ "similarity": round(similarity, 4),
116
+ "faiss_id": int(idx),
117
+ })
118
+
119
+ logger.debug(f"ReID search: {len(results)} matches in {latency:.1f}ms")
120
+ return results
121
+
122
+ def search_by_embedding(
123
+ self,
124
+ embedding: np.ndarray,
125
+ top_k: int = 5,
126
+ similarity_threshold: float = 0.85,
127
+ ) -> List[Dict]:
128
+ """Direct search with a precomputed embedding."""
129
+ if self.index.ntotal == 0:
130
+ return []
131
+ k = min(top_k, self.index.ntotal)
132
+ distances, indices = self.index.search(embedding, k)
133
+ results = []
134
+ for dist, idx in zip(distances[0], indices[0]):
135
+ if idx == -1 or float(dist) < similarity_threshold:
136
+ continue
137
+ meta = self.meta[idx]
138
+ results.append({
139
+ "person_id": meta["person_id"],
140
+ "camera_id": meta["camera_id"],
141
+ "similarity": round(float(dist), 4),
142
+ "faiss_id": int(idx),
143
+ })
144
+ return results
vision/stream_manager.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import threading
3
+ import time
4
+ import io
5
+ from PIL import Image, ImageDraw, ImageFont
6
+ from loguru import logger
7
+
8
+ class StreamManager:
9
+ """
10
+ Manages background threads that constantly pull frames from RTSP streams or local webcams,
11
+ run the vision pipeline, and store annotated JPEG bytes for MJPEG streaming.
12
+ """
13
+ def __init__(self):
14
+ self.streams = {} # camera_id -> dict with thread info
15
+ self.frames = {} # camera_id -> latest JPEG bytes
16
+ self.results = {} # camera_id -> latest inference result dict
17
+ self.running = True
18
+
19
+ def add_stream(self, camera_id: str, source: str):
20
+ """Add a new camera stream"""
21
+ if camera_id in self.streams:
22
+ logger.info(f"Stream {camera_id} is already running.")
23
+ return
24
+
25
+ # If source is just a digit like "0", handle it as an int for local webcam
26
+ if source.isdigit():
27
+ source = int(source)
28
+
29
+ logger.info(f"Adding stream {camera_id} from {source}")
30
+ thread = threading.Thread(target=self._stream_loop, args=(camera_id, source), daemon=True)
31
+ self.streams[camera_id] = {
32
+ "thread": thread,
33
+ "source": source,
34
+ "active": True
35
+ }
36
+ thread.start()
37
+
38
+ def remove_stream(self, camera_id: str):
39
+ if camera_id in self.streams:
40
+ logger.info(f"Removing stream {camera_id}")
41
+ self.streams[camera_id]["active"] = False
42
+ del self.streams[camera_id]
43
+
44
+ def _stream_loop(self, camera_id: str, source):
45
+ # Import inside the loop to avoid circular import issues if imported from app.py
46
+ from app import vision_pipeline
47
+
48
+ cap = cv2.VideoCapture(source)
49
+ if not cap.isOpened():
50
+ logger.error(f"Failed to open Stream: {camera_id} -> {source}")
51
+ self.remove_stream(camera_id)
52
+ return
53
+
54
+ fps_native = cap.get(cv2.CAP_PROP_FPS) or 25.0
55
+ delay = 1.0 / max(1, fps_native)
56
+
57
+ logger.info(f"Stream {camera_id} connected. Target FPS: {fps_native}")
58
+
59
+ while self.streams.get(camera_id, {}).get("active", False) and self.running:
60
+ start_t = time.perf_counter()
61
+ ret, frame = cap.read()
62
+
63
+ if not ret:
64
+ logger.warning(f"Stream {camera_id} disconnected. Attempting reconnect...")
65
+ time.sleep(2)
66
+ cap = cv2.VideoCapture(source)
67
+ continue
68
+
69
+ # Convert to PIL
70
+ rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
71
+ pil_img = Image.fromarray(rgb_frame)
72
+
73
+ # ── Inference ──
74
+ has_result = False
75
+ result_data = None
76
+ try:
77
+ if vision_pipeline:
78
+ result_data = vision_pipeline.process_frame(
79
+ image_input=pil_img,
80
+ camera_id=camera_id,
81
+ run_attributes=True,
82
+ run_reid=True
83
+ )
84
+ self.results[camera_id] = result_data
85
+ has_result = True
86
+ except Exception as e:
87
+ logger.error(f"Inference error on stream {camera_id}: {e}")
88
+
89
+ # ── Annotation ──
90
+ if has_result and result_data:
91
+ pil_img = self._annotate_frame(pil_img, result_data)
92
+
93
+ # ── Encode to JPEG ──
94
+ buf = io.BytesIO()
95
+ pil_img.save(buf, format="JPEG", quality=75)
96
+ self.frames[camera_id] = buf.getvalue()
97
+
98
+ # Enforce FPS limit
99
+ elapsed = time.perf_counter() - start_t
100
+ if elapsed < delay:
101
+ time.sleep(delay - elapsed)
102
+
103
+ cap.release()
104
+ logger.info(f"Stream {camera_id} loop terminated.")
105
+
106
+ def _annotate_frame(self, image: Image.Image, result: dict) -> Image.Image:
107
+ """Draw bounding boxes natively on the PIL image before encoding to MJPEG"""
108
+ draw = ImageDraw.Draw(image)
109
+ try:
110
+ # Using default font for robust cross-platform rendering
111
+ font = ImageFont.load_default()
112
+ except:
113
+ font = None
114
+
115
+ for p in result.get("persons", []):
116
+ x1, y1, x2, y2 = p["bbox"]
117
+ is_new = p.get("is_new_person", False)
118
+
119
+ # Extract ReID sim
120
+ reid_sim = 0
121
+ if p.get("reid_matches"):
122
+ reid_sim = p["reid_matches"][0].get("similarity", 0)
123
+
124
+ is_alert = not is_new and reid_sim > 0.85
125
+
126
+ # Colors match the frontend UI standard
127
+ color = "#FF1744" if is_alert else ("#FFB300" if is_new else "#00E5FF")
128
+
129
+ # Bounding Box
130
+ draw.rectangle([x1, y1, x2, y2], outline=color, width=3)
131
+
132
+ # Label
133
+ label = f"TRK-{p.get('track_id')} {(p.get('score', 0)*100):.0f}%"
134
+ # Draw label background
135
+ text_bg_y0 = max(0, y1 - 16)
136
+ draw.rectangle([x1, text_bg_y0, x1 + 120, text_bg_y0 + 16], fill=color)
137
+
138
+ if font:
139
+ draw.text((x1 + 4, text_bg_y0 + 2), label, fill="black", font=font)
140
+
141
+ return image
142
+
143
+ def get_frame(self, camera_id: str):
144
+ return self.frames.get(camera_id)
145
+
146
+ def shutdown(self):
147
+ self.running = False
148
+ self.streams.clear()
149
+
150
+ # Global Singleton
151
+ stream_manager = StreamManager()
vision/tracker.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ vision/tracker.py - Multi-Object Tracking using ByteTrack algorithm
3
+ Assigns persistent track IDs across frames for each camera.
4
+ """
5
+ import numpy as np
6
+ from typing import List, Dict, Tuple, Optional
7
+ from dataclasses import dataclass, field
8
+ from loguru import logger
9
+ from config import settings
10
+
11
+
12
+ @dataclass
13
+ class Track:
14
+ track_id: int
15
+ bbox: List[float] # [x1, y1, x2, y2]
16
+ score: float
17
+ age: int = 0
18
+ hits: int = 1
19
+ time_since_update: int = 0
20
+ state: str = "active" # active | lost | removed
21
+ history: List[List[float]] = field(default_factory=list)
22
+
23
+ def update(self, bbox: List[float], score: float):
24
+ self.bbox = bbox
25
+ self.score = score
26
+ self.hits += 1
27
+ self.age += 1
28
+ self.time_since_update = 0
29
+ self.state = "active"
30
+ self.history.append(bbox)
31
+ if len(self.history) > 30:
32
+ self.history.pop(0)
33
+
34
+ def predict(self):
35
+ """Simple linear prediction (extend with Kalman for production)."""
36
+ self.time_since_update += 1
37
+ self.age += 1
38
+ if self.time_since_update > settings.TRACK_BUFFER:
39
+ self.state = "removed"
40
+ elif self.time_since_update > 5:
41
+ self.state = "lost"
42
+
43
+
44
+ def iou(boxA: List[float], boxB: List[float]) -> float:
45
+ """Compute Intersection over Union between two [x1,y1,x2,y2] boxes."""
46
+ xA = max(boxA[0], boxB[0])
47
+ yA = max(boxA[1], boxB[1])
48
+ xB = min(boxA[2], boxB[2])
49
+ yB = min(boxA[3], boxB[3])
50
+ inter = max(0, xB - xA) * max(0, yB - yA)
51
+ areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
52
+ areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
53
+ union = areaA + areaB - inter
54
+ return inter / (union + 1e-6)
55
+
56
+
57
+ class ByteTracker:
58
+ """
59
+ Simplified ByteTrack-style multi-object tracker.
60
+ Uses two-stage matching: high-confidence detections first, then low-confidence.
61
+ One instance per camera.
62
+ """
63
+
64
+ def __init__(self, camera_id: str):
65
+ self.camera_id = camera_id
66
+ self.tracks: List[Track] = []
67
+ self._next_id = 1
68
+ self.frame_id = 0
69
+ logger.info(f"ByteTracker initialized for camera: {camera_id}")
70
+
71
+ def _new_track(self, bbox: List[float], score: float) -> Track:
72
+ t = Track(track_id=self._next_id, bbox=bbox, score=score, history=[bbox])
73
+ self._next_id += 1
74
+ return t
75
+
76
+ def _match(
77
+ self,
78
+ detections: List[Dict],
79
+ threshold: float = 0.5,
80
+ ) -> Tuple[List[Tuple[int, int]], List[int], List[int]]:
81
+ """
82
+ Greedy IoU matching between active tracks and detections.
83
+ Returns: (matched pairs), (unmatched track indices), (unmatched det indices)
84
+ """
85
+ active = [i for i, t in enumerate(self.tracks) if t.state != "removed"]
86
+ if not active or not detections:
87
+ return [], active, list(range(len(detections)))
88
+
89
+ iou_matrix = np.zeros((len(active), len(detections)))
90
+ for i, ti in enumerate(active):
91
+ for j, det in enumerate(detections):
92
+ iou_matrix[i, j] = iou(self.tracks[ti].bbox, det["bbox"])
93
+
94
+ matched, unmatched_tracks, unmatched_dets = [], list(active), list(range(len(detections)))
95
+ while True:
96
+ if iou_matrix.size == 0:
97
+ break
98
+ flat_idx = np.argmax(iou_matrix)
99
+ ti_local, di = divmod(flat_idx, iou_matrix.shape[1])
100
+ if iou_matrix[ti_local, di] < threshold:
101
+ break
102
+ ti_global = active[ti_local]
103
+ matched.append((ti_global, di))
104
+ unmatched_tracks.remove(ti_global)
105
+ unmatched_dets.remove(di)
106
+ iou_matrix[ti_local, :] = -1
107
+ iou_matrix[:, di] = -1
108
+
109
+ return matched, unmatched_tracks, unmatched_dets
110
+
111
+ def update(self, detections: List[Dict]) -> List[Dict]:
112
+ """
113
+ Update tracker with new detections.
114
+
115
+ Args:
116
+ detections: list of {"bbox": [...], "score": float}
117
+
118
+ Returns:
119
+ tracked_objects: list of {"track_id": int, "bbox": [...], "score": float, "state": str}
120
+ """
121
+ self.frame_id += 1
122
+
123
+ # Predict existing tracks
124
+ for t in self.tracks:
125
+ t.predict()
126
+
127
+ # Remove permanently dead tracks
128
+ self.tracks = [t for t in self.tracks if t.state != "removed"]
129
+
130
+ # High confidence detections
131
+ high_dets = [d for d in detections if d["score"] >= settings.TRACK_THRESH]
132
+ low_dets = [d for d in detections if d["score"] < settings.TRACK_THRESH]
133
+
134
+ # Stage 1: Match high-confidence detections
135
+ matched, unmatched_tracks, unmatched_high = self._match(high_dets, threshold=settings.MATCH_THRESH)
136
+ for ti, di in matched:
137
+ self.tracks[ti].update(high_dets[di]["bbox"], high_dets[di]["score"])
138
+
139
+ # Stage 2: Match remaining tracks with low-confidence detections
140
+ remaining_unmatched = [ti for ti in unmatched_tracks if self.tracks[ti].state == "lost"]
141
+ if remaining_unmatched and low_dets:
142
+ low_iou_matrix = np.zeros((len(remaining_unmatched), len(low_dets)))
143
+ for i, ti in enumerate(remaining_unmatched):
144
+ for j, det in enumerate(low_dets):
145
+ low_iou_matrix[i, j] = iou(self.tracks[ti].bbox, det["bbox"])
146
+ for i, ti in enumerate(remaining_unmatched):
147
+ best_j = int(np.argmax(low_iou_matrix[i]))
148
+ if low_iou_matrix[i, best_j] > 0.5:
149
+ self.tracks[ti].update(low_dets[best_j]["bbox"], low_dets[best_j]["score"])
150
+
151
+ # Create new tracks for unmatched high-confidence detections
152
+ for di in unmatched_high:
153
+ self.tracks.append(self._new_track(high_dets[di]["bbox"], high_dets[di]["score"]))
154
+
155
+ # Return active tracks
156
+ return [
157
+ {
158
+ "track_id": t.track_id,
159
+ "bbox": t.bbox,
160
+ "score": t.score,
161
+ "state": t.state,
162
+ "age": t.age,
163
+ "hits": t.hits,
164
+ }
165
+ for t in self.tracks
166
+ if t.state == "active"
167
+ ]
168
+
169
+
170
+ class TrackerManager:
171
+ """Manages one ByteTracker per camera."""
172
+
173
+ def __init__(self):
174
+ self._trackers: Dict[str, ByteTracker] = {}
175
+
176
+ def get_tracker(self, camera_id: str) -> ByteTracker:
177
+ if camera_id not in self._trackers:
178
+ self._trackers[camera_id] = ByteTracker(camera_id)
179
+ return self._trackers[camera_id]
180
+
181
+ def update(self, camera_id: str, detections: List[Dict]) -> List[Dict]:
182
+ return self.get_tracker(camera_id).update(detections)
183
+
184
+ def reset(self, camera_id: str):
185
+ if camera_id in self._trackers:
186
+ del self._trackers[camera_id]