Spaces:
Running
Running
Commit ·
2758540
1
Parent(s): e2b09b1
Deploy Sentinelai API backend
Browse files- .gitignore +8 -0
- Dockerfile +43 -0
- app.py +215 -0
- audio/__init__.py +0 -0
- audio/audio_module.py +141 -0
- config.py +107 -0
- database/__init__.py +0 -0
- database/crud.py +164 -0
- database/models.py +100 -0
- database/session.py +46 -0
- graph/__init__.py +0 -0
- graph/movement_graph.py +236 -0
- nlp/__init__.py +0 -0
- nlp/qa.py +103 -0
- nlp/report.py +114 -0
- nlp/search.py +112 -0
- nlp/summarizer.py +75 -0
- requirements.txt +96 -0
- routes/__init__.py +0 -0
- routes/nlp_routes.py +235 -0
- routes/stream_routes.py +69 -0
- routes/vision_routes.py +421 -0
- static/thumbnails/4f42294d-cdc1-4c64-abae-71a2891167b2.jpg +0 -0
- vision/__init__.py +0 -0
- vision/attributes.py +163 -0
- vision/detector.py +91 -0
- vision/pipeline.py +177 -0
- vision/reid.py +144 -0
- vision/stream_manager.py +151 -0
- vision/tracker.py +186 -0
.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
.env*
|
| 4 |
+
__pycache__/
|
| 5 |
+
*.pyc
|
| 6 |
+
.venv/
|
| 7 |
+
venv/
|
| 8 |
+
env/
|
Dockerfile
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use an official Python runtime as a parent image
|
| 2 |
+
FROM python:3.10-slim
|
| 3 |
+
|
| 4 |
+
# Set environment variables for Python
|
| 5 |
+
ENV PYTHONDONTWRITEBYTECODE=1
|
| 6 |
+
ENV PYTHONUNBUFFERED=1
|
| 7 |
+
|
| 8 |
+
# Hugging Face Spaces uses port 7860 by default
|
| 9 |
+
ENV PORT=7860
|
| 10 |
+
|
| 11 |
+
# Install system dependencies required for OpenCV and AI models
|
| 12 |
+
RUN apt-get update && apt-get install -y \
|
| 13 |
+
libgl1-mesa-glx \
|
| 14 |
+
libglib2.0-0 \
|
| 15 |
+
libsm6 \
|
| 16 |
+
libxext6 \
|
| 17 |
+
libxrender-dev \
|
| 18 |
+
git \
|
| 19 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 20 |
+
|
| 21 |
+
# Set the working directory in the container
|
| 22 |
+
WORKDIR /app
|
| 23 |
+
|
| 24 |
+
# Copy the requirements file into the container
|
| 25 |
+
COPY requirements.txt .
|
| 26 |
+
|
| 27 |
+
# Install Python dependencies
|
| 28 |
+
# We specifically install the CPU-only version of PyTorch to save space and avoid GPU driver issues on the free tier
|
| 29 |
+
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
| 30 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 31 |
+
|
| 32 |
+
# Copy the current directory contents into the container at /app
|
| 33 |
+
COPY . .
|
| 34 |
+
|
| 35 |
+
# Create directories for static files and databases
|
| 36 |
+
RUN mkdir -p static/thumbnails static/anomalies database/data
|
| 37 |
+
RUN chmod -R 777 static database
|
| 38 |
+
|
| 39 |
+
# Expose the port Hugging Face expects
|
| 40 |
+
EXPOSE 7860
|
| 41 |
+
|
| 42 |
+
# Command to run the FastAPI server
|
| 43 |
+
CMD uvicorn app:app --host 0.0.0.0 --port $PORT
|
app.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
app.py - Main FastAPI Application Entry Point
|
| 3 |
+
Multimodal Surveillance Intelligence System
|
| 4 |
+
Run with: uvicorn app:app --host 0.0.0.0 --port 8000 --reload
|
| 5 |
+
"""
|
| 6 |
+
import asyncio
|
| 7 |
+
import time
|
| 8 |
+
from contextlib import asynccontextmanager
|
| 9 |
+
|
| 10 |
+
from fastapi import FastAPI, Request
|
| 11 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 12 |
+
from fastapi.responses import JSONResponse
|
| 13 |
+
from fastapi.staticfiles import StaticFiles
|
| 14 |
+
from loguru import logger
|
| 15 |
+
import psutil
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
from config import settings, DEVICE
|
| 19 |
+
|
| 20 |
+
# ── Global Singletons (initialized on startup) ────────────────────────────────
|
| 21 |
+
vision_pipeline = None
|
| 22 |
+
search_engine = None
|
| 23 |
+
qa_system = None
|
| 24 |
+
report_generator = None
|
| 25 |
+
summarizer = None
|
| 26 |
+
movement_graph = None
|
| 27 |
+
audio_asr = None
|
| 28 |
+
audio_classifier = None
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
@asynccontextmanager
|
| 32 |
+
async def lifespan(app: FastAPI):
|
| 33 |
+
"""Application startup/shutdown lifecycle."""
|
| 34 |
+
global vision_pipeline, search_engine, qa_system, report_generator
|
| 35 |
+
global summarizer, movement_graph, audio_asr, audio_classifier
|
| 36 |
+
|
| 37 |
+
logger.info("=" * 60)
|
| 38 |
+
logger.info(f"🚀 Starting {settings.APP_NAME} v{settings.APP_VERSION}")
|
| 39 |
+
logger.info(f" Device: {DEVICE}")
|
| 40 |
+
logger.info("=" * 60)
|
| 41 |
+
|
| 42 |
+
# 1. Initialize database tables
|
| 43 |
+
logger.info("📦 Initializing database...")
|
| 44 |
+
from database.session import create_tables
|
| 45 |
+
await create_tables()
|
| 46 |
+
|
| 47 |
+
# 2. Load Vision Pipeline
|
| 48 |
+
logger.info("🎥 Loading Vision Pipeline...")
|
| 49 |
+
from vision.pipeline import VisionPipeline
|
| 50 |
+
vision_pipeline = VisionPipeline()
|
| 51 |
+
|
| 52 |
+
# 3. Load NLP Components
|
| 53 |
+
logger.info("💬 Loading NLP: Semantic Search...")
|
| 54 |
+
from nlp.search import SemanticSearchEngine
|
| 55 |
+
search_engine = SemanticSearchEngine()
|
| 56 |
+
|
| 57 |
+
logger.info("❓ Loading NLP: QA System...")
|
| 58 |
+
from nlp.qa import SurveillanceQA
|
| 59 |
+
qa_system = SurveillanceQA()
|
| 60 |
+
|
| 61 |
+
logger.info("📝 Loading NLP: Report Generator...")
|
| 62 |
+
from nlp.report import IncidentReportGenerator
|
| 63 |
+
report_generator = IncidentReportGenerator()
|
| 64 |
+
|
| 65 |
+
logger.info("📋 Loading NLP: Summarizer...")
|
| 66 |
+
from nlp.summarizer import SurveillanceSummarizer
|
| 67 |
+
summarizer = SurveillanceSummarizer()
|
| 68 |
+
|
| 69 |
+
# 4. Load Graph Module
|
| 70 |
+
logger.info("🕸️ Initializing Movement Graph...")
|
| 71 |
+
from graph.movement_graph import MovementGraph
|
| 72 |
+
movement_graph = MovementGraph()
|
| 73 |
+
|
| 74 |
+
# 5. Load Audio (optional)
|
| 75 |
+
if settings.ENABLE_AUDIO:
|
| 76 |
+
logger.info("🎙️ Loading Audio Module...")
|
| 77 |
+
from audio.audio_module import WhisperASR, AudioClassifier
|
| 78 |
+
audio_asr = WhisperASR()
|
| 79 |
+
audio_classifier = AudioClassifier()
|
| 80 |
+
|
| 81 |
+
logger.info("✅ All components loaded successfully!")
|
| 82 |
+
logger.info(f"📊 Memory usage: {psutil.Process().memory_info().rss / 1e6:.1f} MB")
|
| 83 |
+
|
| 84 |
+
yield # App is running
|
| 85 |
+
|
| 86 |
+
# Shutdown
|
| 87 |
+
logger.info("🛑 Shutting down Surveillance System...")
|
| 88 |
+
from vision.stream_manager import stream_manager
|
| 89 |
+
stream_manager.shutdown()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# ── FastAPI App ────────────────────────────────────────────────────────────────
|
| 93 |
+
|
| 94 |
+
app = FastAPI(
|
| 95 |
+
title=settings.APP_NAME,
|
| 96 |
+
version=settings.APP_VERSION,
|
| 97 |
+
description="""
|
| 98 |
+
**Multimodal Surveillance Intelligence System**
|
| 99 |
+
|
| 100 |
+
Capabilities:
|
| 101 |
+
- 🎥 Real-time multi-camera video processing
|
| 102 |
+
- 👤 Person detection (DETR) + Multi-object tracking (ByteTrack)
|
| 103 |
+
- 🔍 Cross-camera Re-Identification (ViT + FAISS)
|
| 104 |
+
- 👗 Clothing/attribute recognition (CLIP zero-shot)
|
| 105 |
+
- 💬 Semantic search over surveillance logs
|
| 106 |
+
- ❓ Natural language Q&A over events
|
| 107 |
+
- 📝 Automated incident report generation
|
| 108 |
+
- 🕸️ Movement graph anomaly detection
|
| 109 |
+
""",
|
| 110 |
+
lifespan=lifespan,
|
| 111 |
+
docs_url="/docs",
|
| 112 |
+
redoc_url="/redoc",
|
| 113 |
+
)
|
| 114 |
+
|
| 115 |
+
# ── CORS ───────────────────────────────────────────────────────────────────────
|
| 116 |
+
|
| 117 |
+
app.add_middleware(
|
| 118 |
+
CORSMiddleware,
|
| 119 |
+
allow_origins=settings.CORS_ORIGINS + ["*"],
|
| 120 |
+
allow_credentials=True,
|
| 121 |
+
allow_methods=["*"],
|
| 122 |
+
allow_headers=["*"],
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
# ── Request Latency Logging Middleware ─────────────────────────────────────────
|
| 126 |
+
|
| 127 |
+
@app.middleware("http")
|
| 128 |
+
async def log_requests(request: Request, call_next):
|
| 129 |
+
t0 = time.perf_counter()
|
| 130 |
+
response = await call_next(request)
|
| 131 |
+
latency = (time.perf_counter() - t0) * 1000
|
| 132 |
+
logger.debug(f"{request.method} {request.url.path} → {response.status_code} ({latency:.1f}ms)")
|
| 133 |
+
response.headers["X-Process-Time-Ms"] = f"{latency:.2f}"
|
| 134 |
+
return response
|
| 135 |
+
|
| 136 |
+
# ── Register Routers ────────────────────────────────────────────────────────��──
|
| 137 |
+
|
| 138 |
+
from routes.vision_routes import router as vision_router
|
| 139 |
+
from routes.nlp_routes import router as nlp_router
|
| 140 |
+
from routes.stream_routes import router as stream_router
|
| 141 |
+
|
| 142 |
+
app.include_router(vision_router)
|
| 143 |
+
app.include_router(nlp_router)
|
| 144 |
+
app.include_router(stream_router)
|
| 145 |
+
|
| 146 |
+
# ── Static Files ───────────────────────────────────────────────────────────────
|
| 147 |
+
|
| 148 |
+
# Create static directory if it doesn't exist
|
| 149 |
+
os.makedirs("static/thumbnails", exist_ok=True)
|
| 150 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# ── Health & Status Routes ─────────────────────────────────────────────────────
|
| 154 |
+
|
| 155 |
+
@app.get("/", tags=["Health"])
|
| 156 |
+
async def root():
|
| 157 |
+
return {
|
| 158 |
+
"system": settings.APP_NAME,
|
| 159 |
+
"version": settings.APP_VERSION,
|
| 160 |
+
"status": "operational",
|
| 161 |
+
"device": str(DEVICE),
|
| 162 |
+
"docs": "/docs",
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
@app.get("/health", tags=["Health"])
|
| 167 |
+
async def health_check():
|
| 168 |
+
mem = psutil.Process().memory_info().rss / 1e6
|
| 169 |
+
cpu = psutil.cpu_percent(interval=0.1)
|
| 170 |
+
import torch
|
| 171 |
+
gpu_info = {}
|
| 172 |
+
if torch.cuda.is_available():
|
| 173 |
+
gpu_info = {
|
| 174 |
+
"name": torch.cuda.get_device_name(0),
|
| 175 |
+
"memory_allocated_mb": round(torch.cuda.memory_allocated(0) / 1e6, 1),
|
| 176 |
+
"memory_reserved_mb": round(torch.cuda.memory_reserved(0) / 1e6, 1),
|
| 177 |
+
}
|
| 178 |
+
return {
|
| 179 |
+
"status": "healthy",
|
| 180 |
+
"device": str(DEVICE),
|
| 181 |
+
"memory_mb": round(mem, 1),
|
| 182 |
+
"cpu_percent": cpu,
|
| 183 |
+
"gpu": gpu_info,
|
| 184 |
+
"models_loaded": {
|
| 185 |
+
"vision_pipeline": vision_pipeline is not None,
|
| 186 |
+
"search_engine": search_engine is not None,
|
| 187 |
+
"qa_system": qa_system is not None,
|
| 188 |
+
"report_generator": report_generator is not None,
|
| 189 |
+
"summarizer": summarizer is not None,
|
| 190 |
+
"movement_graph": movement_graph is not None,
|
| 191 |
+
"audio": audio_asr is not None,
|
| 192 |
+
},
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
@app.get("/metrics", tags=["Health"])
|
| 197 |
+
async def prometheus_metrics():
|
| 198 |
+
"""Basic Prometheus-style metrics."""
|
| 199 |
+
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST
|
| 200 |
+
from starlette.responses import Response
|
| 201 |
+
return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
# ── Entry point ────────────────────────────────────────────────────────────────
|
| 205 |
+
|
| 206 |
+
if __name__ == "__main__":
|
| 207 |
+
import uvicorn
|
| 208 |
+
uvicorn.run(
|
| 209 |
+
"app:app",
|
| 210 |
+
host=settings.HOST,
|
| 211 |
+
port=settings.PORT,
|
| 212 |
+
reload=settings.DEBUG,
|
| 213 |
+
workers=1, # 1 worker required for shared model singletons
|
| 214 |
+
log_level="info",
|
| 215 |
+
)
|
audio/__init__.py
ADDED
|
File without changes
|
audio/audio_module.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
audio/asr.py - Automatic Speech Recognition using openai/whisper-small
|
| 3 |
+
audio/classifier.py - Audio classification using wav2vec2
|
| 4 |
+
audio/tts.py - Text-to-Speech using microsoft/speecht5_tts
|
| 5 |
+
"""
|
| 6 |
+
import time
|
| 7 |
+
import torch
|
| 8 |
+
import numpy as np
|
| 9 |
+
from typing import Dict, Optional, Union
|
| 10 |
+
from loguru import logger
|
| 11 |
+
from config import settings, DEVICE
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 15 |
+
# ASR - Automatic Speech Recognition
|
| 16 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 17 |
+
|
| 18 |
+
class WhisperASR:
|
| 19 |
+
"""Transcribes audio using openai/whisper-small."""
|
| 20 |
+
|
| 21 |
+
def __init__(self):
|
| 22 |
+
if not settings.ENABLE_AUDIO:
|
| 23 |
+
logger.info("Audio module disabled. Set ENABLE_AUDIO=true to activate.")
|
| 24 |
+
self._ready = False
|
| 25 |
+
return
|
| 26 |
+
logger.info(f"Loading Whisper ASR model: {settings.WHISPER_MODEL}")
|
| 27 |
+
try:
|
| 28 |
+
import whisper
|
| 29 |
+
model_name = settings.WHISPER_MODEL.split("/")[-1] # "whisper-small" → "small"
|
| 30 |
+
self.model = whisper.load_model(model_name, device=str(DEVICE))
|
| 31 |
+
self._ready = True
|
| 32 |
+
logger.info("✅ WhisperASR ready.")
|
| 33 |
+
except ImportError:
|
| 34 |
+
logger.warning("openai-whisper not installed. ASR unavailable.")
|
| 35 |
+
self._ready = False
|
| 36 |
+
|
| 37 |
+
def transcribe(self, audio_path: str, language: Optional[str] = None) -> Dict:
|
| 38 |
+
if not self._ready:
|
| 39 |
+
return {"text": "", "error": "ASR not available"}
|
| 40 |
+
t0 = time.perf_counter()
|
| 41 |
+
opts = {}
|
| 42 |
+
if language:
|
| 43 |
+
opts["language"] = language
|
| 44 |
+
result = self.model.transcribe(audio_path, **opts)
|
| 45 |
+
latency_ms = (time.perf_counter() - t0) * 1000
|
| 46 |
+
return {
|
| 47 |
+
"text": result["text"],
|
| 48 |
+
"language": result.get("language", "unknown"),
|
| 49 |
+
"segments": result.get("segments", []),
|
| 50 |
+
"latency_ms": round(latency_ms, 2),
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
def transcribe_bytes(self, audio_bytes: bytes, sample_rate: int = 16000) -> Dict:
|
| 54 |
+
import tempfile, soundfile as sf, os
|
| 55 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 56 |
+
tmp_path = f.name
|
| 57 |
+
try:
|
| 58 |
+
audio_array = np.frombuffer(audio_bytes, dtype=np.float32)
|
| 59 |
+
sf.write(tmp_path, audio_array, sample_rate)
|
| 60 |
+
return self.transcribe(tmp_path)
|
| 61 |
+
finally:
|
| 62 |
+
os.unlink(tmp_path)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 66 |
+
# Audio Classifier
|
| 67 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 68 |
+
|
| 69 |
+
class AudioClassifier:
|
| 70 |
+
"""Classifies audio events (gunshot, scream, etc.) using wav2vec2."""
|
| 71 |
+
|
| 72 |
+
KEYWORDS = ["yes", "no", "up", "down", "left", "right", "on", "off", "stop", "go"]
|
| 73 |
+
|
| 74 |
+
def __init__(self):
|
| 75 |
+
if not settings.ENABLE_AUDIO:
|
| 76 |
+
self._ready = False
|
| 77 |
+
return
|
| 78 |
+
logger.info(f"Loading audio classifier: {settings.AUDIO_CLASS_MODEL}")
|
| 79 |
+
try:
|
| 80 |
+
from transformers import pipeline
|
| 81 |
+
device_id = 0 if str(DEVICE) == "cuda" else -1
|
| 82 |
+
self.classifier = pipeline(
|
| 83 |
+
"audio-classification",
|
| 84 |
+
model=settings.AUDIO_CLASS_MODEL,
|
| 85 |
+
device=device_id,
|
| 86 |
+
)
|
| 87 |
+
self._ready = True
|
| 88 |
+
logger.info("✅ AudioClassifier ready.")
|
| 89 |
+
except Exception as e:
|
| 90 |
+
logger.warning(f"AudioClassifier init failed: {e}")
|
| 91 |
+
self._ready = False
|
| 92 |
+
|
| 93 |
+
def classify(self, audio_path: str, top_k: int = 5) -> Dict:
|
| 94 |
+
if not self._ready:
|
| 95 |
+
return {"classes": [], "error": "Audio classifier not available"}
|
| 96 |
+
t0 = time.perf_counter()
|
| 97 |
+
results = self.classifier(audio_path, top_k=top_k)
|
| 98 |
+
latency_ms = (time.perf_counter() - t0) * 1000
|
| 99 |
+
return {
|
| 100 |
+
"classes": [{"label": r["label"], "score": round(r["score"], 4)} for r in results],
|
| 101 |
+
"latency_ms": round(latency_ms, 2),
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# ═══════════════════════════════════════════════════════════════════════════
|
| 106 |
+
# TTS - Text to Speech
|
| 107 |
+
# ══════════════════════════════════════════════════════════��════════════════
|
| 108 |
+
|
| 109 |
+
class SpeechSynthesizer:
|
| 110 |
+
"""Generates speech from text using microsoft/speecht5_tts."""
|
| 111 |
+
|
| 112 |
+
def __init__(self):
|
| 113 |
+
if not settings.ENABLE_AUDIO:
|
| 114 |
+
self._ready = False
|
| 115 |
+
return
|
| 116 |
+
logger.info(f"Loading TTS model: {settings.TTS_MODEL}")
|
| 117 |
+
try:
|
| 118 |
+
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
| 119 |
+
from datasets import load_dataset
|
| 120 |
+
self.processor = SpeechT5Processor.from_pretrained(settings.TTS_MODEL)
|
| 121 |
+
self.model = SpeechT5ForTextToSpeech.from_pretrained(settings.TTS_MODEL).to(DEVICE)
|
| 122 |
+
self.vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(DEVICE)
|
| 123 |
+
# Load speaker embeddings
|
| 124 |
+
ds = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
| 125 |
+
self.speaker_embeddings = torch.tensor(ds[7306]["xvector"]).unsqueeze(0).to(DEVICE)
|
| 126 |
+
self._ready = True
|
| 127 |
+
logger.info("✅ SpeechSynthesizer ready.")
|
| 128 |
+
except Exception as e:
|
| 129 |
+
logger.warning(f"TTS init failed: {e}")
|
| 130 |
+
self._ready = False
|
| 131 |
+
|
| 132 |
+
def synthesize(self, text: str) -> Optional[np.ndarray]:
|
| 133 |
+
"""Synthesize text to audio. Returns numpy array (float32) or None."""
|
| 134 |
+
if not self._ready:
|
| 135 |
+
return None
|
| 136 |
+
inputs = self.processor(text=text, return_tensors="pt").to(DEVICE)
|
| 137 |
+
with torch.inference_mode():
|
| 138 |
+
speech = self.model.generate_speech(
|
| 139 |
+
inputs["input_ids"], self.speaker_embeddings, vocoder=self.vocoder
|
| 140 |
+
)
|
| 141 |
+
return speech.cpu().numpy()
|
config.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
config.py - Central configuration for Multimodal Surveillance Intelligence System
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import torch
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from pydantic_settings import BaseSettings
|
| 8 |
+
from pydantic import Field
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class Settings(BaseSettings):
|
| 13 |
+
# ── App ───────────────────────────────────────────────
|
| 14 |
+
APP_NAME: str = "Multimodal Surveillance Intelligence System"
|
| 15 |
+
APP_VERSION: str = "1.0.0"
|
| 16 |
+
DEBUG: bool = False
|
| 17 |
+
HOST: str = "0.0.0.0"
|
| 18 |
+
PORT: int = 8000
|
| 19 |
+
|
| 20 |
+
# ── Database (NeonDB PostgreSQL) ───────────────────────
|
| 21 |
+
DATABASE_URL: str = Field(
|
| 22 |
+
default="postgresql+asyncpg://user:password@hostname/dbname?ssl=require",
|
| 23 |
+
description="Database connection URL"
|
| 24 |
+
)
|
| 25 |
+
DB_ECHO: bool = False
|
| 26 |
+
|
| 27 |
+
# ── Security ───────────────────────────────────────────
|
| 28 |
+
SECRET_KEY: str = Field(default="change-in-production-super-secret-key", env="SECRET_KEY")
|
| 29 |
+
API_KEY: str = Field(default="surveillance-api-key-2024", env="API_KEY")
|
| 30 |
+
|
| 31 |
+
# ── Model Paths / Cache ───────────────────────────────
|
| 32 |
+
MODEL_CACHE_DIR: str = Field(default="./model_cache", env="HF_HOME")
|
| 33 |
+
|
| 34 |
+
# ── Vision Models ─────────────────────────────────────
|
| 35 |
+
DETECTION_MODEL: str = "facebook/detr-resnet-50"
|
| 36 |
+
REID_MODEL: str = "google/vit-base-patch16-224"
|
| 37 |
+
CLIP_MODEL: str = "openai/clip-vit-base-patch32"
|
| 38 |
+
DETECTION_CONFIDENCE: float = 0.7
|
| 39 |
+
DETECTION_BATCH_SIZE: int = 4
|
| 40 |
+
|
| 41 |
+
# ── NLP Models ────────────────────────────────────────
|
| 42 |
+
SEMANTIC_SEARCH_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
|
| 43 |
+
QA_MODEL: str = "deepset/roberta-base-squad2"
|
| 44 |
+
REPORT_MODEL: str = "google/flan-t5-base"
|
| 45 |
+
SUMMARIZER_MODEL: str = "facebook/bart-large-cnn"
|
| 46 |
+
|
| 47 |
+
# ── Audio Models (Optional) ───────────────────────────
|
| 48 |
+
WHISPER_MODEL: str = "openai/whisper-small"
|
| 49 |
+
AUDIO_CLASS_MODEL: str = "superb/wav2vec2-base-superb-ks"
|
| 50 |
+
TTS_MODEL: str = "microsoft/speecht5_tts"
|
| 51 |
+
ENABLE_AUDIO: bool = False
|
| 52 |
+
|
| 53 |
+
# ── FAISS ─────────────────────────────────────────────
|
| 54 |
+
FAISS_INDEX_PATH: str = "./faiss_indexes"
|
| 55 |
+
REID_EMBEDDING_DIM: int = 768 # ViT-base hidden size
|
| 56 |
+
CLIP_EMBEDDING_DIM: int = 512 # CLIP embedding size
|
| 57 |
+
SEARCH_EMBEDDING_DIM: int = 384 # MiniLM embedding size
|
| 58 |
+
FAISS_NPROBE: int = 10
|
| 59 |
+
|
| 60 |
+
# ── Tracking ──────────────────────────────────────────
|
| 61 |
+
TRACKER_TYPE: str = "bytetrack" # "bytetrack" or "deepsort"
|
| 62 |
+
TRACK_THRESH: float = 0.5
|
| 63 |
+
TRACK_BUFFER: int = 30
|
| 64 |
+
MATCH_THRESH: float = 0.8
|
| 65 |
+
|
| 66 |
+
# ── WebSocket / Cameras ───────────────────────────────
|
| 67 |
+
MAX_CAMERAS: int = 16
|
| 68 |
+
FRAME_QUEUE_SIZE: int = 30
|
| 69 |
+
FPS_TARGET: int = 30
|
| 70 |
+
|
| 71 |
+
# ── Anomaly Detection ─────────────────────────────────
|
| 72 |
+
ANOMALY_THRESHOLD: float = 0.75
|
| 73 |
+
|
| 74 |
+
# ── CORS ──────────────────────────────────────────────
|
| 75 |
+
CORS_ORIGINS: list = ["http://localhost:3000", "http://127.0.0.1:3000"]
|
| 76 |
+
|
| 77 |
+
class Config:
|
| 78 |
+
env_file = ".env"
|
| 79 |
+
env_file_encoding = "utf-8"
|
| 80 |
+
case_sensitive = False
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
settings = Settings()
|
| 84 |
+
|
| 85 |
+
# ── Device Detection ──────────────────────────────────────────
|
| 86 |
+
def get_device() -> torch.device:
|
| 87 |
+
if torch.cuda.is_available():
|
| 88 |
+
device = torch.device("cuda")
|
| 89 |
+
logger.info(f"🚀 GPU detected: {torch.cuda.get_device_name(0)} | VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
|
| 90 |
+
else:
|
| 91 |
+
device = torch.device("cpu")
|
| 92 |
+
logger.warning("⚠️ No GPU detected — running on CPU. Performance may be degraded.")
|
| 93 |
+
return device
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
DEVICE = get_device()
|
| 97 |
+
|
| 98 |
+
# ── Paths ─────────────────────────────────────────────────────
|
| 99 |
+
BASE_DIR = Path(__file__).resolve().parent
|
| 100 |
+
FAISS_DIR = BASE_DIR / settings.FAISS_INDEX_PATH
|
| 101 |
+
MODEL_CACHE = BASE_DIR / settings.MODEL_CACHE_DIR
|
| 102 |
+
FAISS_DIR.mkdir(parents=True, exist_ok=True)
|
| 103 |
+
MODEL_CACHE.mkdir(parents=True, exist_ok=True)
|
| 104 |
+
|
| 105 |
+
# Set HuggingFace cache
|
| 106 |
+
os.environ["HF_HOME"] = str(MODEL_CACHE)
|
| 107 |
+
os.environ["TRANSFORMERS_CACHE"] = str(MODEL_CACHE)
|
database/__init__.py
ADDED
|
File without changes
|
database/crud.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
database/crud.py - CRUD operations for Persons, Events, and Incident Reports
|
| 3 |
+
"""
|
| 4 |
+
import uuid
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from typing import List, Optional, Dict, Any
|
| 7 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 8 |
+
from sqlalchemy import select, update, and_, desc
|
| 9 |
+
from database.models import Person, Event, IncidentReport, ActivityType
|
| 10 |
+
from loguru import logger
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ─── Person Operations ────────────────────────────────────────────────────────
|
| 14 |
+
|
| 15 |
+
async def create_person(
|
| 16 |
+
db: AsyncSession,
|
| 17 |
+
faiss_id: Optional[int] = None,
|
| 18 |
+
attributes: Optional[Dict] = None,
|
| 19 |
+
thumbnail_path: Optional[str] = None,
|
| 20 |
+
) -> Person:
|
| 21 |
+
person = Person(
|
| 22 |
+
faiss_id=faiss_id,
|
| 23 |
+
attributes=attributes or {},
|
| 24 |
+
thumbnail_path=thumbnail_path,
|
| 25 |
+
track_ids=[],
|
| 26 |
+
)
|
| 27 |
+
db.add(person)
|
| 28 |
+
await db.flush()
|
| 29 |
+
logger.info(f"Created person: {person.id}")
|
| 30 |
+
return person
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
async def get_person(db: AsyncSession, person_id: uuid.UUID) -> Optional[Person]:
|
| 34 |
+
result = await db.execute(select(Person).where(Person.id == person_id))
|
| 35 |
+
return result.scalar_one_or_none()
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
async def get_all_persons(db: AsyncSession, limit: int = 100, offset: int = 0) -> List[Person]:
|
| 39 |
+
result = await db.execute(
|
| 40 |
+
select(Person).order_by(desc(Person.last_seen)).limit(limit).offset(offset)
|
| 41 |
+
)
|
| 42 |
+
return result.scalars().all()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
async def update_person_last_seen(db: AsyncSession, person_id: uuid.UUID) -> None:
|
| 46 |
+
await db.execute(
|
| 47 |
+
update(Person).where(Person.id == person_id).values(last_seen=datetime.utcnow())
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
async def update_person_faiss_id(db: AsyncSession, person_id: uuid.UUID, faiss_id: int) -> None:
|
| 52 |
+
await db.execute(
|
| 53 |
+
update(Person).where(Person.id == person_id).values(faiss_id=faiss_id)
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
async def update_person_attributes(db: AsyncSession, person_id: uuid.UUID, attributes: Dict) -> None:
|
| 58 |
+
await db.execute(
|
| 59 |
+
update(Person).where(Person.id == person_id).values(attributes=attributes)
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# ─── Event Operations ─────────────────────────────────────────────────────────
|
| 64 |
+
|
| 65 |
+
async def create_event(
|
| 66 |
+
db: AsyncSession,
|
| 67 |
+
person_id: uuid.UUID,
|
| 68 |
+
camera_id: str,
|
| 69 |
+
activity_type: ActivityType = ActivityType.DETECTED,
|
| 70 |
+
bounding_box: Optional[Dict] = None,
|
| 71 |
+
confidence: Optional[float] = None,
|
| 72 |
+
track_id: Optional[int] = None,
|
| 73 |
+
location_zone: Optional[str] = None,
|
| 74 |
+
anomaly_score: float = 0.0,
|
| 75 |
+
description: Optional[str] = None,
|
| 76 |
+
raw_metadata: Optional[Dict] = None,
|
| 77 |
+
) -> Event:
|
| 78 |
+
event = Event(
|
| 79 |
+
person_id=person_id,
|
| 80 |
+
camera_id=camera_id,
|
| 81 |
+
activity_type=activity_type,
|
| 82 |
+
bounding_box=bounding_box,
|
| 83 |
+
confidence=confidence,
|
| 84 |
+
track_id=track_id,
|
| 85 |
+
location_zone=location_zone,
|
| 86 |
+
anomaly_score=anomaly_score,
|
| 87 |
+
description=description,
|
| 88 |
+
raw_metadata=raw_metadata or {},
|
| 89 |
+
)
|
| 90 |
+
db.add(event)
|
| 91 |
+
await db.flush()
|
| 92 |
+
return event
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
async def get_events_for_person(
|
| 96 |
+
db: AsyncSession,
|
| 97 |
+
person_id: uuid.UUID,
|
| 98 |
+
limit: int = 50,
|
| 99 |
+
) -> List[Event]:
|
| 100 |
+
result = await db.execute(
|
| 101 |
+
select(Event)
|
| 102 |
+
.where(Event.person_id == person_id)
|
| 103 |
+
.order_by(desc(Event.timestamp))
|
| 104 |
+
.limit(limit)
|
| 105 |
+
)
|
| 106 |
+
return result.scalars().all()
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
async def get_recent_events(
|
| 110 |
+
db: AsyncSession,
|
| 111 |
+
camera_id: Optional[str] = None,
|
| 112 |
+
limit: int = 100,
|
| 113 |
+
) -> List[Event]:
|
| 114 |
+
query = select(Event).order_by(desc(Event.timestamp)).limit(limit)
|
| 115 |
+
if camera_id:
|
| 116 |
+
query = query.where(Event.camera_id == camera_id)
|
| 117 |
+
result = await db.execute(query)
|
| 118 |
+
return result.scalars().all()
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
async def get_anomaly_events(db: AsyncSession, threshold: float = 0.75, limit: int = 50) -> List[Event]:
|
| 122 |
+
result = await db.execute(
|
| 123 |
+
select(Event)
|
| 124 |
+
.where(Event.anomaly_score >= threshold)
|
| 125 |
+
.order_by(desc(Event.timestamp))
|
| 126 |
+
.limit(limit)
|
| 127 |
+
)
|
| 128 |
+
return result.scalars().all()
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ─── Incident Report Operations ───────────────────────────────────────────────
|
| 132 |
+
|
| 133 |
+
async def create_incident_report(
|
| 134 |
+
db: AsyncSession,
|
| 135 |
+
person_id: Optional[uuid.UUID],
|
| 136 |
+
report_text: str,
|
| 137 |
+
summary: Optional[str] = None,
|
| 138 |
+
severity: str = "medium",
|
| 139 |
+
camera_ids: Optional[List[str]] = None,
|
| 140 |
+
) -> IncidentReport:
|
| 141 |
+
report = IncidentReport(
|
| 142 |
+
person_id=person_id,
|
| 143 |
+
report_text=report_text,
|
| 144 |
+
summary=summary,
|
| 145 |
+
severity=severity,
|
| 146 |
+
camera_ids=camera_ids or [],
|
| 147 |
+
)
|
| 148 |
+
db.add(report)
|
| 149 |
+
await db.flush()
|
| 150 |
+
return report
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
async def get_report(db: AsyncSession, report_id: uuid.UUID) -> Optional[IncidentReport]:
|
| 154 |
+
result = await db.execute(select(IncidentReport).where(IncidentReport.report_id == report_id))
|
| 155 |
+
return result.scalar_one_or_none()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
async def get_reports_for_person(db: AsyncSession, person_id: uuid.UUID) -> List[IncidentReport]:
|
| 159 |
+
result = await db.execute(
|
| 160 |
+
select(IncidentReport)
|
| 161 |
+
.where(IncidentReport.person_id == person_id)
|
| 162 |
+
.order_by(desc(IncidentReport.generated_at))
|
| 163 |
+
)
|
| 164 |
+
return result.scalars().all()
|
database/models.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
database/models.py - SQLAlchemy ORM models for Surveillance System
|
| 3 |
+
"""
|
| 4 |
+
import uuid
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from sqlalchemy import Column, String, DateTime, Float, Text, Integer, ForeignKey, JSON, Enum
|
| 7 |
+
from sqlalchemy.dialects.postgresql import UUID
|
| 8 |
+
from sqlalchemy.orm import relationship
|
| 9 |
+
from database.session import Base
|
| 10 |
+
import enum
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ActivityType(str, enum.Enum):
|
| 14 |
+
DETECTED = "detected"
|
| 15 |
+
TRACKED = "tracked"
|
| 16 |
+
REID_MATCH = "reid_match"
|
| 17 |
+
ANOMALY = "anomaly"
|
| 18 |
+
LOITERING = "loitering"
|
| 19 |
+
RUNNING = "running"
|
| 20 |
+
FIGHTING = "fighting"
|
| 21 |
+
TRESPASSING = "trespassing"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class AnalysisSession(Base):
|
| 25 |
+
__tablename__ = "analysis_sessions"
|
| 26 |
+
|
| 27 |
+
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
|
| 28 |
+
timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
| 29 |
+
video_filename = Column(String(255), nullable=False)
|
| 30 |
+
thumbnail_path = Column(String(500), nullable=True)
|
| 31 |
+
|
| 32 |
+
# Analysis Summary Stats
|
| 33 |
+
duration_sec = Column(Float, nullable=False, default=0.0)
|
| 34 |
+
frames_processed = Column(Integer, nullable=False, default=0)
|
| 35 |
+
unique_persons = Column(Integer, nullable=False, default=0)
|
| 36 |
+
peak_count = Column(Integer, nullable=False, default=0)
|
| 37 |
+
|
| 38 |
+
def __repr__(self):
|
| 39 |
+
return f"<AnalysisSession id={self.id} video={self.video_filename}>"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class Person(Base):
|
| 43 |
+
__tablename__ = "persons"
|
| 44 |
+
|
| 45 |
+
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
|
| 46 |
+
first_seen = Column(DateTime, default=datetime.utcnow, nullable=False)
|
| 47 |
+
last_seen = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)
|
| 48 |
+
|
| 49 |
+
# ReID embedding reference
|
| 50 |
+
faiss_id = Column(Integer, nullable=True, unique=True)
|
| 51 |
+
embedding_version = Column(String(50), default="vit-base-patch16-224")
|
| 52 |
+
|
| 53 |
+
# Attributes from CLIP
|
| 54 |
+
attributes = Column(JSON, nullable=True) # {"clothing": [...], "colors": [...]}
|
| 55 |
+
thumbnail_path = Column(String(500), nullable=True)
|
| 56 |
+
track_ids = Column(JSON, default=list) # list of all track IDs assigned cross-camera
|
| 57 |
+
|
| 58 |
+
events = relationship("Event", back_populates="person", cascade="all, delete-orphan")
|
| 59 |
+
|
| 60 |
+
def __repr__(self):
|
| 61 |
+
return f"<Person id={self.id} first_seen={self.first_seen}>"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class Event(Base):
|
| 65 |
+
__tablename__ = "events"
|
| 66 |
+
|
| 67 |
+
event_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
|
| 68 |
+
person_id = Column(UUID(as_uuid=True), ForeignKey("persons.id", ondelete="CASCADE"), nullable=False, index=True)
|
| 69 |
+
camera_id = Column(String(64), nullable=False, index=True)
|
| 70 |
+
timestamp = Column(DateTime, default=datetime.utcnow, nullable=False, index=True)
|
| 71 |
+
activity_type = Column(Enum(ActivityType), default=ActivityType.DETECTED, nullable=False)
|
| 72 |
+
|
| 73 |
+
# Spatial & tracking info
|
| 74 |
+
bounding_box = Column(JSON, nullable=True) # {"x1": f, "y1": f, "x2": f, "y2": f}
|
| 75 |
+
confidence = Column(Float, nullable=True)
|
| 76 |
+
track_id = Column(Integer, nullable=True)
|
| 77 |
+
location_zone = Column(String(128), nullable=True)
|
| 78 |
+
|
| 79 |
+
# Additional metadata
|
| 80 |
+
anomaly_score = Column(Float, default=0.0)
|
| 81 |
+
raw_metadata = Column(JSON, nullable=True)
|
| 82 |
+
description = Column(Text, nullable=True) # NLP-generated description
|
| 83 |
+
|
| 84 |
+
person = relationship("Person", back_populates="events")
|
| 85 |
+
|
| 86 |
+
def __repr__(self):
|
| 87 |
+
return f"<Event {self.event_id} person={self.person_id} cam={self.camera_id} type={self.activity_type}>"
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class IncidentReport(Base):
|
| 91 |
+
__tablename__ = "incident_reports"
|
| 92 |
+
|
| 93 |
+
report_id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
|
| 94 |
+
person_id = Column(UUID(as_uuid=True), ForeignKey("persons.id", ondelete="SET NULL"), nullable=True)
|
| 95 |
+
generated_at = Column(DateTime, default=datetime.utcnow)
|
| 96 |
+
report_text = Column(Text, nullable=False)
|
| 97 |
+
summary = Column(Text, nullable=True)
|
| 98 |
+
severity = Column(String(20), default="medium") # low / medium / high / critical
|
| 99 |
+
camera_ids = Column(JSON, default=list)
|
| 100 |
+
model_version = Column(String(50), default="flan-t5-base")
|
database/session.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
database/session.py - Async SQLAlchemy session management for NeonDB PostgreSQL
|
| 3 |
+
"""
|
| 4 |
+
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker
|
| 5 |
+
from sqlalchemy.orm import DeclarativeBase
|
| 6 |
+
from config import settings
|
| 7 |
+
from loguru import logger
|
| 8 |
+
|
| 9 |
+
engine = create_async_engine(
|
| 10 |
+
settings.DATABASE_URL,
|
| 11 |
+
echo=settings.DB_ECHO,
|
| 12 |
+
pool_size=10,
|
| 13 |
+
max_overflow=20,
|
| 14 |
+
pool_pre_ping=True,
|
| 15 |
+
connect_args={"ssl": "require"},
|
| 16 |
+
)
|
| 17 |
+
|
| 18 |
+
AsyncSessionLocal = async_sessionmaker(
|
| 19 |
+
engine,
|
| 20 |
+
class_=AsyncSession,
|
| 21 |
+
expire_on_commit=False,
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class Base(DeclarativeBase):
|
| 26 |
+
pass
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
async def get_db() -> AsyncSession:
|
| 30 |
+
async with AsyncSessionLocal() as session:
|
| 31 |
+
try:
|
| 32 |
+
yield session
|
| 33 |
+
await session.commit()
|
| 34 |
+
except Exception:
|
| 35 |
+
await session.rollback()
|
| 36 |
+
raise
|
| 37 |
+
finally:
|
| 38 |
+
await session.close()
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
async def create_tables():
|
| 42 |
+
"""Create all tables on startup."""
|
| 43 |
+
async with engine.begin() as conn:
|
| 44 |
+
from database.models import Person, Event # noqa: F401
|
| 45 |
+
await conn.run_sync(Base.metadata.create_all)
|
| 46 |
+
logger.info("✅ Database tables ensured.")
|
graph/__init__.py
ADDED
|
File without changes
|
graph/movement_graph.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
graph/movement_graph.py - PyTorch Geometric Movement Graph & Anomaly Detection
|
| 3 |
+
Builds a directed graph of person movements across cameras and zones.
|
| 4 |
+
Detects abnormal route patterns using GNN-based anomaly scoring.
|
| 5 |
+
"""
|
| 6 |
+
import time
|
| 7 |
+
import torch
|
| 8 |
+
import numpy as np
|
| 9 |
+
from typing import Dict, List, Optional, Tuple
|
| 10 |
+
from collections import defaultdict
|
| 11 |
+
from loguru import logger
|
| 12 |
+
from config import DEVICE, settings
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import torch_geometric
|
| 16 |
+
from torch_geometric.data import Data
|
| 17 |
+
from torch_geometric.nn import GCNConv, global_mean_pool
|
| 18 |
+
GEO_AVAILABLE = True
|
| 19 |
+
except ImportError:
|
| 20 |
+
GEO_AVAILABLE = False
|
| 21 |
+
logger.warning("torch-geometric not installed. Movement graph module will use fallback.")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ── GNN Autoencoder for Anomaly Detection ──────────────────────────────────────
|
| 25 |
+
|
| 26 |
+
class MovementGNN(torch.nn.Module):
|
| 27 |
+
"""
|
| 28 |
+
Simple GCN autoencoder to encode node features (camera visit patterns).
|
| 29 |
+
Reconstruction error → anomaly score.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self, in_channels: int = 8, hidden: int = 16, out_channels: int = 8):
|
| 33 |
+
super().__init__()
|
| 34 |
+
if GEO_AVAILABLE:
|
| 35 |
+
self.enc1 = GCNConv(in_channels, hidden)
|
| 36 |
+
self.enc2 = GCNConv(hidden, out_channels)
|
| 37 |
+
self.dec1 = GCNConv(out_channels, hidden)
|
| 38 |
+
self.dec2 = GCNConv(hidden, in_channels)
|
| 39 |
+
self.relu = torch.nn.ReLU()
|
| 40 |
+
|
| 41 |
+
def forward(self, x, edge_index):
|
| 42 |
+
# Encode
|
| 43 |
+
z = self.relu(self.enc1(x, edge_index))
|
| 44 |
+
z = self.enc2(z, edge_index)
|
| 45 |
+
# Decode
|
| 46 |
+
x_hat = self.relu(self.dec1(z, edge_index))
|
| 47 |
+
x_hat = self.dec2(x_hat, edge_index)
|
| 48 |
+
return x_hat, z
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# ── Movement Graph Builder ──────────────────────────────────────────────────────
|
| 52 |
+
|
| 53 |
+
class MovementGraph:
|
| 54 |
+
"""
|
| 55 |
+
Maintains a person-level movement graph.
|
| 56 |
+
Nodes = cameras/zones; Edges = observed transitions.
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
def __init__(self):
|
| 60 |
+
# person_id → list of (camera_id, timestamp, zone)
|
| 61 |
+
self.person_trails: Dict[str, List[Dict]] = defaultdict(list)
|
| 62 |
+
# camera graph: edge_weights[cam_a][cam_b] = count
|
| 63 |
+
self.edge_weights: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
|
| 64 |
+
self.camera_ids: List[str] = []
|
| 65 |
+
self.cam_index: Dict[str, int] = {}
|
| 66 |
+
|
| 67 |
+
if GEO_AVAILABLE:
|
| 68 |
+
self.gnn = MovementGNN().to(DEVICE)
|
| 69 |
+
self.gnn.eval()
|
| 70 |
+
else:
|
| 71 |
+
self.gnn = None
|
| 72 |
+
|
| 73 |
+
logger.info(f"MovementGraph initialized. PyG available: {GEO_AVAILABLE}")
|
| 74 |
+
|
| 75 |
+
def register_camera(self, camera_id: str):
|
| 76 |
+
if camera_id not in self.cam_index:
|
| 77 |
+
self.cam_index[camera_id] = len(self.camera_ids)
|
| 78 |
+
self.camera_ids.append(camera_id)
|
| 79 |
+
|
| 80 |
+
def add_observation(
|
| 81 |
+
self,
|
| 82 |
+
person_id: str,
|
| 83 |
+
camera_id: str,
|
| 84 |
+
timestamp: float,
|
| 85 |
+
zone: Optional[str] = None,
|
| 86 |
+
):
|
| 87 |
+
"""Record that a person was observed at camera/zone at timestamp."""
|
| 88 |
+
self.register_camera(camera_id)
|
| 89 |
+
trail = self.person_trails[person_id]
|
| 90 |
+
|
| 91 |
+
# Add transition edge if person has prior observation
|
| 92 |
+
if trail:
|
| 93 |
+
last_cam = trail[-1]["camera_id"]
|
| 94 |
+
if last_cam != camera_id:
|
| 95 |
+
self.edge_weights[last_cam][camera_id] += 1
|
| 96 |
+
|
| 97 |
+
trail.append({"camera_id": camera_id, "timestamp": timestamp, "zone": zone})
|
| 98 |
+
# Keep last 50 observations per person
|
| 99 |
+
if len(trail) > 50:
|
| 100 |
+
self.person_trails[person_id] = trail[-50:]
|
| 101 |
+
|
| 102 |
+
def _build_graph(self) -> Optional["Data"]:
|
| 103 |
+
"""Convert current camera graph to PyG Data object."""
|
| 104 |
+
if not GEO_AVAILABLE or len(self.camera_ids) == 0:
|
| 105 |
+
return None
|
| 106 |
+
|
| 107 |
+
n = len(self.camera_ids)
|
| 108 |
+
# Node features: [visit_count_normalized, in_degree, out_degree, ...]
|
| 109 |
+
node_features = np.zeros((n, 8), dtype=np.float32)
|
| 110 |
+
edge_src, edge_dst = [], []
|
| 111 |
+
|
| 112 |
+
# Count visits per camera
|
| 113 |
+
cam_visits = defaultdict(int)
|
| 114 |
+
for trails in self.person_trails.values():
|
| 115 |
+
for obs in trails:
|
| 116 |
+
cam_visits[obs["camera_id"]] += 1
|
| 117 |
+
|
| 118 |
+
max_visits = max(cam_visits.values()) if cam_visits else 1
|
| 119 |
+
for cam, idx in self.cam_index.items():
|
| 120 |
+
node_features[idx, 0] = cam_visits[cam] / max_visits
|
| 121 |
+
|
| 122 |
+
# Build edges and compute in/out degree
|
| 123 |
+
for src_cam, dst_dict in self.edge_weights.items():
|
| 124 |
+
for dst_cam, weight in dst_dict.items():
|
| 125 |
+
si = self.cam_index.get(src_cam)
|
| 126 |
+
di = self.cam_index.get(dst_cam)
|
| 127 |
+
if si is not None and di is not None:
|
| 128 |
+
edge_src.append(si)
|
| 129 |
+
edge_dst.append(di)
|
| 130 |
+
node_features[si, 1] += 1 # out-degree
|
| 131 |
+
node_features[di, 2] += 1 # in-degree
|
| 132 |
+
|
| 133 |
+
if not edge_src:
|
| 134 |
+
# Add self-loops to avoid empty graph
|
| 135 |
+
edge_src = list(range(n))
|
| 136 |
+
edge_dst = list(range(n))
|
| 137 |
+
|
| 138 |
+
x = torch.tensor(node_features, dtype=torch.float32).to(DEVICE)
|
| 139 |
+
edge_index = torch.tensor([edge_src, edge_dst], dtype=torch.long).to(DEVICE)
|
| 140 |
+
return Data(x=x, edge_index=edge_index)
|
| 141 |
+
|
| 142 |
+
@torch.inference_mode()
|
| 143 |
+
def compute_anomaly_score(self, person_id: str) -> Dict:
|
| 144 |
+
"""
|
| 145 |
+
Compute anomaly score for a person's movement trail.
|
| 146 |
+
|
| 147 |
+
Returns:
|
| 148 |
+
{"person_id": str, "anomaly_score": float, "route": list, "verdict": str}
|
| 149 |
+
"""
|
| 150 |
+
trail = self.person_trails.get(person_id, [])
|
| 151 |
+
if len(trail) < 2:
|
| 152 |
+
return {"person_id": person_id, "anomaly_score": 0.0, "verdict": "insufficient_data", "route": []}
|
| 153 |
+
|
| 154 |
+
t0 = time.perf_counter()
|
| 155 |
+
|
| 156 |
+
# Heuristic features for pattern scoring
|
| 157 |
+
cameras = [obs["camera_id"] for obs in trail]
|
| 158 |
+
timestamps = [obs["timestamp"] for obs in trail]
|
| 159 |
+
unique_cams = len(set(cameras))
|
| 160 |
+
total_obs = len(cameras)
|
| 161 |
+
|
| 162 |
+
# Time between observations
|
| 163 |
+
gaps = np.diff(timestamps)
|
| 164 |
+
avg_gap = float(np.mean(gaps)) if len(gaps) > 0 else 0
|
| 165 |
+
max_gap = float(np.max(gaps)) if len(gaps) > 0 else 0
|
| 166 |
+
|
| 167 |
+
# Suspicious patterns:
|
| 168 |
+
# 1. Too many unique cameras in short time → rapid movement
|
| 169 |
+
# 2. Very short dwell time per camera → running/fleeing behavior
|
| 170 |
+
# 3. Visiting same camera repeatedly in short time → loitering
|
| 171 |
+
rapid_movement = unique_cams / max(total_obs, 1) > 0.8
|
| 172 |
+
loitering = cameras.count(cameras[-1]) / total_obs > 0.6 if cameras else False
|
| 173 |
+
fast_dwell = avg_gap < 10 and unique_cams > 3 # under 10s per camera
|
| 174 |
+
|
| 175 |
+
heuristic_score = 0.0
|
| 176 |
+
if rapid_movement:
|
| 177 |
+
heuristic_score += 0.4
|
| 178 |
+
if loitering:
|
| 179 |
+
heuristic_score += 0.3
|
| 180 |
+
if fast_dwell:
|
| 181 |
+
heuristic_score += 0.3
|
| 182 |
+
|
| 183 |
+
# GNN-based score (if available)
|
| 184 |
+
gnn_score = 0.0
|
| 185 |
+
if GEO_AVAILABLE and self.gnn is not None:
|
| 186 |
+
graph = self._build_graph()
|
| 187 |
+
if graph is not None and graph.num_nodes > 0:
|
| 188 |
+
x_hat, _ = self.gnn(graph.x, graph.edge_index)
|
| 189 |
+
reconstruction_error = float(torch.mean((graph.x - x_hat) ** 2))
|
| 190 |
+
gnn_score = min(reconstruction_error * 5, 1.0)
|
| 191 |
+
|
| 192 |
+
# Combined score
|
| 193 |
+
anomaly_score = round(0.5 * heuristic_score + 0.5 * gnn_score, 4)
|
| 194 |
+
anomaly_score = min(anomaly_score, 1.0)
|
| 195 |
+
|
| 196 |
+
latency = (time.perf_counter() - t0) * 1000
|
| 197 |
+
|
| 198 |
+
if anomaly_score > settings.ANOMALY_THRESHOLD:
|
| 199 |
+
verdict = "anomalous"
|
| 200 |
+
elif anomaly_score > 0.4:
|
| 201 |
+
verdict = "suspicious"
|
| 202 |
+
else:
|
| 203 |
+
verdict = "normal"
|
| 204 |
+
|
| 205 |
+
return {
|
| 206 |
+
"person_id": person_id,
|
| 207 |
+
"anomaly_score": anomaly_score,
|
| 208 |
+
"verdict": verdict,
|
| 209 |
+
"route": [{"camera_id": obs["camera_id"], "timestamp": obs["timestamp"]} for obs in trail[-10:]],
|
| 210 |
+
"unique_cameras": unique_cams,
|
| 211 |
+
"total_observations": total_obs,
|
| 212 |
+
"avg_dwell_seconds": round(avg_gap, 2),
|
| 213 |
+
"flags": {
|
| 214 |
+
"rapid_movement": rapid_movement,
|
| 215 |
+
"loitering": loitering,
|
| 216 |
+
"fast_dwell": fast_dwell,
|
| 217 |
+
},
|
| 218 |
+
"latency_ms": round(latency, 2),
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
def get_all_anomalies(self, threshold: float = 0.75) -> List[Dict]:
|
| 222 |
+
"""Compute anomaly scores for all tracked persons."""
|
| 223 |
+
results = []
|
| 224 |
+
for pid in self.person_trails:
|
| 225 |
+
score_data = self.compute_anomaly_score(pid)
|
| 226 |
+
if score_data["anomaly_score"] >= threshold:
|
| 227 |
+
results.append(score_data)
|
| 228 |
+
return sorted(results, key=lambda x: -x["anomaly_score"])
|
| 229 |
+
|
| 230 |
+
def get_movement_summary(self) -> Dict:
|
| 231 |
+
return {
|
| 232 |
+
"total_persons_tracked": len(self.person_trails),
|
| 233 |
+
"total_cameras": len(self.camera_ids),
|
| 234 |
+
"cameras": self.camera_ids,
|
| 235 |
+
"edge_count": sum(len(v) for v in self.edge_weights.values()),
|
| 236 |
+
}
|
nlp/__init__.py
ADDED
|
File without changes
|
nlp/qa.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
nlp/qa.py - Question Answering over Surveillance Logs using deepset/roberta-base-squad2
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
from typing import Optional, Dict, List
|
| 6 |
+
from transformers import pipeline, Pipeline
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from config import settings, DEVICE
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class SurveillanceQA:
|
| 12 |
+
"""
|
| 13 |
+
Extractive QA system. Given a question and a context built from
|
| 14 |
+
surveillance logs/events, extracts the most relevant answer span.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
def __init__(self):
|
| 18 |
+
logger.info(f"Loading QA model: {settings.QA_MODEL}")
|
| 19 |
+
device_id = 0 if str(DEVICE) == "cuda" else -1
|
| 20 |
+
self.qa_pipeline: Pipeline = pipeline(
|
| 21 |
+
"question-answering",
|
| 22 |
+
model=settings.QA_MODEL,
|
| 23 |
+
tokenizer=settings.QA_MODEL,
|
| 24 |
+
device=device_id,
|
| 25 |
+
)
|
| 26 |
+
logger.info("✅ SurveillanceQA ready.")
|
| 27 |
+
|
| 28 |
+
def _build_context(self, events: List[Dict]) -> str:
|
| 29 |
+
"""Build a natural language context string from event records."""
|
| 30 |
+
lines = []
|
| 31 |
+
for e in events:
|
| 32 |
+
ts = e.get("timestamp", "unknown time")
|
| 33 |
+
cam = e.get("camera_id", "unknown camera")
|
| 34 |
+
activity = e.get("activity_type", "detected")
|
| 35 |
+
person_id = str(e.get("person_id", "unknown"))[:8]
|
| 36 |
+
attrs = e.get("attributes", {})
|
| 37 |
+
desc = e.get("description", "")
|
| 38 |
+
attr_str = ""
|
| 39 |
+
if attrs:
|
| 40 |
+
gender = attrs.get("gender", "")
|
| 41 |
+
color = attrs.get("color", "")
|
| 42 |
+
clothing = ", ".join([c.get("label", "") for c in attrs.get("clothing", [])[:2]])
|
| 43 |
+
attr_str = f"({gender}, {color} clothing, {clothing})"
|
| 44 |
+
line = f"At {ts}, camera {cam} detected person {person_id} {attr_str} with activity: {activity}."
|
| 45 |
+
if desc:
|
| 46 |
+
line += f" {desc}"
|
| 47 |
+
lines.append(line)
|
| 48 |
+
return " ".join(lines)
|
| 49 |
+
|
| 50 |
+
def answer(
|
| 51 |
+
self,
|
| 52 |
+
question: str,
|
| 53 |
+
events: Optional[List[Dict]] = None,
|
| 54 |
+
context: Optional[str] = None,
|
| 55 |
+
top_k: int = 3,
|
| 56 |
+
) -> Dict:
|
| 57 |
+
"""
|
| 58 |
+
Answer a natural language question about surveillance data.
|
| 59 |
+
|
| 60 |
+
Args:
|
| 61 |
+
question: User's question
|
| 62 |
+
events: List of event dicts (auto-builds context)
|
| 63 |
+
context: Pre-built context string
|
| 64 |
+
top_k: Number of answer candidates
|
| 65 |
+
|
| 66 |
+
Returns:
|
| 67 |
+
{"answer": str, "score": float, "start": int, "end": int, "context": str, "latency_ms": float}
|
| 68 |
+
"""
|
| 69 |
+
if context is None:
|
| 70 |
+
if not events:
|
| 71 |
+
return {"answer": "No surveillance data available to answer from.", "score": 0.0}
|
| 72 |
+
context = self._build_context(events)
|
| 73 |
+
|
| 74 |
+
if not context.strip():
|
| 75 |
+
return {"answer": "No context available.", "score": 0.0}
|
| 76 |
+
|
| 77 |
+
# Truncate context to model max (512 tokens ≈ ~2000 chars)
|
| 78 |
+
context = context[:4000]
|
| 79 |
+
|
| 80 |
+
t0 = time.perf_counter()
|
| 81 |
+
result = self.qa_pipeline(
|
| 82 |
+
question=question,
|
| 83 |
+
context=context,
|
| 84 |
+
top_k=top_k,
|
| 85 |
+
handle_impossible_answer=True,
|
| 86 |
+
)
|
| 87 |
+
latency_ms = (time.perf_counter() - t0) * 1000
|
| 88 |
+
|
| 89 |
+
if isinstance(result, list):
|
| 90 |
+
best = result[0]
|
| 91 |
+
else:
|
| 92 |
+
best = result
|
| 93 |
+
|
| 94 |
+
logger.debug(f"QA answered '{question[:50]}' in {latency_ms:.1f}ms | score={best.get('score', 0):.3f}")
|
| 95 |
+
return {
|
| 96 |
+
"answer": best.get("answer", ""),
|
| 97 |
+
"score": round(best.get("score", 0.0), 4),
|
| 98 |
+
"start": best.get("start", 0),
|
| 99 |
+
"end": best.get("end", 0),
|
| 100 |
+
"context_used": context[:500] + "..." if len(context) > 500 else context,
|
| 101 |
+
"latency_ms": round(latency_ms, 2),
|
| 102 |
+
"all_answers": result if isinstance(result, list) else [result],
|
| 103 |
+
}
|
nlp/report.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
nlp/report.py - Incident Report Generation using google/flan-t5-base
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
from transformers import pipeline, Pipeline
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from config import settings, DEVICE
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
REPORT_PROMPT_TEMPLATE = """Generate a structured surveillance incident report based on the following events:
|
| 12 |
+
|
| 13 |
+
Events:
|
| 14 |
+
{events_summary}
|
| 15 |
+
|
| 16 |
+
Format the report as:
|
| 17 |
+
INCIDENT REPORT
|
| 18 |
+
Date/Time: [datetime]
|
| 19 |
+
Cameras Involved: [cameras]
|
| 20 |
+
Subject Description: [physical description]
|
| 21 |
+
Activity Observed: [description of events]
|
| 22 |
+
Anomaly Level: [low/medium/high]
|
| 23 |
+
Recommended Action: [action]
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class IncidentReportGenerator:
|
| 28 |
+
"""
|
| 29 |
+
Generates structured incident reports from surveillance events using Flan-T5.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
def __init__(self):
|
| 33 |
+
logger.info(f"Loading report generation model: {settings.REPORT_MODEL}")
|
| 34 |
+
device_id = 0 if str(DEVICE) == "cuda" else -1
|
| 35 |
+
self.generator: Pipeline = pipeline(
|
| 36 |
+
"text2text-generation",
|
| 37 |
+
model=settings.REPORT_MODEL,
|
| 38 |
+
tokenizer=settings.REPORT_MODEL,
|
| 39 |
+
device=device_id,
|
| 40 |
+
)
|
| 41 |
+
logger.info("✅ IncidentReportGenerator ready.")
|
| 42 |
+
|
| 43 |
+
def _format_events(self, events: List[Dict]) -> str:
|
| 44 |
+
"""Format events list into a readable string for the prompt."""
|
| 45 |
+
lines = []
|
| 46 |
+
for i, e in enumerate(events, 1):
|
| 47 |
+
ts = e.get("timestamp", "unknown time")
|
| 48 |
+
cam = e.get("camera_id", "unknown camera")
|
| 49 |
+
activity = e.get("activity_type", "detected")
|
| 50 |
+
attrs = e.get("attributes", {})
|
| 51 |
+
gender = attrs.get("gender", "") if attrs else ""
|
| 52 |
+
color = attrs.get("color", "") if attrs else ""
|
| 53 |
+
anomaly_score = e.get("anomaly_score", 0.0)
|
| 54 |
+
lines.append(
|
| 55 |
+
f"{i}. [{ts}] Camera {cam}: {gender} person in {color} clothing, "
|
| 56 |
+
f"activity={activity}, anomaly_score={anomaly_score:.2f}"
|
| 57 |
+
)
|
| 58 |
+
return "\n".join(lines)
|
| 59 |
+
|
| 60 |
+
def generate(
|
| 61 |
+
self,
|
| 62 |
+
events: List[Dict],
|
| 63 |
+
person_id: Optional[str] = None,
|
| 64 |
+
max_length: int = 512,
|
| 65 |
+
severity_hint: Optional[str] = None,
|
| 66 |
+
) -> Dict:
|
| 67 |
+
"""
|
| 68 |
+
Generate a structured incident report from a list of event records.
|
| 69 |
+
|
| 70 |
+
Returns:
|
| 71 |
+
{"report_text": str, "severity": str, "latency_ms": float}
|
| 72 |
+
"""
|
| 73 |
+
if not events:
|
| 74 |
+
return {"report_text": "No events provided for report generation.", "severity": "low"}
|
| 75 |
+
|
| 76 |
+
events_summary = self._format_events(events[:20]) # limit for token budget
|
| 77 |
+
prompt = REPORT_PROMPT_TEMPLATE.format(events_summary=events_summary)
|
| 78 |
+
if person_id:
|
| 79 |
+
prompt = f"Person ID: {person_id[:8]}\n" + prompt
|
| 80 |
+
|
| 81 |
+
t0 = time.perf_counter()
|
| 82 |
+
outputs = self.generator(
|
| 83 |
+
prompt,
|
| 84 |
+
max_new_tokens=max_length,
|
| 85 |
+
num_beams=4,
|
| 86 |
+
early_stopping=True,
|
| 87 |
+
no_repeat_ngram_size=3,
|
| 88 |
+
)
|
| 89 |
+
latency_ms = (time.perf_counter() - t0) * 1000
|
| 90 |
+
report_text = outputs[0]["generated_text"]
|
| 91 |
+
|
| 92 |
+
# Determine severity from anomaly scores
|
| 93 |
+
scores = [e.get("anomaly_score", 0.0) for e in events]
|
| 94 |
+
avg_anomaly = sum(scores) / max(len(scores), 1)
|
| 95 |
+
if avg_anomaly > 0.8:
|
| 96 |
+
severity = "critical"
|
| 97 |
+
elif avg_anomaly > 0.6:
|
| 98 |
+
severity = "high"
|
| 99 |
+
elif avg_anomaly > 0.3:
|
| 100 |
+
severity = "medium"
|
| 101 |
+
else:
|
| 102 |
+
severity = "low"
|
| 103 |
+
|
| 104 |
+
severity = severity_hint or severity
|
| 105 |
+
logger.info(f"Report generated in {latency_ms:.1f}ms | severity={severity}")
|
| 106 |
+
|
| 107 |
+
return {
|
| 108 |
+
"report_text": report_text,
|
| 109 |
+
"severity": severity,
|
| 110 |
+
"event_count": len(events),
|
| 111 |
+
"avg_anomaly_score": round(avg_anomaly, 4),
|
| 112 |
+
"latency_ms": round(latency_ms, 2),
|
| 113 |
+
"person_id": person_id,
|
| 114 |
+
}
|
nlp/search.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
nlp/search.py - Semantic Search using sentence-transformers/all-MiniLM-L6-v2 + FAISS
|
| 3 |
+
Embeds natural language queries and matches against stored surveillance metadata.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import time
|
| 7 |
+
import numpy as np
|
| 8 |
+
import faiss
|
| 9 |
+
import torch
|
| 10 |
+
from typing import List, Dict, Optional
|
| 11 |
+
from sentence_transformers import SentenceTransformer
|
| 12 |
+
from loguru import logger
|
| 13 |
+
from config import settings, DEVICE, FAISS_DIR
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class SemanticSearchEngine:
|
| 17 |
+
"""
|
| 18 |
+
Encodes surveillance metadata (event descriptions, attributes) into
|
| 19 |
+
sentence embeddings stored in FAISS. Supports natural-language querying.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
INDEX_FILE = str(FAISS_DIR / "search_index.faiss")
|
| 23 |
+
META_FILE = str(FAISS_DIR / "search_meta.npy")
|
| 24 |
+
|
| 25 |
+
def __init__(self):
|
| 26 |
+
logger.info(f"Loading semantic search model: {settings.SEMANTIC_SEARCH_MODEL}")
|
| 27 |
+
self.model = SentenceTransformer(settings.SEMANTIC_SEARCH_MODEL, device=str(DEVICE))
|
| 28 |
+
self.dim = settings.SEARCH_EMBEDDING_DIM
|
| 29 |
+
self.index = self._load_or_create_index()
|
| 30 |
+
self.meta: List[Dict] = self._load_meta()
|
| 31 |
+
logger.info(f"✅ SemanticSearchEngine ready. Index size: {self.index.ntotal}")
|
| 32 |
+
|
| 33 |
+
def _load_or_create_index(self) -> faiss.IndexFlatIP:
|
| 34 |
+
if os.path.exists(self.INDEX_FILE):
|
| 35 |
+
logger.info("Loading existing FAISS search index.")
|
| 36 |
+
return faiss.read_index(self.INDEX_FILE)
|
| 37 |
+
return faiss.IndexFlatIP(self.dim)
|
| 38 |
+
|
| 39 |
+
def _load_meta(self) -> List[Dict]:
|
| 40 |
+
if os.path.exists(self.META_FILE):
|
| 41 |
+
return list(np.load(self.META_FILE, allow_pickle=True))
|
| 42 |
+
return []
|
| 43 |
+
|
| 44 |
+
def save(self):
|
| 45 |
+
faiss.write_index(self.index, self.INDEX_FILE)
|
| 46 |
+
np.save(self.META_FILE, np.array(self.meta, dtype=object))
|
| 47 |
+
|
| 48 |
+
def encode(self, texts: List[str]) -> np.ndarray:
|
| 49 |
+
"""Encode texts to L2-normalized embeddings (batch)."""
|
| 50 |
+
embeddings = self.model.encode(
|
| 51 |
+
texts,
|
| 52 |
+
batch_size=32,
|
| 53 |
+
normalize_embeddings=True,
|
| 54 |
+
convert_to_numpy=True,
|
| 55 |
+
show_progress_bar=False,
|
| 56 |
+
)
|
| 57 |
+
return embeddings.astype(np.float32)
|
| 58 |
+
|
| 59 |
+
def index_event(self, text: str, metadata: Dict) -> int:
|
| 60 |
+
"""
|
| 61 |
+
Add a single surveillance event description to the FAISS search index.
|
| 62 |
+
|
| 63 |
+
Args:
|
| 64 |
+
text: Natural language description of the event
|
| 65 |
+
metadata: {"event_id", "person_id", "camera_id", "timestamp", "activity_type", ...}
|
| 66 |
+
|
| 67 |
+
Returns:
|
| 68 |
+
faiss_id (row index)
|
| 69 |
+
"""
|
| 70 |
+
embedding = self.encode([text])
|
| 71 |
+
faiss_id = self.index.ntotal
|
| 72 |
+
self.index.add(embedding)
|
| 73 |
+
self.meta.append({**metadata, "text": text, "faiss_id": faiss_id})
|
| 74 |
+
self.save()
|
| 75 |
+
return faiss_id
|
| 76 |
+
|
| 77 |
+
def index_batch(self, texts: List[str], metadatas: List[Dict]):
|
| 78 |
+
"""Batch indexing for bulk ingestion."""
|
| 79 |
+
embeddings = self.encode(texts)
|
| 80 |
+
base_id = self.index.ntotal
|
| 81 |
+
self.index.add(embeddings)
|
| 82 |
+
for i, (text, meta) in enumerate(zip(texts, metadatas)):
|
| 83 |
+
self.meta.append({**meta, "text": text, "faiss_id": base_id + i})
|
| 84 |
+
self.save()
|
| 85 |
+
logger.info(f"Indexed {len(texts)} events into search index.")
|
| 86 |
+
|
| 87 |
+
def search(self, query: str, top_k: int = 10, score_threshold: float = 0.4) -> List[Dict]:
|
| 88 |
+
"""
|
| 89 |
+
Search surveillance logs by natural language query.
|
| 90 |
+
|
| 91 |
+
Returns:
|
| 92 |
+
List of {"text": str, "score": float, ...metadata fields}
|
| 93 |
+
"""
|
| 94 |
+
if self.index.ntotal == 0:
|
| 95 |
+
return []
|
| 96 |
+
|
| 97 |
+
t0 = time.perf_counter()
|
| 98 |
+
query_emb = self.encode([query])
|
| 99 |
+
k = min(top_k, self.index.ntotal)
|
| 100 |
+
distances, indices = self.index.search(query_emb, k)
|
| 101 |
+
latency = (time.perf_counter() - t0) * 1000
|
| 102 |
+
|
| 103 |
+
results = []
|
| 104 |
+
for dist, idx in zip(distances[0], indices[0]):
|
| 105 |
+
if idx == -1 or float(dist) < score_threshold:
|
| 106 |
+
continue
|
| 107 |
+
entry = dict(self.meta[idx])
|
| 108 |
+
entry["score"] = round(float(dist), 4)
|
| 109 |
+
results.append(entry)
|
| 110 |
+
|
| 111 |
+
logger.debug(f"Semantic search '{query[:40]}...' → {len(results)} results in {latency:.1f}ms")
|
| 112 |
+
return sorted(results, key=lambda x: -x["score"])
|
nlp/summarizer.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
nlp/summarizer.py - Surveillance log summarization using facebook/bart-large-cnn
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
from transformers import pipeline, Pipeline
|
| 7 |
+
from loguru import logger
|
| 8 |
+
from config import settings, DEVICE
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class SurveillanceSummarizer:
|
| 12 |
+
"""Abstractive summarization of surveillance event logs using BART."""
|
| 13 |
+
|
| 14 |
+
def __init__(self):
|
| 15 |
+
logger.info(f"Loading summarization model: {settings.SUMMARIZER_MODEL}")
|
| 16 |
+
device_id = 0 if str(DEVICE) == "cuda" else -1
|
| 17 |
+
self.summarizer: Pipeline = pipeline(
|
| 18 |
+
"summarization",
|
| 19 |
+
model=settings.SUMMARIZER_MODEL,
|
| 20 |
+
tokenizer=settings.SUMMARIZER_MODEL,
|
| 21 |
+
device=device_id,
|
| 22 |
+
)
|
| 23 |
+
logger.info("✅ SurveillanceSummarizer ready.")
|
| 24 |
+
|
| 25 |
+
def _events_to_text(self, events: List[Dict]) -> str:
|
| 26 |
+
parts = []
|
| 27 |
+
for e in events:
|
| 28 |
+
ts = e.get("timestamp", "")
|
| 29 |
+
cam = e.get("camera_id", "")
|
| 30 |
+
activity = e.get("activity_type", "")
|
| 31 |
+
anomaly = e.get("anomaly_score", 0.0)
|
| 32 |
+
attrs = e.get("attributes", {})
|
| 33 |
+
gender = attrs.get("gender", "") if attrs else ""
|
| 34 |
+
color = attrs.get("color", "") if attrs else ""
|
| 35 |
+
parts.append(
|
| 36 |
+
f"Camera {cam} at {ts}: {gender} person in {color} clothing observed {activity} "
|
| 37 |
+
f"with anomaly score {anomaly:.2f}."
|
| 38 |
+
)
|
| 39 |
+
return " ".join(parts)
|
| 40 |
+
|
| 41 |
+
def summarize(
|
| 42 |
+
self,
|
| 43 |
+
events: List[Dict],
|
| 44 |
+
min_length: int = 30,
|
| 45 |
+
max_length: int = 200,
|
| 46 |
+
) -> Dict:
|
| 47 |
+
"""Summarize a list of surveillance events."""
|
| 48 |
+
if not events:
|
| 49 |
+
return {"summary": "No events to summarize.", "latency_ms": 0}
|
| 50 |
+
|
| 51 |
+
text = self._events_to_text(events[:30])
|
| 52 |
+
# BART max input is ~1024 tokens
|
| 53 |
+
text = text[:3000]
|
| 54 |
+
|
| 55 |
+
t0 = time.perf_counter()
|
| 56 |
+
result = self.summarizer(
|
| 57 |
+
text,
|
| 58 |
+
min_length=min_length,
|
| 59 |
+
max_length=max_length,
|
| 60 |
+
do_sample=False,
|
| 61 |
+
)
|
| 62 |
+
latency_ms = (time.perf_counter() - t0) * 1000
|
| 63 |
+
summary = result[0]["summary_text"]
|
| 64 |
+
logger.debug(f"Summarized {len(events)} events in {latency_ms:.1f}ms")
|
| 65 |
+
return {
|
| 66 |
+
"summary": summary,
|
| 67 |
+
"event_count": len(events),
|
| 68 |
+
"latency_ms": round(latency_ms, 2),
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
def summarize_text(self, text: str, min_length: int = 30, max_length: int = 150) -> str:
|
| 72 |
+
"""Summarize arbitrary text string."""
|
| 73 |
+
text = text[:3000]
|
| 74 |
+
result = self.summarizer(text, min_length=min_length, max_length=max_length, do_sample=False)
|
| 75 |
+
return result[0]["summary_text"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================
|
| 2 |
+
# Multimodal Surveillance Intelligence System - Requirements
|
| 3 |
+
# Python 3.10+
|
| 4 |
+
# ============================================================
|
| 5 |
+
|
| 6 |
+
# --- Core API Framework ---
|
| 7 |
+
fastapi==0.115.0
|
| 8 |
+
uvicorn[standard]==0.30.6
|
| 9 |
+
python-multipart==0.0.9
|
| 10 |
+
websockets==12.0
|
| 11 |
+
httpx==0.27.2
|
| 12 |
+
pydantic==2.9.2
|
| 13 |
+
pydantic-settings==2.5.2
|
| 14 |
+
|
| 15 |
+
# --- Database ---
|
| 16 |
+
asyncpg==0.29.0
|
| 17 |
+
sqlalchemy[asyncio]==2.0.35
|
| 18 |
+
|
| 19 |
+
alembic==1.13.3
|
| 20 |
+
psycopg2-binary==2.9.9
|
| 21 |
+
|
| 22 |
+
# --- PyTorch (install separately for CUDA - see README) ---
|
| 23 |
+
# torch==2.4.1+cu124
|
| 24 |
+
# torchvision==0.19.1+cu124
|
| 25 |
+
# torchaudio==0.19.1+cu124
|
| 26 |
+
|
| 27 |
+
# --- HuggingFace Ecosystem ---
|
| 28 |
+
transformers==4.45.0
|
| 29 |
+
accelerate==0.34.2
|
| 30 |
+
datasets==3.0.1
|
| 31 |
+
tokenizers==0.20.0
|
| 32 |
+
safetensors==0.4.5
|
| 33 |
+
huggingface-hub==0.25.1
|
| 34 |
+
sentence-transformers==3.1.1
|
| 35 |
+
|
| 36 |
+
# --- Vision & Computer Vision ---
|
| 37 |
+
Pillow==10.4.0
|
| 38 |
+
opencv-python==4.10.0.84
|
| 39 |
+
numpy==1.26.4
|
| 40 |
+
scipy==1.14.1
|
| 41 |
+
scikit-image==0.24.0
|
| 42 |
+
torchvision
|
| 43 |
+
timm
|
| 44 |
+
|
| 45 |
+
# --- FAISS (install separately for GPU - see README) ---
|
| 46 |
+
# faiss-gpu -> pip install faiss-gpu (CUDA)
|
| 47 |
+
# faiss-cpu -> pip install faiss-cpu (CPU only)
|
| 48 |
+
|
| 49 |
+
# --- Object Detection & Tracking ---
|
| 50 |
+
# ByteTrack / DeepSORT dependencies
|
| 51 |
+
filterpy==1.4.5
|
| 52 |
+
lapx==0.5.9.post1
|
| 53 |
+
|
| 54 |
+
# --- PyTorch Geometric (Graph Module) ---
|
| 55 |
+
# Install separately:
|
| 56 |
+
# pip install torch-geometric
|
| 57 |
+
# pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.4.1+cu124.html
|
| 58 |
+
torch-geometric==2.6.1
|
| 59 |
+
|
| 60 |
+
# --- Audio Processing ---
|
| 61 |
+
librosa==0.10.2
|
| 62 |
+
soundfile==0.12.1
|
| 63 |
+
pyaudio==0.2.14
|
| 64 |
+
torchaudio
|
| 65 |
+
openai-whisper
|
| 66 |
+
|
| 67 |
+
# --- NLP & Text Processing ---
|
| 68 |
+
nltk==3.9.1
|
| 69 |
+
spacy==3.8.2
|
| 70 |
+
tiktoken==0.7.0
|
| 71 |
+
|
| 72 |
+
# --- Logging & Monitoring ---
|
| 73 |
+
loguru==0.7.2
|
| 74 |
+
prometheus-client==0.21.0
|
| 75 |
+
rich==13.8.1
|
| 76 |
+
|
| 77 |
+
# --- Utilities ---
|
| 78 |
+
python-dotenv==1.0.1
|
| 79 |
+
aiofiles==24.1.0
|
| 80 |
+
anyio==4.6.2
|
| 81 |
+
tenacity==9.0.0
|
| 82 |
+
cachetools==5.5.0
|
| 83 |
+
click==8.1.7
|
| 84 |
+
tqdm==4.66.5
|
| 85 |
+
psutil==6.0.0
|
| 86 |
+
GPUtil==1.4.0
|
| 87 |
+
PyYAML==6.0.2
|
| 88 |
+
|
| 89 |
+
# --- Testing & Benchmarks ---
|
| 90 |
+
pytest==8.3.3
|
| 91 |
+
pytest-asyncio==0.24.0
|
| 92 |
+
locust==2.31.8
|
| 93 |
+
|
| 94 |
+
# --- Image & Annotation ---
|
| 95 |
+
matplotlib==3.9.2
|
| 96 |
+
seaborn==0.13.2
|
routes/__init__.py
ADDED
|
File without changes
|
routes/nlp_routes.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
routes/nlp_routes.py - NLP API endpoints:
|
| 3 |
+
GET /search (semantic search over surveillance logs)
|
| 4 |
+
POST /ask (QA over logs)
|
| 5 |
+
GET /report/{person_id} (generate incident report)
|
| 6 |
+
GET /anomalies (get anomalous persons)
|
| 7 |
+
"""
|
| 8 |
+
import uuid
|
| 9 |
+
from typing import Optional, List
|
| 10 |
+
|
| 11 |
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
| 12 |
+
from pydantic import BaseModel
|
| 13 |
+
from loguru import logger
|
| 14 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 15 |
+
|
| 16 |
+
from database.session import get_db
|
| 17 |
+
from database import crud
|
| 18 |
+
|
| 19 |
+
router = APIRouter(prefix="/api/v1", tags=["NLP & Intelligence"])
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# ── Dependency helpers ─────────────────────────────────────────────────────────
|
| 23 |
+
|
| 24 |
+
def get_search_engine():
|
| 25 |
+
from app import search_engine
|
| 26 |
+
return search_engine
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def get_qa_system():
|
| 30 |
+
from app import qa_system
|
| 31 |
+
return qa_system
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_report_generator():
|
| 35 |
+
from app import report_generator
|
| 36 |
+
return report_generator
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def get_summarizer():
|
| 40 |
+
from app import summarizer
|
| 41 |
+
return summarizer
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def get_movement_graph():
|
| 45 |
+
from app import movement_graph
|
| 46 |
+
return movement_graph
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ── Request models ──────────────────────────────────────────────────────────────
|
| 50 |
+
|
| 51 |
+
class SearchRequest(BaseModel):
|
| 52 |
+
query: str
|
| 53 |
+
top_k: int = 10
|
| 54 |
+
score_threshold: float = 0.35
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class AskRequest(BaseModel):
|
| 58 |
+
question: str
|
| 59 |
+
camera_id: Optional[str] = None
|
| 60 |
+
person_id: Optional[str] = None
|
| 61 |
+
context: Optional[str] = None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ── GET /search ───────────────────────────────────────────────────────────────
|
| 65 |
+
|
| 66 |
+
@router.get("/search", summary="Semantic search over surveillance logs")
|
| 67 |
+
async def semantic_search(
|
| 68 |
+
q: str = Query(..., description="Natural language search query"),
|
| 69 |
+
top_k: int = Query(10, ge=1, le=50),
|
| 70 |
+
score_threshold: float = Query(0.35, ge=0.0, le=1.0),
|
| 71 |
+
search_engine=Depends(get_search_engine),
|
| 72 |
+
):
|
| 73 |
+
"""
|
| 74 |
+
Search across all indexed surveillance events using natural language.
|
| 75 |
+
Example: "person in red jacket near camera 3"
|
| 76 |
+
"""
|
| 77 |
+
if not q.strip():
|
| 78 |
+
raise HTTPException(status_code=400, detail="Query cannot be empty")
|
| 79 |
+
results = search_engine.search(query=q, top_k=top_k, score_threshold=score_threshold)
|
| 80 |
+
return {
|
| 81 |
+
"query": q,
|
| 82 |
+
"results": results,
|
| 83 |
+
"total": len(results),
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# ── POST /ask ─────────────────────────────────────────────────────────────────
|
| 88 |
+
|
| 89 |
+
@router.post("/ask", summary="Ask a natural language question about surveillance data")
|
| 90 |
+
async def ask_question(
|
| 91 |
+
request: AskRequest,
|
| 92 |
+
db: AsyncSession = Depends(get_db),
|
| 93 |
+
qa=Depends(get_qa_system),
|
| 94 |
+
):
|
| 95 |
+
"""
|
| 96 |
+
Extractive QA over recent surveillance events.
|
| 97 |
+
Example: "Which camera last detected person 123abc?"
|
| 98 |
+
"""
|
| 99 |
+
events = []
|
| 100 |
+
if request.context:
|
| 101 |
+
# Use provided context directly
|
| 102 |
+
result = qa.answer(question=request.question, context=request.context)
|
| 103 |
+
else:
|
| 104 |
+
# Fetch recent events from DB
|
| 105 |
+
if request.person_id:
|
| 106 |
+
try:
|
| 107 |
+
pid = uuid.UUID(request.person_id)
|
| 108 |
+
except ValueError:
|
| 109 |
+
raise HTTPException(status_code=400, detail="Invalid person_id UUID")
|
| 110 |
+
db_events = await crud.get_events_for_person(db, pid, limit=30)
|
| 111 |
+
else:
|
| 112 |
+
db_events = await crud.get_recent_events(db, camera_id=request.camera_id, limit=50)
|
| 113 |
+
|
| 114 |
+
# Convert to dicts
|
| 115 |
+
events = [
|
| 116 |
+
{
|
| 117 |
+
"event_id": str(e.event_id),
|
| 118 |
+
"person_id": str(e.person_id),
|
| 119 |
+
"camera_id": e.camera_id,
|
| 120 |
+
"timestamp": str(e.timestamp),
|
| 121 |
+
"activity_type": e.activity_type.value,
|
| 122 |
+
"anomaly_score": e.anomaly_score or 0.0,
|
| 123 |
+
"bounding_box": e.bounding_box or {},
|
| 124 |
+
}
|
| 125 |
+
for e in db_events
|
| 126 |
+
]
|
| 127 |
+
result = qa.answer(question=request.question, events=events)
|
| 128 |
+
|
| 129 |
+
return {
|
| 130 |
+
"question": request.question,
|
| 131 |
+
**result,
|
| 132 |
+
"events_used": len(events),
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ── GET /report/{person_id} ───────────────────────────────────────────────────
|
| 137 |
+
|
| 138 |
+
@router.get("/report/{person_id}", summary="Generate and store an incident report for a person")
|
| 139 |
+
async def generate_report(
|
| 140 |
+
person_id: str,
|
| 141 |
+
db: AsyncSession = Depends(get_db),
|
| 142 |
+
report_gen=Depends(get_report_generator),
|
| 143 |
+
_summarizer=Depends(get_summarizer),
|
| 144 |
+
):
|
| 145 |
+
"""Generate a structured incident report for a tracked person."""
|
| 146 |
+
try:
|
| 147 |
+
pid = uuid.UUID(person_id)
|
| 148 |
+
except ValueError:
|
| 149 |
+
raise HTTPException(status_code=400, detail="Invalid person_id UUID")
|
| 150 |
+
|
| 151 |
+
person = await crud.get_person(db, pid)
|
| 152 |
+
if not person:
|
| 153 |
+
raise HTTPException(status_code=404, detail=f"Person {person_id} not found")
|
| 154 |
+
|
| 155 |
+
events = await crud.get_events_for_person(db, pid, limit=30)
|
| 156 |
+
if not events:
|
| 157 |
+
raise HTTPException(status_code=404, detail="No events found for this person")
|
| 158 |
+
|
| 159 |
+
events_dicts = [
|
| 160 |
+
{
|
| 161 |
+
"event_id": str(e.event_id),
|
| 162 |
+
"camera_id": e.camera_id,
|
| 163 |
+
"timestamp": str(e.timestamp),
|
| 164 |
+
"activity_type": e.activity_type.value,
|
| 165 |
+
"anomaly_score": e.anomaly_score or 0.0,
|
| 166 |
+
"attributes": person.attributes or {},
|
| 167 |
+
}
|
| 168 |
+
for e in events
|
| 169 |
+
]
|
| 170 |
+
|
| 171 |
+
# Generate report
|
| 172 |
+
report_result = report_gen.generate(events=events_dicts, person_id=person_id)
|
| 173 |
+
summary_result = _summarizer.summarize(events=events_dicts)
|
| 174 |
+
|
| 175 |
+
# Store report in DB
|
| 176 |
+
camera_ids = list({e["camera_id"] for e in events_dicts})
|
| 177 |
+
db_report = await crud.create_incident_report(
|
| 178 |
+
db,
|
| 179 |
+
person_id=pid,
|
| 180 |
+
report_text=report_result["report_text"],
|
| 181 |
+
summary=summary_result["summary"],
|
| 182 |
+
severity=report_result.get("severity", "medium"),
|
| 183 |
+
camera_ids=camera_ids,
|
| 184 |
+
)
|
| 185 |
+
|
| 186 |
+
return {
|
| 187 |
+
"report_id": str(db_report.report_id),
|
| 188 |
+
"person_id": person_id,
|
| 189 |
+
"generated_at": str(db_report.generated_at),
|
| 190 |
+
"severity": db_report.severity,
|
| 191 |
+
"summary": summary_result["summary"],
|
| 192 |
+
"report_text": report_result["report_text"],
|
| 193 |
+
"cameras_involved": camera_ids,
|
| 194 |
+
"event_count": len(events),
|
| 195 |
+
"latency_ms": report_result.get("latency_ms"),
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# ── GET /anomalies ────────────────────────────────────────────────────────────
|
| 200 |
+
|
| 201 |
+
@router.get("/anomalies", summary="Get persons with anomalous movement patterns")
|
| 202 |
+
async def get_anomalies(
|
| 203 |
+
threshold: float = Query(0.75, ge=0.0, le=1.0),
|
| 204 |
+
graph=Depends(get_movement_graph),
|
| 205 |
+
):
|
| 206 |
+
anomalies = graph.get_all_anomalies(threshold=threshold)
|
| 207 |
+
return {
|
| 208 |
+
"threshold": threshold,
|
| 209 |
+
"anomalous_persons": anomalies,
|
| 210 |
+
"count": len(anomalies),
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
# ── GET /persons ───────────────────────────────────────────────────────────────
|
| 215 |
+
|
| 216 |
+
@router.get("/persons", summary="List all tracked persons")
|
| 217 |
+
async def list_persons(
|
| 218 |
+
limit: int = Query(50, ge=1, le=200),
|
| 219 |
+
offset: int = Query(0, ge=0),
|
| 220 |
+
db: AsyncSession = Depends(get_db),
|
| 221 |
+
):
|
| 222 |
+
persons = await crud.get_all_persons(db, limit=limit, offset=offset)
|
| 223 |
+
return {
|
| 224 |
+
"persons": [
|
| 225 |
+
{
|
| 226 |
+
"id": str(p.id),
|
| 227 |
+
"first_seen": str(p.first_seen),
|
| 228 |
+
"last_seen": str(p.last_seen),
|
| 229 |
+
"faiss_id": p.faiss_id,
|
| 230 |
+
"attributes": p.attributes,
|
| 231 |
+
}
|
| 232 |
+
for p in persons
|
| 233 |
+
],
|
| 234 |
+
"total": len(persons),
|
| 235 |
+
}
|
routes/stream_routes.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Request
|
| 2 |
+
from fastapi.responses import StreamingResponse
|
| 3 |
+
import asyncio
|
| 4 |
+
from typing import List
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
+
from loguru import logger
|
| 7 |
+
|
| 8 |
+
from vision.stream_manager import stream_manager
|
| 9 |
+
|
| 10 |
+
router = APIRouter(prefix="/api/v1/streams", tags=["Streams"])
|
| 11 |
+
|
| 12 |
+
class StreamCreateRequest(BaseModel):
|
| 13 |
+
camera_id: str
|
| 14 |
+
source: str # e.g. '0', 'rtsp://...', 'http://...'
|
| 15 |
+
|
| 16 |
+
@router.post("/add")
|
| 17 |
+
async def add_stream(request: StreamCreateRequest):
|
| 18 |
+
"""Start a new background stream"""
|
| 19 |
+
stream_manager.add_stream(request.camera_id, request.source)
|
| 20 |
+
return {"status": "added", "camera_id": request.camera_id}
|
| 21 |
+
|
| 22 |
+
@router.delete("/{camera_id}")
|
| 23 |
+
async def remove_stream(camera_id: str):
|
| 24 |
+
"""Stop a background stream"""
|
| 25 |
+
stream_manager.remove_stream(camera_id)
|
| 26 |
+
return {"status": "removed", "camera_id": camera_id}
|
| 27 |
+
|
| 28 |
+
@router.get("/")
|
| 29 |
+
async def list_streams():
|
| 30 |
+
"""Get active streams"""
|
| 31 |
+
return {"streams": list(stream_manager.streams.keys())}
|
| 32 |
+
|
| 33 |
+
@router.get("/results")
|
| 34 |
+
async def stream_results():
|
| 35 |
+
"""Get the latest inference results for all active streams"""
|
| 36 |
+
return stream_manager.results
|
| 37 |
+
|
| 38 |
+
@router.get("/feed/{camera_id}")
|
| 39 |
+
async def video_feed(camera_id: str, request: Request):
|
| 40 |
+
"""
|
| 41 |
+
Multipart MJPEG video feed for `<img>` tags.
|
| 42 |
+
"""
|
| 43 |
+
# If a feed is requested but not added, try a local fallback
|
| 44 |
+
if camera_id not in stream_manager.streams:
|
| 45 |
+
# Fallback to local webcam '0' for demo purposes
|
| 46 |
+
stream_manager.add_stream(camera_id, "0")
|
| 47 |
+
|
| 48 |
+
async def frame_generator():
|
| 49 |
+
while True:
|
| 50 |
+
if await request.is_disconnected():
|
| 51 |
+
logger.info(f"Client disconnected from feed {camera_id}")
|
| 52 |
+
break
|
| 53 |
+
|
| 54 |
+
frame = stream_manager.get_frame(camera_id)
|
| 55 |
+
if frame is None:
|
| 56 |
+
await asyncio.sleep(0.1)
|
| 57 |
+
continue
|
| 58 |
+
|
| 59 |
+
# Yield multipart boundary + image bytes
|
| 60 |
+
yield (b'--frame\r\n'
|
| 61 |
+
b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
|
| 62 |
+
|
| 63 |
+
# Target ~30fps
|
| 64 |
+
await asyncio.sleep(0.033)
|
| 65 |
+
|
| 66 |
+
return StreamingResponse(
|
| 67 |
+
frame_generator(),
|
| 68 |
+
media_type="multipart/x-mixed-replace; boundary=frame"
|
| 69 |
+
)
|
routes/vision_routes.py
ADDED
|
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
routes/vision_routes.py - Vision API endpoints:
|
| 3 |
+
POST /process-frame (single frame inference)
|
| 4 |
+
GET /live-feed (WebSocket stream)
|
| 5 |
+
GET /cameras (list active cameras)
|
| 6 |
+
"""
|
| 7 |
+
import asyncio
|
| 8 |
+
import base64
|
| 9 |
+
import json
|
| 10 |
+
import os
|
| 11 |
+
import tempfile
|
| 12 |
+
import time
|
| 13 |
+
import uuid
|
| 14 |
+
from io import BytesIO
|
| 15 |
+
from typing import Optional
|
| 16 |
+
|
| 17 |
+
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, WebSocket, WebSocketDisconnect
|
| 18 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
| 19 |
+
from pydantic import BaseModel
|
| 20 |
+
from PIL import Image
|
| 21 |
+
from loguru import logger
|
| 22 |
+
from sqlalchemy.ext.asyncio import AsyncSession
|
| 23 |
+
|
| 24 |
+
from database.session import get_db, AsyncSessionLocal
|
| 25 |
+
from database import crud
|
| 26 |
+
from database.models import ActivityType, AnalysisSession
|
| 27 |
+
|
| 28 |
+
router = APIRouter(prefix="/api/v1", tags=["Vision"])
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ── Request / Response Models ─────────────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
class FrameProcessRequest(BaseModel):
|
| 34 |
+
camera_id: str
|
| 35 |
+
image_b64: str # base64-encoded JPEG/PNG
|
| 36 |
+
run_attributes: bool = True
|
| 37 |
+
run_reid: bool = True
|
| 38 |
+
reid_threshold: float = 0.85
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class FrameProcessResponse(BaseModel):
|
| 42 |
+
camera_id: str
|
| 43 |
+
frame_id: int
|
| 44 |
+
person_count: int
|
| 45 |
+
persons: list
|
| 46 |
+
latency: dict
|
| 47 |
+
fps: float
|
| 48 |
+
timestamp: float
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
# ── Dependency: get vision pipeline singleton ──────────────────────────────────
|
| 52 |
+
|
| 53 |
+
def get_pipeline():
|
| 54 |
+
from app import vision_pipeline
|
| 55 |
+
return vision_pipeline
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_movement_graph():
|
| 59 |
+
from app import movement_graph
|
| 60 |
+
return movement_graph
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# ── POST /process-frame ───────────────────────────────────────────────────────
|
| 64 |
+
|
| 65 |
+
@router.post("/process-frame", response_model=FrameProcessResponse, summary="Process a single camera frame")
|
| 66 |
+
async def process_frame(
|
| 67 |
+
request: FrameProcessRequest,
|
| 68 |
+
db: AsyncSession = Depends(get_db),
|
| 69 |
+
pipeline=Depends(get_pipeline),
|
| 70 |
+
graph=Depends(get_movement_graph),
|
| 71 |
+
):
|
| 72 |
+
"""
|
| 73 |
+
Submit a single camera frame for full vision pipeline processing.
|
| 74 |
+
Returns detected persons with tracking IDs, ReID matches, and attributes.
|
| 75 |
+
"""
|
| 76 |
+
try:
|
| 77 |
+
result = pipeline.process_frame(
|
| 78 |
+
image_input=request.image_b64,
|
| 79 |
+
camera_id=request.camera_id,
|
| 80 |
+
run_attributes=request.run_attributes,
|
| 81 |
+
run_reid=request.run_reid,
|
| 82 |
+
reid_threshold=request.reid_threshold,
|
| 83 |
+
)
|
| 84 |
+
except Exception as e:
|
| 85 |
+
logger.error(f"Vision pipeline error: {e}")
|
| 86 |
+
raise HTTPException(status_code=500, detail=str(e))
|
| 87 |
+
|
| 88 |
+
# Persist events to database
|
| 89 |
+
for person in result.get("persons", []):
|
| 90 |
+
person_id_str = person.get("assigned_person_id")
|
| 91 |
+
if not person_id_str:
|
| 92 |
+
continue
|
| 93 |
+
try:
|
| 94 |
+
person_uuid = uuid.UUID(person_id_str)
|
| 95 |
+
except ValueError:
|
| 96 |
+
continue
|
| 97 |
+
|
| 98 |
+
# Upsert person record
|
| 99 |
+
db_person = await crud.get_person(db, person_uuid)
|
| 100 |
+
if db_person is None and person.get("is_new_person"):
|
| 101 |
+
db_person = await crud.create_person(
|
| 102 |
+
db,
|
| 103 |
+
faiss_id=person.get("faiss_id"),
|
| 104 |
+
attributes=person.get("attributes"),
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
if db_person:
|
| 108 |
+
await crud.update_person_last_seen(db, db_person.id)
|
| 109 |
+
await crud.create_event(
|
| 110 |
+
db,
|
| 111 |
+
person_id=db_person.id,
|
| 112 |
+
camera_id=request.camera_id,
|
| 113 |
+
activity_type=ActivityType.DETECTED,
|
| 114 |
+
bounding_box={"x1": person["bbox"][0], "y1": person["bbox"][1],
|
| 115 |
+
"x2": person["bbox"][2], "y2": person["bbox"][3]},
|
| 116 |
+
confidence=person.get("score"),
|
| 117 |
+
track_id=person.get("track_id"),
|
| 118 |
+
raw_metadata={"reid_matches": person.get("reid_matches", [])},
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
# Update movement graph
|
| 122 |
+
graph.add_observation(
|
| 123 |
+
person_id=str(db_person.id),
|
| 124 |
+
camera_id=request.camera_id,
|
| 125 |
+
timestamp=result["timestamp"],
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
return JSONResponse(content=result)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# ── POST /process-frame/upload (multipart form data) ─────────────────────────
|
| 132 |
+
|
| 133 |
+
@router.post("/process-frame/upload", summary="Upload image file for processing")
|
| 134 |
+
async def process_frame_upload(
|
| 135 |
+
camera_id: str = Form(...),
|
| 136 |
+
run_attributes: bool = Form(True),
|
| 137 |
+
run_reid: bool = Form(True),
|
| 138 |
+
image: UploadFile = File(...),
|
| 139 |
+
db: AsyncSession = Depends(get_db),
|
| 140 |
+
pipeline=Depends(get_pipeline),
|
| 141 |
+
graph=Depends(get_movement_graph),
|
| 142 |
+
):
|
| 143 |
+
"""Submit a frame via multipart file upload."""
|
| 144 |
+
content = await image.read()
|
| 145 |
+
b64 = base64.b64encode(content).decode()
|
| 146 |
+
# Reuse main endpoint logic via internal call
|
| 147 |
+
from routes.vision_routes import process_frame, FrameProcessRequest
|
| 148 |
+
req = FrameProcessRequest(
|
| 149 |
+
camera_id=camera_id,
|
| 150 |
+
image_b64=b64,
|
| 151 |
+
run_attributes=run_attributes,
|
| 152 |
+
run_reid=run_reid,
|
| 153 |
+
)
|
| 154 |
+
return await process_frame(req, db, pipeline, graph)
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# ── POST /analyze-video ──────────────────────────────────────────────────────
|
| 158 |
+
|
| 159 |
+
@router.post("/analyze-video", summary="Upload a video file and stream per-frame detection results via SSE")
|
| 160 |
+
async def analyze_video(
|
| 161 |
+
camera_id: str = Form("VIDEO-UPLOAD"),
|
| 162 |
+
frame_interval: int = Form(5),
|
| 163 |
+
run_attributes: bool = Form(True),
|
| 164 |
+
run_reid: bool = Form(True),
|
| 165 |
+
video: UploadFile = File(...),
|
| 166 |
+
pipeline=Depends(get_pipeline),
|
| 167 |
+
):
|
| 168 |
+
"""
|
| 169 |
+
Upload a video file. Frames are sampled every `frame_interval` frames.
|
| 170 |
+
Results are streamed back as Server-Sent Events (SSE).
|
| 171 |
+
|
| 172 |
+
SSE event format:
|
| 173 |
+
data: {"type": "frame", "frame_id": int, "timestamp_sec": float, ...}
|
| 174 |
+
data: {"type": "summary", "total_frames": int, "unique_persons": int, ...}
|
| 175 |
+
data: {"type": "error", "message": str}
|
| 176 |
+
"""
|
| 177 |
+
# Read video bytes into a temp file (cv2 needs a real path on Windows)
|
| 178 |
+
content = await video.read()
|
| 179 |
+
filename = video.filename or "uploaded_video.mp4"
|
| 180 |
+
suffix = os.path.splitext(filename)[1] or ".mp4"
|
| 181 |
+
|
| 182 |
+
# Pre-generate an ID and path for the thumbnail
|
| 183 |
+
session_id = uuid.uuid4()
|
| 184 |
+
thumb_filename = f"{session_id}.jpg"
|
| 185 |
+
thumb_path = os.path.join("static", "thumbnails", thumb_filename)
|
| 186 |
+
|
| 187 |
+
async def event_stream():
|
| 188 |
+
tmp_path = None
|
| 189 |
+
try:
|
| 190 |
+
import cv2
|
| 191 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
| 192 |
+
tmp.write(content)
|
| 193 |
+
tmp_path = tmp.name
|
| 194 |
+
|
| 195 |
+
cap = cv2.VideoCapture(tmp_path)
|
| 196 |
+
if not cap.isOpened():
|
| 197 |
+
yield f"data: {json.dumps({'type': 'error', 'message': 'Cannot open video file'})}\n\n"
|
| 198 |
+
return
|
| 199 |
+
|
| 200 |
+
fps_native = cap.get(cv2.CAP_PROP_FPS) or 25.0
|
| 201 |
+
total_raw = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 202 |
+
total_sampled = max(1, total_raw // max(1, frame_interval))
|
| 203 |
+
|
| 204 |
+
# Metadata handshake
|
| 205 |
+
yield f"data: {json.dumps({'type': 'meta', 'total_sampled': total_sampled, 'fps_native': fps_native, 'total_frames_raw': total_raw})}\n\n"
|
| 206 |
+
|
| 207 |
+
raw_idx = 0
|
| 208 |
+
processed_idx = 0
|
| 209 |
+
unique_ids = set()
|
| 210 |
+
peak_count = 0
|
| 211 |
+
t_video_start = time.time()
|
| 212 |
+
|
| 213 |
+
while True:
|
| 214 |
+
ret, frame = cap.read()
|
| 215 |
+
if not ret:
|
| 216 |
+
break
|
| 217 |
+
|
| 218 |
+
if raw_idx % max(1, frame_interval) == 0:
|
| 219 |
+
# Convert BGR → RGB → PIL
|
| 220 |
+
import numpy as np
|
| 221 |
+
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 222 |
+
pil_img = Image.fromarray(rgb)
|
| 223 |
+
|
| 224 |
+
# Save the very first processed frame as the session thumbnail
|
| 225 |
+
if processed_idx == 0:
|
| 226 |
+
pil_img.save(thumb_path, "JPEG", quality=85)
|
| 227 |
+
|
| 228 |
+
timestamp_sec = round(raw_idx / fps_native, 3)
|
| 229 |
+
|
| 230 |
+
try:
|
| 231 |
+
result = pipeline.process_frame(
|
| 232 |
+
image_input=pil_img,
|
| 233 |
+
camera_id=camera_id,
|
| 234 |
+
run_attributes=run_attributes,
|
| 235 |
+
run_reid=run_reid,
|
| 236 |
+
)
|
| 237 |
+
except Exception as e:
|
| 238 |
+
logger.warning(f"Frame {raw_idx} error: {e}")
|
| 239 |
+
raw_idx += 1
|
| 240 |
+
continue
|
| 241 |
+
|
| 242 |
+
persons = result.get("persons", [])
|
| 243 |
+
for p in persons:
|
| 244 |
+
pid = p.get("assigned_person_id")
|
| 245 |
+
if pid:
|
| 246 |
+
unique_ids.add(pid)
|
| 247 |
+
peak_count = max(peak_count, len(persons))
|
| 248 |
+
|
| 249 |
+
# Slim down persons payload for SSE
|
| 250 |
+
slim_persons = [
|
| 251 |
+
{
|
| 252 |
+
"track_id": p.get("track_id"),
|
| 253 |
+
"bbox": p.get("bbox"),
|
| 254 |
+
"score": round(p.get("score", 0), 3),
|
| 255 |
+
"is_new": p.get("is_new_person", False),
|
| 256 |
+
"attributes": p.get("attributes", {}),
|
| 257 |
+
"reid_sim": round(p["reid_matches"][0]["similarity"], 3)
|
| 258 |
+
if p.get("reid_matches") else None,
|
| 259 |
+
}
|
| 260 |
+
for p in persons
|
| 261 |
+
]
|
| 262 |
+
|
| 263 |
+
event = {
|
| 264 |
+
"type": "frame",
|
| 265 |
+
"frame_id": processed_idx,
|
| 266 |
+
"raw_frame": raw_idx,
|
| 267 |
+
"timestamp_sec": timestamp_sec,
|
| 268 |
+
"person_count": len(persons),
|
| 269 |
+
"persons": slim_persons,
|
| 270 |
+
"latency_ms": round(result["latency"]["total_ms"], 1),
|
| 271 |
+
"progress": round((processed_idx + 1) / total_sampled * 100, 1),
|
| 272 |
+
}
|
| 273 |
+
yield f"data: {json.dumps(event)}\n\n"
|
| 274 |
+
processed_idx += 1
|
| 275 |
+
|
| 276 |
+
# Yield control so FastAPI can flush the buffer
|
| 277 |
+
await asyncio.sleep(0)
|
| 278 |
+
|
| 279 |
+
raw_idx += 1
|
| 280 |
+
|
| 281 |
+
cap.release()
|
| 282 |
+
duration_sec = round(time.time() - t_video_start, 2)
|
| 283 |
+
|
| 284 |
+
summary = {
|
| 285 |
+
"type": "summary",
|
| 286 |
+
"total_frames_processed": processed_idx,
|
| 287 |
+
"unique_person_count": len(unique_ids),
|
| 288 |
+
"peak_person_count": peak_count,
|
| 289 |
+
"duration_sec": duration_sec,
|
| 290 |
+
"video_duration_sec": round(total_raw / fps_native, 2),
|
| 291 |
+
}
|
| 292 |
+
yield f"data: {json.dumps(summary)}\n\n"
|
| 293 |
+
logger.info(f"Video analysis done — {processed_idx} frames, {len(unique_ids)} unique persons")
|
| 294 |
+
|
| 295 |
+
# Save the session to the database
|
| 296 |
+
try:
|
| 297 |
+
# We need a fresh session here inside the generator
|
| 298 |
+
async with AsyncSessionLocal() as db:
|
| 299 |
+
new_session = AnalysisSession(
|
| 300 |
+
id=session_id,
|
| 301 |
+
video_filename=filename,
|
| 302 |
+
thumbnail_path=f"/static/thumbnails/{thumb_filename}",
|
| 303 |
+
duration_sec=duration_sec,
|
| 304 |
+
frames_processed=processed_idx,
|
| 305 |
+
unique_persons=len(unique_ids),
|
| 306 |
+
peak_count=peak_count,
|
| 307 |
+
)
|
| 308 |
+
db.add(new_session)
|
| 309 |
+
await db.commit()
|
| 310 |
+
except Exception as e:
|
| 311 |
+
logger.error(f"Failed to save session to DB: {e}")
|
| 312 |
+
|
| 313 |
+
except Exception as e:
|
| 314 |
+
logger.error(f"analyze_video error: {e}")
|
| 315 |
+
yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
|
| 316 |
+
finally:
|
| 317 |
+
if tmp_path and os.path.exists(tmp_path):
|
| 318 |
+
os.unlink(tmp_path)
|
| 319 |
+
|
| 320 |
+
return StreamingResponse(
|
| 321 |
+
event_stream(),
|
| 322 |
+
media_type="text/event-stream",
|
| 323 |
+
headers={
|
| 324 |
+
"Cache-Control": "no-cache",
|
| 325 |
+
"X-Accel-Buffering": "no",
|
| 326 |
+
"Connection": "keep-alive",
|
| 327 |
+
},
|
| 328 |
+
)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
# ── GET /sessions ─────────────────────────────────────────────────────────────
|
| 332 |
+
|
| 333 |
+
@router.get("/sessions", summary="List historical video analysis sessions")
|
| 334 |
+
async def list_sessions(db: AsyncSession = Depends(get_db)):
|
| 335 |
+
from sqlalchemy.future import select
|
| 336 |
+
query = select(AnalysisSession).order_by(AnalysisSession.timestamp.desc())
|
| 337 |
+
result = await db.execute(query)
|
| 338 |
+
sessions = result.scalars().all()
|
| 339 |
+
|
| 340 |
+
return [
|
| 341 |
+
{
|
| 342 |
+
"id": str(s.id),
|
| 343 |
+
"timestamp": s.timestamp.isoformat(),
|
| 344 |
+
"video_filename": s.video_filename,
|
| 345 |
+
"thumbnail_path": s.thumbnail_path,
|
| 346 |
+
"duration_sec": s.duration_sec,
|
| 347 |
+
"frames_processed": s.frames_processed,
|
| 348 |
+
"unique_persons": s.unique_persons,
|
| 349 |
+
"peak_count": s.peak_count,
|
| 350 |
+
}
|
| 351 |
+
for s in sessions
|
| 352 |
+
]
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
# ── GET /cameras ──────────────────────────────────────────────────────────────
|
| 356 |
+
|
| 357 |
+
@router.get("/cameras", summary="List active cameras and their status")
|
| 358 |
+
async def list_cameras(pipeline=Depends(get_pipeline), graph=Depends(get_movement_graph)):
|
| 359 |
+
summary = graph.get_movement_summary()
|
| 360 |
+
trackers = list(pipeline.tracker_manager._trackers.keys())
|
| 361 |
+
fps_data = {cam: pipeline._compute_fps(cam) for cam in trackers}
|
| 362 |
+
return {
|
| 363 |
+
"active_cameras": trackers,
|
| 364 |
+
"fps_per_camera": fps_data,
|
| 365 |
+
"movement_graph_summary": summary,
|
| 366 |
+
}
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
# ── WebSocket /live-feed ──────────────────────────────────────────────────────
|
| 370 |
+
|
| 371 |
+
class ConnectionManager:
|
| 372 |
+
def __init__(self):
|
| 373 |
+
self.active_connections: list[WebSocket] = []
|
| 374 |
+
|
| 375 |
+
async def connect(self, ws: WebSocket):
|
| 376 |
+
await ws.accept()
|
| 377 |
+
self.active_connections.append(ws)
|
| 378 |
+
logger.info(f"WebSocket connected. Total: {len(self.active_connections)}")
|
| 379 |
+
|
| 380 |
+
def disconnect(self, ws: WebSocket):
|
| 381 |
+
self.active_connections.remove(ws)
|
| 382 |
+
logger.info(f"WebSocket disconnected. Total: {len(self.active_connections)}")
|
| 383 |
+
|
| 384 |
+
async def broadcast(self, data: dict):
|
| 385 |
+
import json
|
| 386 |
+
msg = json.dumps(data)
|
| 387 |
+
for conn in self.active_connections:
|
| 388 |
+
try:
|
| 389 |
+
await conn.send_text(msg)
|
| 390 |
+
except Exception:
|
| 391 |
+
pass
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
ws_manager = ConnectionManager()
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
@router.websocket("/live-feed")
|
| 398 |
+
async def live_feed_websocket(websocket: WebSocket, pipeline=Depends(get_pipeline)):
|
| 399 |
+
"""
|
| 400 |
+
WebSocket endpoint for realtime surveillance feed.
|
| 401 |
+
Client sends: {"camera_id": str, "image_b64": str}
|
| 402 |
+
Server broadcasts processed results to all connected clients.
|
| 403 |
+
"""
|
| 404 |
+
await ws_manager.connect(websocket)
|
| 405 |
+
try:
|
| 406 |
+
while True:
|
| 407 |
+
data = await websocket.receive_json()
|
| 408 |
+
camera_id = data.get("camera_id", "cam-0")
|
| 409 |
+
image_b64 = data.get("image_b64", "")
|
| 410 |
+
if not image_b64:
|
| 411 |
+
await websocket.send_json({"error": "No image provided"})
|
| 412 |
+
continue
|
| 413 |
+
result = pipeline.process_frame(
|
| 414 |
+
image_input=image_b64,
|
| 415 |
+
camera_id=camera_id,
|
| 416 |
+
run_attributes=data.get("run_attributes", False),
|
| 417 |
+
run_reid=data.get("run_reid", True),
|
| 418 |
+
)
|
| 419 |
+
await websocket.send_json(result)
|
| 420 |
+
except WebSocketDisconnect:
|
| 421 |
+
ws_manager.disconnect(websocket)
|
static/thumbnails/4f42294d-cdc1-4c64-abae-71a2891167b2.jpg
ADDED
|
|
vision/__init__.py
ADDED
|
File without changes
|
vision/attributes.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
vision/attributes.py - CLIP-based Zero-Shot Clothing & Attribute Recognition
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
import torch
|
| 6 |
+
import numpy as np
|
| 7 |
+
import faiss
|
| 8 |
+
import os
|
| 9 |
+
from PIL import Image
|
| 10 |
+
from typing import List, Dict, Tuple, Optional
|
| 11 |
+
from transformers import CLIPProcessor, CLIPModel
|
| 12 |
+
from loguru import logger
|
| 13 |
+
from config import settings, DEVICE, FAISS_DIR
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Attribute taxonomies for zero-shot classification
|
| 17 |
+
CLOTHING_LABELS = [
|
| 18 |
+
"wearing a red shirt", "wearing a blue shirt", "wearing a white shirt",
|
| 19 |
+
"wearing a black shirt", "wearing a yellow jacket", "wearing a green jacket",
|
| 20 |
+
"wearing jeans", "wearing formal trousers", "wearing shorts", "wearing a dress",
|
| 21 |
+
"wearing a hoodie", "wearing a suit", "wearing a uniform", "wearing a coat",
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
COLOR_LABELS = [
|
| 25 |
+
"person in red clothing", "person in blue clothing", "person in black clothing",
|
| 26 |
+
"person in white clothing", "person in gray clothing", "person in green clothing",
|
| 27 |
+
"person in yellow clothing", "person in orange clothing", "person in brown clothing",
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
GENDER_LABELS = ["a male person", "a female person"]
|
| 31 |
+
|
| 32 |
+
ACCESSORY_LABELS = [
|
| 33 |
+
"wearing a backpack", "carrying a bag", "wearing a hat", "wearing sunglasses",
|
| 34 |
+
"carrying an umbrella", "wearing a mask", "no accessories",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
AGE_LABELS = [
|
| 38 |
+
"a child person", "a teenager person", "a young adult person",
|
| 39 |
+
"a middle-aged person", "an elderly person",
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class AttributeRecognizer:
|
| 44 |
+
"""
|
| 45 |
+
Zero-shot attribute recognition using CLIP.
|
| 46 |
+
Generates structured attribute dict and CLIP visual embeddings per person.
|
| 47 |
+
"""
|
| 48 |
+
|
| 49 |
+
ATTR_INDEX_FILE = str(FAISS_DIR / "attr_index.faiss")
|
| 50 |
+
ATTR_META_FILE = str(FAISS_DIR / "attr_meta.npy")
|
| 51 |
+
|
| 52 |
+
def __init__(self):
|
| 53 |
+
logger.info(f"Loading CLIP model: {settings.CLIP_MODEL}")
|
| 54 |
+
self.processor = CLIPProcessor.from_pretrained(settings.CLIP_MODEL)
|
| 55 |
+
self.model = CLIPModel.from_pretrained(settings.CLIP_MODEL)
|
| 56 |
+
self.model.to(DEVICE)
|
| 57 |
+
self.model.eval()
|
| 58 |
+
|
| 59 |
+
self.dim = settings.CLIP_EMBEDDING_DIM
|
| 60 |
+
self.index = self._load_or_create_index()
|
| 61 |
+
self.meta: List[Dict] = self._load_meta()
|
| 62 |
+
logger.info(f"✅ AttributeRecognizer ready. FAISS attr index size: {self.index.ntotal}")
|
| 63 |
+
|
| 64 |
+
def _load_or_create_index(self):
|
| 65 |
+
if os.path.exists(self.ATTR_INDEX_FILE):
|
| 66 |
+
return faiss.read_index(self.ATTR_INDEX_FILE)
|
| 67 |
+
return faiss.IndexFlatIP(self.dim)
|
| 68 |
+
|
| 69 |
+
def _load_meta(self) -> List[Dict]:
|
| 70 |
+
if os.path.exists(self.ATTR_META_FILE):
|
| 71 |
+
return list(np.load(self.ATTR_META_FILE, allow_pickle=True))
|
| 72 |
+
return []
|
| 73 |
+
|
| 74 |
+
def save(self):
|
| 75 |
+
faiss.write_index(self.index, self.ATTR_INDEX_FILE)
|
| 76 |
+
np.save(self.ATTR_META_FILE, np.array(self.meta, dtype=object))
|
| 77 |
+
|
| 78 |
+
@torch.inference_mode()
|
| 79 |
+
def _classify(self, image: Image.Image, labels: List[str]) -> List[Tuple[str, float]]:
|
| 80 |
+
"""Run zero-shot CLIP classification. Returns sorted (label, prob) list."""
|
| 81 |
+
inputs = self.processor(
|
| 82 |
+
text=labels, images=image, return_tensors="pt", padding=True
|
| 83 |
+
)
|
| 84 |
+
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 85 |
+
outputs = self.model(**inputs)
|
| 86 |
+
logits = outputs.logits_per_image[0]
|
| 87 |
+
probs = torch.softmax(logits, dim=0).cpu().numpy()
|
| 88 |
+
return sorted(zip(labels, probs.tolist()), key=lambda x: -x[1])
|
| 89 |
+
|
| 90 |
+
@torch.inference_mode()
|
| 91 |
+
def extract_visual_embedding(self, image: Image.Image) -> np.ndarray:
|
| 92 |
+
"""Extract L2-normalized CLIP visual embedding."""
|
| 93 |
+
inputs = self.processor(images=image, return_tensors="pt")
|
| 94 |
+
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 95 |
+
features = self.model.get_image_features(**inputs)
|
| 96 |
+
features = features / features.norm(dim=-1, keepdim=True)
|
| 97 |
+
return features.cpu().numpy().astype(np.float32)
|
| 98 |
+
|
| 99 |
+
def recognize(self, image: Image.Image) -> Dict:
|
| 100 |
+
"""
|
| 101 |
+
Run all attribute classifiers on a cropped person image.
|
| 102 |
+
|
| 103 |
+
Returns:
|
| 104 |
+
{
|
| 105 |
+
"clothing": [{"label": str, "confidence": float}],
|
| 106 |
+
"color": str,
|
| 107 |
+
"gender": str,
|
| 108 |
+
"accessories": [str],
|
| 109 |
+
"age_group": str,
|
| 110 |
+
}
|
| 111 |
+
"""
|
| 112 |
+
t0 = time.perf_counter()
|
| 113 |
+
|
| 114 |
+
clothing_results = self._classify(image, CLOTHING_LABELS)
|
| 115 |
+
color_results = self._classify(image, COLOR_LABELS)
|
| 116 |
+
gender_results = self._classify(image, GENDER_LABELS)
|
| 117 |
+
accessory_results = self._classify(image, ACCESSORY_LABELS)
|
| 118 |
+
age_results = self._classify(image, AGE_LABELS)
|
| 119 |
+
|
| 120 |
+
latency = (time.perf_counter() - t0) * 1000
|
| 121 |
+
|
| 122 |
+
attributes = {
|
| 123 |
+
"clothing": [
|
| 124 |
+
{"label": l, "confidence": round(p, 4)}
|
| 125 |
+
for l, p in clothing_results[:3]
|
| 126 |
+
if p > 0.1
|
| 127 |
+
],
|
| 128 |
+
"color": color_results[0][0].replace("person in ", "").replace(" clothing", "") if color_results else "unknown",
|
| 129 |
+
"gender": gender_results[0][0].replace("a ", "").replace(" person", "") if gender_results else "unknown",
|
| 130 |
+
"accessories": [l for l, p in accessory_results if p > 0.3 and "no accessories" not in l],
|
| 131 |
+
"age_group": age_results[0][0].replace("a ", "").replace(" person", "") if age_results else "unknown",
|
| 132 |
+
"inference_ms": round(latency, 2),
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
logger.debug(f"Attributes recognized in {latency:.1f}ms")
|
| 136 |
+
return attributes
|
| 137 |
+
|
| 138 |
+
def add_to_gallery(self, image: Image.Image, person_id: str) -> int:
|
| 139 |
+
"""Store CLIP visual embedding in FAISS for attribute-based search."""
|
| 140 |
+
embedding = self.extract_visual_embedding(image)
|
| 141 |
+
faiss_id = self.index.ntotal
|
| 142 |
+
self.index.add(embedding)
|
| 143 |
+
self.meta.append({"person_id": person_id, "faiss_id": faiss_id})
|
| 144 |
+
self.save()
|
| 145 |
+
return faiss_id
|
| 146 |
+
|
| 147 |
+
def search_by_attribute_query(self, text_query: str, top_k: int = 10) -> List[Dict]:
|
| 148 |
+
"""Search gallery using a natural language attribute query."""
|
| 149 |
+
if self.index.ntotal == 0:
|
| 150 |
+
return []
|
| 151 |
+
inputs = self.processor(text=[text_query], return_tensors="pt", padding=True)
|
| 152 |
+
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 153 |
+
with torch.inference_mode():
|
| 154 |
+
text_features = self.model.get_text_features(**inputs)
|
| 155 |
+
text_features = text_features / text_features.norm(dim=-1, keepdim=True)
|
| 156 |
+
query = text_features.cpu().numpy().astype(np.float32)
|
| 157 |
+
k = min(top_k, self.index.ntotal)
|
| 158 |
+
distances, indices = self.index.search(query, k)
|
| 159 |
+
return [
|
| 160 |
+
{"person_id": self.meta[idx]["person_id"], "similarity": round(float(dist), 4)}
|
| 161 |
+
for dist, idx in zip(distances[0], indices[0])
|
| 162 |
+
if idx != -1
|
| 163 |
+
]
|
vision/detector.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
vision/detector.py - Person Detection using facebook/detr-resnet-50
|
| 3 |
+
"""
|
| 4 |
+
import time
|
| 5 |
+
import torch
|
| 6 |
+
import numpy as np
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from typing import List, Dict, Tuple, Optional
|
| 9 |
+
from transformers import DetrImageProcessor, DetrForObjectDetection
|
| 10 |
+
from loguru import logger
|
| 11 |
+
from config import settings, DEVICE
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class PersonDetector:
|
| 15 |
+
"""
|
| 16 |
+
DETR-based person detector.
|
| 17 |
+
Returns bounding boxes, confidence scores, and processing latency.
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
PERSON_LABEL = "person"
|
| 21 |
+
COCO_LABEL_MAP = None # populated after model loads
|
| 22 |
+
|
| 23 |
+
def __init__(self):
|
| 24 |
+
logger.info(f"Loading detection model: {settings.DETECTION_MODEL}")
|
| 25 |
+
self.processor = DetrImageProcessor.from_pretrained(settings.DETECTION_MODEL)
|
| 26 |
+
self.model = DetrForObjectDetection.from_pretrained(settings.DETECTION_MODEL)
|
| 27 |
+
self.model.to(DEVICE)
|
| 28 |
+
self.model.eval()
|
| 29 |
+
|
| 30 |
+
# Build label → id map
|
| 31 |
+
self.id2label = self.model.config.id2label
|
| 32 |
+
self.person_label_ids = [
|
| 33 |
+
k for k, v in self.id2label.items() if v.lower() == self.PERSON_LABEL
|
| 34 |
+
]
|
| 35 |
+
logger.info(f"✅ PersonDetector ready on {DEVICE}. Person class ids: {self.person_label_ids}")
|
| 36 |
+
|
| 37 |
+
@torch.inference_mode()
|
| 38 |
+
def detect(
|
| 39 |
+
self,
|
| 40 |
+
image: Image.Image,
|
| 41 |
+
confidence_threshold: Optional[float] = None,
|
| 42 |
+
) -> Tuple[List[Dict], float]:
|
| 43 |
+
"""
|
| 44 |
+
Detect persons in a PIL image.
|
| 45 |
+
|
| 46 |
+
Returns:
|
| 47 |
+
detections: list of {"bbox": [x1,y1,x2,y2], "score": float, "label": "person"}
|
| 48 |
+
latency_ms: inference time in milliseconds
|
| 49 |
+
"""
|
| 50 |
+
threshold = confidence_threshold or settings.DETECTION_CONFIDENCE
|
| 51 |
+
|
| 52 |
+
t0 = time.perf_counter()
|
| 53 |
+
inputs = self.processor(images=image, return_tensors="pt")
|
| 54 |
+
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 55 |
+
outputs = self.model(**inputs)
|
| 56 |
+
t1 = time.perf_counter()
|
| 57 |
+
latency_ms = (t1 - t0) * 1000
|
| 58 |
+
|
| 59 |
+
# Post-process to original image size
|
| 60 |
+
target_sizes = torch.tensor([image.size[::-1]], device=DEVICE) # (H, W)
|
| 61 |
+
results = self.processor.post_process_object_detection(
|
| 62 |
+
outputs, threshold=threshold, target_sizes=target_sizes
|
| 63 |
+
)[0]
|
| 64 |
+
|
| 65 |
+
detections = []
|
| 66 |
+
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
|
| 67 |
+
label_id = label.item()
|
| 68 |
+
if label_id in self.person_label_ids:
|
| 69 |
+
x1, y1, x2, y2 = box.tolist()
|
| 70 |
+
detections.append({
|
| 71 |
+
"bbox": [round(x1, 2), round(y1, 2), round(x2, 2), round(y2, 2)],
|
| 72 |
+
"score": round(score.item(), 4),
|
| 73 |
+
"label": "person",
|
| 74 |
+
})
|
| 75 |
+
|
| 76 |
+
logger.debug(f"Detected {len(detections)} persons in {latency_ms:.1f}ms")
|
| 77 |
+
return detections, latency_ms
|
| 78 |
+
|
| 79 |
+
def detect_batch(
|
| 80 |
+
self,
|
| 81 |
+
images: List[Image.Image],
|
| 82 |
+
confidence_threshold: Optional[float] = None,
|
| 83 |
+
) -> List[Tuple[List[Dict], float]]:
|
| 84 |
+
"""Batch detection for multiple frames."""
|
| 85 |
+
return [self.detect(img, confidence_threshold) for img in images]
|
| 86 |
+
|
| 87 |
+
@staticmethod
|
| 88 |
+
def crop_person(image: Image.Image, bbox: List[float]) -> Image.Image:
|
| 89 |
+
"""Crop a person region from image given bbox [x1, y1, x2, y2]."""
|
| 90 |
+
x1, y1, x2, y2 = [int(v) for v in bbox]
|
| 91 |
+
return image.crop((x1, y1, x2, y2))
|
vision/pipeline.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
vision/pipeline.py - Full Vision Inference Pipeline
|
| 3 |
+
Orchestrates detection → tracking → ReID → attribute recognition per frame
|
| 4 |
+
"""
|
| 5 |
+
import time
|
| 6 |
+
import base64
|
| 7 |
+
import numpy as np
|
| 8 |
+
import uuid
|
| 9 |
+
from io import BytesIO
|
| 10 |
+
from PIL import Image
|
| 11 |
+
from typing import Dict, List, Optional, Any
|
| 12 |
+
from loguru import logger
|
| 13 |
+
|
| 14 |
+
from vision.detector import PersonDetector
|
| 15 |
+
from vision.tracker import TrackerManager
|
| 16 |
+
from vision.reid import PersonReID
|
| 17 |
+
from vision.attributes import AttributeRecognizer
|
| 18 |
+
from config import settings
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class VisionPipeline:
|
| 22 |
+
"""
|
| 23 |
+
End-to-end vision pipeline for a single frame from a camera.
|
| 24 |
+
Components initialized lazily (singletons shared across requests).
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
def __init__(self):
|
| 28 |
+
logger.info("Initializing VisionPipeline...")
|
| 29 |
+
self.detector = PersonDetector()
|
| 30 |
+
self.tracker_manager = TrackerManager()
|
| 31 |
+
self.reid = PersonReID()
|
| 32 |
+
self.attributes = AttributeRecognizer()
|
| 33 |
+
self._frame_counts: Dict[str, int] = {}
|
| 34 |
+
self._fps_timers: Dict[str, List[float]] = {}
|
| 35 |
+
logger.info("✅ VisionPipeline ready.")
|
| 36 |
+
|
| 37 |
+
def _decode_image(self, image_input: Any) -> Image.Image:
|
| 38 |
+
"""Accept PIL Image, numpy array, bytes, or base64 string."""
|
| 39 |
+
if isinstance(image_input, Image.Image):
|
| 40 |
+
return image_input.convert("RGB")
|
| 41 |
+
if isinstance(image_input, np.ndarray):
|
| 42 |
+
return Image.fromarray(image_input).convert("RGB")
|
| 43 |
+
if isinstance(image_input, bytes):
|
| 44 |
+
return Image.open(BytesIO(image_input)).convert("RGB")
|
| 45 |
+
if isinstance(image_input, str):
|
| 46 |
+
# base64 encoded
|
| 47 |
+
data = base64.b64decode(image_input)
|
| 48 |
+
return Image.open(BytesIO(data)).convert("RGB")
|
| 49 |
+
raise ValueError(f"Unsupported image type: {type(image_input)}")
|
| 50 |
+
|
| 51 |
+
def _compute_fps(self, camera_id: str) -> float:
|
| 52 |
+
"""Compute rolling FPS over last 30 frames."""
|
| 53 |
+
now = time.perf_counter()
|
| 54 |
+
if camera_id not in self._fps_timers:
|
| 55 |
+
self._fps_timers[camera_id] = []
|
| 56 |
+
self._fps_timers[camera_id].append(now)
|
| 57 |
+
if len(self._fps_timers[camera_id]) > 30:
|
| 58 |
+
self._fps_timers[camera_id].pop(0)
|
| 59 |
+
times = self._fps_timers[camera_id]
|
| 60 |
+
if len(times) < 2:
|
| 61 |
+
return 0.0
|
| 62 |
+
return round((len(times) - 1) / (times[-1] - times[0]), 2)
|
| 63 |
+
|
| 64 |
+
def process_frame(
|
| 65 |
+
self,
|
| 66 |
+
image_input: Any,
|
| 67 |
+
camera_id: str,
|
| 68 |
+
run_attributes: bool = True,
|
| 69 |
+
run_reid: bool = True,
|
| 70 |
+
reid_threshold: float = 0.85,
|
| 71 |
+
) -> Dict:
|
| 72 |
+
"""
|
| 73 |
+
Full pipeline for a single frame.
|
| 74 |
+
|
| 75 |
+
Args:
|
| 76 |
+
image_input: PIL Image | numpy array | bytes | base64 str
|
| 77 |
+
camera_id: unique camera identifier
|
| 78 |
+
run_attributes: whether to run CLIP attribute recognition
|
| 79 |
+
run_reid: whether to run ReID matching
|
| 80 |
+
reid_threshold: cosine similarity threshold for ReID
|
| 81 |
+
|
| 82 |
+
Returns:
|
| 83 |
+
Result dict with detections, tracks, reid matches, attributes, and latency breakdown.
|
| 84 |
+
"""
|
| 85 |
+
t_start = time.perf_counter()
|
| 86 |
+
|
| 87 |
+
# 1. Decode image
|
| 88 |
+
image = self._decode_image(image_input)
|
| 89 |
+
w, h = image.size
|
| 90 |
+
|
| 91 |
+
# 2. Detection
|
| 92 |
+
detections, det_ms = self.detector.detect(image)
|
| 93 |
+
|
| 94 |
+
# 3. Tracking
|
| 95 |
+
t_track = time.perf_counter()
|
| 96 |
+
tracks = self.tracker_manager.update(camera_id, detections)
|
| 97 |
+
track_ms = (time.perf_counter() - t_track) * 1000
|
| 98 |
+
|
| 99 |
+
# 4. Per-person: ReID + Attributes
|
| 100 |
+
persons_data = []
|
| 101 |
+
for track in tracks:
|
| 102 |
+
bbox = track["bbox"]
|
| 103 |
+
# Crop person region
|
| 104 |
+
try:
|
| 105 |
+
crop = PersonDetector.crop_person(image, bbox)
|
| 106 |
+
if crop.width < 10 or crop.height < 10:
|
| 107 |
+
continue
|
| 108 |
+
except Exception:
|
| 109 |
+
continue
|
| 110 |
+
|
| 111 |
+
person_entry: Dict = {
|
| 112 |
+
"track_id": track["track_id"],
|
| 113 |
+
"bbox": bbox,
|
| 114 |
+
"score": track["score"],
|
| 115 |
+
"camera_id": camera_id,
|
| 116 |
+
"reid_matches": [],
|
| 117 |
+
"attributes": {},
|
| 118 |
+
"is_new_person": False,
|
| 119 |
+
"assigned_person_id": None,
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
# 4a. ReID — try to match against gallery
|
| 123 |
+
if run_reid:
|
| 124 |
+
t_reid = time.perf_counter()
|
| 125 |
+
reid_matches = self.reid.search(crop, top_k=3, similarity_threshold=reid_threshold)
|
| 126 |
+
person_entry["reid_matches"] = reid_matches
|
| 127 |
+
person_entry["reid_ms"] = round((time.perf_counter() - t_reid) * 1000, 2)
|
| 128 |
+
|
| 129 |
+
if reid_matches:
|
| 130 |
+
person_entry["assigned_person_id"] = reid_matches[0]["person_id"]
|
| 131 |
+
else:
|
| 132 |
+
# New person — register in gallery with temporary UUID
|
| 133 |
+
new_pid = str(uuid.uuid4())
|
| 134 |
+
faiss_id = self.reid.add_person(crop, new_pid, camera_id)
|
| 135 |
+
person_entry["assigned_person_id"] = new_pid
|
| 136 |
+
person_entry["is_new_person"] = True
|
| 137 |
+
person_entry["faiss_id"] = faiss_id
|
| 138 |
+
|
| 139 |
+
import os
|
| 140 |
+
os.makedirs("static/thumbnails", exist_ok=True)
|
| 141 |
+
try:
|
| 142 |
+
crop.save(f"static/thumbnails/{new_pid}.jpg", "JPEG", quality=85)
|
| 143 |
+
except Exception as e:
|
| 144 |
+
logger.warning(f"Failed to save thumbnail for {new_pid}: {e}")
|
| 145 |
+
|
| 146 |
+
# 4b. Attribute recognition
|
| 147 |
+
if run_attributes:
|
| 148 |
+
t_attr = time.perf_counter()
|
| 149 |
+
attrs = self.attributes.recognize(crop)
|
| 150 |
+
person_entry["attributes"] = attrs
|
| 151 |
+
person_entry["attr_ms"] = round((time.perf_counter() - t_attr) * 1000, 2)
|
| 152 |
+
|
| 153 |
+
# Also store visual embedding for attribute-based search
|
| 154 |
+
if run_reid:
|
| 155 |
+
self.attributes.add_to_gallery(crop, person_entry["assigned_person_id"])
|
| 156 |
+
|
| 157 |
+
persons_data.append(person_entry)
|
| 158 |
+
|
| 159 |
+
total_ms = (time.perf_counter() - t_start) * 1000
|
| 160 |
+
fps = self._compute_fps(camera_id)
|
| 161 |
+
self._frame_counts[camera_id] = self._frame_counts.get(camera_id, 0) + 1
|
| 162 |
+
|
| 163 |
+
return {
|
| 164 |
+
"camera_id": camera_id,
|
| 165 |
+
"frame_id": self._frame_counts[camera_id],
|
| 166 |
+
"image_size": {"width": w, "height": h},
|
| 167 |
+
"persons": persons_data,
|
| 168 |
+
"person_count": len(persons_data),
|
| 169 |
+
"detection_count": len(detections),
|
| 170 |
+
"latency": {
|
| 171 |
+
"detection_ms": round(det_ms, 2),
|
| 172 |
+
"tracking_ms": round(track_ms, 2),
|
| 173 |
+
"total_ms": round(total_ms, 2),
|
| 174 |
+
},
|
| 175 |
+
"fps": fps,
|
| 176 |
+
"timestamp": time.time(),
|
| 177 |
+
}
|
vision/reid.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
vision/reid.py - Cross-Camera Person Re-Identification using ViT + FAISS
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
import numpy as np
|
| 7 |
+
import faiss
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from PIL import Image
|
| 11 |
+
from typing import List, Dict, Optional, Tuple
|
| 12 |
+
from transformers import ViTImageProcessor, ViTModel
|
| 13 |
+
from loguru import logger
|
| 14 |
+
from config import settings, DEVICE, FAISS_DIR
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class PersonReID:
|
| 18 |
+
"""
|
| 19 |
+
Person Re-Identification using google/vit-base-patch16-224 embeddings.
|
| 20 |
+
Embeddings are stored in a FAISS IndexFlatIP (inner product = cosine after normalization).
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
INDEX_FILE = str(FAISS_DIR / "reid_index.faiss")
|
| 24 |
+
META_FILE = str(FAISS_DIR / "reid_meta.npy")
|
| 25 |
+
|
| 26 |
+
def __init__(self):
|
| 27 |
+
logger.info(f"Loading ReID model: {settings.REID_MODEL}")
|
| 28 |
+
self.processor = ViTImageProcessor.from_pretrained(settings.REID_MODEL)
|
| 29 |
+
self.model = ViTModel.from_pretrained(settings.REID_MODEL)
|
| 30 |
+
self.model.to(DEVICE)
|
| 31 |
+
self.model.eval()
|
| 32 |
+
|
| 33 |
+
self.dim = settings.REID_EMBEDDING_DIM
|
| 34 |
+
self.index = self._load_or_create_index()
|
| 35 |
+
# meta list: maps faiss internal id (row index) → {"person_id": str, "camera_id": str}
|
| 36 |
+
self.meta: List[Dict] = self._load_meta()
|
| 37 |
+
logger.info(f"✅ ReID ready. FAISS index size: {self.index.ntotal}")
|
| 38 |
+
|
| 39 |
+
# ── Index Management ──────────────────────────────────────────────────────
|
| 40 |
+
|
| 41 |
+
def _load_or_create_index(self) -> faiss.IndexFlatIP:
|
| 42 |
+
if os.path.exists(self.INDEX_FILE):
|
| 43 |
+
logger.info("Loading existing FAISS ReID index.")
|
| 44 |
+
return faiss.read_index(self.INDEX_FILE)
|
| 45 |
+
logger.info("Creating new FAISS ReID index (IndexFlatIP).")
|
| 46 |
+
return faiss.IndexFlatIP(self.dim)
|
| 47 |
+
|
| 48 |
+
def _load_meta(self) -> List[Dict]:
|
| 49 |
+
if os.path.exists(self.META_FILE):
|
| 50 |
+
data = np.load(self.META_FILE, allow_pickle=True)
|
| 51 |
+
return list(data)
|
| 52 |
+
return []
|
| 53 |
+
|
| 54 |
+
def save(self):
|
| 55 |
+
faiss.write_index(self.index, self.INDEX_FILE)
|
| 56 |
+
np.save(self.META_FILE, np.array(self.meta, dtype=object))
|
| 57 |
+
logger.debug("FAISS ReID index saved.")
|
| 58 |
+
|
| 59 |
+
# ── Embedding Extraction ──────────────────────────────────────────────────
|
| 60 |
+
|
| 61 |
+
@torch.inference_mode()
|
| 62 |
+
def extract_embedding(self, image: Image.Image) -> np.ndarray:
|
| 63 |
+
"""Extract L2-normalized ViT CLS token embedding from a cropped person image."""
|
| 64 |
+
inputs = self.processor(images=image, return_tensors="pt")
|
| 65 |
+
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 66 |
+
outputs = self.model(**inputs)
|
| 67 |
+
# CLS token → (1, hidden_size)
|
| 68 |
+
cls = outputs.last_hidden_state[:, 0, :]
|
| 69 |
+
# L2 normalize for cosine similarity via inner product
|
| 70 |
+
embedding = F.normalize(cls, p=2, dim=-1).cpu().numpy().astype(np.float32)
|
| 71 |
+
return embedding # shape: (1, 768)
|
| 72 |
+
|
| 73 |
+
# ── Gallery Operations ────────────────────────────────────────────────────
|
| 74 |
+
|
| 75 |
+
def add_person(self, image: Image.Image, person_id: str, camera_id: str) -> int:
|
| 76 |
+
"""Add a new person embedding to the FAISS gallery. Returns faiss_id."""
|
| 77 |
+
embedding = self.extract_embedding(image)
|
| 78 |
+
faiss_id = self.index.ntotal
|
| 79 |
+
self.index.add(embedding)
|
| 80 |
+
self.meta.append({"person_id": person_id, "camera_id": camera_id, "faiss_id": faiss_id})
|
| 81 |
+
self.save()
|
| 82 |
+
return faiss_id
|
| 83 |
+
|
| 84 |
+
def search(
|
| 85 |
+
self,
|
| 86 |
+
image: Image.Image,
|
| 87 |
+
top_k: int = 5,
|
| 88 |
+
similarity_threshold: float = 0.85,
|
| 89 |
+
) -> List[Dict]:
|
| 90 |
+
"""
|
| 91 |
+
Search gallery for matching persons.
|
| 92 |
+
|
| 93 |
+
Returns:
|
| 94 |
+
list of {"person_id": str, "camera_id": str, "similarity": float, "faiss_id": int}
|
| 95 |
+
"""
|
| 96 |
+
if self.index.ntotal == 0:
|
| 97 |
+
return []
|
| 98 |
+
|
| 99 |
+
t0 = time.perf_counter()
|
| 100 |
+
query = self.extract_embedding(image)
|
| 101 |
+
k = min(top_k, self.index.ntotal)
|
| 102 |
+
distances, indices = self.index.search(query, k)
|
| 103 |
+
latency = (time.perf_counter() - t0) * 1000
|
| 104 |
+
|
| 105 |
+
results = []
|
| 106 |
+
for dist, idx in zip(distances[0], indices[0]):
|
| 107 |
+
if idx == -1:
|
| 108 |
+
continue
|
| 109 |
+
similarity = float(dist)
|
| 110 |
+
if similarity >= similarity_threshold:
|
| 111 |
+
meta = self.meta[idx]
|
| 112 |
+
results.append({
|
| 113 |
+
"person_id": meta["person_id"],
|
| 114 |
+
"camera_id": meta["camera_id"],
|
| 115 |
+
"similarity": round(similarity, 4),
|
| 116 |
+
"faiss_id": int(idx),
|
| 117 |
+
})
|
| 118 |
+
|
| 119 |
+
logger.debug(f"ReID search: {len(results)} matches in {latency:.1f}ms")
|
| 120 |
+
return results
|
| 121 |
+
|
| 122 |
+
def search_by_embedding(
|
| 123 |
+
self,
|
| 124 |
+
embedding: np.ndarray,
|
| 125 |
+
top_k: int = 5,
|
| 126 |
+
similarity_threshold: float = 0.85,
|
| 127 |
+
) -> List[Dict]:
|
| 128 |
+
"""Direct search with a precomputed embedding."""
|
| 129 |
+
if self.index.ntotal == 0:
|
| 130 |
+
return []
|
| 131 |
+
k = min(top_k, self.index.ntotal)
|
| 132 |
+
distances, indices = self.index.search(embedding, k)
|
| 133 |
+
results = []
|
| 134 |
+
for dist, idx in zip(distances[0], indices[0]):
|
| 135 |
+
if idx == -1 or float(dist) < similarity_threshold:
|
| 136 |
+
continue
|
| 137 |
+
meta = self.meta[idx]
|
| 138 |
+
results.append({
|
| 139 |
+
"person_id": meta["person_id"],
|
| 140 |
+
"camera_id": meta["camera_id"],
|
| 141 |
+
"similarity": round(float(dist), 4),
|
| 142 |
+
"faiss_id": int(idx),
|
| 143 |
+
})
|
| 144 |
+
return results
|
vision/stream_manager.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import threading
|
| 3 |
+
import time
|
| 4 |
+
import io
|
| 5 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 6 |
+
from loguru import logger
|
| 7 |
+
|
| 8 |
+
class StreamManager:
|
| 9 |
+
"""
|
| 10 |
+
Manages background threads that constantly pull frames from RTSP streams or local webcams,
|
| 11 |
+
run the vision pipeline, and store annotated JPEG bytes for MJPEG streaming.
|
| 12 |
+
"""
|
| 13 |
+
def __init__(self):
|
| 14 |
+
self.streams = {} # camera_id -> dict with thread info
|
| 15 |
+
self.frames = {} # camera_id -> latest JPEG bytes
|
| 16 |
+
self.results = {} # camera_id -> latest inference result dict
|
| 17 |
+
self.running = True
|
| 18 |
+
|
| 19 |
+
def add_stream(self, camera_id: str, source: str):
|
| 20 |
+
"""Add a new camera stream"""
|
| 21 |
+
if camera_id in self.streams:
|
| 22 |
+
logger.info(f"Stream {camera_id} is already running.")
|
| 23 |
+
return
|
| 24 |
+
|
| 25 |
+
# If source is just a digit like "0", handle it as an int for local webcam
|
| 26 |
+
if source.isdigit():
|
| 27 |
+
source = int(source)
|
| 28 |
+
|
| 29 |
+
logger.info(f"Adding stream {camera_id} from {source}")
|
| 30 |
+
thread = threading.Thread(target=self._stream_loop, args=(camera_id, source), daemon=True)
|
| 31 |
+
self.streams[camera_id] = {
|
| 32 |
+
"thread": thread,
|
| 33 |
+
"source": source,
|
| 34 |
+
"active": True
|
| 35 |
+
}
|
| 36 |
+
thread.start()
|
| 37 |
+
|
| 38 |
+
def remove_stream(self, camera_id: str):
|
| 39 |
+
if camera_id in self.streams:
|
| 40 |
+
logger.info(f"Removing stream {camera_id}")
|
| 41 |
+
self.streams[camera_id]["active"] = False
|
| 42 |
+
del self.streams[camera_id]
|
| 43 |
+
|
| 44 |
+
def _stream_loop(self, camera_id: str, source):
|
| 45 |
+
# Import inside the loop to avoid circular import issues if imported from app.py
|
| 46 |
+
from app import vision_pipeline
|
| 47 |
+
|
| 48 |
+
cap = cv2.VideoCapture(source)
|
| 49 |
+
if not cap.isOpened():
|
| 50 |
+
logger.error(f"Failed to open Stream: {camera_id} -> {source}")
|
| 51 |
+
self.remove_stream(camera_id)
|
| 52 |
+
return
|
| 53 |
+
|
| 54 |
+
fps_native = cap.get(cv2.CAP_PROP_FPS) or 25.0
|
| 55 |
+
delay = 1.0 / max(1, fps_native)
|
| 56 |
+
|
| 57 |
+
logger.info(f"Stream {camera_id} connected. Target FPS: {fps_native}")
|
| 58 |
+
|
| 59 |
+
while self.streams.get(camera_id, {}).get("active", False) and self.running:
|
| 60 |
+
start_t = time.perf_counter()
|
| 61 |
+
ret, frame = cap.read()
|
| 62 |
+
|
| 63 |
+
if not ret:
|
| 64 |
+
logger.warning(f"Stream {camera_id} disconnected. Attempting reconnect...")
|
| 65 |
+
time.sleep(2)
|
| 66 |
+
cap = cv2.VideoCapture(source)
|
| 67 |
+
continue
|
| 68 |
+
|
| 69 |
+
# Convert to PIL
|
| 70 |
+
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 71 |
+
pil_img = Image.fromarray(rgb_frame)
|
| 72 |
+
|
| 73 |
+
# ── Inference ──
|
| 74 |
+
has_result = False
|
| 75 |
+
result_data = None
|
| 76 |
+
try:
|
| 77 |
+
if vision_pipeline:
|
| 78 |
+
result_data = vision_pipeline.process_frame(
|
| 79 |
+
image_input=pil_img,
|
| 80 |
+
camera_id=camera_id,
|
| 81 |
+
run_attributes=True,
|
| 82 |
+
run_reid=True
|
| 83 |
+
)
|
| 84 |
+
self.results[camera_id] = result_data
|
| 85 |
+
has_result = True
|
| 86 |
+
except Exception as e:
|
| 87 |
+
logger.error(f"Inference error on stream {camera_id}: {e}")
|
| 88 |
+
|
| 89 |
+
# ── Annotation ──
|
| 90 |
+
if has_result and result_data:
|
| 91 |
+
pil_img = self._annotate_frame(pil_img, result_data)
|
| 92 |
+
|
| 93 |
+
# ── Encode to JPEG ──
|
| 94 |
+
buf = io.BytesIO()
|
| 95 |
+
pil_img.save(buf, format="JPEG", quality=75)
|
| 96 |
+
self.frames[camera_id] = buf.getvalue()
|
| 97 |
+
|
| 98 |
+
# Enforce FPS limit
|
| 99 |
+
elapsed = time.perf_counter() - start_t
|
| 100 |
+
if elapsed < delay:
|
| 101 |
+
time.sleep(delay - elapsed)
|
| 102 |
+
|
| 103 |
+
cap.release()
|
| 104 |
+
logger.info(f"Stream {camera_id} loop terminated.")
|
| 105 |
+
|
| 106 |
+
def _annotate_frame(self, image: Image.Image, result: dict) -> Image.Image:
|
| 107 |
+
"""Draw bounding boxes natively on the PIL image before encoding to MJPEG"""
|
| 108 |
+
draw = ImageDraw.Draw(image)
|
| 109 |
+
try:
|
| 110 |
+
# Using default font for robust cross-platform rendering
|
| 111 |
+
font = ImageFont.load_default()
|
| 112 |
+
except:
|
| 113 |
+
font = None
|
| 114 |
+
|
| 115 |
+
for p in result.get("persons", []):
|
| 116 |
+
x1, y1, x2, y2 = p["bbox"]
|
| 117 |
+
is_new = p.get("is_new_person", False)
|
| 118 |
+
|
| 119 |
+
# Extract ReID sim
|
| 120 |
+
reid_sim = 0
|
| 121 |
+
if p.get("reid_matches"):
|
| 122 |
+
reid_sim = p["reid_matches"][0].get("similarity", 0)
|
| 123 |
+
|
| 124 |
+
is_alert = not is_new and reid_sim > 0.85
|
| 125 |
+
|
| 126 |
+
# Colors match the frontend UI standard
|
| 127 |
+
color = "#FF1744" if is_alert else ("#FFB300" if is_new else "#00E5FF")
|
| 128 |
+
|
| 129 |
+
# Bounding Box
|
| 130 |
+
draw.rectangle([x1, y1, x2, y2], outline=color, width=3)
|
| 131 |
+
|
| 132 |
+
# Label
|
| 133 |
+
label = f"TRK-{p.get('track_id')} {(p.get('score', 0)*100):.0f}%"
|
| 134 |
+
# Draw label background
|
| 135 |
+
text_bg_y0 = max(0, y1 - 16)
|
| 136 |
+
draw.rectangle([x1, text_bg_y0, x1 + 120, text_bg_y0 + 16], fill=color)
|
| 137 |
+
|
| 138 |
+
if font:
|
| 139 |
+
draw.text((x1 + 4, text_bg_y0 + 2), label, fill="black", font=font)
|
| 140 |
+
|
| 141 |
+
return image
|
| 142 |
+
|
| 143 |
+
def get_frame(self, camera_id: str):
|
| 144 |
+
return self.frames.get(camera_id)
|
| 145 |
+
|
| 146 |
+
def shutdown(self):
|
| 147 |
+
self.running = False
|
| 148 |
+
self.streams.clear()
|
| 149 |
+
|
| 150 |
+
# Global Singleton
|
| 151 |
+
stream_manager = StreamManager()
|
vision/tracker.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
vision/tracker.py - Multi-Object Tracking using ByteTrack algorithm
|
| 3 |
+
Assigns persistent track IDs across frames for each camera.
|
| 4 |
+
"""
|
| 5 |
+
import numpy as np
|
| 6 |
+
from typing import List, Dict, Tuple, Optional
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from loguru import logger
|
| 9 |
+
from config import settings
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
|
| 13 |
+
class Track:
|
| 14 |
+
track_id: int
|
| 15 |
+
bbox: List[float] # [x1, y1, x2, y2]
|
| 16 |
+
score: float
|
| 17 |
+
age: int = 0
|
| 18 |
+
hits: int = 1
|
| 19 |
+
time_since_update: int = 0
|
| 20 |
+
state: str = "active" # active | lost | removed
|
| 21 |
+
history: List[List[float]] = field(default_factory=list)
|
| 22 |
+
|
| 23 |
+
def update(self, bbox: List[float], score: float):
|
| 24 |
+
self.bbox = bbox
|
| 25 |
+
self.score = score
|
| 26 |
+
self.hits += 1
|
| 27 |
+
self.age += 1
|
| 28 |
+
self.time_since_update = 0
|
| 29 |
+
self.state = "active"
|
| 30 |
+
self.history.append(bbox)
|
| 31 |
+
if len(self.history) > 30:
|
| 32 |
+
self.history.pop(0)
|
| 33 |
+
|
| 34 |
+
def predict(self):
|
| 35 |
+
"""Simple linear prediction (extend with Kalman for production)."""
|
| 36 |
+
self.time_since_update += 1
|
| 37 |
+
self.age += 1
|
| 38 |
+
if self.time_since_update > settings.TRACK_BUFFER:
|
| 39 |
+
self.state = "removed"
|
| 40 |
+
elif self.time_since_update > 5:
|
| 41 |
+
self.state = "lost"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def iou(boxA: List[float], boxB: List[float]) -> float:
|
| 45 |
+
"""Compute Intersection over Union between two [x1,y1,x2,y2] boxes."""
|
| 46 |
+
xA = max(boxA[0], boxB[0])
|
| 47 |
+
yA = max(boxA[1], boxB[1])
|
| 48 |
+
xB = min(boxA[2], boxB[2])
|
| 49 |
+
yB = min(boxA[3], boxB[3])
|
| 50 |
+
inter = max(0, xB - xA) * max(0, yB - yA)
|
| 51 |
+
areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
|
| 52 |
+
areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
|
| 53 |
+
union = areaA + areaB - inter
|
| 54 |
+
return inter / (union + 1e-6)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class ByteTracker:
|
| 58 |
+
"""
|
| 59 |
+
Simplified ByteTrack-style multi-object tracker.
|
| 60 |
+
Uses two-stage matching: high-confidence detections first, then low-confidence.
|
| 61 |
+
One instance per camera.
|
| 62 |
+
"""
|
| 63 |
+
|
| 64 |
+
def __init__(self, camera_id: str):
|
| 65 |
+
self.camera_id = camera_id
|
| 66 |
+
self.tracks: List[Track] = []
|
| 67 |
+
self._next_id = 1
|
| 68 |
+
self.frame_id = 0
|
| 69 |
+
logger.info(f"ByteTracker initialized for camera: {camera_id}")
|
| 70 |
+
|
| 71 |
+
def _new_track(self, bbox: List[float], score: float) -> Track:
|
| 72 |
+
t = Track(track_id=self._next_id, bbox=bbox, score=score, history=[bbox])
|
| 73 |
+
self._next_id += 1
|
| 74 |
+
return t
|
| 75 |
+
|
| 76 |
+
def _match(
|
| 77 |
+
self,
|
| 78 |
+
detections: List[Dict],
|
| 79 |
+
threshold: float = 0.5,
|
| 80 |
+
) -> Tuple[List[Tuple[int, int]], List[int], List[int]]:
|
| 81 |
+
"""
|
| 82 |
+
Greedy IoU matching between active tracks and detections.
|
| 83 |
+
Returns: (matched pairs), (unmatched track indices), (unmatched det indices)
|
| 84 |
+
"""
|
| 85 |
+
active = [i for i, t in enumerate(self.tracks) if t.state != "removed"]
|
| 86 |
+
if not active or not detections:
|
| 87 |
+
return [], active, list(range(len(detections)))
|
| 88 |
+
|
| 89 |
+
iou_matrix = np.zeros((len(active), len(detections)))
|
| 90 |
+
for i, ti in enumerate(active):
|
| 91 |
+
for j, det in enumerate(detections):
|
| 92 |
+
iou_matrix[i, j] = iou(self.tracks[ti].bbox, det["bbox"])
|
| 93 |
+
|
| 94 |
+
matched, unmatched_tracks, unmatched_dets = [], list(active), list(range(len(detections)))
|
| 95 |
+
while True:
|
| 96 |
+
if iou_matrix.size == 0:
|
| 97 |
+
break
|
| 98 |
+
flat_idx = np.argmax(iou_matrix)
|
| 99 |
+
ti_local, di = divmod(flat_idx, iou_matrix.shape[1])
|
| 100 |
+
if iou_matrix[ti_local, di] < threshold:
|
| 101 |
+
break
|
| 102 |
+
ti_global = active[ti_local]
|
| 103 |
+
matched.append((ti_global, di))
|
| 104 |
+
unmatched_tracks.remove(ti_global)
|
| 105 |
+
unmatched_dets.remove(di)
|
| 106 |
+
iou_matrix[ti_local, :] = -1
|
| 107 |
+
iou_matrix[:, di] = -1
|
| 108 |
+
|
| 109 |
+
return matched, unmatched_tracks, unmatched_dets
|
| 110 |
+
|
| 111 |
+
def update(self, detections: List[Dict]) -> List[Dict]:
|
| 112 |
+
"""
|
| 113 |
+
Update tracker with new detections.
|
| 114 |
+
|
| 115 |
+
Args:
|
| 116 |
+
detections: list of {"bbox": [...], "score": float}
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
tracked_objects: list of {"track_id": int, "bbox": [...], "score": float, "state": str}
|
| 120 |
+
"""
|
| 121 |
+
self.frame_id += 1
|
| 122 |
+
|
| 123 |
+
# Predict existing tracks
|
| 124 |
+
for t in self.tracks:
|
| 125 |
+
t.predict()
|
| 126 |
+
|
| 127 |
+
# Remove permanently dead tracks
|
| 128 |
+
self.tracks = [t for t in self.tracks if t.state != "removed"]
|
| 129 |
+
|
| 130 |
+
# High confidence detections
|
| 131 |
+
high_dets = [d for d in detections if d["score"] >= settings.TRACK_THRESH]
|
| 132 |
+
low_dets = [d for d in detections if d["score"] < settings.TRACK_THRESH]
|
| 133 |
+
|
| 134 |
+
# Stage 1: Match high-confidence detections
|
| 135 |
+
matched, unmatched_tracks, unmatched_high = self._match(high_dets, threshold=settings.MATCH_THRESH)
|
| 136 |
+
for ti, di in matched:
|
| 137 |
+
self.tracks[ti].update(high_dets[di]["bbox"], high_dets[di]["score"])
|
| 138 |
+
|
| 139 |
+
# Stage 2: Match remaining tracks with low-confidence detections
|
| 140 |
+
remaining_unmatched = [ti for ti in unmatched_tracks if self.tracks[ti].state == "lost"]
|
| 141 |
+
if remaining_unmatched and low_dets:
|
| 142 |
+
low_iou_matrix = np.zeros((len(remaining_unmatched), len(low_dets)))
|
| 143 |
+
for i, ti in enumerate(remaining_unmatched):
|
| 144 |
+
for j, det in enumerate(low_dets):
|
| 145 |
+
low_iou_matrix[i, j] = iou(self.tracks[ti].bbox, det["bbox"])
|
| 146 |
+
for i, ti in enumerate(remaining_unmatched):
|
| 147 |
+
best_j = int(np.argmax(low_iou_matrix[i]))
|
| 148 |
+
if low_iou_matrix[i, best_j] > 0.5:
|
| 149 |
+
self.tracks[ti].update(low_dets[best_j]["bbox"], low_dets[best_j]["score"])
|
| 150 |
+
|
| 151 |
+
# Create new tracks for unmatched high-confidence detections
|
| 152 |
+
for di in unmatched_high:
|
| 153 |
+
self.tracks.append(self._new_track(high_dets[di]["bbox"], high_dets[di]["score"]))
|
| 154 |
+
|
| 155 |
+
# Return active tracks
|
| 156 |
+
return [
|
| 157 |
+
{
|
| 158 |
+
"track_id": t.track_id,
|
| 159 |
+
"bbox": t.bbox,
|
| 160 |
+
"score": t.score,
|
| 161 |
+
"state": t.state,
|
| 162 |
+
"age": t.age,
|
| 163 |
+
"hits": t.hits,
|
| 164 |
+
}
|
| 165 |
+
for t in self.tracks
|
| 166 |
+
if t.state == "active"
|
| 167 |
+
]
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
class TrackerManager:
|
| 171 |
+
"""Manages one ByteTracker per camera."""
|
| 172 |
+
|
| 173 |
+
def __init__(self):
|
| 174 |
+
self._trackers: Dict[str, ByteTracker] = {}
|
| 175 |
+
|
| 176 |
+
def get_tracker(self, camera_id: str) -> ByteTracker:
|
| 177 |
+
if camera_id not in self._trackers:
|
| 178 |
+
self._trackers[camera_id] = ByteTracker(camera_id)
|
| 179 |
+
return self._trackers[camera_id]
|
| 180 |
+
|
| 181 |
+
def update(self, camera_id: str, detections: List[Dict]) -> List[Dict]:
|
| 182 |
+
return self.get_tracker(camera_id).update(detections)
|
| 183 |
+
|
| 184 |
+
def reset(self, camera_id: str):
|
| 185 |
+
if camera_id in self._trackers:
|
| 186 |
+
del self._trackers[camera_id]
|