diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000000000000000000000000000000..72cef222a7f007265ce8b7cbf6494f9e82ba404f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,74 @@
+# Ignore everything we don't need in the Docker image
+__pycache__/
+*.pyc
+*.pyo
+venv/
+.venv/
+.env
+.env.example
+
+# Videos & test files
+*.mp4
+*.avi
+*.mov
+*.mkv
+*.mpeg
+*.wmv
+*.flv
+*.jpeg
+*.jpg
+download.jpeg
+images.jpeg
+
+# Large model files — downloaded at build time from HF Hub instead
+behavior_analysis/accident_detection.pt
+behavior_analysis/fight_detection.pt
+report_generation/models/qwen2.5-3b-instruct-q4_k_m.gguf
+report_generation/models/.cache/
+
+# Output directories
+video_processing_outputs/
+logs/
+uploads/
+temp_faces/
+
+# Test & debug files
+test_*.py
+check_*.py
+debug_*.py
+verify_*.py
+reproduce_issue.py
+fix_*.py
+clear_cache_and_test.py
+simple_test_report.py
+quick_fix_reports.py
+scan_imports_temp.py
+protected_api_example.py
+
+# Misc
+output*.txt
+verify_log.txt
+*.zip
+*.bat
+README.md
+BUCKET_NAMES.md
+VIDEO_CAPTIONING_MONGODB_INTEGRATION.md
+video_captioning_store/
+backfill_*.py
+create_subscriptions.py
+
+# Unnecessary sub-items
+behavior_analysis/action_recognition_outputs/
+video_captioning/video_captioning/captions.db
+video_captioning/video_captioning/tests/
+video_captioning/video_captioning/vector_store/
+video_captioning/video_captioning/example_usage.py
+video_captioning/video_captioning/install_requirements.py
+video_captioning/video_captioning/integration_example.py
+video_captioning/video_captioning/quick_test.py
+video_captioning/video_captioning/run_video_test.py
+video_captioning/video_captioning/simple_test.py
+video_captioning/video_captioning/test_runner.py
+video_captioning/video_captioning/working_test.py
+video_captioning/video_captioning/data_flow_diagram.md
+video_captioning/video_captioning/README.md
diff --git a/DetectifAI_db/app_integrated.py b/DetectifAI_db/app_integrated.py
new file mode 100644
index 0000000000000000000000000000000000000000..683cf40b3f007147643a54e799f7b89377753422
--- /dev/null
+++ b/DetectifAI_db/app_integrated.py
@@ -0,0 +1,1250 @@
+"""
+DetectifAI Flask Backend - AI-Powered CCTV Surveillance System with Database Integration
+
+Enhanced Flask API for:
+- Video upload and processing with DetectifAI security focus
+- Real-time processing status and results
+- Object detection with fire/weapon recognition
+- Security event analysis and threat assessment
+- Database integration with MongoDB and FAISS vector search
+- User authentication and authorization
+- Frontend integration for surveillance dashboard
+"""
+
+import os
+from datetime import datetime, timedelta, timezone
+from uuid import uuid4
+
+from flask import Flask, request, jsonify, send_file, send_from_directory, g
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+import threading
+import json
+import logging
+import jwt
+from dotenv import load_dotenv
+import numpy as np
+
+# Import DetectifAI components
+from main_pipeline import CompleteVideoProcessingPipeline
+from config import get_security_focused_config, VideoProcessingConfig
+
+# Import database components
+from pymongo import MongoClient
+from minio import Minio
+from minio.error import S3Error
+from vector_index import get_faiss_manager, generate_text_embedding, generate_visual_embedding
+
+# Try to import caption search (optional - may not be available)
+try:
+    from caption_search import get_caption_search_engine
+    CAPTION_SEARCH_AVAILABLE = True
+except ImportError as e:
+    # The module-level `logger` is only created further down, after
+    # logging.basicConfig(); referencing it here raised NameError exactly when
+    # the optional import failed. A named logger is used instead of
+    # logging.warning() because the latter implicitly calls basicConfig() and
+    # would turn the explicit logging configuration below into a no-op.
+    logging.getLogger(__name__).warning(f"Caption search not available: {e}")
+    CAPTION_SEARCH_AVAILABLE = False
+    get_caption_search_engine = None
+
+# Try to import DetectifAI-specific components
+try:
+    from detectifai_events import DetectifAIEventType, ThreatLevel
+    DETECTIFAI_EVENTS_AVAILABLE = True
+except ImportError:
+    DETECTIFAI_EVENTS_AVAILABLE = False
+    # Log through a named logger rather than logging.warning(): the
+    # module-level function calls basicConfig() when the root logger has no
+    # handlers, after which the explicit logging.basicConfig(...) later in
+    # this file does nothing and its format / file handler are never applied.
+    logging.getLogger(__name__).warning("DetectifAI events module not available - using basic functionality")
+
+# === Load Environment ===
+# load_dotenv() runs first so the os.getenv() calls below can see values it
+# provides. Every setting without an explicit default is None when unset.
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
+MINIO_BUCKET = os.getenv("MINIO_BUCKET")
+# NOTE(review): when JWT_SECRET is unset, tokens are signed and verified with
+# the literal "defaultsecret" — confirm every deployment sets JWT_SECRET.
+JWT_SECRET = os.getenv("JWT_SECRET", "defaultsecret")
+
+# Initialize Flask app
+app = Flask(__name__)
+# CORS is enabled for every route under /api/ with any origin allowed.
+CORS(app, resources={r"/api/*": {"origins": "*"}})
+
+# The log directory must exist before logging is configured: the FileHandler
+# below opens 'logs/detectifai_api.log' when it is constructed and raises
+# FileNotFoundError if 'logs/' is missing (e.g. on a fresh checkout).
+os.makedirs('logs', exist_ok=True)
+
+# Configure logging: INFO and above go to both the console and the log file.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('logs/detectifai_api.log')
+    ]
+)
+logger = logging.getLogger(__name__)
+
+# Configuration
+UPLOAD_FOLDER = 'uploads'
+OUTPUT_FOLDER = 'video_processing_outputs'
+ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'}
+MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB max file size
+
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
+
+# Create the remaining working directories
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+os.makedirs(OUTPUT_FOLDER, exist_ok=True)
+
+# === MongoDB Atlas Setup ===
+# One client is created at import time and shared by every request handler.
+# NOTE(review): get_default_database() relies on MONGO_URI naming a database;
+# confirm the configured URI includes one.
+mongo = MongoClient(MONGO_URI)
+db = mongo.get_default_database()
+
+# Collections from schema
+# Each name below is a module-level handle to the collection of the same
+# name, except `user`, which points at the 'users' collection.
+admin = db.admin
+user = db.users  # Use 'users' to match database_setup.py
+users = db.users  # Alias for clarity
+video_file = db.video_file
+event = db.event
+event_clip = db.event_clip
+detected_faces = db.detected_faces
+face_matches = db.face_matches
+event_description = db.event_description
+event_caption = db.event_caption
+query = db.query
+query_result = db.query_result
+subscription_plan = db.subscription_plan
+user_subscription = db.user_subscription
+
+# === MinIO Setup ===
+# NOTE(review): secure=False is hard-coded, so the client presumably talks
+# plain HTTP to MINIO_ENDPOINT — confirm this is intended outside local dev.
+minio_client = Minio(
+    MINIO_ENDPOINT,
+    access_key=MINIO_ACCESS_KEY,
+    secret_key=MINIO_SECRET_KEY,
+    secure=False
+)
+
+# Make sure the configured bucket exists at import time. The two "already
+# exists" error codes are tolerated; any other S3Error aborts startup.
+try:
+    if not minio_client.bucket_exists(MINIO_BUCKET):
+        minio_client.make_bucket(MINIO_BUCKET)
+except S3Error as err:
+    if err.code != "BucketAlreadyOwnedByYou" and err.code != "BucketAlreadyExists":
+        raise
+
+# === FAISS Setup ===
+# Shared manager used below to add visual and text embeddings for events.
+faiss_manager = get_faiss_manager()
+
+# Store processing status in memory (use Redis in production)
+# Maps video_id -> status dict. Entries are created by the upload endpoint
+# and updated by the background processing thread; nothing in the visible
+# code removes them, and they do not survive a process restart.
+processing_status = {}
+
+# === Auth Helpers ===
+def generate_jwt(user):
+    """Create a signed HS256 token for ``user`` that expires in 24 hours.
+
+    ``user`` must provide ``user_id`` and ``email``; ``role`` falls back to
+    ``"user"`` when the key is absent.
+    """
+    expires_at = datetime.now(timezone.utc) + timedelta(hours=24)
+    claims = dict(
+        user_id=user["user_id"],
+        email=user["email"],
+        role=user.get("role", "user"),
+        exp=expires_at,
+    )
+    return jwt.encode(claims, JWT_SECRET, algorithm="HS256")
+
+def decode_jwt(token):
+    """Return the decoded claims of ``token``, or ``None`` if it is expired or invalid."""
+    claims = None
+    try:
+        claims = jwt.decode(token, JWT_SECRET, algorithms=["HS256"])
+    except (jwt.ExpiredSignatureError, jwt.InvalidTokenError):
+        # Both failure modes are reported to the caller the same way.
+        claims = None
+    return claims
+
+def auth_required(role=None):
+    """Decorator factory that protects a view with JWT authentication.
+
+    The wrapped view runs only when the ``Authorization`` header carries a
+    token that ``decode_jwt`` accepts. The decoded claims are stored on
+    ``g.user`` for the view to read.
+
+    Args:
+        role: When given, the token's ``role`` claim must equal this value.
+
+    The wrapper returns ``401`` for a missing, invalid or expired token and
+    ``403`` when the role does not match.
+    """
+    from functools import wraps  # function-scope import, as used elsewhere in this file
+
+    bearer_prefix = "Bearer "
+
+    def decorator(func):
+        # wraps() copies __name__ (which Flask uses as the endpoint name) and
+        # also the docstring and __wrapped__, which the manual copy missed.
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            header = request.headers.get("Authorization", "")
+            # Strip only a leading "Bearer " scheme. The previous
+            # str.replace() removed that substring anywhere in the header.
+            # A bare token without the scheme is still accepted.
+            if header.startswith(bearer_prefix):
+                token = header[len(bearer_prefix):]
+            else:
+                token = header
+            token = token.strip()
+            if not token:
+                return jsonify({"error": "missing token"}), 401
+            decoded = decode_jwt(token)
+            if not decoded:
+                return jsonify({"error": "invalid or expired token"}), 401
+            if role and decoded.get("role") != role:
+                return jsonify({"error": "unauthorized"}), 403
+            g.user = decoded
+            return func(*args, **kwargs)
+        return wrapper
+    return decorator
+
+def allowed_file(filename):
+    """Return True when ``filename`` has an extension listed in ALLOWED_EXTENSIONS.
+
+    The comparison is case-insensitive and uses the text after the last dot.
+    """
+    _stem, dot, extension = filename.rpartition('.')
+    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
+
+def extract_detectifai_results(pipeline_results):
+    """Regroup raw pipeline output into the DetectifAI result structure.
+
+    Reads ``pipeline_results['outputs']`` and
+    ``pipeline_results['processing_stats']`` and returns a dict with the
+    sections ``video_info``, ``security_detection``, ``event_analysis``,
+    ``output_files`` and ``performance``. Missing values fall back to
+    zero/empty defaults. Any exception is logged and
+    ``{'error': 'Failed to extract results'}`` is returned instead.
+    """
+    try:
+        outputs = pipeline_results['outputs']
+        keyframe_count = outputs.get('total_keyframes', 0)
+        stats = pipeline_results['processing_stats']
+        output_dir = outputs.get('output_directory', '')
+
+        # Basic video metrics
+        video_info = {
+            'total_keyframes': keyframe_count,
+            'processing_time': stats.get('total_processing_time', 0),
+            'output_directory': output_dir,
+        }
+
+        # Security detection results; the fire/weapon counters and the alert
+        # list are placeholders that this function never fills in.
+        security_detection = {
+            'total_object_detections': outputs.get('total_object_detections', 0),
+            'total_object_events': outputs.get('total_object_events', 0),
+            'detectifai_events': outputs.get('detectifai_events', 0),
+            'fire_detections': 0,
+            'weapon_detections': 0,
+            'security_alerts': [],
+        }
+
+        # Event analysis; the priority counters are placeholders as well.
+        event_analysis = {
+            'canonical_events': outputs.get('canonical_events', 0),
+            'total_motion_events': outputs.get('total_motion_events', 0),
+            'high_priority_events': 0,
+            'critical_events': 0,
+        }
+
+        # Output files
+        output_files = {
+            'keyframes_directory': os.path.join(output_dir, 'frames'),
+            'reports': outputs.get('reports', {}),
+            'highlight_reels': outputs.get('highlight_reels', {}),
+            'compressed_video': outputs.get('compressed_video', ''),
+        }
+
+        # System performance
+        performance = {
+            'frames_processed': stats.get('frames_processed', 0),
+            'frames_enhanced': stats.get('frames_enhanced', 0),
+            'gpu_acceleration': stats.get('gpu_used', False),
+        }
+
+        return {
+            'video_info': video_info,
+            'security_detection': security_detection,
+            'event_analysis': event_analysis,
+            'output_files': output_files,
+            'performance': performance,
+        }
+
+    except Exception as e:
+        logger.error(f"Error extracting DetectifAI results: {e}")
+        return {'error': 'Failed to extract results'}
+
+def process_video_async(video_id, video_path, config_type='detectifai', user_id=None):
+    """Process video in background thread with DetectifAI focus and database integration.
+
+    Runs the processing pipeline on ``video_path``, writes the outcome to the
+    ``video_file``, ``event`` and ``event_description`` collections and to the
+    FAISS indexes, and publishes progress through the in-memory
+    ``processing_status[video_id]`` entry, which must already exist.
+
+    Args:
+        video_id: Key of the existing ``processing_status`` entry and of the
+            ``video_file`` document to update.
+        video_path: Path of the video file handed to the pipeline.
+        config_type: 'detectifai'/'security', 'high_recall' or 'balanced';
+            any other value uses a default ``VideoProcessingConfig``.
+        user_id: Accepted but not read anywhere in this function.
+
+    Returns:
+        None. Exceptions raised in the body are caught and recorded in
+        ``processing_status[video_id]`` as a 'failed' status.
+    """
+    try:
+        processing_status[video_id]['status'] = 'processing'
+        processing_status[video_id]['progress'] = 0
+        processing_status[video_id]['message'] = 'Initializing DetectifAI processing...'
+        
+        # Select configuration with DetectifAI optimizations
+        if config_type == 'detectifai' or config_type == 'security':
+            config = get_security_focused_config()
+        # Removed robbery detection - using security focused config
+        elif config_type == 'high_recall':
+            # The optional presets are imported lazily; if config.py does not
+            # provide them, a fallback configuration is used instead.
+            try:
+                from config import get_high_recall_config
+                config = get_high_recall_config()
+            except ImportError:
+                config = get_security_focused_config()
+        elif config_type == 'balanced':
+            try:
+                from config import get_balanced_config
+                config = get_balanced_config()
+            except ImportError:
+                config = VideoProcessingConfig()
+        else:
+            config = VideoProcessingConfig()
+        
+        # DetectifAI-specific configuration enhancements
+        # These overrides are applied on top of whichever preset was selected.
+        config.enable_object_detection = True
+        config.enable_facial_recognition = True
+        config.keyframe_extraction_fps = 1.0  # Extract 1 frame per second for surveillance
+        config.enable_adaptive_processing = True
+        
+        # Set custom output directory for this video
+        config.output_base_dir = os.path.join(OUTPUT_FOLDER, video_id)
+        
+        # Initialize pipeline
+        pipeline = CompleteVideoProcessingPipeline(config)
+        
+        # Update progress
+        # Progress is only ever set to 0, 10 and 100 by this function.
+        processing_status[video_id]['progress'] = 10
+        processing_status[video_id]['message'] = 'Extracting keyframes for security analysis...'
+        
+        # Process video with DetectifAI (with error tolerance)
+        output_name = os.path.splitext(os.path.basename(video_path))[0]
+        results = None
+        processing_errors = []
+        
+        try:
+            results = pipeline.process_video_complete(video_path, output_name)
+            logger.info(f"✅ Core pipeline processing completed for {video_id}")
+        except Exception as pipeline_error:
+            logger.error(f"⚠️ Pipeline error (but continuing): {str(pipeline_error)}")
+            processing_errors.append(f"Pipeline: {str(pipeline_error)}")
+            # Create minimal results structure
+            # It carries every key that is indexed directly further down, so
+            # the remaining steps can run after a pipeline failure.
+            results = {
+                'outputs': {
+                    'total_keyframes': 0,
+                    'total_events': 0,
+                    'total_motion_events': 0,
+                    'total_object_events': 0,
+                    'total_object_detections': 0,
+                    'canonical_events': [],
+                    'total_segments': 1,
+                    'highlight_reels': {},
+                    'reports': {},
+                    'compressed_video': ''
+                },
+                'processing_stats': {'total_processing_time': 0}
+            }
+        
+        # Extract DetectifAI-specific results (with error tolerance)
+        # extract_detectifai_results() catches its own exceptions and returns
+        # an {'error': ...} dict, so this except branch is a second safety net.
+        detectifai_results = {}
+        try:
+            detectifai_results = extract_detectifai_results(results)
+        except Exception as extract_error:
+            logger.error(f"⚠️ Result extraction error (but continuing): {str(extract_error)}")
+            processing_errors.append(f"Extraction: {str(extract_error)}")
+            detectifai_results = {'security_detection': {}, 'event_analysis': {}, 'performance': {}}
+        
+        # Store results in database
+        try:
+            # Update video file record with processing results
+            # NOTE(review): the record is marked "completed" even when the
+            # pipeline itself failed above — confirm this is intended.
+            # NOTE(review): the direct ['total_keyframes'] / ['total_events']
+            # lookups raise KeyError if the pipeline omits those keys; here
+            # that is caught by the database except branch below.
+            video_file.update_one(
+                {"video_id": video_id},
+                {
+                    "$set": {
+                        "processing_status": "completed",
+                        "processing_results": {
+                            "total_keyframes": results['outputs']['total_keyframes'],
+                            "total_events": results['outputs']['total_events'],
+                            "processing_time": results['processing_stats']['total_processing_time'],
+                            "detectifai_results": detectifai_results
+                        },
+                        "updated_at": datetime.now(timezone.utc)
+                    }
+                }
+            )
+            
+            # Create events in database
+            # Each canonical event is read with .get(), i.e. treated as a dict;
+            # start/end times are multiplied by 1000 to produce milliseconds.
+            for i, canonical_event in enumerate(results['outputs'].get('canonical_events', [])):
+                event_doc = {
+                    "event_id": str(uuid4()),
+                    "video_id": video_id,
+                    "start_timestamp_ms": int(canonical_event.get('start_time', 0) * 1000),
+                    "end_timestamp_ms": int(canonical_event.get('end_time', 0) * 1000),
+                    "confidence_score": canonical_event.get('importance', 0.0),
+                    "is_verified": False,
+                    "is_false_positive": False,
+                    "verified_at": None,
+                    "verified_by": None,
+                    # NOTE(review): called without arguments, so the embedding
+                    # cannot depend on this event's frames — confirm intent.
+                    "visual_embedding": generate_visual_embedding(),
+                    "bounding_boxes": canonical_event.get('bounding_boxes', {}),
+                    "event_type": canonical_event.get('event_type', 'motion_detection')
+                }
+                
+                event.insert_one(event_doc)
+                
+                # Add to FAISS index
+                faiss_manager.add_visual_embedding(event_doc["event_id"], event_doc["visual_embedding"])
+                
+                # Create event description
+                # The embedded text is prefixed with "Event N:" while the
+                # stored caption is not, so the two strings differ.
+                description_doc = {
+                    "description_id": str(uuid4()),
+                    "event_id": event_doc["event_id"],
+                    "text_embedding": generate_text_embedding(f"Event {i+1}: {canonical_event.get('description', 'Motion detected')}"),
+                    "caption": canonical_event.get('description', f'Motion detected at {canonical_event.get("start_time", 0):.2f}s'),
+                    "confidence": canonical_event.get('importance', 0.0),
+                    "created_at": datetime.now(timezone.utc),
+                    "updated_at": datetime.now(timezone.utc)
+                }
+                
+                event_description.insert_one(description_doc)
+                
+                # Add to FAISS text index
+                faiss_manager.add_text_embedding(description_doc["description_id"], description_doc["text_embedding"])
+            
+            logger.info(f"✅ Database integration completed for {video_id}")
+            
+        except Exception as db_error:
+            # A failure part-way through leaves the events inserted so far in
+            # place; nothing here rolls them back.
+            logger.error(f"⚠️ Database integration error (but continuing): {str(db_error)}")
+            processing_errors.append(f"Database: {str(db_error)}")
+        
+        # Always mark as completed (even with errors)
+        processing_status[video_id]['status'] = 'completed'
+        processing_status[video_id]['progress'] = 100
+        completion_message = 'DetectifAI processing completed successfully!'
+        if processing_errors:
+            completion_message = f'DetectifAI processing completed with warnings: {len(processing_errors)} non-critical errors'
+        processing_status[video_id]['message'] = completion_message
+        # NOTE(review): the directly indexed keys below ('total_keyframes',
+        # 'total_events', 'canonical_events', 'total_segments',
+        # 'total_processing_time') raise KeyError if the pipeline omits them.
+        # That lands in the outer except, which flips the status to 'failed'
+        # after it was set to 'completed' just above.
+        processing_status[video_id]['results'] = {
+            # Original results for backward compatibility
+            'total_keyframes': results['outputs']['total_keyframes'],
+            'total_events': results['outputs']['total_events'],
+            'total_motion_events': results['outputs'].get('total_motion_events', 0),
+            'total_object_events': results['outputs'].get('total_object_events', 0),
+            'total_object_detections': results['outputs'].get('total_object_detections', 0),
+            'canonical_events': results['outputs']['canonical_events'],
+            'total_segments': results['outputs']['total_segments'],
+            'processing_time': results['processing_stats']['total_processing_time'],
+            'highlight_reels': results['outputs'].get('highlight_reels', {}),
+            'reports': results['outputs'].get('reports', {}),
+            'compressed_video': results['outputs'].get('compressed_video', ''),
+            'output_directory': config.output_base_dir,
+            'object_detection_enabled': config.enable_object_detection,
+            
+            # DetectifAI-specific results
+            'detectifai_results': detectifai_results,
+            'security_detection': detectifai_results.get('security_detection', {}),
+            'event_analysis': detectifai_results.get('event_analysis', {}),
+            'performance': detectifai_results.get('performance', {}),
+            
+            # Processing status
+            'processing_errors': processing_errors,
+            'has_warnings': len(processing_errors) > 0
+        }
+        
+        logger.info(f"Video {video_id} processed successfully")
+        
+    except Exception as e:
+        # Only the in-memory status is updated here; the video_file document
+        # in MongoDB is not touched by this handler.
+        logger.error(f"Error processing video {video_id}: {str(e)}")
+        processing_status[video_id]['status'] = 'failed'
+        processing_status[video_id]['message'] = f'Error: {str(e)}'
+        processing_status[video_id]['error'] = str(e)
+
+# === API Endpoints ===
+
+@app.route('/')
+def index():
+    """Root endpoint: report that the backend is running."""
+    banner = {"message": "DetectifAI backend running with database integration"}
+    return jsonify(banner)
+
+@app.route('/api/health', methods=['GET'])
+def health_check():
+    """Liveness probe returning a fixed status and the current naive local time."""
+    now_iso = datetime.now().isoformat()
+    return jsonify({'status': 'healthy', 'timestamp': now_iso})
+
+# === Authentication Endpoints ===
+
+@app.route("/api/register", methods=["POST"])
+def register():
+    """Create a user account with role "user" and return a JWT for it.
+
+    Expects a JSON object with ``email`` and ``password`` and an optional
+    ``username`` (defaults to the part of the email before "@").
+
+    Returns ``{"token": ...}`` on success, or HTTP 400 with an ``error``
+    message when a field is missing or malformed or the email is taken.
+    """
+    # silent=True turns a missing or malformed JSON body into the 400 below
+    # instead of a framework error page. Anything that is not a JSON object
+    # is treated as an empty payload.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    email = data.get("email")
+    password = data.get("password")
+
+    if not email or not password:
+        return jsonify({"error": "email and password required"}), 400
+    # Only plain strings may continue: a JSON object here would be interpreted
+    # by MongoDB as a query operator, and a non-string email used to crash
+    # email.split() with an unhandled AttributeError (HTTP 500).
+    if not isinstance(email, str) or not isinstance(password, str):
+        return jsonify({"error": "email and password must be strings"}), 400
+
+    username = data.get("username", email.split("@")[0])
+
+    if user.find_one({"email": email}):
+        return jsonify({"error": "email exists"}), 400
+
+    now = datetime.now(timezone.utc)
+    user_doc = {
+        "user_id": str(uuid4()),
+        "username": username,
+        "email": email,
+        # SECURITY: stored in plaintext. login() compares against this value
+        # verbatim, so hashing has to be introduced in both places together.
+        "password": password,  # TODO: hash properly
+        "role": "user",
+        "created_at": now,
+        "updated_at": now,
+        "last_login": None
+    }
+    user.insert_one(user_doc)
+    token = generate_jwt(user_doc)
+    return jsonify({"token": token})
+
+@app.route("/api/login", methods=["POST", "OPTIONS"])
+def login():
+    """Authenticate by email and password and return a JWT plus basic user info.
+
+    Expects a JSON object with ``email`` and ``password``. Returns HTTP 400
+    when a field is missing or not a string, and HTTP 401 when no user has
+    that email or the password does not match.
+    """
+    if request.method == "OPTIONS":
+        return '', 200  # Handle preflight CORS request
+
+    # silent=True turns a missing or malformed JSON body into the 400 below.
+    # Anything that is not a JSON object is treated as an empty payload.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        data = {}
+    email = data.get("email")
+    password = data.get("password")
+
+    if not email or not password:
+        return jsonify({"error": "email and password required"}), 400
+    # Only plain strings may reach the Mongo filter: a JSON object such as
+    # {"$ne": null} would otherwise be evaluated as a query operator and
+    # match an arbitrary account.
+    if not isinstance(email, str) or not isinstance(password, str):
+        return jsonify({"error": "email and password must be strings"}), 400
+
+    # Check against Mongo
+    # SECURITY: passwords are stored and compared in plaintext; changing
+    # this requires changing every place that writes the password field.
+    user_doc = user.find_one({"email": email})
+    if not user_doc or user_doc.get("password") != password:
+        return jsonify({"error": "invalid credentials"}), 401
+
+    token = generate_jwt(user_doc)
+    return jsonify({
+        "message": "login successful",
+        "token": token,
+        "user": {
+            "user_id": user_doc["user_id"],
+            "username": user_doc.get("username"),
+            "email": user_doc["email"]
+        }
+    })
+
+# === Admin User Management Endpoints ===
+
+@app.route("/api/admin/users", methods=["GET"])
+@auth_required(role="admin")
+def get_all_users():
+    """Get all users - Admin only.
+
+    Query parameters:
+        page:   1-based page number (default 1).
+        limit:  page size (default 50).
+        search: case-insensitive substring matched against email and username.
+        role:   exact role to filter on.
+        status: "active" or "inactive", mapped to the ``is_active`` flag.
+
+    Returns the page of users (password fields removed) with ``total``,
+    ``page``, ``limit`` and ``pages``. Responds with HTTP 400 for a
+    non-integer or non-positive ``page``/``limit`` and HTTP 500 on
+    unexpected errors.
+    """
+    import re  # function-scope import: only needed to escape the search term
+
+    try:
+        # Get query parameters for pagination and filtering
+        try:
+            page = int(request.args.get("page", 1))
+            limit = int(request.args.get("limit", 50))
+        except (TypeError, ValueError):
+            return jsonify({"error": "page and limit must be integers"}), 400
+        # limit=0 used to end in a ZeroDivisionError when computing `pages`,
+        # and page<1 produced a negative skip; both surfaced as HTTP 500.
+        if page < 1 or limit < 1:
+            return jsonify({"error": "page and limit must be positive"}), 400
+
+        search = request.args.get("search", "")
+        role_filter = request.args.get("role", "")
+        status_filter = request.args.get("status", "")
+
+        # Build the Mongo filter. Named `filters` so it does not shadow the
+        # module-level `query` collection handle.
+        filters = {}
+        if search:
+            # Escape the term so it is matched literally rather than being
+            # interpreted as a regular expression.
+            pattern = re.escape(search)
+            filters["$or"] = [
+                {"email": {"$regex": pattern, "$options": "i"}},
+                {"username": {"$regex": pattern, "$options": "i"}}
+            ]
+        if role_filter:
+            filters["role"] = role_filter
+        if status_filter == "active":
+            filters["is_active"] = True
+        elif status_filter == "inactive":
+            filters["is_active"] = False
+
+        # Get total count
+        total = users.count_documents(filters)
+
+        # Get users with pagination, newest first
+        skip = (page - 1) * limit
+        user_list = list(users.find(filters).sort("created_at", -1).skip(skip).limit(limit))
+
+        # Remove sensitive data and make the ObjectId JSON-serialisable
+        for u in user_list:
+            u["_id"] = str(u["_id"])
+            u.pop("password", None)
+            u.pop("password_hash", None)
+
+        return jsonify({
+            "users": user_list,
+            "total": total,
+            "page": page,
+            "limit": limit,
+            "pages": (total + limit - 1) // limit
+        })
+    except Exception as e:
+        logger.error(f"Error fetching users: {str(e)}")
+        return jsonify({"error": "Failed to fetch users"}), 500
+
+@app.route("/api/admin/users", methods=["POST"])
+@auth_required(role="admin")
+def create_user():
+    """Create a new user - Admin only.
+
+    Expects a JSON object with ``email`` and ``password`` and optional
+    ``username`` (or ``name``) and ``role`` (default "user").
+
+    Returns HTTP 201 with the created user (password fields removed),
+    HTTP 400 when a field is missing or malformed or the email is taken,
+    and HTTP 500 on unexpected errors.
+    """
+    try:
+        # Anything that is not a JSON object is treated as an empty payload.
+        data = request.json or {}
+        if not isinstance(data, dict):
+            data = {}
+        email = data.get("email")
+        password = data.get("password")
+        username = data.get("username") or data.get("name")
+        role = data.get("role", "user")
+
+        if not email or not password:
+            return jsonify({"error": "email and password required"}), 400
+        # Non-string values used to fail in email.split() (reported as a 500)
+        # or reach the Mongo filter as a query operator.
+        if not isinstance(email, str) or not isinstance(password, str):
+            return jsonify({"error": "email and password must be strings"}), 400
+
+        # Check if user already exists
+        if users.find_one({"email": email}):
+            return jsonify({"error": "User with this email already exists"}), 400
+
+        # Create user document
+        now = datetime.now(timezone.utc)
+        user_doc = {
+            "user_id": str(uuid4()),
+            "username": username or email.split("@")[0],
+            "email": email,
+            # SECURITY: both fields hold the plaintext password; login()
+            # compares against "password" verbatim.
+            "password": password,  # TODO: hash properly with bcrypt
+            "password_hash": password,  # For compatibility
+            "role": role,
+            "is_active": True,
+            "profile_data": {},
+            "created_at": now,
+            "updated_at": now,
+            "last_login": None
+        }
+
+        # insert_one() adds the generated "_id" to user_doc in place.
+        users.insert_one(user_doc)
+
+        # Remove sensitive data before returning
+        user_doc["_id"] = str(user_doc["_id"])
+        user_doc.pop("password", None)
+        user_doc.pop("password_hash", None)
+
+        return jsonify({
+            "message": "User created successfully",
+            "user": user_doc
+        }), 201
+    except Exception as e:
+        logger.error(f"Error creating user: {str(e)}")
+        return jsonify({"error": "Failed to create user"}), 500
+
+@app.route("/api/admin/users/<user_id>", methods=["GET"])
+@auth_required(role="admin")
+def get_user(user_id):
+    """Return one user document without its password fields - Admin only.
+
+    Responds with HTTP 404 when no user has this ``user_id`` and HTTP 500
+    on unexpected errors.
+    """
+    try:
+        found = users.find_one({"user_id": user_id})
+        if found:
+            # Make the document JSON-serialisable and strip credentials.
+            found["_id"] = str(found["_id"])
+            for secret_field in ("password", "password_hash"):
+                found.pop(secret_field, None)
+            return jsonify({"user": found})
+        return jsonify({"error": "User not found"}), 404
+    except Exception as e:
+        logger.error(f"Error fetching user: {str(e)}")
+        return jsonify({"error": "Failed to fetch user"}), 500
+
+@app.route("/api/admin/users/<user_id>", methods=["PUT"])
+@auth_required(role="admin")
+def update_user(user_id):
+    """Apply a partial update to a user and return the result - Admin only.
+
+    Updatable fields: ``username`` (or ``name``), ``email``, ``role``,
+    ``is_active`` and ``password``. Responds with HTTP 404 for an unknown
+    user, HTTP 400 when the email is already taken or nothing updatable was
+    sent, and HTTP 500 on unexpected errors.
+    """
+    try:
+        payload = request.json or {}
+        by_id = {"user_id": user_id}
+
+        if not users.find_one(by_id):
+            return jsonify({"error": "User not found"}), 404
+
+        # Collect only the fields the caller is allowed to change.
+        changes = {}
+        if "username" in payload or "name" in payload:
+            changes["username"] = payload.get("username") or payload.get("name")
+        if "email" in payload:
+            new_email = payload["email"]
+            # Reject the change if a different account already uses this address.
+            if users.find_one({"email": new_email, "user_id": {"$ne": user_id}}):
+                return jsonify({"error": "Email already in use"}), 400
+            changes["email"] = new_email
+        for field in ("role", "is_active"):
+            if field in payload:
+                changes[field] = payload[field]
+        new_password = payload.get("password")
+        if new_password:
+            # Both fields receive the same plaintext value.
+            changes["password"] = new_password
+            changes["password_hash"] = new_password
+
+        if not changes:
+            return jsonify({"error": "No valid fields to update"}), 400
+
+        changes["updated_at"] = datetime.now(timezone.utc)
+        users.update_one(by_id, {"$set": changes})
+
+        # Re-read the document so the response reflects what is stored.
+        refreshed = users.find_one(by_id)
+        refreshed["_id"] = str(refreshed["_id"])
+        for secret_field in ("password", "password_hash"):
+            refreshed.pop(secret_field, None)
+
+        return jsonify({
+            "message": "User updated successfully",
+            "user": refreshed
+        })
+    except Exception as e:
+        logger.error(f"Error updating user: {str(e)}")
+        return jsonify({"error": "Failed to update user"}), 500
+
+@app.route("/api/admin/users/<user_id>", methods=["DELETE"])
+@auth_required(role="admin")
+def delete_user(user_id):
+    """Delete a user by ``user_id`` - Admin only.
+
+    Responds with HTTP 404 for an unknown user, HTTP 400 when the caller
+    tries to delete their own account, and HTTP 500 on unexpected errors.
+    """
+    try:
+        target = {"user_id": user_id}
+        if not users.find_one(target):
+            return jsonify({"error": "User not found"}), 404
+
+        # g.user holds the decoded token claims set by auth_required.
+        if g.user.get("user_id") == user_id:
+            return jsonify({"error": "Cannot delete your own account"}), 400
+
+        users.delete_one(target)
+        return jsonify({"message": "User deleted successfully"})
+    except Exception as e:
+        logger.error(f"Error deleting user: {str(e)}")
+        return jsonify({"error": "Failed to delete user"}), 500
+
+# === Video Processing Endpoints ===
+
+@app.route('/api/video/upload', methods=['POST'])
+@app.route('/api/upload', methods=['POST'])
+@auth_required()
+def upload_video():
+    """Upload video endpoint with database integration.
+
+    Expects a multipart form with a ``video`` file and an optional
+    ``config_type`` field (default 'detectifai'). The file is saved under the
+    upload folder, copied to MinIO, recorded in the ``video_file``
+    collection, and then processed by ``process_video_async`` in a daemon
+    thread.
+
+    Returns HTTP 200 with the generated ``video_id`` and a status URL,
+    HTTP 400 for a missing file or disallowed extension, and HTTP 500 when
+    saving, uploading or recording fails.
+    """
+    try:
+        # Check if file is present
+        if 'video' not in request.files:
+            return jsonify({'error': 'No video file provided'}), 400
+
+        file = request.files['video']
+
+        if file.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+
+        if not allowed_file(file.filename):
+            return jsonify({'error': 'Invalid file type. Allowed: mp4, avi, mov, mkv, wmv, flv'}), 400
+
+        # Get processing configuration (default to DetectifAI optimized)
+        config_type = request.form.get('config_type', 'detectifai')
+
+        # Generate unique video ID: timestamp plus 4 random bytes
+        video_id = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{os.urandom(4).hex()}"
+
+        # Save uploaded file
+        filename = secure_filename(file.filename)
+        video_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{video_id}_{filename}")
+        file.save(video_path)
+
+        # Measure the file that was written instead of seeking the upload
+        # stream after it has been consumed; this does not depend on the
+        # stream being seekable and is the size MinIO will read.
+        file_size = os.path.getsize(video_path)
+
+        # Store in MinIO using standardized paths
+        import mimetypes
+        from minio_config import VIDEOS_BUCKET, get_minio_paths
+
+        minio_paths = get_minio_paths(video_id, filename)
+        object_name = minio_paths["original"]
+
+        # Derive the content type from the extension (avi/mov/mkv are not
+        # video/mp4); fall back to the previous fixed value when unknown.
+        content_type = mimetypes.guess_type(filename)[0] or 'video/mp4'
+
+        try:
+            with open(video_path, 'rb') as file_data:
+                minio_client.put_object(
+                    VIDEOS_BUCKET,
+                    object_name,
+                    file_data,
+                    file_size,
+                    content_type=content_type
+                )
+                logger.info(f"✅ Video uploaded to MinIO: {object_name}")
+        except Exception as e:
+            logger.error(f"❌ MinIO upload failed: {e}")
+            raise
+
+        user_id = g.user.get("user_id")
+
+        # Create video record in database
+        video_doc = {
+            "video_id": video_id,
+            "user_id": user_id,
+            "file_path": video_path,
+            "minio_object_key": object_name,
+            # Record the bucket the object was actually written to above
+            # (previously MINIO_BUCKET, which is not the upload target).
+            "minio_bucket": VIDEOS_BUCKET,
+            "codec": None,
+            "fps": None,
+            "upload_date": datetime.now(timezone.utc),
+            "duration_secs": None,
+            "file_size_bytes": file_size,
+            "meta_data": {},
+            "processing_status": "uploaded"
+        }
+        video_file.insert_one(video_doc)
+
+        # Initialize processing status; the worker thread expects this entry
+        # to exist before it starts.
+        processing_status[video_id] = {
+            'video_id': video_id,
+            'filename': filename,
+            'status': 'queued',
+            'progress': 0,
+            'message': 'Video uploaded successfully. Processing queued.',
+            'uploaded_at': datetime.now().isoformat(),
+            'config_type': config_type
+        }
+
+        # Start background processing
+        thread = threading.Thread(
+            target=process_video_async,
+            args=(video_id, video_path, config_type, user_id)
+        )
+        thread.daemon = True
+        thread.start()
+
+        return jsonify({
+            'success': True,
+            'video_id': video_id,
+            'message': 'Video uploaded successfully. Processing started.',
+            'status_url': f'/api/status/{video_id}'
+        }), 200
+
+    except Exception as e:
+        logger.error(f"Upload error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/status/<video_id>', methods=['GET'])
+@app.route('/api/status/<video_id>', methods=['GET'])
+@auth_required()
+def get_status(video_id):
+    """Return a video's processing status as JSON; 404 if the video is unknown."""
+    # In-memory status wins. NOTE(review): upload-time entries carry no owner, so no ownership check here.
+    if video_id in processing_status:
+        return jsonify(processing_status[video_id]), 200
+    # Fall back to the caller's own persisted record (same scoping as get_video).
+    video_doc = video_file.find_one({"video_id": video_id, "user_id": g.user.get("user_id")})
+    if not video_doc:
+        return jsonify({'error': 'Video not found'}), 404
+    state = video_doc.get('processing_status', 'unknown')
+    uploaded = video_doc.get('upload_date')
+    return jsonify({
+        'video_id': video_id,
+        # basename() splits on the platform separator that os.path.join used at upload.
+        'filename': os.path.basename(video_doc.get('file_path') or ''),
+        'status': state,
+        'progress': 100 if state == 'completed' else 0,
+        'message': f"Video status: {state}",
+        'uploaded_at': uploaded.isoformat() if uploaded else '',
+        'results': video_doc.get('processing_results', {})
+    }), 200
+
+# === Database Query Endpoints ===
+
+@app.route("/api/videos", methods=["GET"])
+@auth_required()
+def list_videos():
+    """Return every video record owned by the authenticated user as a JSON array."""
+    owner_filter = {"user_id": g.user.get("user_id")}
+    # Project out MongoDB's _id so the documents are returned without it.
+    return jsonify(list(video_file.find(owner_filter, {"_id": 0})))
+
+@app.route("/api/video/<video_id>", methods=["GET"])
+@auth_required()
+def get_video(video_id):
+    """Return one video record, restricted to the authenticated user's own videos."""
+    lookup = {"video_id": video_id, "user_id": g.user.get("user_id")}
+    record = video_file.find_one(lookup, {"_id": 0})
+    if record:
+        return jsonify(record)
+    return jsonify({"error": "not found"}), 404
+
+@app.route("/api/video/<video_id>/events", methods=["GET"])
+@auth_required()
+def get_video_events(video_id):
+    """Return all events recorded for a video the authenticated user owns."""
+    ownership = {"video_id": video_id, "user_id": g.user.get("user_id")}
+
+    # Ownership gate: unknown videos and other users' videos both yield 404.
+    if not video_file.find_one(ownership):
+        return jsonify({"error": "video not found or access denied"}), 404
+
+    matching_events = event.find({"video_id": video_id}, {"_id": 0})
+    return jsonify(list(matching_events))
+
+@app.route("/api/event/<event_id>", methods=["GET"])
+@auth_required()
+def get_event_details(event_id):
+    """Return an event plus its descriptions, only if the caller owns the event's video."""
+    event_doc = event.find_one({"event_id": event_id}, {"_id": 0})
+    # Same ownership gate as get_video_events; a foreign event looks like a missing one.
+    owner_query = {"video_id": (event_doc or {}).get("video_id"), "user_id": g.user.get("user_id")}
+    if not event_doc or not video_file.find_one(owner_query):
+        return jsonify({"error": "event not found"}), 404
+
+    # Attach every stored description for this event.
+    event_doc["descriptions"] = list(event_description.find({"event_id": event_id}, {"_id": 0}))
+    return jsonify(event_doc)
+
+# === Search Endpoints ===
+
+@app.route("/api/search", methods=["GET"])
+@auth_required()
+def search():
+    """Case-insensitive substring search over captions of the caller's own videos."""
+    import re  # local import, like the other function-scope imports in this file
+
+    q = request.args.get("q", "")
+    user_id = g.user.get("user_id")
+
+    # Narrow to the caller's videos, then to the events of those videos.
+    user_videos = [v["video_id"] for v in video_file.find({"user_id": user_id}, {"video_id": 1})]
+    user_events = [e["event_id"] for e in event.find({"video_id": {"$in": user_videos}}, {"event_id": 1})]
+
+    # Escape q so it is matched literally: raw user input used as a regex allows
+    # pathological patterns and makes invalid ones (e.g. "(") fail server-side.
+    caption_filter = {"caption": {"$regex": re.escape(q), "$options": "i"}, "event_id": {"$in": user_events}}
+    return jsonify(list(event_description.find(caption_filter, {"_id": 0})))
+
+@app.route("/api/search-vector", methods=["POST"])
+@auth_required()
+def search_vector():
+    """Embed ``query_text`` and return its ``k`` most similar text embeddings from FAISS.
+
+    JSON body: ``query_text`` (required) and ``k`` (optional positive integer,
+    default 10). Responds 400 on invalid input and 500 if the search fails.
+    """
+    data = request.json or {}
+    query_text = data.get("query_text")
+    k = data.get("k", 10)  # Number of results to return
+
+    if not query_text:
+        return jsonify({"error": "query_text is required"}), 400
+    # Reject a non-integer or non-positive k as a client error instead of letting
+    # the search raise; bool is an int subclass, so it is excluded explicitly.
+    if isinstance(k, bool) or not isinstance(k, int) or k <= 0:
+        return jsonify({"error": "k must be a positive integer"}), 400
+
+    try:
+        results = faiss_manager.search_text_embeddings(generate_text_embedding(query_text), k)
+        return jsonify({"query_text": query_text, "results": results, "total_results": len(results)})
+    except Exception as e:
+        # Log server-side as search_captions does; the client still gets the message.
+        logger.error(f"Error in vector search: {e}")
+        return jsonify({"error": f"Search failed: {str(e)}"}), 500
+
+@app.route("/api/search-visual", methods=["POST"])
+@auth_required()
+def search_visual():
+    """Vector search for similar visual embeddings using FAISS.
+
+    JSON body: ``query_embedding`` (required, non-empty list of numbers; booleans are
+    rejected) and ``k`` (optional positive integer, default 10). 400 on invalid input.
+    """
+    data = request.json or {}
+    query_embedding = data.get("query_embedding")
+    k = data.get("k", 10)  # Number of results to return
+
+    if not query_embedding:
+        return jsonify({"error": "query_embedding is required"}), 400
+    if not isinstance(query_embedding, list) or not all(
+            isinstance(x, (int, float)) and not isinstance(x, bool) for x in query_embedding):
+        return jsonify({"error": "query_embedding must be a list of floats"}), 400
+    if isinstance(k, bool) or not isinstance(k, int) or k <= 0:
+        return jsonify({"error": "k must be a positive integer"}), 400
+
+    try:
+        results = faiss_manager.search_visual_embeddings(query_embedding, k)
+        return jsonify({"query_embedding_dim": len(query_embedding), "results": results, "total_results": len(results)})
+    except Exception as e:
+        logger.error(f"Error in visual search: {e}")
+        return jsonify({"error": f"Visual search failed: {str(e)}"}), 500
+
+@app.route("/api/search/captions", methods=["POST"])
+@auth_required()
+def search_captions():
+    """Search captions using FAISS index and sentence transformers.
+
+    JSON body: ``query`` (required), ``top_k`` (positive int, default 10) and
+    ``min_score`` (number, default 0.0). Responds 400 on invalid input, 503 when
+    caption search is unavailable or not ready, and 500 on unexpected errors.
+    """
+    from datetime import timedelta
+
+    try:
+        if not CAPTION_SEARCH_AVAILABLE:
+            return jsonify({
+                "error": "Caption search not available",
+                "message": "Caption search module not installed or not available"
+            }), 503
+
+        data = request.json or {}
+        query_text = data.get("query", "")
+        top_k = data.get("top_k", 10)
+        min_score = data.get("min_score", 0.0)
+
+        # Reject bad input up front so it yields a 400 instead of a 500.
+        if not isinstance(query_text, str) or not query_text.strip():
+            return jsonify({"error": "query is required"}), 400
+        query_text = query_text.strip()
+        if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k <= 0:
+            return jsonify({"error": "top_k must be a positive integer"}), 400
+        if isinstance(min_score, bool) or not isinstance(min_score, (int, float)):
+            return jsonify({"error": "min_score must be a number"}), 400
+
+        search_engine = get_caption_search_engine()
+        if not search_engine or not search_engine.is_ready():
+            return jsonify({
+                "error": "Caption search engine not ready",
+                "stats": search_engine.get_stats() if search_engine else {}
+            }), 503
+
+        results = search_engine.search(query_text, top_k=top_k, min_score=min_score)
+
+        # Buckets already verified during this request: avoids repeating the
+        # existence check for every result stored in the same bucket.
+        verified_buckets = set()
+        formatted_results = []
+        for result in results:
+            video_ref = result.get("video_reference") or {}
+            object_name = video_ref.get("object_name", "")
+
+            # Generate MinIO URL for the image/video
+            image_url = None
+            if object_name:
+                bucket = video_ref.get("bucket", "nlp-images")
+                try:
+                    if bucket not in verified_buckets:
+                        # Create bucket if it doesn't exist
+                        try:
+                            if not minio_client.bucket_exists(bucket):
+                                logger.info(f"Creating MinIO bucket: {bucket}")
+                                minio_client.make_bucket(bucket)
+                        except S3Error as e:
+                            if e.code not in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
+                                logger.warning(f"Could not create bucket {bucket}: {e}")
+                        verified_buckets.add(bucket)
+
+                    image_url = minio_client.presigned_get_object(bucket, object_name, expires=timedelta(hours=1))
+                except Exception as e:
+                    logger.warning(f"Could not generate MinIO URL: {e}")
+                    # Fallback: use unified image serving endpoint
+                    image_url = f"/api/minio/image/{bucket}/{object_name}"
+
+            formatted_results.append({
+                "id": result.get("description_id"),
+                "event_id": result.get("event_id"),
+                "description": result.get("caption", ""),
+                "caption": result.get("caption", ""),
+                "confidence": result.get("confidence", 0.0),
+                "similarity_score": result.get("similarity_score", 0.0),
+                "thumbnail": image_url,
+                "video_reference": video_ref,
+                "timestamp": result.get("created_at"),
+                "zone": "N/A"  # Can be enhanced with actual zone data
+            })
+
+        return jsonify({"query": query_text, "results": formatted_results,
+                        "total_results": len(formatted_results), "stats": search_engine.get_stats()})
+
+    except Exception as e:
+        logger.error(f"Error in caption search: {e}")
+        return jsonify({"error": f"Search failed: {str(e)}"}), 500
+
+# === FAISS Management Endpoints ===
+
+@app.route("/api/rebuild-indices", methods=["POST"])
+@auth_required()
+def rebuild_indices():
+    """Rebuild the text and visual FAISS indices, then report fresh index statistics.
+
+    Responds with a success message and the statistics, or 500 with the error
+    text if any step raises.
+    """
+    try:
+        # Text index first, visual index second.
+        faiss_manager.rebuild_text_index()
+        faiss_manager.rebuild_visual_index()
+
+        payload = {"message": "Indices rebuilt successfully", "stats": faiss_manager.get_index_stats()}
+        return jsonify(payload)
+
+    except Exception as exc:
+        failure = {"error": f"Failed to rebuild indices: {str(exc)}"}
+        return jsonify(failure), 500
+
+@app.route("/api/index-stats", methods=["GET"])
+@auth_required()
+def get_index_stats():
+    """Report FAISS index statistics as JSON (500 with the error text on failure)."""
+    try:
+        return jsonify(faiss_manager.get_index_stats())
+    except Exception as exc:
+        # Covers both the stats lookup and serialising its result.
+        message = f"Failed to get index stats: {str(exc)}"
+        return jsonify({"error": message}), 500
+
+# === Legacy DetectifAI Endpoints (for backward compatibility) ===
+
+@app.route('/api/results/<video_id>', methods=['GET'])
+@auth_required()
+def get_results(video_id):
+    """Return the stored results of a completed video from the in-memory status table.
+
+    404 if the video is not tracked in memory; 400 (reporting the current status)
+    while processing has not completed.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+
+    entry = processing_status[video_id]
+    current = entry['status']
+    if current == 'completed':
+        return jsonify(entry.get('results', {})), 200
+    return jsonify({'error': 'Processing not completed', 'current_status': current}), 400
+
+def _files_with_suffix(directory, suffix):
+    """Return the names in ``directory`` ending with ``suffix`` ([] if it is not a directory)."""
+    if not os.path.isdir(directory):
+        return []
+    return [name for name in os.listdir(directory) if name.endswith(suffix)]
+
+
+@app.route('/api/video/results/<video_id>', methods=['GET'])
+@auth_required()
+def get_video_results(video_id):
+    """Get video processing results with availability flags.
+
+    The output directory comes from the in-memory status entry when the video
+    is tracked there (400 until processing completes); otherwise the caller's
+    database record plus ``video_processing_outputs/<video_id>`` on disk are
+    required (404 if either is missing).
+
+    Returns JSON availability flags, counts and URLs for the compressed video,
+    the keyframes and the JSON reports.
+    """
+    default_dir = os.path.join('video_processing_outputs', video_id)
+
+    if video_id in processing_status:
+        status = processing_status[video_id]
+
+        if status['status'] != 'completed':
+            return jsonify({
+                'error': 'Processing not completed',
+                'current_status': status['status']
+            }), 400
+
+        # Normal processing records its output directory; anything else falls
+        # back to the standard directory structure.
+        output_dir = (status.get('results') or {}).get('output_directory') or default_dir
+    else:
+        # Not tracked in memory: require the caller's own database record
+        # (same ownership scoping as get_video).
+        video_doc = video_file.find_one({"video_id": video_id, "user_id": g.user.get("user_id")})
+        if not video_doc:
+            return jsonify({'error': 'Video not found'}), 404
+
+        output_dir = default_dir
+        if not os.path.exists(output_dir):
+            return jsonify({'error': 'Video processing results not found'}), 404
+
+        logger.info(f"📁 Found video files on disk for {video_id}, recovering results")
+
+    # List each artifact directory once; a missing directory yields no files.
+    compressed_files = _files_with_suffix(os.path.join(output_dir, 'compressed'), '.mp4')
+    keyframe_files = _files_with_suffix(os.path.join(output_dir, 'frames'), '.jpg')
+    reports_dir = os.path.join(output_dir, 'reports')
+    report_files = _files_with_suffix(reports_dir, '.json')
+
+    return jsonify({
+        'video_id': video_id,
+        'compressed_video_available': bool(compressed_files),
+        'compressed_video_url': f'/api/video/compressed/{video_id}' if compressed_files else None,
+        'keyframes_available': bool(keyframe_files),
+        'keyframes_count': len(keyframe_files),
+        'keyframes_url': f'/api/video/keyframes/{video_id}',
+        # Reports count as available as soon as the reports path exists,
+        # even when it holds no .json file yet.
+        'reports_available': os.path.exists(reports_dir),
+        'reports': report_files
+    }), 200
+
+# === File Serving Endpoints ===
+
+@app.route('/api/video/keyframes/<video_id>', methods=['GET'])
+@app.route('/api/keyframes/<video_id>', methods=['GET'])
+@auth_required()
+def get_keyframes(video_id):
+    """Get list of extracted keyframes with DetectifAI annotations.
+
+    Responds 404 if the video is not tracked in memory or has no frames directory, and 400
+    until processing completes. ``?filter_detections=true`` keeps only frames in the metadata.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+
+    # Same fallback as get_video_results when no output directory was recorded.
+    output_dir = ((status.get('results') or {}).get('output_directory')
+                  or os.path.join('video_processing_outputs', video_id))
+    frames_dir = os.path.join(output_dir, 'frames')
+    if not os.path.exists(frames_dir):
+        return jsonify({'error': 'Frames directory not found'}), 404
+
+    # Load detection metadata if available
+    detection_metadata = {}
+    detection_metadata_path = os.path.join(output_dir, 'detection_metadata.json')
+    if os.path.exists(detection_metadata_path):
+        try:
+            with open(detection_metadata_path, 'r', encoding='utf-8') as f:
+                detection_metadata = json.load(f)
+        except Exception as e:
+            logger.warning(f"Could not load detection metadata: {e}")
+
+    # Get filter parameter
+    filter_detections = request.args.get('filter_detections', 'false').lower() == 'true'
+
+    keyframes = []
+    frames_with_detections = {item['original_path']: item for item in detection_metadata.get('detection_summary', [])}
+
+    for filename in sorted(os.listdir(frames_dir)):
+        if not filename.endswith('.jpg') or filename.endswith('_annotated.jpg'):
+            continue
+        # Timestamp is the second '_'-separated field with 's' and '.jpg' stripped; unparsable names keep 0.0.
+        try:
+            timestamp = float(filename.split('_')[1].replace('s', '').replace('.jpg', '')) if '_' in filename else 0.0
+        except ValueError:
+            timestamp = 0.0
+
+        frame_path = os.path.join(frames_dir, filename)
+        has_detections = frame_path in frames_with_detections
+        # Skip frames without detections if filtering is enabled
+        if filter_detections and not has_detections:
+            continue
+
+        keyframe_data = {
+            'filename': filename,
+            'timestamp': timestamp,
+            'url': f'/api/keyframe/{video_id}/{filename}',
+            'has_detections': has_detections
+        }
+
+        # Add detection details if available
+        if has_detections:
+            detection_info = frames_with_detections[frame_path]
+            keyframe_data.update({
+                'detection_count': detection_info.get('detection_count', 0),
+                'objects': detection_info.get('objects', []),
+                'confidence_avg': detection_info.get('confidence_avg', 0.0)
+            })
+        keyframes.append(keyframe_data)
+
+    return jsonify({
+        'video_id': video_id,
+        'total_keyframes': detection_metadata.get('total_keyframes', len(keyframes)),
+        'keyframes_with_detections': detection_metadata.get('frames_with_detections', 0),
+        'keyframes': keyframes,
+        'objects_detected': detection_metadata.get('objects_detected', {}),
+        'filter_applied': filter_detections
+    }), 200
+
+@app.route('/api/keyframe/<video_id>/<filename>', methods=['GET'])
+@auth_required()
+def get_keyframe_image(video_id, filename):
+    """Serve one keyframe image of a video that is tracked in memory (404 otherwise)."""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+
+    # The entry created at upload time has no 'results' key; fall back to the
+    # standard layout (as get_video_results does) instead of raising KeyError.
+    results = processing_status[video_id].get('results') or {}
+    output_dir = results.get('output_directory') or os.path.join('video_processing_outputs', video_id)
+    return send_from_directory(os.path.join(output_dir, 'frames'), filename)
+
+@app.route('/api/video/compressed/<video_id>', methods=['GET'])
+@auth_required()
+def get_compressed_video(video_id):
+    """Serve the compressed ``.mp4`` of a completed video.
+
+    Responds 404 if the video is not tracked in memory or no compressed file
+    exists, and 400 while processing has not completed.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+
+    # Same fallback as get_video_results when no output directory was recorded.
+    output_dir = ((status.get('results') or {}).get('output_directory')
+                  or os.path.join('video_processing_outputs', video_id))
+    compressed_dir = os.path.join(output_dir, 'compressed')
+    if not os.path.exists(compressed_dir):
+        return jsonify({'error': 'Compressed video directory not found'}), 404
+
+    # os.listdir order is arbitrary; sort so the same file is served every time
+    # even if more than one .mp4 is present.
+    video_files = sorted(f for f in os.listdir(compressed_dir) if f.endswith('.mp4'))
+    if not video_files:
+        return jsonify({'error': 'Compressed video file not found'}), 404
+    return send_from_directory(compressed_dir, video_files[0])
+
+if __name__ == '__main__':  # debug is opt-in via FLASK_DEBUG: the Werkzeug debugger must not be exposed on 0.0.0.0
+    logger.info("Starting DetectifAI Flask API server with database integration...")
+    app.run(host='0.0.0.0', port=5000, debug=os.getenv('FLASK_DEBUG', '').lower() in ('1', 'true'))
diff --git a/DetectifAI_db/caption_search.py b/DetectifAI_db/caption_search.py
new file mode 100644
index 0000000000000000000000000000000000000000..27d48e5b269819ce60a4e7e540ee1cf8136109a4
--- /dev/null
+++ b/DetectifAI_db/caption_search.py
@@ -0,0 +1,209 @@
+"""
+Caption Search Module for DetectifAI
+
+This module provides caption-based search functionality using FAISS index
+and MongoDB for retrieving video descriptions based on text queries.
+"""
+
+import os
+import json
+import logging
+import numpy as np
+import faiss
+from typing import List, Dict, Optional, Tuple
+from pymongo import MongoClient
+from dotenv import load_dotenv
+
+# Optional import for sentence transformers
+try:
+    from sentence_transformers import SentenceTransformer
+    SENTENCE_TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    SENTENCE_TRANSFORMERS_AVAILABLE = False
+    logging.warning("sentence-transformers not available - caption search will not work")
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+# On-disk locations of the FAISS caption index and its id map, both next to this module
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_captions.index")
+FAISS_IDMAP_PATH = os.path.join(BASE_DIR, "faiss_captions_idmap.json")
+
+# MongoDB connection string; the MONGO_URI environment variable overrides the local default
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+
+# Sentence-transformers model used to embed queries (presumably the one that built the index — confirm)
+EMBEDDING_MODEL = "all-mpnet-base-v2"
+EMBEDDING_DIM = 768  # Dimension for all-mpnet-base-v2
+
+
+class CaptionSearchEngine:
+    """Search engine for caption-based video search using FAISS.
+
+    Each loader logs and swallows its own failure; check ``is_ready()`` before searching.
+    """
+
+    def __init__(self):
+        """Initialize the caption search engine"""
+        self.faiss_index = None
+        self.id_map = {}  # Maps FAISS index -> description_id
+        self.embedding_model = None
+        self.mongo_client = None
+        self.db = None
+        self.collection = None
+
+        # Initialize components
+        self._load_faiss_index()
+        self._load_embedding_model()
+        self._connect_mongodb()
+
+    def _load_faiss_index(self):
+        """Load FAISS index and id map from disk"""
+        try:
+            if os.path.exists(FAISS_INDEX_PATH):
+                self.faiss_index = faiss.read_index(FAISS_INDEX_PATH)
+                logger.info(f"✅ Loaded FAISS index from {FAISS_INDEX_PATH}")
+                logger.info(f"   Index size: {self.faiss_index.ntotal} vectors")
+            else:
+                logger.warning(f"⚠️ FAISS index not found at {FAISS_INDEX_PATH}")
+                return
+
+            if os.path.exists(FAISS_IDMAP_PATH):
+                with open(FAISS_IDMAP_PATH, 'r', encoding='utf-8') as f:
+                    id_map_list = json.load(f)
+                    # Convert list to dict: index -> description_id
+                    self.id_map = dict(enumerate(id_map_list))
+                logger.info(f"✅ Loaded FAISS id map from {FAISS_IDMAP_PATH}")
+                logger.info(f"   Mapped {len(self.id_map)} indices")
+            else:
+                logger.warning(f"⚠️ FAISS id map not found at {FAISS_IDMAP_PATH}")
+
+        except Exception as e:
+            logger.error(f"❌ Error loading FAISS index: {e}")
+            self.faiss_index = None
+
+    def _load_embedding_model(self):
+        """Load sentence transformer model for generating query embeddings"""
+        if not SENTENCE_TRANSFORMERS_AVAILABLE:
+            logger.warning("⚠️ sentence-transformers not available - cannot generate embeddings")
+            return
+
+        try:
+            logger.info(f"Loading embedding model: {EMBEDDING_MODEL}...")
+            self.embedding_model = SentenceTransformer(EMBEDDING_MODEL)
+            logger.info(f"✅ Loaded embedding model: {EMBEDDING_MODEL}")
+        except Exception as e:
+            logger.error(f"❌ Error loading embedding model: {e}")
+            self.embedding_model = None
+
+    def _connect_mongodb(self):
+        """Connect to MongoDB; on failure every MongoDB handle is reset to None"""
+        try:
+            self.mongo_client = MongoClient(MONGO_URI)
+            self.db = self.mongo_client.get_default_database()
+            self.collection = self.db["event_descriptions"]
+            logger.info("✅ Connected to MongoDB")
+        except Exception as e:
+            logger.error(f"❌ Error connecting to MongoDB: {e}")
+            self.mongo_client = self.db = self.collection = None
+
+    def is_ready(self) -> bool:
+        """Check if the search engine is ready to use"""
+        return (
+            self.faiss_index is not None and
+            self.embedding_model is not None and
+            self.mongo_client is not None and
+            self.faiss_index.ntotal > 0
+        )
+
+    def search(self, query_text: str, top_k: int = 10, min_score: float = 0.0) -> List[Dict]:
+        """
+        Search for captions similar to the query text
+
+        Args:
+            query_text: Text query to search for
+            top_k: Number of results to return
+            min_score: Minimum similarity score threshold
+
+        Returns:
+            List of result dictionaries with caption, video reference, and similarity
+            score, in FAISS result order; empty if the engine is not ready, ``top_k``
+            is not positive, or the search fails.
+        """
+        if not self.is_ready():
+            logger.warning("⚠️ Search engine not ready - missing components")
+            return []
+
+        try:
+            if top_k <= 0:
+                return []
+            # Embed the query as a float32 row vector of shape (1, dim) for FAISS.
+            query_embedding = self.embedding_model.encode(
+                query_text, normalize_embeddings=True, show_progress_bar=False
+            ).astype("float32").reshape(1, -1)
+
+            # Search FAISS index
+            k = min(top_k, self.faiss_index.ntotal)
+            scores, indices = self.faiss_index.search(query_embedding, k)
+
+            # Keep hits that map to a description id and are not below min_score (idx < 0 means no neighbour).
+            hits = [
+                (self.id_map[int(idx)], float(score))
+                for score, idx in zip(scores[0], indices[0])
+                if idx >= 0 and int(idx) in self.id_map and not score < min_score
+            ]
+
+            # One MongoDB round trip for all hits instead of one query per hit.
+            wanted_ids = [desc_id for desc_id, _ in hits]
+            cursor = self.collection.find({"description_id": {"$in": wanted_ids}}, {"_id": 0})
+            docs = {doc.get("description_id"): doc for doc in cursor}
+
+            results = []
+            for description_id, score in hits:
+                doc = docs.get(description_id)
+                if not doc:
+                    continue
+                created_at = doc.get("created_at")  # datetime normally; a plain string is passed through
+                results.append({
+                    "description_id": doc.get("description_id"),
+                    "event_id": doc.get("event_id"),
+                    "caption": doc.get("caption"),
+                    "confidence": doc.get("confidence", 0.0),
+                    "similarity_score": score,
+                    "video_reference": doc.get("video_reference", {}),
+                    "created_at": created_at.isoformat() if hasattr(created_at, "isoformat") else (created_at or None)
+                })
+            logger.info(f"✅ Found {len(results)} results for query: '{query_text[:50]}...'")
+            return results
+
+        except Exception as e:
+            logger.error(f"❌ Error during search: {e}")
+            return []
+
+    def get_stats(self) -> Dict:
+        """Get statistics about the search engine"""
+        return {
+            "faiss_index_loaded": self.faiss_index is not None,
+            "faiss_index_size": self.faiss_index.ntotal if self.faiss_index is not None else 0,
+            "id_map_size": len(self.id_map),
+            "embedding_model_loaded": self.embedding_model is not None,
+            "embedding_model": EMBEDDING_MODEL if self.embedding_model else None,
+            "embedding_dim": EMBEDDING_DIM,
+            "mongodb_connected": self.mongo_client is not None,
+            "ready": self.is_ready()
+        }
+
+
+# Lazily-created shared instance; stays None until get_caption_search_engine() is first called
+_caption_search_engine = None
+
+
+def get_caption_search_engine() -> CaptionSearchEngine:
+    """Return the shared CaptionSearchEngine, constructing it on first use."""
+    global _caption_search_engine
+    existing = _caption_search_engine
+    _caption_search_engine = CaptionSearchEngine() if existing is None else existing
+    return _caption_search_engine
+
diff --git a/DetectifAI_db/check_minio.py b/DetectifAI_db/check_minio.py
new file mode 100644
index 0000000000000000000000000000000000000000..68b253438d93565d0f0618e7ea3b5555b800eaa0
--- /dev/null
+++ b/DetectifAI_db/check_minio.py
@@ -0,0 +1,26 @@
+from minio import Minio
+from dotenv import load_dotenv
+import os
+
+# Load environment variables
+load_dotenv()
+
+# MinIO client setup. Credentials come from the environment only: secrets must
+# never be committed as fallback defaults (a missing variable raises KeyError).
+client = Minio(
+    os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com"),
+    access_key=os.environ["MINIO_ACCESS_KEY"],
+    secret_key=os.environ["MINIO_SECRET_KEY"],
+    secure=os.getenv("MINIO_SECURE", "true").lower() == "true",
+    region=os.getenv("MINIO_REGION", "eu-central-003")
+)
+
+# Check if bucket exists
+bucket_name = "detectifai-videos"
+found = client.bucket_exists(bucket_name)
+print(f"Bucket '{bucket_name}' exists: {found}")
+
+if found:
+    print("\nListing objects in bucket:")
+    for obj in client.list_objects(bucket_name, recursive=True):
+        print(f"- {obj.object_name} (size: {obj.size} bytes)")
\ No newline at end of file
diff --git a/DetectifAI_db/check_video_storage.py b/DetectifAI_db/check_video_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..25d21ce4996f87441b737bc0971c1226b8573076
--- /dev/null
+++ b/DetectifAI_db/check_video_storage.py
@@ -0,0 +1,191 @@
+"""
+Utility script to validate and fix video storage
+"""
+
+import os
+import sys
+from datetime import datetime
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from database.config import DatabaseManager
+from database.models import VideoFileModel
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def check_video_storage():
+    """Audit MongoDB video records against the objects stored in MinIO.
+
+    Every inconsistency is logged as a warning. Returns a dict with the counts
+    mongodb_videos / minio_videos / minio_keyframes, the video-id lists
+    missing_in_minio / missing_keyframes / orphaned_in_minio, and
+    incomplete_metadata, whose items are either a bare video id (meta_data
+    missing or empty) or a {'video_id', 'missing_fields'} dict.
+
+    The MongoDB query runs outside the try block; any exception raised from
+    the MinIO listing onwards is logged and re-raised.
+    """
+    manager = DatabaseManager()
+
+    # Step 1: load every video document from MongoDB
+    logger.info("Checking MongoDB video records...")
+    videos = list(manager.db.video_file.find({}))
+    logger.info(f"Found {len(videos)} video records in MongoDB")
+
+    # Step 2: list both MinIO buckets
+    logger.info("\nChecking MinIO storage...")
+    try:
+        video_objects = list(manager.minio_client.list_objects(
+            manager.config.minio_video_bucket,
+            recursive=True
+        ))
+        logger.info(f"Found {len(video_objects)} objects in video bucket")
+
+        keyframe_objects = list(manager.minio_client.list_objects(
+            manager.config.minio_keyframe_bucket,
+            recursive=True
+        ))
+        logger.info(f"Found {len(keyframe_objects)} objects in keyframe bucket")
+
+        # Video keys look like original/{video_id}/video.mp4, so the id is the
+        # second path segment; keys without a '/' carry no id and are skipped.
+        video_key_parts = [obj.object_name.split('/') for obj in video_objects]
+        minio_video_ids = {parts[1] for parts in video_key_parts if len(parts) > 1}
+
+        # Keyframe keys look like {video_id}/keyframes/..., so the id comes first.
+        # str.split always yields at least one segment, so no length guard is needed.
+        minio_keyframe_video_ids = {
+            obj.object_name.split('/')[0] for obj in keyframe_objects
+        }
+
+        # Step 3: cross-reference the two stores
+        logger.info("\nCross-referencing storage...")
+        mongo_video_ids = {str(v['video_id']) for v in videos}
+
+        missing_in_minio = mongo_video_ids - minio_video_ids
+        missing_keyframes = mongo_video_ids - minio_keyframe_video_ids
+        orphaned_in_minio = minio_video_ids - mongo_video_ids
+
+        findings = (
+            ("videos missing in MinIO", missing_in_minio),
+            ("videos missing keyframes", missing_keyframes),
+            ("orphaned videos in MinIO", orphaned_in_minio),
+        )
+        for headline, video_ids in findings:
+            if not video_ids:
+                continue
+            logger.warning(f"\n⚠️ Found {len(video_ids)} {headline}:")
+            for vid in video_ids:
+                logger.warning(f"- {vid}")
+
+        # Step 4: flag records whose meta_data is absent or lacks required fields
+        logger.info("\nChecking metadata completeness...")
+        required_fields = ['filename', 'processing_status', 'upload_date']
+        incomplete_metadata = []
+        for video in videos:
+            meta = video.get('meta_data')
+            if not meta:
+                incomplete_metadata.append(video['video_id'])
+                continue
+
+            missing_fields = [f for f in required_fields if f not in meta]
+            if missing_fields:
+                incomplete_metadata.append({
+                    'video_id': video['video_id'],
+                    'missing_fields': missing_fields
+                })
+
+        if incomplete_metadata:
+            logger.warning(f"\n⚠️ Found {len(incomplete_metadata)} videos with incomplete metadata:")
+            for item in incomplete_metadata:
+                if not isinstance(item, dict):
+                    logger.warning(f"- {item} (missing entire meta_data object)")
+                    continue
+                logger.warning(f"- {item['video_id']} (missing: {', '.join(item['missing_fields'])})")
+
+        return {
+            'mongodb_videos': len(videos),
+            'minio_videos': len(video_objects),
+            'minio_keyframes': len(keyframe_objects),
+            'missing_in_minio': list(missing_in_minio),
+            'missing_keyframes': list(missing_keyframes),
+            'orphaned_in_minio': list(orphaned_in_minio),
+            'incomplete_metadata': incomplete_metadata
+        }
+
+    # Surface the failure in the log, then propagate it unchanged
+    except Exception as e:
+        logger.error(f"Error checking storage: {e}")
+        raise
+
+def fix_metadata():
+    """Fix incomplete metadata in MongoDB records.
+
+    Fills in the fields check_video_storage() treats as required
+    (processing_status, upload_date, filename) for every document in the
+    video_file collection whose meta_data is missing, None/empty or partial.
+    Keys already present in meta_data are never overwritten.
+    Documents that already have all three fields are left untouched.
+
+    Returns:
+        int: number of documents that were updated successfully.
+    """
+    db_manager = DatabaseManager()
+    video_collection = db_manager.db.video_file
+
+    logger.info("Fixing incomplete metadata...")
+    fixed_count = 0
+
+    for video in video_collection.find({}):
+        # check_video_storage() reports None/empty meta_data as missing, so it
+        # is treated like an absent key here. The previous membership test
+        # (`'meta_data' not in video`) let a None value through and then
+        # raised TypeError on `'processing_status' not in meta`.
+        meta = dict(video.get('meta_data') or {})
+
+        # Defaults for the required fields. upload_date now falls back to the
+        # current time in every case (previously only when meta_data was
+        # absent), so a repaired record always passes the completeness check.
+        defaults = {
+            'processing_status': 'unknown',
+            'upload_date': video.get('upload_date', datetime.utcnow()),
+            'filename': f"video_{video['video_id']}.mp4",
+        }
+        missing = [field for field in defaults if field not in meta]
+        if not missing:
+            continue
+
+        for field in missing:
+            meta[field] = defaults[field]
+
+        # Apply the update; a failure on one document must not stop the rest
+        try:
+            video_collection.update_one(
+                {'_id': video['_id']},
+                {'$set': {'meta_data': meta}}
+            )
+            fixed_count += 1
+            logger.info(f"Fixed metadata for video {video['video_id']}")
+        except Exception as e:
+            logger.error(f"Failed to fix metadata for {video['video_id']}: {e}")
+
+    logger.info(f"\n✅ Fixed metadata for {fixed_count} videos")
+    return fixed_count
+
+if __name__ == "__main__":
+    # Audit first; the interactive repair is offered only when the audit found
+    # records with incomplete metadata. Any exception ends the run with status 1.
+    try:
+        report = check_video_storage()
+
+        needs_fix = report['incomplete_metadata']
+        if needs_fix and input("\nFix incomplete metadata? (y/n): ").lower() == 'y':
+            repaired = fix_metadata()
+            print(f"\nFixed {repaired} video records")
+
+        print("\nStorage check complete!")
+    except Exception as err:
+        print(f"Error: {err}")
+        sys.exit(1)
\ No newline at end of file
diff --git a/DetectifAI_db/create_admin.py b/DetectifAI_db/create_admin.py
new file mode 100644
index 0000000000000000000000000000000000000000..56113c370db123dc4f00f5d5080ec556d3daeefd
--- /dev/null
+++ b/DetectifAI_db/create_admin.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""
+Script to create an admin user in the DetectifAI database
+"""
+
+from pymongo import MongoClient
+from uuid import uuid4
+from datetime import datetime, timezone
+import bcrypt
+import os
+import sys
+from dotenv import load_dotenv
+
+load_dotenv()
+
+def create_admin_user():
+    """Create the admin user, or interactively reset its password if it exists.
+
+    Email, username and initial password default to the built-in development
+    values and may be overridden with ADMIN_EMAIL, ADMIN_USERNAME and
+    ADMIN_PASSWORD. A replacement password is read without echo and is not
+    printed back. The MongoDB client is closed on every exit path.
+    """
+    from getpass import getpass  # stdlib; prompts without echoing the input
+
+    # Get MongoDB connection
+    mongo_uri = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+    client = MongoClient(mongo_uri)
+    try:
+        users = client.get_default_database().users
+        # Admin credentials; `or` also ignores variables that are set but empty
+        default_password = "admin123"  # ⚠️ development default - override it!
+        admin_email = os.getenv("ADMIN_EMAIL") or "admin@detectifai.com"
+        admin_password = os.getenv("ADMIN_PASSWORD") or default_password
+        admin_username = os.getenv("ADMIN_USERNAME") or "admin"
+
+        # Check if admin already exists
+        if users.find_one({"email": admin_email}):
+            print(f"⚠️  Admin user with email '{admin_email}' already exists!")
+            update = input("Do you want to update the password? (y/n): ").lower().strip()
+            if update != 'y':
+                print("ℹ️  Keeping existing admin user")
+                return
+            new_password = getpass("Enter new password: ").strip()
+            if not new_password:
+                print("❌ Password cannot be empty")
+                sys.exit(1)
+            # SECURITY: "password" keeps a plaintext copy only because this script
+            # marks it as needed by the Flask backend; drop it once that backend uses password_hash.
+            password_hash = bcrypt.hashpw(new_password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
+            users.update_one(
+                {"email": admin_email},
+                {
+                    "$set": {
+                        "password_hash": password_hash,
+                        "password": new_password,
+                        "role": "admin",
+                        "is_active": True,
+                        "updated_at": datetime.now(timezone.utc)
+                    }
+                }
+            )
+            print(f"✅ Admin password updated successfully!")
+            print(f"   Email: {admin_email}")
+            return
+
+        # Create new admin user
+        print(f"Creating admin user...")
+        print(f"   Email: {admin_email}")
+        print(f"   Username: {admin_username}")
+        password_hash = bcrypt.hashpw(admin_password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
+        admin_user = {
+            "user_id": str(uuid4()),
+            "username": admin_username,
+            "email": admin_email,
+            "password_hash": password_hash,
+            "password": admin_password,  # SECURITY: plaintext for the Flask backend (TODO: remove in production)
+            "role": "admin",
+            "is_active": True,
+            "profile_data": {},
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc),
+            "last_login": None
+        }
+        try:
+            users.insert_one(admin_user)
+        except Exception as e:
+            print(f"❌ Error creating admin user: {e}")
+            sys.exit(1)
+        print("\n✅ Admin user created successfully!")
+        print(f"\n📋 Login Credentials:")
+        print(f"   Email: {admin_email}")
+        # Echo the password only when it is the well-known built-in default
+        shown = admin_password if admin_password == default_password else "(from ADMIN_PASSWORD)"
+        print(f"   Password: {shown}")
+        print(f"\n⚠️  IMPORTANT: Change this password after first login!")
+        print(f"\n🌐 Access the admin panel at: http://localhost:3000/admin/signin")
+    finally:
+        client.close()
+
+if __name__ == "__main__":
+    banner = "=" * 60
+    # One print call, same output as four: banner, title, banner, empty line
+    print(banner, "DetectifAI - Admin User Creation Script", banner, "", sep="\n")
+
+    # Refuse to run without an explicit MONGO_URI, so the localhost fallback
+    # inside create_admin_user() is never reached when run as a script
+    if not os.getenv("MONGO_URI"):
+        print("❌ Error: MONGO_URI environment variable not set")
+        print("Please create a .env file with your MongoDB connection string")
+        print("Example: MONGO_URI=mongodb://localhost:27017/detectifai")
+        sys.exit(1)
+
+    create_admin_user()
+    print("\n" + banner)
+    print("✅ Script completed!")
+    print(banner)
+
+
+
diff --git a/DetectifAI_db/database_seed.py b/DetectifAI_db/database_seed.py
new file mode 100644
index 0000000000000000000000000000000000000000..66b9a4d64c600969614c1fb7893ef4e2ce25c79e
--- /dev/null
+++ b/DetectifAI_db/database_seed.py
@@ -0,0 +1,212 @@
+from pymongo import MongoClient
+from uuid import uuid4
+from dotenv import load_dotenv
+from datetime import datetime, timezone
+import os
+
+load_dotenv()
+# Connect via MONGO_URI (local default) and bind the collections used by this script
+client = MongoClient(os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai"))
+db = client.get_default_database()
+users = db.users
+video_files = db.video_files
+event_descriptions = db.event_descriptions
+subscription_plans = db.subscription_plans
+events = db.events
+
+# Add sample user if not exists (matched by email). NOTE(review): "password" is stored as plaintext - confirm test-only
+sample_user = {
+    "user_id": str(uuid4()),
+    "username": "testuser",
+    "email": "user@detectifai.test",
+    "password": "userpass",
+    "role": "user",
+    "created_at": datetime.now(timezone.utc),
+    "updated_at": datetime.now(timezone.utc),
+    "last_login": None
+}
+if users.count_documents({"email": "user@detectifai.test"}) == 0:
+    users.insert_one(sample_user)
+    print("Added sample user: user@detectifai.test / userpass")
+else:
+    print("Sample user already exists")
+
+# Add sample subscription plans. NOTE(review): database_setup.py validates price as BSON "decimal"; these floats are doubles - confirm
+sample_plans = [
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Basic",
+        "description": "Basic surveillance features",
+        "price": 9.99,
+        "features": "basic_ai,email_support",
+        "storage_limit": 10,
+        "is_active": True
+    },
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Pro",
+        "description": "Advanced AI features with priority support",
+        "price": 29.99,
+        "features": "advanced_ai,priority_support,face_recognition",
+        "storage_limit": 100,
+        "is_active": True
+    },
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Enterprise",
+        "description": "Full enterprise features with 24/7 support",
+        "price": 99.99,
+        "features": "premium_ai,24_7_support,face_recognition,custom_integrations",
+        "storage_limit": 1000,
+        "is_active": True
+    }
+]
+# Match on plan_name: plan_id is a fresh uuid4 on every run, so it could never already exist
+for plan in sample_plans:
+    if subscription_plans.count_documents({"plan_name": plan["plan_name"]}) == 0:
+        subscription_plans.insert_one(plan)
+        print(f"Added subscription plan: {plan['plan_name']}")
+    else:
+        print(f"Subscription plan {plan['plan_name']} already exists")
+
+# Get existing video files to add sample events and descriptions
+from bson.int64 import Int64  # ships with PyMongo; forces BSON int64 ("long")
+existing_videos = list(video_files.find({}))
+
+
+def _store_events(batch):
+    """Insert each event once; on a re-run adopt the stored event_id instead."""
+    for event in batch:
+        # event_id is a fresh uuid4 on every run, so match on stable fields
+        stored = events.find_one({k: event[k] for k in ("video_id", "event_type", "start_timestamp_ms")})
+        if stored:
+            event["event_id"] = stored["event_id"]
+        else:
+            events.insert_one(event)
+            print(f"Added event: {event['event_type']}")
+
+
+def _store_descriptions(batch):
+    """Insert each description unless its event already has that caption."""
+    for desc in batch:
+        identity = {"event_id": desc["event_id"], "caption": desc["caption"]}
+        if event_descriptions.count_documents(identity) == 0:
+            event_descriptions.insert_one(desc)
+            print(f"Added description: {desc['caption'][:50]}...")
+
+
+if not existing_videos:
+    print("No video files found. Upload some videos first, then run this script.")
+else:
+    # Add sample events and descriptions to the first video. Timestamps use Int64
+    # because database_setup.py validates them as "long" (plain small ints are int32).
+    video_id = existing_videos[0]["video_id"]
+    sample_events = [
+        {
+            "event_id": str(uuid4()),
+            "video_id": video_id,
+            "event_type": "person_detection",
+            "confidence_score": 0.95,
+            "start_timestamp_ms": Int64(0),
+            "end_timestamp_ms": Int64(5000),
+            "bounding_boxes": {"x": 100, "y": 150, "width": 200, "height": 300},
+            "visual_embedding": [],
+            "is_verified": False,
+            "is_false_positive": False,
+            "verified_by": None,
+            "verified_at": None
+        },
+        {
+            "event_id": str(uuid4()),
+            "video_id": video_id,
+            "event_type": "object_detection",
+            "confidence_score": 0.87,
+            "start_timestamp_ms": Int64(5200),
+            "end_timestamp_ms": Int64(12800),
+            "bounding_boxes": {"x": 300, "y": 200, "width": 150, "height": 100},
+            "visual_embedding": [],
+            "is_verified": False,
+            "is_false_positive": False,
+            "verified_by": None,
+            "verified_at": None
+        }
+    ]
+    _store_events(sample_events)
+
+    # Add sample descriptions for the events
+    sample_descriptions = [
+        {
+            "description_id": str(uuid4()),
+            "event_id": sample_events[0]["event_id"],
+            "caption": "Person walking into the room carrying a briefcase",
+            "text_embedding": [],
+            "confidence": 0.92,
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc)
+        },
+        {
+            "description_id": str(uuid4()),
+            "event_id": sample_events[1]["event_id"],
+            "caption": "Individual sits down at desk and opens laptop computer",
+            "text_embedding": [],
+            "confidence": 0.88,
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc)
+        }
+    ]
+    _store_descriptions(sample_descriptions)
+
+    # If there are more videos, add different events to the second one
+    if len(existing_videos) > 1:
+        video2_id = existing_videos[1]["video_id"]
+        sample_events2 = [
+            {
+                "event_id": str(uuid4()),
+                "video_id": video2_id,
+                "event_type": "security_patrol",
+                "confidence_score": 0.93,
+                "start_timestamp_ms": Int64(2100),
+                "end_timestamp_ms": Int64(15400),
+                "bounding_boxes": {"x": 50, "y": 100, "width": 180, "height": 250},
+                "visual_embedding": [],
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_by": None,
+                "verified_at": None
+            }
+        ]
+        _store_events(sample_events2)
+        sample_descriptions2 = [
+            {
+                "description_id": str(uuid4()),
+                "event_id": sample_events2[0]["event_id"],
+                "caption": "Security guard patrolling the hallway with flashlight",
+                "text_embedding": [],
+                "confidence": 0.91,
+                "created_at": datetime.now(timezone.utc),
+                "updated_at": datetime.now(timezone.utc)
+            }
+        ]
+        _store_descriptions(sample_descriptions2)
+
+print("\n--- Database Seeding Complete ---")
+print("You can now test search functionality with terms like:")
+search_hints = (
+    ("briefcase", "laptop"), ("security", "guard"), ("person", "detection"),
+    ("desk", "computer"), ("patrol", "hallway"),
+)
+for first_term, second_term in search_hints:
+    print(f"- '{first_term}' or '{second_term}'")
+
+# Show summary: count everything first (same query order as before), then report
+counts = {
+    label: collection.count_documents({})
+    for label, collection in (
+        ("video files", video_files), ("events", events),
+        ("event descriptions", event_descriptions),
+        ("users", users), ("subscription plans", subscription_plans),
+    )
+}
+print(f"\nDatabase Summary:")
+for label in ("users", "subscription plans", "video files", "events", "event descriptions"):
+    print(f"Total {label}: {counts[label]}")
diff --git a/DetectifAI_db/database_setup.py b/DetectifAI_db/database_setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdc5ce3ce6c5ae53b4173c3ede51378d3dcf29a0
--- /dev/null
+++ b/DetectifAI_db/database_setup.py
@@ -0,0 +1,375 @@
+from pymongo import MongoClient, ASCENDING
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+# Fall back to the local URI the sibling scripts use; without any URI there is no default database
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+
+
+def create_collection_if_not_exists(name, validator=None, indexes=None):
+    """Create collection `name` (optionally validated) and then its indexes.
+
+    An existing collection is kept as-is: its validator is not updated, but the
+    requested indexes are still created. Each entry of `indexes` is either a
+    key spec or a (key spec, options dict) tuple.
+    Returns False only when the collection could not be created, else True.
+    """
+    options = {"validator": validator} if validator else {}
+    try:
+        db.create_collection(name, **options)
+        print(f"Created collection: {name}")
+    except Exception as e:
+        if "already exists" not in str(e):
+            print(f"Error creating collection {name}: {e}")
+            return False
+        print(f"Collection {name} already exists, skipping...")
+
+    for index in indexes or ():
+        keys, index_options = (index[0], index[1]) if isinstance(index, tuple) else (index, {})
+        try:
+            db[name].create_index(keys, **index_options)
+            print(f"  Created index on {name}")
+        except Exception as e:
+            # A "duplicate key" failure means existing documents violate a unique
+            # index, so the index was NOT built - report it as an error rather
+            # than as "already exists" (which the old check did).
+            if "already exists" in str(e):
+                print(f"  Index on {name} already exists")
+            else:
+                print(f"  Error creating index on {name}: {e}")
+    return True
+
+
+# === ADMIN === (unique email index; NOTE(review): "password" is a required plain string - confirm it holds a hash)
+create_collection_if_not_exists("admin", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["admin_id", "username", "email", "password"],
+        "properties": {
+            "admin_id": {"bsonType": "string"},
+            "username": {"bsonType": "string"},
+            "email": {"bsonType": "string"},
+            "password": {"bsonType": "string"},
+            "role": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"},
+            "last_login": {"bsonType": ["date", "null"]}
+        }
+    }
+}, indexes=[([("email", ASCENDING)], {"unique": True}), "username"])
+
+
+# === USERS === (only user_id and email are required; unique email index; credentials declared as password_hash)
+create_collection_if_not_exists("users", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["user_id", "email"],
+        "properties": {
+            "user_id": {"bsonType": "string"},
+            "username": {"bsonType": "string"},
+            "email": {"bsonType": "string"},
+            "password_hash": {"bsonType": "string"},
+            "role": {"bsonType": "string"},
+            "profile_data": {"bsonType": "object"},
+            "is_active": {"bsonType": "bool"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"},
+            "last_login": {"bsonType": ["date", "null"]}
+        }
+    }
+}, indexes=[([("email", ASCENDING)], {"unique": True}), "username"])
+
+
+# === VIDEO FILES === (NOTE(review): check_video_storage.py reads db.video_file, singular - confirm which name is intended)
+create_collection_if_not_exists("video_files", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["video_id", "user_id", "file_path"],
+        "properties": {
+            "video_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "file_path": {"bsonType": "string"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "codec": {"bsonType": "string"},
+            "fps": {"bsonType": "double"},
+            "upload_date": {"bsonType": "date"},
+            "duration_secs": {"bsonType": "int"},
+            "file_size_bytes": {"bsonType": ["int", "long"]},  # PyMongo stores small Python ints as int32
+            "meta_data": {"bsonType": "object"}
+        }
+    }
+}, indexes=["user_id", "upload_date"])
+
+
+# === EVENTS === (timestamps accept int or long: PyMongo encodes small Python ints as int32, which "long" alone rejects)
+create_collection_if_not_exists("events", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["event_id", "video_id", "start_timestamp_ms", "end_timestamp_ms"],
+        "properties": {
+            "event_id": {"bsonType": "string"},
+            "video_id": {"bsonType": "string"},
+            "start_timestamp_ms": {"bsonType": ["int", "long"]},
+            "end_timestamp_ms": {"bsonType": ["int", "long"]},
+            "confidence_score": {"bsonType": "double"},
+            "is_verified": {"bsonType": "bool"},
+            "is_false_positive": {"bsonType": "bool"},
+            "verified_at": {"bsonType": ["date", "null"]},
+            "verified_by": {"bsonType": ["string", "null"]},
+            "visual_embedding": {"bsonType": "array"},
+            "bounding_boxes": {"bsonType": "object"},
+            "event_type": {"bsonType": "string"}
+        }
+    }
+}, indexes=["video_id", "event_type", "is_verified"])
+
+
+# === EVENT CLIPS === (duration/size accept int or long: PyMongo encodes small Python ints as int32)
+create_collection_if_not_exists("event_clips", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["clip_id", "event_id", "clip_path"],
+        "properties": {
+            "clip_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "clip_path": {"bsonType": "string"},
+            "thumbnail_path": {"bsonType": "string"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "duration_ms": {"bsonType": ["int", "long"]},
+            "extracted_at": {"bsonType": "date"},
+            "file_size_bytes": {"bsonType": ["int", "long"]}
+        }
+    }
+}, indexes=["event_id"])
+
+
+# === DETECTED FACES === (one document per face; requires face_id, event_id, detected_at; indexed by event and time)
+create_collection_if_not_exists("detected_faces", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["face_id", "event_id", "detected_at"],
+        "properties": {
+            "face_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "detected_at": {"bsonType": "date"},
+            "confidence_score": {"bsonType": "double"},
+            "face_embedding": {"bsonType": "array"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "face_image_path": {"bsonType": "string"},
+            "bounding_boxes": {"bsonType": "object"}
+        }
+    }
+}, indexes=["event_id", "detected_at"])
+
+
+# === FACE MATCHES === (pairs two face ids with a required similarity_score; indexed on both ids and the score)
+create_collection_if_not_exists("face_matches", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["match_id", "face_id_1", "face_id_2", "similarity_score"],
+        "properties": {
+            "match_id": {"bsonType": "string"},
+            "face_id_1": {"bsonType": "string"},
+            "face_id_2": {"bsonType": "string"},
+            "similarity_score": {"bsonType": "double"},
+            "matched_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["face_id_1", "face_id_2", "similarity_score"])
+
+
+# === EVENT DESCRIPTIONS === (text_embedding is required even when empty; caption is optional)
+create_collection_if_not_exists("event_descriptions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["description_id", "event_id", "text_embedding"],
+        "properties": {
+            "description_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "text_embedding": {"bsonType": "array"},
+            "caption": {"bsonType": "string"},
+            "confidence": {"bsonType": "double"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["event_id", "created_at"])
+
+
+# === EVENT CAPTIONS === (description_id plus its description text, both required strings)
+create_collection_if_not_exists("event_captions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["description_id", "description"],
+        "properties": {
+            "description_id": {"bsonType": "string"},
+            "description": {"bsonType": "string"}
+        }
+    }
+}, indexes=["description_id"])
+
+
+# === QUERY === (one document per user query; the embedding and execution time are optional)
+create_collection_if_not_exists("query", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["query_id", "user_id", "query_text"],
+        "properties": {
+            "query_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "query_text": {"bsonType": "string"},
+            "query_embedding": {"bsonType": "array"},
+            "executed_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "executed_at"])
+
+
+# === QUERY RESULT === (links a query_id to an event_id with an optional relevance_score)
+create_collection_if_not_exists("query_result", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["result_id", "query_id", "event_id"],
+        "properties": {
+            "result_id": {"bsonType": "string"},
+            "query_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "relevance_score": {"bsonType": "double"},
+            "match_details": {"bsonType": "object"},
+            "returned_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["query_id", "event_id", "relevance_score"])
+
+
+# === SUBSCRIPTION PLANS === (NOTE(review): price must be BSON decimal, yet database_seed.py inserts Python floats - confirm)
+create_collection_if_not_exists("subscription_plans", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["plan_id", "plan_name", "price"],
+        "properties": {
+            "plan_id": {"bsonType": "string"},
+            "plan_name": {"bsonType": "string"},
+            "description": {"bsonType": "string"},
+            "price": {"bsonType": "decimal"},
+            "features": {"bsonType": "string"},
+            "storage_limit": {"bsonType": "int"},
+            "is_active": {"bsonType": "bool"},
+            "stripe_product_id": {"bsonType": "string"},
+            "stripe_price_ids": {"bsonType": "object"},
+            "billing_periods": {"bsonType": "array"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=[([("plan_id", ASCENDING)], {"unique": True}), "is_active", "stripe_product_id"])
+
+
+# === USER SUBSCRIPTIONS === (ties a user_id to a plan_id; the Stripe ids and billing-period fields are optional)
+create_collection_if_not_exists("user_subscriptions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["subscription_id", "user_id", "plan_id"],
+        "properties": {
+            "subscription_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "plan_id": {"bsonType": "string"},
+            "start_date": {"bsonType": "date"},
+            "end_date": {"bsonType": "date"},
+            "stripe_customer_id": {"bsonType": "string"},
+            "stripe_subscription_id": {"bsonType": "string"},
+            "billing_period": {"bsonType": "string"},
+            "status": {"bsonType": "string"},
+            "current_period_start": {"bsonType": "date"},
+            "current_period_end": {"bsonType": "date"},
+            "cancel_at_period_end": {"bsonType": "bool"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "plan_id", "start_date", "stripe_customer_id", "stripe_subscription_id", "status"])
+
+
+# === SUBSCRIPTION EVENTS === (audit trail; stripe_event_id is indexed but not declared unique)
+create_collection_if_not_exists("subscription_events", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["event_id", "subscription_id", "event_type"],
+        "properties": {
+            "event_id": {"bsonType": "string"},
+            "subscription_id": {"bsonType": "string"},
+            "event_type": {"bsonType": "string"},
+            "stripe_event_id": {"bsonType": "string"},
+            "event_data": {"bsonType": "object"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["subscription_id", "event_type", "created_at", "stripe_event_id"])
+
+
+# === PAYMENT HISTORY === (transaction records; NOTE(review): amount is double while plan price is decimal - confirm)
+create_collection_if_not_exists("payment_history", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["payment_id", "user_id", "amount"],
+        "properties": {
+            "payment_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "stripe_payment_intent_id": {"bsonType": "string"},
+            "amount": {"bsonType": "double"},
+            "currency": {"bsonType": "string"},
+            "status": {"bsonType": "string"},
+            "payment_method": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "created_at", "status", "stripe_payment_intent_id"])
+
+
+# === SUBSCRIPTION USAGE === (for analytics and limits; one usage_value per user, usage_type and date)
+create_collection_if_not_exists("subscription_usage", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["usage_id", "user_id", "usage_type"],
+        "properties": {
+            "usage_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "usage_type": {"bsonType": "string"},
+            "usage_value": {"bsonType": "double"},
+            "usage_date": {"bsonType": "date"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "usage_type", "usage_date"])
+
+
+# === USER SESSIONS === (session_token is unique; expires_at has a plain index, not a TTL index)
+create_collection_if_not_exists("user_sessions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["session_id", "user_id", "session_token", "expires_at"],
+        "properties": {
+            "session_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "session_token": {"bsonType": "string"},
+            "expires_at": {"bsonType": "date"},
+            "ip_address": {"bsonType": "string"},
+            "user_agent": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=[
+    ([("session_token", ASCENDING)], {"unique": True}),
+    "user_id",
+    "expires_at"
+])
+
+# NOTE: printed unconditionally - the helper's return values are discarded above, so failures only show inline
+print("\nDatabase schema setup completed successfully.")
+print("All collections are ready with validation and indexes.")
diff --git a/DetectifAI_db/env.example b/DetectifAI_db/env.example
new file mode 100644
index 0000000000000000000000000000000000000000..d3de5c73073b906f997e4166741328116f1cea1c
--- /dev/null
+++ b/DetectifAI_db/env.example
@@ -0,0 +1,19 @@
+# MongoDB Configuration
+MONGO_URI=mongodb://localhost:27017/detectifai
+
+# S3-compatible Storage (Backblaze B2)
+MINIO_ENDPOINT=s3.eu-central-003.backblazeb2.com
+MINIO_ACCESS_KEY=your-b2-key-id
+MINIO_SECRET_KEY=your-b2-application-key
+MINIO_VIDEO_BUCKET=detectifai-videos
+MINIO_KEYFRAME_BUCKET=detectifai-keyframes
+MINIO_REPORTS_BUCKET=detectifai-reports
+MINIO_SECURE=true
+MINIO_REGION=eu-central-003
+
+# JWT Configuration
+JWT_SECRET=your-super-secret-jwt-key-here
+
+# Flask Configuration
+FLASK_ENV=development
+FLASK_DEBUG=True
diff --git a/DetectifAI_db/faiss_captions.index b/DetectifAI_db/faiss_captions.index
new file mode 100644
index 0000000000000000000000000000000000000000..c56f6f0459da9de32c40c3749e1fe5c6a0ab318b
Binary files /dev/null and b/DetectifAI_db/faiss_captions.index differ
diff --git a/DetectifAI_db/faiss_captions_idmap.json b/DetectifAI_db/faiss_captions_idmap.json
new file mode 100644
index 0000000000000000000000000000000000000000..465008fe3da153a18318df721149cc0eefebde20
--- /dev/null
+++ b/DetectifAI_db/faiss_captions_idmap.json
@@ -0,0 +1,12 @@
+[
+  "desc_fe5f4141f350",
+  "desc_6683c8f65ca9",
+  "desc_93f7c560626c",
+  "desc_02ac022c7621",
+  "desc_9fc4ce829b64",
+  "desc_3b45f7543394",
+  "desc_49df9ce76beb",
+  "desc_e119f53298d0",
+  "desc_e6a2154fb826",
+  "desc_3e3ca6f4637d"
+]
\ No newline at end of file
diff --git a/DetectifAI_db/migrate_stripe_integration.py b/DetectifAI_db/migrate_stripe_integration.py
new file mode 100644
index 0000000000000000000000000000000000000000..912e1a5af6eee148625af2d57e0cb43b63e3bd04
--- /dev/null
+++ b/DetectifAI_db/migrate_stripe_integration.py
@@ -0,0 +1,209 @@
+"""
+Database Migration Script: Add Stripe Integration to Subscription Plans
+
+This script updates existing subscription_plans and prepares the database
+for Stripe payment integration.
+
+Run this script ONCE after updating the database schema.
+"""
+
+from pymongo import MongoClient
+from datetime import datetime
+import os
+from dotenv import load_dotenv
+from uuid import uuid4
+
+load_dotenv()
+
+# Connect to MongoDB
+MONGO_URI = os.getenv("MONGO_URI")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+
+subscription_plans = db.subscription_plans
+user_subscriptions = db.user_subscriptions
+
+print("🔄 Starting Stripe integration migration...")
+
+# ========================================
+# Step 1: Update existing subscription plans with Stripe data
+# ========================================
+
+print("\n📋 Step 1: Updating subscription plans with Stripe data...")
+
+# DetectifAI Basic Plan
+basic_plan = subscription_plans.find_one({"plan_name": "Basic"})
+if basic_plan:
+    subscription_plans.update_one(
+        {"_id": basic_plan["_id"]},
+        {
+            "$set": {
+                "stripe_product_id": "prod_TqIuL76gNG4hxu",
+                "stripe_price_ids": {
+                    "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+                    "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+                },
+                "billing_periods": ["monthly", "yearly"],
+                "price": 19.00,
+                "description": "Essential AI-powered security monitoring",
+                "features": "single_video,object_detection,face_recognition,7day_history,dashboard,basic_reports",
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+    print("✅ Updated Basic plan with Stripe integration")
+else:
+    # Create Basic plan if it doesn't exist
+    basic_plan_data = {
+        "plan_id": str(uuid4()),
+        "plan_name": "Basic",
+        "description": "Essential AI-powered security monitoring",
+        "price": 19.00,
+        "features": "single_video,object_detection,face_recognition,7day_history,dashboard,basic_reports",
+        "storage_limit": 50,
+        "is_active": True,
+        "stripe_product_id": "prod_TqIuL76gNG4hxu",
+        "stripe_price_ids": {
+            "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+            "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+        },
+        "billing_periods": ["monthly", "yearly"],
+        "created_at": datetime.utcnow(),
+        "updated_at": datetime.utcnow()
+    }
+    subscription_plans.insert_one(basic_plan_data)
+    print("✅ Created Basic plan with Stripe integration")
+
+# DetectifAI Pro Plan
+pro_plan = subscription_plans.find_one({"plan_name": "Pro"})
+if pro_plan:
+    subscription_plans.update_one(
+        {"_id": pro_plan["_id"]},
+        {
+            "$set": {
+                "stripe_product_id": "prod_TqIyhR08zDDa2B",
+                "stripe_price_ids": {
+                    "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+                    "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+                },
+                "billing_periods": ["monthly", "yearly"],
+                "price": 49.00,
+                "description": "Advanced security intelligence with extended capabilities",
+                "features": "everything_basic,30day_history,behavior_analysis,person_tracking,nlp_search,image_search,custom_reports,priority_queue",
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+    print("✅ Updated Pro plan with Stripe integration")
+else:
+    # Create Pro plan if it doesn't exist
+    pro_plan_data = {
+        "plan_id": str(uuid4()),
+        "plan_name": "Pro",
+        "description": "Advanced security intelligence with extended capabilities",
+        "price": 49.00,
+        "features": "everything_basic,30day_history,behavior_analysis,person_tracking,nlp_search,image_search,custom_reports,priority_queue",
+        "storage_limit": 200,
+        "is_active": True,
+        "stripe_product_id": "prod_TqIyhR08zDDa2B",
+        "stripe_price_ids": {
+            "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+            "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+        },
+        "billing_periods": ["monthly", "yearly"],
+        "created_at": datetime.utcnow(),
+        "updated_at": datetime.utcnow()
+    }
+    subscription_plans.insert_one(pro_plan_data)
+    print("✅ Created Pro plan with Stripe integration")
+
+# Remove Enterprise plan if it exists (not part of current offering)
+enterprise_plan = subscription_plans.find_one({"plan_name": "Enterprise"})
+if enterprise_plan:
+    subscription_plans.update_one(
+        {"_id": enterprise_plan["_id"]},
+        {"$set": {"is_active": False, "updated_at": datetime.utcnow()}}
+    )
+    print("✅ Deactivated Enterprise plan (not in current offering)")
+
+# ========================================
+# Step 2: Add Stripe fields to existing user subscriptions
+# ========================================
+
+print("\n📋 Step 2: Adding Stripe fields to existing user subscriptions...")
+
+# Select only documents that still lack the Stripe fields, so the filter runs
+# on the server and a second run of the script leaves migrated documents alone.
+unmigrated = user_subscriptions.find({"stripe_customer_id": {"$exists": False}})
+updated_count = 0
+
+for sub in unmigrated:
+    user_subscriptions.update_one(
+        {"_id": sub["_id"]},
+        {
+            "$set": {
+                "stripe_customer_id": None,
+                "stripe_subscription_id": None,
+                "billing_period": "monthly",
+                "status": "active",
+                "current_period_start": sub.get("start_date"),
+                "current_period_end": sub.get("end_date"),
+                "cancel_at_period_end": False,
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+    updated_count += 1
+
+if updated_count > 0:
+    print(f"✅ Updated {updated_count} existing subscriptions with Stripe fields")
+else:
+    print("✅ No existing subscriptions to update")
+
+# ========================================
+# Step 3: Verify collections exist
+# ========================================
+
+print("\n📋 Step 3: Verifying new collections...")
+
+collections_to_check = ["subscription_events", "payment_history", "subscription_usage"]
+
+# Fetch the collection names once; the original asked the server again on
+# every loop iteration although nothing in this loop creates a collection.
+existing_collections = set(db.list_collection_names())
+
+for collection_name in collections_to_check:
+    if collection_name in existing_collections:
+        count = db[collection_name].count_documents({})
+        print(f"✅ Collection '{collection_name}' exists (documents: {count})")
+    else:
+        print(f"⚠️  Collection '{collection_name}' not found - run database_setup.py first")
+
+# ========================================
+# Step 4: Display summary
+# ========================================
+
+print("\n" + "="*60)
+print("📊 MIGRATION SUMMARY")
+print("="*60)
+
+all_plans = list(subscription_plans.find({"is_active": True}))
+print(f"\n✅ Active Subscription Plans: {len(all_plans)}")
+for plan in all_plans:
+    print(f"   • {plan['plan_name']}: ${plan['price']}/month")
+    print(f"     Stripe Product: {plan.get('stripe_product_id', 'NOT SET')}")
+    print(f"     Billing: {', '.join(plan.get('billing_periods', []))}")
+
+all_subs = user_subscriptions.count_documents({})
+print(f"\n✅ Total User Subscriptions: {all_subs}")
+
+print("\n" + "="*60)
+print("✅ Migration completed successfully!")
+print("="*60)
+print("\nNext steps:")
+print("1. Test Stripe integration endpoints")
+print("2. Create webhook endpoint for Stripe events")
+print("3. Test checkout flow with test cards")
+print("4. Update frontend pricing components")
+
+client.close()
diff --git a/DetectifAI_db/minio_config.py b/DetectifAI_db/minio_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fb804a55373e872bf2e0c848ed701d1a731c107
--- /dev/null
+++ b/DetectifAI_db/minio_config.py
@@ -0,0 +1,37 @@
+"""
+S3-compatible storage configuration for DetectifAI (Backblaze B2)
+"""
+
+# S3 bucket names (matching actual Backblaze B2 buckets)
+VIDEOS_BUCKET = "detectifai-videos"
+KEYFRAMES_BUCKET = "detectifai-keyframes"
+COMPRESSED_BUCKET = "detectifai-compressed"
+NLP_IMAGES_BUCKET = "nlp-images"
+REPORTS_BUCKET = "detectifai-reports"
+
+# Object prefixes/paths
+ORIGINAL_VIDEO_PREFIX = "original"
+COMPRESSED_VIDEO_PREFIX = "compressed"
+KEYFRAME_PREFIX = "keyframes"
+
+# S3-compatible storage configuration (Backblaze B2). Keys are read from the
+# environment (see env.example) so that no credential is committed to the repo.
+import os
+MINIO_CONFIG = {"endpoint": os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com"),
+                "access_key": os.getenv("MINIO_ACCESS_KEY"),
+                "secret_key": os.getenv("MINIO_SECRET_KEY"),
+                "secure": True,
+                "region": os.getenv("MINIO_REGION", "eu-central-003")}
+
+# Build the standard object keys for one video
+def get_minio_paths(video_id: str, filename: str = None):
+    """Return the object keys (original, compressed, keyframes, reports) for a video.
+
+    ``filename`` defaults to ``<video_id>.mp4`` when omitted.
+    """
+    name = f"{video_id}.mp4" if filename is None else filename
+    paths = {"original": ORIGINAL_VIDEO_PREFIX, "compressed": COMPRESSED_VIDEO_PREFIX}
+    paths = {kind: f"{prefix}/{video_id}/{name}" for kind, prefix in paths.items()}
+    paths["keyframes"] = f"{KEYFRAME_PREFIX}/{video_id}"
+    paths["reports"] = f"reports/{video_id}"
+    return paths
\ No newline at end of file
diff --git a/DetectifAI_db/requirements.txt b/DetectifAI_db/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4727643b30e7b838110b68bb68385d79d2bd468
--- /dev/null
+++ b/DetectifAI_db/requirements.txt
@@ -0,0 +1,14 @@
+Flask==2.3.3
+Flask-CORS==4.0.0
+Werkzeug==3.0.0
+PyJWT==2.8.0
+pymongo>=4.6.3,<5.0
+python-multipart==0.0.6
+minio==7.1.11
+opencv-python==4.8.0.74
+python-dotenv==1.0.0
+faiss-cpu
+numpy
+Pillow
+scikit-learn
+sentence-transformers
diff --git a/DetectifAI_db/reset_minio.py b/DetectifAI_db/reset_minio.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a6a0f3fc5ebcd5b8544f22fc6db12f78679d678
--- /dev/null
+++ b/DetectifAI_db/reset_minio.py
@@ -0,0 +1,104 @@
+"""
+Reset MinIO buckets and test storage paths for DetectifAI.
+
+This script ensures that all required MinIO buckets and storage paths
+are properly configured for video processing.
+"""
+
+from minio import Minio
+from minio.error import S3Error
+import os
+from datetime import datetime
+from dotenv import load_dotenv
+import logging
+
+# Load environment variables
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# MinIO configuration: credentials come from the environment only, so no secret lives in the repo
+MINIO_CONFIG = {
+    "endpoint": os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com"),
+    "access_key": os.getenv("MINIO_ACCESS_KEY"),
+    "secret_key": os.getenv("MINIO_SECRET_KEY"),
+    "secure": os.getenv("MINIO_SECURE", "true").lower() == "true",
+    "region": os.getenv("MINIO_REGION", "eu-central-003")
+}
+
+# Bucket configuration with descriptions
+BUCKETS = {
+    "detectifai-videos": {
+        "description": "Main bucket for video storage",
+        "prefixes": {
+            "original": "Original uploaded videos",
+            "compressed": "Compressed video versions"
+        }
+    },
+    "detectifai-keyframes": {
+        "description": "Storage for extracted video frames",
+        "prefixes": {
+            "keyframes": "Extracted keyframes and annotated frames"
+        }
+    }
+}
+
+def reset_minio_storage():
+    """Verify the configured buckets, creating any that are missing.
+
+    For each prefix of each bucket in ``BUCKETS`` a small test object is
+    uploaded, stat-ed and removed again; the bucket contents are then listed.
+    Progress is printed; an ``S3Error`` on one bucket does not stop the others.
+    """
+    from io import BytesIO  # imported once here instead of inside the prefix loop
+
+    client = Minio(**MINIO_CONFIG)
+    print("Checking MinIO connection and buckets...")
+
+    for bucket_name, config in BUCKETS.items():
+        try:
+            # Check if bucket exists
+            found = client.bucket_exists(bucket_name)
+            if not found:
+                print(f"Creating bucket: {bucket_name}")
+                client.make_bucket(bucket_name)
+
+            # Test each prefix path
+            for prefix in config["prefixes"]:
+                test_object = f"{prefix}/test.txt"
+                test_data = f"Test data for {bucket_name}/{prefix}"
+                test_bytes = test_data.encode("utf-8")
+                print(f"\nTesting path: {bucket_name}/{test_object}")
+
+                # Upload test object
+                client.put_object(
+                    bucket_name, test_object, BytesIO(test_bytes), len(test_bytes)
+                )
+
+                # Verify upload, then clean up the test file
+                try:
+                    client.stat_object(bucket_name, test_object)
+                    print(f"✅ Test file uploaded successfully")
+
+                    client.remove_object(bucket_name, test_object)
+                    print(f"✅ Test file removed")
+                except Exception as verify_error:
+                    # Still best-effort, but unlike a bare ``except`` this lets
+                    # KeyboardInterrupt/SystemExit through and reports the cause.
+                    print(f"❌ Could not verify test file: {verify_error}")
+
+            print(f"\nListing objects in {bucket_name}:")
+            for obj in client.list_objects(bucket_name, recursive=True):
+                print(f"- {obj.object_name} (size: {obj.size} bytes)")
+
+        except S3Error as e:
+            print(f"❌ Error with bucket {bucket_name}: {e}")
+            continue
+
+if __name__ == "__main__":
+    reset_minio_storage()
\ No newline at end of file
diff --git a/DetectifAI_db/reset_users_collection.py b/DetectifAI_db/reset_users_collection.py
new file mode 100644
index 0000000000000000000000000000000000000000..1722cf1c0484bc748d2791a59bd2f3d7a6026932
--- /dev/null
+++ b/DetectifAI_db/reset_users_collection.py
@@ -0,0 +1,29 @@
+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+
+def reset_users_collection():
+    """Drop the ``users`` collection and recreate it by importing ``database_setup``.
+
+    Errors are printed rather than raised.
+    """
+    client = None  # bound before the try so that finally can always test it
+    try:
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        print("Dropping existing users collection...")
+        db.users.drop()
+        print("Creating users collection with new schema...")
+        import database_setup  # run database_setup.py to recreate with new schema
+        print("✅ Users collection reset successfully!")
+    except Exception as e:
+        print(f"❌ Error: {e}")
+    finally:
+        if client is not None:
+            client.close()
+
+if __name__ == "__main__":
+    reset_users_collection()
\ No newline at end of file
diff --git a/DetectifAI_db/seed_stripe_plans.py b/DetectifAI_db/seed_stripe_plans.py
new file mode 100644
index 0000000000000000000000000000000000000000..32213ca9346ecefe39148fde23bd951fbd5c2082
--- /dev/null
+++ b/DetectifAI_db/seed_stripe_plans.py
@@ -0,0 +1,141 @@
+"""
+Seed Stripe-Integrated Subscription Plans
+
+This script populates the subscription_plans collection with accurate
+DetectifAI Basic and Pro plans connected to Stripe.
+"""
+
+from pymongo import MongoClient
+from datetime import datetime
+import os
+from dotenv import load_dotenv
+from uuid import uuid4
+
+load_dotenv()
+
+MONGO_URI = os.getenv("MONGO_URI")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+subscription_plans = db.subscription_plans
+
+print("🌱 Seeding Stripe-integrated subscription plans...")
+
+# DetectifAI Basic Plan
+basic_plan = {
+    "plan_id": "detectifai_basic",
+    "plan_name": "DetectifAI Basic",
+    "description": "Essential AI-powered security monitoring for single installations",
+    "price": 19.00,
+    "features": [
+        "single_video",
+        "object_detection",
+        "face_recognition",
+        "event_history_7day",
+        "dashboard",
+        "basic_reports",
+        "video_clips"
+    ],
+    "limits": {
+        "video_processing": 10,  # Videos per month
+        "history_retention_days": 7,
+        "nlp_searches": 0,  # Not available in Basic
+        "image_searches": 0,  # Not available in Basic
+        "concurrent_streams": 1
+    },
+    "is_active": True,
+    "stripe_product_id": "prod_TqIuL76gNG4hxu",
+    "stripe_price_ids": {
+        "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+        "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+    },
+    "billing_periods": ["monthly", "yearly"],
+    "created_at": datetime.utcnow(),
+    "updated_at": datetime.utcnow()
+}
+
+# DetectifAI Pro Plan
+pro_plan = {
+    "plan_id": "detectifai_pro",
+    "plan_name": "DetectifAI Pro",
+    "description": "Advanced security intelligence with extended capabilities",
+    "price": 49.00,
+    "features": [
+        "single_video",
+        "object_detection",
+        "face_recognition",
+        "event_history_30day",
+        "dashboard",
+        "basic_reports",
+        "video_clips",
+        "behavior_analysis",
+        "person_tracking",
+        "nlp_search",
+        "image_search",
+        "custom_reports",
+        "priority_queue"
+    ],
+    "limits": {
+        "video_processing": 999999,  # Unlimited videos per month for Pro
+        "history_retention_days": 30,
+        "nlp_searches": 200,  # NLP searches per month
+        "image_searches": 100,  # Image searches per month
+        "concurrent_streams": 1
+    },
+    "is_active": True,
+    "stripe_product_id": "prod_TqIyhR08zDDa2B",
+    "stripe_price_ids": {
+        "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+        "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+    },
+    "billing_periods": ["monthly", "yearly"],
+    "created_at": datetime.utcnow(),
+    "updated_at": datetime.utcnow()
+}
+
+# Upsert plans; created_at is written only on insert so that re-seeding keeps it
+for plan in [basic_plan, pro_plan]:
+    fields = {key: value for key, value in plan.items() if key != "created_at"}
+    result = subscription_plans.update_one(
+        {"plan_id": plan["plan_id"]},
+        {"$set": fields, "$setOnInsert": {"created_at": plan["created_at"]}},
+        upsert=True)
+    if result.upserted_id:
+        print(f"✅ Created plan: {plan['plan_name']}")
+    else:
+        print(f"✅ Updated plan: {plan['plan_name']}")
+
+# Display summary
+print("\n" + "="*60)
+print("📊 SUBSCRIPTION PLANS")
+print("="*60)
+
+all_plans = list(subscription_plans.find({"is_active": True}))
+for plan in all_plans:
+    print(f"\n{plan['plan_name']} - ${plan['price']}/month")
+    print(f"  Description: {plan['description']}")
+    
+    # Only print if exists (for compatibility with old plans)
+    if 'stripe_product_id' in plan:
+        print(f"  Stripe Product: {plan['stripe_product_id']}")
+    
+    if 'stripe_price_ids' in plan:
+        monthly_price = plan['stripe_price_ids'].get('monthly', 'N/A')
+        yearly_price = plan['stripe_price_ids'].get('yearly', 'N/A')
+        print(f"  Monthly Price ID: {monthly_price}")
+        print(f"  Yearly Price ID: {yearly_price}")
+    
+    if 'features' in plan:
+        features = plan['features']
+        if isinstance(features, list):
+            print(f"  Features: {', '.join(features)}")
+        else:
+            print(f"  Features: {features}")
+    
+    if 'limits' in plan:
+        print(f"  Limits:")
+        for limit_name, limit_value in plan['limits'].items():
+            print(f"    - {limit_name}: {limit_value}")
+
+print("\n✅ Subscription plans seeded successfully!")
+
+client.close()
diff --git a/DetectifAI_db/setup_database.py b/DetectifAI_db/setup_database.py
new file mode 100644
index 0000000000000000000000000000000000000000..740ef134e821e6f1106b500aa0412bf1f3d5d085
--- /dev/null
+++ b/DetectifAI_db/setup_database.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+"""
+Database setup script for DetectifAI backend
+This script initializes the MongoDB database with the required collections and indexes.
+"""
+
+import os
+import sys
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Check if MONGO_URI is set
+if not os.getenv("MONGO_URI"):
+    print("❌ Error: MONGO_URI environment variable not set")
+    print("Please create a .env file with your MongoDB connection string")
+    print("Example: MONGO_URI=mongodb://localhost:27017/detectifai")
+    sys.exit(1)
+
+try:
+    # Import and run database setup
+    from database_setup import *
+    print("\n✅ Database setup completed successfully!")
+    
+    # Ask if user wants to seed the database
+    seed_choice = input("\nWould you like to seed the database with sample data? (y/n): ").lower().strip()
+    
+    if seed_choice in ['y', 'yes']:
+        print("\n🌱 Seeding database with sample data...")
+        from database_seed import *
+        print("\n✅ Database seeding completed!")
+    else:
+        print("\n⏭️  Skipping database seeding")
+    
+    print("\n🎉 Database initialization complete!")
+    print("\nNext steps:")
+    print("1. Start the integrated Flask app: python app_integrated.py")
+    print("2. Or start the original app: python app.py")
+    print("3. Test the API endpoints at http://localhost:5000")
+    
+except Exception as e:
+    print(f"❌ Error during database setup: {e}")
+    sys.exit(1)
diff --git a/DetectifAI_db/setup_minio.py b/DetectifAI_db/setup_minio.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3504eaf091380d98efbaf351a5d450419366c45
--- /dev/null
+++ b/DetectifAI_db/setup_minio.py
@@ -0,0 +1,91 @@
+"""
+S3-compatible Storage Setup and Test Script for DetectifAI (Backblaze B2)
+"""
+from minio import Minio
+from dotenv import load_dotenv
+import os
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv()
+
+def setup_minio():
+    """Verify the S3-compatible storage (Backblaze B2) used by DetectifAI.
+
+    Checks that the required buckets exist, uploads and removes a small test
+    object in each, and logs the bucket contents. Credentials are read from
+    the environment (``MINIO_ACCESS_KEY`` / ``MINIO_SECRET_KEY``).
+
+    Returns:
+        tuple: ``(True, message)`` on success, ``(False, error_message)`` if an
+        exception escapes the per-bucket handlers.
+    """
+    from io import BytesIO  # put_object needs a stream with read(), not raw bytes
+
+    try:
+        endpoint = os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com')
+        access_key = os.getenv('MINIO_ACCESS_KEY')  # no fallback: keep secrets out of the repo
+        secret_key = os.getenv('MINIO_SECRET_KEY')
+        secure = os.getenv('MINIO_SECURE', 'true').lower() == 'true'
+        region = os.getenv('MINIO_REGION', 'eu-central-003')
+
+        # S3 client setup
+        client = Minio(endpoint, access_key=access_key, secret_key=secret_key,
+                       secure=secure, region=region or None)
+
+        # Define required buckets
+        buckets = [
+            "detectifai-videos",       # Original and compressed videos
+            "detectifai-keyframes",    # Extracted keyframes
+            "detectifai-reports"       # Generated reports (HTML/PDF)
+        ]
+
+        # Verify buckets exist (don't create — buckets managed in B2 dashboard)
+        for bucket in buckets:
+            if client.bucket_exists(bucket):
+                logger.info(f"✅ Bucket exists: {bucket}")
+            else:
+                logger.warning(f"⚠️ Bucket NOT found: {bucket} — create it in Backblaze B2 dashboard")
+
+        # Test upload to each bucket
+        test_data = b"DetectifAI Test Data"
+        for bucket in buckets:
+            try:
+                test_object = f"test_{bucket}.txt"
+                client.put_object(bucket, test_object, BytesIO(test_data), len(test_data))
+                logger.info(f"✅ Test upload successful to {bucket}")
+
+                # Clean up test file
+                client.remove_object(bucket, test_object)
+
+            except Exception as bucket_error:
+                logger.error(f"❌ Failed to upload test file to {bucket}: {str(bucket_error)}")
+
+        # List objects in each bucket
+        logger.info("\nCurrent bucket contents:")
+        for bucket in buckets:
+            logger.info(f"\nBucket: {bucket}")
+            try:
+                objects = client.list_objects(bucket, recursive=True)
+                for obj in objects:
+                    logger.info(f"- {obj.object_name} (size: {obj.size} bytes)")
+            except Exception as list_error:
+                logger.error(f"❌ Failed to list objects in {bucket}: {str(list_error)}")
+
+        return True, "MinIO setup completed successfully"
+
+    except Exception as e:
+        error_message = f"MinIO setup failed: {str(e)}"
+        logger.error(f"❌ {error_message}")
+        return False, error_message
+
+if __name__ == "__main__":
+    success, message = setup_minio()
+    if not success:
+        logger.error(f"❌ MinIO setup failed: {message}")
+        raise SystemExit(1)  # non-zero exit status so shells and CI notice the failure
+    logger.info("✅ MinIO setup completed successfully!")
\ No newline at end of file
diff --git a/DetectifAI_db/setup_nlp_bucket.py b/DetectifAI_db/setup_nlp_bucket.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3728105cbe85b7bf8c81bf0a271a2b13b69f08f
--- /dev/null
+++ b/DetectifAI_db/setup_nlp_bucket.py
@@ -0,0 +1,61 @@
+"""
+Setup script to create the nlp-images bucket in MinIO
+"""
+
+import os
+from dotenv import load_dotenv
+from minio import Minio
+from minio.error import S3Error
+import logging
+
+load_dotenv()
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")  # credentials: environment only, no fallback
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")  # (a committed default would leak the key)
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+NLP_IMAGES_BUCKET = "nlp-images"
+
+def setup_nlp_bucket():
+    """Ensure the nlp-images bucket exists, creating it when absent.
+
+    Returns True when the bucket exists or was created (including the
+    "already exists/owned" S3 error codes), False on any other failure.
+    """
+    try:
+        storage = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY, secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE, region=MINIO_REGION
+        )
+
+        if not storage.bucket_exists(NLP_IMAGES_BUCKET):
+            logger.info(f"Creating MinIO bucket '{NLP_IMAGES_BUCKET}'...")
+            storage.make_bucket(NLP_IMAGES_BUCKET)
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' created successfully")
+        else:
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' already exists")
+        return True
+    except S3Error as s3_error:
+        if s3_error.code not in ("BucketAlreadyOwnedByYou", "BucketAlreadyExists"):
+            logger.error(f"❌ Error creating bucket: {s3_error}")
+            return False
+        logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' already exists")
+        return True
+    except Exception as conn_error:
+        logger.error(f"❌ Error connecting to MinIO: {conn_error}")
+        return False
+
+if __name__ == "__main__":
+    logger.info("Setting up nlp-images bucket...")
+    if not setup_nlp_bucket():
+        logger.error("❌ Setup failed!")
+        # SystemExit instead of exit(): the exit() helper is injected by the
+        # site module and is absent under `python -S` or in frozen builds.
+        raise SystemExit(1)
+    logger.info("✅ Setup complete!")
+
diff --git a/DetectifAI_db/upload_caption_images.py b/DetectifAI_db/upload_caption_images.py
new file mode 100644
index 0000000000000000000000000000000000000000..6034995a24851e7ae0a611deab33d1646408343b
--- /dev/null
+++ b/DetectifAI_db/upload_caption_images.py
@@ -0,0 +1,264 @@
+"""
+Upload Caption Images to MinIO
+
+This script uploads the image files referenced in the captions to the MinIO nlp-images bucket.
+The images should be in a local directory (e.g., 'caption_images' folder).
+
+Usage:
+    python upload_caption_images.py [--image-dir <directory>] [--list]
+"""
+
+import os
+import sys
+from pathlib import Path
+from dotenv import load_dotenv
+from minio import Minio
+from minio.error import S3Error
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv()
+
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA")
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+NLP_IMAGES_BUCKET = "nlp-images"
+
+# Expected image files from upload_captions.py
+# Each name is used twice by upload_all_images(): as the file name looked up
+# inside the image directory and as the object name written to the bucket.
+# The list mirrors the "video_filename" values of HARDCODED_CAPTIONS in
+# upload_captions.py, so the two must be kept in sync.
+EXPECTED_IMAGES = [
+    "img1.webp",
+    "img2.jpg",
+    "img3.png",
+    "img4.png",
+    "img5.jpg",
+    "img6.webp",
+    "img7.webp",
+    "img8.webp",
+    "img9.jpg",
+    "img10.png"
+]
+
+
+def setup_minio_client():
+    """Initialize MinIO client"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        return client
+    except Exception as e:
+        logger.error(f"❌ Error connecting to MinIO: {e}")
+        return None
+
+
+def ensure_bucket_exists(client, bucket_name):
+    """Ensure the bucket exists, create if it doesn't"""
+    try:
+        if not client.bucket_exists(bucket_name):
+            logger.info(f"Creating bucket: {bucket_name}")
+            client.make_bucket(bucket_name)
+            logger.info(f"✅ Created bucket: {bucket_name}")
+        else:
+            logger.info(f"✅ Bucket '{bucket_name}' already exists")
+        return True
+    except S3Error as e:
+        if e.code == "BucketAlreadyOwnedByYou" or e.code == "BucketAlreadyExists":
+            logger.info(f"✅ Bucket '{bucket_name}' already exists")
+            return True
+        logger.error(f"❌ Error creating bucket: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Unexpected error: {e}")
+        return False
+
+
+def upload_image(client, bucket_name, image_path, object_name):
+    """Upload a single image file to MinIO"""
+    try:
+        if not os.path.exists(image_path):
+            logger.warning(f"⚠️ Image file not found: {image_path}")
+            return False
+        
+        file_size = os.path.getsize(image_path)
+        
+        # Determine content type based on extension
+        ext = image_path.lower().split('.')[-1]
+        content_type_map = {
+            'jpg': 'image/jpeg',
+            'jpeg': 'image/jpeg',
+            'png': 'image/png',
+            'webp': 'image/webp',
+            'gif': 'image/gif'
+        }
+        content_type = content_type_map.get(ext, 'application/octet-stream')
+        
+        with open(image_path, 'rb') as file_data:
+            client.put_object(
+                bucket_name,
+                object_name,
+                file_data,
+                length=file_size,
+                content_type=content_type
+            )
+        
+        logger.info(f"✅ Uploaded: {object_name} ({file_size} bytes)")
+        return True
+    except S3Error as e:
+        logger.error(f"❌ S3Error uploading {object_name}: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Error uploading {object_name}: {e}")
+        return False
+
+
+def find_image_directory():
+    """Try to find the directory containing caption images"""
+    # Common locations to check
+    possible_dirs = [
+        Path(__file__).parent / "caption_images",
+        Path(__file__).parent.parent / "caption_images",
+        Path(__file__).parent / "images",
+        Path(__file__).parent.parent / "images",
+        Path(__file__).parent / "DetectifAI_db" / "caption_images",
+    ]
+    
+    for dir_path in possible_dirs:
+        if dir_path.exists() and dir_path.is_dir():
+            # Check if it contains any of the expected images
+            files = [f.name for f in dir_path.iterdir() if f.is_file()]
+            if any(img in files for img in EXPECTED_IMAGES):
+                return dir_path
+    
+    return None
+
+
+def upload_all_images(image_dir=None):
+    """Upload all caption images to MinIO"""
+    logger.info("🚀 Starting Caption Image Upload Process")
+    logger.info("=" * 80)
+    
+    # Initialize MinIO client
+    client = setup_minio_client()
+    if not client:
+        logger.error("❌ Failed to initialize MinIO client")
+        return False
+    
+    # Ensure bucket exists
+    if not ensure_bucket_exists(client, NLP_IMAGES_BUCKET):
+        logger.error("❌ Failed to ensure bucket exists")
+        return False
+    
+    # Find image directory
+    if image_dir is None:
+        image_dir = find_image_directory()
+    
+    if image_dir is None:
+        logger.error("❌ Could not find image directory")
+        logger.info("💡 Please provide the image directory path:")
+        logger.info("   python upload_caption_images.py --image-dir <path>")
+        logger.info("")
+        logger.info("Expected image files:")
+        for img in EXPECTED_IMAGES:
+            logger.info(f"   - {img}")
+        return False
+    
+    image_dir = Path(image_dir)
+    if not image_dir.exists():
+        logger.error(f"❌ Image directory does not exist: {image_dir}")
+        return False
+    
+    logger.info(f"📁 Using image directory: {image_dir}")
+    logger.info("")
+    
+    # Upload each image
+    uploaded_count = 0
+    failed_count = 0
+    missing_count = 0
+    
+    for image_name in EXPECTED_IMAGES:
+        image_path = image_dir / image_name
+        
+        if not image_path.exists():
+            logger.warning(f"⚠️ Image not found: {image_name}")
+            missing_count += 1
+            continue
+        
+        if upload_image(client, NLP_IMAGES_BUCKET, str(image_path), image_name):
+            uploaded_count += 1
+        else:
+            failed_count += 1
+    
+    # Summary
+    logger.info("")
+    logger.info("=" * 80)
+    logger.info("📊 Upload Summary:")
+    logger.info(f"   ✅ Successfully uploaded: {uploaded_count}")
+    logger.info(f"   ❌ Failed: {failed_count}")
+    logger.info(f"   ⚠️ Missing: {missing_count}")
+    logger.info(f"   📦 Total expected: {len(EXPECTED_IMAGES)}")
+    logger.info("=" * 80)
+    
+    if uploaded_count > 0:
+        logger.info("✅ Image upload process completed!")
+        return True
+    else:
+        logger.error("❌ No images were uploaded")
+        return False
+
+
+def list_bucket_contents(client, bucket_name):
+    """List all objects in the bucket"""
+    try:
+        logger.info(f"\n📦 Contents of '{bucket_name}' bucket:")
+        objects = client.list_objects(bucket_name, recursive=True)
+        count = 0
+        for obj in objects:
+            logger.info(f"   - {obj.object_name} ({obj.size} bytes)")
+            count += 1
+        if count == 0:
+            logger.info("   (bucket is empty)")
+        return count
+    except Exception as e:
+        logger.error(f"❌ Error listing bucket contents: {e}")
+        return 0
+
+
+if __name__ == "__main__":
+    import argparse
+    
+    parser = argparse.ArgumentParser(description="Upload caption images to MinIO")
+    parser.add_argument(
+        "--image-dir",
+        type=str,
+        help="Directory containing the caption images"
+    )
+    parser.add_argument(
+        "--list",
+        action="store_true",
+        help="List current contents of nlp-images bucket"
+    )
+    
+    args = parser.parse_args()
+    
+    if args.list:
+        client = setup_minio_client()
+        if client:
+            list_bucket_contents(client, NLP_IMAGES_BUCKET)
+    else:
+        success = upload_all_images(args.image_dir)
+        sys.exit(0 if success else 1)
+
diff --git a/DetectifAI_db/upload_captions.py b/DetectifAI_db/upload_captions.py
new file mode 100644
index 0000000000000000000000000000000000000000..55936ced4ae3d511aa3ede8352c8c3a7142cc5c9
--- /dev/null
+++ b/DetectifAI_db/upload_captions.py
@@ -0,0 +1,349 @@
+"""
+Upload Captions to MongoDB
+
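+This script uploads 10 hardcoded captions linked to media files stored in the
+MinIO 'nlp-images' bucket. The captions are inserted into the MongoDB
+'event_descriptions' collection. When sentence-transformers and faiss are
+installed, a text embedding is stored with each caption and a FAISS index
+plus an id map are written next to this script.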
+
+Usage:
+    python upload_captions.py
+"""
+
+import os
+import uuid
+from datetime import datetime
+from dotenv import load_dotenv
+from pymongo import MongoClient
+from minio import Minio
+import logging
+import numpy as np
+import json
+
+# Optional imports for embeddings and FAISS
+try:
+    from sentence_transformers import SentenceTransformer
+    import faiss
+    SENTER_AVAILABLE = True
+except Exception:
+    SENTER_AVAILABLE = False
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA")
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+
+# MinIO bucket for NLP images/videos
+NLP_IMAGES_BUCKET = "nlp-images"
+
+# Hardcoded captions with video references
+# Each entry carries:
+#   video_filename - object name inside NLP_IMAGES_BUCKET (the values listed
+#                    here are image files, despite the key name)
+#   caption        - text stored as-is and, when available, embedded
+#   confidence     - score stored alongside the caption
+# The file names mirror EXPECTED_IMAGES in upload_caption_images.py.
+HARDCODED_CAPTIONS = [
+    {
+        "video_filename": "img1.webp",
+        "caption": "Forty story building reported to be on fire with smoke visible from several floors",
+        "confidence": 0.95
+    },
+    {
+        "video_filename": "img2.jpg",
+        "caption": "Smoke seen to be coming from a building next to tower by the road",
+        "confidence": 0.87
+    },
+    {
+        "video_filename": "img3.png",
+        "caption": "Large flames visible on a local high-rise building with fire department on the scene",
+        "confidence": 0.92
+    },
+    {
+        "video_filename": "img4.png",
+        "caption": "Wide parking of local school building with many parked cars",
+        "confidence": 0.92
+    },
+    {
+        "video_filename": "img5.jpg",
+        "caption": "Smoke coming from skyscraper fire brigade on scene trying to extinguish the flames",
+        "confidence": 0.89
+    },
+    {
+        "video_filename": "img6.webp",
+        "caption": "dog sitting on grass",
+        "confidence": 0.91
+    },
+    {
+        "video_filename": "img7.webp",
+        "caption": "dog sitting infront of tree trunk in park",
+        "confidence": 0.88
+    },
+    {
+        "video_filename": "img8.webp",
+        "caption": "dog out on a hike with owner",
+        "confidence": 0.84
+    },
+    {
+        "video_filename": "img9.jpg",
+        "caption": "dog jumping over obstacle",
+        "confidence": 0.96
+    },
+    {
+        "video_filename": "img10.png",
+        "caption": "puppy sleeping while hugging stuffed animal",
+        "confidence": 0.79
+    }
+]
+
+# Paths for FAISS index and id map
+# Both files live next to this script and are written by
+# upload_captions_to_mongodb() when embeddings were computed.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_captions.index")
+FAISS_IDMAP_PATH = os.path.join(BASE_DIR, "faiss_captions_idmap.json")
+
+def verify_minio_bucket():
+    """Verify that the nlp-images bucket exists in MinIO"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        
+        if client.bucket_exists(NLP_IMAGES_BUCKET):
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' exists")
+            return True
+        else:
+            logger.warning(f"⚠️ MinIO bucket '{NLP_IMAGES_BUCKET}' does not exist")
+            logger.info(f"Creating bucket '{NLP_IMAGES_BUCKET}'...")
+            client.make_bucket(NLP_IMAGES_BUCKET)
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' created")
+            return True
+    except Exception as e:
+        logger.error(f"❌ Error connecting to MinIO: {e}")
+        return False
+
+
+def list_objects_in_bucket():
+    """List all objects in the nlp-images bucket"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        
+        objects = client.list_objects(NLP_IMAGES_BUCKET)
+        object_list = [obj.object_name for obj in objects]
+        
+        if object_list:
+            logger.info(f"📁 Objects in '{NLP_IMAGES_BUCKET}' bucket:")
+            for obj in object_list:
+                logger.info(f"   - {obj}")
+            return object_list
+        else:
+            logger.warning(f"⚠️ No objects found in '{NLP_IMAGES_BUCKET}' bucket")
+            return []
+    except Exception as e:
+        logger.error(f"❌ Error listing objects: {e}")
+        return []
+
+
+def upload_captions_to_mongodb():
+    """Upload captions to MongoDB event_descriptions collection"""
+    try:
+        # Connect to MongoDB
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        collection = db["event_descriptions"]
+        
+        logger.info(f"📊 Connected to MongoDB database")
+        logger.info(f"📝 Uploading {len(HARDCODED_CAPTIONS)} captions to 'event_descriptions' collection...")
+        
+        inserted_count = 0
+        inserted_documents = []
+
+        # Prepare embedding model and lists for FAISS
+        embeddings = []
+        id_map = []  # maps faiss idx -> description_id
+
+        if not SENTER_AVAILABLE:
+            logger.warning("⚠️ sentence-transformers or faiss not available; captions will be stored without embeddings")
+        else:
+            # Load model once
+            try:
+                embed_model = SentenceTransformer("all-mpnet-base-v2")
+                embed_dim = 768
+                logger.info("✅ Loaded SentenceTransformer 'all-mpnet-base-v2' for embeddings")
+            except Exception as e:
+                logger.error(f"❌ Failed to load embedding model: {e}")
+                embed_model = None
+        
+        for i, caption_data in enumerate(HARDCODED_CAPTIONS, 1):
+            # Generate unique IDs
+            description_id = f"desc_{uuid.uuid4().hex[:12]}"
+            event_id = f"event_{uuid.uuid4().hex[:12]}"
+            
+            # Compute embedding if available
+            text_emb_list = []
+            if SENTER_AVAILABLE and embed_model is not None:
+                try:
+                    emb = embed_model.encode(caption_data["caption"], normalize_embeddings=True).astype("float32")
+                    text_emb_list = emb.tolist()
+                    embeddings.append(emb)
+                    id_map.append(description_id)
+                except Exception as e:
+                    logger.warning(f"⚠️ Failed to compute embedding for caption {i}: {e}")
+
+            # Create caption document
+            caption_doc = {
+                "description_id": description_id,
+                "event_id": event_id,
+                "caption": caption_data["caption"],
+                "confidence": caption_data["confidence"],
+                "text_embedding": text_emb_list,
+                "video_reference": {
+                    "bucket": NLP_IMAGES_BUCKET,
+                    "object_name": caption_data["video_filename"],
+                    "minio_path": f"{NLP_IMAGES_BUCKET}/{caption_data['video_filename']}"
+                },
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+            
+            # Insert into MongoDB
+            result = collection.insert_one(caption_doc)
+            inserted_count += 1
+            inserted_documents.append({
+                "index": i,
+                "description_id": description_id,
+                "event_id": event_id,
+                "video": caption_data["video_filename"],
+                "confidence": caption_data["confidence"]
+            })
+            
+            logger.info(f"✅ [{i}/10] Inserted caption: {description_id}")
+
+        logger.info(f"\n🎉 Successfully uploaded {inserted_count} captions to MongoDB")
+        logger.info("\n📋 Inserted Captions Summary:")
+        logger.info("=" * 80)
+
+        for doc in inserted_documents:
+            logger.info(
+                f"[{doc['index']:2d}] ID: {doc['description_id']} | "
+                f"Event: {doc['event_id']} | "
+                f"Video: {doc['video']} | "
+                f"Confidence: {doc['confidence']:.2f}"
+            )
+
+        logger.info("=" * 80)
+
+        # Display summary statistics
+        total_captions = collection.count_documents({})
+        logger.info(f"\n📊 Total captions in collection: {total_captions}")
+
+        # Build and persist FAISS index if embeddings were computed
+        if SENTER_AVAILABLE and embeddings:
+            try:
+                emb_matrix = np.stack(embeddings, axis=0).astype("float32")
+                dim = emb_matrix.shape[1]
+                index = faiss.IndexFlatIP(dim)
+                # Add embeddings
+                index.add(emb_matrix)
+
+                # Write index to disk
+                faiss.write_index(index, FAISS_INDEX_PATH)
+
+                # Save id map (index -> description_id)
+                with open(FAISS_IDMAP_PATH, "w", encoding="utf-8") as f:
+                    json.dump(id_map, f, indent=2)
+
+                logger.info(f"✅ FAISS index saved to: {FAISS_INDEX_PATH}")
+                logger.info(f"✅ FAISS id map saved to: {FAISS_IDMAP_PATH}")
+            except Exception as e:
+                logger.error(f"❌ Failed to build/save FAISS index: {e}")
+
+        return True
+        
+    except Exception as e:
+        logger.error(f"❌ Error uploading captions to MongoDB: {e}")
+        return False
+
+
+def verify_uploaded_captions():
+    """Verify that captions were successfully uploaded"""
+    try:
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        collection = db["event_descriptions"]
+        
+        # Find recently uploaded captions
+        captions = list(collection.find(
+            {"video_reference": {"$exists": True}},
+            {"_id": 0, "description_id": 1, "caption": 1, "confidence": 1, "video_reference": 1}
+        ).limit(10))
+        
+        if captions:
+            logger.info(f"\n✅ Verification: Found {len(captions)} captions with video references")
+            logger.info("\n📝 Sample Captions:")
+            logger.info("=" * 80)
+            for cap in captions[:3]:
+                logger.info(f"ID: {cap['description_id']}")
+                logger.info(f"Caption: {cap['caption']}")
+                logger.info(f"Confidence: {cap['confidence']:.2f}")
+                logger.info(f"Video: {cap['video_reference']['object_name']}")
+                logger.info("-" * 80)
+            return True
+        else:
+            logger.warning("⚠️ No captions found with video references")
+            return False
+            
+    except Exception as e:
+        logger.error(f"❌ Error verifying captions: {e}")
+        return False
+
+
+def main():
+    """Main execution function"""
+    logger.info("🚀 Starting Caption Upload Process")
+    logger.info("=" * 80)
+    
+    # Step 1: Verify MinIO bucket
+    logger.info("\n[Step 1/4] Verifying MinIO bucket...")
+    if not verify_minio_bucket():
+        logger.error("❌ Failed to verify MinIO bucket. Exiting.")
+        return False
+    
+    # Step 2: List objects in bucket
+    logger.info("\n[Step 2/4] Listing objects in MinIO bucket...")
+    objects = list_objects_in_bucket()
+    
+    # Step 3: Upload captions to MongoDB
+    logger.info("\n[Step 3/4] Uploading captions to MongoDB...")
+    if not upload_captions_to_mongodb():
+        logger.error("❌ Failed to upload captions. Exiting.")
+        return False
+    
+    # Step 4: Verify upload
+    logger.info("\n[Step 4/4] Verifying uploaded captions...")
+    if not verify_uploaded_captions():
+        logger.warning("⚠️ Verification encountered issues")
+    
+    logger.info("\n" + "=" * 80)
+    logger.info("🎉 Caption Upload Process Completed Successfully!")
+    logger.info("=" * 80)
+    
+    return True
+
+
+if __name__ == "__main__":
+    success = main()
+    exit(0 if success else 1)
diff --git a/DetectifAI_db/vector_index.py b/DetectifAI_db/vector_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..69e2febfacb4692b17f3cd3abd9daa826d920733
--- /dev/null
+++ b/DetectifAI_db/vector_index.py
@@ -0,0 +1,348 @@
+import faiss
+import numpy as np
+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+import pickle
+from typing import List, Dict, Tuple, Optional
+import logging
+
+# Load variables from a .env file (python-dotenv) before anything reads the
+# environment.
+load_dotenv()
+
+# Configure logging
+# NOTE(review): basicConfig() runs when this module is imported, so importing
+# it configures the root logger of the host application - confirm that is
+# intended.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class FAISSIndexManager:
+    """Manages FAISS indices for text and visual embeddings"""
+    
+    def __init__(self, mongo_uri: str, db_name: str = None):
+        self.mongo_client = MongoClient(mongo_uri)
+        self.db = self.mongo_client.get_default_database() if not db_name else self.mongo_client[db_name]
+        
+        # Collection references
+        self.event_descriptions = self.db.event_description
+        self.events = self.db.event
+        
+        # FAISS indices
+        self.text_index = None
+        self.visual_index = None
+        
+        # Index metadata
+        self.text_index_metadata = {}  # Maps FAISS ID to MongoDB document ID
+        self.visual_index_metadata = {}  # Maps FAISS ID to MongoDB document ID
+        
+        # Embedding dimensions (adjust based on your embedding model)
+        self.text_embedding_dim = 384  # Common for sentence-transformers
+        self.visual_embedding_dim = 512  # Common for visual embeddings
+        
+        # Index file paths
+        self.text_index_path = "faiss_text_index.bin"
+        self.visual_index_path = "faiss_visual_index.bin"
+        self.text_metadata_path = "faiss_text_metadata.pkl"
+        self.visual_metadata_path = "faiss_visual_metadata.pkl"
+        
+        self._initialize_indices()
+    
+    def _initialize_indices(self):
+        """Initialize or load existing FAISS indices"""
+        try:
+            # Try to load existing indices
+            if os.path.exists(self.text_index_path) and os.path.exists(self.text_metadata_path):
+                self._load_text_index()
+                logger.info("Loaded existing text index")
+            else:
+                self._create_text_index()
+                logger.info("Created new text index")
+            
+            if os.path.exists(self.visual_index_path) and os.path.exists(self.visual_metadata_path):
+                self._load_visual_index()
+                logger.info("Loaded existing visual index")
+            else:
+                self._create_visual_index()
+                logger.info("Created new visual index")
+                
+        except Exception as e:
+            logger.error(f"Error initializing indices: {e}")
+            # Fallback to creating new indices
+            self._create_text_index()
+            self._create_visual_index()
+    
+    def _create_text_index(self):
+        """Create a new FAISS index for text embeddings"""
+        self.text_index = faiss.IndexFlatIP(self.text_embedding_dim)  # Inner product for cosine similarity
+        self.text_index_metadata = {}
+        self._save_text_index()
+    
+    def _create_visual_index(self):
+        """Create a new FAISS index for visual embeddings"""
+        self.visual_index = faiss.IndexFlatIP(self.visual_embedding_dim)  # Inner product for cosine similarity
+        self.visual_index_metadata = {}
+        self._save_visual_index()
+    
+    def _load_text_index(self):
+        """Load text index from disk"""
+        self.text_index = faiss.read_index(self.text_index_path)
+        with open(self.text_metadata_path, 'rb') as f:
+            self.text_index_metadata = pickle.load(f)
+    
+    def _load_visual_index(self):
+        """Load visual index from disk"""
+        self.visual_index = faiss.read_index(self.visual_index_path)
+        with open(self.visual_metadata_path, 'rb') as f:
+            self.visual_index_metadata = pickle.load(f)
+    
+    def _save_text_index(self):
+        """Save text index to disk"""
+        if self.text_index is not None:
+            faiss.write_index(self.text_index, self.text_index_path)
+            with open(self.text_metadata_path, 'wb') as f:
+                pickle.dump(self.text_index_metadata, f)
+    
+    def _save_visual_index(self):
+        """Save visual index to disk"""
+        if self.visual_index is not None:
+            faiss.write_index(self.visual_index, self.visual_index_path)
+            with open(self.visual_metadata_path, 'wb') as f:
+                pickle.dump(self.visual_index_metadata, f)
+    
+    def rebuild_text_index(self):
+        """Rebuild text index from MongoDB data"""
+        logger.info("Rebuilding text index from MongoDB...")
+        
+        # Create new index
+        self._create_text_index()
+        
+        # Fetch all event descriptions with embeddings
+        cursor = self.event_descriptions.find(
+            {"text_embedding": {"$exists": True, "$ne": []}},
+            {"_id": 0, "description_id": 1, "text_embedding": 1}
+        )
+        
+        embeddings = []
+        metadata = {}
+        
+        for doc in cursor:
+            embedding = np.array(doc["text_embedding"], dtype=np.float32)
+            if len(embedding) == self.text_embedding_dim:
+                faiss_id = len(embeddings)
+                embeddings.append(embedding)
+                metadata[faiss_id] = doc["description_id"]
+        
+        if embeddings:
+            embeddings_array = np.vstack(embeddings)
+            self.text_index.add(embeddings_array)
+            self.text_index_metadata = metadata
+            self._save_text_index()
+            logger.info(f"Rebuilt text index with {len(embeddings)} embeddings")
+        else:
+            logger.warning("No text embeddings found in MongoDB")
+    
+    def rebuild_visual_index(self):
+        """Rebuild visual index from MongoDB data"""
+        logger.info("Rebuilding visual index from MongoDB...")
+        
+        # Create new index
+        self._create_visual_index()
+        
+        # Fetch all events with visual embeddings
+        cursor = self.events.find(
+            {"visual_embedding": {"$exists": True, "$ne": []}},
+            {"_id": 0, "event_id": 1, "visual_embedding": 1}
+        )
+        
+        embeddings = []
+        metadata = {}
+        
+        for doc in cursor:
+            embedding = np.array(doc["visual_embedding"], dtype=np.float32)
+            if len(embedding) == self.visual_embedding_dim:
+                faiss_id = len(embeddings)
+                embeddings.append(embedding)
+                metadata[faiss_id] = doc["event_id"]
+        
+        if embeddings:
+            embeddings_array = np.vstack(embeddings)
+            self.visual_index.add(embeddings_array)
+            self.visual_index_metadata = metadata
+            self._save_visual_index()
+            logger.info(f"Rebuilt visual index with {len(embeddings)} embeddings")
+        else:
+            logger.warning("No visual embeddings found in MongoDB")
+    
+    def add_text_embedding(self, description_id: str, embedding: List[float]) -> bool:
+        """Add a text embedding to the index"""
+        try:
+            embedding_array = np.array(embedding, dtype=np.float32).reshape(1, -1)
+            
+            if embedding_array.shape[1] != self.text_embedding_dim:
+                logger.error(f"Text embedding dimension mismatch: expected {self.text_embedding_dim}, got {embedding_array.shape[1]}")
+                return False
+            
+            faiss_id = self.text_index.ntotal
+            self.text_index.add(embedding_array)
+            self.text_index_metadata[faiss_id] = description_id
+            self._save_text_index()
+            
+            logger.info(f"Added text embedding for description_id: {description_id}")
+            return True
+            
+        except Exception as e:
+            logger.error(f"Error adding text embedding: {e}")
+            return False
+    
+    def add_visual_embedding(self, event_id: str, embedding: List[float]) -> bool:
+        """Add a visual embedding to the index"""
+        try:
+            embedding_array = np.array(embedding, dtype=np.float32).reshape(1, -1)
+            
+            if embedding_array.shape[1] != self.visual_embedding_dim:
+                logger.error(f"Visual embedding dimension mismatch: expected {self.visual_embedding_dim}, got {embedding_array.shape[1]}")
+                return False
+            
+            faiss_id = self.visual_index.ntotal
+            self.visual_index.add(embedding_array)
+            self.visual_index_metadata[faiss_id] = event_id
+            self._save_visual_index()
+            
+            logger.info(f"Added visual embedding for event_id: {event_id}")
+            return True
+            
+        except Exception as e:
+            logger.error(f"Error adding visual embedding: {e}")
+            return False
+    
+    def search_text_embeddings(self, query_embedding: List[float], k: int = 10) -> List[Dict]:
+        """Search for similar text embeddings"""
+        try:
+            if self.text_index.ntotal == 0:
+                return []
+            
+            query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+            
+            if query_array.shape[1] != self.text_embedding_dim:
+                logger.error(f"Query embedding dimension mismatch: expected {self.text_embedding_dim}, got {query_array.shape[1]}")
+                return []
+            
+            # Search FAISS
+            scores, indices = self.text_index.search(query_array, min(k, self.text_index.ntotal))
+            
+            # Fetch corresponding documents from MongoDB
+            results = []
+            for score, idx in zip(scores[0], indices[0]):
+                if idx in self.text_index_metadata:
+                    description_id = self.text_index_metadata[idx]
+                    doc = self.event_descriptions.find_one(
+                        {"description_id": description_id},
+                        {"_id": 0}
+                    )
+                    if doc:
+                        doc["similarity_score"] = float(score)
+                        results.append(doc)
+            
+            return results
+            
+        except Exception as e:
+            logger.error(f"Error searching text embeddings: {e}")
+            return []
+    
+    def search_visual_embeddings(self, query_embedding: List[float], k: int = 10) -> List[Dict]:
+        """
+        Find the stored events most similar to a visual embedding.
+
+        Args:
+            query_embedding: Query vector; must contain exactly
+                ``self.visual_embedding_dim`` values.
+            k: Maximum number of neighbours to request from the visual index.
+
+        Returns:
+            Documents from ``self.events`` (without ``_id``), in the order
+            returned by the index, each extended with a ``similarity_score``
+            field holding the raw index score. An empty list is returned when
+            the index is empty, ``k`` is not positive, the query has the wrong
+            dimension, or any error occurs.
+        """
+        try:
+            total = self.visual_index.ntotal
+            # Nothing to search, or nothing requested: skip the index call
+            # (FAISS search expects k >= 1).
+            if total == 0 or k <= 0:
+                return []
+
+            query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+
+            if query_array.shape[1] != self.visual_embedding_dim:
+                logger.error(f"Query embedding dimension mismatch: expected {self.visual_embedding_dim}, got {query_array.shape[1]}")
+                return []
+
+            # Search FAISS (never ask for more neighbours than vectors stored)
+            scores, indices = self.visual_index.search(query_array, min(k, total))
+
+            # Fetch corresponding documents from MongoDB
+            results = []
+            for score, raw_idx in zip(scores[0], indices[0]):
+                # Use a plain int key; FAISS reports missing neighbours as -1.
+                idx = int(raw_idx)
+                if idx < 0 or idx not in self.visual_index_metadata:
+                    continue
+
+                doc = self.events.find_one(
+                    {"event_id": self.visual_index_metadata[idx]},
+                    {"_id": 0}
+                )
+                if doc:
+                    doc["similarity_score"] = float(score)
+                    results.append(doc)
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Error searching visual embeddings: {e}")
+            return []
+    
+    def get_index_stats(self) -> Dict:
+        """Report the vector count of each index and the configured dimensions."""
+        # A missing (falsy) index is reported as holding zero vectors.
+        text_count = self.text_index.ntotal if self.text_index else 0
+        visual_count = self.visual_index.ntotal if self.visual_index else 0
+
+        stats = {}
+        stats["text_index_size"] = text_count
+        stats["visual_index_size"] = visual_count
+        stats["text_embedding_dim"] = self.text_embedding_dim
+        stats["visual_embedding_dim"] = self.visual_embedding_dim
+        return stats
+    
+    def close(self):
+        """
+        Persist both indices and close the MongoDB client.
+
+        The client is closed even if saving an index raises, and the visual
+        index is still saved when saving the text index fails. Any exception
+        from the save calls propagates to the caller after cleanup.
+        """
+        try:
+            try:
+                self._save_text_index()
+            finally:
+                self._save_visual_index()
+        finally:
+            # Always release the connection, whatever happened above.
+            self.mongo_client.close()
+
+# Process-wide FAISSIndexManager, created on first use by get_faiss_manager()
+faiss_manager = None
+
+def get_faiss_manager() -> FAISSIndexManager:
+    """Return the shared FAISS manager, building it from ``MONGO_URI`` on first call."""
+    global faiss_manager
+    if faiss_manager is not None:
+        return faiss_manager
+
+    # First call: read the connection string from the environment and cache the manager.
+    faiss_manager = FAISSIndexManager(os.getenv("MONGO_URI"))
+    return faiss_manager
+
+# Global model cache (populated lazily by generate_text_embedding)
+_text_embedding_model = None
+
+
+def _fallback_text_embedding(text: str, dim: int = 768) -> List[float]:
+    """Return a pseudo-random embedding that depends only on ``text``."""
+    import hashlib
+
+    # The built-in hash() of a str is salted per process, so it cannot provide a
+    # reproducible seed; derive the seed from a stable digest instead.
+    digest = hashlib.sha256(str(text).encode("utf-8", errors="replace")).digest()
+    seed = int.from_bytes(digest[:4], "big")
+    # A private RandomState leaves NumPy's global random state untouched.
+    return np.random.RandomState(seed).randn(dim).astype(np.float32).tolist()
+
+
+def generate_text_embedding(text: str) -> List[float]:
+    """
+    Generate text embeddings using SentenceTransformer.
+    Uses all-mpnet-base-v2 for compatibility with NLP search (query_retreival.py).
+    Model is lazy-loaded and cached on first call.
+
+    Args:
+        text: Text to embed.
+
+    Returns:
+        The normalized model embedding as a list of float32 values. If the model
+        cannot be loaded or encoding fails, a 768-value pseudo-random vector
+        derived from ``text`` is returned instead so callers keep working;
+        that vector is identical across calls and processes for the same text
+        but carries no semantic meaning.
+    """
+    global _text_embedding_model
+
+    if _text_embedding_model is None:
+        try:
+            from sentence_transformers import SentenceTransformer
+            _text_embedding_model = SentenceTransformer('all-mpnet-base-v2')
+            logger.info("✅ Loaded SentenceTransformer (all-mpnet-base-v2) for text embeddings")
+        except Exception as e:
+            logger.error(f"Failed to load SentenceTransformer: {e}")
+            # Fallback to deterministic random for graceful degradation
+            return _fallback_text_embedding(text)
+
+    try:
+        embedding = _text_embedding_model.encode(text, normalize_embeddings=True)
+        return embedding.astype(np.float32).tolist()
+    except Exception as e:
+        logger.error(f"Failed to generate embedding for text: {e}")
+        return _fallback_text_embedding(text)
+
+def generate_visual_embedding(image_data: bytes = None) -> List[float]:
+    """
+    Placeholder function to generate visual embeddings.
+    Replace this with your actual visual embedding model.
+
+    Args:
+        image_data: Currently ignored; accepted so callers already use the
+            final signature.
+
+    Returns:
+        The same fixed 512-value float32 vector on every call.
+    """
+    # For now, return a random embedding of the correct dimension
+    # In production, use a proper visual embedding model
+
+    # Fixed seed for demo. A private RandomState yields the same values as
+    # seeding the global generator with 42, without resetting the global
+    # NumPy random state for the rest of the process.
+    return np.random.RandomState(42).randn(512).astype(np.float32).tolist()
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..0d8cf22ec33360f869561132ef86808d67676118
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,92 @@
+# ============================================================
+# DetectifAI Backend — Hugging Face Spaces (Docker SDK, CPU)
+# ============================================================
+FROM python:3.11-slim
+
+# ---- Non-interactive apt, unbuffered Python output, no pip cache ----
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PORT=7860
+
+WORKDIR /app
+
+# ---- System deps (OpenCV, WeasyPrint, ffmpeg) ----
+# libgl1 replaces libgl1-mesa-glx, which is no longer installable on the
+# Debian release behind current python:3.11-slim images.
+# libgdk-pixbuf-2.0-0 is the real package behind the transitional
+# libgdk-pixbuf2.0-0 name.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libgl1 libglib2.0-0 libsm6 libxext6 libxrender-dev \
+    libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf-2.0-0 \
+    libffi-dev shared-mime-info \
+    ffmpeg \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+
+# ---- Install PyTorch CPU-only first (saves ~1 GB vs CUDA) ----
+RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+# ---- Python deps (torch excluded — installed above as CPU-only) ----
+COPY requirements-docker.txt .
+RUN pip install --no-cache-dir -r requirements-docker.txt
+
+# ---- Copy application code ----
+# Core application files
+COPY app.py config.py main_pipeline.py database_video_service.py \
+     object_detection.py behavior_analysis_integrator.py \
+     video_captioning_integrator.py event_aggregation.py \
+     video_segmentation.py highlight_reel.py video_compression.py \
+     json_reports.py detectifai_events.py facial_recognition.py \
+     stripe_service.py subscription_middleware.py subscription_routes.py \
+     alert_routes.py real_time_alerts.py event_clip_generator.py \
+     extract_upload_keyframes.py live_stream_processor.py \
+     start_detectifai.py ./
+
+# Sub-packages
+COPY core/ core/
+COPY database/ database/
+COPY report_generation/ report_generation/
+COPY video_captioning/ video_captioning/
+COPY behavior_analysis/ behavior_analysis/
+COPY nlp_search/ nlp_search/
+COPY DetectifAI_db/ DetectifAI_db/
+
+# Small model files (<50 MB each) — ship in image
+COPY models/fire_YOLO11.pt models/fire_YOLO11.pt
+COPY models/weapon_YOLO11.pt models/weapon_YOLO11.pt
+COPY models/merged_fire_knife_gun.pt models/merged_fire_knife_gun.pt
+# JSON (exec) form is required for paths that contain whitespace
+COPY ["models/best (2).pt", "models/best (2).pt"]
+COPY models/classifier_svm.pkl models/classifier_svm.pkl
+COPY models/label_encoder.pkl models/label_encoder.pkl
+COPY models/metadata.json models/metadata.json
+
+# Copy the top-level model/ directory (FAISS/SVM face index)
+COPY model/ /app/model/
+
+# ---- Pre-create writable directories ----
+# report_generation/models is included so the "download on first use" fallback
+# mentioned below can write there when the container runs as a non-root user.
+RUN mkdir -p /app/uploads /app/video_processing_outputs /app/logs \
+    /app/temp_faces /app/report_generation/models \
+    && chmod -R 777 /app/uploads /app/video_processing_outputs /app/logs \
+       /app/temp_faces /app/report_generation/models
+
+# ---- Download large models at build time (cached in Docker layer) ----
+# fight_detection.pt & accident_detection.pt (~127 MB each)
+# Qwen2.5-3B GGUF (~2 GB)
+# This runs once during build; layer is cached on HF Spaces.
+# A failed download only prints a warning so the image still builds.
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+print('Downloading fight_detection.pt...'); \
+hf_hub_download('blacksinisterx/detectifai-models', 'fight_detection.pt', local_dir='/app/behavior_analysis', local_dir_use_symlinks=False); \
+print('Downloading accident_detection.pt...'); \
+hf_hub_download('blacksinisterx/detectifai-models', 'accident_detection.pt', local_dir='/app/behavior_analysis', local_dir_use_symlinks=False); \
+print('Done with behavior models.'); \
+" || echo "WARNING: Could not download behavior models — will retry at startup"
+
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+print('Downloading Qwen2.5-3B GGUF (~2 GB)...'); \
+hf_hub_download('Qwen/Qwen2.5-3B-Instruct-GGUF', 'qwen2.5-3b-instruct-q4_k_m.gguf', local_dir='/app/report_generation/models', local_dir_use_symlinks=False); \
+print('Done with LLM model.'); \
+" || echo "WARNING: Could not download LLM model — report generation will download on first use"
+
+EXPOSE 7860
+
+# ---- Start Flask ----
+CMD ["python", "app.py"]
diff --git a/README.md b/README.md
index fb9778a10e1e52a339bd61851c1a7379c6c61451..2650b7dc227962edff09abaf134463158ddd82d5 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,31 @@
 ---
-title: Detectifai Backend
-emoji: 📚
-colorFrom: yellow
-colorTo: indigo
+title: DetectifAI Backend
+emoji: "\U0001F50D"
+colorFrom: blue
+colorTo: red
 sdk: docker
-pinned: false
+app_port: 7860
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# DetectifAI Backend API
+
+AI-powered CCTV surveillance system backend. Runs Flask + PyTorch + YOLO on CPU.
+
+## Features
+- Video upload & processing (object detection, action recognition)
+- Fire / weapon / fight / accident / wall-climbing detection
+- Video captioning with BLIP
+- Facial recognition with FaceNet
+- Forensic report generation with local LLM (Qwen2.5-3B)
+- Stripe subscription management
+
+## Environment Variables (set in Space Settings → Secrets)
+- `MONGO_URI` — MongoDB Atlas connection string
+- `MINIO_ENDPOINT` — Cloud object storage endpoint (Cloudflare R2 recommended)
+- `MINIO_ACCESS_KEY` — Storage access key
+- `MINIO_SECRET_KEY` — Storage secret key
+- `MINIO_SECURE` — `true` for HTTPS
+- `JWT_SECRET` — JWT signing secret
+- `STRIPE_SECRET_KEY` — Stripe secret key
+- `FRONTEND_URL` — Vercel frontend URL (for CORS)
+- `CORS_ORIGINS` — Comma-separated allowed origins
diff --git a/alert_routes.py b/alert_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f48ed6bd9057e45fa30c5d363630927e708ffea
--- /dev/null
+++ b/alert_routes.py
@@ -0,0 +1,361 @@
+"""
+Alert API Routes for DetectifAI
+
+Flask Blueprint providing:
+- SSE (Server-Sent Events) endpoint for real-time alert streaming
+- REST endpoints for alert confirmation/dismissal
+- Alert history and statistics
+- Alert snapshot image serving
+"""
+
+import json
+import time
+import logging
+import queue
+from datetime import datetime
+
+from flask import Blueprint, request, jsonify, Response, stream_with_context
+
+from real_time_alerts import get_alert_engine
+
+logger = logging.getLogger(__name__)
+
+alert_bp = Blueprint('alerts', __name__, url_prefix='/api/alerts')
+
+
+# ========================================
+# SSE Stream Endpoint
+# ========================================
+
+@alert_bp.route('/stream', methods=['GET'])
+def alert_stream():
+    """
+    SSE (Server-Sent Events) endpoint for real-time alert streaming.
+
+    Frontend connects to this endpoint and receives push notifications
+    whenever a new alert is generated by the live stream pipeline.
+    A ``connected`` event is sent first, followed by an ``active_alerts``
+    event when alerts are already pending.
+
+    Response format (SSE):
+        event: alert
+        data: {"alert_id": "...", "severity": "critical", ...}
+
+        event: alert_update
+        data: {"alert_id": "...", "status": "confirmed", ...}
+
+        event: heartbeat
+        data: {"time": 1234567890}
+    """
+    engine = get_alert_engine()
+    subscriber_queue = engine.subscribe()
+
+    def event_stream():
+        try:
+            # Send initial connection event
+            yield f"event: connected\ndata: {json.dumps({'message': 'Connected to alert stream', 'timestamp': time.time()})}\n\n"
+
+            # Send any active pending alerts immediately
+            active = engine.get_active_alerts()
+            if active:
+                yield f"event: active_alerts\ndata: {json.dumps(active)}\n\n"
+
+            heartbeat_interval = 15  # seconds
+            last_heartbeat = time.time()
+
+            while True:
+                try:
+                    # Wait for alert with timeout (for heartbeat)
+                    alert_data = subscriber_queue.get(timeout=heartbeat_interval)
+                except queue.Empty:
+                    # Send heartbeat to keep connection alive
+                    now = time.time()
+                    if now - last_heartbeat >= heartbeat_interval:
+                        stats = engine.get_stats()
+                        yield f"event: heartbeat\ndata: {json.dumps({'time': now, 'pending': stats.get('active_pending_count', 0)})}\n\n"
+                        last_heartbeat = now
+                    continue
+
+                if alert_data is None:
+                    # Poison pill — disconnect
+                    break
+
+                # Determine event type. Work on a shallow copy: the queued dict
+                # may be shared with other subscribers, so popping "type" from
+                # it directly would hide the event type from them.
+                if isinstance(alert_data, dict):
+                    payload = dict(alert_data)
+                    event_type = payload.pop("type", "alert")
+                else:
+                    payload = alert_data
+                    event_type = "alert"
+
+                yield f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"
+
+        except GeneratorExit:
+            logger.info("SSE client disconnected")
+        except Exception as e:
+            logger.error(f"SSE stream error: {e}")
+        finally:
+            # Always release the subscription once the stream ends
+            engine.unsubscribe(subscriber_queue)
+
+    # NOTE(review): the wildcard Access-Control-Allow-Origin below is set
+    # explicitly on this response; confirm it is intended alongside any
+    # app-level CORS allow-list.
+    return Response(
+        stream_with_context(event_stream()),
+        mimetype='text/event-stream',
+        headers={
+            'Cache-Control': 'no-cache',
+            'X-Accel-Buffering': 'no',
+            'Connection': 'keep-alive',
+            'Access-Control-Allow-Origin': '*',
+        }
+    )
+
+
+# ========================================
+# Alert Actions
+# ========================================
+
+@alert_bp.route('/confirm/<alert_id>', methods=['POST'])
+def confirm_alert(alert_id):
+    """
+    Confirm an alert as a real threat.
+
+    Body (JSON, optional):
+        user_id: str (optional, defaults to 'anonymous')
+        note: str (optional, defaults to '')
+
+    Returns 404 when the engine reports no such alert and 500 on unexpected
+    errors.
+    """
+    try:
+        # The body is optional: silent=True yields None (instead of raising)
+        # when it is missing, not JSON, or malformed.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        user_id = data.get('user_id', 'anonymous')
+        note = data.get('note', '')
+
+        engine = get_alert_engine()
+        result = engine.confirm_alert(alert_id, user_id=user_id, note=note)
+
+        if result:
+            return jsonify({
+                'success': True,
+                'message': f'Alert {alert_id} confirmed as real threat',
+                'alert': result
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'error': f'Alert {alert_id} not found'
+            }), 404
+
+    except Exception as e:
+        logger.error(f"Error confirming alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@alert_bp.route('/dismiss/<alert_id>', methods=['POST'])
+def dismiss_alert(alert_id):
+    """
+    Dismiss an alert as a false positive.
+
+    Body (JSON, optional):
+        user_id: str (optional, defaults to 'anonymous')
+        note: str (optional, defaults to '')
+
+    Returns 404 when the engine reports no such alert and 500 on unexpected
+    errors.
+    """
+    try:
+        # The body is optional: silent=True yields None (instead of raising)
+        # when it is missing, not JSON, or malformed.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        user_id = data.get('user_id', 'anonymous')
+        note = data.get('note', '')
+
+        engine = get_alert_engine()
+        result = engine.dismiss_alert(alert_id, user_id=user_id, note=note)
+
+        if result:
+            return jsonify({
+                'success': True,
+                'message': f'Alert {alert_id} dismissed as false positive',
+                'alert': result
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'error': f'Alert {alert_id} not found'
+            }), 404
+
+    except Exception as e:
+        logger.error(f"Error dismissing alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+# ========================================
+# Alert Queries
+# ========================================
+
+@alert_bp.route('/active', methods=['GET'])
+def get_active_alerts():
+    """
+    List the alerts the engine currently reports as active.
+
+    Query params:
+        camera_id: str (optional) - passed through to the engine as a filter
+    """
+    try:
+        camera_filter = request.args.get('camera_id')
+        pending = get_alert_engine().get_active_alerts(camera_id=camera_filter)
+
+        payload = {
+            'success': True,
+            'count': len(pending),
+            'alerts': pending
+        }
+        return jsonify(payload)
+
+    except Exception as e:
+        logger.error(f"Error getting active alerts: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@alert_bp.route('/history', methods=['GET'])
+def get_alert_history():
+    """
+    Get alert history with optional filters.
+
+    Query params:
+        limit: int (default 50; non-numeric or non-positive values fall back
+            to the default)
+        camera_id: str (optional)
+        severity: str (optional) - critical, high, medium, low
+        status: str (optional) - pending, confirmed, dismissed
+
+    The database collection is queried first (newest first); if that fails,
+    the engine's in-memory history is used instead.
+    """
+    default_limit = 50
+    try:
+        # type=int makes an unparsable value return the default instead of raising
+        limit = request.args.get('limit', default=default_limit, type=int)
+        if limit is None or limit < 1:
+            limit = default_limit
+        camera_id = request.args.get('camera_id')
+        severity = request.args.get('severity')
+        status = request.args.get('status')
+
+        engine = get_alert_engine()
+
+        # Try to get from DB for persistence across restarts
+        try:
+            # Only the filters that were actually supplied go into the query
+            query = {
+                field: value
+                for field, value in (
+                    ("camera_id", camera_id),
+                    ("severity", severity),
+                    ("status", status),
+                )
+                if value
+            }
+
+            db_alerts = list(
+                engine.alerts_collection.find(query)
+                .sort("timestamp", -1)
+                .limit(limit)
+            )
+
+            # Convert ObjectId to string
+            for alert in db_alerts:
+                alert["_id"] = str(alert["_id"])
+
+            return jsonify({
+                'success': True,
+                'count': len(db_alerts),
+                'alerts': db_alerts
+            })
+        except Exception as db_error:
+            # Fallback to in-memory, but leave a trace of why the DB path failed
+            logger.warning(f"Alert history DB query failed, using in-memory history: {db_error}")
+            alerts = engine.get_alert_history(
+                limit=limit, camera_id=camera_id,
+                severity=severity, status=status
+            )
+            return jsonify({
+                'success': True,
+                'count': len(alerts),
+                'alerts': alerts
+            })
+
+    except Exception as e:
+        logger.error(f"Error getting alert history: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@alert_bp.route('/<alert_id>', methods=['GET'])
+def get_alert(alert_id):
+    """Look up one alert by its ID; responds 404 when the engine has no match."""
+    try:
+        found = get_alert_engine().get_alert_by_id(alert_id)
+
+        if not found:
+            return jsonify({'success': False, 'error': 'Alert not found'}), 404
+
+        return jsonify({'success': True, 'alert': found})
+
+    except Exception as e:
+        logger.error(f"Error getting alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@alert_bp.route('/stats', methods=['GET'])
+def get_alert_stats():
+    """Return the alert engine's statistics under the ``stats`` key."""
+    try:
+        body = {'success': True, 'stats': get_alert_engine().get_stats()}
+        return jsonify(body)
+
+    except Exception as e:
+        logger.error(f"Error getting alert stats: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@alert_bp.route('/snapshot/<alert_id>', methods=['GET'])
+def get_alert_snapshot(alert_id):
+    """
+    Return a URL for the frame snapshot attached to an alert.
+
+    Responds with JSON containing ``url``; 404 when the alert or its
+    ``frame_snapshot_path`` is missing, 500 when no URL could be generated.
+    """
+    try:
+        engine = get_alert_engine()
+        record = engine.get_alert_by_id(alert_id)
+
+        if not record:
+            return jsonify({'success': False, 'error': 'Alert not found'}), 404
+
+        stored_path = record.get('frame_snapshot_path')
+        if not stored_path:
+            return jsonify({'success': False, 'error': 'No snapshot available'}), 404
+
+        # Ask the engine for a fresh URL on every request
+        snapshot_url = engine._get_snapshot_url(stored_path)
+        if not snapshot_url:
+            return jsonify({'success': False, 'error': 'Failed to generate snapshot URL'}), 500
+
+        return jsonify({'success': True, 'url': snapshot_url})
+
+    except Exception as e:
+        logger.error(f"Error getting snapshot: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+# ========================================
+# Test Endpoint (for development)
+# ========================================
+
+@alert_bp.route('/test', methods=['POST'])
+def test_alert():
+    """
+    Send a test alert for development/testing.
+
+    Body (JSON, optional):
+        detection_class: str (e.g., 'fire', 'gun', 'fighting'; default 'fire')
+        confidence: float (0.0-1.0, default 0.85)
+        camera_id: str (optional, default 'webcam_01')
+
+    Returns 400 when ``confidence`` is not numeric and 500 on unexpected errors.
+    """
+    try:
+        # The body is optional: silent=True yields None (instead of raising)
+        # when it is missing, not JSON, or malformed.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        detection_class = data.get('detection_class', 'fire')
+        camera_id = data.get('camera_id', 'webcam_01')
+
+        # A bad confidence value is a client error, not a server failure
+        try:
+            confidence = float(data.get('confidence', 0.85))
+        except (TypeError, ValueError):
+            return jsonify({
+                'success': False,
+                'error': 'confidence must be a number'
+            }), 400
+
+        engine = get_alert_engine()
+        alert = engine.process_detection(
+            camera_id=camera_id,
+            detection_class=detection_class,
+            confidence=confidence,
+            timestamp=time.time(),
+        )
+
+        if alert:
+            return jsonify({
+                'success': True,
+                'message': f'Test alert created: {alert.display_name}',
+                'alert': alert.to_sse_payload()
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'message': 'Alert was suppressed (cooldown or low confidence)'
+            })
+
+    except Exception as e:
+        logger.error(f"Error creating test alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..3bf0d58529c5ae6ccc54c650a8eac1f60e98a87e
--- /dev/null
+++ b/app.py
@@ -0,0 +1,3731 @@
+"""
+DetectifAI Flask Backend - AI-Powered CCTV Surveillance System
+
+Enhanced Flask API for:
+- Video upload and processing with DetectifAI security focus
+- Real-time processing status and results
+- Object detection with fire/weapon recognition
+- Security event analysis and threat assessment
+- Frontend integration for surveillance dashboard
+- Automated forensic report generation
+"""
+
+from flask import Flask, request, jsonify, send_file, send_from_directory, Response, redirect
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+import os
+import threading
+import json
+from datetime import datetime, timedelta
+import logging
+import uuid
+import time
+import urllib.parse
+from typing import List, Dict, Any
+
+# Import DetectifAI components
+from main_pipeline import CompleteVideoProcessingPipeline
+from config import get_security_focused_config, VideoProcessingConfig
+
+# Import Report Generation components
+try:
+    from report_generation import ReportGenerator, ReportConfig
+    REPORT_GENERATION_AVAILABLE = True
+except ImportError as e:
+    logging.warning(f"Report generation not available: {e}")
+    REPORT_GENERATION_AVAILABLE = False
+    ReportGenerator = None
+    ReportConfig = None
+
+# Import database-integrated service
+from database_video_service import DatabaseIntegratedVideoService
+
+# Try to import DetectifAI-specific components
+try:
+    from detectifai_events import DetectifAIEventType, ThreatLevel
+    DETECTIFAI_EVENTS_AVAILABLE = True
+except ImportError:
+    DETECTIFAI_EVENTS_AVAILABLE = False
+    logging.warning("DetectifAI events module not available - using basic functionality")
+
+# Try to import caption search (optional - may not be available)
+try:
+    import sys
+    import os
+    # Add DetectifAI_db to path for imports
+    detectifai_db_path = os.path.join(os.path.dirname(__file__), 'DetectifAI_db')
+    if detectifai_db_path not in sys.path:
+        sys.path.insert(0, detectifai_db_path)
+    from caption_search import get_caption_search_engine
+    CAPTION_SEARCH_AVAILABLE = True
+except ImportError as e:
+    logging.warning(f"Caption search not available: {e}")
+    CAPTION_SEARCH_AVAILABLE = False
+    get_caption_search_engine = None
+
+# Import subscription middleware for feature gating
+try:
+    from subscription_middleware import (
+        SubscriptionMiddleware,
+        require_subscription,
+        require_feature,
+        check_usage_limit
+    )
+    SUBSCRIPTION_MIDDLEWARE_AVAILABLE = True
+except ImportError as e:
+    logging.warning(f"Subscription middleware not available: {e}")
+    SUBSCRIPTION_MIDDLEWARE_AVAILABLE = False
+    # Create dummy decorators that do nothing
+    def require_subscription(plan=None):
+        def decorator(f):
+            return f
+        return decorator
+    def require_feature(feature):
+        def decorator(f):
+            return f
+        return decorator
+    def check_usage_limit(limit_type, auto_increment=True):
+        def decorator(f):
+            return f
+        return decorator
+
+# Initialize Flask app
+app = Flask(__name__)
+
+# CORS — allow Vercel frontend + localhost dev
+# CORS_ORIGINS is a comma-separated list; entries are stripped and empty ones
+# dropped so values such as "https://a.example, https://b.example" match.
+_allowed_origins = [
+    _origin.strip()
+    for _origin in os.environ.get(
+        'CORS_ORIGINS',
+        'http://localhost:3000,https://detectif-ai-fyp.vercel.app'
+    ).split(',')
+    if _origin.strip()
+]
+CORS(app, resources={r"/api/*": {"origins": _allowed_origins}})
+
+# Configure logging — file handler only when logs/ is writable
+_log_handlers = [logging.StreamHandler()]
+try:
+    os.makedirs('logs', exist_ok=True)
+    _log_handlers.append(logging.FileHandler('logs/detectifai_api.log'))
+except OSError:
+    pass  # read-only filesystem (cloud)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=_log_handlers,
+)
+logger = logging.getLogger(__name__)
+
+# DEMO_MODE — when True, all subscription checks return Pro access
+DEMO_MODE = os.environ.get('DEMO_MODE', 'false').lower() in ('true', '1', 'yes')
+if DEMO_MODE:
+    logger.info("🎯 DEMO_MODE enabled — all users get Pro features, Stripe bypassed")
+
+# Configuration - use absolute paths to handle different working directories.
+# The project root is the directory that contains this file. Going one level
+# higher would resolve to the parent of the application directory, so uploads/
+# and video_processing_outputs/ would not be the folders that sit next to app.py.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # Project root
+UPLOAD_FOLDER = os.path.join(BASE_DIR, 'uploads')
+OUTPUT_FOLDER = os.path.join(BASE_DIR, 'video_processing_outputs')
+ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'}
+MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB max file size
+
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
+
+# Create necessary directories (ignore errors on read-only FS)
+for _dir in [UPLOAD_FOLDER, OUTPUT_FOLDER, 'logs']:
+    try:
+        os.makedirs(_dir, exist_ok=True)
+    except OSError:
+        pass
+
+# Store processing status in memory (use Redis in production)
+processing_status = {}
+
+# Initialize database-integrated video service
+# DATABASE_ENABLED stays False when initialization fails; db_video_service is
+# only defined when it succeeds, so check the flag before using it.
+DATABASE_ENABLED = False
+try:
+    db_video_service = DatabaseIntegratedVideoService(get_security_focused_config())
+    DATABASE_ENABLED = True
+    # Initialize DETECTIFAI_DB for subscription middleware
+    app.config['DETECTIFAI_DB'] = db_video_service.db_manager.db
+    logger.info("✅ Database-integrated video service initialized")
+except Exception as e:
+    logger.error(f"❌ Failed to initialize database service: {e}")
+# ---- Health check (HF Spaces / Render ping this) ----
+@app.route('/')
+@app.route('/api/health')
+def health_check():
+    """Report that the service is up and whether the database service initialized."""
+    body = {
+        'status': 'ok',
+        'service': 'DetectifAI Backend',
+        'database': DATABASE_ENABLED,
+    }
+    return jsonify(body), 200
+
+def allowed_file(filename):
+    """Return True when the text after the last dot is an allowed video extension."""
+    _stem, dot, extension = filename.rpartition('.')
+    # rpartition leaves `dot` empty when the name contains no dot at all
+    return dot == '.' and extension.lower() in ALLOWED_EXTENSIONS
+
+def extract_detectifai_results(pipeline_results):
+    """
+    Build the DetectifAI result summary from raw pipeline output.
+
+    Reads the ``outputs`` and ``processing_stats`` mappings of
+    ``pipeline_results`` and regroups their values into five sections:
+    ``video_info``, ``security_detection``, ``event_analysis``,
+    ``output_files`` and ``performance``. Missing individual values fall back
+    to defaults. If anything goes wrong (for example one of the two
+    mappings is absent) the error is logged and
+    ``{'error': 'Failed to extract results'}`` is returned.
+    """
+    try:
+        outputs = pipeline_results['outputs']
+        stats = pipeline_results['processing_stats']
+        output_dir = outputs.get('output_directory', '')
+
+        # Basic video metrics
+        video_info = {
+            'total_keyframes': outputs.get('total_keyframes', 0),
+            'processing_time': stats.get('total_processing_time', 0),
+            'output_directory': output_dir
+        }
+
+        # Security detection results; the zero counters and the empty alert
+        # list are placeholders that this function does not fill in.
+        security_detection = {
+            'total_object_detections': outputs.get('total_object_detections', 0),
+            'total_object_events': outputs.get('total_object_events', 0),
+            'detectifai_events': outputs.get('detectifai_events', 0),
+            'fire_detections': 0,
+            'weapon_detections': 0,
+            'security_alerts': []
+        }
+
+        # Event analysis
+        event_analysis = {
+            'canonical_events': outputs.get('canonical_events', 0),
+            'total_motion_events': outputs.get('total_motion_events', 0),
+            'high_priority_events': 0,
+            'critical_events': 0
+        }
+
+        # Output files
+        output_files = {
+            'keyframes_directory': os.path.join(output_dir, 'frames'),
+            'reports': outputs.get('reports', {}),
+            'highlight_reels': outputs.get('highlight_reels', {}),
+            'compressed_video': outputs.get('compressed_video', '')
+        }
+
+        # System performance
+        performance = {
+            'frames_processed': stats.get('frames_processed', 0),
+            'frames_enhanced': stats.get('frames_enhanced', 0),
+            'gpu_acceleration': stats.get('gpu_used', False)
+        }
+
+        return {
+            'video_info': video_info,
+            'security_detection': security_detection,
+            'event_analysis': event_analysis,
+            'output_files': output_files,
+            'performance': performance
+        }
+
+    except Exception as e:
+        logger.error(f"Error extracting DetectifAI results: {e}")
+        return {'error': 'Failed to extract results'}
+
+def process_video_async(video_id, video_path, config_type='detectifai'):
+    """Run the DetectifAI pipeline for one upload and publish progress in ``processing_status``.
+
+    ``config_type`` selects the base config: 'detectifai'/'security', 'high_recall', 'balanced'; else defaults.
+    Pipeline and extraction errors are tolerated: the entry still ends 'completed', with ``processing_errors`` set.
+    """
+    try:
+        processing_status[video_id]['status'] = 'processing'
+        processing_status[video_id]['progress'] = 0
+        processing_status[video_id]['message'] = 'Initializing DetectifAI processing...'
+        
+        # Select configuration with DetectifAI optimizations
+        if config_type in ('detectifai', 'security'):
+            config = get_security_focused_config()
+        elif config_type == 'high_recall':
+            try:
+                from config import get_high_recall_config
+                config = get_high_recall_config()
+            except ImportError:
+                config = get_security_focused_config()
+        elif config_type == 'balanced':
+            try:
+                from config import get_balanced_config
+                config = get_balanced_config()
+            except ImportError:
+                config = VideoProcessingConfig()
+        else:
+            config = VideoProcessingConfig()
+        
+        # DetectifAI-specific configuration enhancements
+        config.enable_object_detection = True
+        config.enable_facial_recognition = True
+        config.enable_video_captioning = True # Re-enabled with improved error handling and timeouts
+        config.keyframe_extraction_fps = 1.0  # Extract 1 frame per second for surveillance
+        config.enable_adaptive_processing = True
+        
+        # Set custom output directory for this video
+        config.output_base_dir = os.path.join(OUTPUT_FOLDER, video_id)
+        
+        # Initialize pipeline with database manager for MongoDB integration
+        db_manager = None
+        if DATABASE_ENABLED:
+            db_manager = db_video_service.db_manager
+        
+        pipeline = CompleteVideoProcessingPipeline(config, db_manager=db_manager)
+        
+        # Update progress
+        processing_status[video_id]['progress'] = 10
+        processing_status[video_id]['message'] = 'Extracting keyframes for security analysis...'
+        
+        # Process video with DetectifAI (with error tolerance)
+        output_name = os.path.splitext(os.path.basename(video_path))[0]
+        processing_errors = []
+        
+        try:
+            results = pipeline.process_video_complete(video_path, output_name)
+            logger.info(f"✅ Core pipeline processing completed for {video_id}")
+        except Exception as pipeline_error:
+            logger.error(f"⚠️ Pipeline error (but continuing): {str(pipeline_error)}")
+            processing_errors.append(f"Pipeline: {str(pipeline_error)}")
+            # Minimal results structure so the status payload below can still be built
+            results = {
+                'outputs': {
+                    'total_keyframes': 0,
+                    'total_events': 0,
+                    'total_motion_events': 0,
+                    'total_object_events': 0,
+                    'total_object_detections': 0,
+                    'canonical_events': [],
+                    'total_segments': 1,
+                    'highlight_reels': {},
+                    'reports': {},
+                    'compressed_video': ''
+                },
+                'processing_stats': {'total_processing_time': 0}
+            }
+        
+        # Extract DetectifAI-specific results (with error tolerance)
+        try:
+            detectifai_results = extract_detectifai_results(results)
+        except Exception as extract_error:
+            logger.error(f"⚠️ Result extraction error (but continuing): {str(extract_error)}")
+            processing_errors.append(f"Extraction: {str(extract_error)}")
+            detectifai_results = {'security_detection': {}, 'event_analysis': {}, 'performance': {}}
+        
+        # Always mark as completed (even with errors); callers inspect 'has_warnings' / 'processing_errors'
+        processing_status[video_id]['status'] = 'completed'
+        processing_status[video_id]['progress'] = 100
+        completion_message = 'DetectifAI processing completed successfully!'
+        if processing_errors:
+            completion_message = f'DetectifAI processing completed with warnings: {len(processing_errors)} non-critical errors'
+        processing_status[video_id]['message'] = completion_message
+        processing_status[video_id]['results'] = {
+            # Original results for backward compatibility
+            'total_keyframes': results['outputs']['total_keyframes'],
+            'total_events': results['outputs']['total_events'],
+            'total_motion_events': results['outputs'].get('total_motion_events', 0),
+            'total_object_events': results['outputs'].get('total_object_events', 0),
+            'total_object_detections': results['outputs'].get('total_object_detections', 0),
+            'canonical_events': results['outputs']['canonical_events'],
+            'total_segments': results['outputs']['total_segments'],
+            'processing_time': results['processing_stats']['total_processing_time'],
+            'highlight_reels': results['outputs'].get('highlight_reels', {}),
+            'reports': results['outputs'].get('reports', {}),
+            'compressed_video': results['outputs'].get('compressed_video', ''),
+            'output_directory': config.output_base_dir,
+            'object_detection_enabled': config.enable_object_detection,
+            
+            # DetectifAI-specific results
+            'detectifai_results': detectifai_results,
+            'security_detection': detectifai_results.get('security_detection', {}),
+            'event_analysis': detectifai_results.get('event_analysis', {}),
+            'performance': detectifai_results.get('performance', {}),
+            
+            # Processing status
+            'processing_errors': processing_errors,
+            'has_warnings': len(processing_errors) > 0
+        }
+        
+        logger.info(f"Video {video_id} processed successfully")
+    except Exception as e:
+        logger.exception(f"Error processing video {video_id}: {str(e)}")
+        # setdefault: the entry may be missing, and a KeyError raised here would kill the thread silently
+        failed_entry = processing_status.setdefault(video_id, {})
+        failed_entry.update({'status': 'failed', 'message': f'Error: {str(e)}', 'error': str(e)})
+
+@app.route('/api/health', methods=['GET'])
+def health_check():
+    """Report that the service is up, the current server time, and the DATABASE_ENABLED flag."""
+    payload = {
+        'status': 'healthy', 'timestamp': datetime.now().isoformat(),
+        'database_enabled': DATABASE_ENABLED,
+    }
+    return jsonify(payload)
+
+# ====== SUBSCRIPTION & FEATURE GATING ENDPOINTS ======
+
+@app.route('/api/feature/check', methods=['GET'])
+def check_feature_access():
+    """
+    Tell the frontend whether a user's subscription plan unlocks a feature.
+
+    Query parameters ``user_id`` and ``feature`` are both required (400 otherwise).
+    When the subscription middleware is unavailable or DEMO_MODE is set, access is always granted.
+    """
+    try:
+        requested_user = request.args.get('user_id')
+        requested_feature = request.args.get('feature')
+
+        # Guard clause: both query parameters are mandatory.
+        if not (requested_user and requested_feature):
+            missing_params = {'success': False, 'error': 'user_id and feature required'}
+            return jsonify(missing_params), 400
+
+        # Gating only applies when the middleware is available and demo mode is off.
+        gating_active = SUBSCRIPTION_MIDDLEWARE_AVAILABLE and not DEMO_MODE
+        if not gating_active:
+            open_access = {
+                'success': True,
+                'feature': requested_feature,
+                'has_access': True,
+                'current_plan': 'dev_mode',
+                'message': 'Subscription middleware not available - all features enabled',
+            }
+            return jsonify(open_access), 200
+
+        # Ask the middleware, constructed over the app-level DETECTIFAI_DB handle.
+        gate = SubscriptionMiddleware(app.config.get('DETECTIFAI_DB'))
+        granted = gate.check_feature_access(requested_user, requested_feature)
+        plan = gate.get_user_plan_name(requested_user)
+
+        verdict = {
+            'success': True,
+            'feature': requested_feature,
+            'has_access': granted,
+            'current_plan': plan,
+        }
+        return jsonify(verdict), 200
+
+    except Exception as e:
+        logger.error(f"Error checking feature access: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@app.route('/api/usage/summary', methods=['GET'])
+def get_usage_summary():
+    """
+    Return the usage summary the subscription middleware reports for a user.
+
+    Requires the ``user_id`` query parameter (400 otherwise). When the middleware
+    is unavailable or DEMO_MODE is set, a fixed "Development Mode" summary is returned.
+    """
+    try:
+        requested_user = request.args.get('user_id')
+
+        if not requested_user:
+            return jsonify({
+                'success': False,
+                'error': 'user_id required',
+            }), 400
+
+        # Dev fallback when the middleware is missing or demo mode is on.
+        limits_enforced = SUBSCRIPTION_MIDDLEWARE_AVAILABLE and not DEMO_MODE
+        if not limits_enforced:
+            dev_usage = {
+                'has_subscription': True,
+                'plan': 'dev_mode',
+                'plan_name': 'Development Mode',
+                'status': 'active',
+                'message': 'Subscription middleware not available - unlimited usage',
+            }
+            return jsonify({
+                'success': True,
+                'usage': dev_usage,
+            }), 200
+
+        # Delegate to the middleware, constructed over the app-level DETECTIFAI_DB handle.
+        tracker = SubscriptionMiddleware(app.config.get('DETECTIFAI_DB'))
+        usage_report = tracker.get_usage_summary(requested_user)
+
+        return jsonify({
+            'success': True,
+            'usage': usage_report,
+        }), 200
+
+    except Exception as e:
+        logger.error(f"Error getting usage summary: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+@app.route('/api/usage/increment', methods=['POST'])
+def increment_usage():
+    """
+    Manually increment a usage counter for a user.
+
+    JSON body: ``user_id`` and ``limit_type`` (required) plus ``amount``, a positive integer
+    defaulting to 1. Missing or invalid fields yield 400; a failed increment yields 500.
+    """
+    try:
+        # silent=True: a missing or malformed JSON body becomes a 400 below instead of a 500.
+        data = request.get_json(silent=True) or {}
+        user_id = data.get('user_id')
+        limit_type = data.get('limit_type')
+        amount = data.get('amount', 1)
+
+        if not user_id or not limit_type:
+            return jsonify({
+                'success': False,
+                'error': 'user_id and limit_type required'
+            }), 400
+
+        # Reject zero/negative/non-integer amounts so a caller cannot rewind or corrupt a counter.
+        # bool is excluded explicitly because it is a subclass of int.
+        if isinstance(amount, bool) or not isinstance(amount, int) or amount <= 0:
+            return jsonify({'success': False, 'error': 'amount must be a positive integer'}), 400
+
+        if not SUBSCRIPTION_MIDDLEWARE_AVAILABLE:
+            return jsonify({'success': True, 'message': 'Usage tracking not available in dev mode'}), 200
+
+        middleware = SubscriptionMiddleware(app.config.get('DETECTIFAI_DB'))
+        success = middleware.increment_usage(user_id, limit_type, amount)
+
+        return jsonify({
+            'success': success,
+            'message': 'Usage incremented' if success else 'Failed to increment usage'
+        }), 200 if success else 500
+
+    except Exception as e:
+        logger.error(f"Error incrementing usage: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+
+# ====== REPORT GENERATION ENDPOINTS ======
+
+@app.route('/api/video/reports/generate', methods=['POST'])
+@require_subscription()
+@check_usage_limit('report_generation')
+def generate_report():
+    """Generate forensic report for a video and upload to MinIO
+
+    JSON body: ``video_id`` (required). HTML is always exported; PDF is best-effort with a SimplePDFExporter
+    fallback. If the MinIO upload fails, the returned URLs point at the local download endpoint instead.
+    """
+    if not REPORT_GENERATION_AVAILABLE:
+        return jsonify({'error': 'Report generation service not available'}), 503
+
+    try:
+        # silent=True: a missing or malformed JSON body is answered with the 400 below rather than a 500.
+        data = request.get_json(silent=True) or {}
+        video_id = data.get('video_id')
+
+        if not video_id:
+            return jsonify({'error': 'video_id required'}), 400
+        # video_id becomes a directory name under OUTPUT_FOLDER; refuse values that could escape it.
+        if not isinstance(video_id, str) or video_id in ('.', '..') or '/' in video_id or '\\' in video_id:
+            return jsonify({'error': 'Invalid video_id'}), 400
+
+        # Initialize generator (default ReportConfig; presumably it locates the bundled model itself - TODO confirm)
+        config = ReportConfig()
+        generator = ReportGenerator(config)
+
+        # Generate report
+        logger.info(f"Generating report for video: {video_id}")
+        report = generator.generate_report(video_id=video_id)
+
+        # Define report output directory (local temporary storage)
+        report_dir = os.path.join(OUTPUT_FOLDER, video_id, 'reports')
+        os.makedirs(report_dir, exist_ok=True)
+
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        pdf_filename = f"report_{timestamp}.pdf"
+        html_filename = f"report_{timestamp}.html"
+        pdf_path = os.path.join(report_dir, pdf_filename)
+        html_path = os.path.join(report_dir, html_filename)
+
+        # Export HTML (always available)
+        final_html_path = generator.export_html(report, output_path=html_path)
+        logger.info(f"✅ HTML report exported locally: {final_html_path}")
+
+        # Try to export PDF (optional - may fail if WeasyPrint dependencies missing)
+        final_pdf_path = None
+        try:
+            final_pdf_path = generator.export_pdf(report, output_path=pdf_path)
+            logger.info(f"✅ PDF report exported locally: {final_pdf_path}")
+        except Exception as pdf_error:
+            logger.warning(f"⚠️ PDF export failed (HTML report still available): {pdf_error}")
+            # Try fallback SimplePDFExporter if available
+            try:
+                from report_generation.pdf_exporter import SimplePDFExporter
+                simple_exporter = SimplePDFExporter(config)
+                final_pdf_path = simple_exporter.export(report, output_path=pdf_path)
+                logger.info(f"✅ PDF exported using SimplePDFExporter: {final_pdf_path}")
+            except Exception as fallback_error:
+                logger.warning(f"⚠️ SimplePDFExporter also failed: {fallback_error}")
+                # Continue without PDF - HTML is still available
+                final_pdf_path = None
+
+        # Upload reports to MinIO and get presigned URLs
+        html_url = None
+        pdf_url = None
+
+        try:
+            # Initialize ReportRepository
+            from database.config import DatabaseManager
+            from database.repositories import ReportRepository
+            db_manager = DatabaseManager()
+            report_repo = ReportRepository(db_manager)
+
+            # Upload HTML to MinIO
+            logger.info(f"📤 Uploading HTML report to MinIO...")
+            html_minio_path = report_repo.upload_report_to_minio(final_html_path, video_id, html_filename)
+            html_url = report_repo.get_report_presigned_url(video_id, html_filename, expires=timedelta(hours=24))
+            logger.info(f"✅ HTML report uploaded to MinIO: {html_minio_path}")
+
+            # Upload PDF to MinIO if available
+            if final_pdf_path and os.path.exists(final_pdf_path):
+                logger.info(f"📤 Uploading PDF report to MinIO...")
+                pdf_minio_path = report_repo.upload_report_to_minio(final_pdf_path, video_id, pdf_filename)
+                pdf_url = report_repo.get_report_presigned_url(video_id, pdf_filename, expires=timedelta(hours=24))
+                logger.info(f"✅ PDF report uploaded to MinIO: {pdf_minio_path}")
+
+        except Exception as minio_error:
+            logger.error(f"❌ Failed to upload reports to MinIO: {minio_error}")
+            # Fall back to local file serving if MinIO upload fails
+            html_url = f"/api/video/reports/download/{video_id}/{html_filename}"
+            if final_pdf_path:
+                pdf_url = f"/api/video/reports/download/{video_id}/{pdf_filename}"
+
+        response_data = {
+            'success': True,
+            'report_id': report.report_id,
+            'html_url': html_url,
+            'pdf_available': pdf_url is not None
+        }
+        if pdf_url:
+            response_data['pdf_url'] = pdf_url
+
+        logger.info(f"✅ Report generation complete for {video_id}")
+        return jsonify(response_data)
+
+    except Exception as e:
+        logger.error(f"Report generation error: {e}")
+        import traceback
+        logger.error(f"Traceback: {traceback.format_exc()}")
+        return jsonify({'error': str(e), 'success': False}), 500
+
+@app.route('/api/video/reports/download/<video_id>/<filename>', methods=['GET'])
+def download_report(video_id, filename):
+    """Send a generated report as an attachment; JSON 404 if it is missing or video_id is unsafe."""
+    try:
+        if video_id in ('.', '..') or '/' in video_id or '\\' in video_id:  # would resolve outside OUTPUT_FOLDER
+            return jsonify({'error': 'File not found'}), 404
+        return send_from_directory(os.path.join(OUTPUT_FOLDER, video_id, 'reports'), filename, as_attachment=True)
+    except Exception:
+        return jsonify({'error': 'File not found'}), 404
+
+# ====== DATABASE-INTEGRATED ENDPOINTS ======
+
+@app.route('/api/v2/video/upload', methods=['POST'])
+@require_subscription()  # Requires any active subscription (Basic or Pro)
+@check_usage_limit('video_processing')  # Check and increment video processing limit
+def upload_video_db():
+    """Enhanced video upload with database storage. Requires: Active subscription
+
+    Saves the file locally, records it in MongoDB, uploads it to MinIO, then starts background processing.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+    
+    try:
+        # Check if file is present
+        if 'video' not in request.files:
+            return jsonify({'error': 'No video file provided'}), 400
+        
+        file = request.files['video']
+        if file.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+        
+        if not allowed_file(file.filename):
+            return jsonify({'error': 'Invalid file type. Allowed: mp4, avi, mov, mkv, wmv, flv'}), 400
+        
+        # Generate video ID with consistent format
+        video_id = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{os.urandom(4).hex()}"
+        
+        # Save temporary file with original extension
+        filename = secure_filename(file.filename)
+        _, ext = os.path.splitext(filename)
+        temp_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{video_id}/video{ext}")
+        os.makedirs(os.path.dirname(temp_path), exist_ok=True)
+        file.save(temp_path)
+        
+        # Get user ID (if authenticated) - TODO: implement proper authentication
+        user_id = request.form.get('user_id', None)
+        
+        # STEP 1: Extract video metadata FIRST (before MongoDB record)
+        try:
+            import cv2
+            cap = cv2.VideoCapture(temp_path)
+            try:
+                fps = cap.get(cv2.CAP_PROP_FPS)
+                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            finally:
+                cap.release()  # release the capture even if a property read raises
+            duration = frame_count / fps if fps > 0 else 0
+            file_size = os.path.getsize(temp_path)
+            resolution = f"{width}x{height}"
+        except Exception as e:
+            logger.warning(f"Could not extract video metadata: {e}")
+            fps = 30.0
+            duration = 0
+            file_size = os.path.getsize(temp_path)
+            resolution = "unknown"
+        
+        # STEP 2: Create MongoDB record FIRST (before MinIO upload)
+        video_record = {
+            "video_id": video_id,
+            "user_id": user_id or "system",
+            "file_path": f"videos/{video_id}/video{ext}",
+            "minio_object_key": f"original/{video_id}/video{ext}",  # Will be confirmed after MinIO upload
+            "minio_bucket": db_video_service.video_repo.video_bucket,
+            "codec": "h264",  # Default, can be updated later
+            "fps": float(fps),
+            "upload_date": datetime.utcnow(),
+            "duration_secs": int(duration),
+            "file_size_bytes": int(file_size),
+            "meta_data": {
+                "filename": filename,
+                "original_name": file.filename,
+                "resolution": resolution,
+                "processing_status": "uploading",
+                "processing_progress": 0,
+                "processing_message": "Creating database record..."
+            }
+        }
+        
+        # Create MongoDB record immediately
+        try:
+            db_video_service.video_repo.create_video_record(video_record)
+            logger.info(f"✅ Created MongoDB record for video: {video_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to create MongoDB record: {e}")
+            return jsonify({'error': f'Failed to create database record: {str(e)}'}), 500
+        
+        # STEP 3: Upload video to MinIO immediately (after MongoDB record exists)
+        try:
+            db_video_service.video_repo.update_metadata(video_id, {
+                "processing_progress": 5,
+                "processing_message": "Uploading video to MinIO..."
+            })
+            
+            minio_path = db_video_service.video_repo.upload_video_to_minio(temp_path, video_id)
+            
+            # STEP 4: Update MongoDB with MinIO path (link metadata)
+            db_video_service.video_repo.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {
+                    "minio_object_key": minio_path,
+                    "meta_data.minio_original_path": minio_path
+                }}
+            )
+            logger.info(f"✅ Uploaded video to MinIO and linked in MongoDB: {minio_path}")
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to upload to MinIO: {e}")
+            db_video_service.video_repo.update_metadata(video_id, {
+                "processing_status": "failed",
+                "error_message": f"MinIO upload failed: {str(e)}"
+            })
+            return jsonify({'error': f'Failed to upload to MinIO: {str(e)}'}), 500
+        
+        # STEP 5: Start background processing (frames, detection, etc.)
+        try:
+            thread = threading.Thread(
+                target=db_video_service.process_video_with_database_storage,
+                args=(temp_path, video_id, user_id),
+                daemon=True
+            )
+            thread.start()
+            
+            return jsonify({
+                'success': True,
+                'video_id': video_id,
+                'message': 'Video uploaded successfully. Processing started with database storage.',
+                'status_url': f'/api/v2/video/status/{video_id}'
+            }), 201
+            
+        except Exception as process_error:
+            logger.error(f"Failed to start video processing: {process_error}")
+            # Record the failure in the database, then let the outer handler build the 500 response
+            db_video_service.video_repo.update_metadata(video_id, {
+                "processing_status": "failed",
+                "error_message": str(process_error)
+            })
+            raise
+            
+    except Exception as e:
+        logger.error(f"Database upload error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/v2/video/status/<video_id>', methods=['GET'])
+def get_video_status_db(video_id):
+    """Return processing status from the database, falling back to the in-memory ``processing_status`` table."""
+    if not DATABASE_ENABLED:
+        # No database: the in-memory table is the only source.
+        if video_id not in processing_status:
+            return jsonify({'error': 'Database service not available and video not found in memory'}), 503
+        return jsonify(processing_status[video_id]), 200
+
+    try:
+        db_status = db_video_service.get_video_status(video_id)
+        # Normal case: the lookup returned a status without an 'error' key.
+        if 'error' not in db_status:
+            return jsonify(db_status), 200
+
+        # The database reported an error for this video: prefer the in-memory entry if there is one.
+        if video_id not in processing_status:
+            return jsonify(db_status), 404
+        logger.info(f"Database lookup failed for {video_id}, falling back to in-memory status")
+        return jsonify(processing_status[video_id]), 200
+
+    except Exception as e:
+        logger.error(f"Database status check error: {str(e)}")
+        # Same fallback when the lookup itself raised.
+        if video_id not in processing_status:
+            return jsonify({'error': str(e)}), 500
+        logger.info(f"Database error for {video_id}, falling back to in-memory status")
+        return jsonify(processing_status[video_id]), 200
+
+@app.route('/api/v2/video/keyframes/<video_id>', methods=['GET'])
+def get_video_keyframes_db(video_id):
+    """
+    Return the keyframe listing ``db_video_service`` holds for a video.
+
+    Optional query parameters: ``filter_detections`` ('true' enables it) and ``limit`` (int).
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+
+    try:
+        raw_flag = request.args.get('filter_detections', 'false')
+        only_detections = raw_flag.lower() == 'true'
+        max_items = request.args.get('limit', type=int)
+        query = {'filter_detections': only_detections, 'limit': max_items}
+        return jsonify(db_video_service.get_video_keyframes(video_id, **query)), 200
+
+    except Exception as e:
+        logger.error(f"Database keyframes retrieval error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/v2/video/events/<video_id>', methods=['GET'])
+def get_video_events_db(video_id):
+    """
+    Return the events stored for a video, optionally narrowed by the ``type`` query parameter.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+
+    try:
+        # e.g. motion, object_detection, face_recognition; None when the parameter is omitted
+        wanted_type = request.args.get('type')
+        payload = db_video_service.get_video_events(video_id, wanted_type)
+        return jsonify(payload), 200
+    except Exception as e:
+        logger.error(f"Database events retrieval error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/v2/video/detections/<video_id>', methods=['GET'])
+def get_video_detections_db(video_id):
+    """
+    Return the object detections stored for a video, optionally narrowed by the ``class`` query parameter.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+
+    try:
+        # e.g. fire, knife, gun, smoke; None when the parameter is omitted
+        wanted_class = request.args.get('class')
+        payload = db_video_service.get_video_detections(video_id, wanted_class)
+        return jsonify(payload), 200
+    except Exception as e:
+        logger.error(f"Database detections retrieval error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/v2/video/faces/<video_id>', methods=['GET'])
+def get_video_faces_db(video_id):
+    """
+    Return the detected faces that ``db_video_service`` holds for a video.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+
+    try:
+        payload = db_video_service.get_video_faces(video_id)
+        return jsonify(payload), 200
+    except Exception as e:
+        logger.error(f"Database faces retrieval error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/v2/video/results/<video_id>', methods=['GET'])
+def get_video_results_db(video_id):
+    """
+    Assemble the combined results payload for a video from the database service.
+
+    Responds 404 when the status lookup reports an error, and 400 while processing is
+    unfinished and no detections or events have been counted yet.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database service not available'}), 503
+
+    try:
+        video_info = db_video_service.get_video_status(video_id)
+        if 'error' in video_info:
+            logger.warning(f"Video not found in database: {video_id}")
+            return jsonify(video_info), 404
+
+        # The status is read from the top level first, then from meta_data.processing_status.
+        current_state = video_info.get('status') or video_info.get('meta_data', {}).get('processing_status', 'unknown')
+        logger.info(f"Video {video_id} status: {current_state}, progress: {video_info.get('processing_progress')}")
+
+        # Partial results are served as soon as any detection or event has been counted.
+        meta = video_info.get('meta_data', {})
+        has_detections = meta.get('detection_count', 0) > 0 or video_info.get('detection_count', 0) > 0
+        has_events = meta.get('event_count', 0) > 0 or video_info.get('event_count', 0) > 0
+        finished = current_state in ['completed', 'done']
+
+        if not finished and not has_detections and not has_events:
+            not_ready = {
+                'error': 'Processing not completed',
+                'current_status': current_state,
+                'progress': video_info.get('processing_progress') or meta.get('processing_progress', 0),
+                'message': video_info.get('processing_message') or meta.get('processing_message', ''),
+            }
+            return jsonify(not_ready), 400
+
+        keyframes_data = db_video_service.get_video_keyframes(video_id, limit=50)
+        events_data = db_video_service.get_video_events(video_id)
+        detections_data = db_video_service.get_video_detections(video_id)
+
+        # Behavior events are the subset whose event_type starts with 'behavior_'.
+        events = events_data.get('events', [])
+        behavior_events = [evt for evt in events if evt.get('event_type', '').startswith('behavior_')]
+        behavior_summary = _summarize_behaviors(behavior_events)
+
+        # Prefer the stored URL; otherwise point at the local compressed-video route.
+        stored_url = video_info.get('compressed_video_url')
+        compressed_video_url = stored_url or f'/api/video/compressed/{video_id}'
+        compressed_video_available = bool(stored_url or meta.get('minio_compressed_path'))
+
+        keyframes = keyframes_data.get('keyframes', [])
+        results = {
+            'video_info': video_info,
+            'compressed_video_available': compressed_video_available,
+            'compressed_video_url': compressed_video_url,
+            'keyframes_available': len(keyframes) > 0,
+            'keyframes_count': keyframes_data.get('total_keyframes', 0),
+            'keyframes_sample': keyframes[:10],  # first 10 only
+            'events_available': len(events) > 0,
+            'events_count': events_data.get('total_events', 0),
+            'events_summary': _summarize_events(events),
+            'detections_available': len(detections_data.get('detections', [])) > 0,
+            'detections_count': detections_data.get('total_detections', 0),
+            'detections_summary': _summarize_detections(detections_data.get('detections', [])),
+            'behaviors_available': len(behavior_events) > 0,
+            'behaviors_count': len(behavior_events),
+            'behaviors_summary': behavior_summary,
+            'behavior_events': behavior_events[:10],  # first 10 only
+            'threat_assessment': _assess_threat_level(events, detections_data.get('detections', [])),
+        }
+        return jsonify(results), 200
+
+    except Exception as e:
+        logger.error(f"Database results retrieval error: {str(e)}")
+        import traceback
+        logger.error(f"Traceback: {traceback.format_exc()}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/upload', methods=['POST'])
+@app.route('/api/upload', methods=['POST'])
+@require_subscription()  # Requires any active subscription (Basic or Pro)
+@check_usage_limit('video_processing')  # Check and increment video processing limit
+def upload_video():
+    """
+    Accept a video upload, register it in ``processing_status`` and start background processing.
+
+    Form fields: ``video`` (the file, required) and ``config_type`` (optional, default 'detectifai').
+    Responds with the generated video_id and a status URL to poll.
+    """
+    try:
+        if 'video' not in request.files:
+            return jsonify({'error': 'No video file provided'}), 400
+        upload = request.files['video']
+        if upload.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+        if not allowed_file(upload.filename):
+            return jsonify({'error': 'Invalid file type. Allowed: mp4, avi, mov, mkv, wmv, flv'}), 400
+
+        # Processing profile requested by the client (DetectifAI-optimized by default).
+        config_type = request.form.get('config_type', 'detectifai')
+        # Id: upload timestamp plus 4 random bytes in hex.
+        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        video_id = f"video_{stamp}_{os.urandom(4).hex()}"
+        # Store the file under a sanitized name prefixed with the id.
+        safe_name = secure_filename(upload.filename)
+        video_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{video_id}_{safe_name}")
+        upload.save(video_path)
+
+        # Register the job so the status endpoints can report on it.
+        processing_status[video_id] = dict(
+            video_id=video_id,
+            filename=safe_name,
+            status='queued',
+            progress=0,
+            message='Video uploaded successfully. Processing queued.',
+            uploaded_at=datetime.now().isoformat(),
+            config_type=config_type,
+        )
+
+        worker = threading.Thread(
+            target=process_video_async,
+            args=(video_id, video_path, config_type),
+            daemon=True,
+        )
+        worker.start()
+
+        accepted = {
+            'success': True,
+            'video_id': video_id,
+            'message': 'Video uploaded successfully. Processing started.',
+            'status_url': f'/api/status/{video_id}',
+        }
+        return jsonify(accepted), 200
+
+    except Exception as e:
+        logger.error(f"Upload error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/status/<video_id>', methods=['GET'])
+@app.route('/api/status/<video_id>', methods=['GET'])
+def get_status(video_id):
+    """Return the processing status record for ``video_id``.
+
+    The in-memory ``processing_status`` entry is returned when present.
+    Otherwise, if ``OUTPUT_FOLDER/<video_id>`` exists, a synthetic
+    ``completed`` record is built, cached in memory and returned; if not,
+    the response is a 404.
+    """
+    if video_id in processing_status:
+        return jsonify(processing_status[video_id]), 200
+    # Not tracked in memory: a leftover output directory counts as finished
+    if not os.path.exists(os.path.join(OUTPUT_FOLDER, video_id)):
+        return jsonify({'error': 'Video not found'}), 404
+
+    # Upload time and original filename are not known on this path
+    recovered_status = dict(
+        video_id=video_id,
+        status='completed',
+        progress=100,
+        message='Processing completed (recovered from disk)',
+        uploaded_at='',
+        filename=f"{video_id}.avi",
+    )
+    processing_status[video_id] = recovered_status  # cache for later requests
+    logger.info(f"🔄 Recovered status for {video_id} from disk")
+    return jsonify(recovered_status), 200
+
+@app.route('/api/results/<video_id>', methods=['GET'])
+def get_results(video_id):
+    """Return the stored ``results`` dict of a completed video.
+
+    404 when the ID is not in ``processing_status``; 400 (with the current
+    status) while the video is not ``completed``; ``{}`` if no results exist.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    entry = processing_status[video_id]
+    current = entry['status']
+    if current == 'completed':
+        return jsonify(entry.get('results', {})), 200
+    not_ready = {'error': 'Processing not completed', 'current_status': current}
+    return jsonify(not_ready), 400
+
+@app.route('/api/video/results/<video_id>', methods=['GET'])
+def get_video_results(video_id):
+    """Report which processed artifacts are available for ``video_id``.
+
+    Lookup order:
+      1. ``processing_status`` (in memory): ``processing`` yields a progress
+         payload and ``failed`` a 400; any other state is answered from the
+         output directory on disk.
+      2. The database (when ``DATABASE_ENABLED``): flags are derived from the
+         record's ``meta_data``; lookup errors are logged and ignored.
+      3. ``OUTPUT_FOLDER/<video_id>`` on disk; 404 if it does not exist.
+    """
+    if video_id in processing_status:
+        status = processing_status[video_id]
+        if status['status'] == 'processing':
+            # Nothing is final yet, so every availability flag is False
+            return jsonify({
+                'video_id': video_id,
+                'status': 'processing',
+                'progress': status.get('progress', 0),
+                'message': status.get('message', 'Processing...'),
+                'compressed_video_available': False,
+                'keyframes_available': False,
+                'reports_available': False
+            }), 200
+
+        if status['status'] == 'failed':
+            return jsonify({
+                'error': 'Processing failed',
+                'message': status.get('message', 'Unknown error'),
+                'current_status': status['status']
+            }), 400
+
+        # Prefer the directory recorded in the results; otherwise assume
+        # the standard layout under OUTPUT_FOLDER
+        if 'results' in status and 'output_directory' in status['results']:
+            output_dir = status['results']['output_directory']
+        else:
+            output_dir = os.path.join(OUTPUT_FOLDER, video_id)
+    else:
+        # Not in memory: ask the database first (database-integrated runs)
+        if DATABASE_ENABLED:
+            try:
+                db_status = db_video_service.get_video_status(video_id)
+                if 'error' not in db_status:
+                    meta_data = db_status.get('meta_data', {})
+                    # Compressed video counts as available when a MinIO path is recorded
+                    compressed_video_available = bool(meta_data.get('minio_compressed_path'))
+                    compressed_video_url = f'/api/video/compressed/{video_id}' if compressed_video_available else None
+                    keyframes_count = meta_data.get('keyframe_count', 0)
+                    keyframes_available = keyframes_count > 0
+                    # Reports are assumed to exist once processing completed
+                    reports_available = db_status.get('status') == 'completed'
+
+                    return jsonify({
+                        'video_id': video_id,
+                        'status': db_status.get('status', 'unknown'),
+                        'compressed_video_available': compressed_video_available,
+                        'compressed_video_url': compressed_video_url,
+                        'keyframes_available': keyframes_available,
+                        'keyframes_count': keyframes_count,
+                        'keyframes_url': f'/api/v2/video/keyframes/{video_id}',  # Use v2 endpoint for database
+                        'reports_available': reports_available,
+                        'reports': []  # Database doesn't store report files locally
+                    }), 200
+            except Exception as e:
+                logger.warning(f"Database lookup failed for results: {e}")
+
+        # Last resort: files left on disk (e.g. after a server restart)
+        output_dir = os.path.join(OUTPUT_FOLDER, video_id)
+        if not os.path.exists(output_dir):
+            return jsonify({'error': 'Video not found'}), 404
+        logger.info(f"📁 Found video files on disk for {video_id}, recovering results")
+
+    # Compressed video: available when <output_dir>/compressed holds an .mp4
+    compressed_dir = os.path.join(output_dir, 'compressed')
+    compressed_video_available = os.path.exists(compressed_dir) and any(
+        f.endswith('.mp4') for f in os.listdir(compressed_dir)
+    )
+    compressed_video_url = f'/api/video/compressed/{video_id}' if compressed_video_available else None
+
+    # Keyframes: list the frames directory once and reuse the result
+    frames_dir = os.path.join(output_dir, 'frames')
+    frame_files = []
+    if os.path.exists(frames_dir):
+        frame_files = [f for f in os.listdir(frames_dir) if f.endswith('.jpg')]
+    keyframes_count = len(frame_files)
+    keyframes_available = keyframes_count > 0
+
+    # Reports: every .json file in <output_dir>/reports
+    reports_dir = os.path.join(output_dir, 'reports')
+    reports_available = os.path.exists(reports_dir)
+    report_files = []
+    if reports_available:
+        report_files = [f for f in os.listdir(reports_dir) if f.endswith('.json')]
+
+    return jsonify({
+        'video_id': video_id,
+        'compressed_video_available': compressed_video_available,
+        'compressed_video_url': compressed_video_url,
+        'keyframes_available': keyframes_available,
+        'keyframes_count': keyframes_count,
+        'keyframes_url': f'/api/video/keyframes/{video_id}',
+        'reports_available': reports_available,
+        'reports': report_files
+    }), 200
+
+@app.route('/api/download/<video_id>/<file_type>', methods=['GET'])
+def download_file(video_id, file_type):
+    """Send one processed artifact of a completed video as an attachment.
+
+    ``file_type`` is one of ``highlight_event``, ``highlight_comprehensive``,
+    ``highlight_quality``, ``compressed_video``, ``report_processing``,
+    ``report_events`` or ``html_gallery``; anything else yields a 400.
+    A missing video or file yields a 404, an unfinished video a 400.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    # file_type -> (section of the results dict, key); None = top level
+    locations = {
+        'highlight_event': ('highlight_reels', 'event_aware'),
+        'highlight_comprehensive': ('highlight_reels', 'ultra_comprehensive'),
+        'highlight_quality': ('highlight_reels', 'quality_focused'),
+        'compressed_video': (None, 'compressed_video'),
+        'report_processing': ('reports', 'processing_results'),
+        'report_events': ('reports', 'canonical_events'),
+        'html_gallery': ('reports', 'html_gallery'),
+    }
+    if file_type not in locations:
+        return jsonify({'error': 'Invalid file type'}), 400
+    try:
+        # Status entries recovered from disk have no 'results'; treat any
+        # missing section as "file not found" rather than raising KeyError
+        results = status.get('results') or {}
+        section, key = locations[file_type]
+        container = results if section is None else (results.get(section) or {})
+        file_path = container.get(key, '')
+        if not file_path or not os.path.exists(file_path):
+            return jsonify({'error': 'File not found'}), 404
+        return send_file(file_path, as_attachment=True)
+    except Exception as e:
+        logger.error(f"Download error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/keyframes/<video_id>', methods=['GET'])
+@app.route('/api/keyframes/<video_id>', methods=['GET'])
+def get_keyframes(video_id):
+    """List the extracted keyframes of a completed video with detection info.
+
+    Scans ``<output_dir>/frames`` for ``.jpg`` files (``*_annotated.jpg`` is
+    skipped) and merges data from ``detection_metadata.json`` when readable.
+    ``?filter_detections=true`` keeps only frames that have detections.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    # Entries recovered from disk have no 'results'; use the standard layout
+    # under OUTPUT_FOLDER instead of raising KeyError
+    results = status.get('results') or {}
+    output_dir = results.get('output_directory') or os.path.join(OUTPUT_FOLDER, video_id)
+    frames_dir = os.path.join(output_dir, 'frames')
+    if not os.path.exists(frames_dir):
+        return jsonify({'error': 'Frames directory not found'}), 404
+
+    # Detection metadata is optional; an unreadable file is only logged
+    detection_metadata = {}
+    detection_metadata_path = os.path.join(output_dir, 'detection_metadata.json')
+    if os.path.exists(detection_metadata_path):
+        try:
+            with open(detection_metadata_path, 'r') as f:
+                detection_metadata = json.load(f)
+        except Exception as e:
+            logger.warning(f"Could not load detection metadata: {e}")
+
+    filter_detections = request.args.get('filter_detections', 'false').lower() == 'true'
+    # Index detections by frame path; entries without a path are ignored
+    frames_with_detections = {
+        item['original_path']: item
+        for item in detection_metadata.get('detection_summary', [])
+        if 'original_path' in item
+    }
+
+    keyframes = []
+    for filename in sorted(os.listdir(frames_dir)):
+        if not filename.endswith('.jpg') or filename.endswith('_annotated.jpg'):
+            continue
+        # Timestamp = second '_'-separated token minus 's' and '.jpg'
+        timestamp = 0.0
+        if '_' in filename:
+            try:
+                timestamp = float(filename.split('_')[1].replace('s', '').replace('.jpg', ''))
+            except ValueError:
+                pass  # token is not numeric; keep the 0.0 default
+        frame_path = os.path.join(frames_dir, filename)
+        has_detections = frame_path in frames_with_detections
+        if filter_detections and not has_detections:
+            continue
+        keyframe_data = {
+            'filename': filename,
+            'timestamp': timestamp,
+            'url': f'/api/video/{video_id}/keyframe/{filename}',
+            'minio_url': f'/api/minio/image/detectifai-keyframes/{video_id}/keyframes/{filename}',
+            'has_detections': has_detections
+        }
+        if has_detections:
+            detection_info = frames_with_detections[frame_path]
+            keyframe_data.update({
+                'detection_count': detection_info.get('detection_count', 0),
+                'objects': detection_info.get('objects', []),
+                'confidence_avg': detection_info.get('confidence_avg', 0.0)
+            })
+        keyframes.append(keyframe_data)
+
+    return jsonify({
+        'video_id': video_id,
+        'total_keyframes': detection_metadata.get('total_keyframes', len(keyframes)),
+        'keyframes_with_detections': detection_metadata.get('frames_with_detections', 0),
+        'keyframes': keyframes,
+        'objects_detected': detection_metadata.get('objects_detected', {}),
+        'filter_applied': filter_detections
+    }), 200
+
+@app.route('/api/keyframe/<video_id>/<filename>', methods=['GET'])
+def get_keyframe_image(video_id, filename):
+    """Serve one keyframe image from the video's ``frames`` directory."""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    # Entries recovered from disk have no 'results'; use the standard layout
+    # under OUTPUT_FOLDER instead of raising KeyError
+    results = processing_status[video_id].get('results') or {}
+    output_dir = results.get('output_directory') or os.path.join(OUTPUT_FOLDER, video_id)
+    frames_dir = os.path.join(output_dir, 'frames')
+    return send_from_directory(frames_dir, filename)
+
+@app.route('/api/video/compressed/<video_id>', methods=['GET'])
+def get_compressed_video(video_id):
+    """Serve the compressed ``.mp4`` of a completed video from local disk.
+
+    Looks in ``<output_dir>/compressed`` and sends the first ``.mp4`` that
+    ``os.listdir`` reports (only one is expected).
+
+    Returns 404 when the video, directory or file is missing and 400 while
+    processing has not completed.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+
+    # Entries recovered from disk have no 'results'; use the standard layout
+    # under OUTPUT_FOLDER instead of raising KeyError
+    results = status.get('results') or {}
+    output_dir = results.get('output_directory') or os.path.join(OUTPUT_FOLDER, video_id)
+    compressed_dir = os.path.join(output_dir, 'compressed')
+    if not os.path.exists(compressed_dir):
+        return jsonify({'error': 'Compressed video directory not found'}), 404
+    video_files = [f for f in os.listdir(compressed_dir) if f.endswith('.mp4')]
+    if not video_files:
+        return jsonify({'error': 'Compressed video file not found'}), 404
+    return send_from_directory(compressed_dir, video_files[0])
+
+@app.route('/api/videos', methods=['GET'])
+def list_videos():
+    """Return a summary of every video tracked in ``processing_status``.
+
+    Fields missing from a record default to ``''`` (``0`` for progress).
+    """
+    videos = [{
+        'video_id': vid,
+        'filename': record.get('filename', ''),
+        'status': record.get('status', ''),
+        'uploaded_at': record.get('uploaded_at', ''),
+        'progress': record.get('progress', 0)
+    } for vid, record in processing_status.items()]
+    return jsonify({'videos': videos}), 200
+
+@app.route('/api/video/processing-summary/<video_id>', methods=['GET'])
+@app.route('/api/processing-summary/<video_id>', methods=['GET'])
+def get_processing_summary(video_id):
+    """Return timing, detection and output-path details for a completed video.
+
+    Combines ``processing_stats`` from the stored results with counts read
+    from ``<output_dir>/detection_metadata.json`` (when readable). Returns
+    404 for an unknown video and 400 while processing has not completed.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    # Entries recovered from disk have no 'results'; use the standard layout
+    # under OUTPUT_FOLDER instead of raising KeyError
+    results = status.get('results') or {}
+    output_dir = results.get('output_directory') or os.path.join(OUTPUT_FOLDER, video_id)
+    detection_metadata = {}
+    detection_metadata_path = os.path.join(output_dir, 'detection_metadata.json')
+    if os.path.exists(detection_metadata_path):
+        try:
+            with open(detection_metadata_path, 'r') as f:
+                detection_metadata = json.load(f)
+        except Exception as e:
+            logger.warning(f"Could not load detection metadata: {e}")
+    processing_stats = results.get('processing_stats', {})
+    objects_detected = detection_metadata.get('objects_detected', {})
+    summary = {
+        'video_id': video_id,
+        'filename': status.get('filename', ''),
+        'processing_time': processing_stats.get('total_processing_time', 0),
+        'keyframes_extracted': detection_metadata.get('total_keyframes', 0),
+        'keyframes_with_detections': detection_metadata.get('frames_with_detections', 0),
+        'objects_detected': objects_detected,
+        'total_objects': sum(objects_detected.values()),
+        'component_times': processing_stats.get('component_times', {}),
+        'output_files': {
+            'compressed_video': results.get('compressed_video_path', ''),
+            'frames_directory': os.path.join(output_dir, 'frames'),
+            'reports_directory': os.path.join(output_dir, 'reports')
+        }
+    }
+    return jsonify(summary), 200
+
+@app.route('/api/delete/<video_id>', methods=['DELETE'])
+def delete_video(video_id):
+    """Forget a video and remove its output directory and uploaded file(s).
+
+    The status entry is dropped first; the recorded output directory (if
+    any) is then removed, followed by every upload whose name starts with
+    the video ID. Returns 404 for an unknown ID and 500 on any error.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    try:
+        removed = processing_status.pop(video_id)
+        results = removed['results'] if 'results' in removed else {}
+        if 'output_directory' in results:
+            import shutil
+            target = results['output_directory']
+            if os.path.exists(target):
+                shutil.rmtree(target)
+        # Uploads are stored as "<video_id>_<name>", so match on the ID prefix
+        upload_root = app.config['UPLOAD_FOLDER']
+        for name in os.listdir(upload_root):
+            if name.startswith(video_id):
+                os.remove(os.path.join(upload_root, name))
+        return jsonify({'success': True, 'message': 'Video deleted successfully'}), 200
+    except Exception as e:
+        logger.error(f"Delete error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+# DetectifAI-specific endpoints
+
+@app.route('/api/detectifai/events/<video_id>', methods=['GET'])
+def get_detectifai_events(video_id):
+    """Return the ``security_detection`` results of a completed video.
+
+    Counters default to ``0`` and the alert list to ``[]`` when absent.
+    Returns 404 for an unknown video and 400 while it is not completed.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    record = processing_status[video_id]
+    if record['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    detection = record.get('results', {}).get('security_detection', {})
+    payload = {
+        'video_id': video_id,
+        'security_events': detection,
+        'total_detections': detection.get('total_object_detections', 0),
+        'fire_detections': detection.get('fire_detections', 0),
+        'weapon_detections': detection.get('weapon_detections', 0),
+        'security_alerts': detection.get('security_alerts', [])
+    }
+    return jsonify(payload), 200
+
+@app.route('/api/detectifai/demo', methods=['GET'])
+def demo_detectifai():
+    """Register the demo videos that exist in the working directory.
+
+    For each of ``rob.mp4`` and ``fire.avi`` found on disk, a fresh ``ready``
+    entry is added to ``processing_status`` and its processing URL is
+    returned. Every call creates new entries. Returns 500 on any error.
+    """
+    try:
+        demo_videos = []
+        for test_file in ('rob.mp4', 'fire.avi'):
+            if not os.path.exists(test_file):
+                continue
+            # ID combines the file name (dots -> underscores) and epoch seconds
+            epoch = int(datetime.now().timestamp())
+            video_id = f"demo_{test_file.replace('.', '_')}_{epoch}"
+            processing_status[video_id] = {
+                'video_id': video_id,
+                'filename': test_file,
+                'status': 'ready',
+                'progress': 0,
+                'message': f'Demo video {test_file} ready for DetectifAI processing',
+                'uploaded_at': datetime.now().isoformat(),
+                'video_path': test_file,
+                'is_demo': True,
+                'config_type': 'detectifai'
+            }
+            demo_videos.append({
+                'video_id': video_id,
+                'filename': test_file,
+                'process_url': f'/api/process/{video_id}'
+            })
+        return jsonify({
+            'demo_videos': demo_videos,
+            'message': f'Found {len(demo_videos)} demo videos ready for DetectifAI processing'
+        }), 200
+    except Exception as e:
+        logger.error(f"Demo endpoint error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/process/<video_id>', methods=['POST'])
+def process_existing_video(video_id):
+    """Start background processing for an already-registered video.
+
+    Only entries whose status is ``ready`` or ``failed`` may be started, and
+    the file at the entry's ``video_path`` must exist. Returns 404 for an
+    unknown video or missing file and 400 for any other status.
+    """
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    entry = processing_status[video_id]
+    if entry.get('status') not in ('ready', 'failed'):
+        return jsonify({'error': 'Video is already being processed or completed'}), 400
+    video_path = entry.get('video_path', '')
+    if not os.path.exists(video_path):
+        return jsonify({'error': 'Video file not found'}), 404
+
+    # Run the pipeline on a daemon thread so the request returns at once
+    worker = threading.Thread(
+        target=process_video_async,
+        args=(video_id, video_path, entry.get('config_type', 'detectifai')),
+        daemon=True
+    )
+    worker.start()
+    started = {
+        'success': True,
+        'video_id': video_id,
+        'message': 'DetectifAI processing started',
+        'status_url': f'/api/status/{video_id}'
+    }
+    return jsonify(started), 200
+
+@app.route('/api/debug/compressed/<video_id>', methods=['GET'])
+def debug_compressed_video(video_id):
+    """Report how a video's compressed copy is stored in MinIO.
+
+    Lists the objects under ``compressed/<video_id>/`` in the record's bucket
+    and returns them with the compression metadata from the database. With
+    ``?serve=true`` the first listed object ending in ``video.mp4`` is
+    streamed instead, when one exists.
+
+    Returns 503 when the database is disabled, 404 for an unknown video and
+    500 on lookup or streaming errors.
+    """
+    if not DATABASE_ENABLED:
+        return jsonify({'error': 'Database not enabled'}), 503
+
+    # ?serve=true switches from the JSON report to streaming the video
+    serve_video = request.args.get('serve', 'false').lower() == 'true'
+
+    try:
+        repo = db_video_service.video_repo
+        video_record = repo.get_video_by_id(video_id)
+        if not video_record:
+            return jsonify({'error': 'Video not found'}), 404
+
+        meta_data = video_record.get('meta_data', {})
+        bucket = video_record.get('minio_bucket', repo.video_bucket)
+
+        # A failed listing is reported inside the payload, not raised
+        minio_info = {}
+        objects = []
+        try:
+            listing = repo.minio.list_objects(bucket, prefix=f"compressed/{video_id}/", recursive=True)
+            objects = list(listing)
+            minio_info['objects_found'] = len(objects)
+            minio_info['objects'] = [{'name': obj.object_name, 'size': obj.size} for obj in objects]
+        except Exception as e:
+            minio_info['error'] = str(e)
+
+        if serve_video and objects:
+            logger.info(f"🐛 DEBUG: Attempting to serve compressed video for: {video_id}")
+            try:
+                video_object = next(
+                    (obj for obj in objects if obj.object_name.endswith('video.mp4')),
+                    None
+                )
+                if not video_object:
+                    logger.warning(f"🐛 DEBUG: No video.mp4 found in objects")
+                else:
+                    logger.info(f"🐛 DEBUG: Found video object: {video_object.object_name}")
+                    video_data = repo.minio.get_object(bucket, video_object.object_name)
+
+                    def generate():
+                        # Relay stream(8192) chunks; always close the MinIO response
+                        try:
+                            for chunk in video_data.stream(8192):
+                                yield chunk
+                        finally:
+                            video_data.close()
+
+                    stream_headers = {
+                        'Content-Disposition': f'inline; filename="compressed_{video_id}.mp4"',
+                        'Accept-Ranges': 'bytes'
+                    }
+                    response = Response(generate(), mimetype='video/mp4', headers=stream_headers)
+                    logger.info(f"🐛 DEBUG: Successfully serving compressed video")
+                    return response
+            except Exception as serve_e:
+                logger.error(f"🐛 DEBUG: Failed to serve video: {serve_e}")
+                return jsonify({
+                    'error': f'Failed to serve video: {str(serve_e)}',
+                    'video_id': video_id,
+                    'bucket': bucket,
+                    'minio_info': minio_info
+                }), 500
+
+        report = {
+            'video_id': video_id,
+            'bucket': bucket,
+            'minio_compressed_path': meta_data.get('minio_compressed_path'),
+            'compression_info': meta_data.get('compression_info', {}),
+            'minio_info': minio_info,
+            'help': 'Add ?serve=true to download the video'
+        }
+        return jsonify(report), 200
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/<video_id>/compressed', methods=['GET'])
+@app.route('/api/video/annotated/<video_id>', methods=['GET'])
+@app.route('/api/v2/video/annotated/<video_id>', methods=['GET'])
+def serve_annotated_video(video_id):
+    """Serve the annotated (bounding-box) video for ``video_id``.
+
+    Sources are tried in order: a redirect to a presigned MinIO URL when the
+    database metadata marks an annotated video as available, the local
+    ``annotated_video_path`` from that metadata, the first ``.mp4`` listed in
+    ``OUTPUT_FOLDER/<video_id>/annotated``, and finally
+    ``serve_compressed_video``. Errors in the database branch are logged and
+    the local fallbacks are tried; any other error yields a 500.
+    """
+    logger.info(f"🎨 Request to serve annotated video: {video_id}")
+    try:
+        # First try to get from database/MinIO
+        video_record = None
+        video_exists_in_db = False
+        status_data = None
+        meta_data = {}
+        
+        if DATABASE_ENABLED:
+            try:
+                status_data = db_video_service.get_video_status(video_id)
+                if 'error' not in status_data:
+                    video_exists_in_db = True
+                    logger.info(f"✅ Found video in database: {video_id}")
+                    # Get video record directly
+                    try:
+                        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+                    except Exception as e:
+                        logger.warning(f"Could not get video record: {e}")
+                    # Prefer metadata from the status lookup, then from the record
+                    if status_data:
+                        meta_data = status_data.get('meta_data', {})
+                    if not meta_data and video_record:
+                        meta_data = video_record.get('meta_data', {})
+                    # Bucket is always "detectifai-videos"; the record check below cannot change it
+                    video_bucket = "detectifai-videos"
+                    if video_record:
+                        record_bucket = video_record.get('minio_bucket')
+                        if record_bucket == "detectifai-videos":
+                            video_bucket = record_bucket
+                    # Get annotated video path from metadata
+                    minio_annotated_path = meta_data.get('minio_annotated_path')
+                    annotated_video_available = meta_data.get('annotated_video_available', False)
+                    logger.info(f"📁 MinIO annotated path: {minio_annotated_path}")
+                    logger.info(f"📁 Annotated video available: {annotated_video_available}")
+                    # Try to serve from MinIO
+                    if minio_annotated_path and annotated_video_available:
+                        try:
+                            from minio.error import S3Error
+                            minio_client = db_video_service.video_repo.minio
+                            # Check if object exists
+                            try:
+                                minio_client.stat_object(video_bucket, minio_annotated_path)
+                                # Object exists: redirect to a presigned URL valid for one hour
+                                from datetime import timedelta
+                                presigned_url = minio_client.presigned_get_object(
+                                    video_bucket,
+                                    minio_annotated_path,
+                                    expires=timedelta(hours=1)
+                                )
+                                logger.info(f"✅ Generated presigned URL for annotated video: {minio_annotated_path}")
+                                return redirect(presigned_url)
+                            except S3Error as e:
+                                if e.code == 'NoSuchKey':
+                                    logger.warning(f"⚠️ Annotated video not found in MinIO: {minio_annotated_path}")
+                                else:
+                                    logger.error(f"❌ MinIO error: {e}")
+                        except Exception as e:
+                            logger.warning(f"⚠️ Failed to get annotated video from MinIO: {e}")
+                    
+                    # Try local file
+                    annotated_video_path = meta_data.get('annotated_video_path')
+                    if annotated_video_path and os.path.exists(annotated_video_path):
+                        logger.info(f"✅ Serving annotated video from local path: {annotated_video_path}")
+                        return send_file(annotated_video_path, mimetype='video/mp4')
+                    
+            except Exception as e:
+                logger.error(f"❌ Error getting video status: {e}")
+        
+        # Fallback: check local storage
+        output_dir = os.path.join(OUTPUT_FOLDER, video_id)
+        annotated_dir = os.path.join(output_dir, 'annotated')
+        
+        if os.path.exists(annotated_dir):
+            video_files = [f for f in os.listdir(annotated_dir) if f.endswith('.mp4')]
+            if video_files:
+                video_filename = video_files[0]
+                logger.info(f"✅ Serving annotated video from local directory: {annotated_dir}/{video_filename}")
+                return send_from_directory(annotated_dir, video_filename)
+        
+        # No annotated video anywhere: delegate to the compressed-video handler
+        logger.warning(f"⚠️ Annotated video not found for {video_id}, falling back to compressed")
+        return serve_compressed_video(video_id)
+        
+    except Exception as e:
+        logger.error(f"❌ Error serving annotated video: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        return jsonify({'error': f'Failed to serve annotated video: {str(e)}'}), 500
+
+@app.route('/api/video/compressed/<video_id>', methods=['GET'])
+@app.route('/api/v2/video/compressed/<video_id>', methods=['GET'])
+def serve_compressed_video(video_id):
+    """Serve compressed processed video from MinIO or local storage"""
+    logger.info(f"🎬 Request to serve compressed video: {video_id}")
+    
+    # QUICK FIX: Redirect to working V3 endpoint
+    logger.info(f"🔄 Redirecting to working V3 endpoint: {video_id}")
+    return redirect(f'/api/v3/video/compressed/{video_id}')
+    
+    # ORIGINAL COMPLEX LOGIC (fallback if simple approach fails)
+    try:
+        # First try to get from database/MinIO
+        video_record = None
+        video_exists_in_db = False
+        status_data = None
+        meta_data = {}
+        
+        if DATABASE_ENABLED:
+            try:
+                status_data = db_video_service.get_video_status(video_id)
+                if 'error' not in status_data:
+                    video_exists_in_db = True
+                    logger.info(f"✅ Found video in database: {video_id}")
+                    logger.info(f"📊 Status data keys: {list(status_data.keys())}")
+                    
+                    # Get video record directly to access all fields including bucket
+                    try:
+                        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+                        if video_record:
+                            logger.info(f"📁 Retrieved video record from database")
+                    except Exception as e:
+                        logger.warning(f"Could not get video record: {e}")
+                else:
+                    logger.warning(f"⚠️ Video not found in database status, but will still try MinIO: {video_id}")
+                    # Still try to get video record directly
+                    try:
+                        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+                        if video_record:
+                            video_exists_in_db = True
+                            logger.info(f"✅ Found video record directly (status lookup failed)")
+                    except Exception as e:
+                        logger.warning(f"Could not get video record: {e}")
+                    
+                    # Try to get from MinIO directly
+                    # meta_data might be nested or at root level
+                    if status_data:
+                        meta_data = status_data.get('meta_data', {})
+                    if not meta_data and video_record:
+                        meta_data = video_record.get('meta_data', {})
+                        logger.info(f"📁 Retrieved meta_data from video record")
+                    
+                    # Get bucket from video record (should be "detectifai-videos")
+                    # Always use detectifai-videos bucket as confirmed by user
+                    video_bucket = "detectifai-videos"
+                    if video_record:
+                        record_bucket = video_record.get('minio_bucket')
+                        if record_bucket:
+                            logger.info(f"📦 Video bucket from record: {record_bucket}")
+                            # Use record bucket if it's detectifai-videos, otherwise use default
+                            if record_bucket == "detectifai-videos":
+                                video_bucket = record_bucket
+                            else:
+                                logger.warning(f"⚠️ Record bucket ({record_bucket}) doesn't match expected (detectifai-videos), using detectifai-videos")
+                                video_bucket = "detectifai-videos"
+                        else:
+                            logger.info(f"📦 No bucket in record, using detectifai-videos")
+                    else:
+                        logger.info(f"📦 No video record, using detectifai-videos bucket")
+                    
+                    # Ensure we're using the correct bucket
+                    if video_bucket != "detectifai-videos":
+                        logger.warning(f"⚠️ Bucket mismatch! Expected 'detectifai-videos', got '{video_bucket}'. Forcing to 'detectifai-videos'")
+                        video_bucket = "detectifai-videos"
+                    
+                    logger.info(f"📦 Final video bucket: {video_bucket}")
+                    
+                    minio_compressed_path = meta_data.get('minio_compressed_path') if meta_data else None
+                    
+                    # Also check compression_info for the path
+                    if not minio_compressed_path and meta_data:
+                        compression_info = meta_data.get('compression_info', {})
+                        minio_compressed_path = compression_info.get('minio_path')
+                    
+                    logger.info(f"📁 MinIO compressed path from metadata: {minio_compressed_path}")
+                    logger.info(f"📁 Processing status: {meta_data.get('processing_status') if meta_data else 'N/A'}")
+                    logger.info(f"📁 Full meta_data keys: {list(meta_data.keys()) if meta_data else 'N/A'}")
+            except Exception as e:
+                logger.warning(f"⚠️ Database lookup failed, but will still try MinIO: {e}")
+                import traceback
+                logger.debug(f"Database lookup traceback: {traceback.format_exc()}")
+        
+        # Always try MinIO first (even if database lookup failed, try standard path)
+        # This ensures we can serve videos even if database is temporarily unavailable
+        try:
+            from io import BytesIO
+            from minio.error import S3Error
+            
+            # Use detectifai-videos bucket as confirmed by user
+            video_bucket = "detectifai-videos"
+            
+            # Get minio_compressed_path from metadata if available
+            minio_compressed_path = meta_data.get('minio_compressed_path') if meta_data else None
+            if not minio_compressed_path and meta_data:
+                compression_info = meta_data.get('compression_info', {})
+                minio_compressed_path = compression_info.get('minio_path')
+            
+            # Get compressed video path from metadata or use standard path
+            # User confirmed: bucket is "detectifai-videos" and folder is "compressed"
+            # Standard path format: compressed/{video_id}/video.mp4
+            possible_paths = []
+            
+            # First, try the path from metadata if available
+            if minio_compressed_path:
+                # Normalize path - remove leading slash if present
+                normalized_path = minio_compressed_path.lstrip('/')
+                possible_paths.append(normalized_path)
+                logger.info(f"📁 Using path from metadata: {normalized_path}")
+            
+            # Always try the standard path format (user confirmed this is correct)
+            standard_path = f"compressed/{video_id}/video.mp4"
+            if standard_path not in possible_paths:
+                possible_paths.insert(0, standard_path)  # Try standard path first
+            
+            # Also try alternative formats as fallback
+            alternative_paths = [
+                f"compressed/{video_id}/compressed.mp4",
+            ]
+            for alt_path in alternative_paths:
+                if alt_path not in possible_paths:
+                    possible_paths.append(alt_path)
+            
+            logger.info(f"🔍 Will try {len(possible_paths)} possible paths in bucket: {video_bucket}")
+            for i, p in enumerate(possible_paths, 1):
+                logger.info(f"   {i}. {p}")
+            
+            # Debug: Log if DATABASE_ENABLED and which minio client we're using
+            logger.info(f"📋 DEBUG: DATABASE_ENABLED = {DATABASE_ENABLED}")
+            if DATABASE_ENABLED:
+                logger.info(f"📋 DEBUG: compression_bucket = {compression_bucket}")
+                logger.info(f"📋 DEBUG: video_bucket = {video_bucket}")
+                logger.info(f"📋 DEBUG: minio_client type = {type(minio_client)}")
+                logger.info(f"📋 DEBUG: minio_client available = {minio_client is not None}")
+            
+            video_data = None
+            successful_path = None
+            
+            # Try to get from video bucket (compressed videos are in same bucket as originals)
+            if DATABASE_ENABLED:
+                compression_bucket = db_video_service.compression_service.bucket
+                minio_client = db_video_service.video_repo.minio
+            else:
+                compression_bucket = video_bucket
+                # Need to create a MinIO client if database is not enabled
+                from database.config import DatabaseManager
+                db_manager = DatabaseManager()
+                minio_client = db_manager.minio_client
+            
+            # Try each possible path in the video bucket first
+            logger.info(f"🔍 Trying video bucket: {video_bucket}")
+            for minio_path in possible_paths:
+                try:
+                    logger.info(f"   Attempting: {video_bucket}/{minio_path}")
+                    # Verify bucket exists first
+                    if not minio_client.bucket_exists(video_bucket):
+                        logger.error(f"❌ Bucket '{video_bucket}' does not exist!")
+                        raise Exception(f"Bucket '{video_bucket}' does not exist")
+                    
+                    video_data = minio_client.get_object(
+                        video_bucket,
+                        minio_path
+                    )
+                    successful_path = minio_path
+                    logger.info(f"✅ Found compressed video in video bucket: {video_bucket} at {minio_path}")
+                    break
+                except S3Error as s3_err:
+                    error_code = getattr(s3_err, 'code', 'Unknown')
+                    error_msg = str(s3_err)
+                    logger.warning(f"   ❌ S3Error ({error_code}): {error_msg[:200]}")
+                    if error_code == 'NoSuchKey':
+                        logger.info(f"   ℹ️ Object '{minio_path}' not found in bucket '{video_bucket}'")
+                    
+                    # DEBUG: Let's list what's actually in the bucket at this path
+                    if error_code == 'NoSuchKey':
+                        try:
+                            prefix = '/'.join(minio_path.split('/')[:-1]) + '/'  # Get directory path
+                            logger.info(f"   🔍 DEBUG: Listing objects with prefix '{prefix}' in bucket '{video_bucket}'")
+                            debug_objects = list(minio_client.list_objects(video_bucket, prefix=prefix, recursive=True))
+                            if debug_objects:
+                                logger.info(f"   📦 DEBUG: Found {len(debug_objects)} objects:")
+                                for obj in debug_objects[:5]:  # Show first 5
+                                    logger.info(f"      - {obj.object_name} ({obj.size} bytes)")
+                            else:
+                                logger.info(f"   📦 DEBUG: No objects found with prefix '{prefix}'")
+                        except Exception as debug_e:
+                            logger.warning(f"   ⚠️ DEBUG: Failed to list objects: {debug_e}")
+                    continue
+                except Exception as e1:
+                    error_msg = str(e1)
+                    logger.warning(f"   ❌ Failed: {error_msg[:200]}")
+                    import traceback
+                    logger.debug(f"   Traceback: {traceback.format_exc()}")
+                    continue
+            
+            # If not found in video bucket, try compression bucket (should be same, but check anyway)
+            if not video_data and compression_bucket != video_bucket and DATABASE_ENABLED:
+                logger.info(f"🔍 Trying compression bucket: {compression_bucket}")
+                compression_minio = db_video_service.compression_service.minio
+                for minio_path in possible_paths:
+                    try:
+                        logger.info(f"   Attempting: {compression_bucket}/{minio_path}")
+                        if not compression_minio.bucket_exists(compression_bucket):
+                            logger.error(f"❌ Compression bucket '{compression_bucket}' does not exist!")
+                            continue
+                        
+                        video_data = compression_minio.get_object(
+                            compression_bucket,
+                            minio_path
+                        )
+                        successful_path = minio_path
+                        logger.info(f"✅ Found compressed video in compression bucket: {compression_bucket} at {minio_path}")
+                        break
+                    except S3Error as s3_err:
+                        error_code = getattr(s3_err, 'code', 'Unknown')
+                        logger.warning(f"   ❌ S3Error ({error_code}): {str(s3_err)[:200]}")
+                        continue
+                    except Exception as e2:
+                        logger.warning(f"   ❌ Failed: {str(e2)[:200]}")
+                        continue
+            elif not video_data and compression_bucket == video_bucket:
+                logger.info(f"ℹ️ Compression bucket is same as video bucket, skipping duplicate check")
+            
+            # If still not found, try listing objects to see what's available
+            if not video_data:
+                logger.warning(f"⚠️ Could not find video with standard paths, listing objects in bucket '{video_bucket}'...")
+                try:
+                    # List all objects with compressed prefix for this video
+                    search_prefix = f"compressed/{video_id}/"
+                    logger.info(f"🔍 Listing objects in '{video_bucket}' with prefix '{search_prefix}'")
+                    
+                    if not minio_client.bucket_exists(video_bucket):
+                        logger.error(f"❌ Bucket '{video_bucket}' does not exist! Cannot list objects.")
+                    else:
+                        objects = list(minio_client.list_objects(video_bucket, prefix=search_prefix, recursive=True))
+                        logger.info(f"📦 Found {len(objects)} objects in video bucket '{video_bucket}' with prefix '{search_prefix}'")
+                        
+                        if objects:
+                            logger.info(f"📋 Available objects:")
+                            for obj in objects:
+                                logger.info(f"   - {obj.object_name} ({obj.size} bytes, modified: {obj.last_modified})")
+                            
+                            # Try the first object found
+                            actual_path = objects[0].object_name
+                            logger.info(f"🔄 Trying first object found: {actual_path}")
+                            try:
+                                video_data = minio_client.get_object(video_bucket, actual_path)
+                                successful_path = actual_path
+                                logger.info(f"✅ Successfully retrieved video from path: {actual_path}")
+                            except Exception as get_err:
+                                logger.error(f"❌ Failed to get object '{actual_path}': {get_err}")
+                        else:
+                            logger.warning(f"⚠️ No objects found with prefix '{search_prefix}' in bucket '{video_bucket}'")
+                            
+                            # Try listing all objects in compressed folder
+                            logger.info(f"🔍 Listing all objects in 'compressed/' folder...")
+                            all_compressed = list(minio_client.list_objects(video_bucket, prefix="compressed/", recursive=True))
+                            logger.info(f"📦 Found {len(all_compressed)} total objects in 'compressed/' folder")
+                            if all_compressed:
+                                logger.info(f"📋 Sample objects in compressed folder:")
+                                for obj in all_compressed[:10]:  # Show first 10
+                                    logger.info(f"   - {obj.object_name}")
+                    
+                    # Also check compression bucket if different
+                    if not video_data and compression_bucket != video_bucket and DATABASE_ENABLED:
+                        logger.info(f"🔍 Listing objects in compression bucket '{compression_bucket}' with prefix '{search_prefix}'")
+                        compression_minio = db_video_service.compression_service.minio
+                        if compression_minio.bucket_exists(compression_bucket):
+                            objects2 = list(compression_minio.list_objects(compression_bucket, prefix=search_prefix, recursive=True))
+                            logger.info(f"📦 Found {len(objects2)} objects in compression bucket")
+                            if objects2:
+                                for obj in objects2:
+                                    logger.info(f"   - {obj.object_name} ({obj.size} bytes)")
+                                actual_path = objects2[0].object_name
+                                logger.info(f"🔄 Trying actual path found: {actual_path}")
+                                video_data = compression_minio.get_object(compression_bucket, actual_path)
+                                successful_path = actual_path
+                except Exception as list_err:
+                    logger.error(f"❌ Failed to list objects: {list_err}")
+                    import traceback
+                    logger.error(f"Traceback: {traceback.format_exc()}")
+            
+            if video_data:
+                # Successfully found video in MinIO
+                video_bytes = video_data.read()
+                video_data.close()
+                video_data.release_conn()
+                
+                response = send_file(
+                    BytesIO(video_bytes),
+                    mimetype='video/mp4',
+                    as_attachment=False,
+                    download_name=f"{video_id}_compressed.mp4"
+                )
+                response.headers['Accept-Ranges'] = 'bytes'
+                response.headers['Cache-Control'] = 'no-cache'
+                response.headers['Access-Control-Allow-Origin'] = '*'
+                response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+                response.headers['Access-Control-Allow-Headers'] = 'Range'
+                response.headers['Content-Type'] = 'video/mp4'
+                logger.info(f"✅ Served compressed video from MinIO for {video_id}")
+                return response
+            else:
+                logger.warning(f"⚠️ Could not retrieve video from MinIO. Tried {len(possible_paths)} paths in buckets {video_bucket} and {compression_bucket}")
+                # Fall through to local storage check
+        except S3Error as e:
+            logger.warning(f"⚠️ MinIO retrieval failed (S3Error), falling back to local storage: {e}")
+            import traceback
+            logger.error(f"S3Error traceback: {traceback.format_exc()}")
+            # Don't return, continue to local fallback
+        except Exception as e:
+            logger.warning(f"⚠️ MinIO retrieval failed, falling back to local storage: {e}")
+            import traceback
+            logger.error(f"Exception traceback: {traceback.format_exc()}")
+            # Don't return, continue to local fallback
+        
+        # Fallback: Find the compressed video file locally (ALWAYS try this, even if database lookup failed)
+        logger.info(f"🔍 Searching local file system for compressed video: {video_id}")
+        
+        # Get the local path from compression service if available
+        local_path_from_service = None
+        if DATABASE_ENABLED:
+            try:
+                # Try to get local path from compression service result
+                if not video_record:
+                    video_record = db_video_service.video_repo.get_video_by_id(video_id)
+                if video_record:
+                    meta_data = video_record.get('meta_data', {})
+                    # Check if we have compression info with local path
+                    compression_info = meta_data.get('compression_info', {})
+                    if compression_info and 'local_path' in compression_info:
+                        local_path_from_service = compression_info['local_path']
+                        logger.info(f"📁 Found local path from compression info: {local_path_from_service}")
+                    # Also check for compressed_path in compression_info (alternative field name)
+                    elif compression_info and 'compressed_path' in compression_info:
+                        local_path_from_service = compression_info['compressed_path']
+                        logger.info(f"📁 Found local path from compression_info.compressed_path: {local_path_from_service}")
+                    # Also check minio_compressed_path - might be a local path
+                    elif meta_data.get('minio_compressed_path'):
+                        potential_path = meta_data.get('minio_compressed_path')
+                        if os.path.exists(potential_path) and not potential_path.startswith('compressed/'):
+                            local_path_from_service = potential_path
+                            logger.info(f"📁 Found local path from minio_compressed_path: {local_path_from_service}")
+            except Exception as e:
+                logger.debug(f"Could not get local path from service: {e}")
+        
+        # List of possible local directories to check
+        possible_dirs = []
+        
+        # Add path from compression service if available
+        if local_path_from_service:
+            if os.path.exists(local_path_from_service):
+                possible_dirs.append(os.path.dirname(local_path_from_service))
+            elif os.path.exists(local_path_from_service):
+                # If it's a file path, use its directory
+                possible_dirs.append(os.path.dirname(local_path_from_service))
+        
+        # Add standard locations (check multiple possible locations)
+        possible_dirs.extend([
+            os.path.join("video_processing_outputs", "compressed", video_id),  # Standard location from compression service
+            os.path.join(OUTPUT_FOLDER, video_id, 'compressed'),
+            os.path.join("video_processing_outputs", video_id, "compressed"),
+            os.path.join("backend", "video_processing_outputs", "compressed", video_id),  # If running from root
+            os.path.join(".", "video_processing_outputs", "compressed", video_id),  # Current directory
+            os.path.join("video_processing_outputs", "compressed"),  # Check root compressed dir
+            os.path.join(OUTPUT_FOLDER, "compressed", video_id),  # Alternative location
+        ])
+        
+        # Also add direct file paths that might be stored in metadata
+        possible_file_paths = [
+            os.path.join("video_processing_outputs", "compressed", f"{video_id}_compressed.mp4"),
+            os.path.join(OUTPUT_FOLDER, "compressed", f"{video_id}_compressed.mp4"),
+            os.path.join("video_processing_outputs", "compressed", video_id, "video.mp4"),
+            os.path.join(OUTPUT_FOLDER, video_id, "compressed", "video.mp4"),
+        ]
+        
+        # Check direct file paths first
+        for file_path in possible_file_paths:
+            if os.path.exists(file_path) and os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
+                logger.info(f"✅ Found compressed video file: {file_path} ({os.path.getsize(file_path)} bytes)")
+                try:
+                    response = send_file(
+                        file_path,
+                        mimetype='video/mp4',
+                        as_attachment=False,
+                        download_name=os.path.basename(file_path)
+                    )
+                    response.headers['Accept-Ranges'] = 'bytes'
+                    response.headers['Cache-Control'] = 'no-cache'
+                    response.headers['Access-Control-Allow-Origin'] = '*'
+                    response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+                    response.headers['Access-Control-Allow-Headers'] = 'Range'
+                    response.headers['Content-Type'] = 'video/mp4'
+                    logger.info(f"✅ Serving compressed video from file path: {file_path}")
+                    return response
+                except Exception as e:
+                    logger.warning(f"Failed to serve from file path {file_path}: {e}")
+                    continue
+        
+        # Also check if local_path_from_service is a direct file path
+        if local_path_from_service and os.path.exists(local_path_from_service) and os.path.isfile(local_path_from_service):
+            logger.info(f"✅ Found compressed video file directly: {local_path_from_service}")
+            try:
+                response = send_file(
+                    local_path_from_service,
+                    mimetype='video/mp4',
+                    as_attachment=False,
+                    download_name=os.path.basename(local_path_from_service)
+                )
+                response.headers['Accept-Ranges'] = 'bytes'
+                response.headers['Cache-Control'] = 'no-cache'
+                response.headers['Access-Control-Allow-Origin'] = '*'
+                response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+                response.headers['Access-Control-Allow-Headers'] = 'Range'
+                response.headers['Content-Type'] = 'video/mp4'
+                logger.info(f"✅ Serving compressed video from direct path: {local_path_from_service}")
+                return response
+            except Exception as e:
+                logger.warning(f"Failed to serve from direct path: {e}")
+        
+        # Remove duplicates while preserving order
+        seen = set()
+        unique_dirs = []
+        for d in possible_dirs:
+            if d not in seen:
+                seen.add(d)
+                unique_dirs.append(d)
+        
+        logger.info(f"🔍 Checking {len(unique_dirs)} possible local directories")
+        
+        for output_dir in unique_dirs:
+            logger.info(f"🔍 Checking directory: {output_dir}")
+            if os.path.exists(output_dir):
+                # Look for compressed video files
+                try:
+                    files = os.listdir(output_dir)
+                    logger.info(f"📁 Files in {output_dir}: {files}")
+                    
+                    for file in files:
+                        if file.endswith('.mp4'):
+                            video_path = os.path.join(output_dir, file)
+                            if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
+                                logger.info(f"✅ Found compressed video locally: {video_path} ({os.path.getsize(video_path)} bytes)")
+                                response = send_file(
+                                    video_path,
+                                    mimetype='video/mp4',
+                                    as_attachment=False,
+                                    download_name=file
+                                )
+                                # Add headers for video playback and streaming
+                                response.headers['Accept-Ranges'] = 'bytes'
+                                response.headers['Cache-Control'] = 'no-cache'
+                                response.headers['Access-Control-Allow-Origin'] = '*'
+                                response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+                                response.headers['Access-Control-Allow-Headers'] = 'Range'
+                                response.headers['Content-Type'] = 'video/mp4'
+                                logger.info(f"✅ Serving compressed video from local storage: {video_path}")
+                                return response
+                except Exception as dir_err:
+                    logger.warning(f"⚠️ Error reading directory {output_dir}: {dir_err}")
+                    continue
+        
+        logger.error(f"❌ No compressed video found for {video_id} in any location")
+        logger.error(f"   Checked {len(unique_dirs)} directories: {unique_dirs}")
+        
+        # Use video_exists_in_db from earlier check, or check again if not set
+        if not video_exists_in_db and DATABASE_ENABLED:
+            try:
+                if not video_record:
+                    video_record = db_video_service.video_repo.get_video_by_id(video_id)
+                video_exists_in_db = video_record is not None
+            except Exception as e:
+                logger.warning(f"Could not check if video exists: {e}")
+        
+        if not video_exists_in_db:
+            logger.error(f"❌ Video {video_id} does not exist in database")
+            return jsonify({'error': 'Video not found', 'video_id': video_id}), 404
+        else:
+            processing_status = 'unknown'
+            if video_record:
+                processing_status = video_record.get('meta_data', {}).get('processing_status', 'unknown')
+            logger.error(f"❌ Video {video_id} exists but compressed video not found")
+            logger.error(f"   Processing status: {processing_status}")
+            logger.error(f"   Checked {len(unique_dirs)} directories: {unique_dirs}")
+            return jsonify({
+                'error': 'Compressed video not found', 
+                'video_id': video_id, 
+                'checked_dirs': unique_dirs,
+                'processing_status': processing_status,
+                'message': 'Video exists but compressed version not available. Processing may still be in progress or compression may have failed.'
+            }), 404
+        
+    except Exception as e:
+        logger.error(f"Error serving compressed video: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/<video_id>/keyframes', methods=['GET'])
+def get_video_keyframes(video_id):
+    """Get list of keyframes with detection results"""
+    try:
+        frames_dir = os.path.join(OUTPUT_FOLDER, video_id, 'frames')
+        if not os.path.exists(frames_dir):
+            return jsonify({'error': 'Keyframes not found'}), 404
+        
+        # Load detection metadata
+        detection_metadata = {}
+        detection_metadata_path = os.path.join(OUTPUT_FOLDER, video_id, 'detection_metadata.json')
+        if os.path.exists(detection_metadata_path):
+            try:
+                with open(detection_metadata_path, 'r') as f:
+                    detection_metadata = json.load(f)
+            except Exception as e:
+                logger.warning(f"Could not load detection metadata: {e}")
+        
+        # Build detection lookup dictionary
+        detection_lookup = {}
+        for item in detection_metadata.get('detection_summary', []):
+            original_filename = os.path.basename(item['original_path'])
+            annotated_filename = os.path.basename(item['annotated_path']) if 'annotated_path' in item else None
+            detection_lookup[original_filename] = {
+                'has_detections': True,
+                'detection_count': item.get('detection_count', 0),
+                'objects': item.get('objects', []),
+                'confidence_avg': item.get('confidence_avg', 0.0),
+                'annotated_filename': annotated_filename
+            }
+            
+        keyframes = []
+        for file in os.listdir(frames_dir):
+            # Filter out annotated versions - only include original keyframes
+            if file.endswith('.jpg') and not file.endswith('_annotated.jpg'):
+                # Extract timestamp safely
+                timestamp = 0.0
+                try:
+                    if '_' in file:
+                        timestamp_part = file.split('_')[1].replace('s', '').replace('.jpg', '')
+                        timestamp = float(timestamp_part)
+                except (ValueError, IndexError):
+                    timestamp = 0.0
+                
+                # Build keyframe data with detection info
+                keyframe_data = {
+                    'filename': file,
+                    'url': f'/api/video/{video_id}/keyframe/{file}',
+                    'timestamp': timestamp,
+                    'has_detections': file in detection_lookup
+                }
+                
+                # Add detection details and annotated frame URL if available
+                if file in detection_lookup:
+                    detection_info = detection_lookup[file]
+                    keyframe_data['detection_count'] = detection_info['detection_count']
+                    keyframe_data['objects'] = detection_info['objects']
+                    keyframe_data['confidence_avg'] = detection_info['confidence_avg']
+                    
+                    # Provide annotated frame URL if it exists
+                    if detection_info['annotated_filename']:
+                        keyframe_data['annotated_url'] = f'/api/video/{video_id}/keyframe/{detection_info["annotated_filename"]}'
+                
+                keyframes.append(keyframe_data)
+        
+        # Sort by timestamp
+        keyframes.sort(key=lambda x: x['timestamp'])
+        
+        return jsonify({
+            'video_id': video_id,
+            'keyframes': keyframes,
+            'total_keyframes': len(keyframes),
+            'keyframes_with_detections': detection_metadata.get('frames_with_detections', 0),
+            'objects_detected': detection_metadata.get('objects_detected', {})
+        })
+        
+    except Exception as e:
+        logger.error(f"Error getting keyframes: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/video/<video_id>/keyframe/<filename>', methods=['GET'])
+@app.route('/api/v2/video/keyframe/<video_id>/<filename>', methods=['GET'])
+def serve_keyframe(video_id, filename):
+    """Serve a single keyframe image from MinIO, falling back to local storage.
+
+    Lookup order:
+      1. MinIO (only when DATABASE_ENABLED): ``{video_id}/keyframes/{filename}``
+         and then ``{video_id}/{filename}`` in the keyframe repository bucket.
+      2. Local filesystem: three known layouts under OUTPUT_FOLDER and
+         ``video_processing_outputs``.
+
+    Args:
+        video_id: Video identifier taken from the URL.
+        filename: Keyframe file name taken from the URL (e.g. frame_000001.jpg).
+
+    Returns:
+        The image response (always sent as image/jpeg), a 400 JSON error when
+        either URL component could escape its directory, a 404 JSON error when
+        the keyframe is found in none of the locations, or a 500 JSON error on
+        an unexpected failure.
+    """
+    try:
+        # Both values come straight from the URL and are joined into object
+        # keys and filesystem paths below, so refuse anything that could
+        # climb out of the intended directory.
+        for component in (video_id, filename):
+            if component in ('.', '..') or '/' in component or '\\' in component:
+                return jsonify({'error': 'Invalid video id or filename'}), 400
+
+        # First try to get from MinIO (database-integrated)
+        if DATABASE_ENABLED:
+            try:
+                from io import BytesIO
+                from minio.error import S3Error
+
+                # Try both path patterns (keyframes subfolder and flat)
+                minio_paths_to_try = [
+                    f"{video_id}/keyframes/{filename}",
+                    f"{video_id}/{filename}",
+                ]
+                keyframe_repo = db_video_service.keyframe_repo
+
+                keyframe_bytes = None
+                for minio_path in minio_paths_to_try:
+                    try:
+                        minio_response = keyframe_repo.minio.get_object(
+                            keyframe_repo.bucket,
+                            minio_path
+                        )
+                        try:
+                            keyframe_bytes = minio_response.read()
+                        finally:
+                            # Always hand the connection back to the pool,
+                            # even when the read fails part-way through.
+                            minio_response.close()
+                            minio_response.release_conn()
+                        logger.info(f"✅ Served keyframe from MinIO: {minio_path}")
+                        break
+                    except S3Error:
+                        # Not stored under this key; try the next pattern.
+                        continue
+
+                if keyframe_bytes:
+                    response = send_file(
+                        BytesIO(keyframe_bytes),
+                        mimetype='image/jpeg',
+                        as_attachment=False
+                    )
+                    response.headers['Cache-Control'] = 'public, max-age=3600'
+                    response.headers['Access-Control-Allow-Origin'] = '*'
+                    response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+                    response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
+                    return response
+                else:
+                    logger.warning(f"Keyframe not found in MinIO for any path: {minio_paths_to_try}")
+            except Exception as e:
+                # MinIO problems are not fatal: the local copy may still exist.
+                logger.warning(f"MinIO retrieval failed, trying local: {e}")
+
+        # Fallback: Try local filesystem (multiple possible locations)
+        local_paths_to_try = [
+            os.path.join(OUTPUT_FOLDER, video_id, 'frames', filename),
+            os.path.join('video_processing_outputs', 'keyframes', video_id, filename),
+            os.path.join(OUTPUT_FOLDER, video_id, filename),
+        ]
+        for keyframe_path in local_paths_to_try:
+            if os.path.exists(keyframe_path):
+                response = send_file(
+                    keyframe_path,
+                    mimetype='image/jpeg',
+                    as_attachment=False
+                )
+                response.headers['Access-Control-Allow-Origin'] = '*'
+                return response
+
+        return jsonify({'error': 'Keyframe not found'}), 404
+
+    except Exception as e:
+        logger.error(f"Error serving keyframe: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+@app.route('/api/minio/image/<bucket>/<path:object_path>', methods=['GET'])
+def serve_minio_image(bucket, object_path):
+    """
+    Unified endpoint to serve images from MinIO buckets
+    Supports:
+    - Keyframes: detectifai-keyframes/{video_id}/keyframes/frame_*.jpg
+    - Live stream keyframes: detectifai-keyframes/live/{camera_id}/*.jpg
+    - NLP images: nlp-images/*.jpg
+    - Face images: detectifai-faces/*.jpg
+
+    Args:
+        bucket: MinIO bucket name taken from the URL.
+        object_path: Object key inside the bucket (may contain slashes).
+
+    Returns:
+        The image bytes with a content type chosen from the file extension
+        (JPEG when the extension is not recognised); 503 when the database
+        service is disabled; 404 when the bucket or object does not exist;
+        500 on any other MinIO or unexpected error.
+    """
+    try:
+        from io import BytesIO
+        from minio.error import S3Error
+
+        if not DATABASE_ENABLED:
+            return jsonify({'error': 'Database service not available'}), 503
+
+        # NOTE(review): this handler applies no authentication or bucket
+        # allow-list, so any object in any bucket the MinIO credentials can
+        # read is reachable through it. Confirm that this is intended.
+        minio_client = db_video_service.db_manager.minio_client
+
+        # Verify bucket exists
+        if not minio_client.bucket_exists(bucket):
+            logger.warning(f"Bucket {bucket} does not exist")
+            return jsonify({'error': f'Bucket {bucket} not found'}), 404
+
+        try:
+            # Get object from MinIO
+            image_data = minio_client.get_object(bucket, object_path)
+            try:
+                image_bytes = image_data.read()
+            finally:
+                # Release the pooled connection even if the read fails.
+                image_data.close()
+                image_data.release_conn()
+
+            # Determine content type from file extension (JPEG by default)
+            content_type = 'image/jpeg'
+            lowered_path = object_path.lower()
+            for suffix, mime in (
+                ('.png', 'image/png'),
+                ('.webp', 'image/webp'),
+                ('.gif', 'image/gif'),
+            ):
+                if lowered_path.endswith(suffix):
+                    content_type = mime
+                    break
+
+            response = send_file(
+                BytesIO(image_bytes),
+                mimetype=content_type,
+                as_attachment=False
+            )
+            response.headers['Cache-Control'] = 'public, max-age=3600'
+            response.headers['Access-Control-Allow-Origin'] = '*'
+            response.headers['Access-Control-Allow-Methods'] = 'GET, OPTIONS'
+            response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
+
+            logger.info(f"✅ Served image from MinIO: {bucket}/{object_path}")
+            return response
+
+        except S3Error as e:
+            logger.error(f"MinIO error retrieving {bucket}/{object_path}: {e}")
+            if e.code == 'NoSuchKey':
+                return jsonify({'error': 'Image not found in MinIO'}), 404
+            return jsonify({'error': f'MinIO error: {str(e)}'}), 500
+
+    except Exception as e:
+        logger.error(f"Error serving MinIO image: {e}")
+        return jsonify({'error': f'Error serving image: {str(e)}'}), 500
+
+
+
+def _redirect_to_presigned_video(minio_client, bucket, object_key, download_name):
+    """Build a 302 redirect to a one-hour presigned MinIO URL for a video.
+
+    Args:
+        minio_client: MinIO client used for the existence check and presigning.
+        bucket: Bucket holding the object.
+        object_key: Key of the video object inside the bucket.
+        download_name: File name advertised in the inline Content-Disposition.
+
+    Returns:
+        A Flask redirect response pointing at the presigned URL.
+
+    Raises:
+        Whatever ``stat_object`` / ``presigned_get_object`` raise (for example
+        when the object does not exist); the caller decides how to fall back.
+    """
+    from datetime import timedelta
+
+    # Existence check only: raises when the object is missing, so we never
+    # redirect the client to a dead URL.
+    minio_client.stat_object(bucket, object_key)
+
+    # Generate presigned URL (valid for 1 hour)
+    presigned_url = minio_client.presigned_get_object(
+        bucket,
+        object_key,
+        expires=timedelta(hours=1),
+        response_headers={
+            'response-content-disposition': f'inline; filename="{download_name}"',
+            'response-content-type': 'video/mp4'
+        }
+    )
+
+    # Fix for Docker vs localhost networking issues.
+    # If the app is reached via localhost but MinIO is addressed by an
+    # internal hostname (like 'minio'), the URL might be unreachable from the
+    # browser, so swap the host for localhost. This is a heuristic for common
+    # dev setups.
+    # NOTE(review): the host is normally part of what a presigned URL signs,
+    # so rewriting it may invalidate the signature - confirm against the
+    # deployed MinIO.
+    if 'localhost' in request.host or '127.0.0.1' in request.host:
+        parsed_url = urllib.parse.urlparse(presigned_url)
+        if parsed_url.hostname not in ['localhost', '127.0.0.1']:
+            new_netloc = parsed_url.netloc.replace(parsed_url.hostname, 'localhost')
+            presigned_url = parsed_url._replace(netloc=new_netloc).geturl()
+            # Log only the hosts: the full URL carries the signature.
+            logger.info(f"🔄 Adjusted presigned URL host for localhost: {parsed_url.netloc} -> {new_netloc}")
+
+    return redirect(presigned_url, code=302)
+
+
+@app.route('/api/v3/video/compressed/<video_id>', methods=['GET'])
+def serve_compressed_video_v3(video_id):
+    """Serve the compressed video for ``video_id``, with fallbacks.
+
+    Sources are tried in this order:
+      1. MinIO object ``compressed/{video_id}/video.mp4`` in the
+         ``detectifai-videos`` bucket (only when DATABASE_ENABLED and a video
+         record exists) - answered with a redirect to a presigned URL.
+      2. The original upload recorded on the video record
+         (``minio_object_key`` / ``minio_bucket``) when step 1 raised.
+      3. A non-empty file in one of several local output layouts.
+
+    Returns:
+        A 302 redirect, a streamed local file, a 404 JSON error when no copy
+        is found, or a 500 JSON error when the local fallback itself fails.
+    """
+    logger.info(f"🆕 V3 Request to serve compressed video: {video_id}")
+
+    # 1. Try MinIO if database is enabled
+    if DATABASE_ENABLED:
+        # Pre-bound so the fallback below can tell whether the lookup ever
+        # succeeded, instead of tripping over an unbound local.
+        video_record = None
+        try:
+            video_record = db_video_service.video_repo.get_video_by_id(video_id)
+            if video_record:
+                logger.info(f"🆕 Found video record for: {video_id}")
+
+                bucket = "detectifai-videos"
+                # Standard path where compressed videos should be
+                minio_path = f"compressed/{video_id}/video.mp4"
+
+                logger.info(f"🆕 Attempting to generate presigned URL for MinIO: {bucket}/{minio_path}")
+                response = _redirect_to_presigned_video(
+                    db_video_service.video_repo.minio,
+                    bucket,
+                    minio_path,
+                    f"compressed_{video_id}.mp4"
+                )
+                logger.info(f"🆕 Redirecting to presigned URL for video: {video_id}")
+                return response
+            else:
+                logger.warning(f"🆕 Video record not found in DB for: {video_id}")
+
+        except Exception as minio_e:
+            logger.warning(f"🆕 MinIO compressed video failed: {minio_e}")
+
+            # Fallback to original video if compressed doesn't exist
+            try:
+                logger.info(f"🔄 Trying original video as fallback for: {video_id}")
+
+                # Re-query only when the first lookup itself was what failed.
+                if video_record is None:
+                    video_record = db_video_service.video_repo.get_video_by_id(video_id)
+
+                if video_record and 'minio_object_key' in video_record:
+                    original_path = video_record['minio_object_key']
+                    bucket = video_record.get('minio_bucket', 'detectifai-videos')
+
+                    logger.info(f"🆕 Attempting original video from MinIO: {bucket}/{original_path}")
+                    response = _redirect_to_presigned_video(
+                        db_video_service.video_repo.minio,
+                        bucket,
+                        original_path,
+                        f"video_{video_id}.mp4"
+                    )
+                    logger.info(f"✅ Redirecting to ORIGINAL video for: {video_id}")
+                    return response
+
+            except Exception as original_e:
+                logger.warning(f"🆕 Original video fallback also failed: {original_e}")
+
+    # 2. Fallback: Try local filesystem
+    logger.info(f"🔄 V3 Fallback: Checking local filesystem for video {video_id}")
+
+    try:
+        # Possible local paths
+        possible_paths = [
+            os.path.join(OUTPUT_FOLDER, video_id, 'compressed', 'video.mp4'),
+            os.path.join(OUTPUT_FOLDER, video_id, 'compressed', f'{video_id}_compressed.mp4'),
+            os.path.join("video_processing_outputs", video_id, "compressed", "video.mp4"),
+            os.path.join(OUTPUT_FOLDER, "compressed", video_id, "video.mp4"),
+            # Also check upload folder if it was just uploaded but not fully processed
+            os.path.join(app.config['UPLOAD_FOLDER'], video_id, 'compressed', 'video.mp4')
+        ]
+
+        for path in possible_paths:
+            # Zero-byte files are treated as missing.
+            if os.path.exists(path) and os.path.getsize(path) > 0:
+                logger.info(f"✅ Found compressed video locally: {path} ({os.path.getsize(path)} bytes)")
+                response = send_file(
+                    path,
+                    mimetype='video/mp4',
+                    as_attachment=False,
+                    download_name=f"compressed_{video_id}.mp4"
+                )
+                # Add headers for video playback and streaming
+                response.headers['Accept-Ranges'] = 'bytes'
+                response.headers['Cache-Control'] = 'public, max-age=3600'
+                response.headers['Access-Control-Allow-Origin'] = '*'
+                response.headers['Content-Type'] = 'video/mp4'
+                logger.info(f"✅ Serving compressed video from local fallback: {path}")
+                return response
+
+        logger.error(f"❌ No compressed video found for {video_id} in local fallback paths")
+        return jsonify({'error': 'Video not found locally or in cloud'}), 404
+
+    except Exception as local_e:
+        logger.error(f"❌ Local fallback error: {local_e}")
+        return jsonify({'error': str(local_e)}), 500
+
+@app.route('/api/minio/presigned/<bucket>/<path:object_path>', methods=['GET'])
+def get_minio_presigned_url(bucket, object_path):
+    """
+    Generate presigned URL for MinIO object
+    Useful for direct client access to images
+
+    Query parameters:
+        expires: Lifetime of the URL in whole hours (default 1). Must be
+            between 1 and 168 (7 days); a non-integer value falls back to
+            the default.
+
+    Returns:
+        JSON with ``success``, ``url``, ``bucket``, ``object_path`` and
+        ``expires_in_hours``; 400 when ``expires`` is out of range; 503 when
+        the database service is disabled; 404 when the bucket does not exist;
+        500 on MinIO or unexpected errors.
+    """
+    try:
+        from datetime import timedelta
+        from minio.error import S3Error
+
+        if not DATABASE_ENABLED:
+            return jsonify({'error': 'Database service not available'}), 503
+
+        # Get expiration time from query parameter (default 1 hour)
+        expires_hours = request.args.get('expires', 1, type=int)
+        # Presigned URLs may live at most 7 days; reject bad values as a
+        # client error instead of letting the MinIO client raise into a 500.
+        if not 1 <= expires_hours <= 168:
+            return jsonify({'error': 'expires must be between 1 and 168 hours'}), 400
+        expires = timedelta(hours=expires_hours)
+
+        # NOTE(review): this handler applies no authentication or bucket
+        # allow-list, so it will presign any object the MinIO credentials can
+        # read. Confirm that this is intended.
+        minio_client = db_video_service.db_manager.minio_client
+
+        # Verify bucket exists
+        if not minio_client.bucket_exists(bucket):
+            return jsonify({'error': f'Bucket {bucket} not found'}), 404
+
+        try:
+            # Generate presigned URL
+            presigned_url = minio_client.presigned_get_object(
+                bucket,
+                object_path,
+                expires=expires
+            )
+
+            return jsonify({
+                'success': True,
+                'url': presigned_url,
+                'bucket': bucket,
+                'object_path': object_path,
+                'expires_in_hours': expires_hours
+            })
+
+        except S3Error as e:
+            logger.error(f"MinIO error generating presigned URL: {e}")
+            return jsonify({'error': f'MinIO error: {str(e)}'}), 500
+
+    except Exception as e:
+        logger.error(f"Error generating presigned URL: {e}")
+        return jsonify({'error': f'Error: {str(e)}'}), 500
+
+# ====== HELPER FUNCTIONS ======
+
+def _summarize_behaviors(behavior_events: List[Dict]) -> Dict:
+    """Summarize behavior analysis results.
+
+    Only events whose ``event_type`` starts with ``behavior_`` contribute to
+    the per-type counts and the confidence average; the prefix is stripped to
+    obtain the behavior name ("behavior_fighting" -> "fighting").
+
+    Args:
+        behavior_events: Event dicts; ``event_type`` and ``confidence_score``
+            are read. ``None`` or an empty list yields an all-zero summary.
+
+    Returns:
+        Dict with:
+          - ``total_behaviors``: number of events passed in (all of them,
+            including any without the ``behavior_`` prefix),
+          - ``by_type``: count per behavior name,
+          - ``most_common``: behavior with the highest count (first seen wins
+            a tie) or ``None``,
+          - ``average_confidence``: mean of the non-zero confidence scores,
+          - ``behavior_types``: distinct behavior names in first-seen order.
+    """
+    prefix = 'behavior_'
+    behavior_counts = {}
+    confidences = []
+
+    for event in behavior_events or []:
+        # A stored None must not crash startswith().
+        event_type = event.get('event_type') or ''
+        if not event_type.startswith(prefix):
+            continue
+
+        # Strip the leading prefix only; str.replace would also delete any
+        # later occurrence inside the behavior name.
+        behavior_type = event_type[len(prefix):]
+        behavior_counts[behavior_type] = behavior_counts.get(behavior_type, 0) + 1
+
+        # Missing or zero scores are left out of the average.
+        confidence = event.get('confidence_score', 0.0)
+        if confidence:
+            confidences.append(float(confidence))
+
+    most_common = max(behavior_counts, key=behavior_counts.get) if behavior_counts else None
+
+    return {
+        'total_behaviors': len(behavior_events) if behavior_events else 0,
+        'by_type': behavior_counts,
+        'most_common': most_common,
+        'average_confidence': sum(confidences) / len(confidences) if confidences else 0.0,
+        # Dict keys keep insertion order, giving a stable result (a set did not).
+        'behavior_types': list(behavior_counts)
+    }
+
+def _summarize_events(events: List[Dict]) -> Dict:
+    """Summarize events by type and threat level.
+
+    Args:
+        events: Event dicts; ``event_type``, ``threat_level``,
+            ``start_timestamp``, ``end_timestamp`` and ``confidence`` are
+            read. Missing or ``None`` values fall back to ``'unknown'``,
+            ``'low'`` and ``0`` respectively.
+
+    Returns:
+        Dict with ``by_type`` and ``by_threat_level`` counts,
+        ``total_duration`` (sum of ``end - start`` per event, never negative)
+        and ``highest_confidence``.
+    """
+    by_type = {}
+    by_threat_level = {}
+    total_duration = 0.0
+    highest_confidence = 0.0
+
+    for event in events or []:
+        # Count by type
+        event_type = event.get('event_type', 'unknown')
+        by_type[event_type] = by_type.get(event_type, 0) + 1
+
+        # Count by threat level
+        threat_level = event.get('threat_level', 'low')
+        by_threat_level[threat_level] = by_threat_level.get(threat_level, 0) + 1
+
+        # ``or 0`` also covers keys that are present but hold None.
+        start = event.get('start_timestamp') or 0
+        end = event.get('end_timestamp') or 0
+        # An event without a usable end would otherwise subtract its start
+        # time from the total, so clamp each contribution at zero.
+        total_duration += max(end - start, 0)
+
+        # Track highest confidence
+        confidence = event.get('confidence') or 0
+        highest_confidence = max(highest_confidence, confidence)
+
+    return {
+        'by_type': by_type,
+        'by_threat_level': by_threat_level,
+        'total_duration': total_duration,
+        'highest_confidence': highest_confidence
+    }
+
+def _summarize_detections(detections: List[Dict]) -> Dict:
+    """Aggregate object detections into per-class counts and confidence stats.
+
+    Args:
+        detections: Detection dicts; ``class_name`` (default ``'unknown'``)
+            and ``confidence`` (default ``0``) are read from each.
+
+    Returns:
+        Dict with ``by_class`` counts, ``average_confidence`` over all
+        detections, ``highest_confidence``, and ``threat_objects`` - the
+        distinct fire/gun/knife/smoke classes seen, in first-seen order.
+    """
+    if not detections:
+        return {
+            'by_class': {},
+            'average_confidence': 0.0,
+            'highest_confidence': 0.0,
+            'threat_objects': []
+        }
+
+    threat_classes = ('fire', 'gun', 'knife', 'smoke')
+    class_counts = {}
+    threats_seen = []
+    confidence_sum = 0.0
+    peak_confidence = 0.0
+
+    for det in detections:
+        label = det.get('class_name', 'unknown')
+        class_counts[label] = class_counts.get(label, 0) + 1
+
+        score = det.get('confidence', 0)
+        confidence_sum += score
+        if score > peak_confidence:
+            peak_confidence = score
+
+        # Record each threat class once, keeping the order of first sighting.
+        if label in threat_classes and label not in threats_seen:
+            threats_seen.append(label)
+
+    return {
+        'by_class': class_counts,
+        'average_confidence': confidence_sum / len(detections),
+        'highest_confidence': peak_confidence,
+        'threat_objects': threats_seen
+    }
+
+def _assess_threat_level(events: List[Dict], detections: List[Dict]) -> Dict:
+    """Derive an overall threat assessment from events and detections.
+
+    A risk score is accumulated with these weights: 10 per event whose
+    ``threat_level`` is 'critical', 5 per 'high' event, 8 per fire/gun
+    detection and 4 per knife detection. The score maps to a level:
+    >= 20 critical, >= 10 high, >= 5 medium, otherwise low.
+
+    Returns:
+        Dict with ``overall_level``, ``confidence_score`` (score / 20, capped
+        at 1.0), ``risk_factors`` (one message per non-zero category) and
+        ``recommendation``.
+    """
+    def _tally(items, field, wanted):
+        # Number of dicts whose ``field`` value is one of ``wanted``.
+        return sum(1 for item in items if item.get(field) in wanted)
+
+    n_critical_events = _tally(events, 'threat_level', ('critical',))
+    n_high_events = _tally(events, 'threat_level', ('high',))
+    n_critical_objects = _tally(detections, 'class_name', ('fire', 'gun'))
+    n_high_objects = _tally(detections, 'class_name', ('knife',))
+
+    # (count, weight per occurrence, message) in reporting order.
+    weighted_findings = (
+        (n_critical_events, 10.0, f"{n_critical_events} critical events detected"),
+        (n_high_events, 5.0, f"{n_high_events} high-risk events detected"),
+        (n_critical_objects, 8.0, f"{n_critical_objects} critical objects detected (fire/gun)"),
+        (n_high_objects, 4.0, f"{n_high_objects} weapons detected (knife)"),
+    )
+
+    risk_score = 0.0
+    risk_factors = []
+    for count, weight, message in weighted_findings:
+        if count > 0:
+            risk_score += count * weight
+            risk_factors.append(message)
+
+    # Score bands, highest first; the first band the score reaches wins.
+    bands = (
+        (20.0, 'critical', 'Immediate response required - potential emergency situation'),
+        (10.0, 'high', 'Investigation recommended - elevated security concern'),
+        (5.0, 'medium', 'Monitor situation - potential security interest'),
+    )
+    overall_level = 'low'
+    recommendation = 'Normal activity - routine monitoring sufficient'
+    for floor, level, advice in bands:
+        if risk_score >= floor:
+            overall_level, recommendation = level, advice
+            break
+
+    return {
+        'overall_level': overall_level,
+        'confidence_score': min(risk_score / 20.0, 1.0),  # Normalize to 0-1
+        'risk_factors': risk_factors,
+        'recommendation': recommendation
+    }
+
+def _resolve_face_event(face_id, default_start):
+    """Find the event and video a stored face belongs to.
+
+    Looks the face up in the ``detected_faces`` collection (exact match,
+    then a match on the first 20 characters of the id), then loads the
+    event it references from the ``event`` collection.
+
+    Args:
+        face_id: Face identifier returned by the recognizer.
+        default_start: Timestamp (seconds) to use when no event is found.
+
+    Returns:
+        Tuple ``(event_id, video_id, start_timestamp, end_timestamp)``.
+        Ids are ``None`` when they cannot be resolved; without an event the
+        timestamps describe a default 5 second clip from ``default_start``.
+        Database errors are logged and leave whatever was resolved so far.
+    """
+    import re
+
+    event_id = None
+    video_id = None
+    start_timestamp = default_start
+    end_timestamp = default_start + 5.0  # Default 5 second clip
+
+    if not DATABASE_ENABLED:
+        return event_id, video_id, start_timestamp, end_timestamp
+
+    try:
+        # Query detected_faces collection for this face_id
+        faces_collection = db_video_service.db_manager.db.detected_faces
+        face_doc = faces_collection.find_one({"face_id": face_id})
+
+        if face_doc:
+            event_id = face_doc.get('event_id')
+            logger.info(f"Found face_doc with event_id: {event_id}")
+        else:
+            logger.warning(f"No face document found for face_id: {face_id}")
+
+            # Fall back to a prefix match. The prefix is escaped so that ids
+            # containing regex metacharacters are matched literally.
+            face_doc = faces_collection.find_one(
+                {"face_id": {"$regex": f"^{re.escape(face_id[:20])}"}}
+            )
+            if face_doc:
+                event_id = face_doc.get('event_id')
+                logger.info(f"Found face via regex with event_id: {event_id}")
+
+        # Query events collection for video_id
+        if event_id:
+            from bson.errors import InvalidId
+            from bson.objectid import ObjectId
+
+            events_collection = db_video_service.db_manager.db.event
+            # Try _id first (ObjectId), fall back to the event_id field
+            event_doc = None
+            try:
+                event_doc = events_collection.find_one({"_id": ObjectId(event_id)})
+            except (InvalidId, TypeError):
+                # event_id is not an ObjectId; only the field lookup applies.
+                event_doc = None
+            if event_doc is None:
+                event_doc = events_collection.find_one({"event_id": event_id})
+
+            if event_doc:
+                video_id = event_doc.get('video_id')
+                # Get actual timestamps from event (stored in milliseconds)
+                start_timestamp = event_doc.get('start_timestamp_ms', 0) / 1000.0
+                end_timestamp = event_doc.get('end_timestamp_ms', 0) / 1000.0
+                logger.info(f"Found event with video_id: {video_id}, timestamps: {start_timestamp}-{end_timestamp}")
+            else:
+                logger.warning(f"No event document found for event_id: {event_id}")
+        else:
+            logger.info("No event_id found, clip will not be available")
+    except Exception as e:
+        # Enrichment is best-effort: keep the search result, log with traceback.
+        logger.warning(f"Could not fetch event/video info for face {face_id}: {e}", exc_info=True)
+
+    return event_id, video_id, start_timestamp, end_timestamp
+
+
+def _count_face_detections(face_id, event_id, video_id):
+    """Count stored detections of ``face_id``, scoped to the video if known, else to the event."""
+    if not (DATABASE_ENABLED and face_id):
+        return 0
+    try:
+        faces_collection = db_video_service.db_manager.db.detected_faces
+        if video_id:
+            return faces_collection.count_documents({
+                "face_id": face_id,
+                "video_id": video_id
+            })
+        if event_id:
+            return faces_collection.count_documents({
+                "face_id": face_id,
+                "event_id": event_id
+            })
+    except Exception as e:
+        logger.warning(f"Could not count face detections: {e}")
+    return 0
+
+
+@app.route('/api/search/person-by-image', methods=['POST'])
+# @require_feature('image_search')  # Pro plan feature - Temporarily disabled for development
+def search_person_by_image():
+    """
+    Search for a person by uploading their image.
+    Uses facial recognition to find similar faces in the database.
+    Intended for the Pro plan (image_search feature); the feature gate is
+    currently commented out, so no plan check is enforced here.
+
+    Form data:
+        image: Uploaded image file (.png, .jpg, .jpeg, .gif or .bmp).
+        threshold: Minimum similarity, float (default 0.6).
+        max_results: Maximum number of matches, positive integer (default 10).
+
+    Returns:
+        JSON with ``success``, ``results`` (one entry per match, enriched with
+        event/video/clip information when the database is enabled),
+        ``total_matches``, ``search_parameters``, ``system_stats`` and
+        ``message``. 400 for a missing/invalid file or malformed parameters;
+        500 when facial recognition is unavailable or on unexpected errors.
+    """
+    try:
+        import uuid
+
+        # Check if image was uploaded
+        if 'image' not in request.files:
+            return jsonify({
+                'success': False,
+                'error': 'No image file provided'
+            }), 400
+
+        file = request.files['image']
+        if file.filename == '':
+            return jsonify({
+                'success': False,
+                'error': 'No image file selected'
+            }), 400
+
+        # Validate file type
+        if not file.filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
+            return jsonify({
+                'success': False,
+                'error': 'Invalid file type. Please upload an image file.'
+            }), 400
+
+        # Parse search parameters before touching the disk so that malformed
+        # values are reported as a client error rather than a 500.
+        try:
+            threshold = float(request.form.get('threshold', 0.6))
+            max_results = int(request.form.get('max_results', 10))
+        except (TypeError, ValueError):
+            return jsonify({
+                'success': False,
+                'error': 'threshold must be a number and max_results an integer'
+            }), 400
+        if max_results < 1:
+            return jsonify({
+                'success': False,
+                'error': 'max_results must be at least 1'
+            }), 400
+
+        # Save uploaded image temporarily. The random component keeps two
+        # uploads of the same file name in the same second from sharing (and
+        # deleting) one temp file.
+        filename = secure_filename(
+            f"search_{int(time.time())}_{uuid.uuid4().hex[:8]}_{file.filename}"
+        )
+        temp_path = os.path.join(UPLOAD_FOLDER, filename)
+        file.save(temp_path)
+
+        try:
+            # Initialize facial recognition system
+            from facial_recognition import FacialRecognitionIntegrated
+            from config import VideoProcessingConfig
+
+            config = VideoProcessingConfig()
+            config.enable_facial_recognition = True
+
+            face_recognizer = FacialRecognitionIntegrated(config)
+
+            if not face_recognizer.enabled:
+                return jsonify({
+                    'success': False,
+                    'error': 'Facial recognition system is not enabled or properly configured'
+                }), 500
+
+            # Perform image search
+            search_results = face_recognizer.search_person_by_image(
+                temp_path,
+                k=max_results,
+                threshold=threshold
+            )
+
+            # Format results for frontend and enrich with event/video info from MongoDB
+            formatted_results = []
+            for result in search_results:
+                face_id = result['face_id']
+                event_id, video_id, start_timestamp, end_timestamp = _resolve_face_event(
+                    face_id, result.get('timestamp', 0.0)
+                )
+
+                # Get face detections for this face_id to enable annotation
+                face_detections_count = _count_face_detections(face_id, event_id, video_id)
+
+                # Build thumbnail URL - ensure face image exists
+                thumbnail_url = None
+                if result.get('face_image_path') and os.path.exists(result['face_image_path']):
+                    thumbnail_url = f"/api/face-image/{face_id}"
+                    logger.info(f"✅ Face image exists at {result['face_image_path']}, thumbnail URL: {thumbnail_url}")
+                else:
+                    logger.warning(f"❌ Face image not found at {result.get('face_image_path')}")
+
+                # A clip needs both the event and the video it was cut from.
+                clip_is_available = event_id is not None and video_id is not None
+                logger.info(f"📹 Clip status for {face_id}: available={clip_is_available} (event_id={event_id}, video_id={video_id})")
+
+                # The person name is optional in the annotated clip URL.
+                annotated_clip_url = None
+                if event_id and face_id:
+                    annotated_clip_url = f"/api/event/clip/{event_id}/annotated?face_id={face_id}"
+                    if result.get('person_name'):
+                        annotated_clip_url += f"&person_name={urllib.parse.quote(result['person_name'])}"
+
+                formatted_result = {
+                    'id': face_id,
+                    'face_id': face_id,
+                    'event_id': event_id,
+                    'video_id': video_id,
+                    'person_name': result['person_name'],
+                    'confidence': round(result['similarity_score'], 3),
+                    'person_confidence': round(result['person_confidence'], 3) if result.get('person_confidence') else 0.0,
+                    'timestamp': result['timestamp'],
+                    'start_timestamp': start_timestamp,
+                    'end_timestamp': end_timestamp,
+                    'event_context': result['event_context'],
+                    'detection_context': result['detection_context'],
+                    'thumbnail': thumbnail_url,
+                    'description': f"{result['person_name']} detected in {result['detection_context'].lower()}",
+                    'zone': 'Security Zone',  # Placeholder
+                    'has_face_image': thumbnail_url is not None,
+                    'clip_available': clip_is_available,
+                    'annotated_clip_available': face_detections_count > 0 and clip_is_available,
+                    'annotated_clip_url': annotated_clip_url
+                }
+                formatted_results.append(formatted_result)
+
+            # Get system statistics
+            stats = face_recognizer.get_detection_stats()
+
+            response_data = {
+                'success': True,
+                'results': formatted_results,
+                'total_matches': len(formatted_results),
+                'search_parameters': {
+                    'similarity_threshold': threshold,
+                    'max_results': max_results
+                },
+                'system_stats': {
+                    'total_faces_in_database': stats.get('total_faces_in_database', 0),
+                    'implementation_mode': stats.get('implementation_mode', 'unknown')
+                },
+                'message': f"Found {len(formatted_results)} matches with similarity >= {threshold}"
+            }
+
+            return jsonify(response_data)
+
+        finally:
+            # Clean up temporary file
+            if os.path.exists(temp_path):
+                os.remove(temp_path)
+
+    except Exception as e:
+        logger.error(f"Error in person image search: {e}")
+        # Same envelope as the other error responses of this endpoint.
+        return jsonify({'success': False, 'error': str(e)}), 500
+
+
+# ===== VIDEO CAPTIONING ENDPOINTS =====
+
+@app.route('/api/captions/search', methods=['POST'])
+# @require_feature('nlp_search')  # Pro plan feature - Temporarily disabled for development
+def search_captions():
+    """Search video captions using semantic similarity.
+
+    Requires: Pro plan (nlp_search feature); the feature gate above is
+    currently commented out.
+
+    Request JSON:
+        query: required search text.
+        video_id: optional filter forwarded to the captioning integrator.
+        top_k: optional result limit, defaults to 10.
+
+    Returns:
+        200 with ``success``, ``query``, ``total_results`` and ``results``;
+        400 when the body is not a JSON object, ``query`` is missing, or
+        ``top_k`` is not a positive integer; 503 when captioning is disabled;
+        500 on any other failure.
+    """
+    try:
+        # silent=True: a missing or malformed JSON body yields None instead of
+        # raising, so it is reported as a client error (400) rather than a 500.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            return jsonify({'error': 'Query is required'}), 400
+
+        query = data.get('query')
+        video_id = data.get('video_id')  # Optional filter
+
+        if not query:
+            return jsonify({'error': 'Query is required'}), 400
+
+        # Coerce top_k up front so a bad value is a 400, not a failure deep
+        # inside the search call.
+        try:
+            top_k = int(data.get('top_k', 10))
+        except (TypeError, ValueError):
+            return jsonify({'error': 'top_k must be a positive integer'}), 400
+        if top_k < 1:
+            return jsonify({'error': 'top_k must be a positive integer'}), 400
+
+        # Import and initialize captioning integrator
+        from video_captioning_integrator import VideoCaptioningIntegrator
+        from config import VideoProcessingConfig
+
+        config = VideoProcessingConfig(enable_video_captioning=True)
+        captioning_integrator = VideoCaptioningIntegrator(config)
+
+        if not captioning_integrator.enabled:
+            return jsonify({'error': 'Video captioning is not enabled'}), 503
+
+        # Search captions
+        results = captioning_integrator.search_captions(query, video_id=video_id, top_k=top_k)
+
+        return jsonify({
+            'success': True,
+            'query': query,
+            'total_results': len(results),
+            'results': results
+        })
+
+    except Exception as e:
+        logger.error(f"Error searching captions: {e}")
+        return jsonify({'error': str(e)}), 500
+
+
+@app.route('/api/captions/video/<video_id>', methods=['GET'])
+def get_video_captions(video_id):
+    """Return every caption the captioning integrator holds for ``video_id``.
+
+    Responds with 200 and ``success``/``video_id``/``total_captions``/
+    ``captions``, 503 when the integrator reports it is disabled, and 500
+    (with the exception text) on any other failure.
+    """
+    try:
+        # The integrator and its config are imported inside the handler.
+        from video_captioning_integrator import VideoCaptioningIntegrator
+        from config import VideoProcessingConfig
+
+        integrator = VideoCaptioningIntegrator(
+            VideoProcessingConfig(enable_video_captioning=True)
+        )
+        if not integrator.enabled:
+            return jsonify({'error': 'Video captioning is not enabled'}), 503
+
+        caption_list = integrator.get_video_captions(video_id)
+        payload = {
+            'success': True,
+            'video_id': video_id,
+            'total_captions': len(caption_list),
+            'captions': caption_list,
+        }
+        return jsonify(payload)
+
+    except Exception as exc:
+        logger.error(f"Error getting video captions: {exc}")
+        return jsonify({'error': str(exc)}), 500
+
+
+@app.route('/api/captions/statistics', methods=['GET'])
+def get_captioning_statistics():
+    """Report the statistics exposed by the video captioning integrator.
+
+    Responds with 200 and ``success``/``statistics``, 503 when the integrator
+    reports it is disabled, and 500 with ``success: False`` on any other
+    failure.
+    """
+    try:
+        # The integrator and its config are imported inside the handler.
+        from video_captioning_integrator import VideoCaptioningIntegrator
+        from config import VideoProcessingConfig
+
+        integrator = VideoCaptioningIntegrator(
+            VideoProcessingConfig(enable_video_captioning=True)
+        )
+        if not integrator.enabled:
+            return jsonify({'error': 'Video captioning is not enabled'}), 503
+
+        service_stats = integrator.get_statistics()
+        payload = {'success': True, 'statistics': service_stats}
+        return jsonify(payload)
+
+    except Exception as exc:
+        logger.error(f"Error getting captioning statistics: {exc}")
+        failure = {
+            'success': False,
+            'error': f'Internal server error: {str(exc)}',
+        }
+        return jsonify(failure), 500
+
+@app.route('/api/event/clip/<event_id>/annotated', methods=['GET'])
+def get_annotated_event_clip(event_id):
+    """
+    Generate and serve annotated event clip with face bounding boxes for a specific person
+    Query params: face_id (required), person_name (optional)
+
+    The event is looked up by ``_id`` (when ``event_id`` parses as an
+    ObjectId) and then by its ``event_id`` field. The source video is taken
+    from MinIO when the video record has an object key, otherwise from the
+    local compressed / ``file_path`` / uploads locations, in that order.
+
+    Returns the clip as ``video/mp4``; 400 when ``face_id`` is missing, 404
+    when the event, video record or video file cannot be found, 500 when the
+    database is disabled or clip generation fails.
+    """
+    downloaded_path = None  # temp copy fetched from MinIO, removed in ``finally``
+    clip_path = None
+    try:
+        if not DATABASE_ENABLED:
+            return jsonify({'error': 'Database not enabled'}), 500
+
+        face_id = request.args.get('face_id')
+        person_name = request.args.get('person_name')
+
+        if not face_id:
+            return jsonify({'error': 'face_id parameter is required'}), 400
+
+        # Get event from database (using singular 'event' collection)
+        from bson.objectid import ObjectId
+        from bson.errors import InvalidId
+        events_collection = db_video_service.db_manager.db.event
+        # Try _id first (ObjectId); fall back to the event_id field both when
+        # the value is not a valid ObjectId and when the _id lookup finds nothing.
+        event = None
+        try:
+            event = events_collection.find_one({"_id": ObjectId(event_id)})
+        except (InvalidId, TypeError):
+            event = None
+        if not event:
+            event = events_collection.find_one({"event_id": event_id})
+
+        if not event:
+            return jsonify({'error': 'Event not found'}), 404
+
+        video_id = event.get('video_id')
+        # ``or 0`` also covers a timestamp stored explicitly as null.
+        start_time = int(event.get('start_timestamp_ms') or 0) / 1000.0
+        end_time = int(event.get('end_timestamp_ms') or 0) / 1000.0
+
+        # Get face detections, from the most specific query to the least:
+        # same video, then same event, then any detection of this face_id.
+        faces_collection = db_video_service.db_manager.db.detected_faces
+        face_detections = []
+        for face_query in (
+            {"face_id": face_id, "video_id": video_id},
+            {"face_id": face_id, "event_id": event_id},
+            {"face_id": face_id},
+        ):
+            face_detections = list(faces_collection.find(face_query))
+            if face_detections:
+                break
+
+        logger.info(f"Found {len(face_detections)} face detections for face_id {face_id}")
+
+        # Get video path (same logic as get_event_clip)
+        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+        if not video_record:
+            return jsonify({'error': 'Video not found'}), 404
+
+        video_path = None
+        minio_key = video_record.get('minio_object_key')
+        if minio_key:
+            try:
+                import tempfile
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+                downloaded_path = temp_file.name
+                temp_file.close()
+
+                db_video_service.video_repo.minio.fget_object(
+                    video_record.get('minio_bucket', db_video_service.video_repo.video_bucket),
+                    minio_key,
+                    downloaded_path
+                )
+                video_path = downloaded_path
+            except Exception as e:
+                logger.warning(f"Could not get video from MinIO: {e}")
+
+        if not video_path:
+            # Try local compressed video
+            local_compressed = os.path.join('video_processing_outputs', 'compressed', video_id, 'video.mp4')
+            logger.info(f"Checking local compressed path: {os.path.abspath(local_compressed)}")
+            if os.path.exists(local_compressed):
+                video_path = local_compressed
+                logger.info(f"✅ Using local compressed video: {local_compressed}")
+            else:
+                logger.warning(f"❌ Local compressed video not found at: {os.path.abspath(local_compressed)}")
+                # Try database file_path
+                file_path = video_record.get('file_path')
+                if file_path and os.path.exists(file_path):
+                    video_path = file_path
+                    logger.info(f"Using file_path: {file_path}")
+                else:
+                    # Try uploads folder
+                    uploads_path = os.path.join(UPLOAD_FOLDER, video_id, 'video.mp4')
+                    if os.path.exists(uploads_path):
+                        video_path = uploads_path
+                        logger.info(f"Using uploads path: {uploads_path}")
+
+        if not video_path or not os.path.exists(video_path):
+            logger.error(f"❌ Video file not found for video_id: {video_id}")
+            return jsonify({'error': 'Video file not found'}), 404
+
+        # Convert face detections to list of dicts
+        from database.models import convert_objectid_to_string
+        face_detections_list = [convert_objectid_to_string(det) for det in face_detections]
+
+        # Generate annotated clip
+        from event_clip_generator import EventClipGenerator
+        clip_generator = EventClipGenerator()
+        clip_path = clip_generator.extract_annotated_clip(
+            video_path, start_time, end_time, face_id, face_detections_list, video_id, person_name
+        )
+
+        if not clip_path or not os.path.exists(clip_path):
+            return jsonify({'error': 'Failed to generate annotated clip'}), 500
+
+        # Serve the clip
+        return send_file(clip_path, mimetype='video/mp4')
+
+    except Exception as e:
+        logger.error(f"Error generating annotated event clip: {e}")
+        return jsonify({'error': str(e)}), 500
+    finally:
+        # The MinIO download is only an input to clip generation; delete it so
+        # each request does not leave a full video copy in the temp directory.
+        # Skip the delete if the generator handed the same file back as the clip.
+        if downloaded_path and os.path.exists(downloaded_path):
+            served_same_file = bool(clip_path) and (
+                os.path.abspath(clip_path) == os.path.abspath(downloaded_path)
+            )
+            if not served_same_file:
+                try:
+                    os.remove(downloaded_path)
+                except OSError as cleanup_error:
+                    logger.warning(f"Could not remove temporary video {downloaded_path}: {cleanup_error}")
+
+@app.route('/api/event/clip/<event_id>', methods=['GET'])
+def get_event_clip(event_id):
+    """
+    Generate and serve event clip for viewing/playing
+
+    The event is looked up by ``_id`` (when ``event_id`` parses as an
+    ObjectId) and then by its ``event_id`` field. The source video is taken
+    from MinIO when the video record has an object key, otherwise from the
+    local compressed / ``file_path`` / uploads locations, in that order.
+
+    Returns the clip as ``video/mp4``; 404 when the event, video record or
+    video file cannot be found, 500 when the database is disabled or clip
+    generation fails.
+    """
+    downloaded_path = None  # temp copy fetched from MinIO, removed in ``finally``
+    clip_path = None
+    try:
+        if not DATABASE_ENABLED:
+            return jsonify({'error': 'Database not enabled'}), 500
+
+        # Get event from database (using singular 'event' collection)
+        from bson.objectid import ObjectId
+        from bson.errors import InvalidId
+        events_collection = db_video_service.db_manager.db.event
+        # Try _id first (ObjectId); fall back to the event_id field both when
+        # the value is not a valid ObjectId and when the _id lookup finds nothing.
+        event = None
+        try:
+            event = events_collection.find_one({"_id": ObjectId(event_id)})
+        except (InvalidId, TypeError):
+            event = None
+        if not event:
+            event = events_collection.find_one({"event_id": event_id})
+
+        if not event:
+            return jsonify({'error': 'Event not found'}), 404
+
+        video_id = event.get('video_id')
+        # ``or 0`` also covers a timestamp stored explicitly as null.
+        start_time = int(event.get('start_timestamp_ms') or 0) / 1000.0
+        end_time = int(event.get('end_timestamp_ms') or 0) / 1000.0
+
+        # Get video path
+        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+        if not video_record:
+            return jsonify({'error': 'Video not found'}), 404
+
+        # Try to get video path from MinIO or local storage
+        video_path = None
+
+        # Try MinIO first
+        minio_key = video_record.get('minio_object_key')
+        if minio_key:
+            try:
+                # Download from MinIO to temp file
+                import tempfile
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+                downloaded_path = temp_file.name
+                temp_file.close()
+
+                db_video_service.video_repo.minio.fget_object(
+                    video_record.get('minio_bucket', db_video_service.video_repo.video_bucket),
+                    minio_key,
+                    downloaded_path
+                )
+                video_path = downloaded_path
+            except Exception as e:
+                logger.warning(f"Could not get video from MinIO: {e}")
+
+        # Fallback to local path
+        if not video_path:
+            # Try local compressed video
+            local_compressed = os.path.join('video_processing_outputs', 'compressed', video_id, 'video.mp4')
+            if os.path.exists(local_compressed):
+                video_path = local_compressed
+                logger.info(f"Using local compressed video: {local_compressed}")
+            else:
+                # Try database file_path
+                file_path = video_record.get('file_path')
+                if file_path and os.path.exists(file_path):
+                    video_path = file_path
+                else:
+                    # Try uploads folder
+                    uploads_path = os.path.join(UPLOAD_FOLDER, video_id, 'video.mp4')
+                    if os.path.exists(uploads_path):
+                        video_path = uploads_path
+
+        if not video_path or not os.path.exists(video_path):
+            return jsonify({'error': 'Video file not found'}), 404
+
+        # Generate clip
+        from event_clip_generator import EventClipGenerator
+        clip_generator = EventClipGenerator()
+        clip_path = clip_generator.extract_clip(
+            video_path, start_time, end_time, event_id, video_id
+        )
+
+        if not clip_path or not os.path.exists(clip_path):
+            return jsonify({'error': 'Failed to generate clip'}), 500
+
+        # Serve the clip
+        return send_file(clip_path, mimetype='video/mp4')
+
+    except Exception as e:
+        logger.error(f"Error generating event clip: {e}")
+        return jsonify({'error': str(e)}), 500
+    finally:
+        # The MinIO download is only an input to clip generation; delete it so
+        # each request does not leave a full video copy in the temp directory.
+        # Skip the delete if the generator handed the same file back as the clip.
+        if downloaded_path and os.path.exists(downloaded_path):
+            served_same_file = bool(clip_path) and (
+                os.path.abspath(clip_path) == os.path.abspath(downloaded_path)
+            )
+            if not served_same_file:
+                try:
+                    os.remove(downloaded_path)
+                except OSError as cleanup_error:
+                    logger.warning(f"Could not remove temporary video {downloaded_path}: {cleanup_error}")
+
+@app.route('/api/event/clip/<event_id>/download', methods=['GET'])
+def download_event_clip(event_id):
+    """
+    Download event clip
+
+    The event is looked up by ``_id`` (when ``event_id`` parses as an
+    ObjectId) and then by its ``event_id`` field. The source video is taken
+    from MinIO when the video record has an object key, otherwise from the
+    local compressed / ``file_path`` / uploads locations, in that order.
+
+    Returns the clip as a ``video/mp4`` attachment named
+    ``event_<event_id>_clip.mp4``; 404 when the event, video record or video
+    file cannot be found, 500 when the database is disabled or clip
+    generation fails.
+    """
+    downloaded_path = None  # temp copy fetched from MinIO, removed in ``finally``
+    clip_path = None
+    try:
+        if not DATABASE_ENABLED:
+            return jsonify({'error': 'Database not enabled'}), 500
+
+        # Get event from database (using singular 'event' collection)
+        from bson.objectid import ObjectId
+        from bson.errors import InvalidId
+        events_collection = db_video_service.db_manager.db.event
+        # Try _id first (ObjectId); fall back to the event_id field both when
+        # the value is not a valid ObjectId and when the _id lookup finds nothing.
+        event = None
+        try:
+            event = events_collection.find_one({"_id": ObjectId(event_id)})
+        except (InvalidId, TypeError):
+            event = None
+        if not event:
+            event = events_collection.find_one({"event_id": event_id})
+
+        if not event:
+            return jsonify({'error': 'Event not found'}), 404
+
+        video_id = event.get('video_id')
+        # ``or 0`` also covers a timestamp stored explicitly as null.
+        start_time = int(event.get('start_timestamp_ms') or 0) / 1000.0
+        end_time = int(event.get('end_timestamp_ms') or 0) / 1000.0
+
+        # Get video path (same logic as get_event_clip)
+        video_record = db_video_service.video_repo.get_video_by_id(video_id)
+        if not video_record:
+            return jsonify({'error': 'Video not found'}), 404
+
+        video_path = None
+        minio_key = video_record.get('minio_object_key')
+        if minio_key:
+            try:
+                import tempfile
+                temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+                downloaded_path = temp_file.name
+                temp_file.close()
+
+                db_video_service.video_repo.minio.fget_object(
+                    video_record.get('minio_bucket', db_video_service.video_repo.video_bucket),
+                    minio_key,
+                    downloaded_path
+                )
+                video_path = downloaded_path
+            except Exception as e:
+                logger.warning(f"Could not get video from MinIO: {e}")
+
+        if not video_path:
+            # Try local compressed video
+            local_compressed = os.path.join('video_processing_outputs', 'compressed', video_id, 'video.mp4')
+            if os.path.exists(local_compressed):
+                video_path = local_compressed
+                logger.info(f"Using local compressed video: {local_compressed}")
+            else:
+                # Try database file_path
+                file_path = video_record.get('file_path')
+                if file_path and os.path.exists(file_path):
+                    video_path = file_path
+                else:
+                    # Try uploads folder
+                    uploads_path = os.path.join(UPLOAD_FOLDER, video_id, 'video.mp4')
+                    if os.path.exists(uploads_path):
+                        video_path = uploads_path
+
+        if not video_path or not os.path.exists(video_path):
+            return jsonify({'error': 'Video file not found'}), 404
+
+        # Generate clip
+        from event_clip_generator import EventClipGenerator
+        clip_generator = EventClipGenerator()
+        clip_path = clip_generator.extract_clip(
+            video_path, start_time, end_time, event_id, video_id
+        )
+
+        if not clip_path or not os.path.exists(clip_path):
+            return jsonify({'error': 'Failed to generate clip'}), 500
+
+        # Serve as download
+        return send_file(clip_path, mimetype='video/mp4', as_attachment=True,
+                        download_name=f"event_{event_id}_clip.mp4")
+
+    except Exception as e:
+        logger.error(f"Error downloading event clip: {e}")
+        return jsonify({'error': str(e)}), 500
+    finally:
+        # The MinIO download is only an input to clip generation; delete it so
+        # each request does not leave a full video copy in the temp directory.
+        # Skip the delete if the generator handed the same file back as the clip.
+        if downloaded_path and os.path.exists(downloaded_path):
+            served_same_file = bool(clip_path) and (
+                os.path.abspath(clip_path) == os.path.abspath(downloaded_path)
+            )
+            if not served_same_file:
+                try:
+                    os.remove(downloaded_path)
+                except OSError as cleanup_error:
+                    logger.warning(f"Could not remove temporary video {downloaded_path}: {cleanup_error}")
+
+@app.route('/api/face-image/<face_id>')
+def get_face_image(face_id):
+    """
+    Serve face images for the search results.
+
+    Looks for ``<face_id>.jpg`` under ``model/faces`` relative to ``BASE_DIR``,
+    then relative to this file's directory, then relative to the current
+    working directory, and serves the first one that exists as ``image/jpeg``.
+
+    Returns 400 when ``face_id`` is not a bare file stem, 404 when no image is
+    found, 500 on any other failure.
+    """
+    try:
+        # face_id comes straight from the URL and is spliced into a filesystem
+        # path, so refuse anything that could step outside the faces directory:
+        # path separators, NUL bytes, or a drive prefix (which resets
+        # os.path.join on Windows).
+        if (
+            not face_id
+            or '/' in face_id
+            or '\\' in face_id
+            or '\x00' in face_id
+            or os.path.splitdrive(face_id)[0]
+        ):
+            return jsonify({'error': 'Invalid face_id'}), 400
+
+        filename = f"{face_id}.jpg"
+        backend_dir = os.path.dirname(os.path.abspath(__file__))
+        candidate_paths = (
+            # Project root first
+            os.path.join(BASE_DIR, 'model', 'faces', filename),
+            # Fallback to backend/model/faces (if model is in backend directory)
+            os.path.join(backend_dir, 'model', 'faces', filename),
+            # Final fallback relative to the current working directory. Made
+            # absolute so send_file opens the same file os.path.exists saw,
+            # however send_file resolves relative paths.
+            os.path.abspath(os.path.join('model', 'faces', filename)),
+        )
+        face_image_path = next(
+            (path for path in candidate_paths if os.path.exists(path)), None
+        )
+
+        if face_image_path is None:
+            # Return a placeholder or 404
+            return jsonify({'error': 'Face image not found'}), 404
+
+        return send_file(face_image_path, mimetype='image/jpeg')
+
+    except Exception as e:
+        logger.error(f"Error serving face image {face_id}: {e}")
+        return jsonify({'error': 'Error serving face image'}), 500
+
+@app.route("/api/search/captions", methods=["POST"])
+# @require_feature('nlp_search')  # Pro plan feature - Temporarily disabled for development
+def search_nlp_captions():
+    """Search captions using sentence-transformer embeddings + cosine similarity.
+
+    Searches both:
+      - event_description: behavior-level captions (e.g., "Accident behavior detected")
+      - video_captions: frame-level BLIP captions (e.g., "a car is parked in a parking lot")
+
+    Requires: Pro plan (nlp_search feature)
+
+    Request JSON:
+        query: required, non-empty string.
+        top_k: optional result limit (default 10); 0 or less means no limit.
+        min_score: optional similarity floor; the effective threshold is
+            never lower than 0.3.
+
+    Returns:
+        200 with ``query``, ``results``, ``total_results`` and
+        ``threshold_used``; 400 for a missing query or non-numeric
+        ``top_k``/``min_score``; 503 when caption search, the database or the
+        search module is unavailable; 500 on any other failure.
+    """
+    try:
+        if not CAPTION_SEARCH_AVAILABLE:
+            return jsonify({
+                "error": "Caption search not available",
+                "message": "Caption search module not installed or not available"
+            }), 503
+
+        # silent=True: a missing/malformed body is treated like an empty one
+        # and rejected below as "query is required" instead of raising.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+
+        raw_query = data.get("query", "")
+        # A non-string query (number, list, null) has no .strip(); treat it as missing.
+        query_text = raw_query.strip() if isinstance(raw_query, str) else ""
+
+        if not query_text:
+            return jsonify({"error": "query is required"}), 400
+
+        # Coerce numeric options up front so bad input is a 400, not a
+        # TypeError in the comparisons below. ``or`` keeps null/0 meaning
+        # "no limit" / "no floor".
+        try:
+            top_k = int(data.get("top_k", 10) or 0)
+            min_score = float(data.get("min_score", 0.0) or 0.0)
+        except (TypeError, ValueError):
+            return jsonify({"error": "top_k and min_score must be numeric"}), 400
+
+        # Use a lower default threshold (0.3) to catch semantic matches
+        # e.g., "car" matching "a car is parked in a parking lot" at ~0.45
+        threshold = max(min_score, 0.3)
+
+        # Use query_retrieval.py logic for consistent results
+        try:
+            from nlp_search.query_retreival import retrieve_by_threshold
+
+            # Connect to MongoDB using existing database service
+            if DATABASE_ENABLED and db_video_service and db_video_service.db_manager:
+                db = db_video_service.db_manager.db
+            else:
+                return jsonify({
+                    "error": "Database not available",
+                    "message": "Cannot connect to MongoDB for search"
+                }), 503
+
+            # Perform search using query_retrieval logic (searches both collections)
+            results = retrieve_by_threshold(db, query_text, threshold=threshold)
+
+            # Limit results to top_k
+            if top_k > 0 and len(results) > top_k:
+                results = results[:top_k]
+
+        except Exception as e:
+            logger.error(f"Error using query_retrieval search: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return jsonify({
+                "error": "Search functionality unavailable",
+                "message": f"NLP search module error: {str(e)}"
+            }), 503
+
+        # Format results for frontend
+        formatted_results = []
+
+        for result in results:
+            source = result.get("source", "event_description")
+            video_ref = result.get("video_reference") or {}
+            image_url = None
+            video_id = result.get("video_id")
+
+            if source == "video_captions":
+                # Build keyframe URL using the MinIO proxy path.
+                # The keyframe_repository saves at: {video_id}/frame_XXXXXX.jpg
+                # The Next.js /api/minio/image/[bucket]/[...path] proxy already works.
+                frame_id = result.get("frame_id")
+                if not frame_id:
+                    # The search hit did not carry a frame_id; look it up
+                    # from the stored caption document.
+                    caption_id = result.get("description_id")
+                    if caption_id:
+                        vc_doc = db.video_captions.find_one(
+                            {"caption_id": caption_id}, {"frame_id": 1}
+                        )
+                        if vc_doc:
+                            frame_id = vc_doc.get("frame_id")
+
+                if video_id and frame_id:
+                    # Use the MinIO proxy URL pattern (works through Next.js)
+                    image_url = f"/api/minio/image/detectifai-keyframes/{video_id}/{frame_id}.jpg"
+
+            elif video_ref and isinstance(video_ref, dict):
+                object_name = video_ref.get("object_name", "")
+                bucket = video_ref.get("bucket", "nlp-images")
+                if object_name and bucket:
+                    image_url = f"/api/minio/image/{bucket}/{object_name}"
+
+            formatted_result = {
+                "id": result.get("description_id"),
+                "event_id": result.get("event_id"),
+                "video_id": video_id,
+                "description": result.get("caption", ""),
+                "caption": result.get("caption", ""),
+                "confidence": result.get("similarity", 0.0),
+                "similarity_score": result.get("similarity", 0.0),
+                "thumbnail": image_url,
+                "video_reference": video_ref if video_ref else None,
+                "start_timestamp_ms": result.get("start_timestamp_ms"),
+                "end_timestamp_ms": result.get("end_timestamp_ms"),
+                "timestamp": result.get("start_timestamp_ms"),
+                "zone": "N/A",
+                "source": source
+            }
+            formatted_results.append(formatted_result)
+
+        return jsonify({
+            "query": query_text,
+            "results": formatted_results,
+            "total_results": len(formatted_results),
+            "threshold_used": threshold
+        })
+
+    except Exception as e:
+        logger.error(f"Error in caption search: {e}")
+        return jsonify({"error": f"Search failed: {str(e)}"}), 500
+
+# ====== LIVE STREAM ENDPOINTS ======
+
+@app.route('/api/live/start', methods=['POST'])
+def start_live_stream():
+    """Prepare a live-stream processor for a webcam.
+
+    Reads ``camera_id`` (default ``'webcam_01'``) and ``camera_index``
+    (default ``0``) from the JSON body, stores the index on the processor and
+    returns the feed URL. Responds 400 if that camera's processor is already
+    processing and 500 on any other failure.
+    """
+    try:
+        payload = request.json or {}
+        camera_id = payload.get('camera_id', 'webcam_01')
+        camera_index = payload.get('camera_index', 0)  # 0 = default webcam
+
+        from live_stream_processor import get_live_processor
+
+        live_processor = get_live_processor(camera_id, get_security_focused_config())
+
+        if live_processor.is_processing:
+            already_running = {
+                'success': False,
+                'error': f'Live stream already running for camera {camera_id}',
+            }
+            return jsonify(already_running), 400
+
+        # Nothing is started here: the index is only recorded, and the feed
+        # endpoint does the actual capture/processing.
+        live_processor.camera_index = camera_index
+
+        ready = {
+            'success': True,
+            'camera_id': camera_id,
+            'message': 'Live stream ready',
+            'video_feed_url': f'/api/live/feed/{camera_id}',
+        }
+        return jsonify(ready)
+
+    except Exception as exc:
+        logger.error(f"Error starting live stream: {exc}")
+        return jsonify({'success': False, 'error': str(exc)}), 500
+
+@app.route('/api/live/feed/<camera_id>')
+def live_video_feed(camera_id):
+    """Stream live frames for ``camera_id`` as a multipart response.
+
+    Frames come from the processor's ``generate_frames`` generator, using the
+    ``camera_index`` stored on the processor (0 when none was set). If the
+    generator raises, one error frame is emitted when it can be built, and the
+    stream ends. Failures before streaming starts return a JSON 500.
+    """
+    logger.info(f"🎬 ===== VIDEO FEED REQUESTED ===== camera_id: {camera_id}")
+    try:
+        from live_stream_processor import get_live_processor
+
+        processor = get_live_processor(camera_id)
+        camera_index = getattr(processor, 'camera_index', 0)
+
+        logger.info(f"📹 Video feed requested for camera {camera_id} (index {camera_index})")
+        logger.info(f"📹 Processor is_processing: {processor.is_processing}")
+        logger.info(f"📹 Processor camera_index attribute: {getattr(processor, 'camera_index', 'NOT SET')}")
+
+        def generate():
+            """Relay frames from the processor, logging every 30th one."""
+            try:
+                logger.info(f"🎬 Starting frame generation for {camera_id}")
+                frame_source = processor.generate_frames(camera_index)
+                for frames_sent, chunk in enumerate(frame_source, start=1):
+                    if frames_sent % 30 == 0:  # Log every 30 frames
+                        logger.info(f"📹 Streaming frame {frames_sent} for {camera_id}")
+                    yield chunk
+            except Exception as gen_error:
+                logger.error(f"❌ Error in frame generator: {gen_error}")
+                import traceback
+                logger.error(traceback.format_exc())
+                # Best effort: show the failure to the viewer as a final frame
+                try:
+                    error_frame = processor._create_error_frame(f"Stream error: {str(gen_error)}")
+                    import cv2
+                    encoded_ok, jpeg = cv2.imencode('.jpg', error_frame)
+                    if encoded_ok:
+                        yield (b'--frame\r\n'
+                               b'Content-Type: image/jpeg\r\n\r\n' + jpeg.tobytes() + b'\r\n')
+                except Exception as frame_error:
+                    logger.error(f"❌ Could not create error frame: {frame_error}")
+
+        stream_headers = {
+            'Cache-Control': 'no-cache, no-store, must-revalidate',
+            'Pragma': 'no-cache',
+            'Expires': '0',
+            'X-Accel-Buffering': 'no',  # Disable buffering for nginx
+            'Connection': 'keep-alive',
+            'Access-Control-Allow-Origin': '*',  # CORS header
+            'Access-Control-Allow-Methods': 'GET',
+            'Access-Control-Allow-Headers': 'Content-Type',
+        }
+        return Response(
+            generate(),
+            mimetype='multipart/x-mixed-replace; boundary=frame',
+            headers=stream_headers,
+        )
+
+    except Exception as exc:
+        logger.error(f"❌ Error in video feed endpoint: {exc}")
+        import traceback
+        logger.error(traceback.format_exc())
+        return jsonify({'error': str(exc)}), 500
+
+@app.route('/api/live/stop/<camera_id>', methods=['POST'])
+def stop_live_stream(camera_id):
+    """Stop the live-stream processor for ``camera_id``.
+
+    Delegates to ``stop_live_processor`` and reports success; any exception
+    becomes a 500 with ``success: False`` and the exception text.
+    """
+    try:
+        from live_stream_processor import stop_live_processor
+
+        stop_live_processor(camera_id)
+
+        confirmation = {
+            'success': True,
+            'message': f'Live stream stopped for camera {camera_id}',
+        }
+        return jsonify(confirmation)
+
+    except Exception as exc:
+        logger.error(f"Error stopping live stream: {exc}")
+        return jsonify({'success': False, 'error': str(exc)}), 500
+
+@app.route('/api/live/stats/<camera_id>', methods=['GET'])
+def get_live_stats(camera_id):
+    """Return the statistics reported by the live processor for ``camera_id``.
+
+    Responds with ``success``, ``camera_id`` and ``stats``; any exception
+    becomes a 500 with ``success: False`` and the exception text.
+    """
+    try:
+        from live_stream_processor import get_live_processor
+
+        processor_stats = get_live_processor(camera_id).get_stats()
+
+        payload = {
+            'success': True,
+            'camera_id': camera_id,
+            'stats': processor_stats,
+        }
+        return jsonify(payload)
+
+    except Exception as exc:
+        logger.error(f"Error getting live stats: {exc}")
+        return jsonify({'success': False, 'error': str(exc)}), 500
+
+@app.route('/api/live/test-camera', methods=['GET'])
+def test_camera():
+    """Test if camera is available - helps debug camera issues
+
+    Query params: index (optional integer camera index, default 0).
+
+    Opens the camera, tries to read one frame, and always releases the
+    capture handle afterwards. An unavailable camera is reported with HTTP 200
+    and ``available: False``; a non-integer ``index`` is a 400; unexpected
+    errors are a 500.
+    """
+    try:
+        import cv2
+
+        # A non-numeric index is a client error, not a server failure.
+        try:
+            camera_index = int(request.args.get('index', 0))
+        except (TypeError, ValueError):
+            return jsonify({
+                'success': False,
+                'available': False,
+                'error': 'Invalid camera index',
+                'message': 'index must be an integer'
+            }), 400
+
+        logger.info(f"🔍 Testing camera {camera_index}...")
+        cap = cv2.VideoCapture(camera_index)
+
+        # Release the handle on every path (not opened, read failure, or an
+        # exception from read) so the probe never keeps the device busy.
+        try:
+            if not cap.isOpened():
+                return jsonify({
+                    'success': False,
+                    'available': False,
+                    'camera_index': camera_index,
+                    'message': f'Camera {camera_index} could not be opened. Make sure the camera is connected and not in use by another application.'
+                }), 200
+
+            # Try to read a frame
+            ret, frame = cap.read()
+        finally:
+            cap.release()
+
+        if ret and frame is not None:
+            return jsonify({
+                'success': True,
+                'available': True,
+                'camera_index': camera_index,
+                'message': f'Camera {camera_index} is working correctly',
+                'frame_size': f'{frame.shape[1]}x{frame.shape[0]}',
+                'frame_channels': frame.shape[2] if len(frame.shape) > 2 else 1
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'available': False,
+                'camera_index': camera_index,
+                'message': f'Camera {camera_index} opened but cannot read frames. The camera may be in use or not functioning properly.'
+            }), 200
+
+    except Exception as e:
+        logger.error(f"Error testing camera: {e}")
+        import traceback
+        logger.error(traceback.format_exc())
+        return jsonify({
+            'success': False,
+            'available': False,
+            'error': str(e),
+            'message': f'Error testing camera: {str(e)}'
+        }), 500
+
+# ========================================
+# Register Subscription Routes Blueprint
+# ========================================
+# Best-effort: if the import or registration fails, the error is logged and
+# the app keeps starting without the subscription routes.
+try:
+    from subscription_routes import subscription_bp
+    app.register_blueprint(subscription_bp)
+    logger.info("✅ Subscription routes registered successfully")
+except Exception as e:
+    logger.error(f"❌ Failed to register subscription routes: {e}")
+
+# ========================================
+# Register Real-Time Alert Routes Blueprint
+# ========================================
+# Best-effort, same as above: a failure is logged and the alert routes are
+# simply absent.
+try:
+    from alert_routes import alert_bp
+    app.register_blueprint(alert_bp)
+    logger.info("✅ Real-time alert routes registered successfully")
+except Exception as e:
+    logger.error(f"❌ Failed to register alert routes: {e}")
+
+# Script entry point: run Flask's built-in server on all interfaces.
+# PORT (default 7860) selects the port; FLASK_DEBUG=true (case-insensitive)
+# turns on debug mode, any other value leaves it off.
+if __name__ == '__main__':
+    _port = int(os.environ.get('PORT', 7860))
+    _debug = os.environ.get('FLASK_DEBUG', 'false').lower() == 'true'
+    logger.info(f"Starting DetectifAI Flask API server on port {_port}...")
+    app.run(host='0.0.0.0', port=_port, debug=_debug)
\ No newline at end of file
diff --git a/behavior_analysis/action_recognition.py b/behavior_analysis/action_recognition.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c567c103372a93dd5e6fa1d366e2a9212d7c2e4
--- /dev/null
+++ b/behavior_analysis/action_recognition.py
@@ -0,0 +1,381 @@
+# ============================================================
+# FULLY FIXED ACTION RECOGNITION PIPELINE
+# Supports:
+#   - fight_detection.pt (3D ResNet18, state_dict)
+#   - accident_detection.pt (3D ResNet18, state_dict; registered as "road_accident")
+#   - wallclimb.pt       (YOLO, Ultralytics)
+# ============================================================
+
+from dataclasses import dataclass, asdict
+import multiprocessing as mp
+import torch
+import cv2
+import numpy as np
+import os
+import time
+import json
+import logging
+from typing import List, Optional, Dict, Any
+from torchvision.models.video import r3d_18
+import torch.nn as nn
+
+# --- YOLO + PyTorch 2.6 compatibility ---
+from ultralytics import YOLO
+import ultralytics
+torch.serialization.add_safe_globals([ultralytics.nn.tasks.DetectionModel])
+
+# --- Logging ---
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+# ============================================================
+# FIXED MODEL PATHS
+# ============================================================
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+MODEL_PATHS = {
+    "fight_detection":   os.path.join(BASE_DIR, "fight_detection.pt"),
+    "road_accident":     os.path.join(BASE_DIR, "accident_detection.pt"),
+    "wallclimb":         os.path.join(BASE_DIR, "wallclimb.pt"),
+}
+
+# Define which models are 3D-ResNet (run separately) vs YOLO
+RESNET_MODELS = {"fight_detection", "road_accident"}
+YOLO_MODELS = {"wallclimb"}
+
+# ============================================================
+#  Dataclasses
+# ============================================================
+@dataclass
+class ActionPrediction:
+    timestamp: float    # seconds; process_video_with_model fills it with frame index / fps
+    frame_index: int    # index of the frame the detection was recorded on
+    label: str          # label returned by interpret_prediction
+    confidence: float   # confidence returned by interpret_prediction
+
+
+# ============================================================
+# MODEL LOADER (YOLO or 3D-ResNet)
+# ============================================================
+def load_model(model_path: str, device: torch.device):
+    """Load a model, choosing the loader from the file name and contents.
+
+    Basenames containing "wall" or "yolo" go to Ultralytics ``YOLO``; anything
+    else is tried as TorchScript, then as an ``r3d_18`` state_dict (bare or
+    under "state_dict") with a 2-class head. Both come back in eval mode.
+    Raises RuntimeError when no loader accepts the file; a checkpoint-loading
+    error, if there was one, is chained as its ``__cause__``.
+    """
+    name = os.path.basename(model_path).lower()
+    if "wall" in name or "yolo" in name:
+        logger.info(f"Loading YOLO model: {model_path}")
+        return YOLO(model_path)
+
+    # Exception, not a bare except: Ctrl-C / SystemExit must still propagate.
+    try:
+        model = torch.jit.load(model_path, map_location=device)
+        logger.info("Loaded TorchScript model")
+        model.eval()
+        return model
+    except Exception as e:
+        logger.debug(f"Not loadable as TorchScript ({model_path}): {e}")
+
+    try:
+        ckpt = torch.load(model_path, map_location=device)
+        if isinstance(ckpt, dict):
+            logger.info(f"Loading 3D-ResNet model: {model_path}")
+            model = r3d_18(weights=None)
+            model.fc = nn.Linear(512, 2)
+            model.load_state_dict(ckpt.get("state_dict", ckpt))
+            model.to(device)
+            model.eval()
+            return model
+    except Exception as e:
+        logger.error(f"3D-ResNet load failed: {e}")
+        raise RuntimeError(f"Unsupported model format: {model_path}") from e
+
+    raise RuntimeError(f"Unsupported model format: {model_path}")
+
+
+# ============================================================
+# FRAME PREPROCESSING FOR 3D-ResNet
+# ============================================================
+def preprocess_clip(frames: List[np.ndarray], device: torch.device, target_size=None):
+    """Turn a list of BGR frames into one clip tensor for the 3D-ResNet models.
+
+    Args:
+        frames: OpenCV-style BGR frames (callers in this file pass 16). Each is
+            converted to RGB here, so do not pre-convert.
+        device: device the resulting tensor is moved to.
+        target_size: ``(height, width)`` to resize every frame to; any falsy
+            value selects the 112x112 default.
+
+    Returns:
+        float32 tensor of shape ``(1, 3, len(frames), H, W)``, pixels / 255.
+    """
+    size = target_size or (112, 112)
+
+    processed = []
+    for f in frames:
+        img = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
+        img = cv2.resize(img, (size[1], size[0]))  # cv2.resize takes (width, height)
+        img = img / 255.0
+        processed.append(img.transpose(2, 0, 1))  # HWC -> CHW
+
+    clip = np.stack(processed, axis=1)  # stack along a new time axis: (3, T, H, W)
+    return torch.from_numpy(clip).float().unsqueeze(0).to(device)
+
+
+# ============================================================
+# INTERPRET MODEL OUTPUT
+# ============================================================
+# Class index -> action label table. NOTE(review): interpret_prediction below hard-codes its labels and does not read this.
+ACTION_LABELS = {
+    0: "fighting",
+    1: "accident",
+    2: "climbing"
+}
+
+# Minimum confidence per action, keyed by lower-case label; interpret_prediction falls back to 0.5 for other labels
+ACTION_CONFIDENCE_THRESHOLDS = {
+    "fighting": 0.5,
+    "accident": 0.65,
+    "climbing": 0.8
+}
+
+def interpret_prediction(model, output, model_name, confidence_threshold=None):
+    """Map one raw model output to a ``(label, confidence)`` pair.
+
+    YOLO (``model`` has ``predict`` and ``output`` is a list): only the first
+    box of ``output[0]`` is inspected; class index 0 means "climbing".
+    3D-ResNet (``output`` is a tensor): softmax + argmax over the logits;
+    class 1 means "fighting" when ``model_name`` contains "fight", or
+    "Accident" (capitalised to match saved-frame naming) when it contains
+    "accident" or "road". Every other class maps to "no_action".
+
+    The confidence is compared with ``confidence_threshold`` when one is given,
+    otherwise with ACTION_CONFIDENCE_THRESHOLDS for the lower-cased label
+    (0.5 when the label has no entry).
+
+    Returns:
+        ``("no_action", 0.0)`` when nothing is detected, the confidence is
+        below the threshold, or the output type is not recognised; otherwise
+        ``(label, confidence)``. Note that a negative class which clears the
+        threshold comes back as ``("no_action", confidence)``.
+    """
+    def pick_threshold(label):
+        if confidence_threshold is not None:
+            return confidence_threshold
+        return ACTION_CONFIDENCE_THRESHOLDS.get(label.lower(), 0.5)
+
+    # -------- YOLO (wallclimb) --------
+    if hasattr(model, "predict") and isinstance(output, list):
+        logger.info(f"🔍 YOLO prediction for {model_name}")
+        boxes = output[0].boxes
+        if boxes is None or len(boxes) == 0:
+            logger.info("🚫 No boxes detected by YOLO")
+            return ("no_action", 0.0)
+
+        first_box = boxes[0]
+        cls_idx, conf = int(first_box.cls), float(first_box.conf)
+        label = "climbing" if cls_idx == 0 else "no_action"
+        threshold = pick_threshold(label)
+        logger.info(f"🎯 YOLO detection: class_idx={cls_idx}, confidence={conf:.3f}, threshold={threshold}")
+        family = "YOLO"
+
+    # -------- 3D-ResNet (fight_detection or road_accident) --------
+    elif isinstance(output, torch.Tensor):
+        logger.info(f"🔍 3D-ResNet prediction for {model_name}")
+        probs = torch.softmax(output, dim=1)[0]
+        cls_idx = int(torch.argmax(probs).item())
+        conf = float(probs[cls_idx])
+        logger.info(f"📊 Raw probabilities: {probs.tolist()}")
+
+        lowered = model_name.lower()
+        if "fight" in lowered:
+            label = "fighting" if cls_idx == 1 else "no_action"
+            logger.info(f"🥊 Fight detection: class {cls_idx} -> {label}")
+        elif "accident" in lowered or "road" in lowered:
+            label = "Accident" if cls_idx == 1 else "no_action"
+        else:
+            label = "no_action"
+            logger.info("❓ Unknown model type, defaulting to no_action")
+        threshold = pick_threshold(label)
+        logger.info(f"🎯 Predicted class: {cls_idx}, confidence: {conf:.3f}, threshold: {threshold}")
+        family = "3D-ResNet"
+
+    else:
+        return ("no_action", 0.0)
+
+    # Shared gate for both model families
+    if conf < threshold:
+        logger.info(f"🚫 Confidence {conf:.3f} below threshold {threshold}")
+        return ("no_action", 0.0)
+
+    logger.info(f"✅ {family} final result: {label} (conf: {conf:.3f})")
+    return (label, conf)
+
+
+# ============================================================
+# VIDEO PROCESSING
+# ============================================================
+def process_video_with_model(
+        video_path,
+        model_path,
+        output_dir,
+        model_name=None,
+        use_gpu=True,
+        frame_skip=1,
+        target_size=None,
+        annotate=True):
+    """Run one model over one video and write its positive detections to JSON.
+
+    YOLO models see every sampled frame; 3D-ResNet models see a sliding window
+    of the 16 most recent sampled frames. Results other than "no_action" are
+    recorded and, if ``annotate`` is set, the frame is saved with the label
+    drawn on it under ``<output_dir>/<model_name>_annotated``.
+
+    Args:
+        video_path: video file to read.
+        model_path: model file handed to ``load_model``.
+        output_dir: directory for the JSON report and annotated frames.
+        model_name: name for logs/files; defaults to the model file stem.
+        use_gpu: use CUDA when it is available.
+        frame_skip: sample every ``frame_skip``-th frame; below 1 acts as 1.
+        target_size: forwarded to ``preprocess_clip``.
+        annotate: write annotated JPEGs for positive detections.
+
+    Returns:
+        None. Nothing is written if the video cannot be opened.
+    """
+    device = torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
+    model_name = model_name or os.path.splitext(os.path.basename(model_path))[0]
+    logger.info(f"[{model_name}] Loading model...")
+    model = load_model(model_path, device)
+
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        logger.error(f"[{model_name}] Could not open video")
+        return
+
+    frame_skip = max(1, frame_skip)  # 0 would raise ZeroDivisionError below
+    fps = cap.get(cv2.CAP_PROP_FPS) or 25
+    frame_buffer = []
+    idx = 0
+    frames_processed = 0  # number of positive detections (JSON field name kept)
+    predictions = []
+
+    anno_dir = os.path.join(output_dir, f"{model_name}_annotated")
+    if annotate:
+        os.makedirs(anno_dir, exist_ok=True)
+
+    start = time.time()
+    try:
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            if idx % frame_skip != 0:
+                idx += 1
+                continue
+
+            timestamp = idx / fps
+            try:
+                if hasattr(model, "predict"):
+                    # YOLO: one prediction per sampled frame
+                    output = model.predict(frame, verbose=False)
+                    label, conf = interpret_prediction(model, output, model_name)
+                else:
+                    # 3D-ResNet: needs a 16-frame clip. Only the newest 16 frames are
+                    # ever used, so older ones are dropped to keep memory flat.
+                    frame_buffer.append(frame)
+                    del frame_buffer[:-16]
+                    if len(frame_buffer) < 16:
+                        idx += 1
+                        continue
+                    clip = preprocess_clip(frame_buffer, device, target_size)
+                    with torch.no_grad():
+                        output = model(clip)
+                    label, conf = interpret_prediction(model, output, model_name)
+
+                # Only positive detections are recorded and annotated
+                if label != "no_action":
+                    predictions.append(ActionPrediction(timestamp, idx, label, conf))
+                    frames_processed += 1
+                    if annotate:
+                        anno = frame.copy()
+                        cv2.putText(anno, f"{label} {conf:.2f}", (10, 35),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)
+                        cv2.imwrite(os.path.join(anno_dir, f"{idx:06}.jpg"), anno)
+            except Exception as e:
+                logger.error(f"[{model_name}] Error on frame {idx}: {e}")
+
+            idx += 1
+    finally:
+        cap.release()  # released even if the loop is interrupted
+
+    os.makedirs(output_dir, exist_ok=True)
+    json_path = os.path.join(output_dir, f"{os.path.basename(video_path)}__{model_name}.json")
+    with open(json_path, "w") as f:
+        json.dump({
+            "video": video_path,
+            "model": model_path,
+            "frames_processed": frames_processed,
+            "processing_time": time.time() - start,
+            "predictions": [asdict(p) for p in predictions]
+        }, f, indent=2)
+
+    logger.info(f"[{model_name}] Finished. Saved: {json_path}")
+
+
+# ============================================================
+# MULTI-MODEL EXECUTOR (Windows-safe)
+# ============================================================
+def run_models_on_videos(video_paths, model_paths,
+                         output_dir="./action_recognition_outputs",
+                         use_gpu=True, frame_skip=5,
+                         target_size=None, annotate=True):
+    """Start one process per (model, video) pair, then wait for all of them."""
+    os.makedirs(output_dir, exist_ok=True)
+
+    # All workers are started before any is joined.
+    workers = []
+    jobs = [(m, v) for m in model_paths for v in video_paths]
+    for model_path, video in jobs:
+        model_name = os.path.splitext(os.path.basename(model_path))[0]
+        job_args = (video, model_path, output_dir, model_name,
+                    use_gpu, frame_skip, target_size, annotate)
+        worker = mp.Process(target=process_video_with_model, args=job_args)
+        worker.start()
+        workers.append(worker)
+        logger.info(f"Started PID={worker.pid} → {model_name}")
+
+    for worker in workers:
+        worker.join()
+        logger.info(f"PID={worker.pid} finished with code {worker.exitcode}")
+
+
+# ============================================================
+# MAIN
+# ============================================================
+if __name__ == "__main__":
+    mp.set_start_method("spawn", force=True)   # IMPORTANT FIX ON WINDOWS
+
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--videos", "-v", nargs="+", required=True)  # one or more video files
+    parser.add_argument("--models", "-m", nargs="*", default=list(MODEL_PATHS.values()))  # default: every path in MODEL_PATHS
+    parser.add_argument("--output", "-o", default="./action_recognition_outputs")
+    parser.add_argument("--no-gpu", action="store_true")
+    parser.add_argument("--frame-skip", type=int, default=5)
+    parser.add_argument("--no-annotate", action="store_true")
+    args = parser.parse_args()
+
+    run_models_on_videos(
+        video_paths=args.videos,
+        model_paths=args.models,
+        output_dir=args.output,
+        use_gpu=not args.no_gpu,  # the flags are negative, the keyword arguments positive
+        frame_skip=max(1, args.frame_skip),  # clamp 0 / negative values to 1
+        annotate=not args.no_annotate
+    )
diff --git a/behavior_analysis/wallclimb.pt b/behavior_analysis/wallclimb.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0c496d18b71c94d9885e308191dc8a4af0405808
--- /dev/null
+++ b/behavior_analysis/wallclimb.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b51bb0eec57891debefc3f1c1a53299229b716ac8385dfd759cc469058fe04e
+size 5352882
diff --git a/behavior_analysis/yolov11_wallclimb.pt b/behavior_analysis/yolov11_wallclimb.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19697084e4956131c35f6b4c00887f72c8e94dc3
--- /dev/null
+++ b/behavior_analysis/yolov11_wallclimb.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ae0285b20cf8ab66e4ddcf47f300c326c1b972e9bfc909e00f2cf6f65202ff3
+size 5359282
diff --git a/behavior_analysis_integrator.py b/behavior_analysis_integrator.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb5ce3cfb0a47f002b8596040856a4ff9fdb9c0b
--- /dev/null
+++ b/behavior_analysis_integrator.py
@@ -0,0 +1,580 @@
+"""
+Behavior Analysis Integrator for DetectifAI
+
+This module integrates behavior analysis (action recognition) into the video processing pipeline.
+It processes video segments/keyframes to detect suspicious behaviors like fighting, accidents, and climbing.
+Similar to ObjectDetectionIntegrator, it creates behavior-based events and identifies suspicious frames
+for facial recognition processing.
+"""
+
+import os
+import cv2
+import time
+import logging
+import json
+from typing import List, Dict, Any, Tuple, Optional
+from dataclasses import dataclass, asdict
+import numpy as np
+
+# Import behavior analysis module
+from behavior_analysis.action_recognition import (
+    load_model, preprocess_clip, interpret_prediction,
+    MODEL_PATHS, RESNET_MODELS, YOLO_MODELS, ActionPrediction
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BehaviorDetectionResult:
+    """Result of behavior detection on a frame or segment"""
+    frame_path: str  # image path for single-frame results; "live_stream" for clip-based results
+    timestamp: float  # seconds
+    frame_index: int
+    behavior_detected: str  # label from interpret_prediction: "fighting", "Accident" (capitalised), "climbing", or "no_action"
+    confidence: float
+    model_used: str  # key of the model in MODEL_PATHS
+    processing_time: float  # wall-clock seconds spent producing this result
+
+
+@dataclass
+class BehaviorEvent:
+    """Behavior-based event created from detections"""
+    event_id: str  # presumably "behavior_<type>_<n>" as built in create_behavior_events - TODO confirm
+    behavior_type: str
+    start_timestamp: float  # presumably seconds, taken from the first grouped detection - TODO confirm
+    end_timestamp: float  # presumably seconds, taken from the last grouped detection - TODO confirm
+    confidence: float
+    frame_indices: List[int]
+    keyframes: List[str]  # presumably the frame_path of each grouped detection - TODO confirm
+    model_used: str
+    importance_score: float
+
+
+class BehaviorAnalysisIntegrator:
+    """Integration layer between behavior analysis and video processing pipeline"""
+    
+    def __init__(self, config):
+        """Load every available model when enabled; disable again if none load or PyTorch is missing."""
+        self.config = config
+        self.enabled = getattr(config, 'enable_behavior_analysis', False)
+        logger.info(f"🔍 Initializing BehaviorAnalysisIntegrator - enabled: {self.enabled}")
+
+        self.models = {}
+        self.device = None
+        # Guard clause: nothing to load when the feature is switched off
+        if not self.enabled:
+            logger.info("Behavior analysis disabled in config")
+            return
+
+        try:
+            import torch
+            use_cuda = torch.cuda.is_available() and getattr(config, 'use_gpu_acceleration', True)
+            self.device = torch.device("cuda" if use_cuda else "cpu")
+
+            logger.info(f"🔧 Attempting to load models from: {MODEL_PATHS}")
+            for model_name, model_path in MODEL_PATHS.items():
+                logger.info(f"📁 Checking model {model_name} at: {model_path}")
+                if not os.path.exists(model_path):
+                    # A missing file is logged and skipped, not fatal
+                    logger.error(f"❌ Model file not found: {model_path}")
+                    continue
+                try:
+                    logger.info(f"⏳ Loading {model_name}...")
+                    self.models[model_name] = load_model(model_path, self.device)
+                    logger.info(f"✅ Loaded behavior analysis model: {model_name}")
+                except Exception as e:
+                    logger.error(f"❌ Failed to load {model_name}: {e}")
+
+            if self.models:
+                logger.info(f"✅ Behavior analysis initialized with {len(self.models)} models")
+            else:
+                logger.warning("⚠️ No behavior analysis models loaded, disabling behavior analysis")
+                self.enabled = False
+        except ImportError:
+            logger.warning("⚠️ PyTorch not available, disabling behavior analysis")
+            self.enabled = False
+    
+    def detect_behavior_in_frame(self, frame_path: str, timestamp: float, frame_index: int = 0) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a single frame using the single-frame (YOLO) models
+
+        3D-ResNet models need a 16-frame clip, so they are not run here; use
+        detect_behavior_in_segment or detect_behavior_in_segment_from_buffer.
+
+        Args:
+            frame_path: Path to frame image
+            timestamp: Timestamp in seconds
+            frame_index: Frame index number
+
+        Returns:
+            List of BehaviorDetectionResult objects, one per YOLO model that
+            reported something other than "no_action" (empty if disabled, the
+            frame cannot be read, or no YOLO model is loaded)
+        """
+        if not self.enabled or not self.models:
+            return []
+
+        # Work out which models can run before paying for the image decode
+        yolo_models = {name: m for name, m in self.models.items() if name in YOLO_MODELS}
+        if not yolo_models:
+            return []
+
+        if not os.path.exists(frame_path):
+            logger.warning(f"Frame not found: {frame_path}")
+            return []
+
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            logger.warning(f"Failed to read frame: {frame_path}")
+            return []
+
+        results = []
+        for model_name, model in yolo_models.items():
+            try:
+                start_time = time.time()
+                output = model.predict(frame, verbose=False)
+                # Use default per-action thresholds from ACTION_CONFIDENCE_THRESHOLDS
+                label, conf = interpret_prediction(model, output, model_name)
+                logger.info(f"🔍 YOLO model {model_name} prediction: {label} (confidence: {conf:.3f})")
+
+                # Only positive detections are reported
+                if label != "no_action":
+                    results.append(BehaviorDetectionResult(
+                        frame_path=frame_path,
+                        timestamp=timestamp,
+                        frame_index=frame_index,
+                        behavior_detected=label,
+                        confidence=conf,
+                        model_used=model_name,
+                        processing_time=time.time() - start_time
+                    ))
+            except Exception as e:
+                logger.error(f"Error detecting behavior with {model_name}: {e}")
+                continue
+
+        return results
+    
+    def detect_behavior_in_segment(self, video_path: str, start_time: float, end_time: float,
+                                   frame_indices: List[int] = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a video segment (for 3D-ResNet models that need temporal context)
+
+        Only the last 16 frames of the segment are classified; a segment that
+        decodes to fewer than 16 frames yields no results.
+
+        Args:
+            video_path: Path to video file
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            frame_indices: Accepted for interface compatibility; not used here
+
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not self.enabled or not self.models:
+            return []
+
+        if not os.path.exists(video_path):
+            logger.warning(f"Video not found: {video_path}")
+            return []
+
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            logger.error(f"Could not open video: {video_path}")
+            return []
+
+        frame_buffer = []
+        try:
+            fps = cap.get(cv2.CAP_PROP_FPS) or 25
+            start_frame = int(start_time * fps)
+            end_frame = int(end_time * fps)
+            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+            for _ in range(start_frame, min(end_frame, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))):
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                frame_buffer.append(frame)
+                del frame_buffer[:-16]  # keep only the clip that actually gets classified
+        finally:
+            cap.release()  # released even when decoding raises
+
+        mid_frame_idx = (start_frame + end_frame) // 2 if end_frame > start_frame else start_frame
+        return self._process_frame_buffer(frame_buffer, start_time, end_time, mid_frame_idx, video_path)
+    
+    def detect_behavior_in_segment_from_buffer(self, frame_buffer: List[np.ndarray],
+                                               start_time: float, end_time: float,
+                                               frame_indices: List[int] = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a frame buffer (for live streams)
+
+        Args:
+            frame_buffer: List of frames (numpy arrays); at least 16 are required
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            frame_indices: Optional frame indices; the middle entry becomes the result's frame index
+
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not (self.enabled and self.models):
+            return []
+
+        buffered = len(frame_buffer)
+        if buffered < 16:
+            logger.debug(f"Frame buffer too short ({buffered} frames), skipping 3D-ResNet models")
+            return []
+
+        # Middle of frame_indices when it is provided and non-empty, else middle of the buffer
+        mid_frame_idx = frame_indices[len(frame_indices) // 2] if frame_indices else buffered // 2
+
+        return self._process_frame_buffer(frame_buffer[-16:], start_time, end_time, mid_frame_idx, "live_stream")
+    
+    def _process_frame_buffer(self, frame_buffer: List[np.ndarray], start_time: float,
+                             end_time: float, frame_index: int, video_path: str = "live_stream") -> List[BehaviorDetectionResult]:
+        """
+        Classify the last 16 frames of a buffer with every loaded 3D-ResNet model
+
+        Args:
+            frame_buffer: List of frames (numpy arrays); fewer than 16 yields no results
+            start_time: Start timestamp
+            end_time: End timestamp
+            frame_index: Frame index stored on each result
+            video_path: Path to video file or "live_stream" for live streams.
+                NOTE(review): currently unused - every result carries
+                frame_path="live_stream", even for file-based segments
+
+        Returns:
+            List of BehaviorDetectionResult objects, one per model that reported
+            something other than "no_action", stamped with the segment midpoint
+        """
+        if len(frame_buffer) < 16:
+            return []
+
+        # YOLO models are single-frame and are not run on clips
+        resnet_models = [(n, m) for n, m in self.models.items() if n in RESNET_MODELS]
+
+        detections = []
+        for model_name, model in resnet_models:
+            try:
+                started = time.time()
+
+                # Process last 16 frames from buffer
+                clip = preprocess_clip(frame_buffer[-16:], self.device)
+
+                import torch
+                model.eval()
+                with torch.no_grad():
+                    output = model(clip)
+
+                # Use default per-action thresholds from ACTION_CONFIDENCE_THRESHOLDS
+                label, conf = interpret_prediction(model, output, model_name)
+                logger.info(f"🔍 Model {model_name} prediction: {label} (confidence: {conf:.3f})")
+
+                if label == "no_action":
+                    continue
+
+                # Use middle timestamp of the segment
+                mid_timestamp = (start_time + end_time) / 2
+                detections.append(BehaviorDetectionResult(
+                    frame_path="live_stream",  # Live stream identifier
+                    timestamp=mid_timestamp,
+                    frame_index=frame_index,
+                    behavior_detected=label,
+                    confidence=conf,
+                    model_used=model_name,
+                    processing_time=time.time() - started
+                ))
+            except Exception as e:
+                # Best effort: log the failure and move on to the next model
+                logger.error(f"Error detecting behavior with {model_name} in segment: {e}")
+                continue
+
+        return detections
+    
+    def detect_behavior_in_keyframes(self, keyframes: List, video_path: str = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in keyframes
+
+        YOLO models run on each keyframe image. 3D-ResNet models run on a
+        1-second video segment centred on each keyframe timestamp and are only
+        attempted when video_path exists and at least one such model is loaded.
+        Keyframes whose timestamp is missing or not above 0 are skipped there.
+
+        Args:
+            keyframes: List of KeyframeResult objects
+            video_path: Optional path to video file (needed for 3D-ResNet models)
+
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not self.enabled:
+            logger.info("🚫 Behavior analysis disabled, skipping")
+            return []
+
+        def keyframe_info(keyframe):
+            # frame_path / timestamp live on .frame_data when present, else on the keyframe itself
+            source = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+            return getattr(source, 'frame_path', None), getattr(source, 'timestamp', 0.0)
+
+        logger.info(f"🎬 Starting behavior detection on {len(keyframes)} keyframes")
+        logger.info(f"📹 Video path provided: {video_path}")
+        logger.info(f"🤖 Available models: {list(self.models.keys())}")
+
+        logger.info(f"🔍 Running behavior analysis on {len(keyframes)} keyframes...")
+
+        all_results = []
+
+        # Process YOLO models (single frame) - wallclimb
+        yolo_models_available = [m for m in self.models if m in YOLO_MODELS]
+        logger.info(f"🎯 Processing YOLO models (single frame): {yolo_models_available}")
+
+        # Skip the per-keyframe pass entirely when no YOLO model is loaded
+        if yolo_models_available:
+            for i, keyframe in enumerate(keyframes):
+                frame_path, timestamp = keyframe_info(keyframe)
+                if frame_path and os.path.exists(frame_path):
+                    # Detect with YOLO models (single frame) - wallclimb
+                    frame_results = self.detect_behavior_in_frame(frame_path, timestamp, i)
+                    all_results.extend(frame_results)
+
+        # Process 3D-ResNet models (need 16-frame clips) - fighting, road_accident
+        # Gate on the models actually loaded: the RESNET_MODELS constant is always
+        # truthy, so testing it would decode video segments with nothing to run.
+        resnet_models_available = [m for m in self.models if m in RESNET_MODELS]
+        if resnet_models_available and video_path and os.path.exists(video_path):
+            logger.info("🎬 Processing 3D-ResNet models using video segments...")
+            logger.info(f"📊 Available ResNet models: {resnet_models_available}")
+            logger.info(f"📊 Total ResNet models to process: {len(resnet_models_available)}")
+
+            # Group keyframes into temporal segments for 3D-ResNet processing
+            # Process segments of ~1 second (16 frames at ~30fps) around each keyframe
+            segment_window = 1.0  # 1 second window
+
+            processed_segments = set()  # Track processed segments to avoid duplicates
+
+            for keyframe in keyframes:
+                _, timestamp = keyframe_info(keyframe)
+                if not (timestamp > 0):
+                    continue
+
+                # Create segment around this keyframe
+                start_time = max(0, timestamp - segment_window / 2)
+                end_time = timestamp + segment_window / 2
+
+                # Round to avoid processing same segment multiple times
+                segment_key = (int(start_time * 10), int(end_time * 10))
+                if segment_key in processed_segments:
+                    continue
+                processed_segments.add(segment_key)
+
+                try:
+                    logger.info(f"🎥 Processing video segment: {start_time:.1f}s - {end_time:.1f}s")
+                    # Process segment with 3D-ResNet models
+                    segment_results = self.detect_behavior_in_segment(
+                        video_path=video_path,
+                        start_time=start_time,
+                        end_time=end_time,
+                        frame_indices=None
+                    )
+                    logger.info(f"📈 Segment results: {len(segment_results)} detections")
+                    for result in segment_results:
+                        logger.info(f"🔍 Detected: {result.behavior_detected} (conf: {result.confidence:.3f})")
+                    all_results.extend(segment_results)
+                except Exception as e:
+                    logger.error(f"❌ Error processing segment {start_time:.1f}s-{end_time:.1f}s: {e}")
+                    continue
+
+        logger.info(f"✅ Behavior analysis complete: {len(all_results)} behaviors detected")
+        return all_results
+    
+    def create_behavior_events(self, detection_results: List[BehaviorDetectionResult], 
+                              temporal_window: float = 5.0) -> List[BehaviorEvent]:
+        """
+        Merge individual behavior detections into BehaviorEvent objects.
+
+        Detections labelled "no_action" are ignored. The rest are visited in
+        timestamp order: a detection is folded into the currently open event
+        when it has the same behavior type and is at most ``temporal_window``
+        seconds after that event's latest detection; otherwise the open event
+        is closed and a new one is opened.
+
+        Args:
+            detection_results: List of BehaviorDetectionResult objects
+            temporal_window: Largest allowed gap (seconds) between two
+                consecutive detections of the same event
+
+        Returns:
+            List of BehaviorEvent objects, in the order the events were closed
+        """
+        if not detection_results:
+            return []
+
+        def open_group(detection, sequence_number):
+            # Accumulator for one event; lists grow as detections are merged in.
+            return {
+                'event_id': f"behavior_{detection.behavior_detected}_{sequence_number}",
+                'behavior_type': detection.behavior_detected,
+                'start_timestamp': detection.timestamp,
+                'end_timestamp': detection.timestamp,
+                'confidences': [detection.confidence],
+                'frame_indices': [detection.frame_index],
+                'keyframes': [detection.frame_path],
+                'model_used': detection.model_used
+            }
+
+        def close_group(group):
+            # Event confidence is the mean of its detections; importance scales
+            # that mean by the event duration plus one second.
+            mean_confidence = sum(group['confidences']) / len(group['confidences'])
+            weight = group['end_timestamp'] - group['start_timestamp'] + 1
+            return BehaviorEvent(
+                event_id=group['event_id'],
+                behavior_type=group['behavior_type'],
+                start_timestamp=group['start_timestamp'],
+                end_timestamp=group['end_timestamp'],
+                confidence=mean_confidence,
+                frame_indices=group['frame_indices'],
+                keyframes=group['keyframes'],
+                model_used=group['model_used'],
+                importance_score=mean_confidence * weight
+            )
+
+        finished = []
+        open_event = None
+        sequence = 0
+
+        for detection in sorted(detection_results, key=lambda item: item.timestamp):
+            label = detection.behavior_detected
+            if label == "no_action":
+                continue
+
+            if open_event is not None:
+                if (label == open_event['behavior_type'] and
+                        detection.timestamp - open_event['end_timestamp'] <= temporal_window):
+                    # Same behavior, close enough in time: extend the open event.
+                    open_event['end_timestamp'] = detection.timestamp
+                    open_event['confidences'].append(detection.confidence)
+                    open_event['frame_indices'].append(detection.frame_index)
+                    open_event['keyframes'].append(detection.frame_path)
+                    continue
+                # Different behavior or too large a gap: close what we have.
+                finished.append(close_group(open_event))
+
+            sequence += 1
+            open_event = open_group(detection, sequence)
+
+        # Flush the event that was still open when the input ran out.
+        if open_event:
+            finished.append(close_group(open_event))
+
+        logger.info(f"✅ Created {len(finished)} behavior-based events")
+        return finished
+    
+    def process_keyframes_with_behavior_analysis(self, keyframes: List, video_path: str = None) -> Tuple[List[BehaviorDetectionResult], List[BehaviorEvent]]:
+        """
+        Process keyframes with behavior analysis and create behavior-based events
+
+        Runs ``detect_behavior_in_keyframes``, groups the detections with
+        ``create_behavior_events`` and, when the config provides an
+        ``output_base_dir`` and at least one detection exists, writes a JSON
+        summary to ``<output_base_dir>/behavior_analysis_metadata.json``.
+
+        Args:
+            keyframes: List of KeyframeResult objects
+            video_path: Optional path to video file (needed for 3D-ResNet models)
+
+        Returns:
+            Tuple of (detection_results, behavior_events); two empty lists when
+            behavior analysis is disabled
+        """
+        if not self.enabled:
+            logger.info("🚫 Behavior analysis disabled, skipping...")
+            return [], []
+
+        logger.info("🚀 ===== STARTING BEHAVIOR ANALYSIS INTEGRATION =====")
+        logger.info(f"📊 Input: {len(keyframes)} keyframes, video_path: {video_path}")
+        logger.info(f"🤖 Loaded models: {list(self.models.keys())}")
+        logger.info(f"⚙️ Confidence thresholds: fighting={getattr(self.config, 'fighting_detection_confidence', 0.5)}, accident={getattr(self.config, 'accident_detection_confidence', 0.6)}, climbing={getattr(self.config, 'climbing_detection_confidence', 0.7)}")
+
+        logger.info("🔍 Starting behavior analysis integration")
+
+        # Run behavior detection on keyframes (with video_path for 3D-ResNet models)
+        detection_results = self.detect_behavior_in_keyframes(keyframes, video_path=video_path)
+
+        # Create behavior-based events
+        temporal_window = getattr(self.config, 'behavior_event_temporal_window', 5.0)
+        logger.info(f"📅 Creating behavior events with temporal window: {temporal_window}s")
+        logger.info(f"📊 Total detections to process: {len(detection_results)}")
+
+        # Computed once and reused for logging, metadata and the final summary.
+        positive_detections = [r for r in detection_results if r.behavior_detected != "no_action"]
+        logger.info(f"✅ Positive detections: {len(positive_detections)}")
+        for detection in positive_detections:
+            logger.info(f"   🎯 {detection.behavior_detected} at {detection.timestamp:.1f}s (conf: {detection.confidence:.3f})")
+
+        behavior_events = self.create_behavior_events(detection_results, temporal_window)
+
+        # Store detection metadata. A missing or None output_base_dir skips the
+        # write instead of failing inside os.path.join.
+        output_base_dir = getattr(self.config, 'output_base_dir', None)
+        if output_base_dir is not None and detection_results:
+            behavior_counts = {'fighting': 0, 'accident': 0, 'climbing': 0}
+            for detection in positive_detections:
+                if detection.behavior_detected in behavior_counts:
+                    behavior_counts[detection.behavior_detected] += 1
+
+            detection_metadata = {
+                'total_keyframes': len(keyframes),
+                'frames_with_behaviors': len(positive_detections),
+                'behaviors_detected': behavior_counts,
+                'total_events': len(behavior_events),
+                'detection_summary': [asdict(r) for r in detection_results[:10]]  # First 10 for summary
+            }
+
+            metadata_path = os.path.join(output_base_dir, 'behavior_analysis_metadata.json')
+            metadata_dir = os.path.dirname(metadata_path)
+            # An empty directory part means "current directory"; os.makedirs('')
+            # would raise, so only create the directory when there is one.
+            if metadata_dir:
+                os.makedirs(metadata_dir, exist_ok=True)
+
+            with open(metadata_path, 'w', encoding='utf-8') as f:
+                json.dump(detection_metadata, f, indent=2, default=str)
+
+            logger.info(f"📊 Behavior analysis metadata saved: {metadata_path}")
+
+        logger.info("🏁 ===== BEHAVIOR ANALYSIS INTEGRATION COMPLETE =====")
+        logger.info("📈 Summary:")
+        logger.info(f"   📊 Total detections: {len(detection_results)}")
+        logger.info(f"   ✅ Positive detections: {len(positive_detections)}")
+        logger.info(f"   📅 Events created: {len(behavior_events)}")
+
+        for event in behavior_events:
+            logger.info(f"   🎬 Event: {event.behavior_type} ({event.start_timestamp:.1f}s-{event.end_timestamp:.1f}s, conf: {event.confidence:.3f})")
+
+        return detection_results, behavior_events
+    
+    def get_suspicious_frames(self, detection_results: List[BehaviorDetectionResult]) -> List[BehaviorDetectionResult]:
+        """
+        Select the detections that report an actual behavior.
+
+        Any detection whose ``behavior_detected`` is not "no_action" counts as
+        suspicious (intended as input for facial recognition processing).
+
+        Args:
+            detection_results: List of BehaviorDetectionResult objects
+
+        Returns:
+            New list with the suspicious BehaviorDetectionResult objects, in input order
+        """
+        flagged = []
+        for detection in detection_results:
+            if detection.behavior_detected != "no_action":
+                flagged.append(detection)
+
+        logger.info(f"🔍 Identified {len(flagged)} suspicious frames from behavior analysis")
+        return flagged
+    
+    def get_behavior_analysis_summary(self) -> Dict[str, Any]:
+        """Report whether analysis is enabled, which models are loaded and the device in use."""
+        # Falsy models/device collapse to an empty list / None respectively.
+        loaded_models = list(self.models.keys()) if self.models else []
+        device_name = str(self.device) if self.device else None
+
+        summary = {'enabled': self.enabled}
+        summary['models_loaded'] = loaded_models
+        summary['device'] = device_name
+        return summary
+
diff --git a/config.py b/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..22cf513086ec31584d9361c0988b836057051693
--- /dev/null
+++ b/config.py
@@ -0,0 +1,369 @@
+"""
+Configuration settings for the Video Event Detection and Preprocessing Pipeline.
+
+This file contains all configurable parameters that can be tweaked to control:
+- Keyframe extraction sensitivity
+- Event detection thresholds
+- Video quality settings
+- Output formats and paths
+"""
+
+import os
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+
+@dataclass
+class VideoProcessingConfig:
+    """Main configuration class for video processing pipeline.
+
+    Every field has a default, so ``VideoProcessingConfig()`` is a complete,
+    usable configuration. Creating an instance has a side effect: the
+    ``output_base_dir`` directory is created, and three thresholds are
+    range-checked (see ``__post_init__``). The value ranges quoted in the
+    comments below are recommendations; only the three ranges checked in
+    ``__post_init__`` are enforced.
+    """
+    
+    # ===== KEYFRAME EXTRACTION PARAMETERS =====
+    # Control how many keyframes are extracted
+    
+    # Base quality threshold (0.1-0.3): Lower = more keyframes, Higher = fewer but better quality
+    # (this range is enforced in __post_init__)
+    base_quality_threshold: float = 0.15
+    
+    # Motion detection threshold (0.005-0.02): Lower = more motion-sensitive, Higher = only significant motion
+    # (this range is enforced in __post_init__)
+    motion_threshold: float = 0.008
+    
+    # Burst sampling rate (1-10): Higher = more frames during high activity periods
+    burst_sampling_rate: int = 3
+    
+    # Frame sampling interval in seconds (0.5-3.0): Lower = more frequent sampling
+    frame_sampling_interval: float = 1.0
+    
+    # ===== EVENT DETECTION PARAMETERS =====
+    # Control how events are detected and prioritized
+    
+    # Event importance threshold (0.2-0.5): Lower = more events detected
+    event_importance_threshold: float = 0.25
+    
+    # Burst activity weight (1.5-3.0): Higher = burst frames get higher priority
+    burst_weight: float = 2.5
+    
+    # Temporal clustering window in seconds (10-30): Frames within this window are clustered
+    temporal_clustering_window: float = 15.0
+    
+    # Scene change detection threshold (0.01-0.05): Lower = more scene changes detected
+    scene_change_threshold: float = 0.02
+    
+    # ===== VIDEO SEGMENTATION PARAMETERS =====
+    # Control how video is divided into segments
+    
+    # Segment duration in seconds (30-60): Length of each temporal segment
+    segment_duration: float = 45.0
+    
+    # Keyframes per segment (3-8): How many keyframes to extract per segment
+    keyframes_per_segment: int = 5
+    
+    # ===== HIGHLIGHT REEL PARAMETERS =====
+    # Control the final summary video creation
+    
+    # Maximum summary duration in seconds (15-60): Total length of highlight reel
+    max_summary_duration: float = 25.0
+    
+    # Frame display duration in seconds (0.5-3.0): How long each frame is shown
+    frame_display_duration: float = 1.5
+    
+    # Maximum frames in summary (10-30): Total number of frames in highlight reel
+    max_summary_frames: int = 18
+    
+    # Summary video FPS (0.4-1.0): Playback speed of summary
+    summary_fps: float = 0.6
+    
+    # ===== DEDUPLICATION PARAMETERS =====
+    # Control duplicate frame removal
+    
+    # Similarity threshold (0.80-0.95): Higher = stricter deduplication
+    # (this range is enforced in __post_init__)
+    similarity_threshold: float = 0.85
+    
+    # Minimum time gap between frames in seconds (1-5): Prevents frames too close in time
+    min_frame_gap: float = 2.0
+    
+    # ===== COMPRESSION PARAMETERS =====
+    # Control video compression settings
+    
+    # Output resolution (720p, 1080p, or original)
+    output_resolution: str = "720p"
+    
+    # Compression quality (18-28): Lower = better quality, larger files
+    compression_crf: int = 23
+    
+    # Compression preset (ultrafast, fast, medium, slow): Affects encoding speed vs efficiency
+    compression_preset: str = "fast"
+    
+    # ===== ADAPTIVE ENHANCEMENT PARAMETERS =====
+    # Control image enhancement
+    
+    # Enable adaptive histogram equalization
+    enable_clahe: bool = True
+    
+    # CLAHE clip limit (1.0-4.0): Higher = more contrast enhancement
+    clahe_clip_limit: float = 2.0
+    
+    # Enable denoising
+    enable_denoising: bool = True
+    
+    # Denoising strength (3-10): Higher = more denoising
+    denoise_strength: int = 5
+    
+    # ===== OUTPUT SETTINGS =====
+    # Control output files and formats
+    
+    # Base output directory (created on instantiation by __post_init__)
+    output_base_dir: str = "video_processing_outputs"
+    
+    # Enable various output formats
+    generate_json_reports: bool = True
+    generate_html_gallery: bool = True
+    generate_compressed_video: bool = True
+    generate_segments: bool = True
+    generate_highlight_reels: bool = False  # Disabled for security focus - saves processing time
+    
+    # Video output format (mp4, avi, mov)
+    video_output_format: str = "mp4"
+    
+    # ===== ADVANCED PARAMETERS =====
+    # Fine-tuning for specific use cases
+    
+    # Enable GPU acceleration if available
+    use_gpu_acceleration: bool = True
+    
+    # Enable face detection for human-centric events
+    enable_face_detection: bool = False
+    
+    # Enable object detection for context-aware processing
+    enable_object_detection: bool = False
+    
+    # Enable facial recognition for suspicious person tracking (FULL implementation with FAISS + MongoDB)
+    enable_facial_recognition: bool = True
+    
+    # Face recognition confidence threshold (0.5-0.95)
+    face_recognition_confidence: float = 0.7
+    
+    # Face detection model to use (default: MTCNN)
+    face_detection_model: str = "mtcnn"
+    
+    # Face recognition model to use (InceptionResnetV1 with FAISS similarity search)
+    face_recognition_model: str = "facenet_faiss"
+    
+    # Enable suspicious person database and tracking
+    suspicious_person_tracking: bool = True
+    
+    # Face database settings
+    face_database_enabled: bool = True
+    
+    # ===== OBJECT DETECTION PARAMETERS =====
+    # Configuration for fire, knife, gun detection
+    
+    # Models directory path: the "models" folder next to this config file
+    # (evaluated once, when the class body is executed at import time)
+    models_dir: str = os.path.join(os.path.dirname(__file__), "models")
+    
+    # Object detection confidence threshold (0.1-0.9)
+    object_detection_confidence: float = 0.5
+    
+    # Temporal window for grouping object detections into events (seconds)
+    object_event_temporal_window: float = 5.0
+    
+    # Enable annotation of detected objects on keyframes
+    enable_object_annotation: bool = True
+    
+    # Object detection specific thresholds.
+    # NOTE(review): both defaults are 0.7, so by default fire is not more
+    # sensitive than weapons; get_security_focused_config() lowers fire to 0.5.
+    fire_detection_confidence: float = 0.7     # Fire threshold (safety critical; lower it for more sensitivity)
+    weapon_detection_confidence: float = 0.7   # Weapon threshold (kept high to reduce false positives)
+    
+    # Enable specific object types
+    enable_fire_detection: bool = True
+    enable_weapon_detection: bool = True
+    
+    # Object event importance multiplier
+    object_event_importance_multiplier: float = 2.0
+    
+    # ===== BEHAVIOR ANALYSIS PARAMETERS =====
+    # Configuration for behavior/action recognition (fighting, accidents, climbing)
+    
+    # Enable behavior analysis
+    enable_behavior_analysis: bool = False
+    
+    # Behavior analysis models directory: the "behavior_analysis" folder next to this config file
+    behavior_models_dir: str = os.path.join(os.path.dirname(__file__), "behavior_analysis")
+    
+    # Behavior detection confidence thresholds per action type (0.3-0.8)
+    fighting_detection_confidence: float = 0.5
+    accident_detection_confidence: float = 0.6
+    climbing_detection_confidence: float = 0.7
+    
+    # Temporal window for grouping behavior detections into events (seconds)
+    behavior_event_temporal_window: float = 5.0
+    
+    # Behavior event importance multiplier
+    behavior_event_importance_multiplier: float = 2.5
+    
+    # Enable specific behavior types
+    enable_fighting_detection: bool = True
+    enable_accident_detection: bool = True
+    enable_climbing_detection: bool = True
+    
+    # ===== VIDEO CAPTIONING PARAMETERS =====
+    # Configuration for video frame captioning with vision-language models
+    
+    # Enable video captioning
+    enable_video_captioning: bool = False
+    
+    # Vision model for caption generation
+    captioning_vision_model: str = "Salesforce/blip-image-captioning-base"
+    
+    # Embedding model for semantic search
+    captioning_embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
+    
+    # Device for captioning models (cpu or cuda)
+    captioning_device: str = "cpu"
+    
+    # Batch size for captioning (increased for better throughput)
+    captioning_batch_size: int = 8
+    
+    # Database paths for caption storage
+    # NOTE(review): annotated as str but defaults to None; Optional[str] would
+    # describe it accurately.
+    captioning_db_path: str = None  # Will use default if None
+    captioning_vector_db_path: str = "./video_captioning_store"
+    
+    # Enable async processing for captioning
+    captioning_async: bool = True
+    
+    # Parallel processing workers (1-8): More workers = faster but more memory
+    num_workers: int = 4
+
+    def __post_init__(self):
+        """Create the output directory and range-check three thresholds.
+
+        Raises:
+            AssertionError: if base_quality_threshold, motion_threshold or
+                similarity_threshold lies outside its allowed range.
+
+        NOTE(review): the checks are ``assert`` statements, which Python
+        removes when run with ``-O``; in that mode no validation happens.
+        """
+        # Ensure output directory exists
+        os.makedirs(self.output_base_dir, exist_ok=True)
+        
+        # Validate thresholds
+        assert 0.1 <= self.base_quality_threshold <= 0.3, "Quality threshold must be between 0.1-0.3"
+        assert 0.005 <= self.motion_threshold <= 0.02, "Motion threshold must be between 0.005-0.02"
+        assert 0.8 <= self.similarity_threshold <= 0.95, "Similarity threshold must be between 0.8-0.95"
+
+# ===== PRESET CONFIGURATIONS =====
+
+def get_high_recall_config() -> VideoProcessingConfig:
+    """Return a preset that favours recall: more keyframes and more detected events."""
+    overrides = {
+        'base_quality_threshold': 0.12,       # accept lower-quality frames
+        'motion_threshold': 0.005,            # react to very small motion
+        'event_importance_threshold': 0.20,   # keep less important events
+        'max_summary_frames': 25,             # longer summary
+        'frame_sampling_interval': 0.8,       # sample more often
+        'temporal_clustering_window': 20.0,   # cluster over a wider window
+        'burst_weight': 3.0,                  # prioritise burst frames more
+        'keyframes_per_segment': 6,           # more keyframes per segment
+    }
+    return VideoProcessingConfig(**overrides)
+
+def get_high_precision_config() -> VideoProcessingConfig:
+    """Return a preset that favours precision: fewer keyframes of higher quality."""
+    overrides = {
+        'base_quality_threshold': 0.20,       # demand better frame quality
+        'motion_threshold': 0.015,            # ignore minor motion
+        'event_importance_threshold': 0.35,   # keep only important events
+        'max_summary_frames': 12,             # shorter summary
+        'frame_sampling_interval': 1.5,       # sample less often
+        'temporal_clustering_window': 10.0,   # cluster over a tighter window
+        'burst_weight': 2.0,                  # moderate burst priority
+        'keyframes_per_segment': 4,           # fewer keyframes per segment
+    }
+    return VideoProcessingConfig(**overrides)
+
+def get_balanced_config() -> VideoProcessingConfig:
+    """Return the general-purpose preset, i.e. the dataclass defaults unchanged."""
+    balanced = VideoProcessingConfig()
+    return balanced
+
+# Removed robbery detection config - using security_focused_config instead
+
+def get_security_focused_config() -> VideoProcessingConfig:
+    """Return a preset tuned for security and threat detection.
+
+    Combines sensitive keyframe/event settings with object detection,
+    behavior analysis and video captioning all switched on.
+    """
+    keyframe_and_event_settings = {
+        'base_quality_threshold': 0.12,
+        'motion_threshold': 0.005,            # Very sensitive
+        'event_importance_threshold': 0.20,
+        'burst_weight': 3.0,                  # Highest priority for burst activity
+        'temporal_clustering_window': 20.0,
+        'max_summary_frames': 25,
+        'frame_display_duration': 2.0,
+        'similarity_threshold': 0.82,
+        'enable_clahe': True,
+        'clahe_clip_limit': 3.0,
+    }
+    object_detection_settings = {
+        'enable_object_detection': True,
+        'object_detection_confidence': 0.4,   # Lower threshold for better recall
+        'fire_detection_confidence': 0.5,     # Very sensitive for fire
+        'weapon_detection_confidence': 0.7,   # Higher threshold for weapons to reduce false positives
+        'object_event_temporal_window': 8.0,  # Longer window for complex events
+        'enable_object_annotation': True,
+        'object_event_importance_multiplier': 3.0,  # High importance for security events
+    }
+    behavior_settings = {
+        'enable_behavior_analysis': True,
+        'fighting_detection_confidence': 0.5,
+        'accident_detection_confidence': 0.6,
+        'climbing_detection_confidence': 0.7,
+        'behavior_event_temporal_window': 8.0,        # Longer window for complex events
+        'behavior_event_importance_multiplier': 3.0,  # High importance for security events
+    }
+    captioning_settings = {
+        'enable_video_captioning': True,      # Captions feed semantic search
+        'captioning_device': "cpu",           # Change to "cuda" if GPU available
+    }
+    return VideoProcessingConfig(
+        **keyframe_and_event_settings,
+        **object_detection_settings,
+        **behavior_settings,
+        **captioning_settings,
+    )
+
+# ===== PARAMETER ADJUSTMENT GUIDE =====
+
+# Human-readable tuning advice, printed by print_parameter_guide().
+# Outer keys are tuning goals; each inner dict maps a VideoProcessingConfig
+# field name to a suggested adjustment. The values are display text only and
+# are never parsed. Insertion order is the order in which goals are printed.
+PARAMETER_GUIDE = {
+    "More Keyframes": {
+        "base_quality_threshold": "Decrease (0.10-0.12)",
+        "motion_threshold": "Decrease (0.005-0.008)",
+        "event_importance_threshold": "Decrease (0.20-0.25)",
+        "max_summary_frames": "Increase (20-30)",
+        "keyframes_per_segment": "Increase (6-8)",
+        "frame_sampling_interval": "Decrease (0.5-1.0)"
+    },
+    "Fewer Keyframes": {
+        "base_quality_threshold": "Increase (0.18-0.25)",
+        "motion_threshold": "Increase (0.012-0.020)",
+        "event_importance_threshold": "Increase (0.30-0.40)",
+        "max_summary_frames": "Decrease (8-15)",
+        "keyframes_per_segment": "Decrease (3-4)",
+        "frame_sampling_interval": "Increase (1.5-2.5)"
+    },
+    "Better Quality": {
+        "base_quality_threshold": "Increase (0.18-0.25)",
+        "compression_crf": "Decrease (18-20)",
+        "enable_clahe": "True",
+        "enable_denoising": "True",
+        "output_resolution": "'1080p'"
+    },
+    "Faster Processing": {
+        "compression_preset": "'ultrafast'",
+        "num_workers": "Increase (6-8)",
+        "enable_face_detection": "False",
+        "enable_object_detection": "False",
+        "keyframes_per_segment": "Decrease (3-4)"
+    },
+    "More Sensitive Event Detection": {
+        "motion_threshold": "Decrease (0.005-0.008)",
+        "burst_weight": "Increase (2.5-3.0)",
+        "event_importance_threshold": "Decrease (0.20-0.25)",
+        "temporal_clustering_window": "Increase (15-25)"
+    }
+}
+
+def print_parameter_guide():
+    """Write the tuning guide and the list of preset factories to stdout."""
+    print("🔧 VIDEO PROCESSING PARAMETER ADJUSTMENT GUIDE")
+    print("=" * 60)
+
+    # One section per tuning goal, one bullet per suggested parameter change.
+    for heading in PARAMETER_GUIDE:
+        print(f"\n🎯 {heading}:")
+        for field_name, advice in PARAMETER_GUIDE[heading].items():
+            print(f"   • {field_name}: {advice}")
+
+    preset_lines = (
+        "\n📝 Available Preset Configurations:",
+        "   • get_high_recall_config() - More keyframes, sensitive detection",
+        "   • get_high_precision_config() - Fewer but higher quality keyframes",
+        "   • get_balanced_config() - General purpose settings",
+        "   • get_security_focused_config() - Optimized for security/threat detection",
+    )
+    for line in preset_lines:
+        print(line)
+
+# When executed as a script (not imported), print the tuning guide.
+if __name__ == "__main__":
+    print_parameter_guide()
\ No newline at end of file
diff --git a/core/video_processing.py b/core/video_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb168464d7f917048dff217c2f06959657a0122b
--- /dev/null
+++ b/core/video_processing.py
@@ -0,0 +1,384 @@
+"""
+Optimized Video Processing for DetectifAI
+
+This module contains optimized video processing components focusing on:
+- Efficient keyframe extraction for security footage
+- Selective frame enhancement only when needed
+- Memory-optimized processing for large surveillance videos
+"""
+
+import cv2
+import numpy as np
+import os
+import uuid
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+import time
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+@dataclass
+class FrameData:
+    """Data structure for frame information.
+
+    One record per frame saved by the video processor; the field notes below
+    describe how extract_keyframes_optimized fills them in.
+    """
+    # Path of the image file the (possibly enhanced) frame was written to
+    frame_path: str
+    # frame_number / fps, i.e. seconds; falls back to the raw frame number when fps is not positive
+    timestamp: float
+    # Zero-based index of the frame within the video
+    frame_number: int
+    # Value returned by the processor's frame quality assessment
+    quality_score: float
+    # Currently always stored as 0.0 by extract_keyframes_optimized
+    motion_score: float
+    # Currently always stored as False by extract_keyframes_optimized
+    burst_active: bool
+    # True when the frame enhancer modified the frame before it was saved
+    enhancement_applied: bool
+    # Not set by the visible extraction code; presumably filled in by later stages - TODO confirm
+    face_count: int = 0
+    object_count: int = 0
+
+@dataclass
+class KeyframeResult:
+    """Result structure for keyframe extraction.
+
+    Wraps the FrameData of a selected frame together with its score and the
+    reason it was selected.
+    """
+    # Metadata of the saved frame
+    frame_data: FrameData
+    # Selection score; extract_keyframes_optimized stores the frame's quality score here
+    keyframe_score: float
+    # Free-text explanation, e.g. "Regular interval extraction"
+    selection_reason: str
+
+class OptimizedFrameEnhancer:
+    """Optimized frame enhancement for DetectifAI - only enhance when necessary.
+
+    Applies CLAHE contrast enhancement to frames that a quick brightness and
+    contrast check flags as poor; all other frames are passed through
+    untouched. When CLAHE is disabled every frame is passed through.
+    """
+
+    def __init__(self, enable_clahe: bool = True, clahe_clip_limit: float = 2.0):
+        """
+        Args:
+            enable_clahe: Whether CLAHE enhancement may be applied at all
+            clahe_clip_limit: Clip limit passed to cv2.createCLAHE
+        """
+        self.enable_clahe = enable_clahe
+
+        # Always define the attribute so it exists even when CLAHE is disabled
+        # (denoising is deliberately not set up, for performance).
+        self.clahe = None
+        if enable_clahe:
+            self.clahe = cv2.createCLAHE(clipLimit=clahe_clip_limit, tileGridSize=(8, 8))
+
+        logger.info(f"OptimizedFrameEnhancer initialized - CLAHE: {enable_clahe}")
+
+    def enhance_frame_if_needed(self, frame: np.ndarray) -> Tuple[np.ndarray, bool]:
+        """
+        Enhance frame only if quality is poor (DetectifAI optimization)
+
+        Args:
+            frame: Input frame as numpy array (2-D grayscale or 3-D color;
+                color frames are converted with BGR<->LAB)
+
+        Returns:
+            Tuple of (enhanced_frame, enhancement_applied). The input frame
+            itself is returned, with False, when CLAHE is disabled, the frame
+            does not need enhancement, its rank is neither 2 nor 3, or an
+            error occurs. The input array is never modified.
+        """
+        try:
+            # Nothing can be applied without CLAHE, so skip the quality check.
+            if not self.enable_clahe:
+                return frame, False
+
+            # Quick quality assessment
+            if not self._needs_enhancement(frame):
+                return frame, False
+
+            rank = len(frame.shape)
+
+            if rank == 3:
+                # Equalize only the lightness channel so colors are preserved.
+                # cvtColor returns a new array, so the caller's frame is untouched.
+                lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
+                lab[:, :, 0] = self.clahe.apply(lab[:, :, 0])
+                return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR), True
+
+            if rank == 2:
+                # Grayscale frame; apply() returns a new array
+                return self.clahe.apply(frame), True
+
+            return frame, False
+
+        except Exception as e:
+            logger.error(f"Error enhancing frame: {e}")
+            return frame, False
+
+    def _needs_enhancement(self, frame: np.ndarray) -> bool:
+        """
+        Quick quality check - only enhance genuinely poor quality frames
+
+        Returns:
+            True when the grayscale mean is below 50 or above 200, or the
+            grayscale standard deviation is below 30; False otherwise and
+            when the check itself fails.
+        """
+        try:
+            # Convert to grayscale for analysis
+            if len(frame.shape) == 3:
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = frame
+
+            # Check brightness and contrast
+            mean_brightness = np.mean(gray)
+            contrast = np.std(gray)
+
+            # Only enhance if frame has quality issues; bool() turns the numpy
+            # scalar into the plain bool the signature promises.
+            return bool(
+                mean_brightness < 50 or    # Too dark
+                mean_brightness > 200 or   # Too bright
+                contrast < 30              # Low contrast
+            )
+
+        except Exception as e:
+            # Best effort: an unreadable frame is treated as "no enhancement",
+            # but leave a trace instead of failing silently.
+            logger.debug(f"Frame quality check failed: {e}")
+            return False
+
+class OptimizedVideoProcessor:
+    """
+    Optimized video processor for DetectifAI surveillance footage
+    """
+    
+    def __init__(self, config=None):
+        """
+        Set up the frame enhancer and the processing statistics.
+
+        Args:
+            config: Optional settings object. CLAHE is switched by its
+                ``enable_adaptive_processing`` attribute when present,
+                otherwise by ``enable_clahe`` (default True); the clip limit
+                comes from ``clahe_clip_limit`` (default 2.0). With
+                ``config=None`` the defaults are used.
+        """
+        self.config = config
+
+        # Prefer the original switch name, but also honour the CLAHE fields
+        # that VideoProcessingConfig actually defines, so those settings are
+        # no longer silently ignored.
+        clahe_enabled = getattr(
+            config, 'enable_adaptive_processing',
+            getattr(config, 'enable_clahe', True)
+        )
+        self.frame_enhancer = OptimizedFrameEnhancer(
+            enable_clahe=clahe_enabled,
+            clahe_clip_limit=getattr(config, 'clahe_clip_limit', 2.0)
+        )
+
+        # Processing statistics
+        self.processing_stats = {
+            'frames_processed': 0,
+            'frames_enhanced': 0,
+            'keyframes_extracted': 0,
+            'total_processing_time': 0.0
+        }
+
+        logger.info("OptimizedVideoProcessor initialized")
+    
+    def extract_keyframes_optimized(self, video_path: str, output_dir: str,
+                                   fps_interval: float = 1.0) -> List[KeyframeResult]:
+        """
+        Extract keyframes with optimized processing for surveillance video
+
+        Args:
+            video_path: Path to input video
+            output_dir: Directory to save keyframes
+            fps_interval: Seconds between keyframes (default: 1 frame per second)
+
+        Returns:
+            List of KeyframeResult objects
+        """
+        start_time = time.time()
+        keyframes = []
+
+        try:
+            # Open video
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return []
+
+            # Get video properties
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            duration = total_frames / fps if fps > 0 else 0
+
+            logger.info(f"Video properties: {total_frames} frames, {fps:.2f} FPS, {duration:.2f}s")
+
+            # Calculate frame interval
+            frame_interval = int(fps * fps_interval) if fps > 0 else 30
+
+            # Create output directory
+            frames_dir = os.path.join(output_dir, 'frames')
+            os.makedirs(frames_dir, exist_ok=True)
+
+            frame_count = 0
+            extracted_count = 0
+
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                # Extract keyframes at specified intervals
+                if frame_count % frame_interval == 0:
+                    timestamp = frame_count / fps if fps > 0 else frame_count
+
+                    # Assess frame quality
+                    quality_score = self._assess_frame_quality(frame)
+
+                    # Enhance frame if needed
+                    enhanced_frame, enhancement_applied = self.frame_enhancer.enhance_frame_if_needed(frame)
+
+                    # Use consistent naming pattern for MinIO storage
+                    frame_filename = f"frame_{frame_count:06d}.jpg"
+                    frame_path = os.path.join(frames_dir, frame_filename)
+
+                    cv2.imwrite(frame_path, enhanced_frame)
+
+                    # Create frame data
+                    frame_data = FrameData(
+                        frame_path=frame_path,
+                        timestamp=timestamp,
+                        frame_number=frame_count,
+                        quality_score=quality_score,
+                        motion_score=0.0,  # Can be calculated if needed
+                        burst_active=False,
+                        enhancement_applied=enhancement_applied
+                    )
+
+                    keyframe_result = KeyframeResult(
+                        frame_data=frame_data,
+                        keyframe_score=quality_score,
+                        selection_reason="Regular interval extraction"
+                    )
+
+                    keyframes.append(keyframe_result)
+                    extracted_count += 1
+
+                    # Update stats
+                    if enhancement_applied:
+                        self.processing_stats['frames_enhanced'] += 1
+
+                frame_count += 1
+                self.processing_stats['frames_processed'] += 1
+
+                # Progress logging
+                if frame_count % 1000 == 0:
+                    progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
+                    logger.info(f"Progress: {progress:.1f}% ({frame_count}/{total_frames} frames)")
+
+            cap.release()
+
+            # Update final statistics
+            processing_time = time.time() - start_time
+            self.processing_stats['keyframes_extracted'] = extracted_count
+            self.processing_stats['total_processing_time'] = processing_time
+
+            logger.info(f"✅ Keyframe extraction complete:")
+            logger.info(f"   📊 Extracted {extracted_count} keyframes from {frame_count} frames")
+            logger.info(f"   ⚡ Enhanced {self.processing_stats['frames_enhanced']} frames")
+            logger.info(f"   ⏱️  Processing time: {processing_time:.2f}s")
+
+            return keyframes
+
+        except Exception as e:
+            logger.error(f"Error in keyframe extraction: {e}")
+            return []
+    
+    def _assess_frame_quality(self, frame: np.ndarray) -> float:
+        """
+        Score a frame's sharpness on a 0-1 scale for keyframe selection.
+
+        The score is the variance of the frame's Laplacian divided by 1000
+        and capped at 1.0, so a higher value means more edge detail.
+
+        Args:
+            frame: Image array. A 3-dimensional array is converted from BGR
+                to grayscale first; any other shape is used as-is.
+
+        Returns:
+            The capped sharpness score, or 0.5 if scoring raised an exception.
+        """
+        try:
+            # Only 3-dimensional arrays need a colour -> grayscale conversion
+            has_channel_axis = len(frame.shape) == 3
+            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if has_channel_axis else frame
+
+            # Laplacian variance serves as the focus measure
+            sharpness = cv2.Laplacian(gray, cv2.CV_64F).var()
+
+            # Scale by 1000 and clamp so the score never exceeds 1.0
+            return min(sharpness / 1000.0, 1.0)
+
+        except Exception:
+            # Best-effort scoring: fall back to a neutral default
+            return 0.5
+    
+    def extract_keyframes(self, video_path: str) -> List[KeyframeResult]:
+        """
+        Pipeline-facing entry point: run keyframe extraction with settings
+        taken from the processor's configuration.
+
+        Args:
+            video_path: Path to input video file
+
+        Returns:
+            Whatever extract_keyframes_optimized returns (a list of
+            KeyframeResult objects), or an empty list when the processor has
+            no configuration.
+        """
+        config = self.config
+        if not config:
+            logger.error("No configuration provided for keyframe extraction")
+            return []
+
+        # Attributes missing from the config fall back to the defaults below.
+        # NOTE(review): 'keyframe_extraction_fps' is forwarded as the
+        # fps_interval argument — confirm the intended unit against the config.
+        return self.extract_keyframes_optimized(
+            video_path,
+            getattr(config, 'output_base_dir', 'video_processing_outputs'),
+            getattr(config, 'keyframe_extraction_fps', 1.0),
+        )
+    
+    def get_processing_stats(self) -> Dict[str, Any]:
+        """Return a shallow copy of the accumulated processing statistics."""
+        stats_snapshot = self.processing_stats.copy()
+        return stats_snapshot
+
+class StreamingVideoProcessor:
+    """
+    Streaming processor for large surveillance videos to reduce memory usage.
+
+    Frames are read sequentially and handed to a caller-supplied function in
+    fixed-size chunks, so only one chunk of decoded frames is held at a time.
+    """
+
+    # Frames per chunk when the config does not provide a usable value
+    DEFAULT_CHUNK_SIZE = 1000
+
+    def __init__(self, config=None):
+        """
+        Args:
+            config: Optional object whose ``video_chunk_size`` attribute sets
+                the number of frames per chunk.
+        """
+        self.config = config
+
+        chunk_size = getattr(config, 'video_chunk_size', self.DEFAULT_CHUNK_SIZE)
+        try:
+            chunk_size = int(chunk_size)
+        except (TypeError, ValueError):
+            chunk_size = self.DEFAULT_CHUNK_SIZE
+        # A zero or negative size would divide by zero below and never read a frame
+        self.chunk_size = chunk_size if chunk_size > 0 else self.DEFAULT_CHUNK_SIZE
+
+    def process_video_in_chunks(self, video_path: str, output_dir: str,
+                               chunk_processor_func) -> Dict[str, Any]:
+        """
+        Process large videos in chunks to manage memory usage
+
+        Args:
+            video_path: Path to input video
+            output_dir: Output directory
+            chunk_processor_func: Function to process each chunk; called as
+                ``chunk_processor_func(chunk_frames, chunk_index, output_dir)``
+                where ``chunk_frames`` is a list of dicts with the keys
+                'frame', 'frame_number' and 'timestamp'.
+
+        Returns:
+            Dictionary with processing results. 'total_frames' and
+            'total_chunks' come from the frame count reported by the video
+            container, 'processed_chunks' counts the chunks actually handed to
+            ``chunk_processor_func``, and 'processing_time' is filled in only
+            when processing finishes without an error.
+        """
+        results = {
+            'total_chunks': 0,
+            'processed_chunks': 0,
+            'total_frames': 0,
+            'processing_time': 0.0
+        }
+
+        start_time = time.time()
+        cap = None
+
+        try:
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return results
+
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+
+            results['total_frames'] = total_frames
+            results['total_chunks'] = (total_frames + self.chunk_size - 1) // self.chunk_size
+
+            logger.info(f"Processing video in {results['total_chunks']} chunks of {self.chunk_size} frames")
+
+            frame_count = 0
+            chunk_count = 0
+            end_of_stream = False
+
+            # Loop on what read() actually delivers rather than on the reported
+            # frame count: if the count is too high, a loop bounded by it never
+            # terminates once read() starts failing, and if it is 0 the video
+            # would be skipped entirely.
+            while not end_of_stream:
+                chunk_frames = []
+
+                # Read chunk frames
+                for _ in range(self.chunk_size):
+                    ret, frame = cap.read()
+                    if not ret:
+                        end_of_stream = True
+                        break
+
+                    chunk_frames.append({
+                        'frame': frame,
+                        'frame_number': frame_count,
+                        'timestamp': frame_count / fps if fps > 0 else frame_count
+                    })
+                    frame_count += 1
+
+                if not chunk_frames:
+                    break
+
+                # Process chunk
+                chunk_processor_func(chunk_frames, chunk_count, output_dir)
+                chunk_count += 1
+                results['processed_chunks'] += 1
+
+                # Drop the reference so the frames can be freed before the next chunk
+                del chunk_frames
+
+                logger.info(f"Processed chunk {chunk_count}/{results['total_chunks']}")
+
+            results['processing_time'] = time.time() - start_time
+
+            logger.info(f"✅ Streaming processing complete in {results['processing_time']:.2f}s")
+
+        except Exception as e:
+            logger.error(f"Error in streaming processing: {e}")
+
+        finally:
+            # Release the capture even when the chunk processor raises
+            if cap is not None:
+                cap.release()
+
+        return results
+
+def create_optimized_processor(config=None):
+    """Build and return an OptimizedVideoProcessor for the given (optional) config."""
+    processor = OptimizedVideoProcessor(config)
+    return processor
\ No newline at end of file
diff --git a/database/config.py b/database/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..703716189713bc5a127cc094fc26cbb0d4c1c77d
--- /dev/null
+++ b/database/config.py
@@ -0,0 +1,173 @@
+"""
+Database Configuration for DetectifAI Backend
+
+This module handles connections to MongoDB Atlas and S3-compatible object storage
+(Backblaze B2) for the DetectifAI system.
+It provides centralized configuration and connection management.
+"""
+
+import os
+from pymongo import MongoClient
+from minio import Minio
+from minio.error import S3Error
+from dotenv import load_dotenv
+import logging
+from datetime import timedelta
+
+# Load environment variables
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+class DatabaseConfig:
+    """
+    Connection settings for MongoDB and the S3-compatible object storage.
+
+    Every setting except the database name is read from its environment
+    variable, falling back to the default written below when the variable
+    is unset.
+    """
+
+    def __init__(self):
+        env = os.getenv
+
+        # NOTE(review): the MongoDB URI and storage key defaults below embed
+        # what look like live credentials in source control — consider
+        # rotating them and requiring the environment variables instead.
+
+        # MongoDB Atlas connection (same as frontend)
+        self.mongo_db_name = 'detectifai'
+        self.mongo_uri = env(
+            'MONGO_URI',
+            'mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0'
+        )
+
+        # S3-compatible object storage (Backblaze B2): endpoint and credentials
+        self.minio_endpoint = env('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com')
+        self.minio_access_key = env('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001')
+        self.minio_secret_key = env('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA')
+
+        # One bucket per artifact type
+        self.minio_video_bucket = env('MINIO_VIDEO_BUCKET', 'detectifai-videos')
+        self.minio_keyframe_bucket = env('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
+        self.minio_reports_bucket = env('MINIO_REPORTS_BUCKET', 'detectifai-reports')
+
+        # Only the (case-insensitive) string 'true' enables TLS
+        self.minio_secure = env('MINIO_SECURE', 'true').lower() == 'true'
+
+        # Region defaults to the one embedded in the endpoint host name
+        # (e.g. 'eu-central-003'); it is used for S3 request signing
+        self.minio_region = env('MINIO_REGION', self._extract_region(self.minio_endpoint))
+
+    @staticmethod
+    def _extract_region(endpoint: str) -> str:
+        """
+        Pull the region out of an endpoint shaped like
+        s3.eu-central-003.backblazeb2.com.
+
+        Returns the second dot-separated label when the endpoint has at least
+        three labels and the first is 's3'; otherwise an empty string.
+        """
+        labels = endpoint.split('.')
+        is_s3_style_host = len(labels) >= 3 and labels[0] == 's3'
+        return labels[1] if is_s3_style_host else ''
+
+class DatabaseManager:
+    """
+    Central database manager for MongoDB and MinIO connections.
+
+    Both clients are created lazily on first access. A MongoDB failure is
+    raised to the caller, while an object-storage failure is logged and
+    surfaces as a ``None`` client.
+    """
+
+    def __init__(self):
+        self.config = DatabaseConfig()
+        self._mongodb_client = None
+        self._db = None
+        self._minio_client = None
+
+    @property
+    def mongo_client(self):
+        """
+        Lazy loading MongoDB client.
+
+        The client is cached only after a successful ping, so a failed
+        attempt is retried (and raises again) on the next access instead of
+        handing out a client that never connected.
+
+        Raises:
+            Exception: Whatever client construction or the ping raised.
+        """
+        if self._mongodb_client is None:
+            client = None
+            try:
+                client = MongoClient(self.config.mongo_uri)
+                # Test connection
+                client.admin.command('ping')
+            except Exception as e:
+                logger.error(f"❌ Failed to connect to MongoDB: {e}")
+                # Do not leak the half-initialised client
+                if client is not None:
+                    client.close()
+                raise
+            self._mongodb_client = client
+            logger.info("✅ MongoDB connection established successfully")
+        return self._mongodb_client
+
+    @property
+    def db(self):
+        """Get MongoDB database instance"""
+        if self._db is None:
+            self._db = self.mongo_client[self.config.mongo_db_name]
+        return self._db
+
+    @property
+    def minio_client(self):
+        """Lazy loading S3-compatible storage client — returns None when unavailable"""
+        if self._minio_client is None:
+            try:
+                self._minio_client = Minio(
+                    self.config.minio_endpoint,
+                    access_key=self.config.minio_access_key,
+                    secret_key=self.config.minio_secret_key,
+                    secure=self.config.minio_secure,
+                    region=self.config.minio_region or None
+                )
+
+                # Test connection and verify buckets exist
+                self._ensure_bucket_exists()
+                logger.info("✅ S3 storage connection established (Backblaze B2)")
+
+            except Exception as e:
+                logger.warning(f"⚠️ S3 storage unavailable (non-fatal): {e}")
+                self._minio_client = None  # keep it None so we can retry later
+                return None
+        return self._minio_client
+
+    def _ensure_bucket_exists(self):
+        """
+        Verify that the required S3 buckets exist on Backblaze B2.
+
+        A missing bucket only produces a warning; an S3Error raised while
+        checking is logged and re-raised.
+        """
+        try:
+            for bucket_name in [
+                self.config.minio_video_bucket,
+                self.config.minio_keyframe_bucket,
+                self.config.minio_reports_bucket,
+            ]:
+                if self._minio_client.bucket_exists(bucket_name):
+                    logger.info(f"✅ S3 bucket verified: {bucket_name}")
+                else:
+                    logger.warning(f"⚠️ S3 bucket not found: {bucket_name} — create it in Backblaze B2 dashboard")
+        except S3Error as e:
+            logger.error(f"❌ Failed to verify S3 buckets: {e}")
+            raise
+
+    def test_connections(self):
+        """
+        Test both MongoDB and MinIO connections, reporting each outcome via
+        the logger and stdout.
+
+        Returns:
+            True when MongoDB is reachable. The storage result is reported
+            but does not affect the return value.
+        """
+        mongodb_success = False
+
+        try:
+            # Test MongoDB
+            self.mongo_client.admin.command('ping')
+            collections = self.db.list_collection_names()
+            logger.info(f"✅ MongoDB test successful. Collections: {collections}")
+            print(f"✅ MongoDB connected successfully. Collections: {collections}")
+            mongodb_success = True
+
+        except Exception as e:
+            logger.error(f"❌ MongoDB connection failed: {e}")
+            print(f"❌ MongoDB connection failed: {e}")
+
+        try:
+            # Test S3 storage (Backblaze B2)
+            storage = self.minio_client
+            if storage is None:
+                # The property already logged the root cause; raise a readable
+                # error instead of tripping over None below
+                raise ConnectionError("storage client could not be initialised")
+            buckets = storage.list_buckets()
+            bucket_names = [bucket.name for bucket in buckets]
+            logger.info(f"✅ S3 storage test successful. Buckets: {bucket_names}")
+            print(f"✅ S3 storage (Backblaze B2) connected successfully. Buckets: {bucket_names}")
+
+        except Exception as e:
+            logger.error(f"❌ S3 storage connection failed: {e}")
+            print(f"❌ S3 storage connection failed: {e}")
+            print("💡 Check MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY env vars.")
+
+        return mongodb_success  # At minimum, we need MongoDB working
+
+    def close_connections(self):
+        """
+        Close database connections and forget the cached MongoDB handles, so
+        a later access builds a fresh client instead of reusing a closed one.
+        """
+        if self._mongodb_client is not None:
+            self._mongodb_client.close()
+            self._mongodb_client = None
+            self._db = None
+            logger.info("MongoDB connection closed")
+
+def get_presigned_url(minio_client, bucket_name: str, object_name: str, expires: timedelta = timedelta(hours=1)):
+    """
+    Generate presigned URL for S3 object access (works with Backblaze B2)
+
+    Args:
+        minio_client: Storage client, or None when storage is unavailable
+        bucket_name: Bucket holding the object
+        object_name: Key of the object inside the bucket
+        expires: How long the URL stays valid (default: one hour)
+
+    Returns:
+        The presigned URL, or None when no client was supplied or the
+        client raised an S3Error.
+    """
+    # DatabaseManager.minio_client yields None when storage is unreachable;
+    # treat that like any other failure instead of raising AttributeError
+    if minio_client is None:
+        return None
+    try:
+        return minio_client.presigned_get_object(bucket_name, object_name, expires=expires)
+    except S3Error as e:
+        logger.error(f"Failed to generate presigned URL for {object_name}: {e}")
+        return None
+
+if __name__ == "__main__":
+    # Manual smoke test: the outcome printed here follows the boolean
+    # returned by test_connections()
+    db_manager = DatabaseManager()
+    outcome = (
+        "✅ All database connections working!"
+        if db_manager.test_connections()
+        else "❌ Database connection issues detected"
+    )
+    print(outcome)
\ No newline at end of file
diff --git a/database/keyframe_repository.py b/database/keyframe_repository.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e43d6cb609fcd9719137241842355993aaa72b3
--- /dev/null
+++ b/database/keyframe_repository.py
@@ -0,0 +1,243 @@
+"""
+Keyframe Repository for DetectifAI Database Operations
+
+This module provides MinIO storage and database operations for keyframes.
+"""
+
+import os
+import io
+import cv2
+import numpy as np
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+import logging
+from minio.error import S3Error
+
+logger = logging.getLogger(__name__)
+
+class KeyframeRepository:
+    """
+    Repository for keyframe operations with S3 storage and MongoDB.
+
+    Keyframe images go to the keyframe bucket named in the manager's config;
+    keyframe metadata documents go to the ``keyframes`` MongoDB collection.
+    """
+
+    def __init__(self, db_manager):
+        """
+        Args:
+            db_manager: Object exposing ``db``, ``minio_client`` and
+                ``config.minio_keyframe_bucket``.
+        """
+        self._db_manager = db_manager
+        self.db = db_manager.db
+        self.bucket = db_manager.config.minio_keyframe_bucket  # Use dedicated keyframes bucket
+        self.collection = self.db.keyframes  # MongoDB collection for keyframe metadata
+
+    @property
+    def minio(self):
+        """Lazy access to S3 storage — tolerates unavailable storage"""
+        return self._db_manager.minio_client
+
+    @staticmethod
+    def _field(record, name, default=None):
+        """Read ``name`` from a dict-style record (via ``get``) or an attribute-style one."""
+        getter = getattr(record, 'get', None)
+        if callable(getter):
+            return getter(name, default)
+        return getattr(record, name, default)
+
+    def save_keyframe_to_minio(self, video_id: str, frame_data: bytes, frame_number: int, timestamp: float) -> Optional[str]:
+        """
+        Save a single keyframe directly to S3 storage
+
+        Args:
+            video_id: Video identifier, used as the object key prefix
+            frame_data: Encoded JPEG bytes
+            frame_number: Frame index, zero-padded into the object name
+            timestamp: Frame timestamp (accepted but not stored by this method)
+
+        Returns:
+            The object key, or None when storage is unavailable or the upload fails
+        """
+        # Read the lazy property once: it may yield None on any access while
+        # storage is down, so checking and using it separately is not safe
+        client = self.minio
+        if client is None:
+            return None
+        try:
+            minio_path = f"{video_id}/frame_{frame_number:06d}.jpg"  # Use consistent naming pattern
+
+            # Upload bytes directly to MinIO through an in-memory stream
+            client.put_object(
+                self.bucket,
+                minio_path,
+                io.BytesIO(frame_data),
+                length=len(frame_data),
+                content_type='image/jpeg'
+            )
+            logger.info(f"✅ Uploaded keyframe to MinIO: {minio_path}")
+            return minio_path
+
+        except Exception as e:
+            logger.error(f"❌ Failed to upload keyframe to MinIO: {e}")
+            return None
+
+    def save_keyframes_batch(self, video_id: str, keyframes: List) -> List[Dict]:
+        """
+        Save multiple keyframes directly to MinIO and locally, return their storage info
+
+        Args:
+            video_id: Video identifier, used for the local folder and the object key prefix
+            keyframes: Items that are either records themselves or objects
+                carrying a ``frame_data`` record. A record may be dict-style
+                or attribute-style. The image comes from its ``frame`` entry
+                (an array) or, when that is missing, is loaded from the file
+                at ``frame_path``.
+
+        Returns:
+            One info dict per keyframe that was uploaded successfully. Frames
+            without image data, frames that fail to encode and frames whose
+            upload fails are left out. If an unexpected error interrupts the
+            batch, the entries collected so far are returned.
+        """
+        keyframe_info = []
+
+        try:
+            # Create local storage directory
+            local_dir = os.path.join("video_processing_outputs", "keyframes", video_id)
+            os.makedirs(local_dir, exist_ok=True)
+
+            for keyframe in keyframes:
+                # Handle KeyframeResult objects
+                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+
+                frame = self._field(frame_data, 'frame')  # numpy array
+                frame_number = self._field(frame_data, 'frame_number', 0)
+                timestamp = self._field(frame_data, 'timestamp', 0.0)
+
+                if frame is None:
+                    # Records that only reference an image on disk: load it
+                    frame_path = self._field(frame_data, 'frame_path')
+                    if frame_path:
+                        frame = cv2.imread(frame_path)  # None when unreadable
+
+                if frame is None:
+                    continue
+
+                # Convert numpy array to jpg bytes
+                is_success, buffer = cv2.imencode('.jpg', frame)
+                if not is_success:
+                    continue
+
+                frame_bytes = buffer.tobytes()
+
+                # Save locally
+                local_filename = f"frame_{frame_number:06d}.jpg"
+                local_path = os.path.join(local_dir, local_filename)
+                with open(local_path, 'wb') as f:
+                    f.write(frame_bytes)
+                logger.info(f"✅ Keyframe saved locally: {local_path}")
+
+                # Upload bytes directly to MinIO
+                minio_path = self.save_keyframe_to_minio(
+                    video_id, frame_bytes, frame_number, timestamp
+                )
+
+                if minio_path:
+                    keyframe_info.append({
+                        'frame_number': frame_number,
+                        'timestamp': timestamp,
+                        'minio_path': minio_path,
+                        'local_path': local_path,
+                        'quality_score': self._field(frame_data, 'quality_score', 0.0),
+                        'enhancement_applied': self._field(frame_data, 'enhancement_applied', False)
+                    })
+
+            logger.info(f"✅ Uploaded {len(keyframe_info)} keyframes to MinIO and saved locally for video {video_id}")
+            return keyframe_info
+
+        except Exception as e:
+            logger.error(f"❌ Failed to upload keyframes batch: {e}")
+            return keyframe_info  # Return whatever was successful
+
+    def get_keyframe_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> Optional[str]:
+        """
+        Generate presigned URL for keyframe access
+
+        Returns:
+            The URL, or None when storage is unavailable or signing raises an S3Error
+        """
+        client = self.minio
+        if client is None:
+            return None
+        try:
+            return client.presigned_get_object(self.bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for keyframe: {e}")
+            return None
+
+    def get_video_keyframes_presigned_urls(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> List[Dict]:
+        """
+        Get presigned URLs for all keyframes of a video
+
+        Args:
+            video_id: Video identifier used as the object key prefix
+            expires: Validity period of each presigned URL
+
+        Returns:
+            One dict per '.jpg' object that could be signed, sorted by frame
+            number. An empty list on error. When storage is unavailable the
+            result comes from ``_get_keyframes_from_local`` if this object
+            provides it, otherwise it is an empty list.
+        """
+        client = self.minio
+        if client is None:
+            return self._get_keyframes_from_local(video_id) if hasattr(self, '_get_keyframes_from_local') else []
+        try:
+            # Try both storage patterns:
+            #   1) {video_id}/keyframes/frame_*.jpg  (legacy / some pipelines)
+            #   2) {video_id}/frame_*.jpg            (save_keyframe_to_minio pattern)
+            logger.info(f"🔍 Looking for keyframes in bucket '{self.bucket}' for video '{video_id}'")
+            objects = list(client.list_objects(self.bucket, prefix=f"{video_id}/keyframes/", recursive=True))
+            if not objects:
+                # Fallback: flat storage path used by save_keyframe_to_minio
+                objects = list(client.list_objects(self.bucket, prefix=f"{video_id}/", recursive=True))
+            logger.info(f"📦 Found {len(objects)} objects in MinIO for keyframes")
+
+            keyframes_urls = []
+            for obj in objects:
+                if not obj.object_name.endswith('.jpg'):
+                    continue
+
+                # Extract frame number and timestamp from filename
+                filename = obj.object_name.split('/')[-1]  # e.g., "frame_000001.jpg"
+                frame_number = 0
+                timestamp = 0.0
+
+                try:
+                    # Parse frame number from filename like "frame_000001.jpg"
+                    if 'frame_' in filename:
+                        frame_str = filename.split('_')[1].split('.')[0]
+                        frame_number = int(frame_str)
+                        # Estimate timestamp from frame number (assuming 30 fps)
+                        timestamp = frame_number / 30.0
+                except (ValueError, IndexError):
+                    pass
+
+                # Object metadata, when present and parseable, overrides the
+                # values guessed from the filename; any failure here keeps
+                # the filename-based values
+                try:
+                    obj_stat = client.stat_object(self.bucket, obj.object_name)
+                    metadata = obj_stat.metadata
+                    if metadata:
+                        if 'timestamp' in metadata:
+                            try:
+                                timestamp = float(metadata['timestamp'])
+                            except (TypeError, ValueError):
+                                pass
+                        if 'frame_number' in metadata:
+                            try:
+                                frame_number = int(metadata['frame_number'])
+                            except (TypeError, ValueError):
+                                pass
+                except Exception as stat_error:
+                    logger.debug(f"Could not read metadata for {obj.object_name}: {stat_error}")
+
+                # Generate presigned URL and API URL
+                presigned_url = self.get_keyframe_presigned_url(obj.object_name, expires=expires)
+                # Also provide API endpoint URL for direct serving
+                api_url = f"/api/minio/image/{self.bucket}/{obj.object_name}"
+
+                if presigned_url:
+                    keyframes_urls.append({
+                        'frame_number': frame_number,
+                        'timestamp': timestamp,
+                        'minio_path': obj.object_name,
+                        'presigned_url': presigned_url,
+                        'url': api_url,  # Use API endpoint for better reliability
+                        'api_url': api_url,
+                        'filename': filename
+                    })
+
+            # Sort by frame number
+            keyframes_urls.sort(key=lambda x: x['frame_number'])
+
+            logger.info(f"✅ Generated {len(keyframes_urls)} presigned URLs for video {video_id} keyframes")
+            return keyframes_urls
+
+        except Exception as e:
+            logger.error(f"❌ Failed to get keyframes presigned URLs for video {video_id}: {e}")
+            return []
+
+    def create_keyframe(self, keyframe_doc: Dict[str, Any]) -> Optional[str]:
+        """
+        Save keyframe metadata to MongoDB
+
+        Args:
+            keyframe_doc: Dictionary containing keyframe metadata:
+                - camera_id: Camera identifier (for live streams)
+                - video_id: Video identifier (for uploaded videos, optional)
+                - timestamp: Frame timestamp in seconds
+                - timestamp_ms: Frame timestamp in milliseconds
+                - frame_index: Frame number/index
+                - minio_path: Path to keyframe in MinIO
+                - objects_detected: List of detected objects
+                - behaviors_detected: List of detected behaviors
+                - motion_detected: Whether motion was detected
+                - motion_score: Motion detection score
+                - created_at: Creation timestamp (filled in when missing;
+                  note that this writes into the caller's dictionary)
+
+        Returns:
+            MongoDB document ID or None
+        """
+        try:
+            # Ensure required fields
+            if 'created_at' not in keyframe_doc:
+                keyframe_doc['created_at'] = datetime.utcnow()
+
+            # Convert numpy types if present
+            try:
+                from database.models import convert_numpy_types, prepare_for_mongodb
+                keyframe_doc = convert_numpy_types(keyframe_doc)
+                keyframe_doc = prepare_for_mongodb(keyframe_doc)
+            except ImportError:
+                # Fallback if models not available
+                pass
+
+            # Insert into MongoDB
+            result = self.collection.insert_one(keyframe_doc)
+            logger.info(f"✅ Saved keyframe metadata to MongoDB: {keyframe_doc.get('minio_path', 'unknown')}")
+            return str(result.inserted_id)
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save keyframe metadata to MongoDB: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
diff --git a/database/models.py b/database/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a37735a2b263a71054ae9155c97e2edf5c70ecc
--- /dev/null
+++ b/database/models.py
@@ -0,0 +1,432 @@
+"""
+Data Models for DetectifAI Database Integration
+
+This module defines data models that map EXACTLY to the MongoDB collections
+defined in DetectifAI_db/database_setup.py schema.
+
+CRITICAL RULES:
+1. Only use fields defined in the MongoDB schema validators
+2. Extra fields must go in meta_data for video_file or use related collections
+3. Always convert numpy types before MongoDB operations
+4. Timestamps in events must be milliseconds (int/long), not seconds (float)
+"""
+
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from bson import ObjectId
+from dataclasses import dataclass, asdict
+import json
+import numpy as np
+
+# ========================================
+# Schema-Compliant Data Models
+# ========================================
+
+@dataclass
+class VideoFileModel:
+    """Document model for the video_file collection (field names follow its schema)"""
+    # Required fields (from schema)
+    video_id: str
+    user_id: str
+    file_path: str  # MinIO path or local path
+
+    # Optional fields (from schema)
+    minio_object_key: Optional[str] = None
+    minio_bucket: Optional[str] = None
+    codec: Optional[str] = None
+    fps: Optional[float] = 30.0  # bsonType: double - must be float
+    upload_date: Optional[datetime] = None
+    duration_secs: Optional[int] = None  # bsonType: int - must be INTEGER not float
+    file_size_bytes: Optional[int] = None  # bsonType: long
+    meta_data: Optional[Dict] = None  # Store ALL extra fields here (processing_status, resolution, etc.)
+
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Build the MongoDB document: fill defaults, then coerce schema types.
+
+        upload_date defaults to naive UTC now and fps to 30.0; duration_secs and
+        file_size_bytes go through int() when set; fps always through float().
+        """
+        record = asdict(self)
+
+        # Defaults for fields the caller left as None
+        if record.get('upload_date') is None:
+            record['upload_date'] = datetime.utcnow()
+        if record.get('fps') is None:
+            record['fps'] = 30.0
+
+        # Integer-typed schema fields (int / long): coerce only when present
+        for int_field in ('duration_secs', 'file_size_bytes'):
+            value = record.get(int_field)
+            if value is not None:
+                record[int_field] = int(value)
+
+        # fps is a schema double and cannot be None after the default above
+        record['fps'] = float(record['fps'])
+        return record
+
+@dataclass
+class EventModel:
+    """Document model for the event collection (field names follow its schema)"""
+    # Required fields (from schema)
+    event_id: str
+    video_id: str
+    start_timestamp_ms: int  # bsonType: long - MUST be milliseconds as INTEGER
+    end_timestamp_ms: int    # bsonType: long - MUST be milliseconds as INTEGER
+
+    # Optional fields (from schema)
+    event_type: Optional[str] = None  # 'object_detection', 'motion', 'fire', 'weapon', etc.
+    confidence_score: Optional[float] = None  # bsonType: double (NOT 'confidence')
+    is_verified: bool = False
+    is_false_positive: bool = False
+    verified_at: Optional[datetime] = None
+    verified_by: Optional[str] = None
+    visual_embedding: Optional[List[float]] = None  # For future FAISS integration
+    bounding_boxes: Optional[Dict] = None  # Store detection bboxes here as object
+
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Serialize for MongoDB with the coercions the event schema expects"""
+        record = asdict(self)
+
+        # Millisecond timestamps must be plain ints (bsonType: long)
+        for ts_field in ('start_timestamp_ms', 'end_timestamp_ms'):
+            record[ts_field] = int(record[ts_field])
+
+        # confidence_score is a schema double; leave it alone when unset
+        score = record.get('confidence_score')
+        if score is not None:
+            record['confidence_score'] = float(score)
+
+        # Unset containers are stored as empty values rather than None
+        if record.get('visual_embedding') is None:
+            record['visual_embedding'] = []
+        if record.get('bounding_boxes') is None:
+            record['bounding_boxes'] = {}
+        return record
+
+@dataclass
+class EventDescriptionModel:
+    """Maps EXACTLY to event_description collection schema"""
+    # Required fields
+    description_id: str
+    event_id: str
+    text_embedding: List[float]  # Required (empty array if not generated yet)
+
+    # Optional fields
+    caption: Optional[str] = None
+    confidence: Optional[float] = None
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return a MongoDB-ready dict; unset timestamps share one utcnow() value"""
+        data = asdict(self)
+        now = datetime.utcnow()  # read the clock once so created_at == updated_at
+        for stamp in ('created_at', 'updated_at'):
+            if data.get(stamp) is None:
+                data[stamp] = now
+        if data.get('text_embedding') is None:  # always store a list
+            data['text_embedding'] = []
+        return data
+
+@dataclass
+class EventCaptionModel:
+    """Caption record for the event_caption collection (maps EXACTLY to its schema)"""
+    # Required fields (no defaults, so both must be passed to the constructor)
+    description_id: str
+    description: str
+    _id: Optional[ObjectId] = None  # emitted as None by to_dict() when unset
+    
+    def to_dict(self) -> Dict:  # plain asdict() copy; no defaults or coercions applied
+        return asdict(self)
+
+@dataclass
+class EventClipModel:
+    """Document model for the event_clip collection (field names follow its schema)"""
+    # Required fields
+    clip_id: str
+    event_id: str
+    clip_path: str
+
+    # Optional fields
+    thumbnail_path: Optional[str] = None
+    minio_object_key: Optional[str] = None
+    minio_bucket: Optional[str] = None
+    duration_ms: Optional[int] = None  # bsonType: long
+    extracted_at: Optional[datetime] = None
+    file_size_bytes: Optional[int] = None  # bsonType: long
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Serialize for MongoDB: default extracted_at, coerce long fields to int"""
+        record = asdict(self)
+        if record.get('extracted_at') is None:
+            record['extracted_at'] = datetime.utcnow()
+        for long_field in ('duration_ms', 'file_size_bytes'):
+            current = record.get(long_field)
+            if current is not None:
+                record[long_field] = int(current)
+        return record
+
+@dataclass
+class DetectedFaceModel:
+    """Document model for the detected_faces collection (field names follow its schema)"""
+    # Required fields
+    face_id: str
+    event_id: str
+    detected_at: datetime
+
+    # Optional fields
+    confidence_score: Optional[float] = None
+    face_embedding: Optional[List[float]] = None
+    minio_object_key: Optional[str] = None
+    minio_bucket: Optional[str] = None
+    face_image_path: Optional[str] = None
+    bounding_boxes: Optional[Dict] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self) with an unset face_embedding stored as []"""
+        record = asdict(self)
+        record['face_embedding'] = [] if record['face_embedding'] is None else record['face_embedding']
+        return record
+
+@dataclass
+class FaceMatchModel:
+    """Document model for the face_matches collection (field names follow its schema)"""
+    # Required fields
+    match_id: str
+    face_id_1: str
+    face_id_2: str
+    similarity_score: float
+
+    # Optional fields
+    matched_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self), stamping matched_at with utcnow() when it is unset"""
+        # The override keeps matched_at in its declared key position
+        stamp = self.matched_at if self.matched_at is not None else datetime.utcnow()
+        return {**asdict(self), 'matched_at': stamp}
+
+# ========================================
+# Helper Functions for Type Safety
+# ========================================
+
+def convert_numpy_types(obj):
+    """
+    Recursively convert numpy scalars/arrays to native Python types for MongoDB.
+    Dicts, lists and tuples are rebuilt (keeping their container type); any
+    other object is returned unchanged. BSON cannot encode numpy types directly.
+    """
+    if isinstance(obj, dict):
+        return {key: convert_numpy_types(value) for key, value in obj.items()}
+    if isinstance(obj, list):
+        return [convert_numpy_types(item) for item in obj]
+    if isinstance(obj, tuple):  # previously skipped, leaking numpy values to BSON
+        return tuple(convert_numpy_types(item) for item in obj)
+    if isinstance(obj, np.ndarray):  # recurse so object-dtype arrays are cleaned too
+        return convert_numpy_types(obj.tolist())
+    if isinstance(obj, np.bool_):
+        return bool(obj)
+    if isinstance(obj, np.integer):
+        return int(obj)
+    if isinstance(obj, np.floating):
+        return float(obj)
+    return obj
+
+def seconds_to_milliseconds(seconds: float) -> int:
+    """Convert seconds to the nearest whole millisecond (int) for MongoDB long type"""
+    return int(round(seconds * 1000))  # round first: 1.001 * 1000 is 1000.9999999999999
+
+def milliseconds_to_seconds(milliseconds: int) -> float:
+    """Return milliseconds / 1000 as a float; the input is passed through float() first"""
+    return float(milliseconds) / 1000.0
+
+def prepare_for_mongodb(data: Dict) -> Dict:
+    """
+    Build the dictionary that is handed to MongoDB for insertion.
+
+    Steps:
+    1. Convert numpy values to native Python types (convert_numpy_types).
+    2. Drop a top-level '_id' key whose value is None; keep everything else.
+    """
+    native = convert_numpy_types(data)
+
+    # Only the top-level '_id' is examined; nested dicts are not filtered.
+    return {
+        key: value
+        for key, value in native.items()
+        if not (key == '_id' and value is None)
+    }
+
+def convert_objectid_to_string(doc: Dict) -> Dict:
+    """Replace ObjectId values with str(value) in place and return the same doc (for JSON serialization)"""
+    if isinstance(doc, dict):  # any non-dict argument is returned untouched
+        for key, value in doc.items():
+            if isinstance(value, ObjectId):
+                doc[key] = str(value)
+            elif isinstance(value, list):
+                doc[key] = [  # new list; only dict and ObjectId items are converted, nested lists are not
+                    convert_objectid_to_string(item) if isinstance(item, dict) 
+                    else str(item) if isinstance(item, ObjectId) 
+                    else item 
+                    for item in value
+                ]
+            elif isinstance(value, dict):
+                doc[key] = convert_objectid_to_string(value)  # recurse; nested dict is also mutated in place
+    return doc
+
+
+# ========================================
+# Subscription & Payment Models
+# ========================================
+
+@dataclass
+class SubscriptionPlanModel:
+    """Maps to subscription_plans collection with Stripe integration"""
+    # Required fields
+    plan_id: str
+    plan_name: str
+    price: float
+
+    # Optional fields
+    description: Optional[str] = None
+    features: Optional[str] = None  # Comma-separated feature list
+    storage_limit: Optional[int] = None
+    is_active: bool = True
+    stripe_product_id: Optional[str] = None
+    stripe_price_ids: Optional[Dict[str, str]] = None  # {"monthly": "price_xxx", "yearly": "price_xxx"}
+    billing_periods: Optional[List[str]] = None  # ["monthly", "yearly"]
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Convert to a MongoDB dict; unset timestamps share one utcnow() value"""
+        data = asdict(self)
+        now = datetime.utcnow()  # one clock read => created_at == updated_at
+        for stamp in ('created_at', 'updated_at'):
+            if data.get(stamp) is None:
+                data[stamp] = now
+        if data.get('stripe_price_ids') is None:
+            data['stripe_price_ids'] = {}
+        if data.get('billing_periods') is None:
+            data['billing_periods'] = []
+        return data
+
+
+@dataclass
+class UserSubscriptionModel:
+    """Maps to user_subscriptions collection with Stripe integration"""
+    # Required fields
+    subscription_id: str
+    user_id: str
+    plan_id: str
+
+    # Optional fields
+    start_date: Optional[datetime] = None
+    end_date: Optional[datetime] = None
+    stripe_customer_id: Optional[str] = None
+    stripe_subscription_id: Optional[str] = None
+    billing_period: Optional[str] = None  # "monthly" or "yearly"
+    status: Optional[str] = "active"  # 'active', 'canceled', 'past_due', 'trialing'
+    current_period_start: Optional[datetime] = None
+    current_period_end: Optional[datetime] = None
+    cancel_at_period_end: bool = False
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for MongoDB insertion; unset start_date,
+        created_at and updated_at all receive the same utcnow() value."""
+        data = asdict(self)
+        # Read the clock once so the three defaults cannot drift apart
+        now = datetime.utcnow()
+        for stamp in ('start_date', 'created_at', 'updated_at'):
+            if data.get(stamp) is None:
+                data[stamp] = now
+        return data
+
+
+@dataclass
+class SubscriptionEventModel:
+    """Audit-trail record for the subscription_events collection"""
+    # Required fields
+    event_id: str
+    subscription_id: str
+    event_type: str  # 'created', 'updated', 'canceled', 'payment_succeeded', etc.
+
+    # Optional fields
+    stripe_event_id: Optional[str] = None
+    event_data: Optional[Dict] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Serialize the audit record, filling created_at and event_data defaults"""
+        record = asdict(self)
+        if record['created_at'] is None:
+            record['created_at'] = datetime.utcnow()
+        if record['event_data'] is None:
+            record['event_data'] = {}
+        return record
+
+
+@dataclass
+class PaymentHistoryModel:
+    """Transaction record for the payment_history collection"""
+    # Required fields
+    payment_id: str
+    user_id: str
+    amount: float
+
+    # Optional fields
+    stripe_payment_intent_id: Optional[str] = None
+    currency: str = "USD"
+    status: Optional[str] = None  # 'succeeded', 'pending', 'failed'
+    payment_method: Optional[str] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Serialize the payment; amount is forced to float, created_at defaulted"""
+        record = asdict(self)
+        if record['created_at'] is None:
+            record['created_at'] = datetime.utcnow()
+        # amount is required, so the cast is unconditional
+        record['amount'] = float(record['amount'])
+        return record
+
+
+@dataclass
+class SubscriptionUsageModel:
+    """Maps to subscription_usage collection for analytics and limits"""
+    # Required fields
+    usage_id: str
+    user_id: str
+    usage_type: str  # 'video_processed', 'storage_used', 'searches_performed'
+
+    # Optional fields
+    usage_value: Optional[float] = None
+    usage_date: Optional[datetime] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Convert to a MongoDB dict; unset dates share one utcnow() value"""
+        data = asdict(self)
+        now = datetime.utcnow()  # one clock read => usage_date == created_at
+        for stamp in ('usage_date', 'created_at'):
+            if data.get(stamp) is None:
+                data[stamp] = now
+        if data.get('usage_value') is not None:
+            data['usage_value'] = float(data['usage_value'])
+        return data
+
diff --git a/database/models_backup.py b/database/models_backup.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a45f51abfa627bf1a67e9b33b40a692d73ca44c
--- /dev/null
+++ b/database/models_backup.py
@@ -0,0 +1,330 @@
+"""
+Data Models for DetectifAI Database Integration
+
+This module defines data models that map EXACTLY to the MongoDB collections
+defined in DetectifAI_db/database_setup.py schema.
+
+CRITICAL: Only use fields defined in the MongoDB schema validators.
+Extra fields must go in meta_data for video_file or use related collections.
+"""
+
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from bson import ObjectId
+from dataclasses import dataclass, asdict
+import json
+import numpy as np
+
+@dataclass
+class VideoFileModel:
+    """Document model for the video_file collection (field names follow its schema)"""
+    # Required fields (from schema)
+    video_id: str
+    user_id: str
+    file_path: str  # MinIO path or local path
+
+    # Optional fields (from schema)
+    minio_object_key: Optional[str] = None
+    minio_bucket: Optional[str] = None
+    codec: Optional[str] = None
+    fps: Optional[float] = 30.0  # bsonType: double - must be float
+    upload_date: Optional[datetime] = None
+    duration_secs: Optional[int] = None  # bsonType: int - must be INTEGER not float
+    file_size_bytes: Optional[int] = None  # bsonType: long
+    meta_data: Optional[Dict] = None  # Store ALL extra fields here (processing_status, resolution, etc.)
+
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Build the MongoDB document: fill defaults, then coerce schema types.
+
+        upload_date defaults to naive UTC now and fps to 30.0; duration_secs and
+        file_size_bytes go through int() when set; fps always through float().
+        """
+        record = asdict(self)
+
+        # Defaults for fields the caller left as None
+        if record.get('upload_date') is None:
+            record['upload_date'] = datetime.utcnow()
+        if record.get('fps') is None:
+            record['fps'] = 30.0
+
+        # Integer-typed schema fields (int / long): coerce only when present
+        for int_field in ('duration_secs', 'file_size_bytes'):
+            value = record.get(int_field)
+            if value is not None:
+                record[int_field] = int(value)
+
+        # fps is a schema double and cannot be None after the default above
+        record['fps'] = float(record['fps'])
+        return record
+
+@dataclass 
+class DetectedFaceModel:
+    """Maps to existing detected_faces collection; to_dict() emits every field unchanged"""
+    video_id: str
+    frame_timestamp: float  # NOTE(review): unit is not stated here - presumably seconds; confirm
+    face_bbox: List[float]  # [x1, y1, x2, y2]
+    confidence: float
+    face_encoding: Optional[List[float]] = None
+    keyframe_minio_path: Optional[str] = None
+    keyframe_id: Optional[ObjectId] = None
+    person_id: Optional[str] = None
+    is_suspicious: bool = False
+    _id: Optional[ObjectId] = None  # emitted as None by to_dict() when unset
+    
+    def to_dict(self) -> Dict:  # plain asdict() copy; no defaults or coercions applied
+        return asdict(self)
+
+@dataclass
+class EventModel:
+    """Document model for the event collection (field names follow its schema)"""
+    # Required fields (from schema)
+    event_id: str
+    video_id: str
+    start_timestamp_ms: int  # bsonType: long - MUST be milliseconds as INTEGER
+    end_timestamp_ms: int    # bsonType: long - MUST be milliseconds as INTEGER
+
+    # Optional fields (from schema)
+    event_type: Optional[str] = None  # 'object_detection', 'motion', 'fire', 'weapon', etc.
+    confidence_score: Optional[float] = None  # bsonType: double (NOT 'confidence')
+    is_verified: bool = False
+    is_false_positive: bool = False
+    verified_at: Optional[datetime] = None
+    verified_by: Optional[str] = None
+    visual_embedding: Optional[List[float]] = None  # For future FAISS integration
+    bounding_boxes: Optional[Dict] = None  # Store detection bboxes here as object
+
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Serialize for MongoDB with the coercions the event schema expects"""
+        record = asdict(self)
+
+        # Millisecond timestamps must be plain ints (bsonType: long)
+        for ts_field in ('start_timestamp_ms', 'end_timestamp_ms'):
+            record[ts_field] = int(record[ts_field])
+
+        # confidence_score is a schema double; leave it alone when unset
+        score = record.get('confidence_score')
+        if score is not None:
+            record['confidence_score'] = float(score)
+
+        # Unset containers are stored as empty values rather than None
+        if record.get('visual_embedding') is None:
+            record['visual_embedding'] = []
+        if record.get('bounding_boxes') is None:
+            record['bounding_boxes'] = {}
+        return record
+
+@dataclass
+class EventCaptionModel:
+    """Caption record for the existing event_caption collection"""
+    event_id: ObjectId
+    video_id: str
+    caption_text: str
+    generated_by: str = "system"  # system, user, ai
+    confidence: Optional[float] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self), stamping created_at with utcnow() when it is unset"""
+        # The override keeps created_at in its declared key position
+        stamp = self.created_at if self.created_at is not None else datetime.utcnow()
+        return {**asdict(self), 'created_at': stamp}
+
+@dataclass
+class EventClipModel:
+    """Clip record for the existing event_clip collection"""
+    event_id: ObjectId
+    video_id: str
+    clip_start_timestamp: float
+    clip_end_timestamp: float
+    minio_clip_path: str
+    clip_duration: float
+    frame_count: int
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        record = asdict(self)  # asdict() always emits the created_at key
+        if record['created_at'] is None:
+            record['created_at'] = datetime.utcnow()
+        return record
+
+@dataclass
+class EventDescriptionModel:
+    """Description record for the existing event_description collection"""
+    event_id: ObjectId
+    video_id: str
+    description_text: str
+    description_type: str = "automatic"  # automatic, manual, ai_generated
+    tags: Optional[List[str]] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self), stamping created_at with utcnow() when it is unset"""
+        # tags is passed through as-is, including None
+        stamp = self.created_at if self.created_at is not None else datetime.utcnow()
+        return {**asdict(self), 'created_at': stamp}
+
+@dataclass
+class FaceMatchModel:
+    """Face-pair comparison record for the existing face_matches collection"""
+    video_id: str
+    face_1_id: ObjectId
+    face_2_id: ObjectId
+    similarity_score: float
+    match_confidence: float
+    is_match: bool
+    person_id: Optional[str] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        record = asdict(self)  # asdict() always emits the created_at key
+        if record['created_at'] is None:
+            record['created_at'] = datetime.utcnow()
+        return record
+
+# New models for video processing pipeline
+
+@dataclass
+class KeyframeModel:
+    """Record for an extracted keyframe (new collection)"""
+    video_id: str
+    frame_number: int
+    timestamp: float
+    quality_score: float
+    motion_score: float
+    minio_path: str
+    enhancement_applied: bool = False
+    face_count: int = 0
+    object_detections: Optional[List[Dict]] = None
+    processing_metadata: Optional[Dict] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self) with created_at and object_detections defaulted"""
+        record = asdict(self)
+        for name, factory in (('created_at', datetime.utcnow), ('object_detections', list)):
+            if record[name] is None:
+                record[name] = factory()
+        return record
+
+@dataclass
+class VideoSegmentModel:
+    """Record for one video segment (new collection)"""
+    video_id: str
+    segment_id: int
+    start_timestamp: float
+    end_timestamp: float
+    duration: float
+    start_frame: int
+    end_frame: int
+    keyframe_ids: List[ObjectId]
+    activity_level: str  # low, medium, high
+    motion_statistics: Optional[Dict] = None
+    segment_minio_path: Optional[str] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self), stamping created_at with utcnow() when it is unset"""
+        # The override keeps created_at in its declared key position
+        stamp = self.created_at if self.created_at is not None else datetime.utcnow()
+        return {**asdict(self), 'created_at': stamp}
+
+@dataclass
+class ProcessingJobModel:
+    """Record for tracking a processing job (new collection)"""
+    video_id: str
+    job_type: str = "complete_processing"  # complete_processing, keyframe_extraction, object_detection
+    status: str = "queued"  # queued, processing, completed, failed
+    progress: int = 0  # 0-100
+    message: str = ""
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+    processing_stats: Optional[Dict] = None
+    error_details: Optional[Dict] = None
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        record = asdict(self)  # started_at / completed_at are left as given
+        if record['created_at'] is None:
+            record['created_at'] = datetime.utcnow()
+        return record
+
+@dataclass
+class ObjectDetectionModel:
+    """Record for a single object detection result"""
+    video_id: str
+    keyframe_id: ObjectId
+    detection_id: str
+    class_name: str  # fire, smoke, knife, gun
+    confidence: float
+    bbox: List[float]  # [x1, y1, x2, y2]
+    center_point: List[float]  # [x, y]
+    area: float
+    frame_timestamp: float
+    detection_model: str  # 'fire' for fire_YOLO11.pt, 'weapon' for weapon_YOLO11.pt
+    threat_level: str = "low"
+    created_at: Optional[datetime] = None
+    _id: Optional[ObjectId] = None
+
+    def to_dict(self) -> Dict:
+        """Return asdict(self), stamping created_at with utcnow() when it is unset"""
+        # The override keeps created_at in its declared key position
+        stamp = self.created_at if self.created_at is not None else datetime.utcnow()
+        return {**asdict(self), 'created_at': stamp}
+
+class ModelFactory:
+    """Static constructors that build model instances from documents via Model(**doc)"""
+    
+    @staticmethod
+    def create_video_file(doc: Dict) -> VideoFileModel:
+        """Create VideoFileModel from MongoDB document; a key that is not a model field raises TypeError"""
+        return VideoFileModel(**doc)
+    
+    @staticmethod
+    def create_keyframe(doc: Dict) -> KeyframeModel:
+        """Create KeyframeModel from MongoDB document; a key that is not a model field raises TypeError"""
+        return KeyframeModel(**doc)
+    
+    @staticmethod
+    def create_event(doc: Dict) -> EventModel:
+        """Create EventModel from MongoDB document; a key that is not a model field raises TypeError"""
+        return EventModel(**doc)
+    
+    @staticmethod
+    def create_processing_job(doc: Dict) -> ProcessingJobModel:
+        """Create ProcessingJobModel from MongoDB document; a key that is not a model field raises TypeError"""
+        return ProcessingJobModel(**doc)
+
+# Helper functions for database operations
+
+def prepare_for_mongodb(data: Dict) -> Dict:
+    """Return a shallow copy of data without a top-level '_id' that is None"""
+    # Every other key, including an '_id' that holds a value, is kept in order;
+    # the values themselves are not copied.
+    return {
+        key: value
+        for key, value in data.items()
+        if not (key == '_id' and value is None)
+    }
+
+def convert_objectid_to_string(doc: Dict) -> Dict:
+    """Replace ObjectId values with str(value) in place and return the same doc (for JSON serialization)"""
+    if isinstance(doc, dict):  # any non-dict argument is returned untouched
+        for key, value in doc.items():
+            if isinstance(value, ObjectId):
+                doc[key] = str(value)
+            elif isinstance(value, list):  # new list; only dict and ObjectId items are converted
+                doc[key] = [convert_objectid_to_string(item) if isinstance(item, dict) else str(item) if isinstance(item, ObjectId) else item for item in value]
+            elif isinstance(value, dict):
+                doc[key] = convert_objectid_to_string(value)  # recurse; nested dict is also mutated in place
+    return doc
\ No newline at end of file
diff --git a/database/repositories.py b/database/repositories.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b5f3b8e292aef0fca435f51786fb1c9ce45ac56
--- /dev/null
+++ b/database/repositories.py
@@ -0,0 +1,516 @@
+"""
+Repository Classes for DetectifAI Database Operations
+
+This module provides data access layer for MongoDB and MinIO operations.
+Each repository handles CRUD operations for specific collections.
+"""
+
+import os
+import io
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from bson import ObjectId
+from pymongo.collection import Collection
+from minio import Minio
+from minio.error import S3Error
+import logging
+
+from .models import (
+    VideoFileModel, EventModel, EventDescriptionModel, DetectedFaceModel,
+    prepare_for_mongodb, convert_objectid_to_string, convert_numpy_types,
+    seconds_to_milliseconds
+)
+
+logger = logging.getLogger(__name__)
+
+class BaseRepository:
+    """Base repository class with common functionality"""
+    
+    def __init__(self, db_manager):
+        self.db = db_manager.db
+        self._db_manager = db_manager
+        self.video_bucket = db_manager.config.minio_video_bucket
+        self.keyframe_bucket = db_manager.config.minio_keyframe_bucket
+
+    @property
+    def minio(self):
+        """Lazy access to S3 storage — tolerates unavailable storage"""
+        return self._db_manager.minio_client
+
+class VideoRepository(BaseRepository):
+    """Repository for video_file collection operations"""
+    
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.video_file
+    
+    def create_video_record(self, video_data: Dict) -> str:
+        """Create new video record matching MongoDB schema exactly"""
+        try:
+            # Extract required fields
+            video_id = video_data.get('video_id')
+            user_id = video_data.get('user_id', 'system')
+            file_path = video_data.get('file_path', f"videos/{video_id}.mp4")
+            
+            # Build schema-compliant record
+            record = {
+                "video_id": video_id,
+                "user_id": user_id,
+                "file_path": file_path,
+                "upload_date": datetime.utcnow()
+            }
+            
+            # Add optional schema fields
+            if 'fps' in video_data:
+                record['fps'] = float(video_data['fps'])  # Ensure double type
+            else:
+                record['fps'] = 30.0  # Default
+            
+            if 'duration' in video_data or 'duration_secs' in video_data:
+                duration = video_data.get('duration_secs') or video_data.get('duration', 0)
+                record['duration_secs'] = int(duration)  # Ensure integer
+            
+            if 'file_size' in video_data or 'file_size_bytes' in video_data:
+                file_size = video_data.get('file_size_bytes') or video_data.get('file_size', 0)
+                record['file_size_bytes'] = int(file_size)  # Ensure long
+            
+            if 'codec' in video_data:
+                record['codec'] = str(video_data['codec'])
+            
+            if 'minio_object_key' in video_data:
+                record['minio_object_key'] = video_data['minio_object_key']
+            
+            if 'minio_bucket' in video_data:
+                record['minio_bucket'] = video_data['minio_bucket']
+            
+            # Build meta_data object for extra fields
+            meta_data = {}
+            extra_fields = [
+                'processing_status', 'resolution', 'filename', 'keyframe_count',
+                'event_count', 'compression_applied', 'enhancement_applied',
+                'error_message', 'processing_config'
+            ]
+            
+            for field in extra_fields:
+                if field in video_data:
+                    meta_data[field] = video_data[field]
+            
+            if meta_data:
+                record['meta_data'] = meta_data
+            
+            # Convert numpy types and prepare for MongoDB
+            record = prepare_for_mongodb(record)
+            
+            result = self.collection.insert_one(record)
+            logger.info(f"✅ Created video record: {video_id}")
+            return str(result.inserted_id)
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to create video record: {e}")
+            raise
+    
+    def get_video_by_id(self, video_id: str) -> Optional[Dict]:
+        """Get video record by video_id"""
+        try:
+            doc = self.collection.find_one({"video_id": video_id})
+            if doc:
+                return convert_objectid_to_string(doc)
+            return None
+        except Exception as e:
+            logger.error(f"❌ Failed to get video {video_id}: {e}")
+            return None
+    
+    def update_processing_status(self, video_id: str, status: str, metadata: Dict = None):
+        """Update video processing status in meta_data field"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                return
+            
+            current_meta = video.get('meta_data', {})
+            current_meta['processing_status'] = status
+            current_meta['last_updated'] = datetime.utcnow().isoformat()
+            
+            # Add any additional metadata
+            if metadata:
+                current_meta.update(metadata)
+            
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            
+            if result.matched_count > 0:
+                logger.info(f"✅ Updated video status: {video_id} -> {status}")
+            else:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                
+        except Exception as e:
+            logger.error(f"❌ Failed to update video status: {e}")
+            raise
+    
+    def update_metadata(self, video_id: str, metadata: Dict):
+        """Update video meta_data field with processing information"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found: {video_id}")
+                return
+            
+            current_meta = video.get('meta_data', {})
+            current_meta.update(metadata)
+            
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            
+            logger.info(f"✅ Updated video metadata: {video_id}")
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to update video metadata: {e}")
+            raise
+    
+    def upload_video_to_minio(self, local_path: str, video_id: str) -> str:
+        """Upload video file to S3 storage"""
+        if self.minio is None:
+            logger.warning("S3 storage unavailable — skipping video upload to object storage")
+            return f"local://{local_path}"
+        try:
+            minio_path = f"original/{video_id}/video.mp4"
+            
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.video_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type='video/mp4'
+                )
+            
+            logger.info(f"✅ Uploaded video to S3: {minio_path}")
+            return minio_path
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to upload video to S3: {e}")
+            raise
+    
+    def get_video_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> Optional[str]:
+        """Generate presigned GET URL for a video-bucket object; None if storage is unavailable or an S3Error occurs"""
+        if self.minio is None:
+            return None
+        try:
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL: {e}")
+            return None
+
+    def get_compressed_video_presigned_url(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> Optional[str]:
+        """Generate presigned GET URL for compressed/{video_id}/video.mp4; None if storage is unavailable or an S3Error occurs"""
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"compressed/{video_id}/video.mp4"
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for compressed video: {e}")
+            return None
+
+
+# ========================================
+# Event Repository (Schema-Compliant)
+# ========================================
+
+class EventRepository(BaseRepository):
+    """Repository for event collection operations - Schema Compliant"""
+    
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.event
+        self.event_description_collection = self.db.event_description
+    
+    def create_event(self, event_data: Dict) -> str:
+        """Create event - alias for save_event for compatibility"""
+        return self.save_event(event_data)
+    
+    def save_event(self, event_data: Dict) -> str:
+        """Save event matching MongoDB schema exactly"""
+        try:
+            import uuid
+            
+            # Extract required fields
+            event_id = event_data.get('event_id', str(uuid.uuid4()))
+            video_id = event_data.get('video_id', event_data.get('camera_id', 'unknown'))
+            
+            # Convert timestamps: seconds (float) -> milliseconds (int)
+            start_time = event_data.get('start_timestamp', 0.0)
+            end_time = event_data.get('end_timestamp', 0.0)
+            start_timestamp_ms = seconds_to_milliseconds(start_time)
+            end_timestamp_ms = seconds_to_milliseconds(end_time)
+            
+            # Build schema-compliant event document
+            event_doc = {
+                "event_id": event_id,
+                "video_id": video_id,
+                "start_timestamp_ms": int(start_timestamp_ms),
+                "end_timestamp_ms": int(end_timestamp_ms),
+                "event_type": event_data.get('event_type', 'motion'),
+                "confidence_score": float(event_data.get('confidence', 0.0)),
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_at": None,
+                "verified_by": None,
+                "visual_embedding": [],
+                "bounding_boxes": event_data.get('bounding_boxes', {})
+            }
+            
+            # Convert numpy types
+            event_doc = convert_numpy_types(event_doc)
+            event_doc = prepare_for_mongodb(event_doc)
+            
+            result = self.collection.insert_one(event_doc)
+            logger.info(f"✅ Saved event: {event_id} ({event_data.get('event_type')})")
+            
+            # If there's additional description info, save to event_description
+            if event_data.get('description') or event_data.get('caption'):
+                self._save_event_description(event_id, event_data)
+            
+            return str(result.inserted_id)
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to save event: {e}")
+            raise
+    
+    def save_detection_events(self, video_id: str, detection_groups: List[Dict]) -> List[str]:
+        """Save object detection events with proper schema compliance"""
+        event_ids = []
+        
+        try:
+            for group in detection_groups:
+                # Build bounding_boxes object
+                bboxes = {
+                    "detections": [
+                        {
+                            "class": det.get('class_name', ''),
+                            "confidence": float(det.get('confidence', 0.0)),
+                            "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                            "timestamp": float(det.get('frame_timestamp', 0.0)),
+                            "model": det.get('detection_model', '')
+                        }
+                        for det in group.get('detections', [])
+                    ]
+                }
+                
+                event_data = {
+                    "video_id": video_id,
+                    "start_timestamp": group.get('start_timestamp', 0.0),
+                    "end_timestamp": group.get('end_timestamp', 0.0),
+                    "event_type": f"object_detection_{group.get('class', 'unknown')}",
+                    "confidence": group.get('max_confidence', 0.0),
+                    "bounding_boxes": bboxes,
+                    "description": f"Detected {len(group.get('detections', []))} {group.get('class', 'object')}(s)"
+                }
+                
+                event_id = self.save_event(event_data)
+                event_ids.append(event_id)
+            
+            logger.info(f"✅ Saved {len(event_ids)} detection events for video {video_id}")
+            return event_ids
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection events: {e}")
+            raise
+    
+    def _save_event_description(self, event_id: str, event_data: Dict):
+        """Save detailed event description to event_description collection.
+        
+        Generates real text embeddings using SentenceTransformer (all-mpnet-base-v2)
+        for compatibility with NLP search in query_retreival.py.
+        """
+        try:
+            import uuid
+            
+            description_text = event_data.get('description') or event_data.get('caption', '')
+            
+            if not description_text:
+                return
+            
+            # Generate real text embedding for NLP search
+            text_embedding = self._generate_text_embedding(description_text)
+            
+            description_doc = {
+                "description_id": str(uuid.uuid4()),
+                "event_id": event_id,
+                "caption": description_text,
+                "text_embedding": text_embedding,
+                "confidence": float(event_data.get('confidence', 0.0)),
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+            
+            description_doc = prepare_for_mongodb(description_doc)
+            self.event_description_collection.insert_one(description_doc)
+            logger.info(f"✅ Saved event description for {event_id} (embedding: {len(text_embedding)}-dim)")
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to save event description: {e}")
+    
+    def _generate_text_embedding(self, text: str) -> list:
+        """Generate text embedding using SentenceTransformer.
+        
+        Lazy-loads the model on first call and caches it as a class attribute.
+        Uses all-mpnet-base-v2 (768-dim) for NLP search compatibility.
+        """
+        # Lazy-load and cache the model at class level
+        if not hasattr(EventRepository, '_embedding_model'):
+            EventRepository._embedding_model = None
+        
+        if EventRepository._embedding_model is None:
+            try:
+                from sentence_transformers import SentenceTransformer
+                EventRepository._embedding_model = SentenceTransformer('all-mpnet-base-v2')
+                logger.info("✅ Loaded SentenceTransformer (all-mpnet-base-v2) for event embeddings")
+            except Exception as e:
+                logger.error(f"Failed to load SentenceTransformer: {e}")
+                return []
+        
+        try:
+            import numpy as np
+            embedding = EventRepository._embedding_model.encode(text, normalize_embeddings=True)
+            return embedding.astype(np.float32).tolist()
+        except Exception as e:
+            logger.error(f"Failed to generate text embedding: {e}")
+            return []
+    
+    def get_events_by_video_id(self, video_id: str, event_type: str = None) -> List[Dict]:
+        """Fetch a video's events sorted by ascending start_timestamp_ms.
+
+        Args:
+            video_id: Value matched against the event's ``video_id`` field.
+            event_type: If truthy, also require this exact ``event_type``.
+
+        Returns:
+            Event documents passed through convert_objectid_to_string, or an
+            empty list if the query fails (the error is logged).
+        """
+        filters = {"video_id": video_id, "event_type": event_type} if event_type else {"video_id": video_id}
+        try:
+            cursor = self.collection.find(filters).sort("start_timestamp_ms", 1)
+            return [convert_objectid_to_string(found) for found in cursor]
+        except Exception as e:
+            logger.error(f"❌ Failed to get events for video {video_id}: {e}")
+            return []
+    
+    def mark_as_false_positive(self, event_id: str):
+        """Set is_false_positive on the event (for deduplication); warns if no event matches, errors are logged, not raised"""
+        try:
+            result = self.collection.update_one({"event_id": event_id}, {"$set": {"is_false_positive": True}})
+            if result.matched_count > 0:
+                logger.info(f"✅ Marked event {event_id} as false positive")
+            else:
+                logger.warning(f"⚠️ Event not found for false positive update: {event_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to mark event as false positive: {e}")
+
+
+# ========================================
+# Report Repository
+# ========================================
+
+class ReportRepository(BaseRepository):
+    """Repository for report storage and retrieval operations"""
+    
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.reports_bucket = db_manager.config.minio_reports_bucket
+    
+    def upload_report_to_minio(self, local_path: str, video_id: str, filename: str) -> str:
+        """
+        Upload report file to S3 storage
+        
+        Args:
+            local_path: Path to local report file
+            video_id: Video identifier
+            filename: Report filename (e.g., report_20260130_123456.html)
+            
+        Returns:
+            S3 object path
+        """
+        if self.minio is None:
+            logger.warning("S3 storage unavailable — skipping report upload to object storage")
+            return f"local://{local_path}"
+        try:
+            minio_path = f"reports/{video_id}/{filename}"
+            
+            # Determine content type based on file extension
+            content_type = 'text/html' if filename.endswith('.html') else 'application/pdf'
+            
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.reports_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type=content_type
+                )
+            
+            logger.info(f"✅ Uploaded report to S3: {minio_path}")
+            return minio_path
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to upload report to S3: {e}")
+            raise
+    
+    def get_report_presigned_url(self, video_id: str, filename: str, expires: timedelta = timedelta(hours=24)) -> Optional[str]:
+        """
+        Generate presigned GET URL for reports/{video_id}/{filename}; None if storage is unavailable or an S3Error occurs
+        """
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"reports/{video_id}/{filename}"
+            url = self.minio.presigned_get_object(self.reports_bucket, minio_path, expires=expires)
+            logger.info(f"✅ Generated presigned URL for report: {filename}")
+            return url
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for report: {e}")
+            return None
+    
+    def list_reports_for_video(self, video_id: str) -> List[Dict[str, Any]]:
+        """
+        List all reports for a video
+        """
+        if self.minio is None:
+            return []
+        try:
+            prefix = f"reports/{video_id}/"
+            objects = self.minio.list_objects(self.reports_bucket, prefix=prefix, recursive=True)
+            
+            reports = []
+            for obj in objects:
+                reports.append({
+                    'filename': obj.object_name.split('/')[-1],
+                    'path': obj.object_name,
+                    'size': obj.size,
+                    'last_modified': obj.last_modified,
+                    'content_type': 'text/html' if obj.object_name.endswith('.html') else 'application/pdf'
+                })
+            
+            logger.info(f"✅ Found {len(reports)} reports for video {video_id}")
+            return reports
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to list reports for video {video_id}: {e}")
+            return []
+
+
+# Removed: KeyframeRepository - its collection doesn't exist in the schema
+# Removed: ProcessingJobRepository - its collection doesn't exist in the schema
+# Removed: ObjectDetectionRepository - its collection doesn't exist in the schema
+
+# Only VideoRepository, EventRepository, and ReportRepository are schema-compliant and remain above
\ No newline at end of file
diff --git a/database/repositories_old.py b/database/repositories_old.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad66778726474958f13baaabb800f268d5805ca9
--- /dev/null
+++ b/database/repositories_old.py
@@ -0,0 +1,653 @@
+"""
+Repository Classes for DetectifAI Database Operations
+
+This module provides data access layer for MongoDB and MinIO operations.
+Each repository handles CRUD operations for specific collections.
+"""
+
+import os
+import io
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from bson import ObjectId
+from pymongo.collection import Collection
+from minio import Minio
+from minio.error import S3Error
+import logging
+
+from .models import (
+    VideoFileModel, EventModel, EventDescriptionModel, DetectedFaceModel,
+    prepare_for_mongodb, convert_objectid_to_string, convert_numpy_types,
+    seconds_to_milliseconds
+)
+
+logger = logging.getLogger(__name__)
+
+class BaseRepository:
+    """Base repository class with common functionality"""
+    
+    def __init__(self, db_manager):
+        self.db = db_manager.db
+        self.minio = db_manager.minio_client
+        self.video_bucket = db_manager.config.minio_video_bucket
+        self.keyframe_bucket = db_manager.config.minio_keyframe_bucket
+
+class VideoRepository(BaseRepository):
+    """Repository for video_file collection operations"""
+    
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.video_file
+    
+    def create_video_record(self, video_data: Dict) -> str:
+        """Create new video record matching MongoDB schema exactly"""
+        try:
+            # Extract required fields
+            video_id = video_data.get('video_id')
+            user_id = video_data.get('user_id', 'system')
+            file_path = video_data.get('file_path', f"videos/{video_id}.mp4")
+            
+            # Build schema-compliant record
+            record = {
+                "video_id": video_id,
+                "user_id": user_id,
+                "file_path": file_path,
+                "upload_date": datetime.utcnow()
+            }
+            
+            # Add optional schema fields
+            if 'fps' in video_data:
+                record['fps'] = float(video_data['fps'])  # Ensure double type
+            else:
+                record['fps'] = 30.0  # Default
+            
+            if 'duration' in video_data or 'duration_secs' in video_data:
+                duration = video_data.get('duration_secs') or video_data.get('duration', 0)
+                record['duration_secs'] = int(duration)  # Ensure integer
+            
+            if 'file_size' in video_data or 'file_size_bytes' in video_data:
+                file_size = video_data.get('file_size_bytes') or video_data.get('file_size', 0)
+                record['file_size_bytes'] = int(file_size)  # Ensure long
+            
+            if 'codec' in video_data:
+                record['codec'] = str(video_data['codec'])
+            
+            if 'minio_object_key' in video_data:
+                record['minio_object_key'] = video_data['minio_object_key']
+            
+            if 'minio_bucket' in video_data:
+                record['minio_bucket'] = video_data['minio_bucket']
+            
+            # Build meta_data object for extra fields
+            meta_data = {}
+            extra_fields = [
+                'processing_status', 'resolution', 'filename', 'keyframe_count',
+                'event_count', 'compression_applied', 'enhancement_applied',
+                'error_message', 'processing_config'
+            ]
+            
+            for field in extra_fields:
+                if field in video_data:
+                    meta_data[field] = video_data[field]
+            
+            if meta_data:
+                record['meta_data'] = meta_data
+            
+            # Convert numpy types and prepare for MongoDB
+            record = prepare_for_mongodb(record)
+            
+            result = self.collection.insert_one(record)
+            logger.info(f"✅ Created video record: {video_id}")
+            return str(result.inserted_id)
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to create video record: {e}")
+            raise
+    
+    def get_video_by_id(self, video_id: str) -> Optional[Dict]:
+        """Get video record by video_id"""
+        try:
+            doc = self.collection.find_one({"video_id": video_id})
+            if doc:
+                return convert_objectid_to_string(doc)
+            return None
+        except Exception as e:
+            logger.error(f"❌ Failed to get video {video_id}: {e}")
+            return None
+    
+    def update_processing_status(self, video_id: str, status: str, metadata: Dict = None):
+        """Update video processing status in meta_data field"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                return
+            
+            current_meta = video.get('meta_data', {})
+            current_meta['processing_status'] = status
+            current_meta['last_updated'] = datetime.utcnow().isoformat()
+            
+            # Add any additional metadata
+            if metadata:
+                current_meta.update(metadata)
+            
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            
+            if result.matched_count > 0:
+                logger.info(f"✅ Updated video status: {video_id} -> {status}")
+            else:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                
+        except Exception as e:
+            logger.error(f"❌ Failed to update video status: {e}")
+            raise
+    
+    def update_metadata(self, video_id: str, metadata: Dict):
+        """Update video meta_data field with processing information"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found: {video_id}")
+                return
+            
+            current_meta = video.get('meta_data', {})
+            current_meta.update(metadata)
+            
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            
+            logger.info(f"✅ Updated video metadata: {video_id}")
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to update video metadata: {e}")
+            raise
+    
+    def upload_video_to_minio(self, local_path: str, video_id: str) -> str:
+        """Upload video file to MinIO storage"""
+        try:
+            minio_path = f"original/{video_id}/video.mp4"
+            
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.video_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type='video/mp4'
+                )
+            
+            logger.info(f"✅ Uploaded video to MinIO: {minio_path}")
+            return minio_path
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to upload video to MinIO: {e}")
+            raise
+    
+    def get_video_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for video access"""
+        try:
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL: {e}")
+            return None
+
+class KeyframeRepository(BaseRepository):
+    """Repository for keyframes collection operations"""
+    
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.keyframes
+    
+    def save_keyframes_batch(self, video_id: str, keyframes_data: List[Dict]) -> List[str]:
+        """Save multiple keyframes to MinIO and MongoDB.
+
+        Args:
+            video_id: Video the keyframes belong to; also the object-key prefix.
+            keyframes_data: Keyframe dicts, or objects exposing such a dict as ``frame_data``.
+
+        Returns:
+            Inserted document ids (as strings) of the keyframes that were saved.
+
+        Raises:
+            Exception: Any upload or insert error is logged and re-raised.
+        """
+        keyframe_ids = []
+        try:
+            for i, kf_data in enumerate(keyframes_data):
+                frame_data = kf_data.frame_data if hasattr(kf_data, 'frame_data') else kf_data
+                # Resolve the frame number once so the object key and the stored
+                # document agree, falling back to the batch index when it is absent.
+                frame_number = frame_data.get('frame_number', i)
+                minio_path = f"{video_id}/frame_{frame_number:06d}.jpg"
+                # Upload only when a local image path is given; a missing file skips the keyframe.
+                if 'frame_path' in frame_data:
+                    local_path = frame_data['frame_path']
+                    if not os.path.exists(local_path):
+                        logger.warning(f"⚠️ Keyframe file not found: {local_path}")
+                        continue
+                    with open(local_path, 'rb') as img_file:
+                        self.minio.put_object(
+                            self.keyframe_bucket, minio_path, img_file,
+                            length=os.stat(local_path).st_size, content_type='image/jpeg')
+
+                keyframe_doc = {
+                    "video_id": video_id,
+                    "frame_number": frame_number,
+                    "timestamp": frame_data.get('timestamp', 0.0),
+                    "quality_score": frame_data.get('quality_score', 0.0),
+                    "motion_score": frame_data.get('motion_score', 0.0),
+                    "minio_path": minio_path,
+                    "enhancement_applied": frame_data.get('enhancement_applied', False),
+                    "face_count": frame_data.get('face_count', 0),
+                    "object_detections": [],
+                    "created_at": datetime.utcnow()
+                }
+                result = self.collection.insert_one(keyframe_doc)
+                keyframe_ids.append(str(result.inserted_id))
+
+            logger.info(f"✅ Saved {len(keyframe_ids)} keyframes for video {video_id}")
+            return keyframe_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save keyframes batch: {e}")
+            raise
+    
+    def get_keyframes_by_video_id(self, video_id: str, has_detections: bool = False,
+                                limit: Optional[int] = None) -> List[Dict]:
+        """Return a video's keyframes ordered by timestamp, each with a presigned URL.
+
+        Args:
+            video_id: Value matched against each keyframe document's ``video_id``.
+            has_detections: When True, only keyframes whose ``object_detections``
+                array exists and is non-empty are returned.
+            limit: Maximum number of keyframes to return; ``None`` or 0 means no cap.
+
+        Returns:
+            Keyframe documents passed through ``convert_objectid_to_string`` with a
+            ``presigned_url`` key (valid for one hour) added. If anything fails the
+            error is logged and an empty list is returned.
+        """
+        try:
+            query = {"video_id": video_id}
+
+            if has_detections:
+                query["object_detections"] = {"$exists": True, "$not": {"$size": 0}}
+
+            cursor = self.collection.find(query).sort("timestamp", 1)
+
+            if limit:
+                cursor = cursor.limit(limit)
+
+            # The batch-save path in this file uploads keyframe images to
+            # self.keyframe_bucket, so URLs are signed against that bucket;
+            # self.bucket is only used when no keyframe bucket is configured.
+            bucket = getattr(self, "keyframe_bucket", None) or self.bucket
+
+            keyframes = []
+            for raw_doc in cursor:
+                # Collect the converted document itself so the result is correct
+                # whether the helper converts in place or returns a copy.
+                kf = convert_objectid_to_string(raw_doc)
+                kf['presigned_url'] = self.minio.presigned_get_object(
+                    bucket,
+                    kf['minio_path'],
+                    expires=timedelta(hours=1)
+                )
+                keyframes.append(kf)
+
+            return keyframes
+
+        except Exception as e:
+            logger.error(f"❌ Failed to get keyframes for video {video_id}: {e}")
+            return []
+    
+    def update_keyframe_detections(self, keyframe_id: str, detections: List[Dict]):
+        """Replace the ``object_detections`` stored on one keyframe document.
+
+        The keyframe is looked up by its ``_id``; ``updated_at`` is refreshed in the
+        same write. Failures are logged and swallowed, so nothing is raised.
+        """
+        try:
+            selector = {"_id": ObjectId(keyframe_id)}
+            changes = {
+                "object_detections": detections,
+                "updated_at": datetime.utcnow()
+            }
+            self.collection.update_one(selector, {"$set": changes})
+            logger.info(f"✅ Updated keyframe {keyframe_id} with {len(detections)} detections")
+        except Exception as e:
+            logger.error(f"❌ Failed to update keyframe detections: {e}")
+
+class EventRepository(BaseRepository):
+    """Repository for event collection operations - Schema Compliant
+
+    Writes event documents to ``db.event`` and their optional free-text
+    descriptions to ``db.event_description``.
+    """
+
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.event
+        self.event_description_collection = self.db.event_description
+
+    def save_event(self, event_data: Dict) -> str:
+        """Insert one event document and, when present, its description.
+
+        Args:
+            event_data: Must contain ``video_id``. Optional keys read here:
+                ``event_id`` (a UUID4 string is generated when absent),
+                ``start_timestamp`` / ``end_timestamp`` (passed through
+                ``seconds_to_milliseconds``), ``event_type``, ``confidence``,
+                ``bounding_boxes``, ``description`` and ``caption``.
+
+        Returns:
+            The inserted document's MongoDB ``_id`` as a string (note: this is
+            not the ``event_id`` field).
+
+        Raises:
+            Exception: Any failure is logged and re-raised unchanged.
+        """
+        try:
+            import uuid
+
+            # Extract required fields
+            event_id = event_data.get('event_id', str(uuid.uuid4()))
+            video_id = event_data['video_id']
+
+            # Convert timestamps: seconds (float) -> milliseconds (int)
+            start_timestamp_ms = seconds_to_milliseconds(event_data.get('start_timestamp', 0.0))
+            end_timestamp_ms = seconds_to_milliseconds(event_data.get('end_timestamp', 0.0))
+
+            # Build schema-compliant event document
+            event_doc = {
+                "event_id": event_id,
+                "video_id": video_id,
+                "start_timestamp_ms": int(start_timestamp_ms),
+                "end_timestamp_ms": int(end_timestamp_ms),
+                "event_type": event_data.get('event_type', 'motion'),
+                "confidence_score": float(event_data.get('confidence', 0.0)),
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_at": None,
+                "verified_by": None,
+                "visual_embedding": [],
+                "bounding_boxes": event_data.get('bounding_boxes', {})
+            }
+
+            # Normalise values before handing the document to the driver
+            event_doc = convert_numpy_types(event_doc)
+            event_doc = prepare_for_mongodb(event_doc)
+
+            result = self.collection.insert_one(event_doc)
+            logger.info(f"✅ Saved event: {event_id} ({event_data.get('event_type')})")
+
+            # If there's additional description info, save to event_description
+            if event_data.get('description') or event_data.get('caption'):
+                self._save_event_description(event_id, event_data)
+
+            return str(result.inserted_id)
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save event: {e}")
+            raise
+
+    def save_detection_events(self, video_id: str, detection_groups: List[Dict]) -> List[str]:
+        """Save one event per detection group via ``save_event``.
+
+        Args:
+            video_id: Video the detections belong to.
+            detection_groups: Each group may carry ``detections`` (list of dicts
+                with ``class_name``, ``confidence``, ``bbox``, ``frame_timestamp``,
+                ``detection_model``), ``class``, ``start_timestamp``,
+                ``end_timestamp`` and ``max_confidence``.
+
+        Returns:
+            The values returned by ``save_event`` for each group, in order.
+
+        Raises:
+            Exception: Any failure is logged and re-raised; groups saved before
+            the failure remain in the collection.
+        """
+        event_ids = []
+
+        try:
+            for group in detection_groups:
+                group_detections = group.get('detections', [])
+
+                # Build bounding_boxes object
+                bboxes = {
+                    "detections": [
+                        {
+                            "class": det.get('class_name', ''),
+                            "confidence": float(det.get('confidence', 0.0)),
+                            "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                            "timestamp": float(det.get('frame_timestamp', 0.0)),
+                            "model": det.get('detection_model', '')
+                        }
+                        for det in group_detections
+                    ]
+                }
+
+                event_data = {
+                    "video_id": video_id,
+                    "start_timestamp": group.get('start_timestamp', 0.0),
+                    "end_timestamp": group.get('end_timestamp', 0.0),
+                    "event_type": f"object_detection_{group.get('class', 'unknown')}",
+                    "confidence": group.get('max_confidence', 0.0),
+                    "bounding_boxes": bboxes,
+                    "description": f"Detected {len(group_detections)} {group.get('class', 'object')}(s)"
+                }
+
+                event_ids.append(self.save_event(event_data))
+
+            logger.info(f"✅ Saved {len(event_ids)} detection events for video {video_id}")
+            return event_ids
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection events: {e}")
+            raise
+
+    def _save_event_description(self, event_id: str, event_data: Dict):
+        """Save detailed event description to event_description collection.
+
+        Uses ``description`` when truthy, otherwise ``caption``. Does nothing when
+        neither yields text. Failures are logged and swallowed so that a missing
+        description never fails the event save that triggered it.
+        """
+        try:
+            import uuid
+
+            description_text = event_data.get('description') or event_data.get('caption', '')
+
+            if not description_text:
+                return
+
+            description_doc = {
+                "description_id": str(uuid.uuid4()),
+                "event_id": event_id,
+                "caption": description_text,
+                "text_embedding": [],  # TODO: Generate embedding in future
+                "confidence": float(event_data.get('confidence', 0.0)),
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+
+            description_doc = prepare_for_mongodb(description_doc)
+            self.event_description_collection.insert_one(description_doc)
+            logger.info(f"✅ Saved event description for {event_id}")
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save event description: {e}")
+
+    def get_events_by_video_id(self, video_id: str, event_type: str = None) -> List[Dict]:
+        """Get events for a video with optional type filtering.
+
+        Results are ordered by ``start_timestamp_ms``, the field ``save_event``
+        actually writes. Returns an empty list (after logging) on any failure.
+        """
+        try:
+            query = {"video_id": video_id}
+            if event_type:
+                query["event_type"] = event_type
+
+            cursor = self.collection.find(query).sort("start_timestamp_ms", 1)
+
+            # Keep the helper's return value; rebinding a loop variable would
+            # drop it whenever the helper returns a new dict.
+            return [convert_objectid_to_string(event) for event in cursor]
+
+        except Exception as e:
+            logger.error(f"❌ Failed to get events for video {video_id}: {e}")
+            return []
+
+    def mark_as_false_positive(self, event_id: str):
+        """Mark event as false positive (for deduplication).
+
+        The event is matched on its ``event_id`` field. Failures are logged and
+        swallowed.
+        """
+        try:
+            self.collection.update_one(
+                {"event_id": event_id},
+                {"$set": {"is_false_positive": True}}
+            )
+            logger.info(f"✅ Marked event {event_id} as false positive")
+        except Exception as e:
+            logger.error(f"❌ Failed to mark event as false positive: {e}")
+
+    def _calculate_threat_level(self, object_class: str) -> str:
+        """Calculate threat level based on detected object class.
+
+        Lookup is case-insensitive; classes not in the table map to ``'low'``.
+        """
+        threat_map = {
+            'fire': 'critical',
+            'gun': 'critical',
+            'knife': 'high',
+            'smoke': 'medium'
+        }
+        return threat_map.get((object_class or '').lower(), 'low')
+
+# TODO(review): earlier notes at this point said to remove KeyframeRepository,
+# ProcessingJobRepository and ObjectDetectionRepository because their collections
+# are not in the schema. The latter two are still defined below - confirm whether
+# they are used before deleting them.
+
+class ProcessingJobRepository(BaseRepository):
+    """Repository for processing_jobs collection operations"""
+
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.processing_jobs
+
+    def create_processing_job(self, video_id: str, job_type: str = "complete_processing") -> str:
+        """Insert a new job in the ``queued`` state and return its ``_id`` as a string.
+
+        Any failure is logged and re-raised.
+        """
+        try:
+            inserted = self.collection.insert_one({
+                "video_id": video_id,
+                "job_type": job_type,
+                "status": "queued",
+                "progress": 0,
+                "message": "Processing job queued",
+                "created_at": datetime.utcnow()
+            })
+            logger.info(f"✅ Created processing job: {video_id}")
+            return str(inserted.inserted_id)
+
+        except Exception as e:
+            logger.error(f"❌ Failed to create processing job: {e}")
+            raise
+
+    def update_job_progress(self, video_id: str, progress: int, message: str, status: str = None):
+        """Update progress, message and (optionally) status of a video's job.
+
+        ``started_at`` is stamped the first time the job is moved to
+        ``processing``; ``completed_at`` is stamped when it reaches ``completed``
+        or ``failed``. Failures are logged and swallowed.
+        """
+        try:
+            fields = {
+                "progress": progress,
+                "message": message,
+                "updated_at": datetime.utcnow()
+            }
+
+            if status:
+                fields["status"] = status
+                if status == "processing":
+                    # Only stamp the start time once per job
+                    already_started = self.collection.find_one(
+                        {"video_id": video_id, "started_at": {"$exists": True}}
+                    )
+                    if not already_started:
+                        fields["started_at"] = datetime.utcnow()
+                elif status in ("completed", "failed"):
+                    fields["completed_at"] = datetime.utcnow()
+
+            self.collection.update_one({"video_id": video_id}, {"$set": fields})
+
+        except Exception as e:
+            logger.error(f"❌ Failed to update job progress: {e}")
+
+    def get_job_status(self, video_id: str) -> Optional[Dict]:
+        """Return the job document for ``video_id``, or ``None`` if absent or on error."""
+        try:
+            job = self.collection.find_one({"video_id": video_id})
+            return convert_objectid_to_string(job) if job else None
+        except Exception as e:
+            logger.error(f"❌ Failed to get job status: {e}")
+            return None
+
+class ObjectDetectionRepository(BaseRepository):
+    """Repository for object detection results"""
+
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.object_detections
+
+    def save_detection_batch(self, video_id: str, detections: List[Dict]) -> List[str]:
+        """Insert one document per detection and return their ``_id`` strings.
+
+        Args:
+            video_id: Video the detections belong to.
+            detections: Dicts that may carry ``keyframe_id``, ``frame_number``,
+                ``class_name``, ``confidence``, ``bbox``, ``center_point``,
+                ``area``, ``frame_timestamp`` and ``detection_model``; missing
+                keys fall back to the defaults used below.
+
+        Raises:
+            Exception: Any failure is logged and re-raised; documents inserted
+            before the failure remain in the collection.
+        """
+        detection_ids = []
+
+        try:
+            for detection in detections:
+                keyframe_id = detection.get('keyframe_id')
+                class_name = detection.get('class_name', '')
+
+                detection_doc = {
+                    "video_id": video_id,
+                    "keyframe_id": ObjectId(keyframe_id) if keyframe_id else None,
+                    # Suffix is the running count of documents inserted so far
+                    "detection_id": f"{video_id}_{detection.get('frame_number', 0)}_{len(detection_ids)}",
+                    "class_name": class_name,
+                    "confidence": detection.get('confidence', 0.0),
+                    "bbox": detection.get('bbox', [0, 0, 0, 0]),
+                    "center_point": detection.get('center_point', [0, 0]),
+                    "area": detection.get('area', 0.0),
+                    "frame_timestamp": detection.get('frame_timestamp', 0.0),
+                    "detection_model": detection.get('detection_model', ''),
+                    "threat_level": self._calculate_threat_level(class_name),
+                    "created_at": datetime.utcnow()
+                }
+
+                result = self.collection.insert_one(detection_doc)
+                detection_ids.append(str(result.inserted_id))
+
+            logger.info(f"✅ Saved {len(detection_ids)} detection results for video {video_id}")
+            return detection_ids
+
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection results: {e}")
+            raise
+
+    def get_detections_by_video_id(self, video_id: str, class_filter: str = None) -> List[Dict]:
+        """Get object detections for a video, ordered by ``frame_timestamp``.
+
+        ``class_filter``, when given, restricts results to that ``class_name``.
+        Returns an empty list (after logging) on any failure.
+        """
+        try:
+            query = {"video_id": video_id}
+            if class_filter:
+                query["class_name"] = class_filter
+
+            cursor = self.collection.find(query).sort("frame_timestamp", 1)
+
+            # Keep the helper's return value; rebinding a loop variable would
+            # drop it whenever the helper returns a new dict.
+            return [convert_objectid_to_string(detection) for detection in cursor]
+
+        except Exception as e:
+            logger.error(f"❌ Failed to get detections for video {video_id}: {e}")
+            return []
+
+    def _calculate_threat_level(self, class_name: str) -> str:
+        """Calculate threat level based on detected object class.
+
+        Lookup is case-insensitive; unknown, empty or ``None`` classes map to
+        ``'low'``.
+        """
+        threat_map = {
+            'fire': 'critical',
+            'gun': 'critical',
+            'knife': 'high',
+            'smoke': 'medium'
+        }
+        return threat_map.get((class_name or '').lower(), 'low')
\ No newline at end of file
diff --git a/database/storage_logger.py b/database/storage_logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..19639024f92840ba3593d3a891880c27a58c45b4
--- /dev/null
+++ b/database/storage_logger.py
@@ -0,0 +1,41 @@
+"""
+Storage Logging Configuration for MinIO and Database Operations
+"""
+
+import logging
+import os
+from datetime import datetime
+
+def setup_storage_logger():
+    """Configure and return the shared ``storage_operations`` logger.
+
+    The logger gets a DEBUG-level file handler writing to
+    ``<parent of this module's directory>/logs/storage_YYYYMMDD.log`` and an
+    INFO-level console handler, both using the same format.
+
+    The function is idempotent: ``logging.getLogger`` returns the same object
+    for a given name, so if handlers are already attached the logger is
+    returned as-is instead of gaining a duplicate pair (which would emit every
+    record twice).
+    """
+    logger = logging.getLogger('storage_operations')
+    logger.setLevel(logging.DEBUG)
+
+    if logger.handlers:
+        return logger
+
+    # Create logs directory if it doesn't exist
+    logs_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs')
+    os.makedirs(logs_dir, exist_ok=True)
+
+    # File handler for storage operations; UTF-8 so non-ASCII log text is
+    # written regardless of the platform's default encoding
+    log_file = os.path.join(logs_dir, f'storage_{datetime.now().strftime("%Y%m%d")}.log')
+    file_handler = logging.FileHandler(log_file, encoding='utf-8')
+    file_handler.setLevel(logging.DEBUG)
+
+    # Console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+
+    # Create formatter
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    file_handler.setFormatter(formatter)
+    console_handler.setFormatter(formatter)
+
+    # Add handlers
+    logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
+
+    return logger
+
+# Initialize logger
+storage_logger = setup_storage_logger()
\ No newline at end of file
diff --git a/database/video_compression_service.py b/database/video_compression_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f89fc2d55d2699f62d1588244c41a88b0c9c734
--- /dev/null
+++ b/database/video_compression_service.py
@@ -0,0 +1,379 @@
+"""
+Video Compression and Storage Service for DetectifAI
+
+This module handles video compression and MinIO storage for compressed videos.
+"""
+
+import os
+import cv2
+import subprocess
+import logging
+from io import BytesIO
+from typing import Dict, Optional
+from datetime import timedelta
+from minio.error import S3Error
+
+logger = logging.getLogger(__name__)
+
+class VideoCompressionService:
+    """Service for compressing videos and storing in S3-compatible storage"""
+
+    def __init__(self, db_manager, config=None):
+        """Bind the database manager and set the default encoder settings.
+
+        Args:
+            db_manager: Object exposing ``config.minio_video_bucket`` (read here)
+                and ``minio_client`` (read later through the ``minio`` property).
+            config: Stored on ``self.config``; not otherwise used in this method.
+        """
+        self.config = config
+        self._db_manager = db_manager
+
+        # Compressed videos live in the same bucket as the original videos
+        self.bucket = db_manager.config.minio_video_bucket
+
+        # Encoder defaults
+        self.compression_preset = "medium"  # ultrafast to veryslow
+        self.compression_crf = 23  # 0-51, lower = better quality (23 is default)
+        self.output_resolution = "720p"  # 720p for web delivery
+
+        # Probe once so compress_and_store can pick a backend without re-checking
+        self.ffmpeg_available = self._check_ffmpeg_available()
+
+    @property
+    def minio(self):
+        """Return the database manager's ``minio_client``, read on every access.
+
+        Nothing is cached here, so the value reflects the manager's current
+        client. Callers in this class treat ``None`` as "storage unavailable".
+        """
+        return self._db_manager.minio_client
+
+    def _check_ffmpeg_available(self) -> bool:
+        """Check if FFmpeg is available on the system.
+
+        Runs ``ffmpeg -version`` with a 5 second timeout.
+
+        Returns:
+            True only when the command exits with status 0. A missing or
+            non-executable binary, a timeout, or any other launch failure yields
+            False rather than an exception, so constructing the service never
+            fails because of this probe.
+        """
+        try:
+            result = subprocess.run(
+                ['ffmpeg', '-version'],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            return result.returncode == 0
+        except (OSError, subprocess.SubprocessError):
+            # OSError covers FileNotFoundError and PermissionError;
+            # SubprocessError covers TimeoutExpired.
+            return False
+
+    def compress_and_store(self, input_path: str, video_id: str) -> Optional[Dict]:
+        """Compress video and store in MinIO and locally.
+
+        The video is compressed into memory (FFmpeg when available, OpenCV as
+        fallback), written to
+        ``video_processing_outputs/compressed/<video_id>/video.mp4`` and, when a
+        storage client is available, uploaded as ``compressed/<video_id>/video.mp4``.
+
+        Args:
+            input_path: Path of the source video file.
+            video_id: Identifier used in the local and remote paths.
+
+        Returns:
+            A dict with ``success``, ``minio_path`` (``None`` when the upload was
+            skipped or failed), ``local_path``, ``original_size``,
+            ``compressed_size``, ``compression_ratio`` (percent reduction, rounded
+            to 2 decimals) and ``output_resolution``; or ``None`` when both
+            compression backends fail or an unexpected error occurs.
+        """
+        try:
+            # Create local storage directory
+            local_dir = os.path.join("video_processing_outputs", "compressed", video_id)
+            os.makedirs(local_dir, exist_ok=True)
+            local_path = os.path.join(local_dir, "video.mp4")
+
+            # Use BytesIO for in-memory compression
+            compressed_buffer = BytesIO()
+
+            # Try FFmpeg first if available, otherwise use OpenCV
+            if self.ffmpeg_available:
+                success = self._compress_with_ffmpeg_to_buffer(input_path, compressed_buffer)
+                if not success:
+                    logger.warning("FFmpeg compression failed, falling back to OpenCV")
+                    # Discard anything a failed attempt may have written so the
+                    # fallback output is not appended to stale bytes
+                    compressed_buffer.seek(0)
+                    compressed_buffer.truncate()
+                    success = self._compress_with_opencv_to_buffer(input_path, compressed_buffer)
+            else:
+                logger.info("FFmpeg not available, using OpenCV compression")
+                success = self._compress_with_opencv_to_buffer(input_path, compressed_buffer)
+
+            if not success:
+                logger.error("Both compression methods failed")
+                return None
+
+            # Get buffer contents
+            compressed_data = compressed_buffer.getvalue()
+            compressed_size = len(compressed_data)
+
+            # Save locally
+            with open(local_path, 'wb') as f:
+                f.write(compressed_data)
+            logger.info(f"✅ Video saved locally: {local_path}")
+
+            # Calculate compression stats; a zero-byte source would otherwise
+            # raise ZeroDivisionError after the local copy was already written
+            original_size = os.path.getsize(input_path)
+            if original_size > 0:
+                compression_ratio = ((original_size - compressed_size) / original_size) * 100
+            else:
+                compression_ratio = 0.0
+
+            # Upload directly to S3 using consistent path structure (skip if unavailable)
+            minio_path = None
+            storage = self.minio
+            if storage is not None:
+                try:
+                    minio_path = f"compressed/{video_id}/video.mp4"
+                    compressed_buffer.seek(0)  # Reset buffer for S3 upload
+                    storage.put_object(
+                        self.bucket,
+                        minio_path,
+                        compressed_buffer,
+                        length=compressed_size,
+                        content_type='video/mp4'
+                    )
+                except Exception as s3_err:
+                    # Best effort: the local copy is still usable
+                    logger.warning(f"⚠️ S3 upload skipped for compressed video: {s3_err}")
+                    minio_path = None
+            else:
+                logger.info("S3 storage unavailable — compressed video stored locally only")
+
+            result = {
+                'success': True,
+                'minio_path': minio_path,
+                'local_path': local_path,
+                'original_size': original_size,
+                'compressed_size': compressed_size,
+                'compression_ratio': round(compression_ratio, 2),
+                'output_resolution': self.output_resolution
+            }
+
+            logger.info(f"✅ Video compressed and stored: {compression_ratio:.1f}% reduction")
+            return result
+
+        except Exception as e:
+            logger.error(f"❌ Compression and storage failed: {e}")
+            return None
+
+    def get_compressed_video_presigned_url(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> Optional[str]:
+        """Generate presigned URL for compressed video access.
+
+        Args:
+            video_id: Identifier used to build ``compressed/<video_id>/video.mp4``.
+            expires: Lifetime of the URL; defaults to one hour.
+
+        Returns:
+            The presigned URL, or ``None`` when no storage client is available or
+            the client raises ``S3Error`` (which is logged).
+        """
+        # Read the client once so the None check and the call use the same object
+        storage = self.minio
+        if storage is None:
+            return None
+        try:
+            minio_path = f"compressed/{video_id}/video.mp4"
+            return storage.presigned_get_object(self.bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for compressed video: {e}")
+            return None
+    
+    def _compress_with_ffmpeg(self, input_path: str, output_path: str) -> bool:
+        """Compress ``input_path`` to ``output_path`` with FFmpeg (H.264).
+
+        Returns True when FFmpeg exits with status 0 and the output file exists;
+        otherwise logs the problem and returns False. Never raises.
+        """
+        try:
+            # Encoder options shared by every resolution
+            args = ['ffmpeg', '-i', input_path]
+            args += ['-c:v', 'libx264']  # H.264 codec
+            args += ['-crf', str(self.compression_crf)]
+            args += ['-preset', self.compression_preset]
+            args += ['-movflags', '+faststart']  # Enable web playback
+            args += ['-y']  # Overwrite output file
+
+            # Scale into the target box while preserving aspect ratio
+            if self.output_resolution == "720p":
+                args += ['-vf', 'scale=1280:720:force_original_aspect_ratio=decrease']
+            elif self.output_resolution == "480p":
+                args += ['-vf', 'scale=854:480:force_original_aspect_ratio=decrease']
+
+            args.append(output_path)
+
+            completed = subprocess.run(args, capture_output=True, text=True)
+
+            if completed.returncode != 0 or not os.path.exists(output_path):
+                logger.error(f"FFmpeg error: {completed.stderr}")
+                return False
+
+            logger.info("✅ FFmpeg compression successful")
+            return True
+
+        except Exception as e:
+            logger.error(f"FFmpeg compression failed: {e}")
+            return False
+    
+    def _compress_with_ffmpeg_to_buffer(self, input_path: str, output_buffer: BytesIO) -> bool:
+        """Compress video using FFmpeg with temporary file (more reliable than pipe).
+
+        FFmpeg writes to a temporary ``.mp4`` file whose bytes are then appended
+        to ``output_buffer``. The temporary file is removed on every exit path,
+        including a timeout or any other exception.
+
+        Args:
+            input_path: Path of the source video file.
+            output_buffer: Receives the encoded bytes; only written on success.
+
+        Returns:
+            True on success; False (after logging) when FFmpeg exits non-zero,
+            produces no file, exceeds the 5 minute timeout or anything else fails.
+        """
+        import tempfile
+
+        temp_path = None
+        try:
+            # Create temporary file for FFmpeg output
+            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
+                temp_path = temp_file.name
+
+            # Build FFmpeg command to output to temporary file
+            cmd = [
+                'ffmpeg',
+                '-i', input_path,
+                '-c:v', 'libx264',  # H.264 codec
+                '-crf', str(self.compression_crf),
+                '-preset', self.compression_preset,
+                '-movflags', '+faststart',  # Enable web playback (safe for file output)
+                '-y'  # Overwrite output
+            ]
+
+            # Add resolution scaling if needed
+            if self.output_resolution == "720p":
+                cmd.extend(['-vf', 'scale=1280:720:force_original_aspect_ratio=decrease'])  # Scale to 720p preserving aspect ratio
+            elif self.output_resolution == "480p":
+                cmd.extend(['-vf', 'scale=854:480:force_original_aspect_ratio=decrease'])  # Scale to 480p preserving aspect ratio
+
+            # Add output file
+            cmd.append(temp_path)
+
+            # Run FFmpeg
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=300  # 5 minute timeout
+            )
+
+            if result.returncode != 0 or not os.path.exists(temp_path):
+                logger.error(f"FFmpeg error: {result.stderr}")
+                return False
+
+            # Read temporary file into buffer
+            with open(temp_path, 'rb') as f:
+                output_buffer.write(f.read())
+
+            logger.info("✅ FFmpeg compression to buffer successful")
+            return True
+
+        except Exception as e:
+            logger.error(f"FFmpeg compression to buffer failed: {e}")
+            return False
+
+        finally:
+            # Always remove the temporary file, including on timeout/exception
+            if temp_path is not None and os.path.exists(temp_path):
+                try:
+                    os.unlink(temp_path)
+                except OSError as cleanup_err:
+                    logger.debug(f"Could not remove temporary file {temp_path}: {cleanup_err}")
+    
+    def _compress_with_opencv_to_buffer(self, input_path: str, output_buffer: BytesIO) -> bool:
+        """Fallback compression using OpenCV, appended to ``output_buffer``.
+
+        Frames are re-encoded into a temporary ``.mp4`` (``cv2.VideoWriter`` needs
+        a file path) using the first codec that opens, then the file's bytes are
+        written to ``output_buffer``. The capture, the writer and the temporary
+        file are released/removed on every exit path.
+
+        Args:
+            input_path: Path of the source video file.
+            output_buffer: Receives the encoded bytes; only written on success.
+
+        Returns:
+            True on success; False (after logging) when the input cannot be
+            opened, no codec is usable or anything else fails.
+        """
+        import tempfile
+
+        cap = None
+        out = None
+        temp_path = None
+        try:
+            # Open input video
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                logger.error(f"Cannot open input video: {input_path}")
+                return False
+
+            # Get video properties
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+
+            # Calculate new dimensions (width follows the source aspect ratio)
+            if self.output_resolution == "720p":
+                new_height = 720
+                new_width = int((width / height) * new_height)
+            elif self.output_resolution == "480p":
+                new_height = 480
+                new_width = int((width / height) * new_height)
+            else:
+                new_width, new_height = width, height
+
+            # Create temporary file for OpenCV (required for VideoWriter)
+            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
+                temp_path = temp_file.name
+
+            # Create video writer with best available codec
+            # Prioritize H.264 (avc1) for browser compatibility
+            codecs_to_try = [
+                ('avc1', 'H.264'),
+                ('h264', 'H.264'),
+                ('X264', 'H.264'),
+                ('mp4v', 'MPEG-4')
+            ]
+
+            for fourcc_code, name in codecs_to_try:
+                try:
+                    fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
+                    candidate = cv2.VideoWriter(temp_path, fourcc, fps, (new_width, new_height))
+                    if candidate.isOpened():
+                        out = candidate
+                        logger.info(f"✅ Using codec: {name} ({fourcc_code})")
+                        break
+                    candidate.release()
+                except Exception as e:
+                    logger.debug(f"Codec {fourcc_code} failed: {e}")
+
+            if out is None:
+                logger.error("❌ No suitable video codec found")
+                return False
+
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                # Resize frame if needed
+                if (new_width, new_height) != (width, height):
+                    frame = cv2.resize(frame, (new_width, new_height))
+
+                out.write(frame)
+
+            # Release the writer before reading the file back
+            cap.release()
+            cap = None
+            out.release()
+            out = None
+
+            # Read compressed file into buffer
+            if not os.path.exists(temp_path):
+                logger.error("OpenCV compression failed - output file not created")
+                return False
+
+            with open(temp_path, 'rb') as f:
+                output_buffer.write(f.read())
+            logger.info("✅ OpenCV compression to buffer successful")
+            return True
+
+        except Exception as e:
+            logger.error(f"OpenCV compression to buffer failed: {e}")
+            return False
+
+        finally:
+            # Release whatever is still open and drop the temporary file
+            if cap is not None:
+                cap.release()
+            if out is not None:
+                out.release()
+            if temp_path is not None and os.path.exists(temp_path):
+                try:
+                    os.unlink(temp_path)
+                except OSError as cleanup_err:
+                    logger.debug(f"Could not remove temporary file {temp_path}: {cleanup_err}")
+    
+    def _compress_with_opencv(self, input_path: str, output_path: str) -> bool:
+        """Fallback compression using OpenCV, written to ``output_path``.
+
+        Frames are re-encoded with the ``mp4v`` codec, resized to the configured
+        resolution when it differs from the source. The capture and writer are
+        released on every exit path.
+
+        Returns:
+            True when the output file exists after encoding; False (after
+            logging) when the input or the writer cannot be opened, the output
+            file is missing, or anything else fails.
+        """
+        cap = None
+        out = None
+        try:
+            # Open input video
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                logger.error(f"Cannot open input video: {input_path}")
+                return False
+
+            # Get video properties
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+
+            # Calculate new dimensions (width follows the source aspect ratio)
+            if self.output_resolution == "720p":
+                new_height = 720
+                new_width = int((width / height) * new_height)
+            elif self.output_resolution == "480p":
+                new_height = 480
+                new_width = int((width / height) * new_height)
+            else:
+                new_width, new_height = width, height
+
+            # Create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(
+                output_path,
+                fourcc,
+                fps,
+                (new_width, new_height)
+            )
+            # Without this check, a failed writer leaves no output yet a stale
+            # file already at output_path would be reported as success
+            if not out.isOpened():
+                logger.error(f"Cannot open video writer for: {output_path}")
+                return False
+
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                # Resize frame
+                if (new_width, new_height) != (width, height):
+                    frame = cv2.resize(frame, (new_width, new_height))
+
+                out.write(frame)
+
+            # Release the writer before checking for the output file
+            cap.release()
+            cap = None
+            out.release()
+            out = None
+
+            if os.path.exists(output_path):
+                logger.info("✅ OpenCV compression successful")
+                return True
+
+            logger.error("OpenCV compression failed - output file not created")
+            return False
+
+        except Exception as e:
+            logger.error(f"OpenCV compression failed: {e}")
+            return False
+
+        finally:
+            # Release whatever is still open
+            if cap is not None:
+                cap.release()
+            if out is not None:
+                out.release()
\ No newline at end of file
diff --git a/database_video_service.py b/database_video_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..aaff4b0511cf55b0fd7e07e26b0d7fbe7c136c7f
--- /dev/null
+++ b/database_video_service.py
@@ -0,0 +1,1804 @@
+"""
+Database-Integrated Video Processing Service
+
+This service integrates the existing video processing pipeline with MongoDB and MinIO storage.
+It replaces local file storage with database persistence while maintaining all processing capabilities.
+"""
+
+import os
+import cv2
+import time
+import threading
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+import logging
+import uuid
+import json
+
+# Import existing processing components
+from config import VideoProcessingConfig
+from main_pipeline import CompleteVideoProcessingPipeline
+from core.video_processing import OptimizedVideoProcessor
+from object_detection import ObjectDetector
+from behavior_analysis_integrator import BehaviorAnalysisIntegrator
+from event_aggregation import EventDetector
+from video_segmentation import VideoSegmentationEngine
+
+# Import database components
+from database.config import DatabaseManager
+from database.repositories import VideoRepository, EventRepository
+from database.keyframe_repository import KeyframeRepository
+from database.video_compression_service import VideoCompressionService
+from database.models import (
+    convert_numpy_types, 
+    seconds_to_milliseconds, 
+    milliseconds_to_seconds,
+    prepare_for_mongodb
+)
+
+logger = logging.getLogger(__name__)
+
+class DatabaseIntegratedVideoService:
+    """Enhanced video processing service with database integration"""
+    
+    def __init__(self, config: VideoProcessingConfig = None):
+        """Initialize service with database connections and processing components"""
+        self.config = config or VideoProcessingConfig()
+        
+        # Initialize database connections
+        self.db_manager = DatabaseManager()
+        
+        # Initialize repositories (including keyframe and compression)
+        self.video_repo = VideoRepository(self.db_manager)
+        self.event_repo = EventRepository(self.db_manager)
+        self.keyframe_repo = KeyframeRepository(self.db_manager)
+        self.compression_service = VideoCompressionService(self.db_manager, self.config)
+        
+        # Initialize processing components
+        self.video_processor = OptimizedVideoProcessor(self.config)
+        self.event_detector = EventDetector(self.config)
+        self.segmentation_engine = VideoSegmentationEngine(self.config)
+        
+        # Initialize object detector if enabled
+        self.object_detector = None
+        if self.config.enable_object_detection:
+            try:
+                self.object_detector = ObjectDetector(self.config)
+                logger.info("✅ Object detection enabled")
+            except Exception as e:
+                logger.warning(f"⚠️ Object detection initialization failed: {e}")
+                self.config.enable_object_detection = False
+        
+        # Initialize behavior analyzer if enabled
+        self.behavior_analyzer = None
+        if getattr(self.config, 'enable_behavior_analysis', False):
+            try:
+                self.behavior_analyzer = BehaviorAnalysisIntegrator(self.config)
+                logger.info("✅ Behavior analysis enabled")
+            except Exception as e:
+                logger.warning(f"⚠️ Behavior analysis initialization failed: {e}")
+                self.config.enable_behavior_analysis = False
+        
+        # Initialize video captioning if enabled
+        self.video_captioning = None
+        if getattr(self.config, 'enable_video_captioning', False):
+            try:
+                from video_captioning_integrator import VideoCaptioningIntegrator
+                self.video_captioning = VideoCaptioningIntegrator(self.config, db_manager=self.db_manager)
+                logger.info("✅ Video captioning enabled (MongoDB + FAISS)")
+            except Exception as e:
+                logger.warning(f"⚠️ Video captioning initialization failed: {e}")
+                self.config.enable_video_captioning = False
+        
+        logger.info("✅ Database-integrated video service initialized")
+    
+    def process_video_with_database_storage(self, video_path: str, video_id: str, user_id: str = None):
+        """
+        Main processing pipeline with database integration
+        
+        Args:
+            video_path: Path to uploaded video file
+            video_id: Unique identifier for the video
+            user_id: Optional user identifier
+        """
+        logger.info(f"🚀 Starting database-integrated processing for video: {video_id}")
+        
+        try:
+            # Check if MongoDB record already exists (created during upload)
+            existing_video = self.video_repo.get_video_by_id(video_id)
+            if not existing_video:
+                logger.warning(f"⚠️ Video record not found in MongoDB for {video_id}, creating now...")
+                # Fallback: create record if it doesn't exist
+                video_metadata = self._extract_video_metadata(video_path)
+                video_record = {
+                    "video_id": video_id,
+                    "user_id": user_id or "system",
+                    "file_path": f"videos/{video_id}/video.mp4",
+                    "minio_object_key": f"original/{video_id}/video.mp4",
+                    "minio_bucket": self.video_repo.video_bucket,
+                    "codec": "h264",
+                    "fps": float(video_metadata.get("fps", 30.0)),
+                    "upload_date": datetime.utcnow(),
+                    "duration_secs": int(video_metadata.get("duration", 0)),
+                    "file_size_bytes": int(video_metadata.get("file_size", 0)),
+                    "meta_data": {
+                        "filename": os.path.basename(video_path),
+                        "resolution": video_metadata.get("resolution"),
+                        "processing_status": "processing",
+                        "processing_progress": 0,
+                        "processing_message": "Starting processing..."
+                    }
+                }
+                self.video_repo.create_video_record(video_record)
+            else:
+                logger.info(f"✅ MongoDB record already exists for {video_id}, proceeding with processing...")
+            
+            # Update status: processing started
+            self.video_repo.update_metadata(video_id, {
+                "processing_status": "processing",
+                "processing_progress": 10,
+                "processing_message": "Starting video processing pipeline..."
+            })
+            
+            # Step 1: Extract keyframes and upload to MinIO
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 15,
+                "processing_message": "Extracting and uploading keyframes..."
+            })
+            keyframes = self.video_processor.extract_keyframes(video_path)
+            
+            # Process keyframes directly for MinIO upload
+            keyframe_batch = []
+            for kf in keyframes:
+                frame_data = kf.frame_data if hasattr(kf, 'frame_data') else kf
+
+                # Extract keyframe information consistently
+                keyframe_info = {
+                    'frame_path': frame_data.frame_path if hasattr(frame_data, 'frame_path') else None,
+                    'frame_number': frame_data.frame_number if hasattr(frame_data, 'frame_number') else 0,
+                    'timestamp': frame_data.timestamp if hasattr(frame_data, 'timestamp') else 0.0,
+                    'enhancement_applied': frame_data.enhancement_applied if hasattr(frame_data, 'enhancement_applied') else False
+                }
+
+                # If we have a numpy frame directly, we might need to save it to a file first
+                if hasattr(frame_data, 'frame') and frame_data.frame is not None:
+                    # Save numpy array to temporary file for upload
+                    import tempfile
+                    import cv2
+                    import numpy as np
+
+                    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
+                        temp_path = temp_file.name
+                        cv2.imwrite(temp_path, cv2.cvtColor(frame_data.frame, cv2.COLOR_RGB2BGR))
+                        keyframe_info['frame_path'] = temp_path
+
+                keyframe_batch.append(keyframe_info)
+            
+            # Process and upload keyframes to MinIO
+            logger.info(f"Uploading {len(keyframe_batch)} keyframes to MinIO...")
+            
+            keyframe_info = []
+            for idx, kf_info in enumerate(keyframe_batch):
+                frame_path = kf_info.get('frame_path')
+
+                if frame_path and os.path.exists(frame_path):
+                    try:
+                        # Create MinIO path
+                        frame_number = kf_info.get('frame_number', idx)
+                        timestamp = kf_info.get('timestamp', 0.0)
+                        minio_path = f"{video_id}/keyframes/frame_{frame_number:06d}.jpg"
+
+                        # Upload to MinIO with metadata
+                        with open(frame_path, 'rb') as f:
+                            file_size = os.path.getsize(frame_path)
+                            metadata = {
+                                "frame_number": str(frame_number),
+                                "timestamp": str(timestamp),
+                                "enhancement_applied": str(kf_info.get('enhancement_applied', False))
+                            }
+
+                            self.keyframe_repo.minio.put_object(
+                                self.keyframe_repo.bucket,
+                                minio_path,
+                                f,
+                                file_size,
+                                content_type='image/jpeg',
+                                metadata=metadata
+                            )
+
+                            keyframe_info.append({
+                                "frame_number": frame_number,
+                                "timestamp": timestamp,
+                                "minio_path": minio_path,
+                                "size_bytes": file_size,
+                                "uploaded_at": datetime.utcnow().isoformat()
+                            })
+
+                    except Exception as e:
+                        logger.error(f"Failed to upload keyframe {frame_path}: {e}")
+                        continue
+                        
+                if (idx + 1) % 10 == 0:
+                    logger.info(f"Uploaded {idx + 1}/{len(keyframe_batch)} keyframes")
+            
+            # Step 2: Update MongoDB with keyframe MinIO paths (link metadata)
+            # Store each keyframe's MinIO path in MongoDB metadata
+            keyframe_metadata = []
+            for kf in keyframe_info:
+                keyframe_metadata.append({
+                    "frame_number": kf["frame_number"],
+                    "timestamp": kf["timestamp"],
+                    "minio_path": kf["minio_path"],
+                    "minio_bucket": self.keyframe_repo.bucket,
+                    "size_bytes": kf["size_bytes"],
+                    "uploaded_at": kf["uploaded_at"]
+                })
+            
+            # Update video metadata with keyframe information and MinIO links
+            self.video_repo.update_metadata(video_id, {
+                "keyframe_info": keyframe_metadata,  # Full metadata with MinIO paths
+                "keyframe_count": len(keyframe_info),
+                "keyframe_bucket": self.keyframe_repo.bucket,
+                "keyframes_minio_paths": [kf["minio_path"] for kf in keyframe_info],  # Quick access list
+                "upload_stats": {
+                    "total_frames": len(keyframe_batch),
+                    "uploaded_frames": len(keyframe_info),
+                    "upload_completed": datetime.utcnow().isoformat()
+                }
+            })
+            logger.info(f"✅ Uploaded {len(keyframe_info)} keyframes to MinIO and linked in MongoDB")
+            
+            # Enrich original keyframe objects with MinIO metadata for downstream processing
+            # This ensures video captioning and other modules can access MinIO paths
+            for idx, kf in enumerate(keyframes):
+                if idx < len(keyframe_metadata):
+                    kf_meta = keyframe_metadata[idx]
+                    # Add MinIO metadata to keyframe object
+                    if hasattr(kf, 'frame_data'):
+                        kf.frame_data.minio_path = kf_meta['minio_path']
+                        kf.frame_data.minio_bucket = kf_meta['minio_bucket']
+                    else:
+                        kf.minio_path = kf_meta['minio_path']
+                        kf.minio_bucket = kf_meta['minio_bucket']
+            
+            logger.info(f"✅ Enriched {len(keyframes)} keyframe objects with MinIO metadata")
+            
+            # Step 2: Generate compressed video and upload to MinIO (MOVED UP - Priority for playback)
+            compressed_minio_path = None
+            if self.config.generate_compressed_video:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 20,
+                    "processing_message": "Generating and uploading compressed video..."
+                })
+                logger.info("📦 ===== STARTING VIDEO COMPRESSION (PRIORITY) ===== ")
+                compressed_minio_path = self._generate_compressed_video(video_path, video_id)
+                if compressed_minio_path:
+                    logger.info(f"✅ Compressed video uploaded to MinIO: {compressed_minio_path}")
+                    # Update metadata immediately so video is playable
+                    self.video_repo.update_metadata(video_id, {
+                        "minio_compressed_path": compressed_minio_path
+                    })
+                    self.video_repo.collection.update_one(
+                        {"video_id": video_id},
+                        {"$set": {"meta_data.minio_compressed_path": compressed_minio_path}}
+                    )
+                else:
+                    logger.warning("⚠️ Video compression failed, continuing with other processing")
+            
+            # Step 3: Object detection (if enabled)
+            detection_results = []
+            if self.config.enable_object_detection and self.object_detector:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 40,
+                    "processing_message": "Running object detection..."
+                })
+                detection_results = self._run_object_detection_on_keyframes(
+                    video_id, keyframes
+                )
+            
+            # Step 4: Behavior analysis (if enabled)
+            behavior_results = []
+            behavior_events = []
+            if self.config.enable_behavior_analysis and self.behavior_analyzer:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 55,
+                    "processing_message": "Running behavior analysis (fight/accident/climbing detection)..."
+                })
+                logger.info("🚀 ===== STARTING BEHAVIOR ANALYSIS ===== ")
+                logger.info(f"📹 Processing video: {video_path}")
+                logger.info(f"🔧 Available models: {list(self.behavior_analyzer.models.keys())}")
+                
+                # Pass video_path for 3D-ResNet models (fighting, road_accident) which need 16-frame clips
+                behavior_results, behavior_events = self.behavior_analyzer.process_keyframes_with_behavior_analysis(keyframes, video_path=video_path)
+                
+                # Store behavior detections in keyframes
+                for i, keyframe in enumerate(keyframes):
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+                    
+                    # Find behavior detections for this frame
+                    frame_behaviors = [r for r in behavior_results if r.frame_path == frame_path and abs(r.timestamp - timestamp) < 0.1]
+                    
+                    if frame_behaviors:
+                        for behavior in frame_behaviors:
+                            if not hasattr(keyframe, 'behaviors'):
+                                keyframe.behaviors = []
+                            keyframe.behaviors.append({
+                                "type": behavior.behavior_detected,
+                                "confidence": behavior.confidence,
+                                "model": behavior.model_used,
+                                "timestamp": behavior.timestamp
+                            })
+                
+                logger.info(f"✅ Behavior analysis complete: {len(behavior_results)} detections, {len(behavior_events)} events")
+            
+            # Step 5: Event detection and aggregation
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 70,
+                "processing_message": "Detecting and aggregating events..."
+            })
+            
+            # Create events from object detections
+            event_ids = []
+            object_events = []
+            if detection_results:
+                object_events = self._create_object_events_from_detections(detection_results)
+                # Save events using EventRepository
+                for event in object_events:
+                    event['video_id'] = video_id  # Add video_id to event data
+                    event_id = self.event_repo.save_event(event)
+                    event_ids.append(event_id)
+            
+            # Create and save events from behavior analysis
+            if behavior_events:
+                logger.info(f"📅 Creating {len(behavior_events)} behavior-based events...")
+                for behavior_event in behavior_events:
+                    event_dict = {
+                        "video_id": video_id,
+                        "event_type": f"behavior_{behavior_event.behavior_type}",
+                        "start_timestamp": behavior_event.start_timestamp,
+                        "end_timestamp": behavior_event.end_timestamp,
+                        "confidence_score": float(behavior_event.confidence),
+                        "keyframes": behavior_event.keyframes,
+                        "importance_score": float(behavior_event.importance_score),
+                        "description": f"{behavior_event.behavior_type.capitalize()} behavior detected",
+                        "detection_data": {
+                            "model_used": behavior_event.model_used,
+                            "frame_indices": behavior_event.frame_indices,
+                            "behavior_type": behavior_event.behavior_type
+                        }
+                    }
+                    try:
+                        event_id = self.event_repo.save_event(event_dict)
+                        event_ids.append(event_id)
+                        logger.info(f"✅ Saved behavior event: {behavior_event.behavior_type} at {behavior_event.start_timestamp:.1f}s")
+                    except Exception as e:
+                        logger.error(f"❌ Failed to save behavior event: {e}")
+            
+            # Step 5.5: Run facial recognition on frames with detections (if enabled)
+            face_results = []
+            if self.config.enable_facial_recognition and (detection_results or behavior_results) and event_ids:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 75,
+                    "processing_message": "Running facial recognition on suspicious frames..."
+                })
+                try:
+                    from facial_recognition import FacialRecognitionIntegrated
+                    face_detector = FacialRecognitionIntegrated(self.config)
+                    
+                    # Get frames that have detections for facial recognition
+                    frames_with_detections = []
+                    for i, keyframe in enumerate(keyframes):
+                        frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                        frame_path = (
+                            frame_data.frame_path if hasattr(frame_data, 'frame_path')
+                            else getattr(frame_data, 'path', None)
+                        )
+                        timestamp = (
+                            frame_data.timestamp if hasattr(frame_data, 'timestamp')
+                            else getattr(frame_data, 'timestamp', 0.0)
+                        )
+                        
+                        # Check if this frame has object detections
+                        has_object_detection = any(
+                            abs(d['frame_timestamp'] - timestamp) < 0.5 
+                            for d in detection_results
+                        )
+                        
+                        # Check if this frame has behavior detections
+                        has_behavior_detection = any(
+                            abs(b.timestamp - timestamp) < 0.5 and b.behavior_detected != "no_action"
+                            for b in behavior_results
+                        )
+                        
+                        if (has_object_detection or has_behavior_detection) and frame_path and os.path.exists(frame_path):
+                            frames_with_detections.append((frame_path, timestamp))
+                    
+                    # Run facial recognition on suspicious frames
+                    for frame_path, timestamp in frames_with_detections:
+                        try:
+                            # Find associated event_id for this timestamp
+                            associated_event_id = None
+                            for event_id, event in zip(event_ids, object_events):
+                                if (event.get('start_timestamp', 0) <= timestamp <= 
+                                    event.get('end_timestamp', float('inf'))):
+                                    associated_event_id = event_id
+                                    break
+                            
+                            if not associated_event_id and event_ids:
+                                associated_event_id = event_ids[0]  # Fallback to first event
+                            
+                            # Detect faces in frame
+                            face_result = face_detector.detect_faces_in_frame(frame_path, timestamp)
+                            
+                            # Convert FaceDetectionResult to list of face info dictionaries
+                            if face_result and face_result.faces_detected > 0:
+                                # Extract face information from FaceDetectionResult
+                                for i in range(face_result.faces_detected):
+                                    face_id = face_result.detected_face_ids[i] if face_result.detected_face_ids and i < len(face_result.detected_face_ids) else f"face_{uuid.uuid4().hex[:8]}"
+                                    bounding_box = face_result.face_bounding_boxes[i] if i < len(face_result.face_bounding_boxes) else [0, 0, 0, 0]
+                                    confidence = face_result.face_confidence_scores[i] if i < len(face_result.face_confidence_scores) else 0.0
+                                    matched_person = face_result.matched_persons[i] if face_result.matched_persons and i < len(face_result.matched_persons) else None
+                                    
+                                    # Construct face_info dictionary
+                                    face_info = {
+                                        'face_id': face_id,
+                                        'bounding_box': bounding_box,
+                                        'confidence': confidence,
+                                        'person_name': matched_person.split('(')[0].strip() if matched_person else None,
+                                        'face_image_path': None  # Will be set if saved
+                                    }
+                                    
+                                    # Try to get face image path from MongoDB if it was saved
+                                    try:
+                                        faces_collection = self.db_manager.db.detected_faces
+                                        existing_face = faces_collection.find_one({'face_id': face_id})
+                                        if existing_face:
+                                            face_info['face_image_path'] = existing_face.get('face_image_path')
+                                    except:
+                                        pass
+                                    
+                                    # Get frame number from frame path if possible
+                                    frame_number = 0
+                                    try:
+                                        # Try to extract frame number from frame_path
+                                        import re
+                                        frame_match = re.search(r'frame_(\d+)', frame_path)
+                                        if frame_match:
+                                            frame_number = int(frame_match.group(1))
+                                        else:
+                                            # Estimate from timestamp (assuming 30 fps)
+                                            frame_number = int(timestamp * 30)
+                                    except:
+                                        frame_number = int(timestamp * 30)  # Fallback estimate
+                                    
+                                    # Process this face_info - Save face to MongoDB detected_faces collection
+                                    # Convert bounding_box array [x1, y1, x2, y2] to bounding_boxes object {x1, y1, x2, y2}
+                                    bounding_box_array = face_info.get('bounding_box', [])
+                                    bounding_boxes_obj = {}
+                                    if isinstance(bounding_box_array, list) and len(bounding_box_array) >= 4:
+                                        bounding_boxes_obj = {
+                                            'x1': int(bounding_box_array[0]),
+                                            'y1': int(bounding_box_array[1]),
+                                            'x2': int(bounding_box_array[2]),
+                                            'y2': int(bounding_box_array[3])
+                                        }
+                                    
+                                    face_data = {
+                                        'face_id': face_info.get('face_id', f"face_{uuid.uuid4().hex[:8]}"),
+                                        'event_id': associated_event_id or f"event_{uuid.uuid4().hex[:8]}",
+                                        'detected_at': datetime.utcnow(),
+                                        'confidence_score': float(face_info.get('confidence', 0.0)),
+                                        'bounding_box': bounding_box_array,  # Keep array format for backward compatibility
+                                        'bounding_boxes': bounding_boxes_obj,  # Object format required by MongoDB schema
+                                        'person_name': face_info.get('person_name'),
+                                        'person_confidence': None,
+                                        'face_image_path': '',  # Initialize as empty string (schema requires string)
+                                        'minio_object_key': None,
+                                        'minio_bucket': None,
+                                        'frame_number': frame_number,  # Store frame number to link to keyframes
+                                        'timestamp': float(timestamp),  # Store timestamp in seconds to link to keyframes
+                                        'video_id': video_id  # Store video_id for easier querying
+                                    }
+                                    
+                                    # Upload face image to MinIO if available
+                                    # First try to save face image from the face detection result
+                                    temp_face_path = None
+                                    try:
+                                        # Get face crop from the detection result
+                                        if i < len(face_result.face_bounding_boxes):
+                                            # Load frame and crop face
+                                            import cv2
+                                            frame_img = cv2.imread(frame_path)
+                                            if frame_img is not None:
+                                                box = face_result.face_bounding_boxes[i]
+                                                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+                                                
+                                                # Ensure valid coordinates
+                                                x1, y1 = max(0, x1), max(0, y1)
+                                                x2, y2 = min(frame_img.shape[1], x2), min(frame_img.shape[0], y2)
+                                                
+                                                if x2 > x1 and y2 > y1:
+                                                    face_crop = frame_img[y1:y2, x1:x2]
+                                                    
+                                                    # Create temp directory if it doesn't exist
+                                                    temp_dir = "temp_faces"
+                                                    os.makedirs(temp_dir, exist_ok=True)
+                                                    
+                                                    # Save face crop temporarily
+                                                    temp_face_path = os.path.join(temp_dir, f"{face_data['face_id']}.jpg")
+                                                    cv2.imwrite(temp_face_path, face_crop)
+                                                    
+                                                    # Verify file was created
+                                                    if os.path.exists(temp_face_path):
+                                                        # Upload to MinIO
+                                                        minio_face_path = f"{video_id}/faces/{face_data['face_id']}.jpg"
+                                                        with open(temp_face_path, 'rb') as f:
+                                                            file_size = os.path.getsize(temp_face_path)
+                                                            self.keyframe_repo.minio.put_object(
+                                                                self.keyframe_repo.bucket,
+                                                                minio_face_path,
+                                                                f,
+                                                                file_size,
+                                                                content_type='image/jpeg'
+                                                            )
+                                                        
+                                                        face_data['minio_object_key'] = minio_face_path
+                                                        face_data['minio_bucket'] = self.keyframe_repo.bucket
+                                                        face_data['face_image_path'] = minio_face_path  # Store MinIO path, not temp path
+                                                        logger.info(f"✅ Uploaded face image to MinIO: {minio_face_path}")
+                                                    else:
+                                                        logger.warning(f"Failed to create temp face file: {temp_face_path}")
+                                                else:
+                                                    logger.warning(f"Invalid bounding box coordinates: ({x1}, {y1}, {x2}, {y2})")
+                                    except Exception as e:
+                                        logger.warning(f"Failed to upload face image to MinIO: {e}")
+                                        import traceback
+                                        logger.debug(traceback.format_exc())
+                                    
+                                    # Clean up temp file AFTER MongoDB save (not before)
+                                    # Save to MongoDB
+                                    try:
+                                        # Ensure face_image_path is a string (not None) for schema validation
+                                        if not face_data.get('face_image_path'):
+                                            face_data['face_image_path'] = ''  # Empty string is valid
+                                        
+                                        faces_collection = self.db_manager.db.detected_faces
+                                        faces_collection.insert_one(face_data)
+                                        face_results.append(face_data)
+                                        logger.info(f"✅ Saved face to MongoDB: {face_data['face_id']}")
+                                    except Exception as e:
+                                        logger.error(f"Failed to save face to MongoDB: {e}")
+                                        import traceback
+                                        logger.debug(traceback.format_exc())
+                                        # Still add to results even if MongoDB save fails
+                                        face_results.append(face_data)
+                                    
+                                    # Clean up temp file AFTER MongoDB save
+                                    if temp_face_path and os.path.exists(temp_face_path):
+                                        try:
+                                            os.remove(temp_face_path)
+                                        except Exception as e:
+                                            logger.warning(f"Failed to remove temp face file: {e}")
+                                    
+                        except Exception as e:
+                            logger.error(f"Facial recognition error for frame {frame_path}: {e}")
+                            continue
+                    
+                    logger.info(f"✅ Facial recognition completed: {len(face_results)} faces detected")
+                    
+                    # Update metadata with face count
+                    self.video_repo.update_metadata(video_id, {
+                        "face_count": len(face_results),
+                        "facial_recognition_completed": True
+                    })
+                    
+                except ImportError:
+                    logger.warning("Facial recognition module not available")
+                except Exception as e:
+                    logger.error(f"Facial recognition failed: {e}")
+            
+            # Step 6: Video Captioning (MOVED TO END - Last step, won't block other processing)
+            captioning_results = {}
+            if self.config.enable_video_captioning and self.video_captioning:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 90,
+                    "processing_message": "Generating video captions with AI..."
+                })
+                logger.info("🎬 ===== STARTING VIDEO CAPTIONING (FINAL STEP) ===== ")
+                logger.info(f"📹 Processing {len(keyframes)} keyframes for captioning")
+                
+                try:
+                    captioning_results = self.video_captioning.process_keyframes_with_captioning(
+                        keyframes, 
+                        video_id=video_id
+                    )
+                    
+                    # Update video metadata with captioning info
+                    self.video_repo.update_metadata(video_id, {
+                        "total_captions": captioning_results.get('total_captions', 0),
+                        "captioning_enabled": captioning_results.get('enabled', False)
+                    })
+                    
+                    logger.info(f"✅ Video captioning complete: {captioning_results.get('total_captions', 0)} captions generated")
+                    logger.info(f"💾 Captions saved to MongoDB, embeddings saved to FAISS")
+                except Exception as caption_error:
+                    logger.error(f"❌ Video captioning failed (non-fatal): {caption_error}")
+                    # Don't fail the entire pipeline if captioning fails
+                    captioning_results = {'enabled': True, 'total_captions': 0, 'errors': [str(caption_error)]}
+            
+            # Step 7: Finalize processing
+            final_meta_data = {
+                "processing_status": "completed",
+                "processing_progress": 100,
+                "processing_message": "Processing completed successfully!",
+                "keyframe_count": len(keyframes),
+                "detection_count": len(detection_results),
+                "event_count": len(object_events) if detection_results else 0,
+                "face_count": len(face_results) if 'face_results' in locals() else 0,
+                "caption_count": captioning_results.get('total_captions', 0) if captioning_results else 0,
+                "processed_at": datetime.utcnow().isoformat()
+            }
+            
+            # Compressed video path was already set in Step 2
+            # No need to update again here
+            
+            self.video_repo.update_processing_status(video_id, "completed")
+            self.video_repo.update_metadata(video_id, final_meta_data)
+            
+            logger.info(f"✅ Video processing completed successfully: {video_id}")
+            
+            # Cleanup temporary files
+            self._cleanup_temp_files(video_path, keyframes)
+            
+        except Exception as e:
+            logger.error(f"❌ Video processing failed for {video_id}: {e}")
+            
+            # Update status to failed
+            self.video_repo.update_processing_status(video_id, "failed")
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 0,
+                "processing_message": f"Processing failed: {str(e)}",
+                "error_message": str(e),
+                "failed_at": datetime.utcnow().isoformat()
+            })
+            
+            raise
+    
+    def _extract_video_metadata(self, video_path: str) -> Dict:
+        """Extract metadata from video file with schema-compliant field names"""
+        try:
+            cap = cv2.VideoCapture(video_path)
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            duration = frame_count / fps if fps > 0 else 0
+            file_size = os.path.getsize(video_path)
+            cap.release()
+            
+            return {
+                "duration": duration,
+                "fps": float(fps),
+                "resolution": f"{width}x{height}",
+                "file_size": int(file_size),
+                "frame_count": int(frame_count)
+            }
+        except Exception as e:
+            logger.error(f"Failed to extract video metadata: {e}")
+            return {"file_size": os.path.getsize(video_path)}
+    
+    def _run_object_detection_on_keyframes(self, video_id: str, keyframes: List) -> List[Dict]:
+        """Run object detection on extracted keyframes, create annotated frames, and upload to MinIO"""
+        detection_results = []
+        annotated_keyframes_info = []  # Store info about annotated keyframes
+        
+        try:
+            for i, keyframe in enumerate(keyframes):
+                # Get frame data
+                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                
+                # Get frame path depending on structure
+                frame_path = (
+                    frame_data.frame_path if hasattr(frame_data, 'frame_path')
+                    else getattr(frame_data, 'path', None)
+                )
+                
+                if frame_path and os.path.exists(frame_path):
+                    # Get timestamp from frame data
+                    timestamp = (
+                        frame_data.timestamp if hasattr(frame_data, 'timestamp')
+                        else getattr(frame_data, 'timestamp', 0.0)
+                    )
+                    
+                    frame_number = getattr(frame_data, 'frame_number', i)
+                    
+                    # Run detection on this keyframe
+                    detection_result = self.object_detector.detect_objects_in_frame(
+                        frame_path, 
+                        timestamp
+                    )
+                    
+                    # Process detected objects and create annotated frame if detections exist
+                    annotated_minio_path = None
+                    if detection_result and detection_result.detected_objects:
+                        # Create annotated version of the frame
+                        try:
+                            annotated_path = self.object_detector.annotate_frame_with_detections(
+                                frame_path, 
+                                detection_result
+                            )
+                            
+                            # Upload annotated frame to MinIO
+                            if annotated_path and os.path.exists(annotated_path):
+                                annotated_minio_path = f"{video_id}/keyframes/annotated/frame_{frame_number:06d}_annotated.jpg"
+                                
+                                with open(annotated_path, 'rb') as f:
+                                    file_size = os.path.getsize(annotated_path)
+                                    metadata = {
+                                        "frame_number": str(frame_number),
+                                        "timestamp": str(timestamp),
+                                        "is_annotated": "true",
+                                        "detection_count": str(len(detection_result.detected_objects))
+                                    }
+                                    
+                                    self.keyframe_repo.minio.put_object(
+                                        self.keyframe_repo.bucket,
+                                        annotated_minio_path,
+                                        f,
+                                        file_size,
+                                        content_type='image/jpeg',
+                                        metadata=metadata
+                                    )
+                                
+                                annotated_keyframes_info.append({
+                                    "frame_number": frame_number,
+                                    "timestamp": timestamp,
+                                    "minio_path": annotated_minio_path,
+                                    "original_minio_path": f"{video_id}/keyframes/frame_{frame_number:06d}.jpg",
+                                    "detection_count": len(detection_result.detected_objects),
+                                    "objects": [obj.class_name for obj in detection_result.detected_objects],
+                                    "confidence_avg": sum(obj.confidence for obj in detection_result.detected_objects) / len(detection_result.detected_objects) if detection_result.detected_objects else 0.0
+                                })
+                                
+                                logger.info(f"✅ Uploaded annotated keyframe to MinIO: {annotated_minio_path}")
+                        except Exception as e:
+                            logger.warning(f"Failed to create/upload annotated keyframe: {e}")
+                    
+                    # Process detected objects for detection_results
+                    if detection_result and detection_result.detected_objects:
+                        for obj in detection_result.detected_objects:
+                            detection_data = {
+                                "frame_number": frame_number,
+                                "class_name": str(obj.class_name),
+                                "confidence": float(obj.confidence),
+                                "bbox": [int(x) for x in obj.bbox[:4]],  # Convert to list of ints
+                                "center_point": [float(x) for x in obj.center_point],
+                                "area": float(obj.area),
+                                "frame_timestamp": float(obj.frame_timestamp),
+                                "detection_model": str(obj.detection_model),
+                                "annotated_minio_path": annotated_minio_path  # Link to annotated frame
+                            }
+                            # Apply numpy type conversion
+                            detection_data = convert_numpy_types(detection_data)
+                            detection_results.append(detection_data)
+            
+            # Store annotated keyframes info in MongoDB metadata
+            if annotated_keyframes_info:
+                self.video_repo.update_metadata(video_id, {
+                    "annotated_keyframes_info": annotated_keyframes_info,
+                    "annotated_keyframes_count": len(annotated_keyframes_info)
+                })
+                logger.info(f"✅ Stored {len(annotated_keyframes_info)} annotated keyframes metadata")
+            
+            logger.info(f"✅ Object detection completed: {len(detection_results)} detections")
+            return detection_results
+            
+        except Exception as e:
+            logger.error(f"Object detection failed: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
+            return []
+    
+    def _create_object_events_from_detections(self, detection_results: List[Dict]) -> List[Dict]:
+        """Convert object detections into aggregated schema-compliant events"""
+        events = []
+        
+        try:
+            # Group detections by class and temporal proximity
+            detection_groups = self._group_detections_by_class_and_time(detection_results)
+            
+            for class_name, detections in detection_groups.items():
+                if not detections:
+                    continue
+                
+                # Create event from detection group
+                start_time_secs = min(d['frame_timestamp'] for d in detections)
+                end_time_secs = max(d['frame_timestamp'] for d in detections)
+                avg_confidence = sum(d['confidence'] for d in detections) / len(detections)
+                
+                # Calculate importance score based on threat level and confidence
+                threat_multiplier = {'fire': 3.0, 'gun': 3.0, 'knife': 2.0, 'smoke': 1.5}.get(class_name, 1.0)
+                importance_score = avg_confidence * threat_multiplier
+                
+                # Create schema-compliant event structure
+                event = {
+                    "event_type": f"object_detection_{class_name}",
+                    "start_timestamp": start_time_secs,
+                    "end_timestamp": end_time_secs,
+                    "confidence_score": avg_confidence,
+                    "importance_score": importance_score,
+                    "bounding_boxes": [
+                        {
+                            "x": d['bbox'][0],
+                            "y": d['bbox'][1],
+                            "width": d['bbox'][2] - d['bbox'][0],
+                            "height": d['bbox'][3] - d['bbox'][1],
+                            "confidence": d['confidence'],
+                            "class_name": d['class_name']
+                        }
+                        for d in detections
+                    ],
+                    "detected_object_type": class_name,
+                    "detection_count": len(detections),
+                    "threat_level": self._calculate_threat_level(class_name, avg_confidence)
+                }
+                
+                events.append(event)
+            
+            return events
+            
+        except Exception as e:
+            logger.error(f"Failed to create object events: {e}")
+            return []
+    
+    def _calculate_threat_level(self, class_name: str, confidence: float) -> str:
+        """Calculate threat level based on object class and confidence"""
+        if class_name in ['fire', 'gun'] and confidence > 0.7:
+            return 'critical'
+        elif class_name in ['fire', 'gun', 'knife'] and confidence > 0.5:
+            return 'high'
+        elif class_name in ['smoke', 'knife']:
+            return 'medium'
+        else:
+            return 'low'
+    
+    def _group_detections_by_class_and_time(self, detections: List[Dict], time_window: float = 5.0) -> Dict[str, List[Dict]]:
+        """Group detections by object class and temporal proximity"""
+        grouped = {}
+        
+        # Sort detections by timestamp
+        sorted_detections = sorted(detections, key=lambda x: x['frame_timestamp'])
+        
+        for detection in sorted_detections:
+            class_name = detection['class_name']
+            
+            if class_name not in grouped:
+                grouped[class_name] = []
+            
+            grouped[class_name].append(detection)
+        
+        return grouped
+    
+    def _generate_compressed_video(self, video_path: str, video_id: str) -> Optional[str]:
+        """Generate compressed version of video and upload to MinIO"""
+        try:
+            # Use compression service to compress and store video
+            result = self.compression_service.compress_and_store(video_path, video_id)
+            
+            if result and result.get('success'):
+                compression_info = {
+                    'original_size_bytes': result['original_size'],
+                    'compressed_size_bytes': result['compressed_size'],
+                    'compression_ratio': result['compression_ratio'],
+                    'output_resolution': result['output_resolution'],
+                    'local_path': result.get('local_path'),  # Store local path for fallback
+                    'minio_path': result.get('minio_path')  # Store MinIO path
+                }
+                
+                # Update video metadata with compression info (including local path)
+                self.video_repo.update_metadata(video_id, {
+                    'compression_info': compression_info,
+                    'minio_compressed_path': result.get('minio_path')  # Also store at top level for easy access
+                })
+                
+                logger.info(f"✅ Stored compression info with local path: {result.get('local_path')}")
+                return result['minio_path']
+            else:
+                logger.error("Video compression failed")
+                return None
+            
+        except Exception as e:
+            logger.error(f"❌ Failed to generate compressed video: {e}")
+            return None
+    
+    def _cleanup_temp_files(self, video_path: str, keyframes: List):
+        """Clean up temporary files after processing"""
+        try:
+            # Remove uploaded video file
+            if os.path.exists(video_path):
+                os.remove(video_path)
+            
+            # Remove temporary keyframe files
+            for keyframe in keyframes:
+                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                
+                # Get frame path depending on structure
+                frame_path = (
+                    frame_data.frame_path if hasattr(frame_data, 'frame_path')
+                    else getattr(frame_data, 'path', None)
+                )
+                
+                if frame_path and os.path.exists(frame_path):
+                    os.remove(frame_path)
+            
+            logger.info("✅ Temporary files cleaned up")
+            
+        except Exception as e:
+            logger.error(f"⚠️ Failed to cleanup temp files: {e}")
+    
+    def get_video_status(self, video_id: str) -> Dict:
+        """Get processing status for a video"""
+        video = self.video_repo.get_video_by_id(video_id)
+
+        if not video:
+            return {"error": "Video not found"}
+
+        meta_data = video.get("meta_data", {})
+
+        status_data = {
+            "video_id": video_id,
+            "status": meta_data.get("processing_status", "unknown"),
+            "filename": meta_data.get("filename"),
+            "upload_date": video.get("upload_date"),
+            "duration": video.get("duration_secs"),
+            "fps": video.get("fps"),
+            "file_size_bytes": video.get("file_size_bytes"),
+            "resolution": meta_data.get("resolution"),
+            "keyframe_count": meta_data.get("keyframe_count", 0),
+            "detection_count": meta_data.get("detection_count", 0),
+            "event_count": meta_data.get("event_count", 0),
+            "processing_progress": meta_data.get("processing_progress", 0),
+            "processing_message": meta_data.get("processing_message", "")
+        }
+
+        # Add presigned URLs for accessing content
+        try:
+            # Original video URL
+            minio_original_path = meta_data.get("minio_original_path")
+            if minio_original_path:
+                status_data["original_video_url"] = self.video_repo.get_video_presigned_url(minio_original_path)
+
+            # Compressed video URL (if available)
+            minio_compressed_path = meta_data.get("minio_compressed_path")
+            if minio_compressed_path:
+                # Always use the API endpoint which will handle MinIO/local fallback
+                status_data["compressed_video_url"] = f"/api/video/compressed/{video_id}"
+                # Also try to get presigned URL as alternative
+                try:
+                    presigned_url = self.compression_service.get_compressed_video_presigned_url(video_id)
+                    if presigned_url:
+                        status_data["compressed_video_presigned_url"] = presigned_url
+                except:
+                    pass
+            else:
+                # Check if compression was completed but path not set
+                if meta_data.get("processing_status") == "completed":
+                    # Try to construct path and use API endpoint
+                    status_data["compressed_video_url"] = f"/api/video/compressed/{video_id}"
+
+            # Keyframes URLs (if available)
+            if meta_data.get("keyframe_count", 0) > 0:
+                try:
+                    keyframes_urls = self.keyframe_repo.get_video_keyframes_presigned_urls(video_id)
+                    # If no URLs from MinIO, try to get from MongoDB metadata
+                    if not keyframes_urls and meta_data.get("keyframe_info"):
+                        # Generate URLs from stored metadata
+                        keyframes_urls = []
+                        for kf_info in meta_data.get("keyframe_info", []):
+                            minio_path = kf_info.get("minio_path")
+                            if minio_path:
+                                presigned_url = self.keyframe_repo.get_keyframe_presigned_url(minio_path)
+                                # Also provide API endpoint URL
+                                api_url = f"/api/minio/image/{self.keyframe_repo.bucket}/{minio_path}"
+                                if presigned_url:
+                                    keyframes_urls.append({
+                                        'frame_number': kf_info.get("frame_number", 0),
+                                        'timestamp': kf_info.get("timestamp", 0.0),
+                                        'minio_path': minio_path,
+                                        'presigned_url': presigned_url,
+                                        'url': api_url,  # Use API endpoint for better reliability
+                                        'api_url': api_url,
+                                        'filename': minio_path.split('/')[-1]
+                                    })
+                    status_data["keyframes_urls"] = keyframes_urls
+                except Exception as e:
+                    logger.warning(f"Failed to get keyframes URLs: {e}")
+                    status_data["keyframes_urls"] = []
+
+        except Exception as e:
+            logger.warning(f"Failed to generate presigned URLs for video {video_id}: {e}")
+
+        return status_data
+    
+    def get_video_keyframes(self, video_id: str, filter_detections: bool = False, limit: int = None) -> Dict:
+        """Get keyframes for a video with optional filtering and presigned URLs"""
+        try:
+            # Get video record to check if it exists
+            video = self.video_repo.get_video_by_id(video_id)
+            if not video:
+                return {"error": "Video not found"}
+
+            # Get keyframes with presigned URLs from keyframe repository
+            keyframes_urls = self.keyframe_repo.get_video_keyframes_presigned_urls(video_id)
+            
+            # Fallback: If no keyframes from MinIO, try to get from MongoDB metadata
+            if not keyframes_urls:
+                meta_data = video.get("meta_data", {})
+                keyframe_info = meta_data.get("keyframe_info", [])
+                if keyframe_info:
+                    logger.info(f"Using MongoDB metadata for keyframes: {len(keyframe_info)} keyframes")
+                    for kf_info in keyframe_info:
+                        minio_path = kf_info.get("minio_path")
+                        if minio_path:
+                            try:
+                                presigned_url = self.keyframe_repo.get_keyframe_presigned_url(minio_path)
+                                if presigned_url:
+                                    keyframes_urls.append({
+                                        'frame_number': kf_info.get("frame_number", 0),
+                                        'timestamp': kf_info.get("timestamp", 0.0),
+                                        'minio_path': minio_path,
+                                        'presigned_url': presigned_url,
+                                        'url': presigned_url,
+                                        'filename': minio_path.split('/')[-1]
+                                    })
+                            except Exception as e:
+                                logger.warning(f"Failed to generate presigned URL for {minio_path}: {e}")
+            
+            # Get events to determine which keyframes have detections
+            events = self.event_repo.get_events_by_video_id(video_id)
+            detection_events = [e for e in events if e.get("event_type", "").startswith("object_detection_")]
+            
+            # Create a map of timestamps that have detections
+            detection_timestamps = set()
+            for event in detection_events:
+                start_ms = event.get("start_timestamp_ms", 0)
+                end_ms = event.get("end_timestamp_ms", 0)
+                # Convert milliseconds to seconds and create range
+                start_sec = start_ms / 1000.0
+                end_sec = end_ms / 1000.0
+                # Add timestamps in 1-second intervals
+                for t in range(int(start_sec), int(end_sec) + 1):
+                    detection_timestamps.add(t)
+
+            # Get annotated keyframes info from metadata
+            meta_data = video.get("meta_data", {})
+            annotated_keyframes_info = meta_data.get("annotated_keyframes_info", [])
+            annotated_lookup = {kf.get("frame_number"): kf for kf in annotated_keyframes_info}
+            
+            # Get faces for this video to check which keyframes have faces
+            faces_data = self.get_video_faces(video_id)
+            faces = faces_data.get("faces", [])
+            
+            # Create a map of frame_numbers and timestamps that have faces
+            frames_with_faces = set()
+            timestamps_with_faces = set()
+            for face in faces:
+                face_frame = face.get('frame_number', 0)
+                face_timestamp = face.get('timestamp', 0)
+                if face_frame:
+                    frames_with_faces.add(face_frame)
+                if face_timestamp:
+                    timestamps_with_faces.add(face_timestamp)
+            
+            # Enhance keyframes with detection info and annotated URLs
+            enhanced_keyframes = []
+            for kf in keyframes_urls:
+                timestamp_sec = kf.get('timestamp', 0)
+                frame_number = kf.get('frame_number', 0)
+                
+                # Check if this timestamp has detections (within 1 second tolerance)
+                has_detections = any(abs(timestamp_sec - dt) < 1.0 for dt in detection_timestamps)
+                
+                # Check if this keyframe has faces (by frame_number or timestamp)
+                has_faces = (
+                    frame_number in frames_with_faces or
+                    any(abs(timestamp_sec - ft) < 0.5 for ft in timestamps_with_faces)
+                )
+                
+                enhanced_kf = {
+                    **kf,
+                    'has_detections': has_detections,
+                    'has_faces': has_faces,  # Add face detection flag
+                    'url': kf.get('presigned_url'),  # Add url alias for compatibility
+                }
+                
+                # Add annotated frame info if available
+                if frame_number in annotated_lookup:
+                    annotated_info = annotated_lookup[frame_number]
+                    # Generate presigned URL for annotated frame
+                    try:
+                        annotated_presigned_url = self.keyframe_repo.get_keyframe_presigned_url(
+                            annotated_info.get("minio_path")
+                        )
+                        if annotated_presigned_url:
+                            enhanced_kf['annotated_url'] = annotated_presigned_url
+                            enhanced_kf['annotated_presigned_url'] = annotated_presigned_url
+                            enhanced_kf['detection_count'] = annotated_info.get("detection_count", 0)
+                            enhanced_kf['objects'] = annotated_info.get("objects", [])
+                            enhanced_kf['confidence_avg'] = annotated_info.get("confidence_avg", 0.0)
+                            enhanced_kf['has_detections'] = True  # Override if annotated frame exists
+                    except Exception as e:
+                        logger.warning(f"Failed to get presigned URL for annotated keyframe: {e}")
+                
+                # If this keyframe has faces, prioritize showing "Face Detected" over object names
+                if has_faces:
+                    # Count faces for this keyframe
+                    face_count = sum(
+                        1 for face in faces 
+                        if (face.get('frame_number') == frame_number or 
+                            abs(face.get('timestamp', 0) - timestamp_sec) < 0.5)
+                    )
+                    enhanced_kf['face_count'] = face_count
+                    # Add "Face Detected" to objects list if not already present, and prioritize it
+                    if enhanced_kf.get('objects'):
+                        # Check if "Face" is already in objects
+                        has_face_in_objects = any('face' in str(obj).lower() for obj in enhanced_kf['objects'])
+                        if not has_face_in_objects:
+                            # Add "Face Detected" at the beginning
+                            enhanced_kf['objects'] = ['Face Detected'] + enhanced_kf['objects']
+                        else:
+                            # Move "Face Detected" to front, remove duplicates
+                            face_objects = [obj for obj in enhanced_kf['objects'] if 'face' in str(obj).lower()]
+                            other_objects = [obj for obj in enhanced_kf['objects'] if 'face' not in str(obj).lower()]
+                            enhanced_kf['objects'] = ['Face Detected'] + other_objects
+                    else:
+                        enhanced_kf['objects'] = ['Face Detected']
+                    # Update detection count to include faces
+                    enhanced_kf['detection_count'] = enhanced_kf.get('detection_count', 0) + face_count
+                
+                enhanced_keyframes.append(enhanced_kf)
+
+            # Apply filtering if requested
+            if filter_detections:
+                filtered_keyframes = [kf for kf in enhanced_keyframes if kf.get('has_detections', False)]
+            else:
+                filtered_keyframes = enhanced_keyframes
+
+            # Apply limit if specified
+            if limit and limit > 0:
+                filtered_keyframes = filtered_keyframes[:limit]
+
+            # Get video metadata for additional context
+            meta_data = video.get("meta_data", {})
+            keyframe_count = meta_data.get("keyframe_count", 0)
+
+            return {
+                "video_id": video_id,
+                "keyframes": filtered_keyframes,
+                "total_keyframes": len(filtered_keyframes),
+                "filter_applied": filter_detections,
+                "limit_applied": limit if limit and limit > 0 else None,
+                "keyframe_count": keyframe_count
+            }
+
+        except Exception as e:
+            logger.error(f"Failed to get keyframes for video {video_id}: {e}")
+            return {"error": str(e)}
+
+    def get_video_events(self, video_id: str, event_type: str = None) -> Dict:
+        """Return the events stored for a video, optionally narrowed to one type.
+
+        Args:
+            video_id: Identifier handed to the event repository lookup.
+            event_type: When truthy, only events whose ``event_type`` field
+                equals this value are kept; otherwise all events are returned.
+
+        Returns:
+            Dict with ``video_id``, the ``events`` list and ``total_events``.
+        """
+        matched = self.event_repo.get_events_by_video_id(video_id)
+
+        # A missing or empty event_type means "no filtering".
+        if event_type:
+            matched = list(filter(lambda item: item.get("event_type") == event_type, matched))
+
+        summary = {"video_id": video_id, "events": matched}
+        summary["total_events"] = len(matched)
+        return summary
+    
+    def get_video_detections(self, video_id: str, class_filter: str = None) -> Dict:
+        """Get object detections for a video from its stored events.
+
+        Only events whose ``event_type`` starts with ``object_detection_`` are
+        considered. Each event's detections are read from ``bounding_boxes``
+        (a dict holding a ``detections`` list, or a list), falling back to a
+        top-level ``detections`` field. An event that yields no detection
+        entries still contributes one placeholder detection derived from its
+        ``event_type`` and ``confidence_score``.
+
+        Args:
+            video_id: Identifier handed to the event repository lookup.
+            class_filter: When truthy, keep only events whose type is exactly
+                ``object_detection_<class_filter>``.
+
+        Returns:
+            Dict with ``video_id``, ``detections`` and ``total_detections``.
+            On any exception the list is empty and an ``error`` string is added.
+        """
+        prefix = "object_detection_"
+        try:
+            # Get all events for this video
+            events = self.event_repo.get_events_by_video_id(video_id)
+
+            # Keep object detection events only; tolerate a None event_type
+            detection_events = [
+                e for e in events if (e.get("event_type") or "").startswith(prefix)
+            ]
+
+            # Apply class filter if specified
+            if class_filter:
+                wanted_type = f"{prefix}{class_filter}"
+                detection_events = [e for e in detection_events if e.get("event_type") == wanted_type]
+
+            detections = []
+            for event in detection_events:
+                event_id = event.get("event_id")
+                # Event start in seconds; a missing/None value counts as 0
+                event_ts = (event.get("start_timestamp_ms") or 0) / 1000.0
+
+                # Handle different bounding_boxes structures
+                bboxes = event.get("bounding_boxes", {})
+                event_detections = []
+                if isinstance(bboxes, dict):
+                    event_detections = bboxes.get("detections", [])
+                elif isinstance(bboxes, list):
+                    event_detections = bboxes
+
+                # Also check if detections are stored directly in event
+                if not event_detections:
+                    event_detections = event.get("detections", [])
+
+                # Remember how many detections existed before this event so the
+                # fallback below is decided per event, not for the whole video.
+                count_before = len(detections)
+
+                for det in event_detections or []:
+                    if isinstance(det, dict):
+                        bbox = det.get("bbox")
+                        if bbox is None:
+                            if all(key in det for key in ("x", "y", "width", "height")):
+                                # Box dicts produced by _aggregate_detection_events use
+                                # x/y/width/height; convert to [x1, y1, x2, y2] like the
+                                # list branch below does.
+                                bbox = [
+                                    int(det["x"]),
+                                    int(det["y"]),
+                                    int(det["x"] + det["width"]),
+                                    int(det["y"] + det["height"]),
+                                ]
+                            else:
+                                bbox = [0, 0, 0, 0]
+                        detections.append({
+                            "class_name": det.get("class", det.get("class_name", "unknown")),
+                            "confidence": float(det.get("confidence", 0.0)),
+                            "bbox": bbox,
+                            "timestamp": float(det.get("timestamp", event_ts)),
+                            "event_id": event_id,
+                            "model": det.get("model", "unknown")
+                        })
+                    elif isinstance(det, list) and len(det) >= 4:
+                        # List format [x, y, width, height, class, confidence]
+                        detections.append({
+                            "class_name": str(det[4]) if len(det) > 4 else "unknown",
+                            "confidence": float(det[5]) if len(det) > 5 else 0.0,
+                            "bbox": [int(det[0]), int(det[1]), int(det[0] + det[2]), int(det[1] + det[3])],
+                            "timestamp": float(event_ts),
+                            "event_id": event_id,
+                            "model": "unknown"
+                        })
+
+                # Fall back to the event type when THIS event produced nothing
+                if len(detections) == count_before:
+                    detections.append({
+                        "class_name": event["event_type"][len(prefix):],
+                        "confidence": float(event.get("confidence_score", 0.0)),
+                        "bbox": [0, 0, 0, 0],  # No bbox info available
+                        "timestamp": float(event_ts),
+                        "event_id": event_id,
+                        "model": "unknown"
+                    })
+
+            return {
+                "video_id": video_id,
+                "detections": detections,
+                "total_detections": len(detections)
+            }
+
+        except Exception as e:
+            logger.error(f"Failed to get detections for video {video_id}: {e}")
+            return {
+                "video_id": video_id,
+                "detections": [],
+                "total_detections": 0,
+                "error": str(e)
+            }
+    
+    def get_video_faces(self, video_id: str) -> Dict:
+        """Collect the detected-face documents linked to a video's events.
+
+        The video's events are fetched first; faces are then looked up in the
+        ``detected_faces`` collection by ``event_id``. Events without an
+        ``event_id`` are ignored, and no query is made when none remain.
+
+        Args:
+            video_id: Identifier handed to the event repository lookup.
+
+        Returns:
+            Dict with ``video_id``, ``faces`` and ``total_faces``. On any
+            exception the list is empty and an ``error`` string is added.
+        """
+        response = {"video_id": video_id, "faces": [], "total_faces": 0}
+        try:
+            # Gather the ids of every event that actually carries one
+            linked_ids = []
+            for evt in self.event_repo.get_events_by_video_id(video_id):
+                if evt.get('event_id'):
+                    linked_ids.append(evt.get('event_id'))
+
+            if linked_ids:
+                # Faces are associated with events, not directly with the video
+                raw_faces = list(
+                    self.db_manager.db.detected_faces.find({"event_id": {"$in": linked_ids}})
+                )
+
+                # Convert ObjectIds to strings
+                from database.models import convert_objectid_to_string
+                converted = list(map(convert_objectid_to_string, raw_faces))
+
+                response["faces"] = converted
+                response["total_faces"] = len(converted)
+        except Exception as e:
+            logger.error(f"Failed to get faces for video {video_id}: {e}")
+            response = {
+                "video_id": video_id,
+                "faces": [],
+                "total_faces": 0,
+                "error": str(e)
+            }
+        return response
+    
+    def process_video_complete(self, video_path: str, video_id: str, user_id: str = None,
+                             upload_to_minio: bool = True, enable_compression: bool = True,
+                             enable_object_detection: bool = True, enable_behavior_analysis: bool = True,
+                             enable_event_aggregation: bool = True,
+                             enable_deduplication: bool = True) -> Dict:
+        """
+        Complete video processing pipeline with all features
+
+        Runs, in order: video record creation, optional MinIO upload, keyframe
+        extraction, optional object detection and behavior analysis, optional
+        event aggregation/deduplication and persistence, optional annotated
+        video creation, optional compression, and a final metadata update.
+        The MinIO upload, per-event saving, annotated-video creation and
+        compression are best-effort: their failures are logged and the
+        pipeline continues.
+
+        Args:
+            video_path: Path to the video file
+            video_id: Unique identifier for the video
+            user_id: User identifier ("system" is stored when not given)
+            upload_to_minio: Whether to upload to MinIO storage
+            enable_compression: Whether to compress the video
+            enable_object_detection: Whether to run object detection
+            enable_behavior_analysis: Whether to run behavior analysis
+            enable_event_aggregation: Whether to aggregate events
+            enable_deduplication: Whether to deduplicate similar events
+
+        Returns:
+            Dict with processing results and statistics
+
+        Raises:
+            Exception: Any error from a step that is not best-effort is
+                re-raised after the video status has been set to "failed".
+        """
+        logger.info(f"🔥 Starting complete pipeline processing for {video_id}")
+
+        start_time = time.time()
+        results = {
+            "video_id": video_id,
+            "status": "processing",
+            "minio_uploaded": False,
+            "processing_stats": {}
+        }
+
+        try:
+            # Step 1: Create video record with metadata
+            logger.info("📝 Creating video record...")
+            video_metadata = self._extract_video_metadata(video_path)
+
+            # Create schema-compliant video record
+            video_record = {
+                "video_id": video_id,
+                "user_id": user_id or "system",
+                "file_path": f"videos/{video_id}.mp4",
+                "fps": video_metadata.get("fps", 30.0),
+                "duration_secs": int(video_metadata.get("duration", 0)),
+                "file_size_bytes": video_metadata.get("file_size", 0),
+                "codec": "h264",  # default codec
+                "meta_data": {
+                    "processing_status": "processing",
+                    "filename": os.path.basename(video_path),
+                    "resolution": video_metadata.get("resolution"),
+                    "frame_count": video_metadata.get("frame_count")
+                }
+            }
+
+            self.video_repo.create_video_record(video_record)
+            logger.info(f"✅ Created video record: {video_id}")
+
+            # Step 2: Upload to MinIO (if enabled and available)
+            minio_uploaded = False
+            if upload_to_minio:
+                try:
+                    logger.info("☁️ Uploading to MinIO...")
+                    minio_path = self.video_repo.upload_video_to_minio(video_path, video_id)
+                    minio_uploaded = True
+                    self.video_repo.update_metadata(video_id, {"minio_original_path": minio_path})
+                    logger.info(f"✅ Video uploaded to MinIO: {minio_path}")
+                except Exception as e:
+                    logger.warning(f"⚠️ MinIO upload failed (graceful fallback): {e}")
+
+            results["minio_uploaded"] = minio_uploaded
+
+            # Step 3: Process keyframes with object detection
+            logger.info("🔑 Processing keyframes...")
+            keyframes = self.video_processor.extract_keyframes(video_path)
+            logger.info(f"✅ Extracted {len(keyframes)} keyframes")
+
+            # Run object detection on keyframes if enabled
+            detection_results = []
+            if enable_object_detection and self.object_detector:
+                logger.info("🎯 Running object detection...")
+                for keyframe in keyframes:
+                    # Handle KeyframeResult objects correctly
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+
+                    if frame_path and os.path.exists(frame_path):
+                        result = self.object_detector.detect_objects_in_frame(frame_path, timestamp)
+                        detections = []
+
+                        if result and result.detected_objects:
+                            for obj in result.detected_objects:
+                                detection_dict = {
+                                    "class_name": str(obj.class_name),
+                                    "confidence": float(obj.confidence),
+                                    "bbox": [int(x) for x in obj.bbox[:4]],
+                                    "frame_timestamp": float(timestamp),
+                                    "annotated_path": getattr(obj, 'annotated_path', None)
+                                }
+                                # Apply numpy type conversion
+                                detection_dict = convert_numpy_types(detection_dict)
+                                detections.append(detection_dict)
+
+                        # Store detections in keyframe (add as attribute)
+                        keyframe.object_detections = detections
+                        detection_results.extend(detections)
+
+                        # Log fire detections specifically
+                        fire_detections = [d for d in detections if d.get('class_name') == 'fire']
+                        if fire_detections:
+                            logger.info(f"🔥 Fire detected at {timestamp:.1f}s (confidence: {fire_detections[0].get('confidence', 0):.2f})")
+
+                logger.info(f"✅ Found {len(detection_results)} object detections")
+
+            # Step 3b: Run behavior analysis on keyframes if enabled
+            behavior_results = []
+            behavior_events = []
+            if enable_behavior_analysis and self.behavior_analyzer:
+                logger.info("🔍 Running behavior analysis...")
+                # Pass video_path for 3D-ResNet models (fighting, road_accident) which need 16-frame clips
+                behavior_results, behavior_events = self.behavior_analyzer.process_keyframes_with_behavior_analysis(keyframes, video_path=video_path)
+
+                # Store behavior detections in keyframes
+                for keyframe in keyframes:
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+
+                    # Find behavior detections for this frame
+                    frame_behaviors = [r for r in behavior_results if r.frame_path == frame_path and abs(r.timestamp - timestamp) < 0.1]
+                    if frame_behaviors:
+                        behavior_detections = []
+                        for behavior in frame_behaviors:
+                            behavior_dict = {
+                                "behavior_type": behavior.behavior_detected,
+                                "confidence": float(behavior.confidence),
+                                "frame_timestamp": float(behavior.timestamp),
+                                "model_used": behavior.model_used
+                            }
+                            behavior_dict = convert_numpy_types(behavior_dict)
+                            behavior_detections.append(behavior_dict)
+
+                        keyframe.behavior_detections = behavior_detections
+
+                logger.info(f"✅ Found {len(behavior_results)} behavior detections, {len(behavior_events)} behavior events")
+
+            # Step 4: Event aggregation and deduplication
+            events = []
+            if enable_event_aggregation:
+                logger.info("📅 Performing event aggregation...")
+
+                # Group detections by type and time proximity
+                detection_events = self._aggregate_detection_events(keyframes, video_id)
+                events.extend(detection_events)
+
+                # Add behavior events
+                if behavior_events:
+                    for behavior_event in behavior_events:
+                        event_dict = {
+                            "event_type": f"behavior_{behavior_event.behavior_type}",
+                            "start_timestamp": behavior_event.start_timestamp,
+                            "end_timestamp": behavior_event.end_timestamp,
+                            "confidence_score": float(behavior_event.confidence),
+                            "keyframes": behavior_event.keyframes,
+                            "importance_score": float(behavior_event.importance_score),
+                            "description": f"{behavior_event.behavior_type.capitalize()} detected",
+                            "detection_data": {
+                                "model_used": behavior_event.model_used,
+                                "frame_indices": behavior_event.frame_indices
+                            }
+                        }
+                        event_dict = convert_numpy_types(event_dict)
+                        events.append(event_dict)
+
+                if enable_deduplication:
+                    logger.info("🔄 Deduplicating similar events...")
+                    events = self._deduplicate_events(events)
+
+                # Store events in database using EventRepository
+                logger.info(f"💾 Saving {len(events)} events to database...")
+                saved_count = 0
+                for event in events:
+                    try:
+                        # EventRepository.save_event expects event dict with proper structure
+                        # It will handle timestamp conversion and field mapping
+                        event['video_id'] = video_id  # Add video_id to event data
+                        self.event_repo.save_event(event)
+                        saved_count += 1
+                    except Exception as e:
+                        logger.error(f"Failed to save event: {e}")
+
+                # Report only the events that were actually saved
+                logger.info(f"✅ Stored {saved_count} events in database")
+
+            # Step 5: Create annotated video with bounding boxes (if detections exist)
+            annotated_video_path = None
+            annotated_minio_path = None
+            if enable_object_detection and detection_results and self.object_detector:
+                try:
+                    logger.info("🎨 Creating annotated video with bounding boxes...")
+
+                    # Imported once here instead of once per keyframe; still inside
+                    # the try so an import failure only skips the annotated video.
+                    from object_detection import ObjectDetectionResult, DetectedObject
+                    from core.video_processing import FrameData
+
+                    # Convert keyframes to detection results format for annotation
+                    detection_result_objects = []
+                    for keyframe in keyframes:
+                        if hasattr(keyframe, 'object_detections') and keyframe.object_detections:
+                            detected_objects = []
+                            for det in keyframe.object_detections:
+                                detected_objects.append(DetectedObject(
+                                    class_name=det['class_name'],
+                                    confidence=det['confidence'],
+                                    bbox=det['bbox']
+                                ))
+
+                            if detected_objects:
+                                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else None
+                                frame_path = frame_data.frame_path if frame_data else None
+                                timestamp = frame_data.timestamp if frame_data else 0
+
+                                if frame_path:
+                                    detection_result_objects.append(ObjectDetectionResult(
+                                        frame_path=frame_path,
+                                        timestamp=timestamp,
+                                        detected_objects=detected_objects,
+                                        total_detections=len(detected_objects)
+                                    ))
+
+                    if detection_result_objects:
+                        # Create annotated video
+                        annotated_video_path = f"video_processing_outputs/annotated/{video_id}_annotated.mp4"
+                        os.makedirs(os.path.dirname(annotated_video_path), exist_ok=True)
+
+                        annotated_path = self.object_detector.create_annotated_video(
+                            video_path,
+                            detection_result_objects,
+                            annotated_video_path
+                        )
+
+                        if annotated_path and os.path.exists(annotated_path):
+                            annotated_video_path = annotated_path
+
+                            # Upload annotated video to MinIO
+                            try:
+                                annotated_minio_path = f"annotated/{video_id}/video_annotated.mp4"
+                                with open(annotated_video_path, 'rb') as file_data:
+                                    file_info = os.stat(annotated_video_path)
+                                    self.video_repo.minio.put_object(
+                                        self.video_repo.video_bucket,
+                                        annotated_minio_path,
+                                        file_data,
+                                        length=file_info.st_size,
+                                        content_type='video/mp4'
+                                    )
+                                logger.info(f"✅ Uploaded annotated video to MinIO: {annotated_minio_path}")
+
+                                # Update metadata with annotated video path
+                                self.video_repo.update_metadata(video_id, {
+                                    "minio_annotated_path": annotated_minio_path,
+                                    "annotated_video_path": annotated_video_path
+                                })
+                            except Exception as e:
+                                logger.warning(f"⚠️ Failed to upload annotated video to MinIO: {e}")
+
+                            logger.info(f"✅ Annotated video created: {annotated_video_path}")
+                        else:
+                            logger.warning("⚠️ Annotated video creation returned no path")
+                    else:
+                        logger.info("ℹ️ No detections found, skipping annotated video creation")
+
+                except Exception as e:
+                    logger.warning(f"⚠️ Annotated video creation failed: {e}")
+                    import traceback
+                    logger.error(traceback.format_exc())
+
+            # Step 6: Video compression (if enabled)
+            compression_info = {}
+            if enable_compression:
+                try:
+                    logger.info("📦 Compressing video...")
+                    from video_compression import OptimizedVideoCompressor
+                    compressor = OptimizedVideoCompressor()
+
+                    compressed_path = f"video_processing_outputs/compressed/{video_id}_compressed.mp4"
+                    os.makedirs(os.path.dirname(compressed_path), exist_ok=True)
+
+                    compression_result = compressor.compress_video(video_path, compressed_path)
+
+                    if compression_result.get('success'):
+                        original_size = os.path.getsize(video_path) / (1024 * 1024)  # MB
+                        compressed_size = os.path.getsize(compressed_path) / (1024 * 1024)  # MB
+                        compression_ratio = (1 - compressed_size / original_size) * 100 if original_size > 0 else 0
+
+                        compression_info = {
+                            "original_size_mb": round(original_size, 2),
+                            "compressed_size_mb": round(compressed_size, 2),
+                            "compression_ratio": round(compression_ratio, 1),
+                            "compressed_path": compressed_path
+                        }
+
+                        # NOTE(review): compressed_path is a local filesystem path and this
+                        # method never uploads the compressed file, yet it is stored under
+                        # "minio_compressed_path" — confirm the intended value.
+                        self.video_repo.update_metadata(video_id, {"minio_compressed_path": compressed_path})
+                        logger.info(f"✅ Video compressed: {compression_ratio:.1f}% reduction")
+
+                except Exception as e:
+                    logger.warning(f"⚠️ Video compression failed: {e}")
+
+            # Step 7: Update final status
+            processing_time = time.time() - start_time
+
+            # NOTE(review): "annotated_video_path" was written in Step 5 with the local
+            # file path; here the same key carries the MinIO object path. If
+            # update_metadata merges keys this replaces the earlier value — confirm.
+            final_meta_data = {
+                "processing_status": "completed",
+                "keyframe_count": len(keyframes),
+                "detection_count": len(detection_results),
+                "behavior_detection_count": len(behavior_results),
+                "behavior_event_count": len(behavior_events),
+                "event_count": len(events),
+                "processing_time_seconds": round(processing_time, 2),
+                "processed_at": datetime.utcnow().isoformat(),
+                "compressed_video_info": compression_info,
+                "annotated_video_available": bool(annotated_minio_path),
+                "annotated_video_path": annotated_minio_path
+            }
+
+            self.video_repo.update_processing_status(video_id, "completed")
+            self.video_repo.update_metadata(video_id, final_meta_data)
+
+            results.update({
+                "status": "completed",
+                "processing_stats": final_meta_data,
+                "keyframes_extracted": len(keyframes),
+                "objects_detected": len(detection_results),
+                "behaviors_detected": len(behavior_results),
+                "behavior_events": len(behavior_events),
+                "events_created": len(events),
+                "processing_time": processing_time
+            })
+
+            logger.info(f"🎉 Complete pipeline processing finished for {video_id} in {processing_time:.1f}s")
+            return results
+
+        except Exception as e:
+            logger.error(f"❌ Processing failed for {video_id}: {e}")
+
+            # Update status to failed; this is best-effort so the original
+            # error below is the one the caller sees, but do not hide the
+            # secondary failure from the logs.
+            try:
+                self.video_repo.update_processing_status(video_id, "failed")
+                self.video_repo.update_metadata(video_id, {
+                    "error_message": str(e),
+                    "failed_at": datetime.utcnow().isoformat()
+                })
+            except Exception as status_error:
+                logger.warning(f"Failed to record failed status for {video_id}: {status_error}")
+
+            results.update({
+                "status": "failed",
+                "error": str(e)
+            })
+
+            # Bare raise re-raises the active exception unchanged
+            raise
+    
+    def _aggregate_detection_events(self, keyframes, video_id):
+        """Turn per-keyframe object detections into schema-compliant events.
+
+        Detections are bucketed by ``class_name`` (in order of first
+        appearance), each bucket is ordered by timestamp, and consecutive
+        detections at most 3 seconds apart are folded into one event. The
+        ``video_id`` argument is accepted but not used here.
+
+        Returns:
+            List of event dicts, one class after another.
+        """
+        def _box_entry(box, score, label):
+            # bbox arrives as [x1, y1, x2, y2]; events store x/y/width/height
+            return {
+                "x": int(box[0]),
+                "y": int(box[1]),
+                "width": int(box[2] - box[0]),
+                "height": int(box[3] - box[1]),
+                "confidence": float(score),
+                "class_name": label
+            }
+
+        # Bucket every detection under its class name
+        buckets = {}
+        for kf in keyframes:
+            # KeyframeResult objects carry frame_data; otherwise use the object itself
+            carrier = kf.frame_data if hasattr(kf, 'frame_data') else kf
+            for found in getattr(kf, 'object_detections', []):
+                buckets.setdefault(found.get('class_name', 'unknown'), []).append({
+                    'keyframe': kf,
+                    'detection': found,
+                    'timestamp': getattr(carrier, 'timestamp', 0)
+                })
+
+        events = []
+        for label, entries in buckets.items():
+            # Chronological order within the class
+            entries.sort(key=lambda item: item['timestamp'])
+
+            open_event = None
+            for entry in entries:
+                ts = entry['timestamp']
+                score = entry['detection'].get('confidence', 0)
+                box = entry['detection'].get('bbox', [0, 0, 0, 0])
+
+                # Within 3 seconds of the running event: extend it
+                if open_event is not None and ts - open_event['end_timestamp'] <= 3.0:
+                    open_event['end_timestamp'] = ts
+                    open_event['confidence_score'] = max(open_event['confidence_score'], score)
+                    open_event['bounding_boxes'].append(_box_entry(box, score, label))
+                    continue
+
+                # Otherwise close the running event (if any) and start a new one
+                if open_event is not None:
+                    events.append(open_event)
+
+                threat = self._calculate_threat_level(label, score)
+                if label == 'fire':
+                    importance = 0.9
+                elif label in ['knife', 'gun']:
+                    importance = 0.7
+                else:
+                    importance = 0.5
+
+                open_event = {
+                    'event_type': f'object_detection_{label}',
+                    'start_timestamp': ts,
+                    'end_timestamp': ts,
+                    'confidence_score': score,
+                    'importance_score': importance,
+                    'threat_level': threat,
+                    'bounding_boxes': [_box_entry(box, score, label)],
+                    'detected_object_type': label
+                }
+
+            # Flush the last running event of this class
+            if open_event is not None:
+                events.append(open_event)
+
+        return events
+    
+    def _deduplicate_events(self, events):
+        """Merge events of the same kind that overlap or follow closely in time.
+
+        The input list is sorted in place by ``start_timestamp``. Each event is
+        compared with the last three kept events and folded into the first one
+        that has the same ``event_type`` and ``detected_object_type`` and whose
+        end lies no more than 5 seconds before this event's start (which also
+        covers events starting inside the kept event). Folding extends the kept
+        event's ``end_timestamp``, keeps the higher ``confidence_score`` and
+        appends any ``bounding_boxes``; the kept event dict is modified in place.
+
+        Args:
+            events: List of event dicts.
+
+        Returns:
+            The list of kept events (the input list itself when it holds at
+            most one event).
+        """
+        if len(events) <= 1:
+            return events
+
+        # Sort events by start timestamp
+        events.sort(key=lambda x: x.get('start_timestamp', 0))
+
+        deduplicated = []
+
+        for event in events:
+            merge_target = None
+
+            for recent_event in deduplicated[-3:]:  # Check last 3 events
+                same_kind = (
+                    event.get('event_type') == recent_event.get('event_type')
+                    and event.get('detected_object_type') == recent_event.get('detected_object_type')
+                )
+                # Events are sorted by start, so a non-positive gap means the two
+                # overlap; a gap of up to 5 seconds still counts as the same incident.
+                gap = event.get('start_timestamp', 0) - recent_event.get('end_timestamp', 0)
+                if same_kind and gap <= 5.0:
+                    merge_target = recent_event
+                    break
+
+            if merge_target is None:
+                deduplicated.append(event)
+                continue
+
+            # Merge into the existing event (extend time window, keep highest confidence)
+            merge_target['end_timestamp'] = max(
+                merge_target.get('end_timestamp', 0),
+                event.get('end_timestamp', 0)
+            )
+            merge_target['confidence_score'] = max(
+                merge_target.get('confidence_score', 0),
+                event.get('confidence_score', 0)
+            )
+
+            # Not every event carries bounding boxes (behavior events built in
+            # process_video_complete have none), so never index the key blindly.
+            extra_boxes = event.get('bounding_boxes') or []
+            if extra_boxes:
+                if merge_target.get('bounding_boxes') is None:
+                    merge_target['bounding_boxes'] = []
+                merge_target['bounding_boxes'].extend(extra_boxes)
+
+        logger.info(f"🔄 Deduplication: {len(events)} → {len(deduplicated)} events")
+        return deduplicated
\ No newline at end of file
diff --git a/detectifai_events.py b/detectifai_events.py
new file mode 100644
index 0000000000000000000000000000000000000000..5915ba8ccd661f5f166008e60ad8e03c9320e86c
--- /dev/null
+++ b/detectifai_events.py
@@ -0,0 +1,577 @@
+"""
+DetectifAI Security Event System
+
+This module defines the specific security event types and processing logic
+according to DetectifAI's scope: assault/fighting, weapons, fire, jumping over wall,
+road accidents, and suspicious person re-occurrence.
+"""
+
+import os
+import time
+import logging
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass, asdict
+from enum import Enum
+import json
+
+logger = logging.getLogger(__name__)
+
+class DetectifAIEventType(Enum):
+    """Closed set of security event categories; each value is the category's string label."""
+    FIRE_DETECTION = "fire_detection"      # assigned to object class 'fire'
+    WEAPON_DETECTION = "weapon_detection"  # assigned to object classes 'knife' and 'gun'
+    PHYSICAL_ASSAULT = "physical_assault"  # fighting, violence
+    WALL_JUMPING = "wall_jumping"          # perimeter breach
+    ROAD_ACCIDENT = "road_accident"        # vehicle collision
+    SUSPICIOUS_PERSON_REOCCURRENCE = "suspicious_person_reoccurrence"
+    GENERAL_MOTION = "general_motion"      # fallback for unclassified motion
+
+class ThreatLevel(Enum):
+    """Severity ladder for DetectifAI events, listed from most to least severe."""
+    CRITICAL = "critical"  # Immediate response required (fire, weapons)
+    HIGH = "high"          # Urgent attention needed (assault, suspicious person)
+    MEDIUM = "medium"      # Monitor closely (wall jumping, accidents)
+    LOW = "low"            # General awareness (motion); also the fallback level
+
+@dataclass
+class DetectifAIEvent:
+    """A single security event in DetectifAI form, before any aggregation."""
+    event_id: str
+    event_type: DetectifAIEventType
+    threat_level: ThreatLevel
+    start_timestamp: float
+    end_timestamp: float
+    duration: float  # producers in this module set it to end_timestamp - start_timestamp
+    confidence: float
+
+    # Location and detection details
+    keyframes: List[str]
+    detection_details: Dict[str, Any]  # Free-form payload; keys depend on the event type
+
+    # Security-specific fields
+    requires_immediate_response: bool
+    investigation_priority: int  # 1-10 scale
+
+    # Person tracking (for applicable events); persons_detected stays None unless supplied
+    persons_detected: Optional[List[Dict]] = None
+    is_person_reoccurrence: bool = False
+
+    # Context and description
+    description: str = ""
+    security_notes: str = ""
+
+    # Metadata
+    processing_timestamp: Optional[float] = None  # producers in this module pass time.time()
+    detection_model_used: str = ""
+
+@dataclass
+class DetectifAICanonicalEvent:
+    """Canonical representation of aggregated DetectifAI security events"""
+    canonical_id: str  # DetectifAIEventAggregator builds it as "detectifai_canonical_NNNN"
+    event_type: DetectifAIEventType
+    threat_level: ThreatLevel  # most severe level found among the aggregated events
+    # Field notes in this class describe how DetectifAIEventAggregator populates each value.
+    # Temporal information
+    start_time: float  # earliest start_timestamp among the aggregated events
+    end_time: float  # latest end_timestamp among the aggregated events
+    total_duration: float  # end_time - start_time
+
+    # Aggregation details
+    aggregated_events_count: int
+    aggregated_event_ids: List[str]
+    representative_frame: str  # first keyframe of the top-priority event, or "" if it has none
+    all_keyframes: List[str]  # de-duplicated keyframes from every aggregated event
+
+    # Security assessment
+    max_confidence: float
+    average_confidence: float
+    investigation_priority: int  # highest priority among the aggregated events
+    requires_immediate_response: bool  # True if any aggregated event required it
+
+    # Detection summary
+    total_detections: int
+    detection_summary: Dict[str, Any]
+
+    # Person tracking summary
+    unique_persons_count: int = 0
+    suspicious_persons: Optional[List[Dict]] = None
+    person_reoccurrences: int = 0
+
+    # Investigation details
+    description: str = ""
+    security_assessment: str = ""
+    recommended_actions: Optional[List[str]] = None
+
+class DetectifAIEventProcessor:
+    """Process and classify events according to DetectifAI security requirements"""
+    
+    def __init__(self, config):
+        """Store the config and build the threshold table and zeroed counters.
+
+        Args:
+            config: Configuration object; it is only stored, not read, here.
+        """
+        kinds, levels = DetectifAIEventType, ThreatLevel
+        self.config = config
+
+        # Counters start at zero. process_security_events() updates every one
+        # except 'facial_recognition_events', which this class never updates.
+        self.processing_stats = dict.fromkeys(
+            (
+                'motion_events_processed',
+                'object_events_processed',
+                'detectifai_events_created',
+                'facial_recognition_events',
+                'placeholder_events_created',
+            ),
+            0,
+        )
+
+        # Minimum confidence needed to reach each threat level, per event type.
+        # Types without a CRITICAL entry can be rated HIGH at most, and
+        # GENERAL_MOTION has no row, so _assess_threat_level() returns LOW for it.
+        self.threat_thresholds = {
+            kinds.FIRE_DETECTION: {
+                levels.CRITICAL: 0.7, levels.HIGH: 0.5,
+                levels.MEDIUM: 0.3, levels.LOW: 0.1,
+            },
+            kinds.WEAPON_DETECTION: {
+                levels.CRITICAL: 0.8, levels.HIGH: 0.6,
+                levels.MEDIUM: 0.4, levels.LOW: 0.2,
+            },
+            kinds.PHYSICAL_ASSAULT: {
+                levels.CRITICAL: 0.9, levels.HIGH: 0.7,
+                levels.MEDIUM: 0.5, levels.LOW: 0.3,
+            },
+            kinds.WALL_JUMPING: {
+                levels.HIGH: 0.8, levels.MEDIUM: 0.6, levels.LOW: 0.4,
+            },
+            kinds.ROAD_ACCIDENT: {
+                levels.HIGH: 0.8, levels.MEDIUM: 0.6, levels.LOW: 0.4,
+            },
+            kinds.SUSPICIOUS_PERSON_REOCCURRENCE: {
+                levels.HIGH: 0.9, levels.MEDIUM: 0.7, levels.LOW: 0.5,
+            },
+        }
+
+        logger.info("DetectifAI Event Processor initialized")
+    
+    def process_security_events(self, keyframes: List, motion_events: List, object_events: Optional[List] = None) -> List[DetectifAIEvent]:
+        """Convert object and motion events into one list (object-derived events first).
+
+        Every counter written here is refreshed on each call; in particular
+        'object_events_processed' drops back to 0 when no object events are given.
+
+        Args:
+            keyframes: Passed through to create_placeholder_events().
+            motion_events: Motion events screened for placeholder creation.
+            object_events: Optional object-detection event dicts.
+        """
+        logger.info("🔍 Processing security events for DetectifAI system")
+        detectifai_events = []
+        if object_events:
+            detectifai_events.extend(self.convert_object_detection_to_detectifai_events(object_events))
+        self.processing_stats['object_events_processed'] = len(object_events) if object_events else 0
+        placeholder_events = self.create_placeholder_events(keyframes, motion_events)
+        detectifai_events.extend(placeholder_events)
+        self.processing_stats['motion_events_processed'] = len(motion_events)
+        self.processing_stats['placeholder_events_created'] = len(placeholder_events)
+        self.processing_stats['detectifai_events_created'] = len(detectifai_events)
+        logger.info(f"✅ DetectifAI processing complete: {len(detectifai_events)} security events created")
+        return detectifai_events
+    
+    def get_processing_stats(self) -> Dict[str, Any]:
+        """Return a shallow snapshot of the counters, detached from the internal dict."""
+        return dict(self.processing_stats)
+    
+    def convert_object_detection_to_detectifai_events(self, object_events: List[Dict]) -> List[DetectifAIEvent]:
+        """Wrap each object-detection event dict in a DetectifAIEvent.
+
+        Args:
+            object_events: Dicts that must carry 'event_id', 'start_timestamp'
+                and 'end_timestamp'; every other key read here has a default.
+
+        Returns:
+            One DetectifAIEvent per input dict, in input order.
+
+        Raises:
+            KeyError: If a dict lacks one of the three required keys.
+        """
+        weapon_classes = ('knife', 'gun')
+        urgent_levels = (ThreatLevel.CRITICAL, ThreatLevel.HIGH)
+        converted = []
+        for source in object_events:
+            label = source.get('object_class', '').lower()
+            if label == 'fire':
+                kind = DetectifAIEventType.FIRE_DETECTION
+            elif label in weapon_classes:
+                kind = DetectifAIEventType.WEAPON_DETECTION
+            else:  # anything else is filed as general motion
+                kind = DetectifAIEventType.GENERAL_MOTION
+            score = source.get('confidence', 0.0)
+            level = self._assess_threat_level(kind, score)
+            # Required keys are read in event_id, start, end order.
+            new_id = f"detectifai_{source['event_id']}"
+            began, ended = source['start_timestamp'], source['end_timestamp']
+            payload = {
+                'object_class': label,
+                'detection_count': source.get('detection_count', 0),
+                'max_confidence': source.get('max_confidence', score),
+                'detection_data': source.get('detection_details', []),
+            }
+            converted.append(DetectifAIEvent(
+                event_id=new_id, event_type=kind, threat_level=level,
+                start_timestamp=began, end_timestamp=ended, duration=ended - began,
+                confidence=score, keyframes=source.get('keyframes', []), detection_details=payload,
+                requires_immediate_response=level in urgent_levels,
+                investigation_priority=self._calculate_investigation_priority(kind, level, score),
+                description=self._generate_detectifai_description(kind, label, score),
+                processing_timestamp=time.time(), detection_model_used=f"object_detection_{label}",
+            ))
+        logger.info(f"Converted {len(object_events)} object events to {len(converted)} DetectifAI events")
+        return converted
+    
+    def create_placeholder_events(self, keyframes: List, motion_events: List) -> List[DetectifAIEvent]:
+        """Emit stand-in assault events for motion events with high intensity.
+
+        Args:
+            keyframes: Accepted but not read by this method.
+            motion_events: Objects exposing event_id, start/end timestamps,
+                keyframes, event_type and, optionally, motion_intensity.
+
+        Returns:
+            One placeholder PHYSICAL_ASSAULT event per qualifying motion event.
+        """
+        intensity_floor = 0.015
+        stand_ins = []
+        for motion in motion_events:
+            # Events lacking an intensity reading, or not above the floor, are skipped.
+            if not hasattr(motion, 'motion_intensity') or not motion.motion_intensity > intensity_floor:
+                continue
+            source_id = motion.event_id
+            began, ended = motion.start_timestamp, motion.end_timestamp
+            frames, intensity = motion.keyframes, motion.motion_intensity
+            stand_ins.append(DetectifAIEvent(
+                event_id=f"placeholder_assault_{source_id}",
+                event_type=DetectifAIEventType.PHYSICAL_ASSAULT,
+                threat_level=ThreatLevel.MEDIUM,  # conservative: no fight-detection model ran
+                start_timestamp=began, end_timestamp=ended, duration=ended - began,
+                confidence=0.5,  # fixed placeholder confidence
+                keyframes=frames,
+                detection_details={'placeholder': True, 'motion_intensity': intensity,
+                                   'original_event_type': motion.event_type},
+                requires_immediate_response=False, investigation_priority=5,
+                description=f"Potential physical assault detected (placeholder) - High motion intensity: {intensity:.3f}",
+                security_notes="PLACEHOLDER: Requires fight detection module implementation",
+                processing_timestamp=time.time(), detection_model_used="placeholder_fight_detection",
+            ))
+        logger.info(f"Created {len(stand_ins)} placeholder DetectifAI events")
+        return stand_ins
+    
+    def _assess_threat_level(self, event_type: DetectifAIEventType, confidence: float) -> ThreatLevel:
+        """Return the most severe level whose threshold ``confidence`` reaches.
+
+        Event types without a threshold row, and confidences below every
+        configured threshold, both resolve to ``ThreatLevel.LOW``.
+        """
+        cutoffs = self.threat_thresholds.get(event_type, {})
+        severity_order = (ThreatLevel.CRITICAL, ThreatLevel.HIGH, ThreatLevel.MEDIUM, ThreatLevel.LOW)
+        reached = (level for level in severity_order
+                   if level in cutoffs and confidence >= cutoffs[level])
+        # next() yields the first (most severe) match, or LOW when none matched.
+        return next(reached, ThreatLevel.LOW)
+    
+    def _calculate_investigation_priority(self, event_type: DetectifAIEventType,
+                                        threat_level: ThreatLevel, confidence: float) -> int:
+        """Score an event on the 1-10 investigation scale.
+
+        The per-type base score is scaled by a per-threat-level factor and
+        truncated to an int; confidence above 0.8 adds one point, and the
+        result is clamped to 1..10.
+
+        Raises:
+            KeyError: If ``threat_level`` has no scaling factor.
+        """
+        type_scores = {
+            DetectifAIEventType.FIRE_DETECTION: 9,
+            DetectifAIEventType.WEAPON_DETECTION: 8,
+            DetectifAIEventType.PHYSICAL_ASSAULT: 7,
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: 6,
+            DetectifAIEventType.WALL_JUMPING: 5,
+            DetectifAIEventType.ROAD_ACCIDENT: 4,
+            DetectifAIEventType.GENERAL_MOTION: 2,
+        }
+        level_factors = {
+            ThreatLevel.CRITICAL: 1.0,
+            ThreatLevel.HIGH: 0.9,
+            ThreatLevel.MEDIUM: 0.7,
+            ThreatLevel.LOW: 0.5,
+        }
+        scaled = int(type_scores.get(event_type, 2) * level_factors[threat_level])
+        bonus = 1 if confidence > 0.8 else 0
+        # Truncation happens before the bonus; the clamp bounds the final value.
+        return max(1, min(10, scaled + bonus))
+    
+    def _generate_detectifai_description(self, event_type: DetectifAIEventType,
+                                       object_class: str, confidence: float) -> str:
+        """Return the alert text for ``event_type``; confidence is rendered as a percentage."""
+        pct = f"{confidence:.1%}"
+        templates = {
+            DetectifAIEventType.FIRE_DETECTION: f"🔥 Fire detected with {pct} confidence - Immediate evacuation may be required",
+            DetectifAIEventType.WEAPON_DETECTION: f"⚠️ Weapon ({object_class}) detected with {pct} confidence - Security alert triggered",
+            DetectifAIEventType.PHYSICAL_ASSAULT: f"👊 Physical assault detected with {pct} confidence - Intervention may be needed",
+            DetectifAIEventType.WALL_JUMPING: f"🧗 Perimeter breach (wall jumping) detected with {pct} confidence",
+            DetectifAIEventType.ROAD_ACCIDENT: f"🚗 Road accident detected with {pct} confidence - Emergency services may be needed",
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: f"👤 Suspicious person re-occurrence detected with {pct} confidence",
+            DetectifAIEventType.GENERAL_MOTION: "📊 General motion activity detected",
+        }
+        return templates.get(event_type, f"Security event detected: {event_type.value}")
+
+class DetectifAIEventAggregator:
+    """Simplified event aggregation focused on DetectifAI security requirements"""
+    
+    def __init__(self, config):
+        """Keep ``config`` and read the clustering gap (seconds) from it, defaulting to 10.0."""
+        self.config, self.temporal_window = config, getattr(config, 'detectifai_temporal_window', 10.0)
+        
+    def aggregate_detectifai_events(self, events: List[DetectifAIEvent]) -> List[DetectifAICanonicalEvent]:
+        """Collapse raw events into canonical events, highest priority first.
+
+        Events are bucketed by type (in first-seen order), each bucket is
+        clustered independently, and canonical ids are numbered consecutively
+        from 1 across all buckets.
+
+        Args:
+            events: Events to aggregate; may be empty.
+
+        Returns:
+            Canonical events sorted by descending investigation_priority.
+        """
+        logger.info(f"Aggregating {len(events)} DetectifAI events")
+        if not events:
+            return []
+
+        buckets = {}
+        for item in events:
+            buckets.setdefault(item.event_type, []).append(item)
+
+        merged = []
+        for kind, members in buckets.items():
+            # Numbering continues from the canonical events produced so far.
+            merged += self._aggregate_by_detectifai_type(kind, members, len(merged) + 1)
+
+        merged = sorted(merged, key=lambda c: c.investigation_priority, reverse=True)
+
+        logger.info(f"Created {len(merged)} canonical DetectifAI events")
+        return merged
+    
+    def _aggregate_by_detectifai_type(self, event_type: DetectifAIEventType,
+                                    events: List[DetectifAIEvent],
+                                    start_id: int) -> List[DetectifAICanonicalEvent]:
+        """Cluster same-type events in time and build one canonical event per cluster.
+
+        Events are ordered by start time; an event joins the open cluster when
+        it starts no later than ``temporal_window`` seconds after the latest
+        end time seen in that cluster. Comparing against the latest end (not
+        the end of the most recently added event) keeps a short event nested
+        inside a long one from splitting the cluster prematurely.
+
+        Args:
+            event_type: Type shared by every event in ``events``.
+            events: Events to cluster; the caller's list is left unmodified.
+            start_id: Numeric id given to the first canonical event; later
+                clusters count up from it.
+
+        Returns:
+            Canonical events in chronological cluster order ([] for no events).
+        """
+        if not events:
+            return []
+
+        ordered = sorted(events, key=lambda e: e.start_timestamp)
+        clusters = [[ordered[0]]]
+        # cluster_end tracks the latest end time inside the open cluster, which
+        # is not necessarily the end time of the event appended last.
+        cluster_end = ordered[0].end_timestamp
+        for event in ordered[1:]:
+            if event.start_timestamp - cluster_end <= self.temporal_window:
+                clusters[-1].append(event)
+                cluster_end = max(cluster_end, event.end_timestamp)
+            else:
+                clusters.append([event])
+                cluster_end = event.end_timestamp
+
+        return [
+            self._create_detectifai_canonical_event(event_type, cluster, start_id + offset)
+            for offset, cluster in enumerate(clusters)
+        ]
+    
+    def _create_detectifai_canonical_event(self, event_type: DetectifAIEventType,
+                                         cluster: List[DetectifAIEvent],
+                                         canonical_id: int) -> DetectifAICanonicalEvent:
+        """Summarise a cluster of same-type events as one canonical event.
+
+        The cluster's highest-priority event supplies the representative frame.
+        Keyframes and detection-model names are de-duplicated in first-seen
+        order, so the output is reproducible from run to run (a plain ``set``
+        can order strings differently between interpreter sessions).
+
+        Args:
+            event_type: Type shared by the clustered events.
+            cluster: Non-empty list of events to summarise.
+            canonical_id: Number rendered into the zero-padded canonical id.
+
+        Returns:
+            The populated DetectifAICanonicalEvent.
+        """
+        # Find highest priority event as representative
+        representative = max(cluster, key=lambda e: e.investigation_priority)
+
+        # Overall time span of the cluster
+        start_time = min(e.start_timestamp for e in cluster)
+        end_time = max(e.end_timestamp for e in cluster)
+
+        confidences = [e.confidence for e in cluster]
+        max_confidence = max(confidences)
+        avg_confidence = sum(confidences) / len(confidences)
+
+        # dict.fromkeys de-duplicates while keeping first-seen order
+        unique_keyframes = list(dict.fromkeys(frame for e in cluster for frame in e.keyframes))
+        detection_methods = list(dict.fromkeys(e.detection_model_used for e in cluster))
+
+        # An event whose details carry no 'detection_count' counts as one detection
+        total_detections = sum(e.detection_details.get('detection_count', 1) for e in cluster)
+
+        # One urgent member makes the whole cluster urgent
+        requires_immediate_response = any(e.requires_immediate_response for e in cluster)
+
+        # Severity ranking (least to most severe) used to pick the worst level present
+        threat_levels = [ThreatLevel.LOW, ThreatLevel.MEDIUM, ThreatLevel.HIGH, ThreatLevel.CRITICAL]
+        max_threat_level = max((e.threat_level for e in cluster), key=threat_levels.index)
+
+        detection_summary = {
+            'total_events_aggregated': len(cluster),
+            'detection_methods': detection_methods,
+            'confidence_range': {
+                'min': min(confidences),
+                'max': max_confidence,
+                'average': avg_confidence
+            },
+            'detection_details': [e.detection_details for e in cluster]
+        }
+
+        return DetectifAICanonicalEvent(
+            canonical_id=f"detectifai_canonical_{canonical_id:04d}",
+            event_type=event_type,
+            threat_level=max_threat_level,
+            start_time=start_time,
+            end_time=end_time,
+            total_duration=end_time - start_time,
+            aggregated_events_count=len(cluster),
+            aggregated_event_ids=[e.event_id for e in cluster],
+            representative_frame=representative.keyframes[0] if representative.keyframes else "",
+            all_keyframes=unique_keyframes,
+            max_confidence=max_confidence,
+            average_confidence=avg_confidence,
+            investigation_priority=max(e.investigation_priority for e in cluster),
+            requires_immediate_response=requires_immediate_response,
+            total_detections=total_detections,
+            detection_summary=detection_summary,
+            description=self._generate_canonical_description(event_type, cluster, max_confidence),
+            security_assessment=self._generate_security_assessment(event_type, max_threat_level, len(cluster)),
+            recommended_actions=self._get_recommended_actions(event_type, max_threat_level)
+        )
+    
+    def _generate_canonical_description(self, event_type: DetectifAIEventType,
+                                      cluster: List[DetectifAIEvent], confidence: float) -> str:
+        """Return a one-line summary of the cluster (``confidence`` is not used)."""
+        # The span runs from the earliest start to the latest end in the cluster.
+        n = len(cluster)
+        duration = max(e.end_timestamp for e in cluster) - min(e.start_timestamp for e in cluster)
+        span = f"{duration:.1f}s"
+        summaries = {
+            DetectifAIEventType.FIRE_DETECTION: f"Fire incident - {n} detections over {span}",
+            DetectifAIEventType.WEAPON_DETECTION: f"Weapon threat - {n} detections over {span}",
+            DetectifAIEventType.PHYSICAL_ASSAULT: f"Physical assault incident - {n} events over {span}",
+            DetectifAIEventType.WALL_JUMPING: f"Perimeter breach - {n} wall jumping events over {span}",
+            DetectifAIEventType.ROAD_ACCIDENT: f"Road accident - {n} incidents over {span}",
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: f"Suspicious person alert - {n} re-occurrences",
+            DetectifAIEventType.GENERAL_MOTION: f"Motion activity - {n} events over {span}",
+        }
+        return summaries.get(event_type, f"Security event: {event_type.value}")
+    
+    def _generate_security_assessment(self, event_type: DetectifAIEventType,
+                                    threat_level: ThreatLevel, event_count: int) -> str:
+        """Return the assessment text for an event type at a threat level.
+
+        Four (type, level) pairs have dedicated wording; everything else gets
+        the generic sentence for its threat level. ``event_count`` is unused.
+        """
+        by_level = {
+            ThreatLevel.CRITICAL: "CRITICAL threat level - immediate response required",
+            ThreatLevel.HIGH: "HIGH priority security event - urgent attention needed",
+            ThreatLevel.MEDIUM: "MEDIUM priority - monitor and assess situation",
+            ThreatLevel.LOW: "LOW priority - general awareness sufficient",
+        }
+
+        by_type_and_level = {
+            (DetectifAIEventType.FIRE_DETECTION, ThreatLevel.CRITICAL): "CRITICAL: Immediate evacuation and fire response required",
+            (DetectifAIEventType.WEAPON_DETECTION, ThreatLevel.CRITICAL): "CRITICAL: Armed threat present - immediate security intervention",
+            (DetectifAIEventType.PHYSICAL_ASSAULT, ThreatLevel.HIGH): "HIGH: Violence in progress - security response needed",
+            (DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE, ThreatLevel.HIGH): "HIGH: Known suspicious individual returned - monitor closely",
+        }
+        fallback = by_level.get(threat_level, "Security event requires assessment")
+        # Dedicated wording wins over the generic per-level sentence.
+        return by_type_and_level.get((event_type, threat_level)) or fallback
+    
+    def _get_recommended_actions(self, event_type: DetectifAIEventType,
+                               threat_level: ThreatLevel) -> List[str]:
+        """Return the response checklist for ``event_type`` as a new list.
+
+        CRITICAL and HIGH threat levels get an urgency banner as the first item.
+        """
+        checklists = {
+            DetectifAIEventType.FIRE_DETECTION: [
+                "Verify fire location and extent",
+                "Initiate evacuation procedures if confirmed",
+                "Contact fire department",
+                "Monitor spread and safety of personnel",
+            ],
+            DetectifAIEventType.WEAPON_DETECTION: [
+                "Verify weapon type and threat level",
+                "Alert security personnel immediately",
+                "Consider lockdown procedures",
+                "Contact law enforcement if confirmed threat",
+            ],
+            DetectifAIEventType.PHYSICAL_ASSAULT: [
+                "Assess severity of altercation",
+                "Dispatch security to location",
+                "Consider medical assistance",
+                "Document incident for investigation",
+            ],
+            DetectifAIEventType.WALL_JUMPING: [
+                "Verify perimeter breach",
+                "Check intruder location and intent",
+                "Review security footage",
+                "Assess security protocol effectiveness",
+            ],
+            DetectifAIEventType.ROAD_ACCIDENT: [
+                "Assess severity of accident",
+                "Check for injuries",
+                "Contact emergency services if needed",
+                "Manage traffic flow around incident",
+            ],
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: [
+                "Review person's previous incidents",
+                "Monitor current activities closely",
+                "Alert security personnel",
+                "Consider preventive measures",
+            ],
+        }
+        generic = ["Monitor situation", "Assess threat level", "Take appropriate action"]
+        steps = checklists.get(event_type, generic)
+        if threat_level == ThreatLevel.CRITICAL:
+            return ["IMMEDIATE ACTION REQUIRED", *steps]
+        if threat_level == ThreatLevel.HIGH:
+            return ["URGENT: Prioritize response", *steps]
+        return steps
\ No newline at end of file
diff --git a/event_aggregation.py b/event_aggregation.py
new file mode 100644
index 0000000000000000000000000000000000000000..3de425f8847caa77b409945ae0ae84458bdb4d04
--- /dev/null
+++ b/event_aggregation.py
@@ -0,0 +1,819 @@
+"""
+Event Aggregation and Deduplication Module
+
+This module handles:
+- Event detection and clustering
+- Temporal aggregation of related events
+- Duplicate frame removal using similarity detection
+- Canonical event generation
+"""
+
+import numpy as np
+import cv2
+import json
+import os
+from typing import List, Dict, Tuple, Set, Any, Optional
+from dataclasses import dataclass, asdict
+import imagehash
+from PIL import Image
+from collections import defaultdict
+import logging
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class Event:
+    """A single detected event covering a time range, with optional object-detection data."""
+    event_id: str
+    start_timestamp: float
+    end_timestamp: float
+    event_type: str
+    confidence: float
+    keyframes: List[str]  # Frame paths
+    importance_score: float
+    motion_intensity: float
+    description: str = ""
+    # Object detection specific fields
+    object_class: str = ""           # Object class for object events (fire, knife, gun); behavior type for behavior events
+    detection_count: int = 0         # Number of detections in this event
+    max_confidence: float = 0.0      # Highest confidence detection
+    is_object_event: bool = False    # Flag to identify object-based events
+    detection_details: Optional[List] = None   # Raw detection data; None when not supplied
+
+@dataclass
+class CanonicalEvent:
+    """Canonical representation of a cluster of similar events, merged into one record."""
+    canonical_id: str
+    event_type: str
+    representative_frame: str
+    start_time: float
+    end_time: float
+    duration: float
+    confidence: float
+    frame_count: int
+    aggregated_events: List[str]  # Event IDs
+    description: str
+    similarity_cluster: int
+    # Enhanced object detection fields
+    contains_objects: bool = False           # Whether this canonical event has object detections
+    detected_object_classes: Optional[List[str]] = None  # List of detected object classes
+    object_detection_summary: Optional[Dict] = None      # Summary of object detections
+    threat_level: str = "low"                # Threat assessment: low, medium, high, critical
+
+class SimilarityCalculator:
+    """Calculate similarity between frames using multiple methods"""
+    
+    def __init__(self, similarity_threshold: float = 0.85):
+        self.similarity_threshold = similarity_threshold  # stored as given; none of this class's methods read it
+        
+    def calculate_histogram_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
+        """Return the HSV colour-histogram correlation of two frames, floored at 0.0.
+
+        Both frames are converted with COLOR_BGR2HSV before their 3-D histograms
+        are compared. Any exception is logged and reported as 0.0.
+        """
+        channels = [0, 1, 2]
+        bin_counts = [50, 60, 60]
+        value_ranges = [0, 180, 0, 256, 0, 256]
+        try:
+            hsv_frames = [cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) for frame in (frame1, frame2)]
+            hist_a, hist_b = [
+                cv2.calcHist([hsv], channels, None, bin_counts, value_ranges) for hsv in hsv_frames
+            ]
+            return max(0.0, cv2.compareHist(hist_a, hist_b, cv2.HISTCMP_CORREL))
+        except Exception as e:
+            logger.error(f"Histogram similarity calculation failed: {e}")
+            return 0.0
+    
+    def calculate_perceptual_hash_similarity(self, frame1_path: str, frame2_path: str) -> float:
+        """Return a 0-1 similarity derived from the perceptual hashes of two image files.
+
+        The hash difference is divided by 64.0 and subtracted from 1.0, and the
+        result is floored at 0.0. Any exception is logged and reported as 0.0.
+        """
+        try:
+            # Context managers close both files; a bare Image.open() can leave the
+            # handle open until the image object is garbage collected.
+            with Image.open(frame1_path) as img1, Image.open(frame2_path) as img2:
+                hash1 = imagehash.phash(img1)
+                hash2 = imagehash.phash(img2)
+
+            # Lower hash difference = higher similarity
+            hash_diff = hash1 - hash2
+            similarity = 1.0 - (hash_diff / 64.0)  # Normalize to 0-1
+            return max(0.0, similarity)
+        except Exception as e:
+            logger.error(f"Perceptual hash similarity calculation failed: {e}")
+            return 0.0
+    
+    def calculate_structural_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
+        """Return the normalised cross-correlation of two frames, floored at 0.0.
+
+        Both frames are converted to grayscale with COLOR_BGR2GRAY; if their shapes
+        differ, both are resized to the smaller height and the smaller width. Any
+        exception is logged and reported as 0.0.
+        """
+        try:
+            first = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
+            second = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
+
+            if first.shape != second.shape:
+                # cv2.resize takes the target size as (width, height)
+                target = (min(first.shape[1], second.shape[1]), min(first.shape[0], second.shape[0]))
+                first, second = cv2.resize(first, target), cv2.resize(second, target)
+
+            # Only the top-left entry of the match result is used as the score
+            score = cv2.matchTemplate(first, second, cv2.TM_CCOEFF_NORMED)[0, 0]
+            return max(0.0, score)
+        except Exception as e:
+            logger.error(f"Structural similarity calculation failed: {e}")
+            return 0.0
+    
+    def calculate_combined_similarity(self, frame1_path: str, frame2_path: str) -> float:
+        """Blend histogram, perceptual-hash and structural similarity into one score.
+
+        The three scores are weighted 0.4 / 0.4 / 0.2 and the sum is capped at 1.0.
+        Returns 0.0 when cv2.imread yields None for either path, or when an
+        exception is raised (the exception is logged).
+        """
+        try:
+            image_a = cv2.imread(frame1_path)
+            image_b = cv2.imread(frame2_path)
+            if image_a is None or image_b is None:
+                return 0.0
+
+            # (weight, score) pairs: histogram, perceptual hash, structural
+            weighted_scores = [
+                (0.4, self.calculate_histogram_similarity(image_a, image_b)),
+                (0.4, self.calculate_perceptual_hash_similarity(frame1_path, frame2_path)),
+                (0.2, self.calculate_structural_similarity(image_a, image_b)),
+            ]
+
+            total = 0.0
+            for weight, score in weighted_scores:
+                total += score * weight
+            return min(1.0, total)
+        except Exception as e:
+            logger.error(f"Combined similarity calculation failed: {e}")
+            return 0.0
+
+class EventDetector:
+    """Detect events from keyframes"""
+    
+    def __init__(self, config):
+        """Keep the config and derive the per-event-type parameters from it."""
+        self.config = config
+        doubled_motion = config.motion_threshold * 2
+        scene_change = config.scene_change_threshold
+        quality_peak = config.base_quality_threshold * 1.5
+        self.event_types = {'high_motion': {'motion_threshold': doubled_motion}, 'burst_activity': {'requires_burst': True},
+                            'scene_change': {'change_threshold': scene_change}, 'quality_peak': {'quality_threshold': quality_peak}}
+    
+    def detect_events(self, keyframes: List) -> List[Event]:
+        """Cluster keyframes in time and collect the events found in each cluster.
+
+        Event ids are numbered from 1, and each cluster's numbering continues
+        from where the previous cluster's events stopped.
+        """
+        logger.info(f"Detecting events from {len(keyframes)} keyframes")
+
+        detected = []
+        next_event_id = 1
+
+        for group in self._create_temporal_clusters(keyframes):
+            if not group:
+                continue
+
+            found = self._analyze_cluster_for_events(group, next_event_id)
+            detected.extend(found)
+            next_event_id += len(found)
+
+        logger.info(f"Detected {len(detected)} events")
+        return detected
+    
+    def _create_temporal_clusters(self, keyframes: List) -> List[List]:
+        """Group keyframes into runs whose consecutive timestamps are close together.
+
+        Keyframes are sorted by frame_data.timestamp. A keyframe joins the
+        current run when its gap to that run's last keyframe is at most
+        config.temporal_clustering_window; otherwise it opens a new run.
+
+        Args:
+            keyframes: Objects exposing frame_data.timestamp.
+
+        Returns:
+            The runs in ascending timestamp order; an empty list for empty input.
+        """
+        if not keyframes:
+            return []
+
+        ordered = sorted(keyframes, key=lambda kf: kf.frame_data.timestamp)
+
+        # Seed the first run with the earliest keyframe
+        runs = [[ordered[0]]]
+        for keyframe in ordered[1:]:
+            latest_run = runs[-1]
+            gap = keyframe.frame_data.timestamp - latest_run[-1].frame_data.timestamp
+
+            if gap <= self.config.temporal_clustering_window:
+                latest_run.append(keyframe)
+            else:
+                # Gap too large: this keyframe starts the next run
+                runs.append([keyframe])
+
+        return runs
+    
+    def _analyze_cluster_for_events(self, cluster: List, start_event_id: int) -> List[Event]:
+        """Derive up to three events (high motion, burst activity, quality peak) from one cluster.
+
+        Every event spans the cluster's earliest to latest timestamp and reports
+        the cluster's peak motion score as its motion intensity. Ids take the form
+        event_NNNN, starting at start_event_id and increasing by one per event.
+        An empty cluster yields an empty list.
+        """
+        if not cluster:
+            return []
+
+        # Per-cluster metrics
+        motions = [kf.frame_data.motion_score for kf in cluster]
+        qualities = [kf.frame_data.quality_score for kf in cluster]
+        bursting = [kf for kf in cluster if kf.frame_data.burst_active]
+        timestamps = [kf.frame_data.timestamp for kf in cluster]
+
+        peak_motion = max(motions) if motions else 0
+        mean_motion = sum(motions) / len(motions) if motions else 0
+        peak_quality = max(qualities) if qualities else 0
+
+        # Fields shared by every event created from this cluster
+        shared = dict(start_timestamp=min(timestamps), end_timestamp=max(timestamps), motion_intensity=peak_motion)
+
+        found = []
+
+        def next_id():
+            """Return the id for the next event, counting on from start_event_id."""
+            return f"event_{start_event_id + len(found):04d}"
+
+        # High motion event
+        if peak_motion > self.config.motion_threshold * 2:
+            found.append(Event(
+                event_id=next_id(),
+                event_type="high_motion",
+                confidence=min(peak_motion * 2, 1.0),
+                keyframes=[kf.frame_data.frame_path for kf in cluster],
+                importance_score=peak_motion + (mean_motion * 0.5),
+                description=f"High motion event with peak intensity {peak_motion:.3f}",
+                **shared,
+            ))
+
+        # Burst activity event
+        if len(bursting) >= 2:
+            found.append(Event(
+                event_id=next_id(),
+                event_type="burst_activity",
+                confidence=min(len(bursting) / len(cluster), 1.0),
+                keyframes=[kf.frame_data.frame_path for kf in bursting],
+                importance_score=len(bursting) * 0.3 + mean_motion,
+                description=f"Burst activity with {len(bursting)} active frames",
+                **shared,
+            ))
+
+        # Quality peak event
+        if peak_quality > self.config.base_quality_threshold * 1.5:
+            sharp = [kf for kf in cluster if kf.frame_data.quality_score > self.config.base_quality_threshold * 1.3]
+            if sharp:
+                found.append(Event(
+                    event_id=next_id(),
+                    event_type="quality_peak",
+                    confidence=peak_quality,
+                    keyframes=[kf.frame_data.frame_path for kf in sharp],
+                    importance_score=peak_quality + (len(sharp) * 0.1),
+                    description=f"High quality event with peak score {peak_quality:.3f}",
+                    **shared,
+                ))
+
+        return found
+    
+    def convert_object_events_to_standard_format(self, object_events: List[Dict]) -> List[Event]:
+        """Turn object-detection event dicts into Event instances flagged as object events.
+
+        event_id, start_timestamp, end_timestamp, event_type, confidence, keyframes,
+        importance_score and description must be present in each dict; other keys
+        fall back to defaults (max_confidence defaults to the event's confidence).
+        """
+        return [
+            Event(
+                event_id=raw['event_id'],
+                start_timestamp=raw['start_timestamp'],
+                end_timestamp=raw['end_timestamp'],
+                event_type=raw['event_type'],
+                confidence=raw['confidence'],
+                keyframes=raw['keyframes'],
+                importance_score=raw['importance_score'],
+                motion_intensity=raw.get('motion_intensity', 0.0),
+                description=raw['description'],
+                object_class=raw.get('object_class', ''),
+                detection_count=raw.get('detection_count', 0),
+                max_confidence=raw.get('max_confidence', raw['confidence']),
+                is_object_event=True,
+                detection_details=raw.get('detection_details', []),
+            )
+            for raw in object_events
+        ]
+    
+    def convert_behavior_events_to_standard_format(self, behavior_events: List) -> List[Event]:
+        """Turn behavior-analysis events into Event instances (never flagged as object events).
+
+        Items that have a behavior_type attribute are read attribute by attribute;
+        anything else is treated as a dict and read with .get() fallbacks. Either
+        way event_type becomes "behavior_<type>" and object_class stores the
+        behavior type.
+        """
+
+        def from_object(item):
+            """Build an Event from an attribute-style behavior event."""
+            return Event(
+                event_id=item.event_id,
+                start_timestamp=item.start_timestamp,
+                end_timestamp=item.end_timestamp,
+                event_type=f"behavior_{item.behavior_type}",
+                confidence=item.confidence,
+                keyframes=item.keyframes,
+                importance_score=item.importance_score,
+                motion_intensity=0.0,  # Behavior events don't have motion intensity
+                description=f"{item.behavior_type.capitalize()} detected (confidence: {item.confidence:.2f})",
+                # Use object_class field to store behavior type for consistency
+                object_class=item.behavior_type,
+                detection_count=len(item.frame_indices),
+                max_confidence=item.confidence,
+                is_object_event=False,  # Behavior events are separate from object events
+                detection_details=[{'model_used': item.model_used, 'frame_indices': item.frame_indices}],
+            )
+
+        def from_mapping(item, position):
+            """Build an Event from a dict; position supplies the fallback event id."""
+            return Event(
+                event_id=item.get('event_id', f"behavior_{position}"),
+                start_timestamp=item.get('start_timestamp', 0.0),
+                end_timestamp=item.get('end_timestamp', 0.0),
+                event_type=f"behavior_{item.get('behavior_type', 'unknown')}",
+                confidence=item.get('confidence', 0.0),
+                keyframes=item.get('keyframes', []),
+                importance_score=item.get('importance_score', 0.0),
+                motion_intensity=0.0,
+                description=item.get('description', 'Behavior detected'),
+                object_class=item.get('behavior_type', ''),
+                detection_count=len(item.get('frame_indices', [])),
+                max_confidence=item.get('confidence', 0.0),
+                is_object_event=False,
+                detection_details=[{'model_used': item.get('model_used', 'unknown'), 'frame_indices': item.get('frame_indices', [])}],
+            )
+
+        # An item's position equals the number of events converted before it
+        return [
+            from_object(item) if hasattr(item, 'behavior_type') else from_mapping(item, position)
+            for position, item in enumerate(behavior_events)
+        ]
+    
+    def assess_threat_level(self, event: Event) -> str:
+        """Rate an event as "low", "medium", "high" or "critical".
+
+        Non-object events are "medium" when they are burst_activity, or high_motion
+        with motion_intensity above 0.015, and "low" otherwise. Object events are
+        rated by comparing max_confidence with per-class minimums for fire, gun
+        and knife; any other object class is rated "medium".
+        """
+        if not event.is_object_event:
+            # For motion events, use motion intensity and burst activity
+            strong_motion = event.event_type == "high_motion" and event.motion_intensity > 0.015
+            if strong_motion or event.event_type == "burst_activity":
+                return "medium"
+            return "low"
+
+        # Object-based threat assessment
+        # Minimum max_confidence needed for each level, per object class
+        minimums = {
+            'fire': {
+                'critical': 0.85,
+                'high': 0.7,
+                'medium': 0.5,
+            },
+            'gun': {
+                'critical': 0.9,
+                'high': 0.8,
+                'medium': 0.6,
+            },
+            'knife': {
+                'critical': 0.85,
+                'high': 0.75,
+                'medium': 0.6,
+            },
+        }
+
+        class_key = event.object_class.lower()
+        score = event.max_confidence
+
+        if class_key not in minimums:
+            return "medium"  # Default for unknown object types
+
+        # Check the levels from most to least severe; the first one reached wins
+        for level in ("critical", "high", "medium"):
+            if score >= minimums[class_key][level]:
+                return level
+
+        # Below every listed minimum
+        return "low"
+
+class EventDeduplicationEngine:
+    """Remove duplicate events and create canonical representations"""
+    
+    def __init__(self, config):
+        self.config = config  # read later for similarity_threshold and object_event_temporal_window
+        self.similarity_calculator = SimilarityCalculator(config.similarity_threshold)  # scores frame-to-frame similarity
+    
+    def deduplicate_events(self, events: List[Event]) -> Tuple[List[CanonicalEvent], Dict[str, Any]]:
+        """
+        Collapse similar events into canonical events, one event type at a time.
+
+        Events are grouped by event_type and each group is deduplicated on its
+        own. Canonical ids are numbered from 1 and keep counting across groups.
+
+        Args:
+            events: Events to deduplicate; may be empty.
+
+        Returns:
+            Tuple of (canonical_events, deduplication_stats). Empty input yields
+            ([], {}); otherwise the stats hold original_events, canonical_events,
+            duplicates_removed and similarity_clusters.
+        """
+        logger.info(f"Deduplicating {len(events)} events")
+
+        if not events:
+            return [], {}
+
+        # Bucket the events by type, keeping the first-seen order of the types
+        grouped = defaultdict(list)
+        for item in events:
+            grouped[item.event_type].append(item)
+
+        # Process each event type separately
+        merged = []
+        for kind, members in grouped.items():
+            # Ids continue after the canonical events produced so far
+            next_id = len(merged) + 1
+            merged.extend(self._deduplicate_events_by_type(members, kind, next_id))
+
+        total_in, total_out = len(events), len(merged)
+        stats = {
+            'original_events': total_in,
+            'canonical_events': total_out,
+            'duplicates_removed': total_in - total_out,
+            'similarity_clusters': total_out,
+        }
+
+        logger.info(f"Deduplication complete: {len(merged)} canonical events created")
+        return merged, stats
+    
+    def _deduplicate_events_by_type(self, events: List[Event], event_type: str,
+                                  start_canonical_id: int) -> List[CanonicalEvent]:
+        """Cluster same-type events by similarity and build one canonical event per cluster.
+
+        Cluster i (0-based) gets canonical id start_canonical_id + i and
+        similarity-cluster index i. Returns an empty list for empty input.
+        """
+        if not events:
+            return []
+
+        # Pairwise similarity between all events of this type
+        pairwise = self._create_similarity_matrix(events)
+
+        # Greedy grouping on that matrix
+        groups = self._cluster_similar_events(events, pairwise)
+
+        # One canonical event per group
+        return [
+            self._create_canonical_event(group, event_type, start_canonical_id + index, index)
+            for index, group in enumerate(groups)
+        ]
+    
+    def _create_similarity_matrix(self, events: List[Event]) -> np.ndarray:
+        """Build the symmetric n x n matrix of pairwise event similarities.
+
+        The diagonal is 1.0; each unordered pair is scored once with
+        _calculate_event_similarity and the score is mirrored.
+        """
+        count = len(events)
+        matrix = np.eye(count)
+
+        for row in range(count):
+            for col in range(row + 1, count):
+                score = self._calculate_event_similarity(events[row], events[col])
+                matrix[row, col] = score
+                matrix[col, row] = score
+
+        return matrix
+    
+    def _calculate_event_similarity(self, event1: Event, event2: Event) -> float:
+        """Calculate similarity between two events (enhanced for object events).
+
+        Two object events are scored by _calculate_object_event_similarity, and an
+        object event paired with a non-object event scores a fixed 0.1. Otherwise
+        the score is 0.3 * time overlap + 0.5 * visual similarity of each event's
+        first keyframe + 0.2 * motion-intensity similarity. Any exception is logged
+        and reported as 0.0.
+        """
+        try:
+            if event1.is_object_event and event2.is_object_event:
+                return self._calculate_object_event_similarity(event1, event2)
+            if event1.is_object_event != event2.is_object_event:
+                # Different event types (object vs motion) - lower similarity
+                return 0.1
+
+            time_overlap = self._calculate_time_overlap(event1, event2)
+
+            # Frame content similarity (first keyframe of each event, when both files exist)
+            frame1 = event1.keyframes[0] if event1.keyframes else None
+            frame2 = event2.keyframes[0] if event2.keyframes else None
+            content_similarity = 0.0
+            if frame1 and frame2 and os.path.exists(frame1) and os.path.exists(frame2):
+                content_similarity = self.similarity_calculator.calculate_combined_similarity(frame1, frame2)
+
+            # Clamp at 0.0 so intensities more than 1.0 apart cannot push the combined
+            # score negative (the object-event path clamps its components the same way).
+            motion_sim = max(0.0, 1.0 - abs(event1.motion_intensity - event2.motion_intensity))
+
+            return (
+                time_overlap * 0.3 +
+                content_similarity * 0.5 +
+                motion_sim * 0.2
+            )
+        except Exception as e:
+            logger.error(f"Event similarity calculation failed: {e}")
+            return 0.0
+    
+    def _calculate_object_event_similarity(self, event1: Event, event2: Event) -> float:
+        """Calculate similarity between two object events.
+
+        Different object_class values score 0.0. Otherwise the score is 0.4 * start-time
+        proximity + 0.3 * first-keyframe visual similarity + 0.2 * confidence similarity
+        + 0.1 * detection-count similarity. Any exception is logged and reported as 0.0.
+        """
+        try:
+            if event1.object_class != event2.object_class:
+                return 0.0  # Different object types are not similar
+
+            # Time proximity. A non-positive window would divide by zero (or inflate
+            # the score), so in that case only identical start times count as close.
+            time_gap = abs(event1.start_timestamp - event2.start_timestamp)
+            window = self.config.object_event_temporal_window
+            if window > 0:
+                time_similarity = max(0.0, 1.0 - (time_gap / window))
+            else:
+                time_similarity = 1.0 if time_gap == 0 else 0.0
+
+            conf_similarity = max(0.0, 1.0 - abs(event1.confidence - event2.confidence))
+            count_diff = abs(event1.detection_count - event2.detection_count)
+            count_similarity = max(0.0, 1.0 - (count_diff / max(event1.detection_count, event2.detection_count, 1)))
+
+            # Frame content similarity (first keyframe of each event, when both files exist)
+            frame1 = event1.keyframes[0] if event1.keyframes else None
+            frame2 = event2.keyframes[0] if event2.keyframes else None
+            content_similarity = 0.0
+            if frame1 and frame2 and os.path.exists(frame1) and os.path.exists(frame2):
+                content_similarity = self.similarity_calculator.calculate_combined_similarity(frame1, frame2)
+
+            return (
+                time_similarity * 0.4 +      # Time proximity is important
+                content_similarity * 0.3 +    # Visual similarity
+                conf_similarity * 0.2 +       # Confidence similarity
+                count_similarity * 0.1        # Detection count similarity
+            )
+        except Exception as e:
+            logger.error(f"Object event similarity calculation failed: {e}")
+            return 0.0
+    
+    def _calculate_time_overlap(self, event1: Event, event2: Event) -> float:
+        """Return the overlap of two events' time ranges as a fraction of their combined span.
+
+        The result is 0.0 when the ranges do not overlap or merely touch.
+        """
+        latest_start = max(event1.start_timestamp, event2.start_timestamp)
+        earliest_end = min(event1.end_timestamp, event2.end_timestamp)
+
+        # No shared time at all (ranges that only touch count as no overlap)
+        if latest_start >= earliest_end:
+            return 0.0
+
+        # The combined span runs from the earliest start to the latest end
+        span = max(event1.end_timestamp, event2.end_timestamp) - min(event1.start_timestamp, event2.start_timestamp)
+        shared = earliest_end - latest_start
+        return shared / span if span > 0 else 0.0
+    
+    def _cluster_similar_events(self, events: List[Event], similarity_matrix: np.ndarray) -> List[List[Event]]:
+        """Greedily group events whose similarity to a seed event meets the threshold.
+
+        Events are visited in order. Each event not yet grouped seeds a new cluster
+        and pulls in every later ungrouped event whose matrix entry against the
+        seed is at least config.similarity_threshold.
+        """
+        assigned = set()
+        clusters = []
+
+        for seed, seed_event in enumerate(events):
+            if seed in assigned:
+                continue
+            assigned.add(seed)
+
+            members = [seed_event]
+            for other in range(seed + 1, len(events)):
+                if other not in assigned and similarity_matrix[seed, other] >= self.config.similarity_threshold:
+                    members.append(events[other])
+                    assigned.add(other)
+            clusters.append(members)
+
+        return clusters
+    
+    def _create_canonical_event(self, cluster: List[Event], event_type: str,
+                              canonical_id: int, cluster_id: int) -> CanonicalEvent:
+        """Merge a cluster of similar events into one CanonicalEvent.
+
+        The canonical event spans the earliest start to the latest end, averages
+        the members' confidence, and takes its representative frame from the first
+        keyframe of the member with the highest importance_score. When the cluster
+        holds object events, their classes, a detection summary and an aggregated
+        threat level are attached.
+
+        Args:
+            cluster: Events judged similar; must not be empty.
+            event_type: Event type recorded on the canonical event.
+            canonical_id: Number formatted into the canonical id.
+            cluster_id: Index stored as similarity_cluster.
+
+        Returns:
+            The canonical event built from the cluster.
+
+        Raises:
+            ValueError: If cluster is empty.
+        """
+        if not cluster:
+            raise ValueError("Cannot create canonical event from empty cluster")
+
+        # Find representative event (highest importance score)
+        lead = max(cluster, key=lambda e: e.importance_score)
+
+        # Aggregate properties
+        first_start = min(e.start_timestamp for e in cluster)
+        last_end = max(e.end_timestamp for e in cluster)
+        mean_confidence = sum(e.confidence for e in cluster) / len(cluster)
+
+        # Count each distinct keyframe path once across the whole cluster
+        distinct_keyframes = {path for member in cluster for path in member.keyframes}
+
+        object_members = [e for e in cluster if e.is_object_event]
+
+        if object_members:
+            # Collect detected object classes
+            detected_classes = list(set(e.object_class for e in object_members if e.object_class))
+
+            object_summary = {
+                'total_detections': sum(e.detection_count for e in object_members),
+                'max_confidence': max(e.max_confidence for e in object_members),
+                'average_confidence': sum(e.confidence for e in object_members) / len(object_members),
+                'detected_classes': detected_classes,
+                'object_events_count': len(object_members)
+            }
+
+            # Assess threat level based on object classes and confidence
+            threat_level = self._assess_canonical_threat_level(object_members)
+
+            objects_str = ", ".join(detected_classes)
+            description = f"{event_type.replace('_', ' ').title()} with {objects_str} detected - {len(cluster)} events aggregated"
+        else:
+            # No object events in the cluster: neutral defaults
+            detected_classes = []
+            object_summary = None
+            threat_level = "low"
+            description = f"{event_type.replace('_', ' ').title()} event aggregated from {len(cluster)} similar events"
+
+        return CanonicalEvent(
+            canonical_id=f"canonical_{canonical_id:04d}",
+            event_type=event_type,
+            representative_frame=lead.keyframes[0] if lead.keyframes else "",
+            start_time=first_start,
+            end_time=last_end,
+            duration=last_end - first_start,
+            confidence=mean_confidence,
+            frame_count=len(distinct_keyframes),
+            aggregated_events=[e.event_id for e in cluster],
+            description=description,
+            similarity_cluster=cluster_id,
+            # Enhanced object detection fields
+            contains_objects=bool(object_members),
+            detected_object_classes=detected_classes,
+            object_detection_summary=object_summary,
+            threat_level=threat_level
+        )
+    
+    def _assess_canonical_threat_level(self, object_events: List[Event]) -> str:
+        """Return the aggregated threat level for the object events of one canonical event.
+
+        Starts from the most severe individual level, then raises it one step
+        (never past "critical") for each of: more than one distinct object class,
+        more than 10 detections in total, and a max_confidence above 0.9.
+        """
+        if not object_events:
+            return "low"
+
+        ladder = ["low", "medium", "high", "critical"]
+        top = len(ladder) - 1
+
+        # Most severe rating among the individual events; unknown labels count as "low"
+        rank = 0
+        for event in object_events:
+            label = self._assess_individual_threat_level(event)
+            rank = max(rank, ladder.index(label) if label in ladder else 0)
+
+        # Additional factors for canonical events
+        max_confidence = max(e.max_confidence for e in object_events)
+        total_detections = sum(e.detection_count for e in object_events)
+        unique_classes = len(set(e.object_class for e in object_events))
+
+        # Each factor that is present raises the level one step, never past the top
+        for triggered in (unique_classes > 1, total_detections > 10, max_confidence > 0.9):
+            if triggered:
+                rank = min(rank + 1, top)
+
+        return ladder[rank]
+    
+    def _assess_individual_threat_level(self, event: Event) -> str:
+        """Rate a single event as "low", "medium", "high" or "critical".
+
+        Mirrors EventDetector.assess_threat_level: non-object events are "medium"
+        for burst_activity, or for high_motion with motion_intensity above 0.015,
+        and "low" otherwise. Object events compare max_confidence with per-class
+        minimums for fire, gun and knife; other object classes are "medium".
+        """
+        if not event.is_object_event:
+            # For motion events, use motion intensity and burst activity
+            if event.event_type == "high_motion" and event.motion_intensity > 0.015:
+                return "medium"
+            return "medium" if event.event_type == "burst_activity" else "low"
+
+        # Object-based threat assessment: (level, minimum max_confidence) pairs,
+        # ordered from most to least severe
+        ladders = {
+            'fire': (
+                ('critical', 0.85),
+                ('high', 0.7),
+                ('medium', 0.5),
+            ),
+            'gun': (
+                ('critical', 0.9),
+                ('high', 0.8),
+                ('medium', 0.6),
+            ),
+            'knife': (
+                ('critical', 0.85),
+                ('high', 0.75),
+                ('medium', 0.6),
+            ),
+        }
+
+        class_key = event.object_class.lower()
+        score = event.max_confidence
+        ladder = ladders.get(class_key)
+
+        if ladder is None:
+            return "medium"  # Default for unknown object types
+
+        # The first level whose minimum is reached wins
+        for level, minimum in ladder:
+            if score >= minimum:
+                return level
+
+        # Below every listed minimum
+        return "low"
+    
+    def save_canonical_events(self, canonical_events: List[CanonicalEvent],
+                            output_path: str) -> bool:
+        """Write the canonical events and a small metadata header to a JSON file.
+
+        Returns True on success. Any exception is logged and reported as False.
+        """
+        try:
+            header = {
+                'total_canonical_events': len(canonical_events),
+                'generation_timestamp': datetime.now().isoformat(),
+                'deduplication_threshold': self.config.similarity_threshold
+            }
+            serialised = [asdict(item) for item in canonical_events]
+
+            with open(output_path, 'w') as handle:
+                json.dump({'metadata': header, 'canonical_events': serialised}, handle, indent=2)
+
+            logger.info(f"Canonical events saved to: {output_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to save canonical events: {e}")
+            return False
\ No newline at end of file
diff --git a/event_clip_generator.py b/event_clip_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1dc517cebfacc8d3540e68a4776e8c31c38b30b
--- /dev/null
+++ b/event_clip_generator.py
@@ -0,0 +1,390 @@
+"""
+Event Clip Generator
+
+Generates video clips from events for viewing, playing, and downloading.
+Extracts clips from the original or compressed video based on event timestamps.
+Supports annotation with face bounding boxes for person search results.
+"""
+
+import os
+import cv2
+import subprocess
+import logging
+import uuid
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+
+class EventClipGenerator:
+    """Generate video clips from events"""
+    
+    def __init__(self, output_dir: str = "video_processing_outputs/clips"):
+        """
+        Remember the clip output directory and create it if it is missing
+
+        Args:
+            output_dir: Root directory under which generated clips are written
+        """
+        os.makedirs(output_dir, exist_ok=True)
+        self.output_dir = output_dir
+    
+    def extract_clip(self, video_path: str, start_time: float, end_time: float,
+                   event_id: str, video_id: str = None) -> Optional[str]:
+        """
+        Extract a video clip from a video file
+
+        ffmpeg stream copy is tried first (fast, no re-encoding). If ffmpeg is
+        missing, times out, exits non-zero or produces no data, the clip is
+        extracted with OpenCV instead.
+
+        Args:
+            video_path: Path to source video
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds (must not be before start_time)
+            event_id: Event identifier, used as the clip filename prefix
+            video_id: Optional video identifier for organizing clips
+
+        Returns:
+            Path to extracted clip file, or None if extraction failed
+        """
+        if not os.path.exists(video_path):
+            logger.error(f"Video file not found: {video_path}")
+            return None
+
+        try:
+            # An inverted range can never yield a usable clip; fail fast instead
+            # of running ffmpeg and the OpenCV fallback on it.
+            if end_time < start_time:
+                logger.error(
+                    f"Invalid clip range for event {event_id}: "
+                    f"start={start_time}, end={end_time}"
+                )
+                return None
+
+            # Random suffix keeps repeated extractions of one event from colliding
+            clip_filename = f"{event_id}_{uuid.uuid4().hex[:8]}.mp4"
+
+            # Clips are grouped per video when a video_id is provided
+            clip_dir = os.path.join(self.output_dir, video_id) if video_id else self.output_dir
+            os.makedirs(clip_dir, exist_ok=True)
+            clip_path = os.path.join(clip_dir, clip_filename)
+
+            duration = end_time - start_time
+
+            cmd = [
+                'ffmpeg',
+                '-i', video_path,
+                '-ss', str(start_time),
+                '-t', str(duration),
+                '-c', 'copy',  # Copy codec (fast, no re-encoding)
+                '-avoid_negative_ts', 'make_zero',
+                '-y',  # Overwrite output file
+                clip_path
+            ]
+
+            try:
+                result = subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=60  # 60 second timeout
+                )
+            except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
+                logger.warning(f"FFmpeg not available or failed: {e}, using OpenCV fallback")
+                return self._extract_clip_opencv(video_path, start_time, end_time, clip_path)
+
+            # A zero exit code alone is not enough: require a non-empty output file
+            clip_written = os.path.exists(clip_path) and os.path.getsize(clip_path) > 0
+            if result.returncode == 0 and clip_written:
+                logger.info(f"✅ Extracted clip: {clip_path} ({duration:.2f}s)")
+                return clip_path
+
+            logger.warning(f"FFmpeg extraction failed, trying OpenCV fallback: {result.stderr}")
+            return self._extract_clip_opencv(video_path, start_time, end_time, clip_path)
+
+        except Exception as e:
+            logger.error(f"Error extracting clip: {e}")
+            return None
+    
+    def _extract_clip_opencv(self, video_path: str, start_time: float,
+                            end_time: float, output_path: str) -> Optional[str]:
+        """
+        Extract clip using OpenCV (fallback method)
+
+        Frames between start_time and end_time (inclusive, converted to frame
+        indices with the source frame rate) are written with the mp4v codec,
+        then re-encoded in place to H.264 when ffmpeg is usable. The mp4v file
+        is kept if the re-encode fails.
+
+        Args:
+            video_path: Path to source video
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            output_path: Where the clip is written
+
+        Returns:
+            output_path on success, or None if the video could not be read or
+            no data was written
+        """
+        try:
+            cap = cv2.VideoCapture(video_path)
+            out = None
+            try:
+                if not cap.isOpened():
+                    logger.error(f"Could not open video: {video_path}")
+                    return None
+
+                fps = cap.get(cv2.CAP_PROP_FPS)
+                if not fps > 0:
+                    # Without a frame rate the time range cannot be mapped to frames
+                    logger.error(f"Could not determine frame rate for video: {video_path}")
+                    return None
+
+                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+                # Calculate frame numbers
+                start_frame = int(start_time * fps)
+                end_frame = int(end_time * fps)
+
+                # Set starting position
+                cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+
+                # Create video writer
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+                for _ in range(start_frame, end_frame + 1):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    out.write(frame)
+            finally:
+                # Close the capture and writer even when an error occurs
+                cap.release()
+                if out is not None:
+                    out.release()
+
+            if not (os.path.exists(output_path) and os.path.getsize(output_path) > 0):
+                logger.error(f"OpenCV extraction failed: output file is empty or missing")
+                return None
+
+            # Convert to browser-compatible format using ffmpeg (best effort)
+            if self._transcode_to_h264(output_path):
+                logger.info(f"✅ Extracted clip using OpenCV (H.264): {output_path}")
+            else:
+                logger.info(f"✅ Extracted clip using OpenCV (mp4v): {output_path}")
+            return output_path
+
+        except Exception as e:
+            logger.error(f"OpenCV clip extraction error: {e}")
+            return None
+
+    def _transcode_to_h264(self, clip_path: str) -> bool:
+        """
+        Re-encode a clip in place to H.264/AAC with ffmpeg for browser playback
+
+        Args:
+            clip_path: Existing clip file to convert
+
+        Returns:
+            True when clip_path was replaced by the re-encoded file. On any
+            failure a warning is logged, clip_path is left as it was and
+            False is returned.
+        """
+        # splitext only touches the extension; str.replace would also rewrite
+        # a ".mp4" that happens to appear in a directory name.
+        stem, _ = os.path.splitext(clip_path)
+        browser_compatible_path = f"{stem}_h264.mp4"
+        cmd = [
+            'ffmpeg',
+            '-i', clip_path,
+            '-c:v', 'libx264',  # H.264 codec for browser compatibility
+            '-preset', 'fast',
+            '-crf', '23',
+            '-c:a', 'aac',  # AAC audio codec
+            '-movflags', '+faststart',  # Enable streaming
+            '-y',
+            browser_compatible_path
+        ]
+
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+            if result.returncode == 0 and os.path.exists(browser_compatible_path):
+                # Swap the re-encoded file in over the mp4v original
+                os.replace(browser_compatible_path, clip_path)
+                return True
+            logger.warning(f"FFmpeg conversion failed: {result.stderr}")
+        except (subprocess.SubprocessError, OSError) as e:
+            logger.warning(f"FFmpeg not available for conversion: {e}")
+
+        # Do not leave a partial re-encode next to the clip
+        if os.path.exists(browser_compatible_path):
+            try:
+                os.remove(browser_compatible_path)
+            except OSError as e:
+                logger.debug(f"Could not remove temporary file {browser_compatible_path}: {e}")
+        return False
+
+    def extract_annotated_clip(self, video_path: str, start_time: float, end_time: float,
+                              face_id: str, face_detections: List[Dict[str, Any]],
+                              video_id: str = None, person_name: str = None) -> Optional[str]:
+        """
+        Extract and annotate a video clip with bounding boxes for a specific person
+
+        Args:
+            video_path: Path to source video
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            face_id: Face identifier to highlight
+            face_detections: List of face detection records with bounding boxes and timestamps
+            video_id: Optional video identifier
+            person_name: Optional person name to display on annotations
+
+        Returns:
+            Path to annotated clip file, or None if extraction failed
+        """
+        if not os.path.exists(video_path):
+            logger.error(f"Video file not found: {video_path}")
+            return None
+
+        try:
+            # Create annotated clip filename
+            clip_filename = f"annotated_{face_id}_{uuid.uuid4().hex[:8]}.mp4"
+
+            # Annotated clips live in an "annotated" folder, per video when known
+            if video_id:
+                clip_dir = os.path.join(self.output_dir, video_id, "annotated")
+            else:
+                clip_dir = os.path.join(self.output_dir, "annotated")
+            os.makedirs(clip_dir, exist_ok=True)
+            clip_path = os.path.join(clip_dir, clip_filename)
+
+            label = person_name if person_name else "Detected Person"
+            color = (0, 255, 0)  # Green in BGR
+            thickness = 3
+            frames_annotated = 0
+
+            cap = cv2.VideoCapture(video_path)
+            out = None
+            try:
+                if not cap.isOpened():
+                    logger.error(f"Could not open video: {video_path}")
+                    return None
+
+                fps = cap.get(cv2.CAP_PROP_FPS)
+                if not fps > 0:
+                    # Without a frame rate the time range cannot be mapped to frames
+                    logger.error(f"Could not determine frame rate for video: {video_path}")
+                    return None
+
+                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+                # Calculate frame numbers
+                start_frame = int(start_time * fps)
+                end_frame = int(end_time * fps)
+                if total_frames > 0:
+                    # Only clamp when the container reports a usable frame count
+                    end_frame = min(end_frame, total_frames - 1)
+
+                # Map of frame_number -> bounding box for quick lookup
+                frame_bbox_map = self._build_frame_bbox_map(face_id, face_detections, fps)
+
+                # Set starting position
+                cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+
+                # Create video writer
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out = cv2.VideoWriter(clip_path, fourcc, fps, (width, height))
+
+                # The label is the same on every frame, so measure it once
+                label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
+
+                for frame_index in range(start_frame, end_frame + 1):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+
+                    bbox = frame_bbox_map.get(frame_index)
+                    if bbox is not None:
+                        x1, y1, x2, y2 = bbox
+
+                        # Draw bounding box (green for person detection)
+                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
+
+                        # Label sits above the box; clamp to the frame top so it
+                        # stays visible for boxes touching the upper edge.
+                        label_top = max(y1 - label_size[1] - 10, 0)
+                        label_bottom = label_top + label_size[1] + 10
+                        cv2.rectangle(frame, (x1, label_top),
+                                     (x1 + label_size[0] + 10, label_bottom), color, -1)
+                        cv2.putText(frame, label, (x1 + 5, label_bottom - 5),
+                                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+                        frames_annotated += 1
+
+                    out.write(frame)
+            finally:
+                # Close the capture and writer even when an error occurs
+                cap.release()
+                if out is not None:
+                    out.release()
+
+            if not (os.path.exists(clip_path) and os.path.getsize(clip_path) > 0):
+                logger.error(f"Annotated clip creation failed: output file is empty or missing")
+                return None
+
+            # Convert to browser-compatible format using ffmpeg (best effort)
+            if self._transcode_to_h264(clip_path):
+                logger.info(f"✅ Created annotated clip: {clip_path} ({frames_annotated} frames annotated)")
+            else:
+                logger.info(f"✅ Created annotated clip (mp4v): {clip_path} ({frames_annotated} frames annotated)")
+            return clip_path
+
+        except Exception as e:
+            logger.error(f"Error creating annotated clip: {e}")
+            return None
+
+    @staticmethod
+    def _build_frame_bbox_map(face_id: str, face_detections: List[Dict[str, Any]],
+                             fps: float) -> Dict[int, Tuple[int, int, int, int]]:
+        """
+        Map frame numbers to the bounding box of face_id for quick lookup
+
+        Detections with an unusable timestamp or bounding box are skipped
+        with a warning rather than aborting the whole clip.
+
+        Args:
+            face_id: Only detections with this face_id are used
+            face_detections: Detection records (timestamp/detected_at plus
+                bounding_box/bounding_boxes)
+            fps: Frame rate used to convert seconds to frame numbers
+
+        Returns:
+            Dict of frame number -> (x1, y1, x2, y2)
+        """
+        frame_bbox_map = {}
+        for detection in face_detections or []:
+            if detection.get('face_id') != face_id:
+                continue
+
+            # Try multiple timestamp fields; a missing timestamp maps to frame 0
+            timestamp = detection.get('timestamp') or detection.get('detected_at') or 0
+
+            # If timestamp is a datetime-like object, convert to seconds.
+            # NOTE(review): this yields epoch seconds, not an offset into the
+            # video - confirm what detected_at is measured against.
+            if hasattr(timestamp, 'timestamp'):
+                timestamp = timestamp.timestamp()
+
+            try:
+                seconds = float(timestamp)
+                frame_num = int(seconds * fps) if seconds > 0 else 0
+            except (TypeError, ValueError, OverflowError) as e:
+                logger.warning(f"Skipping detection with invalid timestamp: {timestamp!r}, error: {e}")
+                continue
+
+            # Try multiple bbox field names
+            bbox = detection.get('bounding_box') or detection.get('bounding_boxes')
+            if not bbox:
+                continue
+
+            # Handle different bbox formats: [x1, y1, x2, y2] or {"x1": ..., "y1": ..., ...}
+            try:
+                if isinstance(bbox, dict):
+                    x1 = int(bbox.get('x1', bbox.get(0, 0)))
+                    y1 = int(bbox.get('y1', bbox.get(1, 0)))
+                    x2 = int(bbox.get('x2', bbox.get(2, 0)))
+                    y2 = int(bbox.get('y2', bbox.get(3, 0)))
+                elif isinstance(bbox, (list, tuple)) and len(bbox) >= 4:
+                    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
+                else:
+                    continue
+            except (ValueError, TypeError) as e:
+                logger.warning(f"Invalid bounding box format: {bbox}, error: {e}")
+                continue
+
+            # Validate bounding box coordinates
+            if x1 >= 0 and y1 >= 0 and x2 > x1 and y2 > y1:
+                # Store for multiple nearby frames to handle timestamp inaccuracies
+                for offset in range(-2, 3):  # ±2 frames tolerance
+                    frame_bbox_map[frame_num + offset] = (x1, y1, x2, y2)
+
+        return frame_bbox_map
+    
+    def get_clip_info(self, clip_path: str) -> Dict[str, Any]:
+        """
+        Get information about a clip file
+
+        Args:
+            clip_path: Path to the clip to inspect
+
+        Returns:
+            Dict with duration (seconds), fps, frame_count, resolution
+            ("WIDTHxHEIGHT"), file_size (bytes) and file_size_mb, or an empty
+            dict if the file is missing, cannot be opened or an error occurs
+        """
+        if not os.path.exists(clip_path):
+            return {}
+
+        cap = None
+        try:
+            cap = cv2.VideoCapture(clip_path)
+            if not cap.isOpened():
+                return {}
+
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            # Guard against a zero frame rate reported by the container
+            duration = frame_count / fps if fps > 0 else 0
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            file_size = os.path.getsize(clip_path)
+
+            return {
+                'duration': duration,
+                'fps': fps,
+                'frame_count': frame_count,
+                'resolution': f"{width}x{height}",
+                'file_size': file_size,
+                'file_size_mb': round(file_size / (1024 * 1024), 2)
+            }
+        except Exception as e:
+            logger.error(f"Error getting clip info: {e}")
+            return {}
+        finally:
+            # Release the capture on every exit path, including errors
+            if cap is not None:
+                cap.release()
+
diff --git a/extract_upload_keyframes.py b/extract_upload_keyframes.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f855d3dfe9b80c493ce1fee4fbb04e922ff87fc
--- /dev/null
+++ b/extract_upload_keyframes.py
@@ -0,0 +1,240 @@
+"""
+Extract keyframes from videos and upload to S3-compatible storage (Backblaze B2).
+
+For each video that has captions but no keyframes in storage:
+1. Get the frame_ids from video_captions
+2. Get the video source (local file or S3)
+3. Extract those exact frames using OpenCV
+4. Upload to S3 at {video_id}/frame_XXXXXX.jpg
+"""
+import os
+import sys
+import io
+import tempfile
+import cv2
+from pymongo import MongoClient
+from minio import Minio
+from dotenv import load_dotenv
+
+# Load variables from a .env file into the environment before reading settings
+load_dotenv()
+
+# NOTE(security): the fallback values below embed a database password and
+# storage access/secret keys directly in source control. They should be treated
+# as leaked: rotate them and supply the values via environment variables only.
+# The MongoDB connection is created at import time.
+MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0")
+client = MongoClient(MONGO_URI)
+db = client.detectifai
+
+# S3-compatible client; every setting can be overridden through MINIO_* variables.
+# MINIO_SECURE is true only when its value equals "true" (case-insensitive);
+# an empty MINIO_REGION is passed as None.
+minio_client = Minio(
+    os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
+    access_key=os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001'),
+    secret_key=os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA'),
+    secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
+    region=os.getenv('MINIO_REGION', 'eu-central-003') or None
+)
+# Buckets holding extracted keyframes and source videos
+KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
+VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')
+
+# Project root used to locate local uploads; the default is a developer-specific
+# Windows path, so set BASE_DIR explicitly on any other machine.
+BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")
+
+def get_video_source(video_id):
+    """Return path to video file. Download from MinIO if not local.
+
+    Lookup order:
+    1. ``{BASE_DIR}/uploads/{video_id}/video.mp4`` when it exists and is non-empty.
+    2. The object named by the ``video_file`` record for this video
+       (``minio_object_key`` / ``minio_bucket``), downloaded to
+       ``{video_id}.mp4`` in the system temp directory.
+
+    Returns:
+        Path to a local video file, or None when neither source is available.
+        Errors raised by the download itself propagate to the caller.
+    """
+    # Check local uploads first
+    local_path = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
+    if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
+        print(f"  Using local file: {local_path}")
+        return local_path
+
+    # Check MinIO
+    rec = db.video_file.find_one({"video_id": video_id}, {"minio_object_key": 1, "minio_bucket": 1})
+    if not rec or not rec.get("minio_object_key"):
+        return None
+
+    # A record can carry an explicit null/empty bucket; dict.get's default only
+    # covers a missing key, so fall back with `or`.
+    bucket = rec.get("minio_bucket") or VIDEO_BUCKET
+    obj_key = rec["minio_object_key"]
+
+    # Verify the object actually exists before downloading
+    try:
+        minio_client.stat_object(bucket, obj_key)
+    except Exception:
+        # NOTE(review): any failure (including network or auth errors) is
+        # reported as "not found" here.
+        print(f"  MinIO object not found: {bucket}/{obj_key}")
+        return None
+
+    print(f"  Downloading from MinIO: {bucket}/{obj_key}")
+    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
+    minio_client.fget_object(bucket, obj_key, tmp_path)
+    print(f"  Downloaded to: {tmp_path}")
+    return tmp_path
+
+
+import numpy as np
+
+
+def upload_placeholder_keyframes(video_id, frame_ids):
+    """Generate and upload placeholder keyframe images for videos whose source is gone.
+
+    Each placeholder is a 640x360 JPEG showing the video id, the frame id and
+    up to 80 characters of the frame's caption, uploaded to
+    ``{video_id}/{frame_id}.jpg`` in the keyframe bucket.
+
+    Args:
+        video_id: Video the frames belong to.
+        frame_ids: Frame identifiers to create placeholders for.
+
+    Returns:
+        Number of placeholders uploaded.
+    """
+    uploaded = 0
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    # The gradient is identical for every frame, so build it once and copy it
+    background = _placeholder_background()
+
+    for frame_id in frame_ids:
+        # Get the caption text for this frame to display on placeholder
+        caption_doc = db.video_captions.find_one(
+            {"video_id": video_id, "frame_id": frame_id},
+            {"caption": 1, "_id": 0}
+        )
+        # Covers a missing document as well as a null/empty caption field
+        caption_text = (caption_doc or {}).get("caption") or "No caption"
+
+        img = background.copy()
+
+        # Video ID
+        cv2.putText(img, video_id, (20, 40), font, 0.5, (150, 150, 150), 1)
+        # Frame ID
+        cv2.putText(img, frame_id, (20, 70), font, 0.5, (150, 150, 150), 1)
+        # Camera icon placeholder
+        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
+        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)
+        # Caption (wrap if long)
+        _draw_wrapped_caption(img, caption_text[:80], font)
+
+        # Encode as JPEG
+        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
+        if not success:
+            continue
+
+        # Convert once; the same bytes provide both the stream and its length
+        payload = buffer.tobytes()
+        minio_client.put_object(
+            KEYFRAME_BUCKET, f"{video_id}/{frame_id}.jpg", io.BytesIO(payload),
+            length=len(payload),
+            content_type='image/jpeg'
+        )
+        uploaded += 1
+
+    return uploaded
+
+
+def _placeholder_background():
+    """Build the 640x360 dark blue gradient shown behind every placeholder."""
+    img = np.zeros((360, 640, 3), dtype=np.uint8)
+    for y in range(360):
+        val = int(30 + (y / 360) * 40)
+        img[y, :] = [val, int(val * 0.8), int(val * 0.5)]
+    return img
+
+
+def _draw_wrapped_caption(img, text, font):
+    """Draw text on img starting at y=250, wrapping lines at roughly 50 characters."""
+    line = ""
+    y_pos = 250
+    for word in text.split():
+        candidate = line + " " + word if line else word
+        if len(candidate) > 50:
+            cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
+            y_pos += 22
+            line = word
+        else:
+            line = candidate
+    if line:
+        cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
+
+
+def extract_and_upload_keyframes(video_id, frame_ids):
+    """Extract specific frames from video and upload to MinIO.
+
+    Falls back to placeholder images when no video source can be found.
+    Frame ids are expected to look like ``frame_000060``; ids that cannot be
+    parsed are reported and skipped. A frame number past the end of the video
+    is replaced by the last available frame.
+
+    Args:
+        video_id: Video to read frames from.
+        frame_ids: Frame identifiers to extract.
+
+    Returns:
+        Number of keyframes uploaded.
+    """
+    video_path = get_video_source(video_id)
+    if not video_path:
+        print(f"  No video source found — generating placeholder keyframes")
+        return upload_placeholder_keyframes(video_id, frame_ids)
+
+    # Location get_video_source uses for files downloaded from MinIO
+    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
+
+    try:
+        # Parse frame numbers from frame_ids like "frame_000060"
+        frame_numbers = {}
+        for fid in frame_ids:
+            try:
+                frame_numbers[int(fid.replace("frame_", ""))] = fid
+            except ValueError:
+                print(f"  WARNING: Could not parse frame_id: {fid}")
+
+        if not frame_numbers:
+            print(f"  No valid frame numbers to extract")
+            return 0
+
+        cap = cv2.VideoCapture(video_path)
+        try:
+            if not cap.isOpened():
+                print(f"  ERROR: Could not open video: {video_path}")
+                return 0
+
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            print(f"  Video: {total_frames} frames, {fps:.1f} fps")
+
+            uploaded = 0
+            for frame_num in sorted(frame_numbers):
+                if frame_num >= total_frames:
+                    # Use last available frame (never a negative index)
+                    frame_num_actual = max(total_frames - 1, 0)
+                    print(f"  Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
+                else:
+                    frame_num_actual = frame_num
+
+                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
+                ret, frame = cap.read()
+                if not ret:
+                    print(f"  ERROR: Could not read frame {frame_num_actual}")
+                    continue
+
+                # Encode as JPEG
+                success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+                if not success:
+                    print(f"  ERROR: Could not encode frame {frame_num}")
+                    continue
+
+                # The object keeps the requested frame id even when clamped
+                minio_path = f"{video_id}/{frame_numbers[frame_num]}.jpg"
+
+                # Upload to MinIO; convert once for both the stream and its length
+                payload = buffer.tobytes()
+                minio_client.put_object(
+                    KEYFRAME_BUCKET,
+                    minio_path,
+                    io.BytesIO(payload),
+                    length=len(payload),
+                    content_type='image/jpeg'
+                )
+                uploaded += 1
+
+            return uploaded
+        finally:
+            # Release before deleting: an open handle blocks removal on Windows
+            cap.release()
+    finally:
+        # Clean up temp file if downloaded from MinIO, on every exit path
+        if video_path == tmp_path and os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+
+def main():
+    """Backfill keyframes for every captioned video that has none in storage.
+
+    Videos whose id starts with ``test_`` are ignored. After processing, the
+    keyframe count of each remaining video is printed as a final check.
+    """
+    def stored_keyframes(vid):
+        # All objects currently stored under this video's prefix
+        return list(minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{vid}/", recursive=True))
+
+    # Get all video_ids with captions
+    caption_vids = db.video_captions.distinct("video_id")
+
+    for video_id in caption_vids:
+        if video_id.startswith("test_"):
+            continue
+
+        # Check if keyframes already exist in MinIO
+        already_stored = len(stored_keyframes(video_id))
+        if already_stored > 0:
+            print(f"SKIP {video_id}: already has {already_stored} keyframes in MinIO")
+            continue
+
+        # Get frame_ids from captions
+        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
+        if not frame_ids:
+            print(f"SKIP {video_id}: no frame_ids in captions")
+            continue
+
+        wanted = len(frame_ids)
+        print(f"\nPROCESSING {video_id}: {wanted} frames to extract")
+        uploaded = extract_and_upload_keyframes(video_id, frame_ids)
+        print(f"  Uploaded {uploaded}/{wanted} keyframes to MinIO")
+
+    print("\n=== DONE ===")
+    # Final check
+    for video_id in caption_vids:
+        if not video_id.startswith("test_"):
+            print(f"  {video_id}: {len(stored_keyframes(video_id))} keyframes in MinIO")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/facial_recognition.py b/facial_recognition.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a18b2dfb024c7f3f813303f68a965f38597fd05
--- /dev/null
+++ b/facial_recognition.py
@@ -0,0 +1,926 @@
+"""
+Facial Recognition Module for DetectifAI
+
+This module handles facial recognition for suspicious activity frames:
+- Face detection using MTCNN (primary) or OpenCV Haar cascades (fallback)
+- Face embeddings using FaceNet (primary) or histogram-based (fallback)
+- FAISS vector similarity search (primary) or cosine similarity (fallback)
+- MongoDB metadata storage with local JSON fallback
+- Integration with suspicious activity detection pipeline
+
+Workflow (matches activity diagram):
+1. Receive frame from suspicious event (object detection)
+2. Run face detection
+3. If faces detected: crop faces, generate embeddings, store in FAISS/index
+4. Upload face crops to storage, save metadata to MongoDB/JSON
+5. Search for similar embeddings, link with previous incidents
+6. Assign new person ID if no match found
+
+Author: DetectifAI Team
+"""
+
+import os
+import cv2
+import numpy as np
+import logging
+import json
+import uuid
+import time
+import warnings
+from typing import List, Tuple, Optional, Dict, Any
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+# Advanced imports (with fallbacks)
+try:
+    import torch
+    from facenet_pytorch import MTCNN, InceptionResnetV1
+    import faiss
+    from pymongo import MongoClient
+    from dotenv import load_dotenv
+    import joblib
+    ADVANCED_AVAILABLE = True
+    load_dotenv()
+except ImportError:
+    ADVANCED_AVAILABLE = False
+
+warnings.filterwarnings('ignore')
+logger = logging.getLogger(__name__)
+
+# ========================================
+# Configuration
+# ========================================
+
+# MongoDB Configuration
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/") if ADVANCED_AVAILABLE else None  # None when the optional imports above failed
+MONGO_DB_NAME = "detectifai"
+
+# FAISS Configuration (relative paths: they resolve against the current working directory)
+FAISS_INDEX_PATH = "model/faiss_face_index.bin"
+FAISS_ID_MAP_PATH = "model/faiss_id_map.json"  # JSON map: FAISS row number -> face_id
+EMBEDDING_DIM = 512  # InceptionResnetV1 produces 512-dim embeddings
+
+# Trained Models Configuration (both files are loaded with joblib by PersonClassifier)
+TRAINED_MODEL_DIR = "model/trained_models"
+CLASSIFIER_PATH = os.path.join(TRAINED_MODEL_DIR, "classifier_svm.pkl")
+ENCODER_PATH = os.path.join(TRAINED_MODEL_DIR, "label_encoder.pkl")
+
+# Simple fallback configuration (JSON map: face_id -> embedding as a list)
+SIMPLE_INDEX_PATH = "model/simple_face_index.json"
+
+# Face storage (directory that receives one <face_id>.jpg per saved face crop)
+FACES_DIR = "model/faces"
+
+# ========================================
+# Data Models
+# ========================================
+
+@dataclass
+class FaceDetectionResult:
+    """Result of face detection in a frame"""
+    frame_path: str  # path of the frame image that was analysed
+    timestamp: float  # position of the frame in the video, in seconds
+    faces_detected: int  # number of faces returned by the detector
+    face_embeddings: List[np.ndarray]  # one embedding per detected face
+    face_bounding_boxes: List[Tuple[int, int, int, int]]  # (x1, y1, x2, y2) per face
+    face_confidence_scores: List[float]  # detector confidence per face
+    processing_time: float  # wall-clock seconds spent on the frame
+    detected_face_ids: Optional[List[str]] = None  # index face_id per face (matched or newly created); None when not populated
+    matched_persons: Optional[List[str]] = None  # human-readable label per face; None when not populated
+
+@dataclass 
+class SuspiciousPerson:
+    """Information about a suspicious person (not instantiated in the part of the module shown here)"""
+    person_id: str
+    first_detected: float  # timestamp (presumably video seconds, like FaceDetectionResult.timestamp — TODO confirm)
+    last_seen: float       # timestamp (same unit as first_detected)
+    face_embedding: Optional[np.ndarray]  # may be None
+    associated_events: List[str]  # event IDs where this person appeared
+    threat_level: str  # NOTE(review): allowed values are not defined here — confirm with callers
+    notes: str
+    detection_count: int  # presumably the number of appearances — TODO confirm
+    face_id: str = ""  # Primary face_id
+
+# ========================================
+# Advanced Implementation (FAISS + FaceNet)
+# ========================================
+
+class AdvancedFaceDetector:
+    """MTCNN face detector plus heuristic checks that discard detections unlikely to be faces."""
+    
+    def __init__(self, device='cpu', min_face_size=60):  # Increased from 40 to 60 for stricter filtering
+        self.device = torch.device(device)
+        self.mtcnn = MTCNN(
+            image_size=160,
+            margin=20,
+            min_face_size=min_face_size,  # Larger minimum to reject small circular objects
+            thresholds=[0.8, 0.9, 0.9],  # Very strict thresholds (was [0.7, 0.8, 0.8]) to eliminate false positives
+            factor=0.709,
+            keep_all=True,
+            device=self.device
+        )
+        logger.info(f"[AdvancedFaceDetector] Initialized MTCNN on {device} with min_face_size={min_face_size}, strict thresholds=[0.8, 0.9, 0.9]")
+    
+    def detect_faces(self, frame: np.ndarray) -> Tuple[List[np.ndarray], List[np.ndarray], List[float]]:
+        """Return (face tensors, boxes, probabilities) for the detections in a BGR frame that pass validation."""
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        boxes, probs = self.mtcnn.detect(rgb_frame, landmarks=False)
+        if boxes is None:
+            return [], [], []
+        
+        faces = self.mtcnn.extract(rgb_frame, boxes, save_path=None)
+        if faces is None:
+            return [], [], []
+        
+        valid_faces, valid_boxes, valid_probs = [], [], []
+        for face, prob, box in zip(faces, probs, boxes):
+            # Very strict probability threshold (increased from 0.85 to 0.90)
+            if face is not None and prob > 0.90:
+                # Additional validation to filter false positives (e.g., tires, wheels)
+                if self._is_valid_face(face, box):
+                    valid_faces.append(face)
+                    valid_boxes.append(box)
+                    valid_probs.append(prob)
+                else:
+                    logger.debug(f"Rejected detection (prob={prob:.3f}) - failed quality validation")
+        
+        return valid_faces, valid_boxes, valid_probs
+    
+    def _is_valid_face(self, face_tensor: torch.Tensor, box: np.ndarray) -> bool:
+        """Return True when box shape, size, contrast and edge density all look face-like; False on any error."""
+        try:
+            # 1. Check bounding box aspect ratio (width / height must lie within 0.7-1.5)
+            x1, y1, x2, y2 = box
+            width = x2 - x1
+            height = y2 - y1
+            
+            if width <= 0 or height <= 0:
+                return False
+            
+            aspect_ratio = width / height
+            # Reject boxes that are too tall or too wide to be a face
+            if aspect_ratio < 0.7 or aspect_ratio > 1.5:
+                logger.debug(f"Rejected: aspect_ratio={aspect_ratio:.2f} (accepted range 0.70-1.50)")
+                return False
+            
+            # 2. Check minimum face size (reject small detections) - increased to 60px
+            if width < 60 or height < 60:
+                logger.debug(f"Rejected: too small ({width}x{height}) - minimum is 60x60")
+                return False
+            
+            # 3. Check face tensor for quality (reject blurry or low-contrast images like tire treads)
+            face_np = face_tensor.permute(1, 2, 0).cpu().numpy()
+            # Values are MTCNN-standardized ((pixel - 127.5) / 128), i.e. roughly in [-1, 1].
+            # Check variance (faces should have good contrast, tires are uniform) - increased threshold
+            variance = np.var(face_np)
+            if variance < 0.02:  # Increased from 0.01 to 0.02 for stricter filtering
+                logger.debug(f"Rejected: low variance={variance:.4f} (uniform object, likely tire)")
+                return False
+            
+            # 4. Check edge density on the de-standardized 0-255 image (a plain "* 255" would wrap negative values in uint8)
+            gray = cv2.cvtColor((face_np * 128 + 127.5).clip(0, 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
+            edges = cv2.Canny(gray, 50, 150)
+            edge_density = np.sum(edges > 0) / (edges.shape[0] * edges.shape[1])
+            
+            # Tires have uniform circular edges, faces have complex features - tightened range
+            if edge_density < 0.08 or edge_density > 0.35:  # Narrowed from (0.05, 0.4) to (0.08, 0.35)
+                logger.debug(f"Rejected: edge_density={edge_density:.3f} (abnormal edge pattern)")
+                return False
+            
+            return True
+            
+        except Exception as e:
+            logger.warning(f"Face validation error: {e}")
+            return False  # Reject on error to be safe
+
+class AdvancedFaceEmbedder:
+    """Produces FaceNet (InceptionResnetV1) embedding vectors from face tensors."""
+    
+    def __init__(self, device='cpu', weights='vggface2'):
+        self.device = torch.device(device)
+        self.model = InceptionResnetV1(pretrained=weights).eval().to(self.device)
+        logger.info(f"[AdvancedFaceEmbedder] Loaded InceptionResnetV1 on {device}")
+    
+    def generate_embedding(self, face_tensor: torch.Tensor) -> np.ndarray:
+        """Run one face tensor through the model (as a batch of one) and return a flat numpy vector."""
+        batch = face_tensor.to(self.device).unsqueeze(0)
+        with torch.no_grad():
+            return self.model(batch).cpu().numpy().flatten()
+
+class PersonClassifier:
+    """Maps face embeddings to known identities with a pre-trained SVM, when the model files exist."""
+    
+    def __init__(self, classifier_path: str = CLASSIFIER_PATH, encoder_path: str = ENCODER_PATH,
+                 confidence_threshold: float = 0.5):
+        self.confidence_threshold = confidence_threshold
+        self.enabled = False
+        
+        models_present = ADVANCED_AVAILABLE and os.path.exists(classifier_path) and os.path.exists(encoder_path)
+        if not models_present:
+            logger.info("[PersonClassifier] Trained models not available, using generic face tracking")
+            return
+        try:
+            self.classifier = joblib.load(classifier_path)
+            self.label_encoder = joblib.load(encoder_path)
+            self.enabled = True
+            logger.info(f"[PersonClassifier] ✅ Model loaded, {len(self.label_encoder.classes_)} identities recognized.")
+        except Exception as e:
+            logger.warning(f"[PersonClassifier] ⚠️ Failed to load model: {e}")
+    
+    def identify_person(self, embedding: np.ndarray) -> Tuple[Optional[str], float]:
+        """Return (label, confidence) of the top class; label is None below the threshold, (None, 0.0) if disabled or on error."""
+        if not self.enabled:
+            return None, 0.0
+        
+        try:
+            probs = self.classifier.predict_proba(embedding.reshape(1, -1))[0]
+            best_idx = np.argmax(probs)
+            conf = float(probs[best_idx])
+            # Below the threshold the confidence is still reported, just without a label.
+            label = self.label_encoder.classes_[best_idx] if conf >= self.confidence_threshold else None
+            return label, conf
+        except Exception as e:
+            logger.error(f"[PersonClassifier] Error: {e}")
+            return None, 0.0
+
+class FAISSFaceIndex:
+    """FAISS inner-product index over L2-normalised embeddings, with a JSON row -> face_id map."""
+    
+    def __init__(self, embedding_dim: int = 512, index_path: str = FAISS_INDEX_PATH, 
+                 id_map_path: str = FAISS_ID_MAP_PATH):
+        self.embedding_dim = embedding_dim
+        self.index_path = index_path
+        self.id_map_path = id_map_path
+        self.index = None
+        self.id_map = {}       # FAISS row number -> face_id
+        self.reverse_map = {}  # face_id -> FAISS row number
+        
+        os.makedirs(os.path.dirname(index_path) or '.', exist_ok=True)  # dirname is '' for a bare filename
+        self._load_or_create_index()
+    
+    def _load_or_create_index(self):
+        """Load index and ID map from disk; start empty when either file is missing or unreadable."""
+        if os.path.exists(self.index_path) and os.path.exists(self.id_map_path):
+            try:
+                self.index = faiss.read_index(self.index_path)
+                with open(self.id_map_path, 'r') as f:
+                    data = json.load(f)
+                self.id_map = {int(k): v for k, v in data.items()}  # JSON object keys are strings
+                self.reverse_map = {v: k for k, v in self.id_map.items()}
+                logger.info(f"[FAISS] Loaded index with {self.index.ntotal} embeddings")
+            except Exception as e:
+                logger.warning(f"[FAISS] Error loading index: {e}")
+                self._create_new_index()
+        else:
+            self._create_new_index()
+    
+    def _create_new_index(self):
+        self.index = faiss.IndexFlatIP(self.embedding_dim)  # inner product; equals cosine similarity on unit vectors
+        self.id_map = {}
+        self.reverse_map = {}
+        logger.info(f"[FAISS] Created new index (dim={self.embedding_dim})")
+    
+    def add_embedding(self, face_id: str, embedding: np.ndarray) -> int:
+        """Add a normalised copy of the embedding and return its row; an already-known face_id returns its existing row."""
+        if face_id in self.reverse_map:
+            return self.reverse_map[face_id]
+        
+        embedding = embedding.astype('float32').reshape(1, -1)
+        embedding = embedding / max(float(np.linalg.norm(embedding)), 1e-12)  # guard: an all-zero vector must not become NaN
+        idx = self.index.ntotal
+        self.index.add(embedding)
+        self.id_map[idx] = face_id
+        self.reverse_map[face_id] = idx
+        return idx
+    
+    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Tuple[str, float]]:
+        """Return up to k (face_id, similarity) pairs with similarity >= threshold, in FAISS result order."""
+        if self.index.ntotal == 0:
+            return []
+        
+        query_embedding = query_embedding.astype('float32').reshape(1, -1)
+        query_embedding = query_embedding / max(float(np.linalg.norm(query_embedding)), 1e-12)
+        similarities, indices = self.index.search(query_embedding, min(k, self.index.ntotal))
+        
+        # Rows that have no entry in the ID map are skipped.
+        return [
+            (self.id_map[idx], float(sim))
+            for sim, idx in zip(similarities[0], indices[0])
+            if idx in self.id_map and sim >= threshold
+        ]
+    
+    def save(self):
+        """Write the FAISS index and the ID map to their configured paths."""
+        os.makedirs(os.path.dirname(self.index_path) or '.', exist_ok=True)
+        faiss.write_index(self.index, self.index_path)
+        with open(self.id_map_path, 'w') as f:
+            json.dump(self.id_map, f)
+
+class MongoDBFaceStorage:
+    """Stores face metadata documents in the 'detected_faces' MongoDB collection."""
+    
+    def __init__(self, mongo_uri: str, db_name: str = MONGO_DB_NAME):
+        self.enabled = False
+        try:
+            self.client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
+            self.db = self.client[db_name]
+            self.faces_collection = self.db['detected_faces']
+            self.client.server_info()  # Test connection
+        except Exception as e:
+            logger.warning(f"[MongoDB] Connection failed: {e}")
+        else:
+            self.enabled = True
+            logger.info("[MongoDB] Connected successfully")
+    
+    def save_face(self, data: Dict) -> str:
+        """Insert the metadata document (modified in place) and return its ID as a string, or "" if disabled or on failure."""
+        if not self.enabled:
+            return ""
+        data['detected_at'] = datetime.utcnow()
+        # Don't store embeddings in MongoDB: drop any supplied value, then add an empty placeholder.
+        data.pop('face_embedding', None)
+        data['face_embedding'] = []
+        
+        try:
+            return str(self.faces_collection.insert_one(data).inserted_id)
+        except Exception as e:
+            logger.error(f"[MongoDB] Error saving face: {e}")
+            return ""
+    
+    def close(self):
+        if hasattr(self, 'client'):
+            self.client.close()
+
+# ========================================
+# Simple Implementation (OpenCV + Histograms)
+# ========================================
+
+class SimpleFaceDetector:
+    """Fallback face detector built on OpenCV's frontal-face Haar cascade."""
+    
+    def __init__(self, device='cpu'):
+        self.device = device  # stored only; nothing in this class reads it
+        self.face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
+        logger.info("[SimpleFaceDetector] Initialized with OpenCV Haar cascades")
+    
+    def detect_faces(self, frame: np.ndarray) -> Tuple[List[np.ndarray], List[np.ndarray], List[float]]:
+        """Return (BGR face crops, [x1, y1, x2, y2] boxes, confidences) for one BGR frame."""
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        # Positional arguments: scaleFactor=1.1, minNeighbors=4.
+        detections = self.face_cascade.detectMultiScale(gray, 1.1, 4, minSize=(30, 30))
+        
+        face_crops = []
+        boxes = []
+        for (x, y, w, h) in detections:
+            x2, y2 = x + w, y + h
+            face_crops.append(frame[y:y2, x:x2])
+            boxes.append([x, y, x2, y2])
+        
+        # The cascade call returns no score, so every face gets the same fixed confidence.
+        confidences = [0.8] * len(face_crops)
+        return face_crops, boxes, confidences
+
+class SimpleFaceEmbedder:
+    """Fallback embedder: a unit-length 48-value HSV colour histogram per face crop."""
+    
+    def __init__(self, device='cpu'):
+        self.device = device
+        logger.info(f"[SimpleFaceEmbedder] Using histogram-based embeddings")
+    
+    def generate_embedding(self, face_crop: np.ndarray) -> np.ndarray:
+        """Return a unit-norm 48-dim vector; input that is not a 3-D array gets a random unit vector instead."""
+        if isinstance(face_crop, np.ndarray) and len(face_crop.shape) == 3:
+            face_resized = cv2.resize(face_crop, (64, 64))
+            hsv = cv2.cvtColor(face_resized, cv2.COLOR_BGR2HSV)
+            # 16 bins per HSV channel -> 48 values in total
+            hist_h = cv2.calcHist([hsv], [0], None, [16], [0, 180])
+            hist_s = cv2.calcHist([hsv], [1], None, [16], [0, 256])
+            hist_v = cv2.calcHist([hsv], [2], None, [16], [0, 256])
+            embedding = np.concatenate([hist_h.flatten(), hist_s.flatten(), hist_v.flatten()])
+            return embedding / np.linalg.norm(embedding)
+        fallback = np.random.rand(48)  # normalise by its OWN norm (the old code divided by the norm of a second random draw)
+        return fallback / np.linalg.norm(fallback)
+
+class SimpleFaceIndex:
+    """In-memory face_id -> embedding store searched by brute-force cosine similarity, persisted as JSON."""
+    
+    def __init__(self, index_path: str = SIMPLE_INDEX_PATH):
+        self.index_path = index_path
+        self.faces_db = {}  # face_id -> np.ndarray embedding
+        
+        os.makedirs(os.path.dirname(index_path) or '.', exist_ok=True)  # dirname is '' for a bare filename
+        self._load_index()
+    
+    def _load_index(self):
+        """Populate faces_db from the JSON file; a missing or unreadable file leaves it empty."""
+        self.faces_db = {}
+        if not os.path.exists(self.index_path):
+            return
+        try:
+            with open(self.index_path, 'r') as f:
+                data = json.load(f)
+            self.faces_db = {face_id: np.array(embedding) for face_id, embedding in data.items()}
+            logger.info(f"[SimpleFaceIndex] Loaded {len(self.faces_db)} faces")
+        except Exception as e:
+            logger.warning(f"[SimpleFaceIndex] Error loading: {e}")
+            self.faces_db = {}
+    
+    def add_embedding(self, face_id: str, embedding: np.ndarray) -> int:
+        """Store the embedding unless face_id is already present; return the number of stored faces."""
+        if face_id not in self.faces_db:
+            self.faces_db[face_id] = embedding
+        return len(self.faces_db)
+    
+    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Tuple[str, float]]:
+        """Return up to k (face_id, cosine similarity) pairs scoring >= threshold, best first."""
+        if not self.faces_db:
+            return []
+        
+        query_norm = np.linalg.norm(query_embedding)  # same for every stored face, so computed once
+        similarities = []
+        for face_id, stored_embedding in self.faces_db.items():
+            similarity = np.dot(query_embedding, stored_embedding) / (
+                query_norm * np.linalg.norm(stored_embedding))
+            if similarity >= threshold:
+                similarities.append((face_id, float(similarity)))
+        
+        similarities.sort(key=lambda x: x[1], reverse=True)
+        return similarities[:k]
+    
+    def save(self):
+        """Write faces_db to index_path as JSON; failures are logged, not raised."""
+        try:
+            data = {face_id: embedding.tolist() 
+                   for face_id, embedding in self.faces_db.items()}
+            with open(self.index_path, 'w') as f:
+                json.dump(data, f)
+            
+            logger.debug(f"[SimpleFaceIndex] Saved {len(self.faces_db)} faces")
+        except Exception as e:
+            logger.error(f"[SimpleFaceIndex] Error saving: {e}")
+
+# ========================================
+# Main Facial Recognition Class
+# ========================================
+
+class FacialRecognitionIntegrated:
+    """
+    Unified facial recognition system for DetectifAI.
+    
+    Automatically uses advanced implementation (MTCNN + FaceNet + FAISS + MongoDB) 
+    if available, otherwise falls back to simple implementation (OpenCV + Histograms + JSON).
+    
+    Applies facial recognition ONLY to suspicious frames detected by object detection.
+    """
+    
+    def __init__(self, config):
+        """Read settings from config, choose the implementation mode and, when enabled, build the components."""
+        self.config = config
+        self.enabled = getattr(config, 'enable_facial_recognition', False)
+        self.confidence_threshold = getattr(config, 'face_recognition_confidence', 0.7)
+        self.similarity_threshold = 0.6
+        # torch is undefined when the optional imports failed, so ADVANCED_AVAILABLE must be tested first.
+        self.device = 'cuda' if ADVANCED_AVAILABLE and getattr(config, 'use_gpu_acceleration', False) and torch.cuda.is_available() else 'cpu'
+        
+        # Create faces directory
+        self.faces_dir = Path(FACES_DIR)
+        self.faces_dir.mkdir(exist_ok=True, parents=True)
+        
+        # Determine implementation mode
+        self.advanced_mode = ADVANCED_AVAILABLE and self.enabled
+        # Initialize components only if enabled
+        if self.enabled:
+            self._initialize_components()
+        
+        # Detection statistics
+        self.detection_stats = {
+            'implementation_mode': 'advanced' if self.advanced_mode else 'simple',
+            'frames_processed': 0,
+            'faces_detected': 0,
+            'suspicious_persons_tracked': 0,
+            'reoccurrences_detected': 0,
+            'new_faces_added': 0,
+            'face_matches_found': 0
+        }
+        # Suspicious persons database
+        self.suspicious_persons_db = {}
+        
+        if not self.enabled:
+            logger.info("[FacialRecognition] Disabled - skipping initialization")
+        else:
+            mode = "Advanced (MTCNN + FaceNet + FAISS)" if self.advanced_mode else "Simple (OpenCV + Histograms)"
+            logger.info(f"[FacialRecognition] ✅ Initialized in {mode} mode")
+    
+    def _initialize_components(self):
+        """
+        Build detector, embedder, index, classifier and storage for the selected mode.
+
+        Any failure is logged, disables the instance and is re-raised to the caller.
+        """
+        try:
+            if not self.advanced_mode:
+                # Simple implementation: no classifier and no MongoDB storage
+                self.detector = SimpleFaceDetector()
+                self.embedder = SimpleFaceEmbedder()
+                self.face_index = SimpleFaceIndex()
+                self.person_classifier = None
+                self.mongodb_storage = None
+                return
+            
+            # Advanced implementation
+            self.detector = AdvancedFaceDetector(self.device)
+            self.embedder = AdvancedFaceEmbedder(self.device)
+            self.face_index = FAISSFaceIndex()
+            self.person_classifier = PersonClassifier()  # trained SVM classifier
+            # MongoDB storage (optional)
+            self.mongodb_storage = MongoDBFaceStorage(MONGO_URI) if MONGO_URI else None
+            if self.mongodb_storage is None:
+                logger.info("[FacialRecognition] MongoDB not configured, using local storage only")
+        except Exception as e:
+            logger.error(f"[FacialRecognition] ❌ Initialization failed: {e}")
+            self.enabled = False
+            raise
+    
+    def _generate_face_id(self, frame_number: int, face_index: int, person_name: Optional[str] = None, event_id: str = "unknown") -> str:
+        """Build a face ID from the person label, event ID, frame number, face slot and a random 8-character suffix."""
+        label = person_name.replace(' ', '_') if person_name else "unknown"
+        suffix = uuid.uuid4().hex[:8]  # same characters as the first 8 of str(uuid4())
+        return "face_{}_event_{}_{:06d}_{:02d}_{}".format(label, event_id, frame_number, face_index, suffix)
+    
+    def _save_face_image(self, face_data, face_id: str) -> str:
+        """
+        Write a face crop to <faces_dir>/<face_id>.jpg.
+        Returns the file path, or "" when the input type is unsupported or the image cannot be written.
+        """
+        try:
+            path = self.faces_dir / f"{face_id}.jpg"
+            if self.advanced_mode and isinstance(face_data, torch.Tensor):
+                # MTCNN crops are standardized as (pixel - 127.5) / 128; undo that to get uint8 RGB.
+                face_np = face_data.permute(1, 2, 0).cpu().numpy()
+                face_np = (face_np * 128 + 127.5).clip(0, 255).astype(np.uint8)
+                # OpenCV writes BGR; the crop is stored at 160x160.
+                image = cv2.resize(cv2.cvtColor(face_np, cv2.COLOR_RGB2BGR), (160, 160))
+                kind = "advanced"
+            elif isinstance(face_data, np.ndarray):
+                image = face_data
+                if image.dtype != np.uint8:
+                    # Floats with max <= 1.0 are scaled to 0-255; anything else is cast as-is.
+                    image = (image * 255).astype(np.uint8) if image.max() <= 1.0 else image.astype(np.uint8)
+                # Resize if too large
+                if image.shape[0] > 300 or image.shape[1] > 300:
+                    image = cv2.resize(image, (160, 160))
+                kind = "simple"
+            else:
+                logger.error(f"Unknown face_data type: {type(face_data)}")
+                return ""
+                
+            # cv2.imwrite signals failure through its return value, so it has to be checked.
+            if not cv2.imwrite(str(path), image):
+                logger.error(f"[FacialRecognition] Error saving face image: could not write {path}")
+                return ""
+            logger.debug(f"Saved {kind} face image to {path}")
+            return str(path)
+        except Exception as e:
+            logger.exception(f"[FacialRecognition] Error saving face image: {e}")
+            return ""
+    
+    def detect_faces_in_frame(self, frame_path: str, timestamp: float) -> FaceDetectionResult:
+        """
+        Detect faces in one frame, match each against the face index, and register unseen faces.
+        
+        Args:
+            frame_path: Path to the frame image
+            timestamp: Timestamp of the frame in video (seconds)
+        Returns:
+            FaceDetectionResult with detected faces and metadata; an empty result (no faces)
+            when recognition is disabled, the frame cannot be read, or any error occurs
+        """
+        if not self.enabled:
+            return FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=0,
+                face_embeddings=[],
+                face_bounding_boxes=[],
+                face_confidence_scores=[],
+                processing_time=0.0
+            )
+        
+        start_time = time.time()
+        
+        try:
+            # Load frame
+            frame = cv2.imread(frame_path)
+            if frame is None:
+                logger.error(f"Could not load frame: {frame_path}")
+                return FaceDetectionResult(
+                    frame_path=frame_path,
+                    timestamp=timestamp,
+                    faces_detected=0,
+                    face_embeddings=[],
+                    face_bounding_boxes=[],
+                    face_confidence_scores=[],
+                    processing_time=0.0
+                )
+            
+            # Detect faces
+            faces, boxes, probs = self.detector.detect_faces(frame)
+            
+            # Generate embeddings and process faces
+            face_embeddings = []
+            detected_face_ids = []
+            matched_persons = []
+            
+            for i, (face, box, prob) in enumerate(zip(faces, boxes, probs)):
+                # Generate embedding
+                embedding = self.embedder.generate_embedding(face)
+                face_embeddings.append(embedding)
+                
+                # Try person identification using trained classifier
+                person_name, person_confidence = None, 0.0
+                if self.person_classifier and self.person_classifier.enabled:
+                    person_name, person_confidence = self.person_classifier.identify_person(embedding)
+                
+                # Search the face index (FAISS or simple, depending on mode) for the single best match
+                matches = self.face_index.search(embedding, k=1, threshold=self.similarity_threshold)
+                
+                if matches:
+                    # Found matching face: its existing ID is reused; no new image or metadata is stored
+                    matched_face_id, similarity = matches[0]
+                    detected_face_ids.append(matched_face_id)
+                    
+                    if person_name:
+                        matched_persons.append(f"{person_name} (confidence: {person_confidence:.2f})")
+                        logger.info(f"👤 Known person identified: {person_name} (confidence: {person_confidence:.2f}, face similarity: {similarity:.3f})")
+                    else:
+                        matched_persons.append(f"person_{matched_face_id}")
+                        logger.info(f"👤 Face match found: {matched_face_id} (similarity: {similarity:.3f})")
+                    
+                    self.detection_stats['face_matches_found'] += 1
+                else:
+                    # New face - save to index
+                    frame_number = int(timestamp * 30)  # Estimate frame number (assumes 30 fps)
+                    new_face_id = self._generate_face_id(frame_number, i, person_name, event_id=f"obj_detection_{int(timestamp)}")
+                    
+                    # Add to the face index
+                    self.face_index.add_embedding(new_face_id, embedding)
+                    
+                    # Save face image
+                    face_path = self._save_face_image(face, new_face_id)
+                    
+                    # Save metadata to MongoDB if available
+                    if self.mongodb_storage and self.mongodb_storage.enabled:
+                        face_metadata = {
+                            'face_id': new_face_id,
+                            'frame_path': frame_path,
+                            'timestamp': timestamp,
+                            'confidence': float(prob),
+                            'person_name': person_name,
+                            'person_confidence': float(person_confidence) if person_name else None,
+                            'bounding_box': [int(x) for x in box],
+                            'face_image_path': face_path
+                        }
+                        self.mongodb_storage.save_face(face_metadata)
+                    
+                    detected_face_ids.append(new_face_id)
+                    
+                    if person_name:
+                        matched_persons.append(f"{person_name} (NEW, confidence: {person_confidence:.2f})")
+                        logger.info(f"👤 NEW known person detected: {person_name} (confidence: {person_confidence:.2f})")
+                    else:
+                        matched_persons.append(f"new_unknown_person_{new_face_id}")
+                        logger.info(f"👤 NEW unknown face detected: {new_face_id}")
+                    
+                    self.detection_stats['new_faces_added'] += 1
+            
+            # Persist the face index (runs for every processed frame, even when no face was added)
+            self.face_index.save()
+            
+            processing_time = time.time() - start_time
+            self.detection_stats['frames_processed'] += 1
+            self.detection_stats['faces_detected'] += len(faces)
+            
+            # Convert boxes to expected format: (x1, y1, x2, y2) tuples of ints
+            face_bounding_boxes = [(int(box[0]), int(box[1]), int(box[2]), int(box[3])) for box in boxes]
+            
+            result = FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=len(faces),
+                face_embeddings=face_embeddings,
+                face_bounding_boxes=face_bounding_boxes,
+                face_confidence_scores=probs,
+                processing_time=processing_time,
+                detected_face_ids=detected_face_ids,
+                matched_persons=matched_persons
+            )
+            
+            if faces:
+                logger.info(f"👤 Processed {len(faces)} faces in suspicious frame at {timestamp:.2f}s")
+            
+            return result
+            
+        except Exception as e:  # best-effort: any failure yields an empty result instead of propagating
+            logger.error(f"[FacialRecognition] Error processing frame {frame_path}: {e}")
+            return FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=0,
+                face_embeddings=[],
+                face_bounding_boxes=[],
+                face_confidence_scores=[],
+                processing_time=time.time() - start_time
+            )
+    
+    def track_suspicious_persons(self, face_results: List[FaceDetectionResult],
+                               detectifai_events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Build one re-occurrence event per face id that shows up more than once.
+
+        Args:
+            face_results: Results whose timestamp, frame_path and detected_face_ids are read.
+            detectifai_events: Kept for interface compatibility; not used by this method.
+
+        Returns:
+            'suspicious_person_reoccurrence' event dicts; [] when disabled or given no
+            results. Also updates detection_stats and saves the face index if one is set.
+        """
+        if not self.enabled or not face_results:
+            logger.info("👤 Facial recognition disabled or no face results - skipping person tracking")
+            return []
+        logger.info(f"👤 Tracking suspicious persons across {len(face_results)} face detection results")
+        # One pass gathers timestamps and frame paths per face id, so building an
+        # event no longer rescans every face result for every person.
+        person_timeline = {}  # face_id -> one timestamp per occurrence
+        person_frames = {}    # face_id -> one frame path per result containing the id
+        for face_result in face_results:
+            seen_in_result = set()
+            for face_id in face_result.detected_face_ids or []:
+                person_timeline.setdefault(face_id, []).append(face_result.timestamp)
+                if face_id not in seen_in_result:
+                    seen_in_result.add(face_id)
+                    person_frames.setdefault(face_id, []).append(face_result.frame_path)
+        reoccurrence_events = []
+        for face_id, timestamps in person_timeline.items():
+            if len(timestamps) < 2:
+                continue
+            timestamps.sort()
+            reoccurrence_events.append({
+                'event_id': f"reoccurrence_{face_id}_{int(timestamps[-1])}",
+                'start_timestamp': timestamps[0],
+                'end_timestamp': timestamps[-1],
+                'event_type': 'suspicious_person_reoccurrence',
+                'confidence': 0.85,
+                'max_confidence': 0.85,
+                'keyframes': person_frames[face_id],
+                'importance_score': 4.0,
+                'description': f"Suspicious person {face_id} appeared {len(timestamps)} times",
+                'detection_details': {
+                    'person_id': face_id,
+                    'appearances': len(timestamps),
+                    'time_span': timestamps[-1] - timestamps[0],
+                    'timestamps': timestamps
+                }
+            })
+            self.detection_stats['reoccurrences_detected'] += 1
+        if self.face_index:
+            self.face_index.save()
+        self.detection_stats['suspicious_persons_tracked'] = len(person_timeline)
+        logger.info(f"👤 Person tracking complete: {len(person_timeline)} unique persons, {len(reoccurrence_events)} re-occurrences")
+        return reoccurrence_events
+    
+    def search_person_by_image(self, image_path: str, k: int = 10, threshold: float = 0.6) -> List[Dict[str, Any]]:
+        """
+        Search the face index for the person shown in an uploaded image.
+
+        Only the first face detected in the image is used as the query. The
+        person classifier, when enabled, is asked once about that query face.
+
+        Args:
+            image_path: Path to the uploaded image
+            k: Number of top matches to return
+            threshold: Similarity threshold for matches
+
+        Returns:
+            One dict per match, sorted by 'similarity_score' (highest first);
+            [] when the system is disabled, the image cannot be read, no face
+            is detected, nothing matches, or any step raises (error is logged).
+            Each dict has the keys:
+                face_id: id of the matched face in the index
+                person_name: classifier name, else a name derived from the id
+                person_confidence: classifier confidence (0.0 if no lookup ran)
+                similarity_score: similarity reported by the index search
+                event_context: parts 2-3 of the id, or 'security_event'
+                face_image_path: '<faces_dir>/<face_id>.jpg' if it exists, else None
+                timestamp: _extract_timestamp_from_face_id() result, or 0.0
+                detection_context: label for the id layout that was matched
+        """
+        if not self.enabled:
+            logger.warning("[FacialRecognition] System not enabled")
+            return []
+
+        try:
+            # Load the uploaded image
+            frame = cv2.imread(image_path)
+            if frame is None:
+                logger.error(f"Could not load image: {image_path}")
+                return []
+
+            # Detect faces; boxes and probabilities are not needed for a search
+            faces, _boxes, _probs = self.detector.detect_faces(frame)
+            if not faces:
+                logger.info("No faces detected in uploaded image")
+                return []
+
+            # Use the first detected face for search
+            query_embedding = self.embedder.generate_embedding(faces[0])
+
+            # Search for similar faces in the database
+            matches = self.face_index.search(query_embedding, k=k, threshold=threshold)
+            if not matches:
+                logger.info("No similar faces found in database")
+                return []
+
+            # The classifier only sees the query embedding, so its answer is the
+            # same for every match: ask it once instead of once per match.
+            person_name, person_confidence = None, 0.0
+            if self.person_classifier and self.person_classifier.enabled:
+                person_name, person_confidence = self.person_classifier.identify_person(query_embedding)
+
+            # Build one result per match; only the id-derived fields differ
+            search_results = []
+            for face_id, similarity in matches:
+                # Where a saved crop of this face would live, if one was written
+                face_image_path = str(self.faces_dir / f"{face_id}.jpg")
+
+                # Expected layout: face_{person}_{event}_{frame}_{face_index}_{unique_id}
+                parts = face_id.split('_')
+                if len(parts) >= 6:
+                    person_part = parts[1] if parts[1] != 'unknown' else 'Unknown Person'
+                    fallback_name = person_part.replace('_', ' ').title()
+                    event_context = '_'.join(parts[2:4])  # event_obj_detection or similar
+                    timestamp = self._extract_timestamp_from_face_id(face_id)
+                    detection_context = 'Suspicious Activity Detection'
+                else:
+                    # Fallback for differently formatted face_ids
+                    fallback_name = 'Unknown Person'
+                    event_context = 'security_event'
+                    timestamp = 0.0
+                    detection_context = 'Security Event'
+
+                search_results.append({
+                    'face_id': face_id,
+                    'person_name': person_name if person_name else fallback_name,
+                    'person_confidence': person_confidence,
+                    'similarity_score': similarity,
+                    'event_context': event_context,
+                    'face_image_path': face_image_path if os.path.exists(face_image_path) else None,
+                    'timestamp': timestamp,
+                    'detection_context': detection_context
+                })
+
+            # Sort by similarity score (highest first)
+            search_results.sort(key=lambda x: x['similarity_score'], reverse=True)
+
+            logger.info(f"👤 Image search complete: Found {len(search_results)} matches with similarity >= {threshold}")
+
+            return search_results
+
+        except Exception as e:
+            # Best-effort API: any failure is logged and reported as "no matches"
+            logger.error(f"[FacialRecognition] Error in image search: {e}")
+            return []
+    
+    def _extract_timestamp_from_face_id(self, face_id: str) -> float:
+        """Return the first all-digit '_'-separated part of face_id as a float.
+
+        Ids with fewer than six parts or without a numeric part yield 0.0, as do
+        ids that cannot be split or whose digit part float() rejects.
+        """
+        try:
+            parts = face_id.split('_')
+            numeric = next((p for p in parts if p.isdigit()), None) if len(parts) >= 6 else None
+            return float(numeric) if numeric is not None else 0.0
+        except (AttributeError, TypeError, ValueError):  # was a bare except, which also hid KeyboardInterrupt
+            return 0.0
+
+    def get_detection_stats(self) -> Dict[str, Any]:
+        """Return a copy of the counters plus 'total_faces_in_database' (0 when face_index is None)."""
+        stats = self.detection_stats.copy()
+        if hasattr(self, 'face_index'):
+            if self.face_index is not None and self.advanced_mode:
+                stats['total_faces_in_database'] = self.face_index.index.ntotal if self.face_index.index else 0
+            else:  # simple mode, or no index object at all (advanced mode used to raise on None)
+                stats['total_faces_in_database'] = len(self.face_index.faces_db) if self.face_index else 0
+        return stats
+    
+    def cleanup(self):
+        """Save the face index and close MongoDB storage, skipping whichever is absent or None."""
+        if getattr(self, 'face_index', None) is not None:  # attribute may exist yet hold None
+            self.face_index.save()
+        if getattr(self, 'mongodb_storage', None):
+            self.mongodb_storage.close()
+        logger.info("[FacialRecognition] Cleanup completed")
+
+# Backward-compatibility alias: the old FacialRecognitionPlaceholder name resolves to the integrated class.
+FacialRecognitionPlaceholder = FacialRecognitionIntegrated
\ No newline at end of file
diff --git a/highlight_reel.py b/highlight_reel.py
new file mode 100644
index 0000000000000000000000000000000000000000..39e628f93566f6912833ac75498078afe2d25e16
--- /dev/null
+++ b/highlight_reel.py
@@ -0,0 +1,542 @@
+"""
+Highlight Reel Generation Module
+
+This module creates video summaries and highlight reels using various strategies:
+- Event-aware summarization
+- Ultra-comprehensive coverage
+- Quality-focused highlights
+- Custom, criteria-driven highlights
+"""
+
+import cv2
+import os
+import numpy as np
+from typing import List, Dict, Any, Tuple, Optional
+import json
+import logging
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+class HighlightReelGenerator:
+    """Generate highlight reels from processed video segments"""
+    
+    def __init__(self, config):
+        """Remember *config* and make sure '<config.output_base_dir>/highlights' exists."""
+        self.config, self.highlights_dir = config, os.path.join(config.output_base_dir, "highlights")
+        os.makedirs(self.highlights_dir, exist_ok=True)
+    
+    def create_event_aware_highlight_reel(self, segments: List, canonical_events: List = None) -> str:
+        """
+        Render the highlight reel that favours segments flagged as events.
+
+        Args:
+            segments: List of video segments
+            canonical_events: List of canonical events (optional), passed
+                through to the keyframe selector
+
+        Returns:
+            Path to the generated reel, or "" if the video was not created
+        """
+        logger.info("Creating event-aware highlight reel")
+
+        reel_path = os.path.join(self.highlights_dir, "event_aware_highlights.mp4")
+
+        # Flag event segments first so their keyframes can be prioritised
+        flagged_segments = self._detect_event_segments(segments)
+        chosen_keyframes = self._select_event_aware_keyframes(
+            segments,
+            flagged_segments,
+            canonical_events,
+        )
+
+        # Write the video; "" signals failure to the caller
+        rendered = self._create_highlight_video(
+            chosen_keyframes,
+            reel_path,
+            "Event-Aware Highlights",
+        )
+        if not rendered:
+            logger.error("Failed to create event-aware highlight reel")
+            return ""
+
+        logger.info(f"Event-aware highlight reel created: {reel_path}")
+        return reel_path
+    
+    def create_ultra_comprehensive_highlight_reel(self, segments: List) -> str:
+        """
+        Render the reel that keeps as many important moments as possible
+        within the configured frame budget.
+
+        Args:
+            segments: List of video segments
+
+        Returns:
+            Path to the generated reel, or "" if the video was not created
+        """
+        logger.info("Creating ultra-comprehensive highlight reel")
+
+        reel_path = os.path.join(self.highlights_dir, "ultra_comprehensive_highlights.mp4")
+
+        # Selection uses the relaxed (ultra-sensitive) thresholds
+        chosen_keyframes = self._select_ultra_comprehensive_keyframes(segments)
+
+        # Write the video; "" signals failure to the caller
+        rendered = self._create_highlight_video(
+            chosen_keyframes,
+            reel_path,
+            "Ultra-Comprehensive Highlights",
+        )
+        if not rendered:
+            logger.error("Failed to create ultra-comprehensive highlight reel")
+            return ""
+
+        logger.info(f"Ultra-comprehensive highlight reel created: {reel_path}")
+        return reel_path
+    
+    def create_quality_focused_highlight_reel(self, segments: List) -> str:
+        """
+        Render the reel built from the highest-quality keyframes,
+        chosen by quality score alone.
+
+        Args:
+            segments: List of video segments
+
+        Returns:
+            Path to the generated reel, or "" if the video was not created
+        """
+        logger.info("Creating quality-focused highlight reel")
+
+        reel_path = os.path.join(self.highlights_dir, "quality_focused_highlights.mp4")
+
+        # Selection is driven by frame quality rather than motion
+        chosen_keyframes = self._select_quality_focused_keyframes(segments)
+
+        # Write the video; "" signals failure to the caller
+        rendered = self._create_highlight_video(
+            chosen_keyframes,
+            reel_path,
+            "Quality-Focused Highlights",
+        )
+        if not rendered:
+            logger.error("Failed to create quality-focused highlight reel")
+            return ""
+
+        logger.info(f"Quality-focused highlight reel created: {reel_path}")
+        return reel_path
+    
+    def _detect_event_segments(self, segments: List) -> List[int]:
+        """Return the ids of segments whose keyframes show significant activity.
+
+        A segment counts as an event when its peak motion exceeds
+        config.motion_threshold, its mean motion exceeds half of that, or any
+        keyframe has 'burst_active' set. Segments without keyframes are skipped.
+
+        Segments lacking 'segment_id' are reported as 0, the same fallback the
+        keyframe selectors use. (The previous fallback, the running count of
+        event segments, could equal the real id of an unrelated quiet segment.)
+        """
+        event_segments = []
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            if not keyframes:
+                continue
+
+            # keyframes is non-empty here, so max() and the mean are well defined
+            motion_scores = [kf['frame_data']['motion_score'] for kf in keyframes]
+            peak_motion = max(motion_scores)
+            mean_motion = sum(motion_scores) / len(motion_scores)
+            has_burst = any(kf['frame_data']['burst_active'] for kf in keyframes)
+
+            threshold = self.config.motion_threshold
+            if peak_motion > threshold or mean_motion > threshold * 0.5 or has_burst:
+                event_segments.append(segment.get('segment_id', 0))
+        return event_segments
+    
+    def _select_event_aware_keyframes(self, segments: List, event_segments: List[int],
+                                    canonical_events: List = None) -> List[Dict]:
+        """Pick keyframes for the event-aware reel.
+
+        Event segments contribute their two or three best-scoring keyframes (three
+        only with three or more burst frames); other segments contribute their best
+        keyframe if it reaches config.base_quality_threshold.
+
+        Args:
+            segments: Segment dicts with optional 'keyframes' and 'segment_id'.
+            event_segments: Ids of the segments to treat as events.
+            canonical_events: Accepted for interface compatibility; not used here.
+
+        Returns:
+            Selection dicts (keyframe_data, event_score, timestamp, is_event, segment_id)
+            in timestamp order, capped at config.max_summary_frames by event score.
+        """
+        event_ids = set(event_segments)  # built once: O(1) membership test per segment
+        selected_keyframes = []
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            if not keyframes:
+                continue
+            segment_id = segment.get('segment_id', 0)
+            if segment_id not in event_ids:
+                # Regular segment: best keyframe only, and only if it is good enough
+                best_kf = max(keyframes, key=lambda x: x['keyframe_score'])
+                if best_kf['keyframe_score'] >= self.config.base_quality_threshold:
+                    selected_keyframes.append({
+                        'keyframe_data': best_kf,
+                        'event_score': best_kf['keyframe_score'],
+                        'timestamp': best_kf['frame_data']['timestamp'],
+                        'is_event': False,
+                        'segment_id': segment_id
+                    })
+                continue
+            # Event segment: score every keyframe, boosting motion and burst frames
+            scored_keyframes = []
+            burst_total = 0
+            for kf in keyframes:
+                frame_data = kf['frame_data']
+                motion_score = frame_data['motion_score']
+                event_score = kf['keyframe_score']
+                if motion_score > self.config.motion_threshold:
+                    event_score += motion_score * 0.5
+                if frame_data['burst_active']:
+                    event_score *= self.config.burst_weight
+                    burst_total += 1
+                scored_keyframes.append({
+                    'keyframe_data': kf,
+                    'event_score': event_score,
+                    'timestamp': frame_data['timestamp'],
+                    'is_event': True,
+                    'segment_id': segment_id
+                })
+            scored_keyframes.sort(key=lambda x: x['event_score'], reverse=True)
+            selected_keyframes.extend(scored_keyframes[:min(3, max(2, burst_total))])
+
+        selected_keyframes.sort(key=lambda x: x['timestamp'])
+        if len(selected_keyframes) > self.config.max_summary_frames:  # over budget: keep top scores
+            selected_keyframes.sort(key=lambda x: x['event_score'], reverse=True)
+            selected_keyframes = selected_keyframes[:self.config.max_summary_frames]
+            selected_keyframes.sort(key=lambda x: x['timestamp'])
+        return selected_keyframes
+    
+    def _select_ultra_comprehensive_keyframes(self, segments: List) -> List[Dict]:
+        """Pick up to config.max_summary_frames keyframes with broad time coverage.
+
+        Every keyframe gets an importance score (base score, plus a motion
+        bonus, tripled for burst frames, plus a small quality bonus). Frames
+        passing any of the relaxed inclusion tests become candidates. The
+        budget is then spent in two passes over the candidates, most important
+        first: one frame per 5-second bin, then the remaining slots.
+
+        The original single pass tested ``bin not covered or len < budget``;
+        the second operand was always true inside the loop, so bins were never
+        consulted and a non-positive budget still returned one frame.
+
+        Args:
+            segments: Segment dicts with optional 'keyframes' and 'segment_id'.
+
+        Returns:
+            Selection dicts (keyframe_data, event_score, timestamp, is_event,
+            segment_id) sorted by timestamp.
+        """
+        max_frames = self.config.max_summary_frames
+        # Relaxed thresholds for comprehensive coverage
+        ultra_motion_threshold = self.config.motion_threshold * 0.5
+        ultra_quality_threshold = self.config.base_quality_threshold * 0.8
+
+        candidates = []
+        for segment in segments:
+            segment_id = segment.get('segment_id', 0)
+            for kf in segment.get('keyframes', []):
+                frame_data = kf['frame_data']
+                base_score = kf['keyframe_score']
+                motion_score = frame_data['motion_score']
+                is_burst = frame_data['burst_active']
+
+                importance = base_score
+                # Any motion is important; above the relaxed threshold it counts in full
+                if motion_score > ultra_motion_threshold:
+                    importance += motion_score * 1.0
+                elif motion_score > 0:
+                    importance += motion_score * 0.5
+
+                # Burst frames are critical
+                if is_burst:
+                    importance *= 3.0
+
+                # Quality bonus
+                if base_score > self.config.base_quality_threshold * 1.1:
+                    importance += 0.1
+
+                # Include the frame if it meets any importance criterion
+                if (importance > 0.20 or motion_score > ultra_motion_threshold
+                        or is_burst or base_score > ultra_quality_threshold):
+                    candidates.append({
+                        'keyframe_data': kf,
+                        'event_score': importance,
+                        'timestamp': frame_data['timestamp'],
+                        'is_event': is_burst or motion_score > self.config.motion_threshold,
+                        'segment_id': segment_id
+                    })
+
+        candidates.sort(key=lambda x: x['event_score'], reverse=True)
+
+        # Pass 1: the most important frame of each 5-second bin
+        selected_frames = []
+        leftovers = []
+        covered_timeframes = set()
+        for frame in candidates:
+            timeframe = int(frame['timestamp'] // 5) * 5
+            if timeframe in covered_timeframes or len(selected_frames) >= max_frames:
+                leftovers.append(frame)
+            else:
+                covered_timeframes.add(timeframe)
+                selected_frames.append(frame)
+
+        # Pass 2: spend what is left of the budget on the best remaining frames
+        spare = max(0, max_frames - len(selected_frames))
+        selected_frames.extend(leftovers[:spare])
+
+        selected_frames.sort(key=lambda x: x['timestamp'])
+        return selected_frames
+    
+    def _select_quality_focused_keyframes(self, segments: List) -> List[Dict]:
+        """Pick the best-quality keyframes while keeping them spread out in time.
+
+        Only keyframes whose 'quality_score' is at least 1.2x
+        config.base_quality_threshold qualify. They are visited best first, and
+        one is kept when it lies at least 3 seconds from every frame already
+        kept, until config.max_summary_frames are chosen.
+
+        The gap is measured in both directions. The original compared against
+        the last kept timestamp only, without abs(), which rejected every frame
+        earlier in time than the previous pick however far away it was.
+
+        Returns:
+            Selection dicts sorted by timestamp; is_event is always False.
+        """
+        quality_floor = self.config.base_quality_threshold * 1.2
+        max_frames = self.config.max_summary_frames
+        min_gap = 3.0  # Minimum 3 seconds between frames
+        candidates = []
+        for segment in segments:
+            segment_id = segment.get('segment_id', 0)
+            for kf in segment.get('keyframes', []):
+                frame_data = kf['frame_data']
+                if frame_data['quality_score'] >= quality_floor:
+                    candidates.append({
+                        'keyframe_data': kf,
+                        'event_score': frame_data['quality_score'],
+                        'timestamp': frame_data['timestamp'],
+                        'is_event': False,
+                        'segment_id': segment_id
+                    })
+        candidates.sort(key=lambda x: x['event_score'], reverse=True)
+        selected_frames = []
+        for frame in candidates:
+            if len(selected_frames) >= max_frames:
+                break
+            if all(abs(frame['timestamp'] - kept['timestamp']) >= min_gap for kept in selected_frames):
+                selected_frames.append(frame)
+
+        selected_frames.sort(key=lambda x: x['timestamp'])
+        return selected_frames
+    
+    def _create_highlight_video(self, selected_keyframes: List[Dict], output_path: str,
+                              title: str = "Highlight Reel") -> bool:
+        """Write the selected keyframes, in the given order, to an mp4 file.
+
+        The first keyframe's image fixes the frame size; later images of another
+        size are resized to it. Missing or unreadable images are skipped with a warning.
+
+        Args:
+            selected_keyframes: Selection dicts; 'keyframe_data', 'timestamp', 'is_event' are read.
+            output_path: Destination video path.
+            title: Label used in log messages only.
+
+        Returns:
+            True if a frame was written and the file exists, else False (errors are logged).
+        """
+        if not selected_keyframes:
+            logger.error("No keyframes selected for highlight reel")
+            return False
+        out = None
+        try:
+            # Read first frame to get dimensions
+            first_frame_path = selected_keyframes[0]['keyframe_data']['frame_data']['frame_path']
+            first_image = cv2.imread(first_frame_path)
+            if first_image is None:
+                logger.error(f"Cannot read first frame: {first_frame_path}")
+                return False
+            height, width = first_image.shape[:2]
+            # Set up video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            fps = self.config.summary_fps
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                logger.error("Cannot create video writer")
+                return False
+            frames_added = 0
+            logger.info(f"Creating {title} with {len(selected_keyframes)} frames")
+            for kf in selected_keyframes:
+                frame_path = kf['keyframe_data']['frame_data']['frame_path']
+                if not os.path.exists(frame_path):
+                    logger.warning(f"Frame not found: {frame_path}")
+                    continue
+                frame = cv2.imread(frame_path)
+                if frame is None:
+                    logger.warning(f"Cannot read frame: {frame_path}")
+                    continue
+                # Resize frame if needed
+                if frame.shape[:2] != (height, width):
+                    frame = cv2.resize(frame, (width, height))
+                out.write(frame)
+                frames_added += 1
+                timestamp = kf['timestamp']
+                mins = int(timestamp // 60)
+                secs = timestamp % 60
+                event_type = "EVENT" if kf['is_event'] else "QUALITY"
+                logger.debug(f"Added frame: {mins:02d}:{secs:04.1f} - {event_type}")
+            # Finalise the file before inspecting it on disk
+            out.release()
+            out = None
+            if frames_added > 0 and os.path.exists(output_path):
+                file_size = os.path.getsize(output_path) / (1024*1024)
+                duration = frames_added / fps
+                logger.info(f"✅ {title} created successfully!")
+                logger.info(f"📁 Path: {output_path}")
+                logger.info(f"📊 {frames_added} frames, {duration:.1f}s duration, {file_size:.1f} MB")
+                return True
+            logger.error("Failed to create video file")
+            return False
+        except Exception as e:
+            logger.error(f"Error creating highlight video: {e}")
+            return False
+        finally:
+            # Release the writer on every exit path so it is never left open
+            if out is not None:
+                out.release()
+    
+    def create_custom_highlight_reel(self, segments: List, selection_criteria: Dict[str, Any]) -> str:
+        """
+        Render a reel from keyframes filtered by caller-supplied criteria.
+
+        Args:
+            segments: List of video segments
+            selection_criteria: Custom criteria for frame selection
+                (keys are interpreted by _apply_custom_selection)
+
+        Returns:
+            Path to the generated reel, or "" if the video was not created
+        """
+        logger.info(f"Creating custom highlight reel with criteria: {selection_criteria}")
+
+        reel_path = os.path.join(self.highlights_dir, "custom_highlights.mp4")
+
+        # Filter keyframes by the supplied criteria
+        chosen_keyframes = self._apply_custom_selection(segments, selection_criteria)
+
+        # Write the video; "" signals failure to the caller
+        rendered = self._create_highlight_video(
+            chosen_keyframes,
+            reel_path,
+            "Custom Highlights",
+        )
+        if not rendered:
+            logger.error("Failed to create custom highlight reel")
+            return ""
+
+        logger.info(f"Custom highlight reel created: {reel_path}")
+        return reel_path
+    
+    def _apply_custom_selection(self, segments: List, criteria: Dict[str, Any]) -> List[Dict]:
+        """Filter keyframes by caller-supplied criteria and keep the best ones.
+
+        Recognised criteria keys (all optional):
+            min_motion_score: lowest accepted motion score (default 0.0)
+            min_quality_score: lowest accepted quality score
+                (default config.base_quality_threshold)
+            require_burst: keep only burst frames when truthy (default False)
+            max_frames: cap on the result (default config.max_summary_frames)
+            time_range: (start, end) tuple, inclusive on both ends (default None)
+
+        Returns:
+            Selection dicts for the max_frames highest 'keyframe_score' matches,
+            sorted by timestamp.
+        """
+        motion_floor = criteria.get('min_motion_score', 0.0)
+        quality_floor = criteria.get('min_quality_score', self.config.base_quality_threshold)
+        burst_only = criteria.get('require_burst', False)
+        frame_cap = criteria.get('max_frames', self.config.max_summary_frames)
+        window = criteria.get('time_range', None)  # (start, end) tuple
+
+        matches = []
+        for segment in segments:
+            for kf in segment.get('keyframes', []):
+                info = kf['frame_data']
+                ts = info['timestamp']
+                motion = info['motion_score']
+                quality = info['quality_score']
+                burst = info['burst_active']
+
+                # A frame is rejected as soon as one criterion fails
+                rejected = (
+                    motion < motion_floor
+                    or quality < quality_floor
+                    or (burst_only and not burst)
+                )
+                if window:
+                    start_time, end_time = window
+                    rejected = rejected or not (start_time <= ts <= end_time)
+                if rejected:
+                    continue
+
+                matches.append({
+                    'keyframe_data': kf,
+                    'event_score': kf['keyframe_score'],
+                    'timestamp': ts,
+                    'is_event': burst or motion > self.config.motion_threshold,
+                    'segment_id': segment.get('segment_id', 0)
+                })
+
+        # Keep the top scorers, then restore chronological order
+        best_first = sorted(matches, key=lambda x: x['event_score'], reverse=True)
+        return sorted(best_first[:frame_cap], key=lambda x: x['timestamp'])
+    
+    def generate_highlight_reel_metadata(self, selected_keyframes: List[Dict],
+                                       output_path: str) -> bool:
+        """Write a JSON description of the reel's frames to output_path.
+
+        Returns True on success, or False (after logging) if anything raises.
+        """
+        try:
+            metadata = {
+                'generation_info': {
+                    'timestamp': datetime.now().isoformat(),
+                    'total_frames': len(selected_keyframes),
+                    'selection_config': {
+                        'max_summary_frames': self.config.max_summary_frames,
+                        'summary_fps': self.config.summary_fps,
+                        'motion_threshold': self.config.motion_threshold,
+                        'quality_threshold': self.config.base_quality_threshold
+                    }
+                },
+                'frame_details': [
+                    {
+                        'sequence_number': position,
+                        'timestamp': kf['timestamp'],
+                        'is_event_frame': kf['is_event'],
+                        'segment_id': kf['segment_id'],
+                        'event_score': kf['event_score'],
+                        'frame_path': kf['keyframe_data']['frame_data']['frame_path']
+                    }
+                    for position, kf in enumerate(selected_keyframes, start=1)
+                ]
+            }
+            payload = json.dumps(metadata, indent=2)  # before open(): a failure leaves no truncated file
+            with open(output_path, 'w') as f:
+                f.write(payload)
+            logger.info(f"Highlight reel metadata saved: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save highlight reel metadata: {e}")
+            return False
\ No newline at end of file
diff --git a/json_reports.py b/json_reports.py
new file mode 100644
index 0000000000000000000000000000000000000000..83b1d4d562c3107ee1933ac8437be27d83374d79
--- /dev/null
+++ b/json_reports.py
@@ -0,0 +1,575 @@
+"""
+JSON Reports Generation Module
+
+This module handles:
+- Processing results JSON reports
+- Canonical events JSON
+- Segment analysis reports
+- Performance statistics
+- HTML gallery generation
+"""
+
+import json
+import os
+import cv2
+import base64
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+import logging
+
+logger = logging.getLogger(__name__)
+
+class ReportGenerator:
+    """Generate comprehensive JSON reports and HTML galleries"""
+    
+    def __init__(self, config):
+        """Keep ``config`` and make sure ``<config.output_base_dir>/reports`` exists."""
+        self.config, self.reports_dir = config, os.path.join(config.output_base_dir, "reports")
+        os.makedirs(self.reports_dir, exist_ok=True)
+    
+    def generate_processing_results_report(self,
+                                         keyframes: List,
+                                         events: List,
+                                         canonical_events: List,
+                                         segments: List,
+                                         processing_stats: Dict[str, Any]) -> str:
+        """Build the run-level report and save it as ``processing_results.json``.
+
+        ``processing_stats`` is embedded as given; its ``total_processing_time``
+        (0 if absent) is repeated in the summary.  Analysis errors propagate.
+        Returns the written file's path, or ``""`` when saving fails.
+        """
+        logger.info("Generating processing results report")
+        metadata = {
+            'generation_timestamp': datetime.now().isoformat(),
+            'report_version': '1.0',
+            'processing_config': self._get_config_summary(),
+        }
+        summary = {
+            'total_keyframes_extracted': len(keyframes),
+            'total_events_detected': len(events),
+            'canonical_events_created': len(canonical_events),
+            'video_segments_created': len(segments),
+            'processing_duration': processing_stats.get('total_processing_time', 0),
+        }
+        payload = {
+            'metadata': metadata,
+            'summary': summary,
+            'keyframe_analysis': self._analyze_keyframes(keyframes),
+            'event_analysis': self._analyze_events(events),
+            'canonical_event_analysis': self._analyze_canonical_events(canonical_events),
+            'segment_analysis': self._analyze_segments(segments),
+            'performance_statistics': processing_stats,
+            'quality_metrics': self._calculate_quality_metrics(keyframes, events),
+        }
+        destination = os.path.join(self.reports_dir, "processing_results.json")
+        try:
+            with open(destination, 'w') as handle:
+                json.dump(payload, handle, indent=2)
+            logger.info(f"Processing results report saved: {destination}")
+            return destination
+        except Exception as e:
+            logger.error(f"Failed to save processing results report: {e}")
+            return ""
+    
+    def generate_canonical_events_report(self, canonical_events: List) -> str:
+        """Serialise the canonical events to ``canonical_events.json``.
+
+        Each event must expose ``canonical_id``, ``event_type``,
+        ``representative_frame``, ``start_time``, ``end_time``, ``duration``,
+        ``confidence``, ``frame_count``, ``aggregated_events``, ``description``
+        and ``similarity_cluster``; the values are written out unchanged.
+
+        Returns the written file's path, or ``""`` if saving failed.
+        """
+        logger.info("Generating canonical events report")
+        payload = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'total_canonical_events': len(canonical_events),
+                'deduplication_threshold': self.config.similarity_threshold,
+            },
+            'canonical_events': [
+                {
+                    'canonical_id': item.canonical_id,
+                    'event_type': item.event_type,
+                    'representative_frame': item.representative_frame,
+                    'time_range': {
+                        'start_time': item.start_time,
+                        'end_time': item.end_time,
+                        'duration': item.duration,
+                    },
+                    'confidence': item.confidence,
+                    'frame_count': item.frame_count,
+                    'aggregated_events': item.aggregated_events,
+                    'description': item.description,
+                    'similarity_cluster': item.similarity_cluster,
+                }
+                for item in canonical_events
+            ],
+        }
+        destination = os.path.join(self.reports_dir, "canonical_events.json")
+        try:
+            with open(destination, 'w') as handle:
+                json.dump(payload, handle, indent=2)
+            logger.info(f"Canonical events report saved: {destination}")
+            return destination
+        except Exception as e:
+            logger.error(f"Failed to save canonical events report: {e}")
+            return ""
+    
+    def generate_segments_report(self, segments: List) -> str:
+        """Serialise per-segment details to ``video_segments.json``.
+
+        The file carries report metadata (segment count plus the configured
+        ``segment_duration`` and ``keyframes_per_segment``), the distributions
+        from ``_get_segments_summary`` and one record per segment with its time
+        range, frame range, classification, statistics and keyframes.
+
+        Returns the written file's path, or ``""`` if saving failed.
+        """
+        logger.info("Generating video segments report")
+        payload = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'total_segments': len(segments),
+                'segment_duration': self.config.segment_duration,
+                'keyframes_per_segment': self.config.keyframes_per_segment,
+            },
+            'summary_statistics': self._get_segments_summary(segments),
+            'segments': [
+                {
+                    'segment_id': seg.segment_id,
+                    'time_range': {
+                        'start_timestamp': seg.start_timestamp,
+                        'end_timestamp': seg.end_timestamp,
+                        'duration': seg.duration,
+                    },
+                    'frame_range': {
+                        'start_frame': seg.start_frame,
+                        'end_frame': seg.end_frame,
+                    },
+                    'segment_classification': {
+                        'segment_type': seg.segment_type,
+                        'activity_level': seg.activity_level,
+                    },
+                    'statistics': {
+                        'motion_statistics': seg.motion_statistics,
+                        'quality_statistics': seg.quality_statistics,
+                        'keyframe_count': len(seg.keyframes),
+                    },
+                    'keyframes': seg.keyframes,
+                }
+                for seg in segments
+            ],
+        }
+        destination = os.path.join(self.reports_dir, "video_segments.json")
+        try:
+            with open(destination, 'w') as handle:
+                json.dump(payload, handle, indent=2)
+            logger.info(f"Video segments report saved: {destination}")
+            return destination
+        except Exception as e:
+            logger.error(f"Failed to save video segments report: {e}")
+            return ""
+    
+    def generate_html_gallery(self, keyframes: List, canonical_events: List = None,
+                            segments: List = None, title: str = "Video Processing Gallery") -> str:
+        """Render the keyframe gallery and save it as ``canonical_gallery.html``.
+
+        All arguments are forwarded unchanged to ``_create_html_gallery``; the
+        resulting markup is written as UTF-8.
+
+        Returns the written file's path, or ``""`` if saving failed.
+        """
+        logger.info("Generating HTML gallery")
+        page = self._create_html_gallery(keyframes, canonical_events, segments, title)
+        destination = os.path.join(self.reports_dir, "canonical_gallery.html")
+        # Only the file write is guarded; rendering errors propagate.
+        try:
+            with open(destination, 'w', encoding='utf-8') as handle:
+                handle.write(page)
+            logger.info(f"HTML gallery saved: {destination}")
+            return destination
+        except Exception as e:
+            logger.error(f"Failed to save HTML gallery: {e}")
+            return ""
+    
+    def _get_config_summary(self) -> Dict[str, Any]:
+        """Return the subset of ``self.config`` settings recorded in report metadata.
+
+        Raises ``AttributeError`` if the config lacks any of the listed fields.
+        """
+        reported_fields = (
+            'base_quality_threshold', 'motion_threshold', 'event_importance_threshold',
+            'similarity_threshold', 'segment_duration', 'max_summary_frames',
+            'output_resolution', 'enable_clahe', 'enable_denoising',
+        )
+        # Keys follow the order the fields are listed in, so the JSON layout is stable.
+        settings = self.config
+        return {field: getattr(settings, field) for field in reported_fields}
+    
+    def _analyze_keyframes(self, keyframes: List) -> Dict[str, Any]:
+        """Summarise quality, motion and selection statistics of the keyframes.
+
+        Args:
+            keyframes: Objects exposing ``selection_reason`` and a ``frame_data``
+                with ``quality_score``, ``motion_score``, ``burst_active`` and
+                ``enhancement_applied``.
+
+        Returns:
+            ``{}`` for empty input; otherwise the keyframe count, min/max/mean/std
+            of both scores (``std`` is NumPy's population standard deviation),
+            a count per selection reason, the burst/enhanced frame counts and the
+            enhanced share in percent.
+        """
+        if not keyframes:
+            return {}
+        def describe(values):
+            # min/max/mean/std of a non-empty sequence, as plain floats
+            return {
+                'min': float(min(values)),
+                'max': float(max(values)),
+                'mean': float(sum(values) / len(values)),
+                'std': float(np.std(values)),
+            }
+
+        frames = [kf.frame_data for kf in keyframes]
+        reason_counts = {}
+        for kf in keyframes:
+            reason_counts[kf.selection_reason] = reason_counts.get(kf.selection_reason, 0) + 1
+        enhanced_total = sum(1 for fd in frames if fd.enhancement_applied)
+        return {
+            'total_keyframes': len(keyframes),
+            'quality_statistics': describe([fd.quality_score for fd in frames]),
+            'motion_statistics': describe([fd.motion_score for fd in frames]),
+            'selection_reason_distribution': reason_counts,
+            'burst_frames_count': sum(1 for fd in frames if fd.burst_active),
+            'enhanced_frames_count': enhanced_total,
+            'enhancement_rate': enhanced_total / len(keyframes) * 100,
+        }
+    
+    def _analyze_events(self, events: List) -> Dict[str, Any]:
+        """Summarise detected events by type, confidence, importance and duration.
+
+        Args:
+            events: Objects exposing ``event_type``, ``confidence``,
+                ``importance_score``, ``start_timestamp`` and ``end_timestamp``.
+
+        Returns:
+            ``{}`` for empty input; otherwise the event count, a count per event
+            type and min/max/mean of confidence, importance and duration
+            (``end_timestamp - start_timestamp``).
+        """
+        if not events:
+            return {}
+
+        def spread(values):
+            # min/max/mean of a non-empty sequence, as plain floats
+            return {
+                'min': float(min(values)),
+                'max': float(max(values)),
+                'mean': float(sum(values) / len(values)),
+            }
+
+        # Occurrences of each event type, keyed in first-seen order.
+        per_type = {}
+        for item in events:
+            per_type[item.event_type] = per_type.get(item.event_type, 0) + 1
+
+        return {
+            'total_events': len(events),
+            'event_type_distribution': per_type,
+            'confidence_statistics': spread([item.confidence for item in events]),
+            'importance_statistics': spread([item.importance_score for item in events]),
+            'duration_statistics': spread(
+                [item.end_timestamp - item.start_timestamp for item in events]
+            ),
+        }
+    
+    def _analyze_canonical_events(self, canonical_events: List) -> Dict[str, Any]:
+        """Summarise canonical events by type, duration, frame count and confidence.
+
+        Args:
+            canonical_events: Objects exposing ``event_type``, ``duration``,
+                ``frame_count`` and ``confidence``.
+
+        Returns:
+            ``{}`` for empty input; otherwise the event count, a count per event
+            type and min/max/mean of each metric.  Frame-count min/max are
+            reported as ints; every mean is a float.
+        """
+        if not canonical_events:
+            return {}
+
+        def spread(values, bound=float):
+            # min/max converted with ``bound``; the mean is always a float
+            return {
+                'min': bound(min(values)),
+                'max': bound(max(values)),
+                'mean': float(sum(values) / len(values)),
+            }
+
+        # Occurrences of each event type, keyed in first-seen order.
+        per_type = {}
+        for item in canonical_events:
+            per_type[item.event_type] = per_type.get(item.event_type, 0) + 1
+
+        return {
+            'total_canonical_events': len(canonical_events),
+            'event_type_distribution': per_type,
+            'duration_statistics': spread([item.duration for item in canonical_events]),
+            'frame_count_statistics': spread(
+                [item.frame_count for item in canonical_events], bound=int
+            ),
+            'confidence_statistics': spread([item.confidence for item in canonical_events]),
+        }
+    
+    def _analyze_segments(self, segments: List) -> Dict[str, Any]:
+        """Summarise segments by type, activity level, duration and keyframe count.
+
+        Args:
+            segments: Objects exposing ``segment_type``, ``activity_level``,
+                ``duration`` and a sized ``keyframes`` collection.
+
+        Returns:
+            ``{}`` for empty input; otherwise the segment count, both
+            distributions, the mean duration and the summed keyframe count.
+        """
+        if not segments:
+            return {}
+
+        per_type, per_activity = {}, {}
+        for seg in segments:
+            per_type[seg.segment_type] = per_type.get(seg.segment_type, 0) + 1
+            per_activity[seg.activity_level] = per_activity.get(seg.activity_level, 0) + 1
+
+        return {
+            'total_segments': len(segments),
+            'segment_type_distribution': per_type,
+            'activity_level_distribution': per_activity,
+            'average_segment_duration': float(sum(seg.duration for seg in segments) / len(segments)),
+            'total_keyframes': sum(len(seg.keyframes) for seg in segments),
+        }
+    
+    def _calculate_quality_metrics(self, keyframes: List, events: List) -> Dict[str, Any]:
+        """Derive extraction and event-detection efficiency figures.
+
+        Frame rates are percentages of ``keyframes`` that are burst frames, exceed
+        1.2x ``config.base_quality_threshold`` in quality, or exceed
+        ``config.motion_threshold`` in motion.  ``events`` contributes only its
+        length.  The overall score comes from ``_calculate_overall_quality_score``.
+
+        Returns ``{}`` when ``keyframes`` is empty, otherwise the metrics dict.
+        """
+        if not keyframes:
+            return {}
+        total = len(keyframes)
+        frames = [kf.frame_data for kf in keyframes]
+        quality_bar = self.config.base_quality_threshold * 1.2
+        motion_bar = self.config.motion_threshold
+        return {
+            'frame_extraction_efficiency': {
+                'total_frames_extracted': total,
+                'burst_frame_rate': sum(1 for fd in frames if fd.burst_active) / total * 100,
+                'high_quality_frame_rate': sum(1 for fd in frames if fd.quality_score > quality_bar) / total * 100,
+                'high_motion_frame_rate': sum(1 for fd in frames if fd.motion_score > motion_bar) / total * 100,
+            },
+            'event_detection_efficiency': {
+                'events_per_keyframe': len(events) / total,
+                'total_events_detected': len(events),
+            },
+            'processing_quality_score': self._calculate_overall_quality_score(keyframes, events),
+        }
+    
+    def _calculate_overall_quality_score(self, keyframes: List, events: List) -> float:
+        """Return a 0-100 score blending quality, motion, burst and event rates.
+
+        The four component rates are clamped to [0, 1] and weighted 40/30/20/10.
+        The weights sum to 100, so the weighted sum is already on the 0-100
+        scale and must not be scaled again (doing so pins the score at 100).
+        Assumes per-frame scores are normalised to [0, 1] - TODO confirm
+        against the extractor.  Empty ``keyframes`` scores 0.0.
+        """
+        if not keyframes:
+            return 0.0
+        count = len(keyframes)
+        frames = [kf.frame_data for kf in keyframes]
+        components = (  # (rate, weight in points)
+            (sum(fd.quality_score for fd in frames) / count, 40),
+            (sum(fd.motion_score for fd in frames) / count, 30),
+            (sum(1 for fd in frames if fd.burst_active) / count, 20),
+            (len(events) / count, 10),
+        )
+        return float(sum(min(1.0, max(0.0, rate)) * weight for rate, weight in components))
+    
+    def _get_segments_summary(self, segments: List) -> Dict[str, Any]:
+        """Count segments overall, per activity level and per segment type.
+
+        Args:
+            segments: Objects exposing ``activity_level`` and ``segment_type``.
+
+        Returns:
+            ``{}`` for empty input; otherwise ``total_segments`` plus the two
+            distributions, each keyed in first-seen order.
+        """
+        if not segments:
+            return {}
+        def tally(values):
+            counts = {}
+            for value in values:
+                counts[value] = counts.get(value, 0) + 1
+            return counts
+        return {
+            'total_segments': len(segments),
+            'activity_level_distribution': tally(seg.activity_level for seg in segments),
+            'segment_type_distribution': tally(seg.segment_type for seg in segments),
+        }
+    
+    def _create_html_gallery(self, keyframes: List, canonical_events: List = None,
+                           segments: List = None, title: str = "Video Processing Gallery") -> str:
+        """Build the gallery page: header stats plus one card per keyframe.
+
+        At most the first 50 keyframes are rendered; each image is embedded as a
+        base64 ``data:`` URI.  ``title`` and each ``selection_reason`` are
+        HTML-escaped.  A frame with no path or an unreadable file gets a
+        placeholder; a keyframe that raises while rendering is logged and skipped.
+        """
+        from html import escape
+        safe_title = escape(str(title))
+        parts = [f"""
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{safe_title}</title>
+    <style>
+        body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
+        .header {{ text-align: center; margin-bottom: 30px; }}
+        .stats {{ display: flex; justify-content: space-around; margin-bottom: 30px; }}
+        .stat-card {{ background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .gallery {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 20px; }}
+        .frame-card {{ background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .frame-image {{ width: 100%; height: 200px; object-fit: cover; }}
+        .frame-info {{ padding: 15px; }}
+        .frame-info h3 {{ margin: 0 0 10px 0; color: #333; }}
+        .frame-info p {{ margin: 5px 0; color: #666; font-size: 14px; }}
+        .event-badge {{ display: inline-block; padding: 3px 8px; border-radius: 12px; font-size: 12px; color: white; margin-right: 5px; }}
+        .burst-activity {{ background-color: #e74c3c; }}
+        .high-motion {{ background-color: #f39c12; }}
+        .high-quality {{ background-color: #27ae60; }}
+        .context-frame {{ background-color: #3498db; }}
+        .timestamp {{ font-weight: bold; color: #2c3e50; }}
+        .score {{ color: #8e44ad; font-weight: bold; }}
+    </style>
+</head>
+<body>
+    <div class="header">
+        <h1>{safe_title}</h1>
+        <p>Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
+    </div>
+    
+    <div class="stats">
+        <div class="stat-card">
+            <h3>Keyframes</h3>
+            <p>{len(keyframes)} extracted</p>
+        </div>
+        <div class="stat-card">
+            <h3>Events</h3>
+            <p>{len(canonical_events) if canonical_events else 0} canonical</p>
+        </div>
+        <div class="stat-card">
+            <h3>Segments</h3>
+            <p>{len(segments) if segments else 0} temporal</p>
+        </div>
+    </div>
+    
+    <div class="gallery">
+"""]
+        for i, kf in enumerate(keyframes[:50]):  # Limit to first 50 for performance
+            try:
+                fd = kf.frame_data
+                # Embed the image as base64; a missing or unreadable file gets a placeholder.
+                image_data = ""
+                if fd.frame_path and os.path.exists(fd.frame_path):
+                    try:
+                        with open(fd.frame_path, 'rb') as img_file:
+                            image_data = base64.b64encode(img_file.read()).decode('utf-8')
+                    except Exception as e:
+                        logger.warning(f"Could not encode image {fd.frame_path}: {e}")
+                if image_data:
+                    image_html = f"<img class='frame-image' src='data:image/jpeg;base64,{image_data}' alt='Keyframe {i+1}'>"
+                else:
+                    image_html = "<div class='frame-image' style='background-color: #ddd; display: flex; align-items: center; justify-content: center;'>Image not available</div>"
+                mins, secs = int(fd.timestamp // 60), fd.timestamp % 60
+                # Badge priority: burst activity, then high motion, then high quality.
+                badge_class = "context-frame"
+                if fd.burst_active:
+                    badge_class = "burst-activity"
+                elif fd.motion_score > self.config.motion_threshold:
+                    badge_class = "high-motion"
+                elif fd.quality_score > self.config.base_quality_threshold * 1.2:
+                    badge_class = "high-quality"
+                reason = escape(str(kf.selection_reason))
+                parts.append(f"""
+        <div class="frame-card">
+            {image_html}
+            <div class="frame-info">
+                <h3>Frame {i+1}</h3>
+                <p><span class="timestamp">Time: {mins:02d}:{secs:04.1f}</span></p>
+                <p>Quality: <span class="score">{fd.quality_score:.3f}</span></p>
+                <p>Motion: <span class="score">{fd.motion_score:.4f}</span></p>
+                <p>Keyframe Score: <span class="score">{kf.keyframe_score:.3f}</span></p>
+                <p><span class="event-badge {badge_class}">{reason}</span></p>
+                {"<p>✨ Enhanced</p>" if fd.enhancement_applied else ""}
+            </div>
+        </div>
+""")
+            except Exception as e:
+                logger.warning(f"Error processing keyframe {i}: {e}")
+
+        parts.append("""
+    </div>
+</body>
+</html>
+""")
+        return "".join(parts)
+    
+    def generate_captioning_report(self, captioning_results: Dict[str, Any], statistics: Dict[str, Any]) -> str:
+        """Save the video captioning outcome as ``video_captioning.json``.
+
+        Reads the optional ``enabled``, ``total_captions``, ``processing_time``,
+        ``captions`` and ``errors`` keys of ``captioning_results`` and embeds
+        ``statistics`` unchanged.  Returns the file path, or ``""`` on failure.
+        """
+        logger.info("Generating video captioning report")
+        # Missing keys fall back to neutral defaults (False, 0 or an empty list).
+        lookup = captioning_results.get
+        payload = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'report_version': '1.0',
+            },
+            'summary': {
+                'captioning_enabled': lookup('enabled', False),
+                'total_captions_generated': lookup('total_captions', 0),
+                'processing_time': lookup('processing_time', 0),
+                'errors_count': len(lookup('errors', [])),
+            },
+            'statistics': statistics,
+            'captions': lookup('captions', []),
+            'errors': lookup('errors', []),
+        }
+        destination = os.path.join(self.reports_dir, "video_captioning.json")
+        try:
+            with open(destination, 'w') as handle:
+                json.dump(payload, handle, indent=2)
+            logger.info(f"Video captioning report saved: {destination}")
+            return destination
+        except Exception as e:
+            logger.error(f"Failed to save video captioning report: {e}")
+            return ""
+
+# Import numpy for statistics
+import numpy as np
\ No newline at end of file
diff --git a/live_stream_processor.py b/live_stream_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4f09668acc618cfc752d6de1e165d1182457700
--- /dev/null
+++ b/live_stream_processor.py
@@ -0,0 +1,866 @@
+"""
+Live Stream Processor for DetectifAI
+
+Processes live webcam/CCTV footage through the same pipeline as uploaded videos:
+- Object detection (fire, weapons)
+- Behavior analysis (fighting, accidents, climbing)
+- Facial recognition on suspicious frames
+- Real-time event detection
+- Storage in MongoDB and MinIO
+"""
+
+import cv2
+import numpy as np
+import io
+import os
+import time
+import threading
+import logging
+import uuid
+from datetime import datetime
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+
+from config import VideoProcessingConfig, get_security_focused_config
+from object_detection import ObjectDetector
+from behavior_analysis_integrator import BehaviorAnalysisIntegrator
+from database.config import DatabaseManager
+from database.repositories import VideoRepository, EventRepository
+from database.keyframe_repository import KeyframeRepository
+
+# Real-time alert engine
+try:
+    from real_time_alerts import get_alert_engine, RealTimeAlertEngine
+    ALERTS_AVAILABLE = True
+except ImportError:
+    ALERTS_AVAILABLE = False
+    logging.warning("Real-time alerts module not available")
+
+logger = logging.getLogger(__name__)
+
+
+class LiveStreamProcessor:
+    """Process live video streams with DetectifAI pipeline"""
+    
+    def __init__(self, config: Optional[VideoProcessingConfig] = None, camera_id: str = "webcam_01"):
+        """
+        Set up database access, optional detectors and counters for one stream
+        
+        Args:
+            config: Processing settings; None selects get_security_focused_config()
+            camera_id: Unique identifier for the camera/stream
+        """
+        self.config = config or get_security_focused_config()
+        self.camera_id = camera_id
+        self.is_processing = False
+        self.cap = None
+        self.camera_index = 0  # Default camera index
+        self.frame_count = 0
+        self.last_keyframe_time = 0
+        self.keyframe_interval = 1.0  # Extract keyframe every 1 second
+        
+        # All repositories share a single DatabaseManager
+        self.db_manager = DatabaseManager()
+        self.video_repo = VideoRepository(self.db_manager)
+        self.event_repo = EventRepository(self.db_manager)
+        self.keyframe_repo = KeyframeRepository(self.db_manager)
+        
+        # Object detection is optional: an init failure is logged and the flag is cleared on self.config
+        self.object_detector = None
+        if self.config.enable_object_detection:
+            try:
+                self.object_detector = ObjectDetector(self.config)
+                logger.info("✅ Object detection enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Object detection initialization failed: {e}")
+                self.config.enable_object_detection = False  # may be the caller's config object
+        
+        self.behavior_analyzer = None  # optional; same fallback pattern as object detection
+        if getattr(self.config, 'enable_behavior_analysis', False):
+            try:
+                self.behavior_analyzer = BehaviorAnalysisIntegrator(self.config)
+                logger.info("✅ Behavior analysis enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Behavior analysis initialization failed: {e}")
+                self.config.enable_behavior_analysis = False
+        
+        # Facial recognition is imported lazily; a failure is only logged (config flag left as is)
+        self.face_recognizer = None
+        if getattr(self.config, 'enable_facial_recognition', False):
+            try:
+                from facial_recognition import FacialRecognitionIntegrated
+                self.face_recognizer = FacialRecognitionIntegrated(self.config)
+                logger.info("✅ Facial recognition enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Facial recognition initialization failed: {e}")
+        
+        # Frame buffer for behavior analysis (needs 16 frames)
+        self.frame_buffer = []
+        self.buffer_size = 16
+        
+        # Motion detection state: previous grayscale frame and per-pixel difference threshold
+        self.prev_frame_gray = None
+        self.motion_threshold = 25
+        
+        # Real-time alert engine, only when the optional module imported successfully
+        self.alert_engine = None
+        if ALERTS_AVAILABLE:
+            try:
+                self.alert_engine = get_alert_engine()
+                self.alert_engine.load_flagged_persons()  # if this raises, the engine stays assigned
+                logger.info("✅ Real-time alert engine connected for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Alert engine initialization failed: {e}")
+        
+        # Running counters for this stream (start_time is initially unset)
+        self.stats = {
+            'frames_processed': 0,
+            'keyframes_extracted': 0,
+            'objects_detected': 0,
+            'behaviors_detected': 0,
+            'events_created': 0,
+            'alerts_generated': 0,
+            'start_time': None
+        }
+        
+        logger.info(f"✅ Live stream processor initialized for camera: {camera_id}")
+    
+    def preprocess_frame(self, frame: np.ndarray) -> Optional[np.ndarray]:
+        """
+        Resize a camera frame to 640x640 and reject it if it looks blurry
+
+        Sharpness is estimated as the variance of the Laplacian of the resized
+        grayscale image; values below 100 count as blurry. No enhancement is
+        applied here.
+
+        Args:
+            frame: Input frame from camera (converted with COLOR_BGR2GRAY, so BGR is assumed)
+
+        Returns:
+            The resized frame, or None when frame is None or too blurry
+        """
+        if frame is None:
+            return None
+
+        # Fixed square processing size; the aspect ratio is not preserved.
+        resized = cv2.resize(frame, (640, 640))
+
+        # Sharpness = variance of the Laplacian of the grayscale image.
+        grayscale = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
+        sharpness = cv2.Laplacian(grayscale, cv2.CV_64F).var()
+
+        # Low variance means few edges, i.e. a blurry frame: drop it.
+        return None if sharpness < 100 else resized
+    
+    def detect_motion(self, frame_gray: np.ndarray) -> Tuple[bool, float]:
+        """
+        Detect motion by differencing against the previously seen frame
+
+        A pixel counts as changed when its absolute difference exceeds
+        self.motion_threshold; motion is reported above 5000 changed pixels.
+
+        Args:
+            frame_gray: Grayscale frame; becomes the reference for the next call
+
+        Returns:
+            (motion_detected, motion_score) as built-in bool and float;
+            (False, 0.0) on the first call, which only stores the reference
+        """
+        if self.prev_frame_gray is None:
+            self.prev_frame_gray = frame_gray
+            return False, 0.0
+        diff = cv2.absdiff(self.prev_frame_gray, frame_gray)
+        self.prev_frame_gray = frame_gray
+        changed_pixels = int(np.sum(diff > self.motion_threshold))  # plain int, so the flag below is a bool
+        return changed_pixels > 5000, float(changed_pixels)  # numpy.bool_ would not be JSON-serialisable
+    
+    def process_frame(self, frame: np.ndarray, timestamp: float) -> Dict[str, Any]:
+        """
+        Run one frame through motion, object, behavior and face analysis.
+
+        Args:
+            frame: Input frame
+            timestamp: Frame timestamp in seconds
+
+        Returns:
+            Results dict; detection lists stay empty when preprocessing rejects the frame
+        """
+        results = {
+            'timestamp': timestamp,
+            'frame_count': self.frame_count,
+            'objects_detected': [],
+            'behaviors_detected': [],
+            'motion_detected': False,
+            'motion_score': 0.0,
+            'events': []
+        }
+
+        # Preprocess frame (returns None for frames it rejects, e.g. blurry ones)
+        processed_frame = self.preprocess_frame(frame)
+        if processed_frame is None:
+            return results
+
+        # Detect motion
+        gray = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2GRAY)
+        motion_detected, motion_score = self.detect_motion(gray)
+        results['motion_detected'] = motion_detected
+        results['motion_score'] = motion_score
+
+        # Add to frame buffer for behavior analysis (oldest frame is dropped when full)
+        self.frame_buffer.append(processed_frame.copy())
+        if len(self.frame_buffer) > self.buffer_size:
+            self.frame_buffer.pop(0)
+
+        # Object detection (run on every frame with motion, or periodically)
+        # For real-time display, we want detections to show immediately
+        should_run_detection = motion_detected or (self.frame_count % 30 == 0)  # Every 30 frames or on motion
+
+        if self.object_detector and should_run_detection:
+            try:
+                # Create a temporary keyframe-like object
+                from core.video_processing import KeyframeResult, FrameData
+                frame_data = FrameData(
+                    frame_path=None,  # Live frame, no file path
+                    timestamp=timestamp,
+                    frame_index=self.frame_count
+                )
+                keyframe = KeyframeResult(
+                    frame_data=frame_data,
+                    quality_score=0.8,
+                    is_keyframe=True
+                )
+
+                # Store frame temporarily for detection (the detector is handed a file path)
+                import tempfile
+                temp_dir = tempfile.gettempdir()
+                temp_frame_path = os.path.join(temp_dir, f"live_frame_{self.camera_id}_{self.frame_count}.jpg")
+                cv2.imwrite(temp_frame_path, processed_frame)
+                keyframe.frame_data.frame_path = temp_frame_path
+                try:
+                    # Run object detection
+                    detection_result = self.object_detector.detect_objects_in_keyframes([keyframe])
+                    if detection_result and len(detection_result) > 0:
+                        detections = detection_result[0]
+                        if hasattr(detections, 'total_detections') and detections.total_detections > 0:
+                            results['objects_detected'] = [
+                                {
+                                    'class': det.class_name,
+                                    'confidence': float(det.confidence),
+                                    'bbox': det.bbox
+                                }
+                                for det in detections.detections
+                            ]
+                            self.stats['objects_detected'] += len(results['objects_detected'])
+
+                            # Log detections in real-time
+                            obj_classes = [obj['class'] for obj in results['objects_detected']]
+                            logger.info(f"🎯 REAL-TIME DETECTION: {len(results['objects_detected'])} object(s) detected: {', '.join(obj_classes)} (frame {self.frame_count})")
+
+                            # Generate real-time alerts for each detection
+                            if self.alert_engine:
+                                for det in results['objects_detected']:
+                                    alert = self.alert_engine.process_detection(
+                                        camera_id=self.camera_id,
+                                        detection_class=det['class'],
+                                        confidence=det['confidence'],
+                                        bounding_boxes=[det],
+                                        frame=processed_frame,
+                                        timestamp=timestamp,
+                                        video_id=f"live_{self.camera_id}",
+                                    )
+                                    if alert:
+                                        self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+                finally:
+                    # Always clean up the temp file, even when detection or alerting raises
+                    try:
+                        os.remove(temp_frame_path)
+                    except OSError:
+                        pass
+
+            except Exception as e:
+                logger.warning(f"Error in object detection: {e}")
+
+        # Behavior analysis (on frame buffer) - use frame buffer method for live streams
+        if self.behavior_analyzer and len(self.frame_buffer) >= 16 and motion_detected:
+            try:
+                # Use frame buffer method for live streams (no video file needed)
+                behavior_results = self.behavior_analyzer.detect_behavior_in_segment_from_buffer(
+                    frame_buffer=self.frame_buffer,
+                    start_time=timestamp - (len(self.frame_buffer) / 30.0),  # Approximate start time
+                    end_time=timestamp,
+                    frame_indices=list(range(max(0, self.frame_count - len(self.frame_buffer) + 1), self.frame_count + 1))
+                )
+
+                if behavior_results:
+                    results['behaviors_detected'] = [
+                        {
+                            'behavior_type': r.behavior_detected,  # Use behavior_type for consistency
+                            'behavior': r.behavior_detected,  # Keep both for compatibility
+                            'confidence': float(r.confidence),
+                            'model': r.model_used
+                        }
+                        for r in behavior_results
+                    ]
+                    self.stats['behaviors_detected'] += len(results['behaviors_detected'])
+
+                    # Log behaviors in real-time
+                    behavior_types = [b['behavior_type'] for b in results['behaviors_detected']]
+                    logger.info(f"🎭 REAL-TIME BEHAVIOR: {len(results['behaviors_detected'])} behavior(s) detected: {', '.join(behavior_types)} (frame {self.frame_count})")
+
+                    # Generate real-time alerts for each behavior
+                    if self.alert_engine:
+                        for beh in results['behaviors_detected']:
+                            alert = self.alert_engine.process_detection(
+                                camera_id=self.camera_id,
+                                detection_class=beh['behavior_type'],
+                                confidence=beh['confidence'],
+                                frame=processed_frame,
+                                timestamp=timestamp,
+                                video_id=f"live_{self.camera_id}",
+                            )
+                            if alert:
+                                self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+            except Exception as e:
+                logger.warning(f"Error in behavior analysis: {e}")
+
+        # Facial recognition, only on frames that produced an object or behavior detection
+        if self.face_recognizer and (results['objects_detected'] or results['behaviors_detected']):
+            try:
+                # Process frame for facial recognition
+                face_results = self.face_recognizer.detect_faces_in_frame(
+                    processed_frame,
+                    frame_number=self.frame_count,
+                    timestamp=timestamp,
+                    event_id=f"live_{self.camera_id}_{int(timestamp)}"
+                )
+                if face_results:
+                    results['faces_detected'] = len(face_results)
+
+                    # Check for suspicious person re-appearance (faces may be dicts or objects)
+                    if self.alert_engine:
+                        for face in face_results:
+                            face_id = face.get('face_id') if isinstance(face, dict) else getattr(face, 'face_id', None)
+                            match_score = face.get('confidence', 0.0) if isinstance(face, dict) else getattr(face, 'confidence_score', 0.0)
+                            if face_id and match_score:
+                                alert = self.alert_engine.process_suspicious_person(
+                                    camera_id=self.camera_id,
+                                    face_id=str(face_id),
+                                    face_match_score=float(match_score),
+                                    frame=processed_frame,
+                                    timestamp=timestamp,
+                                )
+                                if alert:
+                                    self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+            except Exception as e:
+                logger.warning(f"Error in facial recognition: {e}")
+
+        return results
+    
+    def save_keyframe(self, frame: np.ndarray, results: Dict[str, Any], timestamp: float) -> Optional[str]:
+        """
+        JPEG-encode a frame, upload it to MinIO and record its metadata in MongoDB
+        
+        Args:
+            frame: Frame to save
+            results: Processing results; its detection and motion fields are copied into the stored record
+            timestamp: Frame timestamp
+            
+        Returns:
+            "<bucket>/<object_name>" of the uploaded JPEG, or None when encoding fails or an exception is raised
+        """
+        try:
+            # Encode the frame in memory as JPEG at quality 85
+            is_success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+            if not is_success:
+                logger.warning(f"⚠️ Failed to encode frame {self.frame_count} as JPEG")
+                return None
+            
+            frame_bytes = buffer.tobytes()
+            frame_size = len(frame_bytes)
+            
+            # Object name is built from the camera id and the current UTC time (down to microseconds)
+            timestamp_str = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
+            object_name = f"live/{self.camera_id}/{timestamp_str}.jpg"
+            
+            # The MinIO client and the target bucket are both taken from the keyframe repository
+            minio_client = self.keyframe_repo.minio  # Use minio client from keyframe repository
+            bucket = self.keyframe_repo.bucket  # Use bucket from keyframe repository
+            
+            logger.info(f"📤 Uploading keyframe to MinIO: {bucket}/{object_name} ({frame_size} bytes)")
+            
+            # Wrap the encoded bytes in a file-like object so they can be uploaded without touching disk
+            from io import BytesIO
+            frame_buffer = BytesIO(frame_bytes)
+            
+            # Object metadata attached to the upload; numeric and boolean values are stringified
+            metadata = {
+                "frame_index": str(self.frame_count),
+                "timestamp": str(timestamp),
+                "camera_id": self.camera_id,
+                "motion_detected": str(results.get('motion_detected', False)),
+                "motion_score": str(results.get('motion_score', 0.0))
+            }
+            
+            minio_client.put_object(
+                bucket,
+                object_name,
+                frame_buffer,
+                length=frame_size,
+                content_type="image/jpeg",
+                metadata=metadata
+            )
+            
+            logger.info(f"✅ Uploaded keyframe to MinIO: {bucket}/{object_name}")
+            
+            # Metadata document for MongoDB; it points back at the uploaded object via minio_path/minio_bucket
+            keyframe_doc = {
+                "camera_id": self.camera_id,
+                "video_id": f"live_{self.camera_id}",  # Use consistent video_id format
+                "timestamp": timestamp,
+                "timestamp_ms": int(timestamp * 1000),
+                "frame_index": self.frame_count,
+                "frame_number": self.frame_count,  # Also include frame_number for consistency
+                "minio_path": object_name,
+                "minio_bucket": bucket,
+                "objects_detected": results.get('objects_detected', []),
+                "behaviors_detected": results.get('behaviors_detected', []),
+                "motion_detected": results.get('motion_detected', False),
+                "motion_score": results.get('motion_score', 0.0),
+                "created_at": datetime.utcnow()
+            }
+            
+            # Persist through the keyframe repository; a falsy id is treated as a failed insert and only logged
+            keyframe_id = self.keyframe_repo.create_keyframe(keyframe_doc)
+            if keyframe_id:
+                logger.info(f"✅ Saved keyframe metadata to MongoDB: {object_name} (ID: {keyframe_id})")
+            else:
+                logger.warning(f"⚠️ Failed to save keyframe metadata to MongoDB: {object_name}")
+            # NOTE: the counter is bumped and the path returned even if the MongoDB insert above reported failure
+            self.stats['keyframes_extracted'] += 1
+            
+            # Return full path for URL generation
+            return f"{bucket}/{object_name}"
+            
+        except Exception as e:
+            logger.error(f"❌ Error saving keyframe: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    
+    def create_event(self, results: Dict[str, Any], start_time: float, end_time: float) -> Optional[str]:
+        """
+        Build an event document from accumulated detection results and store it via the event repository
+        
+        Args:
+            results: Processing results ('objects_detected', 'behaviors_detected', 'motion_score' are read)
+            start_time: Event start time
+            end_time: Event end time
+            
+        Returns:
+            Whatever event_repo.create_event returns (falsy is logged as a failure), or None if an exception is raised
+        """
+        try:
+            # Event type: first detected object wins over first detected behavior; "motion" if neither exists
+            event_type = "motion"
+            if results.get('objects_detected'):
+                # Get the primary object class for event type
+                primary_object = results['objects_detected'][0].get('class', 'object')
+                event_type = f"object_detection_{primary_object}"
+            elif results.get('behaviors_detected'):
+                primary_behavior = results['behaviors_detected'][0].get('behavior_type', 'behavior')
+                event_type = f"behavior_detection_{primary_behavior}"
+            
+            # Event confidence is the maximum over all object and behavior confidences (0.0 when there are none)
+            confidences = []
+            if results.get('objects_detected'):
+                confidences.extend([float(r.get('confidence', 0.0)) for r in results['objects_detected']])
+            if results.get('behaviors_detected'):
+                confidences.extend([float(r.get('confidence', 0.0)) for r in results['behaviors_detected']])
+            max_confidence = max(confidences) if confidences else 0.0
+            
+            # Bounding boxes are only populated from object detections; every box is stamped with start_time
+            bounding_boxes = {}
+            if results.get('objects_detected'):
+                bounding_boxes["detections"] = [
+                    {
+                        "class": det.get('class', 'unknown'),
+                        "confidence": float(det.get('confidence', 0.0)),
+                        "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                        "timestamp": float(start_time),
+                        "model": det.get('detection_model', 'fire' if det.get('class') == 'fire' else 'weapon')
+                        # Without a 'detection_model' key the model is guessed: 'fire' for class 'fire', else 'weapon'
+                    }
+                    for det in results['objects_detected']
+                ]
+            
+            # Event document; event_id combines camera id, whole-second start time and a random 8-hex-char suffix
+            event_doc = {
+                "event_id": f"live_{self.camera_id}_{int(start_time)}_{uuid.uuid4().hex[:8]}",
+                "camera_id": self.camera_id,
+                "video_id": f"live_{self.camera_id}",  # Use camera_id as video_id for live streams
+                "event_type": event_type,
+                "start_timestamp": start_time,
+                "end_timestamp": end_time,
+                "start_timestamp_ms": int(start_time * 1000),
+                "end_timestamp_ms": int(end_time * 1000),
+                "confidence": max_confidence,
+                "confidence_score": max_confidence,  # Also include confidence_score for schema compliance
+                "description": f"Live stream event: {event_type} detected",
+                "bounding_boxes": bounding_boxes,
+                "metadata": {
+                    "camera_id": self.camera_id,
+                    "objects_detected": results.get('objects_detected', []),
+                    "behaviors_detected": results.get('behaviors_detected', []),
+                    "motion_score": results.get('motion_score', 0.0),
+                    "source": "live_stream"
+                }
+            }
+            
+            logger.info(f"📝 Creating event: {event_type} (confidence: {max_confidence:.2f})")
+            event_id = self.event_repo.create_event(event_doc)
+            
+            if event_id:
+                logger.info(f"✅ Created event in MongoDB: {event_doc['event_id']} (MongoDB ID: {event_id})")
+                self.stats['events_created'] += 1
+            else:
+                logger.warning(f"⚠️ Failed to create event in MongoDB: {event_doc['event_id']}")
+            
+            return event_id
+            
+        except Exception as e:
+            logger.error(f"❌ Error creating event: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    
+    def generate_frames(self, camera_index: int = 0):
+        """
+        Open the camera, process frames in a loop and yield them as multipart JPEG chunks.
+
+        Args:
+            camera_index: Camera device index (0 for default webcam)
+
+        Yields:
+            Annotated JPEG frames wrapped in multipart boundaries, or one error image if the camera cannot be opened
+        """
+        # Release any existing camera connection
+        if self.cap is not None:
+            try:
+                self.cap.release()
+            except Exception:
+                pass
+
+        # Try to open camera with retries
+        max_retries = 3
+        self.cap = None
+
+        for attempt in range(max_retries):
+            try:
+                logger.info(f"Attempting to open camera {camera_index} (attempt {attempt + 1}/{max_retries})")
+                self.cap = cv2.VideoCapture(camera_index)
+
+                # Give camera time to initialize
+                time.sleep(0.5)
+
+                if self.cap.isOpened():
+                    # Test if we can actually read a frame
+                    ret, test_frame = self.cap.read()
+                    if ret and test_frame is not None:
+                        logger.info(f"✅ Successfully opened camera {camera_index}")
+                        break
+                    else:
+                        logger.warning(f"Camera {camera_index} opened but cannot read frames")
+                        self.cap.release()
+                        self.cap = None
+                else:
+                    logger.warning(f"Camera {camera_index} failed to open")
+                    if self.cap:
+                        self.cap.release()
+                        self.cap = None
+            except Exception as e:
+                logger.error(f"Error opening camera {camera_index}: {e}")
+                if self.cap:
+                    try:
+                        self.cap.release()
+                    except Exception:
+                        pass
+                    self.cap = None
+
+        if self.cap is None or not self.cap.isOpened():
+            error_msg = f"❌ Could not open camera {camera_index} after {max_retries} attempts"
+            logger.error(error_msg)
+            # Yield an error frame
+            error_frame = self._create_error_frame(error_msg)
+            ret, buffer = cv2.imencode('.jpg', error_frame)
+            if ret:
+                yield (b'--frame\r\n'
+                       b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
+            return
+
+        # Set camera properties
+        try:
+            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+            self.cap.set(cv2.CAP_PROP_FPS, 30)
+            # Set buffer size to reduce latency
+            self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+        except Exception as e:
+            logger.warning(f"Could not set camera properties: {e}")
+
+        self.is_processing = True
+        self.stats['start_time'] = time.time()
+        self.frame_count = 0
+        self.last_keyframe_time = time.time()
+
+        logger.info(f"🎥 Started live stream processing for camera {camera_index}")
+        logger.info(f"📊 Camera properties: {self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)}x{self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)} @ {self.cap.get(cv2.CAP_PROP_FPS)} FPS")
+        logger.info(f"🔄 Entering frame generation loop...")
+
+        # State of the event currently being accumulated (None while no detection is active)
+        current_event_start = None
+        event_results = None
+
+        try:
+            consecutive_failures = 0
+            max_failures = 10
+            while self.is_processing:
+                ret, frame = self.cap.read()
+                if not ret or frame is None:
+                    consecutive_failures += 1
+                    if consecutive_failures >= max_failures:
+                        logger.error(f"❌ Failed to read {max_failures} consecutive frames from camera")
+                        break
+                    logger.warning(f"⚠️ Failed to read frame from camera (failure {consecutive_failures}/{max_failures})")
+                    time.sleep(0.1)  # Brief pause before retry
+                    continue
+
+                consecutive_failures = 0  # Reset on success
+                self.frame_count += 1
+                self.stats['frames_processed'] += 1
+
+                if self.frame_count == 1:
+                    logger.info(f"✅ Successfully read first frame! Frame shape: {frame.shape}")
+                current_time = time.time()
+                timestamp = current_time - self.stats['start_time']
+
+                # Process frame
+                results = self.process_frame(frame, timestamp)
+
+                # Extract keyframe periodically or on significant events
+                should_extract_keyframe = (
+                    (current_time - self.last_keyframe_time >= self.keyframe_interval) or
+                    results.get('objects_detected') or
+                    results.get('behaviors_detected')
+                )
+
+                if should_extract_keyframe:
+                    self.save_keyframe(frame, results, timestamp)
+                    self.last_keyframe_time = current_time
+
+                # Track events: consecutive frames with detections are merged into one event
+                if results.get('objects_detected') or results.get('behaviors_detected'):
+                    if current_event_start is None:
+                        current_event_start = timestamp
+                        event_results = results
+                    else:
+                        # Update event results
+                        event_results['objects_detected'].extend(results.get('objects_detected', []))
+                        event_results['behaviors_detected'].extend(results.get('behaviors_detected', []))
+                else:
+                    # End event if it exists
+                    if current_event_start is not None:
+                        self.create_event(event_results, current_event_start, timestamp)
+                        current_event_start = None
+                        event_results = None
+
+                # Draw annotations on frame
+                annotated_frame = self.annotate_frame(frame, results)
+
+                # Encode frame for streaming
+                ret, buffer = cv2.imencode('.jpg', annotated_frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+                if ret:
+                    frame_bytes = buffer.tobytes()
+                    if self.frame_count % 30 == 0:  # Log every 30 frames
+                        logger.debug(f"📹 Yielding frame {self.frame_count} ({len(frame_bytes)} bytes)")
+                    yield (b'--frame\r\n'
+                           b'Content-Type: image/jpeg\r\n\r\n' + frame_bytes + b'\r\n')
+                else:
+                    logger.warning(f"⚠️ Failed to encode frame {self.frame_count}")
+
+                # Small delay to control frame rate
+                time.sleep(0.033)  # ~30 FPS
+        except Exception as e:
+            logger.exception(f"Error in frame generation: {e}")
+        finally:
+            # Flush an event still in progress so that stopping the stream does not lose it
+            if current_event_start is not None:
+                self.create_event(event_results, current_event_start, time.time() - self.stats['start_time'])
+            self.stop()
+    
+    def _create_error_frame(self, error_message: str) -> np.ndarray:
+        """Build a 640x480 dark image showing a red title and the word-wrapped error text"""
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        white = (255, 255, 255)
+        canvas = np.full((480, 640, 3), 20, dtype=np.uint8)  # Dark background
+
+        # Title, centred horizontally on the 640 px wide canvas
+        title = "Camera Error"
+        title_width = cv2.getTextSize(title, font, 1, 2)[0][0]
+        title_origin = ((640 - title_width) // 2, 200)
+        cv2.putText(canvas, title, title_origin, font, 1, (0, 0, 255), 2)
+
+        # Greedy word wrap: keep appending words while the rendered line stays
+        # within 600 px, otherwise draw the pending line and start a new one
+        pending = ""
+        baseline_y = 250
+        for word in error_message.split(' '):
+            candidate = f"{pending}{word} "
+            if cv2.getTextSize(candidate, font, 0.6, 1)[0][0] <= 600:
+                pending = candidate
+                continue
+            cv2.putText(canvas, pending, (20, baseline_y), font, 0.6, white, 1)
+            pending = f"{word} "
+            baseline_y += 30
+
+        # Draw whatever is left after the last word
+        if pending:
+            cv2.putText(canvas, pending, (20, baseline_y), font, 0.6, white, 1)
+
+        return canvas
+    
+    def annotate_frame(self, frame: np.ndarray, results: Dict[str, Any]) -> np.ndarray:
+        """
+        Overlay detection boxes, behavior labels, motion/face indicators and a stats line on a copy of the frame
+        
+        Args:
+            frame: Input frame (not modified; all drawing happens on a copy)
+            results: Processing results ('objects_detected', 'behaviors_detected', 'motion_*', 'faces_detected' are read)
+            
+        Returns:
+            Annotated copy of the frame
+        """
+        annotated = frame.copy()
+        
+        # Draw one colored box plus a "class: confidence" label per object detection
+        for obj in results.get('objects_detected', []):
+            bbox = obj.get('bbox', [0, 0, 100, 100])
+            class_name = obj.get('class', 'object')
+            confidence = float(obj.get('confidence', 0.0))
+            
+            x1, y1, x2, y2 = map(int, bbox)
+            # NOTE(review): detection runs on the resized frame in process_frame - confirm boxes match this frame's size
+            # Color per object class, as BGR tuples; unknown classes fall back to red
+            color_map = {
+                'fire': (255, 255, 0),    # Cyan/Blue (BGR)
+                'knife': (0, 255, 255),   # Yellow (BGR)
+                'gun': (0, 255, 0),       # Green (BGR)
+                'smoke': (128, 128, 128)  # Gray (BGR)
+            }
+            color = color_map.get(class_name.lower(), (0, 0, 255))  # Default red
+            
+            # Draw bounding box with thicker line for visibility
+            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 3)
+            
+            # Label text is measured first so the filled background can be sized to fit it
+            label = f"{class_name}: {confidence:.2f}"
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.6
+            thickness = 2
+            label_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
+            
+            # Draw label background (filled rectangle sitting directly above the box's top-left corner)
+            cv2.rectangle(annotated, 
+                         (x1, y1 - label_size[1] - 10), 
+                         (x1 + label_size[0], y1), 
+                         color, -1)
+            
+            # Draw label text
+            cv2.putText(annotated, label, (x1, y1 - 5),
+                       font, font_scale, (255, 255, 255), thickness)
+        
+        # Behavior labels are stacked down the left edge; behavior_y_offset tracks the next free text row
+        behavior_y_offset = 30
+        for behavior in results.get('behaviors_detected', []):
+            behavior_type = behavior.get('behavior_type', behavior.get('behavior', 'unknown'))
+            confidence = float(behavior.get('confidence', 0.0))
+            label = f"{behavior_type.upper()}: {confidence:.2f}"
+            
+            # Color coding for behaviors (BGR); unknown behavior types fall back to green
+            behavior_colors = {
+                'fighting': (0, 0, 255),      # Red
+                'road_accident': (0, 165, 255),  # Orange
+                'wallclimb': (255, 0, 255)   # Magenta
+            }
+            behavior_color = behavior_colors.get(behavior_type.lower(), (0, 255, 0))  # Default green
+            
+            # Draw behavior label with background
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.7
+            thickness = 2
+            label_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
+            
+            # Background for behavior label
+            cv2.rectangle(annotated, 
+                         (10, behavior_y_offset - label_size[1] - 5), 
+                         (10 + label_size[0], behavior_y_offset + 5), 
+                         behavior_color, -1)
+            
+            cv2.putText(annotated, label, (10, behavior_y_offset),
+                       font, font_scale, (255, 255, 255), thickness)
+            behavior_y_offset += 35
+        
+        # Draw motion indicator (if motion detected) on the next free row below the behavior labels
+        if results.get('motion_detected'):
+            motion_label = f"MOTION: {results.get('motion_score', 0.0):.0f}"
+            cv2.putText(annotated, motion_label, (10, behavior_y_offset),
+                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
+            behavior_y_offset += 30
+        
+        # Draw face count indicator ('faces_detected' is only present when face recognition returned results)
+        if results.get('faces_detected', 0) > 0:
+            face_label = f"FACES: {results['faces_detected']}"
+            cv2.putText(annotated, face_label, (10, behavior_y_offset),
+                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 192, 203), 2)
+            behavior_y_offset += 30
+        
+        # Draw a one-line stats summary 10 px above the bottom edge of the frame
+        stats_text = f"Frame: {self.frame_count} | Objects: {len(results.get('objects_detected', []))} | Events: {self.stats['events_created']}"
+        cv2.putText(annotated, stats_text, (10, annotated.shape[0] - 10),
+                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        
+        return annotated
+    
+    def stop(self):
+        """Signal the frame loop to exit and release the capture device if one is held"""
+        capture, self.is_processing = self.cap, False
+        if capture:
+            capture.release()
+        logger.info("🛑 Live stream processing stopped")
+    
+    def get_stats(self) -> Dict[str, Any]:
+        """Return a copy of the counters extended with runtime, average FPS and the processing flag"""
+        started_at = self.stats['start_time']
+        elapsed = (time.time() - started_at) if started_at else 0
+        snapshot = dict(self.stats)
+        snapshot['runtime_seconds'] = elapsed
+        snapshot['fps'] = self.stats['frames_processed'] / elapsed if elapsed > 0 else 0
+        snapshot['is_processing'] = self.is_processing
+        return snapshot
+
+
+# Module-wide registry of live processors keyed by camera_id (one instance per camera)
+_live_processors = {}
+
+
+def get_live_processor(camera_id: str = "webcam_01", config: VideoProcessingConfig = None) -> LiveStreamProcessor:
+    """Return the processor registered for camera_id, building and caching it on first use"""
+    if camera_id in _live_processors:
+        return _live_processors[camera_id]
+    return _live_processors.setdefault(camera_id, LiveStreamProcessor(config, camera_id))
+
+
+def stop_live_processor(camera_id: str):
+    """Stop the processor registered for camera_id, if any, and drop it from the registry"""
+    processor = _live_processors.get(camera_id)
+    if processor is not None:
+        processor.stop()
+        del _live_processors[camera_id]
diff --git a/main_pipeline.py b/main_pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cc2346a4c390fdcf5f4ede1a52cc8c832301e9c
--- /dev/null
+++ b/main_pipeline.py
@@ -0,0 +1,665 @@
+"""
+DetectifAI Complete Video Processing Pipeline
+
+This is the main pipeline that orchestrates all DetectifAI components:
+- Optimized video processing with selective frame enhancement
+- DetectifAI event detection and security analysis
+- Object detection with fire/weapon recognition
+- Event aggregation and threat assessment
+- Highlight reel generation for security incidents
+- Compression and comprehensive reporting
+- API integration for real-time frontend updates
+"""
+
+import os
+import time
+import logging
+from typing import Dict, List, Any, Tuple, Optional
+from datetime import datetime
+import json
+
+# Import all components
+from config import VideoProcessingConfig, get_security_focused_config, get_high_recall_config
+from core.video_processing import OptimizedVideoProcessor
+from event_aggregation import EventDetector, EventDeduplicationEngine
+from video_segmentation import VideoSegmentationEngine
+from highlight_reel import HighlightReelGenerator
+from video_compression import VideoCompressor
+from json_reports import ReportGenerator
+from object_detection import ObjectDetectionIntegrator
+from behavior_analysis_integrator import BehaviorAnalysisIntegrator
+from video_captioning_integrator import VideoCaptioningIntegrator
+
+from detectifai_events import DetectifAIEventType, ThreatLevel
+
+# Set up logging.
+# logging.FileHandler opens its target file as soon as it is constructed and
+# raises FileNotFoundError when the parent directory is missing, which would
+# make importing this module fail. Create the directory first.
+os.makedirs('logs', exist_ok=True)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('logs/detectifai_pipeline.log')
+    ]
+)
+logger = logging.getLogger(__name__)
+
+class CompleteVideoProcessingPipeline:
+    """Complete video processing pipeline orchestrating all components"""
+    
+    def __init__(self, config: VideoProcessingConfig = None, db_manager=None):
+        """
+        Build the pipeline and instantiate every processing component.
+
+        Args:
+            config: VideoProcessingConfig object; a default-constructed one is
+                used when None (or any falsy value) is passed.
+            db_manager: Optional database manager. It is stored on the instance
+                and handed to the video captioning integrator.
+
+        Raises:
+            Exception: whatever a mandatory component raises while being
+                constructed is logged and re-raised unchanged.
+
+        Note:
+            When an optional component (behavior analysis, video captioning)
+            fails to initialize, the failure is only logged as a warning and the
+            matching ``enable_*`` flag on the config object is set to False.
+        """
+        self.config = config if config else VideoProcessingConfig()
+        self.db_manager = db_manager
+        self.processing_stats = dict(
+            start_time=None,
+            end_time=None,
+            total_processing_time=0,
+            component_times={},
+            memory_usage={},
+            errors=[],
+        )
+
+        logger.info("Initializing video processing pipeline components")
+
+        try:
+            cfg = self.config
+
+            # Mandatory components: any failure here aborts construction.
+            self.video_processor = OptimizedVideoProcessor(cfg)
+            self.event_detector = EventDetector(cfg)
+            self.deduplication_engine = EventDeduplicationEngine(cfg)
+            self.segmentation_engine = VideoSegmentationEngine(cfg)
+            self.highlight_generator = HighlightReelGenerator(cfg)
+            self.compressor = VideoCompressor(cfg)
+            self.report_generator = ReportGenerator(cfg)
+            self.object_detector = ObjectDetectionIntegrator(cfg)
+
+            # Optional: behavior analysis (disabled on the config if it cannot start).
+            self.behavior_analyzer = None
+            if getattr(cfg, 'enable_behavior_analysis', False):
+                try:
+                    self.behavior_analyzer = BehaviorAnalysisIntegrator(cfg)
+                    logger.info("✅ Behavior analysis enabled")
+                except Exception as behavior_err:
+                    logger.warning(f"⚠️ Behavior analysis initialization failed: {behavior_err}")
+                    cfg.enable_behavior_analysis = False
+
+            # Optional: video captioning (disabled on the config if it cannot start).
+            self.video_captioning = None
+            if getattr(cfg, 'enable_video_captioning', False):
+                try:
+                    self.video_captioning = VideoCaptioningIntegrator(cfg, db_manager=db_manager)
+                    logger.info("✅ Video captioning enabled (MongoDB + FAISS)")
+                except Exception as captioning_err:
+                    logger.warning(f"⚠️ Video captioning initialization failed: {captioning_err}")
+                    cfg.enable_video_captioning = False
+
+            logger.info("✅ All pipeline components initialized successfully")
+
+        except Exception as init_err:
+            logger.error(f"❌ Failed to initialize pipeline components: {init_err}")
+            raise
+    
+    def process_video_complete(self, video_path: str, output_name: str = None) -> Dict[str, Any]:
+        """
+        Process a video through the complete pipeline.
+
+        Steps, in order: keyframe extraction, segmentation, optional object
+        detection / behavior analysis / captioning, motion event detection,
+        DetectifAI security event processing (with optional facial
+        recognition), deduplication, optional highlight reels, optional
+        annotated video, optional compression, reports and optional segment
+        files.
+
+        Args:
+            video_path: Path to input video file
+            output_name: Optional custom output name (uses video filename if None)
+
+        Returns:
+            Dictionary with the keys 'input_video', 'output_name',
+            'config_used', 'processing_stats' (the stats dict of this run) and
+            'outputs' (per-step counts and output paths).
+
+        Raises:
+            FileNotFoundError: if video_path does not exist.
+            Exception: any error raised by a pipeline step is logged, recorded
+                in processing_stats['errors'] and re-raised.
+        """
+        logger.info("🚀 Starting complete video processing pipeline")
+        logger.info(f"📁 Input video: {video_path}")
+
+        if not os.path.exists(video_path):
+            raise FileNotFoundError(f"Video file not found: {video_path}")
+
+        # Start every run with a fresh stats dict. Reusing a single dict let
+        # errors and timings of earlier videos leak into later runs, and made
+        # all results of a batch share (and overwrite) the same stats object.
+        self.processing_stats = {
+            'start_time': time.time(),
+            'end_time': None,
+            'total_processing_time': 0,
+            'component_times': {},
+            'memory_usage': {},
+            'errors': []
+        }
+        component_times = self.processing_stats['component_times']
+
+        # Prepare output naming
+        if output_name is None:
+            output_name = os.path.splitext(os.path.basename(video_path))[0]
+
+        results = {
+            'input_video': video_path,
+            'output_name': output_name,
+            'config_used': self.config.__dict__.copy(),
+            'processing_stats': self.processing_stats,
+            'outputs': {}
+        }
+        outputs = results['outputs']
+
+        # Ensure there is a concrete output directory for this run so downstream
+        # steps (annotated video creation, reports, etc.) can write reliably.
+        output_dir = os.path.join(self.config.output_base_dir, output_name)
+        os.makedirs(output_dir, exist_ok=True)
+        outputs['output_directory'] = output_dir
+
+        try:
+            # Step 1: Extract keyframes with adaptive enhancement
+            logger.info("🎬 Step 1: Extracting keyframes with adaptive enhancement...")
+            step_start = time.time()
+
+            keyframes = self.video_processor.extract_keyframes(video_path)
+
+            component_times['keyframe_extraction'] = time.time() - step_start
+            outputs['total_keyframes'] = len(keyframes)
+
+            logger.info(f"✅ Extracted {len(keyframes)} keyframes")
+
+            # Step 2: Create video segments
+            logger.info("📊 Step 2: Creating video segments...")
+            step_start = time.time()
+
+            segments = self.segmentation_engine.create_video_segments(video_path, keyframes)
+
+            component_times['segmentation'] = time.time() - step_start
+            outputs['total_segments'] = len(segments)
+
+            logger.info(f"✅ Created {len(segments)} video segments")
+
+            # Step 3a: Object Detection (if enabled)
+            detection_results = []
+            object_events = []
+            if self.config.enable_object_detection:
+                logger.info("🎯 Step 3a: Running object detection...")
+                step_start = time.time()
+
+                detection_results, object_events = self.object_detector.process_keyframes_with_object_detection(keyframes)
+
+                component_times['object_detection'] = time.time() - step_start
+                outputs['total_object_detections'] = len(detection_results)
+                outputs['total_object_events'] = len(object_events)
+
+                logger.info(f"✅ Object detection complete: {len(object_events)} object-based events created")
+
+            # Step 3b: Behavior Analysis (if enabled)
+            # The flag is read with getattr, as in __init__, so a config object
+            # without the attribute simply skips the step.
+            behavior_results = []
+            behavior_events = []
+            if getattr(self.config, 'enable_behavior_analysis', False) and self.behavior_analyzer:
+                logger.info("🔍 Step 3b: Running behavior analysis...")
+                step_start = time.time()
+
+                behavior_results, behavior_events = self.behavior_analyzer.process_keyframes_with_behavior_analysis(keyframes)
+
+                component_times['behavior_analysis'] = time.time() - step_start
+                outputs['total_behavior_detections'] = len(behavior_results)
+                outputs['total_behavior_events'] = len(behavior_events)
+
+                logger.info(f"✅ Behavior analysis complete: {len(behavior_events)} behavior-based events created")
+
+            # Step 3c: Video Captioning (if enabled)
+            captioning_results = {}
+            if getattr(self.config, 'enable_video_captioning', False) and self.video_captioning:
+                logger.info("🎬 Step 3c: Running video captioning...")
+                step_start = time.time()
+
+                captioning_results = self.video_captioning.process_keyframes_with_captioning(
+                    keyframes,
+                    video_id=output_name
+                )
+
+                component_times['video_captioning'] = time.time() - step_start
+                outputs['total_captions'] = captioning_results.get('total_captions', 0)
+                outputs['captioning_enabled'] = captioning_results.get('enabled', False)
+
+                logger.info(f"✅ Video captioning complete: {captioning_results.get('total_captions', 0)} captions generated")
+                logger.info("💾 Captions saved to MongoDB, embeddings saved to FAISS")
+
+            # Step 4: Detect motion-based events
+            logger.info("🎯 Step 4: Detecting motion-based events...")
+            step_start = time.time()
+
+            motion_events = self.event_detector.detect_events(keyframes)
+
+            component_times['event_detection'] = time.time() - step_start
+
+            # Convert object events to standard format and combine
+            standard_object_events = []
+            if object_events:
+                standard_object_events = self.event_detector.convert_object_events_to_standard_format(object_events)
+
+            # Convert behavior events to standard format
+            standard_behavior_events = []
+            if behavior_events:
+                standard_behavior_events = self.event_detector.convert_behavior_events_to_standard_format(behavior_events)
+
+            all_events = motion_events + standard_object_events + standard_behavior_events
+            outputs['total_motion_events'] = len(motion_events)
+            outputs['total_events'] = len(all_events)
+
+            logger.info(f"✅ Detected {len(motion_events)} motion events + {len(object_events)} object events + {len(behavior_events)} behavior events = {len(all_events)} total events")
+
+            # Step 4.5: DetectifAI Security Event Processing (includes facial recognition)
+            logger.info("🔍 Step 4.5: DetectifAI Security Event Processing...")
+            step_start = time.time()
+
+            detectifai_events = []
+            facial_recognition_stats = {}
+
+            try:
+                from detectifai_events import DetectifAIEventProcessor
+
+                detectifai_processor = DetectifAIEventProcessor(self.config)
+                detectifai_events = detectifai_processor.process_security_events(
+                    keyframes=keyframes,
+                    motion_events=motion_events,
+                    object_events=object_events
+                )
+
+                # Facial recognition is best effort: the helper appends any
+                # re-occurrence events to detectifai_events in place and
+                # handles its own errors.
+                facial_recognition_stats = self._run_facial_recognition(
+                    detection_results, behavior_results, detectifai_events
+                )
+
+            except ImportError:
+                logger.info("DetectifAI events module not available - using standard event processing")
+
+            component_times['detectifai_processing'] = time.time() - step_start
+            outputs['detectifai_events'] = len(detectifai_events)
+            outputs['facial_recognition_stats'] = facial_recognition_stats
+
+            logger.info(f"✅ DetectifAI processing complete: {len(detectifai_events)} security events created")
+
+            # Step 5: Deduplicate events and create canonical events
+            logger.info("🔄 Step 5: Deduplicating events...")
+            step_start = time.time()
+
+            canonical_events, dedup_stats = self.deduplication_engine.deduplicate_events(all_events)
+
+            component_times['deduplication'] = time.time() - step_start
+            outputs['canonical_events'] = len(canonical_events)
+            outputs['deduplication_stats'] = dedup_stats
+
+            logger.info(f"✅ Created {len(canonical_events)} canonical events")
+
+            # Generate highlight reels (optional). The log messages also label
+            # this "Step 5"; they are runtime output and are left unchanged.
+            highlight_paths = {}
+            if self.config.generate_highlight_reels:
+                logger.info("🎥 Step 5: Generating highlight reels...")
+                step_start = time.time()
+
+                highlight_paths = self._generate_all_highlight_reels(segments, canonical_events)
+
+                component_times['highlight_generation'] = time.time() - step_start
+                logger.info(f"✅ Generated {len(highlight_paths)} highlight reels")
+            else:
+                logger.info("⏭️ Step 5: Skipping highlight reel generation (disabled in config)")
+
+            outputs['highlight_reels'] = highlight_paths
+
+            # Step 5.5: Create annotated video with bounding boxes (if detections exist)
+            annotated_video_path = None
+            if self.config.enable_object_detection and detection_results:
+                logger.info("🎨 Step 5.5: Creating annotated video with bounding boxes...")
+                step_start = time.time()
+
+                try:
+                    # Create annotated video with detection bounding boxes
+                    annotated_output_path = os.path.join(
+                        outputs['output_directory'],
+                        f"{output_name}_annotated.mp4"
+                    )
+
+                    annotated_video_path = self.object_detector.create_annotated_video(
+                        video_path,
+                        detection_results,
+                        annotated_output_path
+                    )
+
+                    component_times['video_annotation'] = time.time() - step_start
+
+                    if annotated_video_path:
+                        logger.info(f"✅ Annotated video created: {annotated_video_path}")
+                        outputs['annotated_video'] = annotated_video_path
+                    else:
+                        logger.warning("⚠️ Annotated video creation failed")
+
+                except Exception as e:
+                    # Annotation is optional: log and continue with the original video.
+                    logger.error(f"Error creating annotated video: {str(e)}")
+
+            # Step 6: Compress video
+            if self.config.generate_compressed_video:
+                logger.info("🗜️  Step 6: Compressing video...")
+                step_start = time.time()
+
+                # Compress the annotated video if it exists, otherwise compress original
+                video_to_compress = annotated_video_path if annotated_video_path else video_path
+
+                compressed_path = self.compressor.compress_video(
+                    video_to_compress,
+                    f"{output_name}_compressed.{self.config.video_output_format}"
+                )
+
+                outputs['compressed_video'] = compressed_path
+                component_times['compression'] = time.time() - step_start
+
+                if compressed_path:
+                    logger.info(f"✅ Video compressed: {compressed_path}")
+                else:
+                    logger.warning("⚠️ Video compression failed")
+
+            # Step 7: Generate reports
+            logger.info("📋 Step 7: Generating reports...")
+            step_start = time.time()
+
+            report_paths = self._generate_all_reports(keyframes, all_events, canonical_events, segments, detection_results, behavior_results)
+            outputs['reports'] = report_paths
+
+            component_times['report_generation'] = time.time() - step_start
+
+            logger.info(f"✅ Generated {len(report_paths)} reports")
+
+            # Step 8: Save segment files
+            if self.config.generate_segments:
+                logger.info("💾 Step 8: Saving segment files...")
+
+                # The returned metadata path was never used, so it is not kept.
+                self.segmentation_engine.save_segments_metadata(
+                    segments,
+                    os.path.join(self.config.output_base_dir, "reports", "video_segments.json")
+                )
+
+                individual_segments_saved = self.segmentation_engine.save_individual_segment_files(segments)
+
+                outputs['segments_saved'] = individual_segments_saved
+
+                logger.info("✅ Segment files saved")
+
+            # Finalize processing stats
+            self.processing_stats['end_time'] = time.time()
+            self.processing_stats['total_processing_time'] = (
+                self.processing_stats['end_time'] - self.processing_stats['start_time']
+            )
+
+            logger.info("🎉 PIPELINE COMPLETE!")
+            logger.info(f"⏱️  Total processing time: {self.processing_stats['total_processing_time']:.2f} seconds")
+
+            return results
+
+        except Exception as e:
+            logger.error(f"❌ Pipeline processing failed: {e}")
+            self.processing_stats['errors'].append(str(e))
+            raise
+
+    def _run_facial_recognition(self, detection_results: List, behavior_results: List,
+                                detectifai_events: List) -> Dict[str, Any]:
+        """Run facial recognition on suspicious frames (best effort).
+
+        Frames count as suspicious when they carry at least one object
+        detection or are reported by the behavior analyzer. Every re-occurrence
+        reported by the face tracker is converted into a DetectifAIEvent and
+        appended to ``detectifai_events`` in place.
+
+        Args:
+            detection_results: Per-frame object detection results.
+            behavior_results: Per-frame behavior analysis results.
+            detectifai_events: Security events so far; extended in place.
+
+        Returns:
+            The face detector statistics; ``{}`` when facial recognition is
+            disabled or its module cannot be imported; ``{'error': message}``
+            when processing fails. Errors are logged, never raised.
+        """
+        try:
+            from facial_recognition import FacialRecognitionIntegrated
+
+            if not getattr(self.config, 'enable_facial_recognition', False):
+                return {}
+
+            logger.info("👤 Processing facial recognition for suspicious activity frames...")
+            face_detector = FacialRecognitionIntegrated(self.config)
+
+            # Apply facial recognition ONLY to frames with suspicious activity:
+            # first the frames that have object detections ...
+            suspicious_frames = []
+            if detection_results:
+                suspicious_frames.extend(
+                    result for result in detection_results if result.total_detections > 0
+                )
+
+            # ... then the frames flagged by behavior analysis.
+            if behavior_results and self.behavior_analyzer:
+                behavior_suspicious = self.behavior_analyzer.get_suspicious_frames(behavior_results)
+                suspicious_frames.extend(behavior_suspicious)
+                logger.info(f"🔍 Found {len(behavior_suspicious)} suspicious frames from behavior analysis")
+
+            # Remove duplicates based on frame_path (frames without one are dropped)
+            seen_paths = set()
+            unique_suspicious_frames = []
+            for frame in suspicious_frames:
+                frame_path = getattr(frame, 'frame_path', None)
+                if frame_path and frame_path not in seen_paths:
+                    seen_paths.add(frame_path)
+                    unique_suspicious_frames.append(frame)
+
+            logger.info(f"👤 Applying facial recognition to {len(unique_suspicious_frames)} suspicious frames (from object detection + behavior analysis)")
+
+            # Run face detection on suspicious frames only
+            face_results = []
+            for suspicious_frame in unique_suspicious_frames:
+                frame_path = getattr(suspicious_frame, 'frame_path', None)
+                timestamp = getattr(suspicious_frame, 'timestamp', 0.0)
+
+                if frame_path and os.path.exists(frame_path):
+                    face_result = face_detector.detect_faces_in_frame(frame_path, timestamp)
+                    if face_result.faces_detected > 0:
+                        face_results.append(face_result)
+
+            # Track suspicious persons and detect re-occurrences
+            if face_results:
+                # Imported once here rather than on every loop iteration.
+                from detectifai_events import DetectifAIEvent
+
+                reoccurrence_events = face_detector.track_suspicious_persons(face_results, detectifai_events)
+
+                # Convert re-occurrence events to DetectifAI format
+                for reoccurrence in reoccurrence_events:
+                    detectifai_reoccurrence = DetectifAIEvent(
+                        event_id=reoccurrence['event_id'],
+                        event_type=DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE,
+                        threat_level=ThreatLevel.HIGH,
+                        start_timestamp=reoccurrence['start_timestamp'],
+                        end_timestamp=reoccurrence['end_timestamp'],
+                        confidence=reoccurrence.get('max_confidence', reoccurrence['confidence']),
+                        keyframes=reoccurrence['keyframes'],
+                        importance_score=reoccurrence.get('importance_score', 4.0),
+                        detection_data={
+                            'person_details': reoccurrence.get('detection_details', {}),
+                            'placeholder': True
+                        },
+                        requires_immediate_response=True,
+                        investigation_priority=5.0,
+                        description=reoccurrence.get('description', 'Suspicious person re-occurrence detected'),
+                        processing_timestamp=time.time(),
+                        detection_model_used='facial_recognition_placeholder'
+                    )
+
+                    detectifai_events.append(detectifai_reoccurrence)
+                    logger.info(f"Added facial recognition event: {detectifai_reoccurrence.event_id}")
+
+            return face_detector.get_detection_stats()
+
+        except ImportError:
+            logger.info("Facial recognition module not available - skipping")
+            return {}
+        except Exception as e:
+            logger.error(f"Error in facial recognition processing: {e}")
+            return {'error': str(e)}
+    
+    def _generate_all_highlight_reels(self, segments: List, canonical_events: List) -> Dict[str, str]:
+        """Build every highlight reel variant and collect the resulting paths.
+
+        The variants are built in order: event-aware, ultra-comprehensive,
+        quality-focused. A variant whose generator returns a falsy value is
+        left out. The first exception stops the remaining variants; it is
+        logged and recorded in ``processing_stats['errors']``, and the paths
+        collected so far are still returned.
+
+        Returns:
+            Mapping of variant name ('event_aware', 'ultra_comprehensive',
+            'quality_focused') to the generated reel path.
+        """
+        reels = {}
+
+        try:
+            generator = self.highlight_generator
+            # Lambdas keep each generator call lazy so the calls run one by
+            # one, in order, inside this try block.
+            builders = (
+                ('event_aware',
+                 lambda: generator.create_event_aware_highlight_reel(segments, canonical_events)),
+                ('ultra_comprehensive',
+                 lambda: generator.create_ultra_comprehensive_highlight_reel(segments)),
+                ('quality_focused',
+                 lambda: generator.create_quality_focused_highlight_reel(segments)),
+            )
+            for variant, build in builders:
+                reel_path = build()
+                if reel_path:
+                    reels[variant] = reel_path
+
+        except Exception as err:
+            logger.error(f"Error generating highlight reels: {err}")
+            self.processing_stats['errors'].append(f"Highlight generation error: {err}")
+
+        return reels
+    
+    def _generate_all_reports(self, keyframes: List, events: List,
+                            canonical_events: List, segments: List,
+                            detection_results: List = None, behavior_results: List = None) -> Dict[str, str]:
+        """Generate every applicable report and collect the resulting paths.
+
+        Always attempted: processing results, canonical events, video segments.
+        Conditional: object detection (enabled and results present), behavior
+        analysis (enabled, results present and analyzer available), HTML
+        gallery (enabled in the config). A report whose generator returns a
+        falsy value is left out. The first exception stops the remaining
+        reports; it is logged and recorded in ``processing_stats['errors']``,
+        and the paths collected so far are still returned.
+
+        Returns:
+            Mapping of report name to the generated report path.
+        """
+        collected = {}
+
+        def keep(label, path):
+            # Only successful (truthy) report paths are recorded.
+            if path:
+                collected[label] = path
+
+        try:
+            reporter = self.report_generator
+
+            # Processing results report (enhanced with object detection and behavior analysis)
+            keep('processing_results', reporter.generate_processing_results_report(
+                keyframes, events, canonical_events, segments,
+                self.processing_stats, detection_results, behavior_results
+            ))
+
+            # Canonical events report
+            keep('canonical_events', reporter.generate_canonical_events_report(canonical_events))
+
+            # Segments report
+            keep('video_segments', reporter.generate_segments_report(segments))
+
+            # Object detection report (if enabled)
+            if self.config.enable_object_detection and detection_results:
+                keep('object_detection', reporter.generate_object_detection_report(
+                    detection_results, self.object_detector.get_object_detection_summary()
+                ))
+
+            # Behavior analysis report (if enabled)
+            if self.config.enable_behavior_analysis and behavior_results and self.behavior_analyzer:
+                keep('behavior_analysis', reporter.generate_behavior_analysis_report(
+                    behavior_results, self.behavior_analyzer.get_behavior_analysis_summary()
+                ))
+
+            # HTML gallery (enhanced with object detection and behavior analysis)
+            if self.config.generate_html_gallery:
+                keep('html_gallery', reporter.generate_html_gallery(
+                    keyframes, canonical_events, segments, detection_results, behavior_results
+                ))
+
+        except Exception as err:
+            logger.error(f"Error generating reports: {err}")
+            self.processing_stats['errors'].append(f"Report generation error: {err}")
+
+        return collected
+    
+    def get_processing_summary(self) -> Dict[str, Any]:
+        """Summarize the recorded processing stats and key config settings.
+
+        Returns:
+            Dict with 'total_processing_time' and 'component_times' taken from
+            ``processing_stats`` (defaulting to 0 and {}), 'errors_encountered'
+            (number of recorded errors) and 'processing_config' (four selected
+            config values).
+        """
+        stats = self.processing_stats
+        cfg = self.config
+        reported_settings = (
+            'base_quality_threshold',
+            'motion_threshold',
+            'max_summary_frames',
+            'output_resolution',
+        )
+
+        elapsed = stats.get('total_processing_time', 0)
+        per_component = stats.get('component_times', {})
+        error_count = len(stats.get('errors', []))
+        config_view = {name: getattr(cfg, name) for name in reported_settings}
+
+        return {
+            'total_processing_time': elapsed,
+            'component_times': per_component,
+            'errors_encountered': error_count,
+            'processing_config': config_view,
+        }
+    
+    def process_multiple_videos(self, video_directory: str) -> Dict[str, Dict[str, Any]]:
+        """
+        Process multiple videos in a directory
+
+        Only regular files directly inside the directory whose name ends in a
+        known video extension (case-insensitive) are processed, in sorted
+        filename order. A failure on one video is logged and recorded; the
+        remaining videos are still processed.
+
+        Args:
+            video_directory: Directory containing video files
+
+        Returns:
+            Dictionary mapping video paths to processing results; a video that
+            failed maps to ``{'error': message}`` instead.
+
+        Raises:
+            FileNotFoundError: if video_directory does not exist.
+        """
+        logger.info(f"🎬 Processing multiple videos from: {video_directory}")
+
+        if not os.path.exists(video_directory):
+            raise FileNotFoundError(f"Video directory not found: {video_directory}")
+
+        # Find video files. os.listdir returns names in arbitrary order, so the
+        # listing is sorted to make the batch order reproducible; the isfile
+        # check keeps out directories that merely carry a video-like name.
+        video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')
+        video_files = []
+        for filename in sorted(os.listdir(video_directory)):
+            candidate = os.path.join(video_directory, filename)
+            if filename.lower().endswith(video_extensions) and os.path.isfile(candidate):
+                video_files.append(candidate)
+
+        logger.info(f"Found {len(video_files)} video files to process")
+
+        batch_results = {}
+        successful_count = 0
+
+        for i, video_path in enumerate(video_files, 1):
+            video_name = os.path.basename(video_path)
+            try:
+                logger.info(f"📹 Processing video {i}/{len(video_files)}: {video_name}")
+
+                batch_results[video_path] = self.process_video_complete(video_path)
+                successful_count += 1
+
+                logger.info(f"✅ Successfully processed {video_name}")
+
+            except Exception as e:
+                # Best effort: one broken video must not abort the whole batch.
+                logger.error(f"❌ Failed to process {video_name}: {e}")
+                batch_results[video_path] = {'error': str(e)}
+
+        logger.info(f"🎉 Batch processing complete: {successful_count}/{len(video_files)} successful")
+
+        return batch_results
+
+
+def main():
+    """Demonstrate pipeline usage on a sample video.
+
+    Builds a pipeline with the security-focused configuration and, when the
+    sample video exists in the working directory, processes it and prints the
+    processing summary and the produced outputs. Otherwise the available
+    configuration presets are listed.
+    """
+    print("🎬 Video Processing Pipeline Demo")
+    print("=" * 50)
+
+    # For security detection - use specialized config
+    security_config = get_security_focused_config()
+    pipeline_security = CompleteVideoProcessingPipeline(security_config)
+
+    # A high-recall pipeline (more keyframes) is built the same way from
+    # get_high_recall_config(). The demo no longer constructs one, because it
+    # was never used and initialized every component a second time.
+
+    # Example video processing
+    video_file = "rob.mp4"  # Replace with your video file
+
+    if os.path.exists(video_file):
+        print("\n🎯 Processing with security detection config...")
+        results = pipeline_security.process_video_complete(video_file)
+
+        print("\n📊 Processing Summary:")
+        summary = pipeline_security.get_processing_summary()
+        for key, value in summary.items():
+            print(f"  {key}: {value}")
+
+        # "\n" rather than "\\n": the escaped form printed a literal backslash-n.
+        print("\n📁 Output files created:")
+        for category, outputs in results['outputs'].items():
+            if isinstance(outputs, dict):
+                print(f"  {category}:")
+                for name, path in outputs.items():
+                    print(f"    - {name}: {path}")
+            else:
+                print(f"  {category}: {outputs}")
+    else:
+        print(f"❌ Video file not found: {video_file}")
+        print("\n💡 Available configuration presets:")
+        print("  - get_security_focused_config() - Optimized for security/threat detection")
+        print("  - get_high_recall_config() - More keyframes, sensitive detection")
+        print("  - get_high_precision_config() - Fewer but higher quality keyframes")
+        print("  - get_balanced_config() - General purpose settings")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/model/app.py b/model/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..0b1baf4c36049ec0ac0418d1610ad98b23fe51a9
--- /dev/null
+++ b/model/app.py
@@ -0,0 +1,640 @@
+"""
+DetectifAI - Facial Recognition with FAISS + MongoDB Integration
+Embeddings stored in FAISS, metadata stored in MongoDB, linked by face_id.
+
+Author: AI Assistant
+"""
+
+import os
+import json
+import uuid
+import numpy as np
+import cv2
+from datetime import datetime
+from pathlib import Path
+from typing import List, Tuple, Optional, Dict
+import torch
+from facenet_pytorch import MTCNN, InceptionResnetV1
+import warnings
+import joblib
+import faiss
+from pymongo import MongoClient
+from dotenv import load_dotenv
+from bson import ObjectId
+
+# Silence every warning category process-wide (this also hides deprecation
+# notices raised by the libraries imported above).
+warnings.filterwarnings('ignore')
+
+# Load environment variables
+# (load_dotenv must run before the os.getenv lookups below).
+load_dotenv()
+
+# ========================================
+# Configuration
+# ========================================
+
+# Locations of the serialized person classifier and its label encoder,
+# relative to the current working directory.
+TRAINED_MODEL_DIR = "trained_models"
+CLASSIFIER_PATH = os.path.join(TRAINED_MODEL_DIR, "classifier_svm.pkl")
+ENCODER_PATH = os.path.join(TRAINED_MODEL_DIR, "label_encoder.pkl")
+
+# Toggle for classifier-based person identification (used by the demo script).
+ENABLE_PERSON_ID = True
+# NOTE(review): DetectifAI builds PersonClassifier without passing this value,
+# so the classifier falls back to its own default threshold — confirm intent.
+CONFIDENCE_THRESHOLD = 0.5
+
+# MongoDB Configuration
+# MONGO_URI is None when the environment variable is not set.
+MONGO_URI = os.getenv("MONGO_URI")
+MONGO_DB_NAME = "detectifai"
+
+# FAISS Configuration
+# Index file and the JSON file holding the [position, face_id] pairs.
+FAISS_INDEX_PATH = "faiss_face_index.bin"
+FAISS_ID_MAP_PATH = "faiss_id_map.json"
+EMBEDDING_DIM = 512  # InceptionResnetV1 produces 512-dim embeddings
+
+
+# ========================================
+# Helper Functions from data_models.py
+# ========================================
+
+def convert_numpy_types(obj):
+    """
+    Return *obj* with NumPy values replaced by plain Python equivalents.
+
+    Dicts and lists are walked recursively; arrays become (nested) lists and
+    NumPy integer / floating / bool scalars become int / float / bool.
+    Anything else (including tuples) is returned untouched.
+    """
+    # Containers: rebuild them with converted contents.
+    if isinstance(obj, dict):
+        return {name: convert_numpy_types(item) for name, item in obj.items()}
+    if isinstance(obj, list):
+        return [convert_numpy_types(element) for element in obj]
+
+    # Arrays: tolist() already yields native Python scalars.
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()
+
+    # Scalars: map each NumPy scalar family onto its builtin constructor.
+    scalar_casts = ((np.integer, int), (np.floating, float), (np.bool_, bool))
+    for numpy_family, to_native in scalar_casts:
+        if isinstance(obj, numpy_family):
+            return to_native(obj)
+
+    return obj
+
+
+def prepare_for_mongodb(data: Dict) -> Dict:
+    """
+    Build a MongoDB-ready copy of *data*.
+
+    NumPy values are converted to native Python types, and an '_id' entry
+    whose value is None is left out of the result.
+    """
+    converted = convert_numpy_types(data)
+    return {
+        field: content
+        for field, content in converted.items()
+        if not (field == '_id' and content is None)
+    }
+
+
+def seconds_to_milliseconds(seconds: float) -> int:
+    """Return *seconds* expressed as whole milliseconds (fraction truncated by int())."""
+    milliseconds = seconds * 1000
+    return int(milliseconds)
+
+
+# ========================================
+# Person Classifier
+# ========================================
+
+class PersonClassifier:
+    """
+    Maps a face embedding to a known identity using a pre-trained classifier.
+
+    The classifier must expose ``predict_proba`` and the label encoder must
+    expose ``classes_``. If either file cannot be loaded the instance stays
+    disabled (``enabled`` is False) and ``identify_person`` returns
+    ``(None, 0.0)``.
+    """
+
+    def __init__(self, classifier_path: str, encoder_path: str, confidence_threshold: float = 0.5):
+        """
+        Args:
+            classifier_path: Path to the joblib-serialized classifier.
+            encoder_path: Path to the joblib-serialized label encoder.
+            confidence_threshold: Minimum top-class probability required to
+                report an identity.
+        """
+        self.confidence_threshold = confidence_threshold
+        self.enabled = False
+        # Always define these so they can be inspected after a failed load.
+        self.classifier = None
+        self.label_encoder = None
+
+        try:
+            # NOTE(security): joblib.load unpickles arbitrary objects; only
+            # point these paths at trusted model files.
+            classifier = joblib.load(classifier_path)
+            label_encoder = joblib.load(encoder_path)
+            identity_count = len(label_encoder.classes_)
+        except Exception as e:
+            print(f"[PersonClassifier] ⚠️ Failed to load model: {e}")
+            return
+
+        # Enable only after both artifacts loaded and the encoder was validated.
+        self.classifier = classifier
+        self.label_encoder = label_encoder
+        self.enabled = True
+        print(f"[PersonClassifier] ✅ Model loaded, {identity_count} identities recognized.")
+
+    def identify_person(self, embedding: np.ndarray) -> Tuple[Optional[str], float]:
+        """
+        Classify one embedding.
+
+        Returns:
+            ``(label, confidence)`` when the best class probability reaches the
+            threshold, ``(None, confidence)`` when it does not, and
+            ``(None, 0.0)`` when the classifier is disabled or prediction fails.
+        """
+        if not self.enabled:
+            return None, 0.0
+        try:
+            probs = self.classifier.predict_proba(embedding.reshape(1, -1))[0]
+            best_idx = np.argmax(probs)
+            conf = probs[best_idx]
+            if conf >= self.confidence_threshold:
+                return self.label_encoder.classes_[best_idx], float(conf)
+            return None, float(conf)
+        except Exception as e:
+            # Best-effort: a failing classifier must not abort video processing.
+            print(f"[PersonClassifier] Error: {e}")
+            return None, 0.0
+
+
+# ========================================
+# Face Detection and Embedding
+# ========================================
+
+class FaceDetector:
+    """Finds faces in BGR frames with MTCNN and returns the cropped face tensors."""
+
+    def __init__(self, device='cpu', min_face_size=20):
+        """Build the MTCNN detector on *device*; keep_all=True so every face in a frame is returned."""
+        self.device = torch.device(device)
+        mtcnn_options = dict(
+            image_size=160,
+            margin=20,
+            min_face_size=min_face_size,
+            thresholds=[0.5, 0.6, 0.6],
+            factor=0.709,
+            keep_all=True,
+            device=self.device,
+        )
+        self.mtcnn = MTCNN(**mtcnn_options)
+        print(f"[FaceDetector] Initialized on {device}")
+
+    def detect_faces(self, frame: np.ndarray) -> Tuple[List[np.ndarray], List[np.ndarray], List[float]]:
+        """
+        Detect faces in a BGR frame.
+
+        Returns:
+            Three parallel lists (face crops, bounding boxes, detection
+            probabilities) restricted to detections with probability above 0.1.
+            All three are empty when nothing is detected.
+        """
+        nothing_found = ([], [], [])
+
+        # MTCNN is fed RGB; the incoming frame is converted from BGR first.
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        boxes, probs = self.mtcnn.detect(rgb_frame, landmarks=False)
+        if boxes is None:
+            return nothing_found
+
+        faces = self.mtcnn.extract(rgb_frame, boxes, save_path=None)
+        if faces is None:
+            return nothing_found
+
+        # Keep only detections above the minimum probability.
+        accepted = [
+            (crop, bbox, float(score))
+            for crop, score, bbox in zip(faces, probs, boxes)
+            if score > 0.1
+        ]
+        valid_faces = [crop for crop, _, _ in accepted]
+        valid_boxes = [bbox for _, bbox, _ in accepted]
+        valid_probs = [score for _, _, score in accepted]
+        return valid_faces, valid_boxes, valid_probs
+
+
+class FaceEmbedder:
+    """Turns a cropped face tensor into an L2-normalized embedding vector."""
+
+    def __init__(self, device='cpu', weights='vggface2'):
+        """Load InceptionResnetV1 with the given pretrained *weights* in eval mode on *device*."""
+        self.device = torch.device(device)
+        network = InceptionResnetV1(pretrained=weights)
+        self.model = network.eval().to(self.device)
+        print(f"[FaceEmbedder] Loaded InceptionResnetV1 on {device}")
+
+    def generate_embedding(self, face_tensor: torch.Tensor) -> np.ndarray:
+        """Run one face through the model and return the flattened, unit-length embedding."""
+        # The model is called on a batch, so add a leading batch axis of size 1.
+        batch = face_tensor.unsqueeze(0).to(self.device)
+        with torch.no_grad():
+            raw_output = self.model(batch)
+        vector = raw_output.cpu().numpy().flatten()
+        return vector / np.linalg.norm(vector)
+
+
+# ========================================
+# FAISS Index Manager (Embeddings Only)
+# ========================================
+
+class FAISSFaceIndex:
+    """
+    FAISS index manager for fast similarity search of face embeddings.
+    Stores ONLY embeddings in FAISS, metadata goes to MongoDB.
+
+    Two dictionaries tie FAISS row positions to face IDs:
+    ``id_map`` (position -> face_id) and ``reverse_map`` (face_id -> position).
+    They are persisted as a JSON list of ``[position, face_id]`` pairs next to
+    the binary index file.
+    """
+
+    def __init__(self, embedding_dim: int = 512, index_path: str = FAISS_INDEX_PATH,
+                 id_map_path: str = FAISS_ID_MAP_PATH):
+        """
+        Args:
+            embedding_dim: Dimensionality used when a new index is created.
+            index_path: File the FAISS index is read from / written to.
+            id_map_path: JSON file holding the position <-> face_id pairs.
+        """
+        self.embedding_dim = embedding_dim
+        self.index_path = index_path
+        self.id_map_path = id_map_path
+        self.index = None
+        self.id_map = {}  # Maps FAISS index position -> face_id
+        self.reverse_map = {}  # Maps face_id -> FAISS index position
+
+        self._load_or_create_index()
+
+    @staticmethod
+    def _normalize(embedding: np.ndarray) -> np.ndarray:
+        """
+        Return *embedding* as a float32 row vector scaled to unit length.
+
+        A zero-norm vector is returned unscaled: dividing by zero would put
+        NaNs into the index and poison every later similarity score.
+        """
+        row = embedding.astype('float32').reshape(1, -1)
+        norm = np.linalg.norm(row)
+        if norm > 0:
+            row = row / norm
+        return row
+
+    def _load_or_create_index(self):
+        """Load existing FAISS index or create new one"""
+        if not (os.path.exists(self.index_path) and os.path.exists(self.id_map_path)):
+            self._create_new_index()
+            return
+
+        try:
+            index = faiss.read_index(self.index_path)
+            with open(self.id_map_path, 'r') as f:
+                # Stored as a list of [position, face_id] pairs
+                id_list = json.load(f)
+            id_map = {int(k): v for k, v in id_list}
+            reverse_map = {v: int(k) for k, v in id_list}
+        except Exception as e:
+            print(f"[FAISS] ⚠️ Error loading index: {e}. Creating new index.")
+            self._create_new_index()
+            return
+
+        # Commit only after both files parsed, so state is never half-loaded.
+        self.index = index
+        self.id_map = id_map
+        self.reverse_map = reverse_map
+        if len(self.id_map) != self.index.ntotal:
+            print(f"[FAISS] ⚠️ ID map has {len(self.id_map)} entries but index holds "
+                  f"{self.index.ntotal} embeddings; unmapped results will be skipped")
+        print(f"[FAISS] ✅ Loaded index with {self.index.ntotal} embeddings")
+
+    def _create_new_index(self):
+        """Create new FAISS index using IndexFlatIP for cosine similarity (Inner Product)"""
+        # Using IndexFlatIP for cosine similarity on normalized vectors
+        self.index = faiss.IndexFlatIP(self.embedding_dim)
+        self.id_map = {}
+        self.reverse_map = {}
+        print(f"[FAISS] ✅ Created new index (dim={self.embedding_dim}, metric=InnerProduct)")
+
+    def add_embedding(self, face_id: str, embedding: np.ndarray) -> int:
+        """
+        Add face embedding to FAISS index.
+        Returns the FAISS index position.
+
+        If *face_id* is already indexed nothing is added and its existing
+        position is returned.
+        """
+        if face_id in self.reverse_map:
+            print(f"[FAISS] ⚠️ Face {face_id} already in index, skipping")
+            return self.reverse_map[face_id]
+
+        # Ensure embedding is normalized and correct shape
+        embedding = self._normalize(embedding)
+
+        # The new vector lands at position ntotal (taken before the add)
+        idx = self.index.ntotal
+        self.index.add(embedding)
+
+        # Update mappings
+        self.id_map[idx] = face_id
+        self.reverse_map[face_id] = idx
+
+        print(f"[FAISS] Added {face_id} at index {idx}")
+        return idx
+
+    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Tuple[str, float]]:
+        """
+        Search for similar faces in FAISS index using cosine similarity.
+
+        Args:
+            query_embedding: Face embedding to search for
+            k: Number of nearest neighbors to return
+            threshold: Minimum similarity score (0-1)
+
+        Returns:
+            List of (face_id, similarity_score) tuples; empty when the index
+            is empty or *k* is not positive.
+        """
+        if self.index.ntotal == 0 or k <= 0:
+            return []
+
+        # Prepare query - normalize for cosine similarity
+        query_embedding = self._normalize(query_embedding)
+
+        # Search FAISS index (IndexFlatIP returns inner product = cosine similarity for normalized vectors)
+        similarities, indices = self.index.search(query_embedding, min(k, self.index.ntotal))
+
+        # Filter by threshold and return results
+        results = []
+        for sim, idx in zip(similarities[0], indices[0]):
+            if idx == -1:  # FAISS returns -1 for invalid results
+                continue
+            similarity = float(sim)  # Already cosine similarity
+            if similarity < threshold:
+                continue
+            # Skip positions with no ID mapping instead of raising KeyError
+            # when the index file and the ID map are out of sync.
+            face_id = self.id_map.get(int(idx))
+            if face_id is None:
+                continue
+            results.append((face_id, similarity))
+
+        return results
+
+    def get_embedding(self, face_id: str) -> Optional[np.ndarray]:
+        """Retrieve embedding from FAISS by face_id (None if the ID is unknown)"""
+        if face_id not in self.reverse_map:
+            return None
+
+        idx = self.reverse_map[face_id]
+        embedding = self.index.reconstruct(int(idx))
+        return embedding
+
+    def save(self):
+        """Save FAISS index and ID mappings to disk"""
+        faiss.write_index(self.index, self.index_path)
+        with open(self.id_map_path, 'w') as f:
+            # Convert to list of pairs for JSON serialization
+            id_list = [[k, v] for k, v in self.id_map.items()]
+            json.dump(id_list, f)
+        print(f"[FAISS] 💾 Saved index ({self.index.ntotal} embeddings)")
+
+    def rebuild_from_mongodb(self, mongo_db):
+        """
+        Rebuild FAISS index from MongoDB detected_faces collection.
+        NOTE: This requires embeddings to be stored in MongoDB temporarily,
+        or you need to re-extract embeddings from face images.
+
+        If MongoDB yields no usable embedding while the current index is
+        non-empty, the current index is kept and nothing is written to disk,
+        so a rebuild cannot silently wipe the only copy of the embeddings.
+        """
+        print("[FAISS] 🔄 Rebuilding index from MongoDB...")
+        previous_state = (self.index, self.id_map, self.reverse_map)
+        self._create_new_index()
+
+        faces_collection = mongo_db['detected_faces']
+        count = 0
+
+        # Only works if face_embedding exists in MongoDB
+        for face_doc in faces_collection.find({"face_embedding": {"$exists": True, "$ne": []}}):
+            face_id = face_doc['face_id']
+            embedding = np.array(face_doc['face_embedding'], dtype=np.float32)
+
+            # Ignore documents whose vector does not match the index dimension
+            if len(embedding) == self.embedding_dim:
+                self.add_embedding(face_id, embedding)
+                count += 1
+
+        previous_index = previous_state[0]
+        if count == 0 and previous_index is not None and previous_index.ntotal > 0:
+            self.index, self.id_map, self.reverse_map = previous_state
+            print("[FAISS] ⚠️ No embeddings found in MongoDB; keeping existing index")
+            return
+
+        self.save()
+        print(f"[FAISS] ✅ Rebuilt index with {count} faces from MongoDB")
+
+
+# ========================================
+# MongoDB Storage Handler (Metadata Only)
+# ========================================
+
+class MongoDBFaceStorage:
+    """
+    Stores face metadata in MongoDB Atlas (NO embeddings).
+    Embeddings are stored in FAISS only.
+
+    Uses three collections of the selected database: 'detected_faces',
+    'face_matches' and 'event'. The save_* methods work on a copy of the
+    dictionary they receive, so the caller's object is never modified.
+    """
+
+    def __init__(self, mongo_uri: str, db_name: str = MONGO_DB_NAME):
+        """Create the client for *mongo_uri* and bind the collections of *db_name*."""
+        self.client = MongoClient(mongo_uri)
+        self.db = self.client[db_name]
+        self.faces_collection = self.db['detected_faces']
+        self.matches_collection = self.db['face_matches']
+        self.events_collection = self.db['event']
+        print(f"[MongoDB] ✅ Connected to {db_name}")
+
+    def save_face(self, data: Dict) -> str:
+        """
+        Save detected face metadata to MongoDB (NO embedding).
+        Embedding is stored in FAISS separately.
+
+        'detected_at' is always set to the current UTC time and
+        'face_embedding' is always stored as an empty list, whatever *data*
+        contained. Returns the inserted document's _id as a string.
+        """
+        # Shallow copy: the fields below must not leak into the caller's dict.
+        record = dict(data)
+        record['detected_at'] = datetime.utcnow()
+
+        # Embedding goes to FAISS only; the schema still requires the field,
+        # so it is stored as an empty array.
+        record['face_embedding'] = []
+
+        record = prepare_for_mongodb(record)
+
+        result = self.faces_collection.insert_one(record)
+        print(f"[MongoDB] Face saved: {record['face_id']} (metadata only)")
+        return str(result.inserted_id)
+
+    def save_face_match(self, match_data: Dict) -> str:
+        """Save face match to MongoDB, stamped with the current UTC time; returns the inserted _id as a string."""
+        record = dict(match_data)
+        record['matched_at'] = datetime.utcnow()
+        record = prepare_for_mongodb(record)
+
+        result = self.matches_collection.insert_one(record)
+        print(f"[MongoDB] Match saved: {record['match_id']}")
+        return str(result.inserted_id)
+
+    def save_event(self, event_data: Dict) -> str:
+        """Save event to MongoDB; returns the inserted _id as a string."""
+        event_data = prepare_for_mongodb(event_data)
+
+        result = self.events_collection.insert_one(event_data)
+        print(f"[MongoDB] Event saved: {event_data['event_id']}")
+        return str(result.inserted_id)
+
+    def get_face_by_id(self, face_id: str) -> Optional[Dict]:
+        """Retrieve face metadata by face_id (no embedding)"""
+        return self.faces_collection.find_one({"face_id": face_id})
+
+    def update_face_metadata(self, face_id: str, update_data: Dict):
+        """Update face metadata in MongoDB ($set of *update_data* on the document with this face_id)"""
+        update_data = prepare_for_mongodb(update_data)
+        self.faces_collection.update_one(
+            {"face_id": face_id},
+            {"$set": update_data}
+        )
+
+    def close(self):
+        """Close MongoDB connection"""
+        self.client.close()
+
+
+# ========================================
+# Main DetectifAI Pipeline
+# ========================================
+
+class DetectifAI:
+    """
+    Face pipeline for one video: detect faces, embed them, match them against
+    the FAISS index, record metadata in MongoDB and optionally write an
+    annotated copy of the video.
+    """
+
+    def __init__(self, video_path: str, event_id: str, frame_skip: int = 5,
+                 output_faces_dir: str = "faces", device: str = "cpu",
+                 output_video_path: Optional[str] = "output_annotated.mp4",
+                 enable_person_id: bool = True, classifier_path=None, encoder_path=None,
+                 similarity_threshold: float = 0.6):
+        """
+        Args:
+            video_path: Video file to process.
+            event_id: Identifier embedded in generated face / match IDs.
+            frame_skip: Only frames whose 1-based number is a multiple of this
+                value are analysed; the others are copied through unchanged.
+            output_faces_dir: Directory for cropped face images (created if missing).
+            device: Torch device string for the detector and embedder.
+            output_video_path: Destination of the annotated video; a falsy
+                value disables video output.
+            enable_person_id: Load the person classifier when both paths are given.
+            classifier_path: Path to the serialized classifier.
+            encoder_path: Path to the serialized label encoder.
+            similarity_threshold: Minimum FAISS similarity for a match.
+        """
+        self.video_path = video_path
+        self.event_id = event_id
+        self.frame_skip = frame_skip
+        self.output_faces_dir = Path(output_faces_dir)
+        # parents=True so a nested output directory does not raise.
+        self.output_faces_dir.mkdir(parents=True, exist_ok=True)
+        self.output_video_path = output_video_path
+        self.similarity_threshold = similarity_threshold
+
+        # Initialize ML components
+        self.detector = FaceDetector(device=device)
+        self.embedder = FaceEmbedder(device=device)
+
+        # Initialize FAISS (embeddings) + MongoDB (metadata)
+        self.faiss_index = FAISSFaceIndex()
+        self.storage = MongoDBFaceStorage(MONGO_URI)
+
+        # Initialize person classifier
+        self.person_classifier = None
+        if enable_person_id and classifier_path and encoder_path:
+            self.person_classifier = PersonClassifier(classifier_path, encoder_path)
+
+    def _generate_face_id(self, frame_number: int, face_index: int, person_name: Optional[str] = None) -> str:
+        """Generate unique face ID from the person name (or 'unknown'), event, frame, face index and a random suffix."""
+        prefix = f"{person_name.replace(' ', '_')}" if person_name else "unknown"
+        unique_id = str(uuid.uuid4())[:8]
+        return f"face_{prefix}_{self.event_id}_{frame_number:06d}_{face_index:02d}_{unique_id}"
+
+    def _save_face_image(self, face_tensor: torch.Tensor, face_id: str) -> str:
+        """Save face image to disk as <face_id>.jpg and return its path."""
+        # detach/cpu so the conversion also works for tensors living on a GPU.
+        face_np = face_tensor.detach().cpu().permute(1, 2, 0).numpy()
+        # Map the tensor from roughly [-1, 1] to [0, 255]; clip first so
+        # out-of-range values cannot wrap around in the uint8 cast.
+        face_np = np.clip((face_np + 1) / 2 * 255, 0, 255).astype(np.uint8)
+        face_bgr = cv2.cvtColor(face_np, cv2.COLOR_RGB2BGR)
+        path = self.output_faces_dir / f"{face_id}.jpg"
+        cv2.imwrite(str(path), face_bgr)
+        return str(path)
+
+    @staticmethod
+    def _annotate_frame(frame, box, person_name, conf, similarity) -> None:
+        """Draw the bounding box and label for one face onto *frame* (in place)."""
+        (x1, y1, x2, y2) = map(int, box)
+
+        # Color coding:
+        # Green = Identified person (classifier)
+        # Orange = Matched face (FAISS)
+        # Red = New face
+        if person_name:
+            color = (0, 255, 0)  # Green
+            label = f"{person_name} ({conf:.2f})"
+        elif similarity > 0:
+            color = (0, 165, 255)  # Orange
+            label = f"Match ({similarity:.2f})"
+        else:
+            color = (0, 0, 255)  # Red
+            label = "New Face"
+
+        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+        cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
+
+    def process_video(self):
+        """
+        Process video: detect faces, store embeddings in FAISS, metadata in MongoDB, annotate video.
+
+        Workflow:
+        1. Detect faces and generate embeddings
+        2. Search FAISS for similar faces
+        3. If match: Link to existing face_id, save match to MongoDB
+        4. If new: Save metadata to MongoDB, save embedding to FAISS
+        5. Annotate video frame with results
+
+        The capture, the writer and the MongoDB client are released even when
+        processing fails, and the FAISS index is saved in that case too.
+
+        Raises:
+            ValueError: If the video cannot be opened.
+        """
+        cap = cv2.VideoCapture(self.video_path)
+        if not cap.isOpened():
+            raise ValueError(f"Cannot open video: {self.video_path}")
+
+        out = None
+        frame_number = 0
+        new_faces = 0
+        total_matches = 0
+
+        try:
+            # Keep FPS as a float: truncating e.g. 29.97 to 29 skews both the
+            # computed duration and the timing of the written video.
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            duration_secs = total_frames / fps if fps > 0 else 0
+
+            # Setup video writer (only when an output path was requested)
+            if self.output_video_path:
+                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+                out = cv2.VideoWriter(self.output_video_path, fourcc, fps, (width, height))
+
+            # Save event metadata to MongoDB
+            event_data = {
+                'event_id': self.event_id,
+                'video_id': f"video_{self.event_id}",
+                'start_timestamp_ms': 0,
+                'end_timestamp_ms': seconds_to_milliseconds(duration_secs),
+                'event_type': 'face_detection',
+                'confidence_score': 0.0,
+                'is_verified': False,
+                'is_false_positive': False,
+                'bounding_boxes': {}
+            }
+            self.storage.save_event(event_data)
+
+            print(f"\n[DetectifAI] 🎬 Processing video: {self.video_path}")
+            print(f"[DetectifAI] 📊 Total frames: {total_frames}, FPS: {fps:.2f}, Duration: {duration_secs:.2f}s")
+            print(f"[DetectifAI] 🔍 Similarity threshold: {self.similarity_threshold}")
+            print(f"[DetectifAI] 📦 Storage: FAISS (embeddings) + MongoDB (metadata)")
+
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                frame_number += 1
+
+                # Progress indicator; the reported frame count can be 0, so
+                # the percentage is only computed when it is positive.
+                if frame_number % 100 == 0:
+                    if total_frames > 0:
+                        progress = (frame_number / total_frames) * 100
+                        print(f"[DetectifAI] Processing... {progress:.1f}% ({frame_number}/{total_frames})")
+                    else:
+                        print(f"[DetectifAI] Processing... frame {frame_number}")
+
+                # Skipped frames are copied to the output without analysis
+                if frame_number % self.frame_skip != 0:
+                    if out is not None:
+                        out.write(frame)
+                    continue
+
+                # Detect faces
+                faces, boxes, probs = self.detector.detect_faces(frame)
+
+                for i, (face, box, prob) in enumerate(zip(faces, boxes, probs)):
+                    # Generate embedding
+                    embedding = self.embedder.generate_embedding(face)
+
+                    # Identify person (if classifier enabled)
+                    person_name, conf = (None, 0.0)
+                    if self.person_classifier and self.person_classifier.enabled:
+                        person_name, conf = self.person_classifier.identify_person(embedding)
+
+                    # Search FAISS for similar faces (embeddings stored in FAISS only)
+                    matches = self.faiss_index.search(embedding, k=1, threshold=self.similarity_threshold)
+
+                    if matches:
+                        # Face match found - link to existing face
+                        matched_face_id, similarity = matches[0]
+
+                        # Save match to MongoDB
+                        match_data = {
+                            'match_id': str(uuid.uuid4()),
+                            'face_id_1': matched_face_id,
+                            'face_id_2': f"detection_{self.event_id}_{frame_number:06d}_{i:02d}",
+                            'similarity_score': float(similarity)
+                        }
+                        self.storage.save_face_match(match_data)
+                        total_matches += 1
+
+                        # Update existing face metadata (e.g., last seen)
+                        self.storage.update_face_metadata(
+                            matched_face_id,
+                            {'last_seen_frame': frame_number, 'last_seen_at': datetime.utcnow()}
+                        )
+                    else:
+                        # New face detected
+                        face_id = self._generate_face_id(frame_number, i, person_name)
+                        face_path = self._save_face_image(face, face_id)
+
+                        # Save metadata to MongoDB (NO embedding)
+                        face_data = {
+                            'face_id': face_id,
+                            'event_id': self.event_id,
+                            'detected_at': datetime.utcnow(),
+                            'confidence_score': float(conf) if person_name else float(prob),
+                            'face_image_path': face_path,
+                            'bounding_boxes': {
+                                'x1': int(box[0]),
+                                'y1': int(box[1]),
+                                'x2': int(box[2]),
+                                'y2': int(box[3])
+                            },
+                            'first_seen_frame': frame_number,
+                            'last_seen_frame': frame_number,
+                            'person_name': person_name,
+                            'person_confidence': float(conf) if person_name else None
+                        }
+                        self.storage.save_face(face_data)
+
+                        # Save embedding to FAISS ONLY
+                        self.faiss_index.add_embedding(face_id, embedding)
+                        new_faces += 1
+                        similarity = 0.0
+
+                    # Draw annotations on frame
+                    self._annotate_frame(frame, box, person_name, conf, similarity)
+
+                if out is not None:
+                    out.write(frame)
+        finally:
+            cap.release()
+            if out is not None:
+                out.release()
+            try:
+                # Save FAISS index to disk. Done on failure as well, so the
+                # embeddings stay in step with metadata already in MongoDB.
+                self.faiss_index.save()
+            finally:
+                # Close MongoDB connection
+                self.storage.close()
+
+        print(f"\n[DetectifAI] ✅ Processing complete!")
+        print(f"[DetectifAI] 📊 Statistics:")
+        print(f"  - New faces detected: {new_faces}")
+        print(f"  - Face matches found: {total_matches}")
+        print(f"  - Total faces in FAISS index: {self.faiss_index.index.ntotal}")
+        print(f"[DetectifAI] 🎥 Annotated video: {self.output_video_path}")
+        print(f"[DetectifAI] 💾 FAISS index saved: {FAISS_INDEX_PATH}")
+        print(f"[DetectifAI] 🗄️  Metadata stored in MongoDB Atlas")
+
+
+# ========================================
+# Example Usage
+# ========================================
+
+if __name__ == "__main__":
+    # Demo run: process one hard-coded video with a timestamp-based event ID.
+    VIDEO_PATH = "suspicious_activity.mp4"
+    EVENT_ID = f"event_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+    # Use the GPU when torch reports one, otherwise stay on the CPU.
+    DEVICE = "cpu"
+    if torch.cuda.is_available():
+        DEVICE = "cuda"
+
+    banner_rule = "="*60
+    print(banner_rule)
+    print("DetectifAI - FAISS (Embeddings) + MongoDB (Metadata)")
+    print(banner_rule)
+
+    pipeline_options = dict(
+        video_path=VIDEO_PATH,
+        event_id=EVENT_ID,
+        frame_skip=5,
+        device=DEVICE,
+        enable_person_id=ENABLE_PERSON_ID,
+        classifier_path=CLASSIFIER_PATH,
+        encoder_path=ENCODER_PATH,
+        similarity_threshold=0.6,
+    )
+    detectif = DetectifAI(**pipeline_options)
+
+    detectif.process_video()
+
+    # Closing summary of where each kind of data ended up.
+    closing_lines = (
+        "\n✅ All done!",
+        "📦 Embeddings: Stored in FAISS (faiss_face_index.bin)",
+        "🗄️  Metadata: Stored in MongoDB Atlas",
+        "🔗 Linked by: face_id",
+    )
+    for closing_line in closing_lines:
+        print(closing_line)
\ No newline at end of file
diff --git a/model/faiss_face_index.bin b/model/faiss_face_index.bin
new file mode 100644
index 0000000000000000000000000000000000000000..825b45221ee815c2f0a48342ab955616263d567f
--- /dev/null
+++ b/model/faiss_face_index.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64274a5f57d9bc6f058e8bc3a870b0c35861369f13f3b899a416a89b6c85a3c2
+size 139309
diff --git a/model/faiss_id_map.json b/model/faiss_id_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d7f3c8d3365fb32c2c3b1fc533362f87dad22282
--- /dev/null
+++ b/model/faiss_id_map.json
@@ -0,0 +1 @@
+[[0, "face_n000228_event_20251114_143630_000005_00_a6b777d9"], [1, "face_unknown_event_20251114_143630_000005_01_958373b6"], [2, "face_unknown_event_20251114_143630_000005_02_e791541d"], [3, "face_unknown_event_20251114_143630_000005_03_96a8d640"], [4, "face_unknown_event_20251114_143630_000005_04_f3f00d0d"], [5, "face_unknown_event_20251114_143630_000005_05_f2c98134"], [6, "face_n000203_event_20251114_143630_000005_06_dd4b32c8"], [7, "face_unknown_event_20251114_143630_000005_07_3414ecee"], [8, "face_unknown_event_20251114_143630_000005_08_a0458cb1"], [9, "face_unknown_event_20251114_143630_000010_00_09f4f6dd"], [10, "face_unknown_event_20251114_143630_000010_03_de8ac817"], [11, "face_unknown_event_20251114_143630_000015_04_10b78b10"], [12, "face_unknown_event_20251114_143630_000020_06_5f7318de"], [13, "face_unknown_event_20251114_143630_000020_07_48c16e9b"], [14, "face_unknown_event_20251114_143630_000020_08_8d077aa4"], [15, "face_unknown_event_20251114_143630_000020_10_bab8232d"], [16, "face_unknown_event_20251114_143630_000025_01_762a0607"], [17, "face_n000203_event_20251114_143630_000025_03_6aded09b"], [18, "face_unknown_event_20251114_143630_000025_07_d073aacd"], [19, "face_n000202_event_20251114_143630_000030_02_c052169c"], [20, "face_unknown_event_20251114_143630_000030_05_be617641"], [21, "face_unknown_event_20251114_143630_000060_04_b86fdac1"], [22, "face_unknown_event_20251114_143630_000060_10_d74b2583"], [23, "face_unknown_event_20251114_143630_000065_05_b075fb2e"], [24, "face_unknown_event_20251114_143630_000065_09_c95106bc"], [25, "face_unknown_event_20251114_143630_000075_04_bbf8c6b4"], [26, "face_unknown_event_20251114_143630_000075_06_0c3474a3"], [27, "face_unknown_event_20251114_143630_000075_07_214cdec6"], [28, "face_n000302_event_20251114_143630_000075_08_f3369ec8"], [29, "face_unknown_event_20251114_143630_000075_09_11c89dc9"], [30, "face_unknown_event_20251114_143630_000080_03_94c5b51b"], [31, 
"face_unknown_event_20251114_143630_000080_06_fc2fc5bf"], [32, "face_unknown_event_20251114_143630_000090_05_b3960d12"], [33, "face_unknown_event_20251114_143630_000095_00_07e32cb1"], [34, "face_unknown_event_20251114_143630_000100_07_2b019d14"], [35, "face_unknown_event_20251114_143630_000120_01_29213415"], [36, "face_unknown_event_20251114_143630_000120_05_9cb0973f"], [37, "face_unknown_event_20251114_143630_000140_01_a53b508d"], [38, "face_unknown_event_20251114_143630_000150_04_5c79f631"], [39, "face_unknown_event_20251114_143630_000170_02_8ed019c6"], [40, "face_n000203_event_20251114_143630_000180_01_80a82782"], [41, "face_unknown_event_20251114_143630_000180_03_df2fc5bb"], [42, "face_unknown_event_20251114_143630_000195_02_b4bd72e3"], [43, "face_n000266_event_20251114_143630_000205_03_efb166bd"], [44, "face_unknown_event_20251114_143630_000210_02_281227a9"], [45, "face_n000302_event_20251114_143630_000230_02_cad52f76"], [46, "face_unknown_event_20251114_143630_000235_03_5a836b3c"], [47, "face_unknown_event_20251114_143630_000240_04_869c1da7"], [48, "face_unknown_event_20251114_143630_000250_00_c160e63a"], [49, "face_unknown_event_20251114_143630_000265_02_1022540e"], [50, "face_unknown_event_20251114_143630_000265_03_35eabf08"], [51, "face_unknown_event_20251114_143630_000280_01_da47894b"], [52, "face_unknown_event_20251114_143630_000285_01_c2ae6c51"], [53, "face_unknown_event_20251114_143630_000295_02_7314be44"], [54, "face_unknown_event_20251114_143630_000295_04_e68c3151"], [55, "face_n000203_event_20251114_143630_000305_00_48b969ab"], [56, "face_unknown_event_20251114_143630_000305_05_20046e9f"], [57, "face_unknown_event_20251114_143630_000340_04_5c42ee97"], [58, "face_unknown_event_20251114_143630_000370_02_68dde01a"], [59, "face_unknown_event_20251114_143630_000385_02_9d3e54f2"], [60, "face_n000185_event_20251114_143630_000390_01_7068c7cc"], [61, "face_unknown_event_20251114_143630_000390_02_4df50034"], [62, 
"face_unknown_event_20251114_143630_000395_01_0b426b78"], [63, "face_n000255_event_20251114_143630_000400_01_381f4a45"], [64, "face_unknown_event_20251114_143630_000405_00_ea6834f7"], [65, "face_unknown_event_20251114_143630_000405_01_38293ae3"], [66, "face_unknown_event_20251114_143630_000405_02_cc568829"], [67, "face_n000163_event_20251114_143630_000445_00_b0685ae3"]]
\ No newline at end of file
diff --git a/model/simple_face_index.json b/model/simple_face_index.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b
--- /dev/null
+++ b/model/simple_face_index.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/model/trained_models/README.md b/model/trained_models/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d0b47e3b83022a3b2c9926b19a3c522702641ee5
--- /dev/null
+++ b/model/trained_models/README.md
@@ -0,0 +1,27 @@
+# DetectifAI Model
+
+Accuracy: 99.35%
+Classifier: svm
+Classes: 30
+
+## Files
+- classifier_svm.pkl
+- label_encoder.pkl
+- metadata.json
+
+## Integration
+
+```python
+import joblib
+
+classifier = joblib.load('classifier_svm.pkl')
+encoder = joblib.load('label_encoder.pkl')
+
+# Use with DetectifAI
+detectif = DetectifAI(
+    ...,
+    classifier_path='trained_models/classifier_svm.pkl',
+    encoder_path='trained_models/label_encoder.pkl',
+    enable_person_id=True
+)
+```
diff --git a/model/trained_models/class_centroids.pkl b/model/trained_models/class_centroids.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..b111dff87893f85fd0a74d5f32a175b7b8ba9cd7
--- /dev/null
+++ b/model/trained_models/class_centroids.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8b3ba14d19317fe46bcd79d82778904482802648b742f78b6db1bc5c8e42c0b
+size 64082
diff --git a/model/trained_models/classifier_svm.pkl b/model/trained_models/classifier_svm.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..4ff2a60a60c58a0427314f97029e01d39dc675ef
--- /dev/null
+++ b/model/trained_models/classifier_svm.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42d17990a87a25c0ba8f928ef61eee80790a9cec6dae0812d3e7deae8ae713dc
+size 10932115
diff --git a/model/trained_models/label_encoder.pkl b/model/trained_models/label_encoder.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e952b274b71d2b5a7818682a505c44458a8f08ef
--- /dev/null
+++ b/model/trained_models/label_encoder.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dca733da3a44c15f1d24818701bca4c71b6c96da3dba67c65970813a72cb665d
+size 1167
diff --git a/model/trained_models/metadata.json b/model/trained_models/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..ab5798c0eff78c4185ccf0cf8da9870efc47bb06
--- /dev/null
+++ b/model/trained_models/metadata.json
@@ -0,0 +1,40 @@
+{
+  "accuracy": 0.9934621944286527,
+  "classifier": "svm",
+  "classes": 30,
+  "people": [
+    "n000080",
+    "n000081",
+    "n000097",
+    "n000103",
+    "n000163",
+    "n000176",
+    "n000185",
+    "n000202",
+    "n000203",
+    "n000225",
+    "n000227",
+    "n000228",
+    "n000234",
+    "n000241",
+    "n000255",
+    "n000257",
+    "n000266",
+    "n000270",
+    "n000282",
+    "n000302",
+    "n000314",
+    "n000326",
+    "n000332",
+    "n000378",
+    "n000395",
+    "n000397",
+    "n000399",
+    "n000419",
+    "n000438",
+    "n000472"
+  ],
+  "distance_threshold": 0.9,
+  "rejection_enabled": true,
+  "trained": "2025-12-08T12:49:12.342760"
+}
\ No newline at end of file
diff --git a/models/best (2).pt b/models/best (2).pt
new file mode 100644
index 0000000000000000000000000000000000000000..08a5c9d2f0b791e10fd6cf096bb0ae8c12fe85c8
--- /dev/null
+++ b/models/best (2).pt	
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:061c8d002b57d75013fe0414bf68202b50cce5aeb4364f1b92656f98b5fdb27b
+size 5438419
diff --git a/models/classifier_svm.pkl b/models/classifier_svm.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..a8867f83262dfe841c0049fc5c683e66b3487de4
--- /dev/null
+++ b/models/classifier_svm.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6055bbf5688036abbae55c37c9f9b8c4311a8abdaa8b404096bf6b59e4681528
+size 11059907
diff --git a/models/fire_YOLO11.pt b/models/fire_YOLO11.pt
new file mode 100644
index 0000000000000000000000000000000000000000..865e22550fa64786d7fa53b94657d428baf67407
--- /dev/null
+++ b/models/fire_YOLO11.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cbeb9569d8c4057734a81437b90c3e95e55ebdd40a5c5ea7dd7873a247cef13
+size 5479187
diff --git a/models/label_encoder.pkl b/models/label_encoder.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..e952b274b71d2b5a7818682a505c44458a8f08ef
--- /dev/null
+++ b/models/label_encoder.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dca733da3a44c15f1d24818701bca4c71b6c96da3dba67c65970813a72cb665d
+size 1167
diff --git a/models/merged_fire_knife_gun.pt b/models/merged_fire_knife_gun.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08a5c9d2f0b791e10fd6cf096bb0ae8c12fe85c8
--- /dev/null
+++ b/models/merged_fire_knife_gun.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:061c8d002b57d75013fe0414bf68202b50cce5aeb4364f1b92656f98b5fdb27b
+size 5438419
diff --git a/models/metadata.json b/models/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8b3252f85727043f2d7698f9c3ad1bf1b34797d
--- /dev/null
+++ b/models/metadata.json
@@ -0,0 +1,38 @@
+{
+  "accuracy": 0.9923339011925043,
+  "classifier": "svm",
+  "classes": 30,
+  "people": [
+    "n000080",
+    "n000081",
+    "n000097",
+    "n000103",
+    "n000163",
+    "n000176",
+    "n000185",
+    "n000202",
+    "n000203",
+    "n000225",
+    "n000227",
+    "n000228",
+    "n000234",
+    "n000241",
+    "n000255",
+    "n000257",
+    "n000266",
+    "n000270",
+    "n000282",
+    "n000302",
+    "n000314",
+    "n000326",
+    "n000332",
+    "n000378",
+    "n000395",
+    "n000397",
+    "n000399",
+    "n000419",
+    "n000438",
+    "n000472"
+  ],
+  "trained": "2025-11-13T11:25:50.254327"
+}
\ No newline at end of file
diff --git a/models/weapon_YOLO11.pt b/models/weapon_YOLO11.pt
new file mode 100644
index 0000000000000000000000000000000000000000..213ef4db0574e28d79c7f0e60c8f93e52abbf239
--- /dev/null
+++ b/models/weapon_YOLO11.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47e0703f043674a0b6970ae941e06b9e4e1fbd5f7ca0e7d90a066f86de2a320d
+size 5440339
diff --git a/nlp_search/query_retreival.py b/nlp_search/query_retreival.py
new file mode 100644
index 0000000000000000000000000000000000000000..d306f92cfe3843165f5a0d762c1263e87467e09c
--- /dev/null
+++ b/nlp_search/query_retreival.py
@@ -0,0 +1,269 @@
+"""nlp_search/query_retreival.py
+
+Command-line utility to run a natural-language query against stored
+captions in MongoDB and return matching keyframes/captions above a similarity threshold.
+
+Behavior:
+ - Connects to MongoDB (MONGO_URI via env)
+ - Loads the SentenceTransformer model to encode the query
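+ - Loads caption embeddings from the `event_description` collection
+   (documents should include `description_id`, `caption`, `text_embedding`, `event_id`, and `video_reference`)
+   and from the `video_captions` collection (`caption_id`, `sanitized_caption`/`raw_caption`, `text_embedding`, `video_id`, `frame_id`, `timestamp`)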
+ - Computes cosine similarity between query embedding and stored embeddings
+ - Returns only matches with similarity >= 0.85 (85%) by default
+ - Results include: caption, similarity_score (0..1), event_id (if present),
+   video reference, and timestamps (from `events` collection if event exists)
+
+Usage:
+  python query_retreival.py --query "fire in building"
+  python query_retreival.py -q "dog sitting" --threshold 0.80 --json
+
+"""
+
+import os
+import argparse
+import json
+from dotenv import load_dotenv
+from pymongo import MongoClient
+import numpy as np
+
+try:
+    from sentence_transformers import SentenceTransformer
+except Exception as e:  # keep the broad catch, but chain the cause so the real failure stays visible
+    raise ImportError("Please install sentence-transformers: pip install sentence-transformers") from e
+
+
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+
+
+def connect_db():
+    """Open a MongoClient on MONGO_URI and return that client's default database."""
+    mongo_client = MongoClient(MONGO_URI)
+    return mongo_client.get_default_database()
+
+
+def load_caption_embeddings(db):
+    """Load captions and embeddings from both `event_description` and `video_captions`.
+
+    Merges results from:
+      - event_description: behavior-level captions (e.g., "Accident behavior detected")
+      - video_captions: frame-level BLIP captions (e.g., "a car is parked in a parking lot")
+    Documents whose embedding is missing, empty, zero-norm or not convertible to float32 are skipped.
+    Returns:
+      docs: list of dicts with keys: description_id, caption, event_id, video_reference, source (video_captions entries also carry video_id, frame_id, timestamp)
+      emb_matrix: np.ndarray shape (N, D) of float32 (each row L2-normalized); shape (0, 0) when nothing was loaded
+    """
+    docs = []
+    embeddings = []
+
+    # --- 1. Load from event_description (behavior-level) ---
+    coll_ed = db.get_collection("event_description")
+    cursor_ed = coll_ed.find({"text_embedding": {"$exists": True, "$ne": []}}, {
+        "_id": 0,
+        "description_id": 1,
+        "caption": 1,
+        "event_id": 1,
+        "text_embedding": 1,
+        "video_reference": 1
+    })
+
+    for doc in cursor_ed:
+        emb = doc.get("text_embedding")
+        if not emb:
+            continue
+        try:
+            arr = np.asarray(emb, dtype="float32")
+            norm = np.linalg.norm(arr)
+            if norm == 0:  # a zero vector cannot be normalized
+                continue
+            arr = arr / norm  # unit length, so a later dot product is a cosine similarity
+            embeddings.append(arr)
+            docs.append({
+                "description_id": doc.get("description_id"),
+                "caption": doc.get("caption"),
+                "event_id": doc.get("event_id"),
+                "video_reference": doc.get("video_reference"),
+                "source": "event_description"
+            })
+        except Exception:  # best-effort: silently skip a document that cannot be processed
+            continue
+
+    # --- 2. Load from video_captions (frame-level BLIP captions) ---
+    coll_vc = db.get_collection("video_captions")
+    cursor_vc = coll_vc.find({"text_embedding": {"$exists": True, "$ne": []}}, {
+        "_id": 0,
+        "caption_id": 1,
+        "sanitized_caption": 1,
+        "raw_caption": 1,
+        "video_id": 1,
+        "frame_id": 1,
+        "timestamp": 1,
+        "text_embedding": 1,
+    })
+
+    for doc in cursor_vc:
+        emb = doc.get("text_embedding")
+        if not emb:
+            continue
+        try:
+            arr = np.asarray(emb, dtype="float32")
+            norm = np.linalg.norm(arr)
+            if norm == 0:  # a zero vector cannot be normalized
+                continue
+            arr = arr / norm  # unit length, so a later dot product is a cosine similarity
+            embeddings.append(arr)
+            caption_text = doc.get("sanitized_caption") or doc.get("raw_caption", "")  # prefer the sanitized caption
+            docs.append({
+                "description_id": doc.get("caption_id"),  # caption_id is exposed under the shared description_id key
+                "caption": caption_text,
+                "event_id": None,
+                "video_id": doc.get("video_id"),
+                "frame_id": doc.get("frame_id"),
+                "timestamp": doc.get("timestamp"),
+                "video_reference": None,
+                "source": "video_captions"
+            })
+        except Exception:  # best-effort: silently skip a document that cannot be processed
+            continue
+    # NOTE(review): np.stack needs every embedding to have the same shape; mixed lengths raise ValueError here
+    if embeddings:
+        emb_matrix = np.stack(embeddings, axis=0).astype("float32")
+    else:
+        emb_matrix = np.zeros((0, 0), dtype="float32")  # empty sentinel; callers in this file test .size == 0
+
+    return docs, emb_matrix
+
+
+def compute_similarities(q_emb, emb_matrix):
+    """Return the dot product of every row of emb_matrix (N, D) with q_emb (D,) scaled to unit norm.
+
+    The values are cosine similarities only when the rows of emb_matrix are already unit-norm.
+    """
+    if not emb_matrix.size:
+        return np.array([])
+    return np.dot(emb_matrix, (q_emb / (np.linalg.norm(q_emb) + 1e-12)).astype("float32"))
+
+
+def retrieve_by_threshold(db, query_text, threshold=0.5):
+    """Retrieve captions whose similarity to the query is at or above threshold.
+
+    Args:
+        db: MongoDB database connection
+        query_text: Query string
+        threshold: Similarity threshold (0..1), default 0.5 (the CLI in main() passes its own default of 0.85)
+
+    Returns:
+        List of result dicts sorted by similarity (descending); empty if no embeddings are stored
+    """
+    model = SentenceTransformer("all-mpnet-base-v2")
+    q_emb = model.encode(query_text, normalize_embeddings=True).astype("float32")
+
+    docs, emb_matrix = load_caption_embeddings(db)
+
+    if emb_matrix.size == 0:
+        print("No caption embeddings found in database. Run upload_captions.py first.")
+        return []
+
+    sims = compute_similarities(q_emb, emb_matrix)
+
+    # Filter by threshold and sort descending
+    mask = sims >= threshold
+    idxs = np.where(mask)[0]
+    idxs = idxs[np.argsort(-sims[idxs])]  # Sort by similarity descending
+
+    results = []
+    events_coll = db.get_collection("events")
+    keyframes_coll = db.get_collection("keyframes")
+
+    for idx in idxs:
+        score = float(sims[idx])
+        doc = docs[idx]
+        source = doc.get("source", "event_description")
+
+        # Attempt to fetch timestamps from events collection
+        start_ts = None
+        end_ts = None
+        video_id = doc.get("video_id")
+        video_reference = doc.get("video_reference")
+
+        if doc.get("event_id"):
+            ev = events_coll.find_one({"event_id": doc.get("event_id")}, {"_id": 0, "start_timestamp_ms": 1, "end_timestamp_ms": 1, "video_id": 1})
+            if ev:
+                start_ts = ev.get("start_timestamp_ms")
+                end_ts = ev.get("end_timestamp_ms")
+                video_id = video_id or ev.get("video_id")
+
+        # For video_captions source, try to find the keyframe image in MinIO
+        if source == "video_captions" and not video_reference:
+            frame_id = doc.get("frame_id")
+            if frame_id:
+                # Try to find keyframe record for this frame
+                kf = keyframes_coll.find_one(
+                    {"frame_id": frame_id},
+                    {"_id": 0, "minio_bucket": 1, "minio_object_name": 1, "timestamp_ms": 1}
+                )
+                if kf and kf.get("minio_object_name"):
+                    video_reference = {
+                        "bucket": kf.get("minio_bucket", "keyframes"),
+                        "object_name": kf.get("minio_object_name")
+                    }
+                    start_ts = kf.get("timestamp_ms") if start_ts is None else start_ts
+
+            # Fall back to the caption timestamp; compare with None so a timestamp of 0 is not treated as missing
+            if start_ts is None and doc.get("timestamp") is not None:
+                try:
+                    start_ts = int(float(doc.get("timestamp")) * 1000)  # presumably seconds -> ms; confirm with the writer
+                except (ValueError, TypeError, OverflowError):
+                    pass
+
+        result = {
+            "description_id": doc.get("description_id"),
+            "caption": doc.get("caption"),
+            "event_id": doc.get("event_id"),
+            "video_reference": video_reference,
+            "video_id": video_id,
+            "frame_id": doc.get("frame_id"),
+            "start_timestamp_ms": start_ts,
+            "end_timestamp_ms": end_ts,
+            "similarity": score,
+            "source": source
+        }
+        results.append(result)
+
+    return results
+
+
+def main():
+    """Parse the CLI arguments, run the query, and print the matches as text or JSON."""
+    parser = argparse.ArgumentParser(description="Query NLP captions and retrieve matching keyframes/events from DB with similarity >= threshold")
+    parser.add_argument("--query", "-q", required=True, help="Query text")
+    # argparse %-formats help strings, so a literal percent sign has to be written as %%
+    parser.add_argument("--threshold", "-t", type=float, default=0.85, help="Similarity threshold (0..1), default 0.85 (85%%)")
+    parser.add_argument("--json", action="store_true", help="Print results as JSON")
+    args = parser.parse_args()
+
+    if not (0.0 <= args.threshold <= 1.0):
+        print("Error: threshold must be between 0.0 and 1.0")
+        return
+
+    db = connect_db()
+    results = retrieve_by_threshold(db, args.query, threshold=args.threshold)
+
+    if args.json:
+        print(json.dumps(results, indent=2, default=str))
+    elif not results:
+        print(f"Query: {args.query}\nNo matches found with similarity >= {args.threshold:.0%}")
+    else:
+        print(f"Query: {args.query}\nFound {len(results)} match(es) with similarity >= {args.threshold:.0%}:")
+        for i, r in enumerate(results, 1):
+            sim = r.get("similarity", 0.0)
+            start = r.get("start_timestamp_ms")
+            end = r.get("end_timestamp_ms")
+            vidref = r.get("video_reference") or {}
+            video_obj = vidref.get("object_name") if isinstance(vidref, dict) else None
+            print(f"[{i}] Score: {sim:.4f} ({sim:.0%}) | Caption: {r.get('caption')} | Video Obj: {video_obj} | start_ms: {start} | end_ms: {end}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/object_detection.py b/object_detection.py
new file mode 100644
index 0000000000000000000000000000000000000000..1738a824c67eca03c8761a2df0aa0acf22b95522
--- /dev/null
+++ b/object_detection.py
@@ -0,0 +1,676 @@
+"""
+Object Detection Module for DetectifAI
+
+This module handles:
+- Fire detection using fire_YOLO11.pt
+- Knife and gun detection using weapon_YOLO11.pt
+- Multi-model forking approach for parallel inference
+- Integration with video processing pipeline
+- Object-based event generation
+"""
+
+import cv2
+import torch
+import numpy as np
+import os
+import logging
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+from ultralytics import YOLO
+import time
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class DetectedObject:
+    """Represents a single object detected in one frame by one model"""
+    class_name: str  # label from the detector's class list, or "unknown_<id>" when the id is not mapped
+    confidence: float  # detection score reported by the model
+    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    center_point: Tuple[int, int]  # midpoint of bbox as (x, y)
+    area: float  # bbox width * height
+    frame_timestamp: float  # timestamp supplied for the frame the object was found in
+    detection_model: str  # key of the model that produced the detection (e.g. 'fire', 'weapon')
+
+@dataclass
+class ObjectDetectionResult:
+    """Result of running every loaded detection model on one frame"""
+    frame_path: str  # path of the image that was analysed
+    timestamp: float  # timestamp supplied for the frame
+    detected_objects: List[DetectedObject]  # detections that passed the confidence filters
+    total_detections: int  # len(detected_objects)
+    detection_confidence_avg: float  # mean confidence of detected_objects, 0.0 when there are none
+    processing_time: float  # seconds spent on the frame (0.0 when the image could not be read)
+
+class ObjectDetector:
+    """Main object detection class using YOLOv11 models"""
+    
+    def __init__(self, config):
+        """
+        Initialize object detector with trained models
+        
+        Args:
+            config: VideoProcessingConfig object with object detection settings (object_detection_confidence and use_gpu_acceleration are read here; _load_models reads models_dir)
+        """
+        self.config = config
+        self.models = {}  # model key ('fire', 'weapon') -> loaded YOLO instance, filled by _load_models
+        self.class_names = {}  # model key -> list of class labels indexed by class id
+        self.confidence_threshold = config.object_detection_confidence
+        self.device = 'cuda' if torch.cuda.is_available() and config.use_gpu_acceleration else 'cpu'
+        # 'cuda' is used only when it is available AND the config enables GPU acceleration
+        logger.info(f"Initializing ObjectDetector on device: {self.device}")
+        
+        # Load models (propagates the error if no model file can be loaded)
+        self._load_models()
+        
+        # Statistics: running totals that detect_objects_in_frame appends to
+        self.detection_stats = {
+            'total_frames_processed': 0,
+            'total_objects_detected': 0,
+            'detection_times': [],
+            'objects_by_class': {},
+            'confidence_scores': []
+        }
+    
+    def _load_models(self):
+        """Load fire_YOLO11.pt and weapon_YOLO11.pt from config.models_dir as separate models (multi-model forking); raises FileNotFoundError when neither file exists"""
+        try:
+            # Fire detection model
+            fire_model_path = os.path.join(self.config.models_dir, "fire_YOLO11.pt")
+            if os.path.exists(fire_model_path):
+                logger.info(f"Loading fire detection model: {fire_model_path}")
+                self.models['fire'] = YOLO(fire_model_path)
+                self.models['fire'].to(self.device)
+                # Class names mapping for fire model: 0='Fire' (only detecting Fire class, ignoring class 1)
+                self.class_names['fire'] = ['Fire']
+                logger.info("✅ Fire detection model loaded successfully (detecting only 'Fire' class)")
+            else:
+                logger.warning(f"Fire model not found at: {fire_model_path}")
+            
+            # Weapon detection model (gun + knife)
+            weapon_model_path = os.path.join(self.config.models_dir, "weapon_YOLO11.pt")
+            if os.path.exists(weapon_model_path):
+                logger.info(f"Loading weapon detection model: {weapon_model_path}")
+                self.models['weapon'] = YOLO(weapon_model_path)
+                self.models['weapon'].to(self.device)
+                # Class names mapping for weapon model: 0='gun', 1='knife' (CORRECTED ORDER)
+                self.class_names['weapon'] = ['gun', 'knife']
+                logger.info("✅ Weapon detection model loaded successfully (gun, knife)")
+            else:
+                logger.warning(f"Weapon model not found at: {weapon_model_path}")
+            # One missing model file is only a warning; having no model at all is fatal
+            if not self.models:
+                logger.error("❌ No object detection models loaded!")
+                raise FileNotFoundError("No object detection models found")
+            
+            logger.info(f"📊 Loaded {len(self.models)} object detection models: {list(self.models.keys())}")
+            
+        except Exception as e:  # log the failure, then re-raise it unchanged to the caller
+            logger.error(f"❌ Failed to load object detection models: {e}")
+            raise
+    
+    def detect_objects_in_frame(self, frame_path: str, timestamp: float) -> ObjectDetectionResult:
+        """
+        Detect objects in a single frame
+        Every loaded model is run on the frame; a model that raises is logged and skipped.
+        Args:
+            frame_path: Path to the frame image
+            timestamp: Timestamp of the frame in video
+            
+        Returns:
+            ObjectDetectionResult with all detected objects (empty, with processing_time 0.0, if the image cannot be read)
+        """
+        start_time = time.time()
+        
+        # Load frame
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            logger.error(f"Could not load frame: {frame_path}")
+            return ObjectDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                detected_objects=[],
+                total_detections=0,
+                detection_confidence_avg=0.0,
+                processing_time=0.0
+            )
+        
+        detected_objects = []
+        
+        # Run detection with each model
+        for model_name, model in self.models.items():
+            try:
+                # Run inference
+                results = model(frame, conf=self.confidence_threshold, verbose=False)
+                
+                # Process results
+                for result in results:
+                    if result.boxes is not None:
+                        boxes = result.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2
+                        confidences = result.boxes.conf.cpu().numpy()
+                        classes = result.boxes.cls.cpu().numpy().astype(int)
+                        
+                        for box, conf, cls in zip(boxes, confidences, classes):
+                            # For fire model, only process class 0 (Fire), skip class 1
+                            if model_name == 'fire' and cls != 0:
+                                continue
+                            
+                            # Get class name
+                            if model_name in self.class_names and cls < len(self.class_names[model_name]):
+                                class_name = self.class_names[model_name][cls]
+                            else:
+                                class_name = f"unknown_{cls}"
+                            
+                            # Apply specific confidence thresholds based on object type
+                            confidence_threshold = self.confidence_threshold  # default
+                            if class_name.lower() == 'fire':
+                                confidence_threshold = getattr(self.config, 'fire_detection_confidence', 0.4)
+                            elif class_name in ['knife', 'gun']:
+                                confidence_threshold = getattr(self.config, 'weapon_detection_confidence', 0.7)
+                            # NOTE(review): inference already ran with conf=self.confidence_threshold, so a lower per-class value cannot re-admit boxes
+                            # Skip detection if confidence is below specific threshold
+                            if float(conf) < confidence_threshold:
+                                continue
+                            
+                            # Calculate center point and area; tolist() yields plain Python ints (JSON/BSON friendly)
+                            x1, y1, x2, y2 = box.astype(int).tolist()
+                            center_x = int((x1 + x2) / 2)
+                            center_y = int((y1 + y2) / 2)
+                            area = float((x2 - x1) * (y2 - y1))
+                            
+                            detected_object = DetectedObject(
+                                class_name=class_name,
+                                confidence=float(conf),
+                                bbox=(x1, y1, x2, y2),
+                                center_point=(center_x, center_y),
+                                area=area,
+                                frame_timestamp=timestamp,
+                                detection_model=model_name
+                            )
+                            
+                            detected_objects.append(detected_object)
+                            
+                            # Update statistics
+                            if class_name not in self.detection_stats['objects_by_class']:
+                                self.detection_stats['objects_by_class'][class_name] = 0
+                            self.detection_stats['objects_by_class'][class_name] += 1
+                            self.detection_stats['confidence_scores'].append(float(conf))
+            
+            except Exception as e:
+                logger.error(f"Error running {model_name} detection: {e}")
+                continue
+        
+        # Calculate processing time and statistics
+        processing_time = time.time() - start_time
+        self.detection_stats['detection_times'].append(processing_time)
+        self.detection_stats['total_frames_processed'] += 1
+        self.detection_stats['total_objects_detected'] += len(detected_objects)
+        
+        # Calculate average confidence
+        avg_confidence = np.mean([obj.confidence for obj in detected_objects]) if detected_objects else 0.0
+        
+        result = ObjectDetectionResult(
+            frame_path=frame_path,
+            timestamp=timestamp,
+            detected_objects=detected_objects,
+            total_detections=len(detected_objects),
+            detection_confidence_avg=float(avg_confidence),
+            processing_time=processing_time
+        )
+        
+        if detected_objects:
+            object_summary = ", ".join([f"{obj.class_name}({obj.confidence:.2f})" for obj in detected_objects])
+            logger.info(f"🎯 Detected {len(detected_objects)} objects at {timestamp:.2f}s: {object_summary}")
+        
+        return result
+    
+    def detect_objects_in_keyframes(self, keyframes: List) -> List[ObjectDetectionResult]:
+        """
+        Run object detection on all keyframes
+        
+        Args:
+            keyframes: List of KeyframeResult objects from video processing (each must expose frame_data.frame_path and frame_data.timestamp)
+            
+        Returns:
+            List of ObjectDetectionResult objects; a keyframe that raises is logged and omitted, so the list can be shorter than the input
+        """
+        logger.info(f"🔍 Running object detection on {len(keyframes)} keyframes")
+        
+        detection_results = []
+        
+        for i, keyframe in enumerate(keyframes):
+            try:
+                frame_path = keyframe.frame_data.frame_path
+                timestamp = keyframe.frame_data.timestamp
+                
+                # Run detection
+                result = self.detect_objects_in_frame(frame_path, timestamp)
+                detection_results.append(result)
+                
+                # Progress logging: every 10th keyframe and the last one
+                if (i + 1) % 10 == 0 or i == len(keyframes) - 1:
+                    logger.info(f"📊 Object detection progress: {i + 1}/{len(keyframes)} frames processed")
+                
+            except Exception as e:
+                logger.error(f"Error detecting objects in keyframe {i}: {e}")
+                continue
+        
+        # Log final statistics
+        total_objects = sum(r.total_detections for r in detection_results)
+        frames_with_objects = sum(1 for r in detection_results if r.total_detections > 0)
+        avg_processing_time = np.mean([r.processing_time for r in detection_results]) if detection_results else 0
+        # The per-class counts logged below come from self.detection_stats, which is not reset in this method
+        logger.info(f"🎯 Object Detection Summary:")
+        logger.info(f"   📊 Total objects detected: {total_objects}")
+        logger.info(f"   📊 Frames with objects: {frames_with_objects}/{len(keyframes)}")
+        logger.info(f"   📊 Average processing time: {avg_processing_time:.3f}s per frame")
+        logger.info(f"   📊 Objects by class: {self.detection_stats['objects_by_class']}")
+        
+        return detection_results
+    
+    def create_object_based_events(self, detection_results: List[ObjectDetectionResult], 
+                                 temporal_window: float = 5.0) -> List[Dict[str, Any]]:
+        """
+        Create events based on object detections
+        Detections are grouped per class; each event spans at most temporal_window seconds from its first detection.
+        Args:
+            detection_results: List of ObjectDetectionResult objects
+            temporal_window: Time window for grouping detections (seconds), measured from the first detection of the event
+            
+        Returns:
+            List of object-based events (dicts built by _create_event_from_detections), emitted class by class
+        """
+        logger.info(f"🎯 Creating object-based events from {len(detection_results)} detection results")
+        
+        # Filter results with detections
+        results_with_objects = [r for r in detection_results if r.total_detections > 0]
+        
+        if not results_with_objects:
+            logger.info("No objects detected, no object-based events created")
+            return []
+        
+        # Group detections by object class
+        events_by_class = {}
+        
+        for result in results_with_objects:
+            for obj in result.detected_objects:
+                class_name = obj.class_name
+                
+                if class_name not in events_by_class:
+                    events_by_class[class_name] = []
+                
+                events_by_class[class_name].append({
+                    'timestamp': result.timestamp,
+                    'confidence': obj.confidence,
+                    'bbox': obj.bbox,
+                    'frame_path': result.frame_path,
+                    'object': obj
+                })
+        
+        # Create temporal events for each class
+        object_events = []
+        event_id_counter = 1000  # Start from 1000 to differentiate from motion events
+        
+        for class_name, detections in events_by_class.items():
+            # Sort by timestamp
+            detections.sort(key=lambda x: x['timestamp'])
+            
+            # Group into temporal windows anchored at the event's first detection (not at the previous detection)
+            current_event_detections = []
+            current_event_start = None
+            
+            for detection in detections:
+                timestamp = detection['timestamp']
+                
+                if current_event_start is None:
+                    # Start new event
+                    current_event_start = timestamp
+                    current_event_detections = [detection]
+                elif timestamp - current_event_start <= temporal_window:
+                    # Add to current event
+                    current_event_detections.append(detection)
+                else:
+                    # Finish current event and start new one
+                    if current_event_detections:
+                        event = self._create_event_from_detections(
+                            class_name, current_event_detections, event_id_counter
+                        )
+                        object_events.append(event)
+                        event_id_counter += 1
+                    
+                    # Start new event
+                    current_event_start = timestamp
+                    current_event_detections = [detection]
+            
+            # Don't forget the last event (the loop only flushes when a later detection falls outside the window)
+            if current_event_detections:
+                event = self._create_event_from_detections(
+                    class_name, current_event_detections, event_id_counter
+                )
+                object_events.append(event)
+                event_id_counter += 1
+        
+        logger.info(f"✅ Created {len(object_events)} object-based events")
+        for event in object_events:
+            logger.info(f"   🎯 {event['event_type']}: {event['start_timestamp']:.2f}s - {event['end_timestamp']:.2f}s "
+                       f"(confidence: {event['confidence']:.2f})")
+        
+        return object_events
+    
+    def _create_event_from_detections(self, class_name: str, detections: List[Dict],
+                                    event_id: int) -> Dict[str, Any]:
+        """Summarise one group of same-class detections as a single event dictionary"""
+        timestamps = [item['timestamp'] for item in detections]
+        scores = [item['confidence'] for item in detections]
+        first_seen, last_seen = min(timestamps), max(timestamps)
+        mean_score = np.mean(scores)
+        peak_score = max(scores)
+        count = len(detections)
+
+        # Peak confidence scaled by the number of detections; the 2.0 factor ranks object events higher
+        importance = peak_score * count * 2.0
+
+        # Human-readable summary of the group
+        summary = f"{class_name.title()} detected with {mean_score:.2f} average confidence over {count} frames"
+
+        # Frame paths, in the same order as the detections
+        frame_paths = [item['frame_path'] for item in detections]
+
+        return {
+            'event_id': f"obj_event_{event_id:04d}",
+            'start_timestamp': first_seen,
+            'end_timestamp': last_seen,
+            'event_type': f"{class_name}_detection",
+            'confidence': mean_score,
+            'max_confidence': peak_score,
+            'keyframes': frame_paths,
+            'importance_score': importance,
+            'motion_intensity': 0.0,  # Object events don't have motion intensity
+            'description': summary,
+            'object_class': class_name,
+            'detection_count': count,
+            'duration': last_seen - first_seen,
+            'detection_details': detections
+        }
+    
+    def get_detection_statistics(self) -> Dict[str, Any]:
+        """
+        Return a shallow copy of the running stats plus avg/max/min summaries.
+
+        Summary keys are only added for series that have at least one sample.
+        """
+        summary = self.detection_stats.copy()
+        for series_key, label in (('detection_times', 'detection_time'),
+                                  ('confidence_scores', 'confidence')):
+            samples = summary[series_key]
+            if samples:
+                summary[f'avg_{label}'] = np.mean(samples)
+                summary[f'max_{label}'] = max(samples)
+                summary[f'min_{label}'] = min(samples)
+        return summary
+    
+    def annotate_frame_with_detections(self, frame_path: str,
+                                     detection_result: ObjectDetectionResult,
+                                     output_path: str = None) -> str:
+        """
+        Annotate frame with bounding boxes and labels
+
+        Args:
+            frame_path: Path to input frame
+            detection_result: ObjectDetectionResult for the frame
+            output_path: Optional output path; defaults to
+                "<frame name>_annotated.jpg" next to the input frame
+
+        Returns:
+            Path to the annotated frame, or frame_path unchanged when the
+            frame cannot be read or the annotated image cannot be written
+        """
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            logger.error(f"Could not load frame for annotation: {frame_path}")
+            return frame_path
+
+        # Box colors per object class (BGR format); built once, not per box
+        color_map = {
+            'fire': (255, 255, 0),    # Neon Cyan/Blue
+            'knife': (0, 255, 255),   # Neon Yellow
+            'gun': (0, 255, 0)        # Neon Green
+        }
+        font, font_scale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2
+
+        for obj in detection_result.detected_objects:
+            # Drawing calls need integer pixel coordinates (guards against float bbox values)
+            x1, y1, x2, y2 = (int(round(v)) for v in obj.bbox)
+            color = color_map.get(obj.class_name, (255, 255, 255))  # Default white
+            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+
+            label = f"{obj.class_name}: {obj.confidence:.2f}"
+            label_w, label_h = cv2.getTextSize(label, font, font_scale, thickness)[0]
+
+            # Keep the label strip inside the image when the box touches the top edge
+            label_bottom = max(y1, label_h + 10)
+            cv2.rectangle(frame, (x1, label_bottom - label_h - 10),
+                          (x1 + label_w, label_bottom), color, -1)
+            cv2.putText(frame, label, (x1, label_bottom - 5),
+                        font, font_scale, (255, 255, 255), thickness)
+
+        # Generate output path if not provided
+        if output_path is None:
+            base_name = os.path.splitext(os.path.basename(frame_path))[0]
+            output_path = os.path.join(os.path.dirname(frame_path), f"{base_name}_annotated.jpg")
+
+        # cv2.imwrite signals most failures by returning False rather than raising
+        if not cv2.imwrite(output_path, frame):
+            logger.error(f"Could not write annotated frame: {output_path}")
+            return frame_path
+        return output_path
+
+
+class ObjectDetectionIntegrator:
+    """Integration layer between object detection and video processing pipeline"""
+    
+    def __init__(self, config):
+        self.config = config  # read later for enable_object_detection and related settings
+        self.detector = ObjectDetector(config) if config.enable_object_detection else None  # None when detection is disabled
+    
+    def process_keyframes_with_object_detection(self, keyframes: List) -> Tuple[List, List[Dict[str, Any]]]:
+        """
+        Process keyframes with object detection and create object-based events
+
+        Frames with at least one detection are also saved as annotated copies, and
+        a detection_metadata.json summary is written to config.output_base_dir
+        when that attribute is set.
+
+        Args:
+            keyframes: List of KeyframeResult objects
+
+        Returns:
+            Tuple of (detection_results, object_events); two empty lists when
+            object detection is disabled
+        """
+        if not self.config.enable_object_detection or not self.detector:
+            logger.info("Object detection disabled, skipping...")
+            return [], []
+
+        logger.info("🎯 Starting object detection integration")
+
+        # Run object detection on keyframes
+        detection_results = self.detector.detect_objects_in_keyframes(keyframes)
+
+        # Create annotated frames for keyframes WITH detections
+        annotated_frames = []
+        frames_with_detections = []
+        for result in detection_results:
+            if result.total_detections <= 0:
+                continue
+            annotated_path = self.detector.annotate_frame_with_detections(
+                result.frame_path, result
+            )
+            # Store metadata about frames with detections
+            frames_with_detections.append({
+                'original_path': result.frame_path,
+                'annotated_path': annotated_path,
+                'timestamp': result.timestamp,
+                'detection_count': result.total_detections,
+                'objects': [obj.class_name for obj in result.detected_objects],
+                'confidence_avg': result.detection_confidence_avg
+            })
+            annotated_frames.append(annotated_path)
+            logger.info(f"🎯 Annotated frame at {result.timestamp:.2f}s with {result.total_detections} detections")
+
+        # Create object-based events
+        object_events = self.detector.create_object_based_events(
+            detection_results,
+            temporal_window=self.config.object_event_temporal_window
+        )
+
+        # Persist a summary only when an output directory is configured. The
+        # directory itself is created (not dirname of the file, which is '' for
+        # an empty base dir and makes os.makedirs raise).
+        output_dir = getattr(self.config, 'output_base_dir', None)
+        if output_dir is not None:
+            detection_metadata = {
+                'total_keyframes': len(keyframes),
+                'frames_with_detections': len(frames_with_detections),
+                'annotated_frames': annotated_frames,
+                'detection_summary': frames_with_detections,
+                'objects_detected': self.detector.detection_stats['objects_by_class'].copy()
+            }
+            metadata_path = os.path.join(output_dir, 'detection_metadata.json')
+            os.makedirs(output_dir or '.', exist_ok=True)
+
+            import json
+            with open(metadata_path, 'w') as f:
+                json.dump(detection_metadata, f, indent=2)
+            logger.info(f"📊 Detection metadata saved: {metadata_path}")
+
+        logger.info(f"✅ Object detection integration complete: {len(object_events)} events created")
+        logger.info(f"📊 Annotated {len(annotated_frames)} frames with detections out of {len(keyframes)} total keyframes")
+        return detection_results, object_events
+    
+    def create_annotated_video(self, video_path: str, detection_results: List, output_path: str = None) -> str:
+        """
+        Create an annotated video with bounding boxes drawn on frames with detections
+
+        Each detection result is drawn on the video frame nearest its timestamp;
+        all other frames are copied through unchanged.
+
+        Args:
+            video_path: Path to the original video
+            detection_results: List of ObjectDetectionResult from keyframe detection
+            output_path: Optional output path for annotated video; defaults to
+                "<video name>_annotated.mp4" next to the input video
+
+        Returns:
+            Path to the created annotated video, or None when there is no
+            detector, no detection results, no usable frame rate, or the
+            input/output video cannot be opened
+        """
+        if not self.detector or not detection_results:
+            logger.warning("No detector or detection results available for video annotation")
+            return None
+
+        logger.info(f"🎨 Creating annotated video with bounding boxes...")
+
+        # Open input video
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            logger.error(f"Cannot open video: {video_path}")
+            return None
+
+        out = None
+        try:
+            # Keep the container's fractional rate (e.g. 29.97): truncating it to
+            # an int changes playback speed and skews frame timestamps
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            if not fps > 0:
+                logger.error(f"Cannot determine frame rate of video: {video_path}")
+                return None
+
+            # Build detection lookup by frame index (robust to float rounding,
+            # unlike comparing timestamps for exact equality)
+            detection_lookup = {}
+            for result in detection_results:
+                if result.total_detections > 0:
+                    detection_lookup[int(round(result.timestamp * fps))] = result
+
+            # Create output path if not provided
+            if output_path is None:
+                video_name = os.path.splitext(os.path.basename(video_path))[0]
+                output_path = os.path.join(os.path.dirname(video_path), f"{video_name}_annotated.mp4")
+
+            # Ensure output directory exists ('' means the current directory)
+            output_dir = os.path.dirname(output_path)
+            if output_dir:
+                os.makedirs(output_dir, exist_ok=True)
+
+            # Create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                logger.error(f"Cannot create output video: {output_path}")
+                return None
+
+            # Box colors per object class (BGR format); built once, not per box
+            color_map = {
+                'fire': (255, 255, 0),    # Neon Cyan/Blue
+                'knife': (0, 255, 255),   # Neon Yellow
+                'gun': (0, 255, 0)        # Neon Green
+            }
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            frame_count = 0
+            frames_annotated = 0
+
+            logger.info(f"Processing {total_frames} frames at {fps:.2f} FPS...")
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+
+                result = detection_lookup.get(frame_count)
+                if result is not None:
+                    # Draw bounding boxes and labels
+                    for obj in result.detected_objects:
+                        # Drawing calls need integer pixel coordinates
+                        x1, y1, x2, y2 = (int(round(v)) for v in obj.bbox)
+                        color = color_map.get(obj.class_name, (255, 255, 255))
+                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+
+                        # Draw label with confidence on a filled background
+                        label = f"{obj.class_name}: {obj.confidence:.2f}"
+                        label_size = cv2.getTextSize(label, font, 0.6, 2)[0]
+                        cv2.rectangle(frame, (x1, y1 - label_size[1] - 10),
+                                      (x1 + label_size[0], y1), color, -1)
+                        cv2.putText(frame, label, (x1, y1 - 5),
+                                    font, 0.6, (255, 255, 255), 2)
+                    frames_annotated += 1
+
+                # Write frame to output video
+                out.write(frame)
+                frame_count += 1
+
+                # Progress logging (the reported frame count may be 0 for some sources)
+                if frame_count % 100 == 0 and total_frames > 0:
+                    progress = (frame_count / total_frames) * 100
+                    logger.info(f"Progress: {progress:.1f}% ({frame_count}/{total_frames} frames)")
+        finally:
+            # Release resources even if reading, drawing or writing raised
+            cap.release()
+            if out is not None:
+                out.release()
+
+        logger.info(f"✅ Annotated video created: {output_path}")
+        logger.info(f"📊 Annotated {frames_annotated} frames out of {total_frames} total frames")
+        return output_path
+    
+    def get_object_detection_summary(self) -> Dict[str, Any]:
+        """Return detector statistics plus an 'enabled' flag ({'enabled': False} without a detector)."""
+        detector = self.detector
+        if detector:
+            summary = detector.get_detection_statistics()
+            summary['enabled'] = True
+            return summary
+        return {'enabled': False}
\ No newline at end of file
diff --git a/real_time_alerts.py b/real_time_alerts.py
new file mode 100644
index 0000000000000000000000000000000000000000..342bbae4af7dbacecf0953b2035d3161a161dcdd
--- /dev/null
+++ b/real_time_alerts.py
@@ -0,0 +1,852 @@
+"""
+Real-Time Alert Engine for DetectifAI
+
+This module provides the core alert engine for processing live stream detections
+and generating real-time alerts with:
+- Threat classification (critical, high, medium, low)
+- Suspicious person re-appearance tracking via MinIO face store
+- Alert deduplication and cooldown management
+- Alert queue for SSE broadcast to frontend clients
+- False positive feedback loop for improving accuracy
+
+Alert Types:
+- Object Detection: gun, knife, fire
+- Behavior Detection: fighting, road_accident, wallclimb
+- Suspicious Person Re-appearance: previously flagged face detected again
+"""
+
+import uuid
+import time
+import threading
+import logging
+from datetime import datetime, timedelta
+from typing import Dict, List, Any, Optional, Tuple
+from dataclasses import dataclass, asdict, field
+from enum import Enum
+from collections import deque
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+# ========================================
+# Alert Enums & Data Models
+# ========================================
+
+class AlertSeverity(Enum):
+    """Severity tiers for alerts; each member's value is the string stored on the alert."""
+    CRITICAL = "critical"   # Immediate danger: fire, gun
+    HIGH = "high"           # Serious threat: knife, fight
+    MEDIUM = "medium"       # Suspicious: wall_climb, accident
+    LOW = "low"             # Informational: suspicious person re-appearance
+
+class AlertType(Enum):
+    """Category of detection that produced an alert; the value string is stored on the alert."""
+    OBJECT_DETECTION = "object_detection"
+    BEHAVIOR_DETECTION = "behavior_detection"
+    SUSPICIOUS_PERSON = "suspicious_person"
+
+class AlertStatus(Enum):
+    """Review state of an alert; new alerts start as PENDING."""
+    PENDING = "pending"         # Awaiting user confirmation
+    CONFIRMED = "confirmed"     # User confirmed as real threat
+    DISMISSED = "dismissed"     # User dismissed as false positive
+    AUTO_EXPIRED = "auto_expired"  # No response within timeout
+
+# Threat classification: lowercased detection class -> severity, alert type, display text, confirmation flag
+THREAT_CLASSIFICATION = {
+    # Object detections
+    "fire": {"severity": AlertSeverity.CRITICAL, "type": AlertType.OBJECT_DETECTION, 
+             "display_name": "🔥 Fire Detected", "description": "Fire/flames detected in camera feed",
+             "requires_confirmation": True},
+    "gun": {"severity": AlertSeverity.CRITICAL, "type": AlertType.OBJECT_DETECTION,
+            "display_name": "🔫 Weapon (Gun) Detected", "description": "Firearm detected in camera feed",
+            "requires_confirmation": True},
+    "knife": {"severity": AlertSeverity.HIGH, "type": AlertType.OBJECT_DETECTION,
+              "display_name": "🔪 Weapon (Knife) Detected", "description": "Knife/blade detected in camera feed",
+              "requires_confirmation": True},
+    
+    # Behavior detections
+    "fighting": {"severity": AlertSeverity.HIGH, "type": AlertType.BEHAVIOR_DETECTION,
+                 "display_name": "👊 Fight Detected", "description": "Physical altercation detected",
+                 "requires_confirmation": True},
+    "road_accident": {"severity": AlertSeverity.MEDIUM, "type": AlertType.BEHAVIOR_DETECTION,
+                      "display_name": "🚗 Accident Detected", "description": "Vehicle/road accident detected",
+                      "requires_confirmation": True},
+    "wallclimb": {"severity": AlertSeverity.MEDIUM, "type": AlertType.BEHAVIOR_DETECTION,
+                  "display_name": "🧗 Wall Climbing Detected", "description": "Unauthorized climbing/trespassing detected",
+                  "requires_confirmation": True},
+    
+    # Suspicious person re-appearance
+    "suspicious_reappearance": {"severity": AlertSeverity.LOW, "type": AlertType.SUSPICIOUS_PERSON,
+                                "display_name": "👤 Suspicious Person Re-appeared", 
+                                "description": "A previously flagged person has been detected again",
+                                "requires_confirmation": True},
+}
+
+
+@dataclass
+class RealTimeAlert:
+    """
+    One alert raised by the real-time engine, plus its review state.
+
+    The string-typed enum fields (alert_type, severity, status) hold the enum
+    values, not the enum members.
+    """
+
+    alert_id: str
+    camera_id: str
+    alert_type: str          # AlertType enum value
+    detection_class: str     # e.g., 'fire', 'gun', 'fighting'
+    severity: str            # AlertSeverity enum value
+    display_name: str
+    description: str
+    confidence: float
+    timestamp: float         # Unix timestamp
+    timestamp_iso: str       # ISO formatted datetime string
+    status: str = "pending"  # AlertStatus enum value
+
+    # --- Detection details ---
+    bounding_boxes: List[Dict] = field(default_factory=list)
+    frame_snapshot_path: Optional[str] = None  # MinIO path to frame snapshot
+    frame_snapshot_url: Optional[str] = None   # Presigned URL for frontend
+
+    # --- Suspicious person tracking ---
+    face_id: Optional[str] = None
+    face_match_score: Optional[float] = None
+    previous_events: List[str] = field(default_factory=list)  # Previous event IDs involving this person
+
+    # --- User feedback ---
+    confirmed_by: Optional[str] = None
+    confirmed_at: Optional[str] = None
+    feedback_note: Optional[str] = None
+
+    # --- Linked event in MongoDB ---
+    event_id: Optional[str] = None
+    video_id: Optional[str] = None
+
+    requires_confirmation: bool = True
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return every field as a plain dict for JSON serialization and MongoDB storage."""
+        return asdict(self)
+
+    def to_sse_payload(self) -> Dict[str, Any]:
+        """Return the lightweight subset of fields pushed to frontend SSE clients.
+
+        Confidence is rounded to 3 decimals; all other values are passed as-is.
+        """
+        # The order of names here fixes the key order of the payload
+        exposed = (
+            "alert_id", "camera_id", "alert_type", "detection_class",
+            "severity", "display_name", "description", "confidence",
+            "timestamp", "timestamp_iso", "status", "bounding_boxes",
+            "frame_snapshot_url", "face_id", "face_match_score",
+            "requires_confirmation", "event_id",
+        )
+        payload = {name: getattr(self, name) for name in exposed}
+        payload["confidence"] = round(self.confidence, 3)
+        return payload
+
+
+# ========================================
+# Alert Engine (Singleton)
+# ========================================
+
+class RealTimeAlertEngine:
+    """
+    Central alert engine that processes detections from the live stream pipeline
+    and manages the alert lifecycle:
+    
+    1. Detection comes in from LiveStreamProcessor
+    2. Engine classifies threat severity
+    3. Checks for suspicious person re-appearance
+    4. Deduplicates against recent alerts (cooldown)
+    5. Stores snapshot frame in MinIO
+    6. Pushes alert to SSE broadcast queue
+    7. Persists alert to MongoDB
+    8. Handles user confirmation/dismissal feedback
+    """
+    
+    _instance = None
+    _lock = threading.Lock()
+    
+    def __new__(cls, *args, **kwargs):
+        """Return the single shared engine, creating it (uninitialized) on first use."""
+        with cls._lock:
+            if cls._instance is None:
+                cls._instance = engine = super().__new__(cls)
+                engine._initialized = False
+            return cls._instance
+    
+    def __init__(self):
+        """Set up the in-memory alert state; only the first construction does any work."""
+        if self._initialized:  # __new__ hands back the shared instance, so skip re-initialization
+            return
+        self._initialized = True
+        # Alert queue for SSE broadcast (thread-safe deque)
+        self._alert_queue: deque = deque(maxlen=500)
+        self._alert_subscribers: List[Any] = []  # SSE subscriber queues
+        self._subscriber_lock = threading.Lock()
+        
+        # Active alerts (pending user confirmation)
+        self._active_alerts: Dict[str, RealTimeAlert] = {}
+        self._alert_history: deque = deque(maxlen=1000)
+        
+        # Cooldown tracking to prevent duplicate alerts
+        # Key: (camera_id, detection_class), Value: last_alert_timestamp
+        self._cooldown_tracker: Dict[Tuple[str, str], float] = {}
+        self._cooldown_seconds = {
+            AlertSeverity.CRITICAL.value: 10,   # 10s cooldown for critical (fire, gun)
+            AlertSeverity.HIGH.value: 15,        # 15s for high
+            AlertSeverity.MEDIUM.value: 20,      # 20s for medium
+            AlertSeverity.LOW.value: 30,         # 30s for low
+        }
+        
+        # Suspicious person tracking
+        self._flagged_faces: Dict[str, Dict] = {}  # face_id -> metadata
+        
+        # Database connections (lazy loaded)
+        self._db_manager = None
+        self._minio_client = None
+        
+        # Statistics
+        self.stats = {
+            "total_alerts": 0,
+            "confirmed_alerts": 0,
+            "dismissed_alerts": 0,
+            "pending_alerts": 0,
+            "alerts_by_type": {},
+            "alerts_by_severity": {},
+        }
+        
+        logger.info("✅ Real-Time Alert Engine initialized")
+    
+    @property
+    def db_manager(self):
+        """DatabaseManager instance, imported and constructed on first access, then cached."""
+        if self._db_manager is None:
+            from database.config import DatabaseManager  # deferred until a DB handle is actually needed
+            self._db_manager = DatabaseManager()
+        return self._db_manager
+    
+    @property
+    def alerts_collection(self):
+        """MongoDB `real_time_alerts` collection, reached through db_manager.db."""
+        return self.db_manager.db.real_time_alerts
+    
+    @property
+    def minio_client(self):
+        """MinIO client taken from db_manager.minio_client on first access, then cached."""
+        if self._minio_client is None:
+            self._minio_client = self.db_manager.minio_client
+        return self._minio_client
+    
+    # ========================================
+    # SSE Subscription Management
+    # ========================================
+    
+    def subscribe(self):
+        """
+        Register a new SSE subscriber and return its bounded queue (max 100
+        items), which the SSE endpoint reads from.
+        """
+        import queue
+        subscriber = queue.Queue(maxsize=100)
+        with self._subscriber_lock:
+            self._alert_subscribers.append(subscriber)
+        logger.info(f"📡 New SSE subscriber connected (total: {len(self._alert_subscribers)})")
+        return subscriber
+    
+    def unsubscribe(self, q):
+        """Remove an SSE subscriber queue; a queue that is not registered is ignored."""
+        with self._subscriber_lock:
+            if q in self._alert_subscribers:
+                self._alert_subscribers.remove(q)
+        logger.info(f"📡 SSE subscriber disconnected (total: {len(self._alert_subscribers)})")
+    
+    def _broadcast_alert(self, alert: RealTimeAlert):
+        """
+        Push the alert's SSE payload to every subscriber queue.
+        Subscribers whose put_nowait raises are dropped from the subscriber list.
+        """
+        payload = alert.to_sse_payload()
+        with self._subscriber_lock:
+            reachable = []
+            for subscriber in self._alert_subscribers:
+                try:
+                    subscriber.put_nowait(payload)
+                except Exception:
+                    continue
+                reachable.append(subscriber)
+            self._alert_subscribers[:] = reachable
+    
+    def _broadcast_update(self, alert_id: str, update_data: Dict):
+        """
+        Send an "alert_update" message to every subscriber, dropping any whose put fails.
+        """
+        message = {"type": "alert_update", "alert_id": alert_id, **update_data}
+        with self._subscriber_lock:
+            reachable = []
+            for subscriber in self._alert_subscribers:
+                try:
+                    subscriber.put_nowait(message)
+                except Exception:
+                    continue
+                reachable.append(subscriber)
+            self._alert_subscribers[:] = reachable
+    
+    # ========================================
+    # Core Alert Processing
+    # ========================================
+    
+    def process_detection(
+        self,
+        camera_id: str,
+        detection_class: str,
+        confidence: float,
+        bounding_boxes: List[Dict] = None,
+        frame: Any = None,
+        timestamp: float = None,
+        face_id: str = None,
+        face_match_score: float = None,
+        video_id: str = None,
+    ) -> Optional[RealTimeAlert]:
+        """
+        Process a detection from the live stream and potentially create an alert.
+        
+        Args:
+            camera_id: Camera identifier
+            detection_class: Type of detection (e.g., 'fire', 'gun', 'fighting'); lowercased and stripped before lookup
+            confidence: Detection confidence (0.0 - 1.0)
+            bounding_boxes: List of bounding box dicts
+            frame: OpenCV frame (numpy array) for snapshot
+            timestamp: Detection timestamp (defaults to time.time())
+            face_id: Face ID if facial recognition matched
+            face_match_score: Face match similarity score
+            video_id: Associated video ID (defaults to 'live_<camera_id>')
+            
+        Returns:
+            RealTimeAlert if created; None for an unknown class, an active cooldown, or low confidence
+        """
+        if timestamp is None:
+            timestamp = time.time()
+        
+        # Normalize detection class
+        detection_key = detection_class.lower().strip()
+        
+        # Look up threat classification
+        threat_info = THREAT_CLASSIFICATION.get(detection_key)
+        if threat_info is None:
+            logger.debug(f"Unknown detection class '{detection_key}', skipping alert")
+            return None
+        
+        # Check cooldown
+        if self._is_on_cooldown(camera_id, detection_key, threat_info["severity"].value):
+            logger.debug(f"Alert suppressed (cooldown): {detection_key} on {camera_id}")
+            return None
+        
+        # Check confidence threshold
+        min_confidence = self._get_min_confidence(detection_key)
+        if confidence < min_confidence:
+            logger.debug(f"Alert suppressed (low confidence {confidence:.2f} < {min_confidence}): {detection_key}")
+            return None
+        
+        # Create alert
+        now = datetime.utcnow()
+        alert = RealTimeAlert(
+            alert_id=f"alert_{uuid.uuid4().hex[:12]}",
+            camera_id=camera_id,
+            alert_type=threat_info["type"].value,
+            detection_class=detection_key,
+            severity=threat_info["severity"].value,
+            display_name=threat_info["display_name"],
+            description=threat_info["description"],
+            confidence=float(confidence),
+            timestamp=timestamp,
+            timestamp_iso=now.isoformat() + "Z",  # UTC wall clock at creation, not derived from `timestamp`
+            status=AlertStatus.PENDING.value,
+            bounding_boxes=bounding_boxes or [],
+            requires_confirmation=threat_info["requires_confirmation"],
+            video_id=video_id or f"live_{camera_id}",
+            face_id=face_id,
+            face_match_score=float(face_match_score) if face_match_score else None,  # NOTE: a score of 0.0 is stored as None
+        )
+        
+        # Save frame snapshot to MinIO
+        if frame is not None:
+            snapshot_path = self._save_frame_snapshot(camera_id, alert.alert_id, frame)
+            if snapshot_path:
+                alert.frame_snapshot_path = snapshot_path
+                alert.frame_snapshot_url = self._get_snapshot_url(snapshot_path)
+        
+        # Check suspicious person re-appearance
+        if face_id and face_match_score:
+            previous = self._check_suspicious_person(face_id)
+            if previous:
+                alert.previous_events = previous.get("event_ids", [])
+                # Upgrade alert info for re-appearance
+                alert.description = (
+                    f"{threat_info['description']}. "
+                    f"⚠️ This person was previously involved in {len(previous.get('event_ids', []))} incident(s)."
+                )
+        
+        # Store in active alerts and history
+        self._active_alerts[alert.alert_id] = alert
+        self._alert_history.appendleft(alert)
+        
+        # Update cooldown (keyed by camera and normalized detection class)
+        self._cooldown_tracker[(camera_id, detection_key)] = timestamp
+        
+        # Update stats
+        self.stats["total_alerts"] += 1
+        self.stats["pending_alerts"] += 1
+        self.stats["alerts_by_type"][detection_key] = self.stats["alerts_by_type"].get(detection_key, 0) + 1
+        self.stats["alerts_by_severity"][alert.severity] = self.stats["alerts_by_severity"].get(alert.severity, 0) + 1
+        
+        # Persist to MongoDB on a background daemon thread
+        threading.Thread(target=self._persist_alert, args=(alert,), daemon=True).start()
+        
+        # Broadcast to SSE subscribers
+        self._broadcast_alert(alert)
+        
+        logger.info(
+            f"🚨 ALERT: [{alert.severity.upper()}] {alert.display_name} "
+            f"(confidence: {confidence:.2f}) on camera {camera_id}"
+        )
+        
+        return alert
+    
+    def process_suspicious_person(
+        self,
+        camera_id: str,
+        face_id: str,
+        face_match_score: float,
+        frame: Any = None,
+        timestamp: float = None,
+        matched_person_info: Dict = None,
+    ) -> Optional[RealTimeAlert]:
+        """
+        Process a suspicious person re-appearance detection.
+        Called when facial recognition matches a previously flagged face.
+
+        Args:
+            camera_id: Camera identifier
+            face_id: Matched face ID
+            face_match_score: Similarity score (0.0-1.0); matches below 0.6 are ignored
+            frame: Current frame
+            timestamp: Detection timestamp (defaults to time.time())
+            matched_person_info: Previous incident info for this person (currently unused)
+
+        Returns:
+            RealTimeAlert, or None for a weak match, a face still on cooldown, or a suppressed alert
+        """
+        if timestamp is None:
+            timestamp = time.time()
+        if face_match_score < 0.6:  # Only alert if we have a meaningful match
+            return None
+        cooldown_key = (camera_id, f"face_{face_id}")
+        if (timestamp - self._cooldown_tracker.get(cooldown_key, 0)) < 60:  # 60s cooldown per face
+            return None
+        alert = self.process_detection(
+            camera_id=camera_id,
+            detection_class="suspicious_reappearance",
+            confidence=face_match_score,
+            frame=frame,
+            timestamp=timestamp,
+            face_id=face_id,
+            face_match_score=face_match_score,
+        )
+        # process_detection only stamps the class-level key, so stamp the per-face key here or it never fires
+        if alert is not None:
+            self._cooldown_tracker[cooldown_key] = timestamp
+        return alert
+    
+    # ========================================
+    # User Feedback (Confirm / Dismiss)
+    # ========================================
+    
+    def confirm_alert(self, alert_id: str, user_id: str = None, note: str = None) -> Optional[Dict]:
+        """
+        User confirms alert as real threat; returns the alert as a dict, or None if it is not found.
+        Updates stats, flags the alert's face (if any), updates MongoDB, and broadcasts the update.
+        """
+        alert = self._active_alerts.get(alert_id)
+        if not alert:
+            # Try loading from DB
+            alert = self._load_alert_from_db(alert_id)
+            if not alert:
+                logger.warning(f"Alert not found: {alert_id}")
+                return None
+        
+        alert.status = AlertStatus.CONFIRMED.value
+        alert.confirmed_by = user_id
+        alert.confirmed_at = datetime.utcnow().isoformat() + "Z"
+        alert.feedback_note = note
+        
+        # Update stats
+        self.stats["confirmed_alerts"] += 1
+        self.stats["pending_alerts"] = max(0, self.stats["pending_alerts"] - 1)
+        
+        # Flag the person as suspicious for future tracking
+        if alert.face_id:
+            self._flag_suspicious_person(alert.face_id, alert)
+        
+        # Update in MongoDB (background daemon thread)
+        threading.Thread(
+            target=self._update_alert_in_db, 
+            args=(alert_id, {
+                "status": alert.status,
+                "confirmed_by": user_id,
+                "confirmed_at": datetime.utcnow(),  # stored as a datetime; alert.confirmed_at holds the ISO string
+                "feedback_note": note,
+                "is_verified": True,
+                "is_false_positive": False,
+            }),
+            daemon=True
+        ).start()
+        
+        # Also update the linked event in the event collection
+        if alert.event_id:
+            threading.Thread(
+                target=self._update_linked_event,
+                args=(alert.event_id, True, False),
+                daemon=True
+            ).start()
+        
+        # Broadcast update
+        self._broadcast_update(alert_id, {
+            "status": "confirmed",
+            "confirmed_by": user_id,
+            "confirmed_at": alert.confirmed_at,
+        })
+        
+        logger.info(f"✅ Alert CONFIRMED: {alert_id} ({alert.display_name}) by {user_id}")
+        return alert.to_dict()
+    
+    def dismiss_alert(self, alert_id: str, user_id: str = None, note: str = None) -> Optional[Dict]:
+        """
+        User dismisses alert as false positive.
+        Updates MongoDB, stats, and broadcasts update.
+        """
+        alert = self._active_alerts.get(alert_id)
+        if not alert:
+            alert = self._load_alert_from_db(alert_id)
+            if not alert:
+                logger.warning(f"Alert not found: {alert_id}")
+                return None
+        
+        alert.status = AlertStatus.DISMISSED.value
+        alert.confirmed_by = user_id
+        alert.confirmed_at = datetime.utcnow().isoformat() + "Z"
+        alert.feedback_note = note
+        
+        # Update stats
+        self.stats["dismissed_alerts"] += 1
+        self.stats["pending_alerts"] = max(0, self.stats["pending_alerts"] - 1)
+        
+        # Update in MongoDB
+        threading.Thread(
+            target=self._update_alert_in_db,
+            args=(alert_id, {
+                "status": alert.status,
+                "confirmed_by": user_id,
+                "confirmed_at": datetime.utcnow(),
+                "feedback_note": note,
+                "is_verified": True,
+                "is_false_positive": True,
+            }),
+            daemon=True
+        ).start()
+        
+        # Also mark linked event as false positive
+        if alert.event_id:
+            threading.Thread(
+                target=self._update_linked_event,
+                args=(alert.event_id, True, True),
+                daemon=True
+            ).start()
+        
+        # Broadcast update
+        self._broadcast_update(alert_id, {
+            "status": "dismissed",
+            "confirmed_by": user_id,
+            "confirmed_at": alert.confirmed_at,
+        })
+        
+        logger.info(f"❌ Alert DISMISSED: {alert_id} ({alert.display_name}) by {user_id}")
+        return alert.to_dict()
+    
+    # ========================================
+    # Alert Queries
+    # ========================================
+    
+    def get_active_alerts(self, camera_id: str = None) -> List[Dict]:
+        """
+        Return the SSE payloads of all alerts still in PENDING status.
+
+        Args:
+            camera_id: When given, only alerts from this camera are returned
+
+        Returns:
+            Payload dicts ordered by their "timestamp" value, newest first
+        """
+        pending_value = AlertStatus.PENDING.value
+        payloads = [
+            candidate.to_sse_payload()
+            for candidate in self._active_alerts.values()
+            if candidate.status == pending_value
+            and (camera_id is None or candidate.camera_id == camera_id)
+        ]
+        payloads.sort(key=lambda payload: payload["timestamp"], reverse=True)
+        return payloads
+    
+    def get_alert_history(self, limit: Optional[int] = 50, camera_id: str = None,
+                          severity: str = None, status: str = None) -> List[Dict]:
+        """
+        Get alert history with optional filters.
+
+        Alerts are returned in the order they appear in the internal history.
+
+        Args:
+            limit: Maximum number of alerts to return. Zero or a negative
+                value yields an empty list; None removes the cap.
+            camera_id: Keep only alerts from this camera (ignored when falsy)
+            severity: Keep only alerts with this severity (ignored when falsy)
+            status: Keep only alerts with this status (ignored when falsy)
+
+        Returns:
+            List of alert dicts (at most `limit` entries)
+        """
+        # The cap used to be checked only after appending, so limit <= 0
+        # still returned one alert.
+        if limit is not None and limit <= 0:
+            return []
+
+        alerts = []
+        for alert in self._alert_history:
+            if camera_id and alert.camera_id != camera_id:
+                continue
+            if severity and alert.severity != severity:
+                continue
+            if status and alert.status != status:
+                continue
+            alerts.append(alert.to_dict())
+            if limit is not None and len(alerts) >= limit:
+                break
+        return alerts
+    
+    def get_alert_by_id(self, alert_id: str) -> Optional[Dict]:
+        """
+        Look up one alert, preferring the in-memory set over MongoDB.
+
+        Args:
+            alert_id: Identifier of the alert to fetch
+
+        Returns:
+            The alert as a dict, or None when it is found in neither place
+        """
+        in_memory = self._active_alerts.get(alert_id)
+        # Fall back to the database only when nothing is held in memory
+        record = in_memory if in_memory else self._load_alert_from_db(alert_id)
+        return record.to_dict() if record else None
+    
+    def get_stats(self) -> Dict:
+        """
+        Return a copy of the engine counters plus two live figures.
+
+        Returns:
+            Dict holding every entry of self.stats, "active_subscribers"
+            (number of registered alert subscribers) and
+            "active_pending_count" (in-memory alerts whose status is PENDING)
+        """
+        pending_value = AlertStatus.PENDING.value
+        still_pending = [
+            alert for alert in self._active_alerts.values()
+            if alert.status == pending_value
+        ]
+
+        snapshot = dict(self.stats)
+        snapshot["active_subscribers"] = len(self._alert_subscribers)
+        snapshot["active_pending_count"] = len(still_pending)
+        return snapshot
+    
+    # ========================================
+    # Suspicious Person Tracking
+    # ========================================
+    
+    def _flag_suspicious_person(self, face_id: str, alert: RealTimeAlert):
+        """
+        Flag a person as suspicious for future re-appearance tracking.
+
+        Creates or updates the in-memory entry for `face_id`, then persists a
+        snapshot of it to MongoDB on a background thread. Flagging the same
+        alert twice is a no-op, so repeated confirmations do not inflate the
+        incident count.
+
+        Args:
+            face_id: Identifier of the face to flag
+            alert: Alert that triggered the flag; its event_id (or alert_id
+                when no event is linked) is recorded as the incident
+        """
+        now_iso = datetime.utcnow().isoformat()
+        entry = self._flagged_faces.setdefault(face_id, {
+            "face_id": face_id,
+            "flagged_at": now_iso,
+            "event_ids": [],
+            "alert_ids": [],
+            "incident_count": 0,
+        })
+
+        # Idempotence guard: an alert already recorded for this face must not
+        # be counted as a new incident.
+        if alert.alert_id in entry["alert_ids"]:
+            logger.debug(f"Alert {alert.alert_id} already recorded for flagged face {face_id}")
+            return
+
+        entry["event_ids"].append(alert.event_id or alert.alert_id)
+        entry["alert_ids"].append(alert.alert_id)
+        entry["incident_count"] += 1
+        entry["last_seen"] = now_iso
+
+        # Also persist to MongoDB for cross-session tracking. The thread gets
+        # its own copy (including the lists) so later in-memory updates cannot
+        # change the document while it is being written.
+        snapshot = dict(
+            entry,
+            event_ids=list(entry["event_ids"]),
+            alert_ids=list(entry["alert_ids"]),
+        )
+        threading.Thread(
+            target=self._persist_flagged_person, args=(face_id, snapshot), daemon=True
+        ).start()
+
+        # str() keeps the log line safe if face_id is not a string
+        logger.info(f"🏷️ Person {str(face_id)[:8]}... flagged as suspicious (incidents: {entry['incident_count']})")
+    
+    def _check_suspicious_person(self, face_id: str) -> Optional[Dict]:
+        """
+        Tell whether a face was previously flagged or tied to a confirmed alert.
+
+        Args:
+            face_id: Identifier of the face to look up
+
+        Returns:
+            The cached flag entry when one exists; otherwise a minimal dict
+            built from the most recent confirmed alert stored for this face;
+            None when nothing is found or the database query fails
+        """
+        # The in-memory cache wins over the database
+        if face_id in self._flagged_faces:
+            return self._flagged_faces[face_id]
+
+        # Fall back to the newest confirmed alert stored for this face
+        confirmed_query = {"face_id": face_id, "status": "confirmed"}
+        try:
+            latest = self.alerts_collection.find_one(
+                confirmed_query,
+                sort=[("timestamp", -1)]
+            )
+            if not latest:
+                return None
+            return {
+                "face_id": face_id,
+                "event_ids": [latest.get("event_id", "")],
+                "incident_count": 1,
+            }
+        except Exception as e:
+            logger.warning(f"Error checking suspicious person: {e}")
+            return None
+    
+    def _persist_flagged_person(self, face_id: str, entry: Dict):
+        """
+        Upsert one flagged-person entry into the flagged_persons collection.
+
+        Args:
+            face_id: Identifier used as the lookup key of the document
+            entry: Fields written with $set; created_at is added only when
+                the document is first inserted
+
+        Failures are logged and swallowed.
+        """
+        selector = {"face_id": face_id}
+        try:
+            change = {"$set": entry, "$setOnInsert": {"created_at": datetime.utcnow()}}
+            self.db_manager.db.flagged_persons.update_one(selector, change, upsert=True)
+        except Exception as e:
+            logger.error(f"Error persisting flagged person: {e}")
+    
+    def load_flagged_persons(self):
+        """
+        Fill the in-memory flagged-face cache from the flagged_persons collection.
+
+        Documents without a face_id are skipped and missing fields get empty
+        defaults. Any failure is logged as a warning and otherwise ignored.
+        """
+        try:
+            collection = self.db_manager.db.flagged_persons
+            for record in collection.find({}):
+                person_id = record.get("face_id")
+                if not person_id:
+                    continue
+                self._flagged_faces[person_id] = {
+                    "face_id": person_id,
+                    "flagged_at": record.get("flagged_at", ""),
+                    "event_ids": record.get("event_ids", []),
+                    "alert_ids": record.get("alert_ids", []),
+                    "incident_count": record.get("incident_count", 0),
+                    "last_seen": record.get("last_seen", ""),
+                }
+            logger.info(f"📋 Loaded {len(self._flagged_faces)} flagged persons from database")
+        except Exception as e:
+            logger.warning(f"Could not load flagged persons: {e}")
+    
+    # ========================================
+    # Internal Helpers
+    # ========================================
+    
+    def _is_on_cooldown(self, camera_id: str, detection_class: str, severity: str) -> bool:
+        """
+        Report whether the last alert for this camera/class pair is too recent.
+
+        Args:
+            camera_id: Camera identifier
+            detection_class: Detection class name
+            severity: Key used to pick the cooldown length; unknown
+                severities fall back to 15 seconds
+
+        Returns:
+            True when less than the cooldown period has elapsed since the
+            recorded time (a pair never seen before counts as time 0)
+        """
+        previous = self._cooldown_tracker.get((camera_id, detection_class), 0)
+        window = self._cooldown_seconds.get(severity, 15)
+        elapsed = time.time() - previous
+        return elapsed < window
+    
+    def _get_min_confidence(self, detection_class: str) -> float:
+        """
+        Return the lowest confidence at which a detection class may alert.
+
+        Args:
+            detection_class: Detection class name
+
+        Returns:
+            The class-specific threshold, or 0.50 for classes not listed
+        """
+        floors = dict(
+            fire=0.65,
+            gun=0.60,
+            knife=0.60,
+            fighting=0.55,
+            road_accident=0.50,
+            wallclimb=0.50,
+            suspicious_reappearance=0.55,
+        )
+        if detection_class in floors:
+            return floors[detection_class]
+        return 0.50
+    
+    def _save_frame_snapshot(self, camera_id: str, alert_id: str, frame) -> Optional[str]:
+        """
+        JPEG-encode an alert frame and upload it to MinIO.
+
+        Args:
+            camera_id: Camera identifier (part of the object name and metadata)
+            alert_id: Alert identifier (part of the object name and metadata)
+            frame: Image passed to cv2.imencode
+
+        Returns:
+            "<bucket>/<object_name>" of the stored snapshot, or None when
+            encoding fails or any error occurs (errors are logged as warnings)
+        """
+        try:
+            import cv2
+            from io import BytesIO
+
+            # JPEG at quality 85
+            encoded_ok, encoded = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+            if not encoded_ok:
+                return None
+            payload = encoded.tobytes()
+
+            # The UTC timestamp with microseconds goes into the object name
+            stamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
+            object_key = f"alerts/{camera_id}/{alert_id}_{stamp}.jpg"
+            target_bucket = self.db_manager.config.minio_keyframe_bucket
+
+            self.minio_client.put_object(
+                target_bucket,
+                object_key,
+                BytesIO(payload),
+                length=len(payload),
+                content_type="image/jpeg",
+                metadata={"alert_id": alert_id, "camera_id": camera_id}
+            )
+            return f"{target_bucket}/{object_key}"
+
+        except Exception as e:
+            logger.warning(f"Failed to save alert snapshot: {e}")
+            return None
+    
+    def _get_snapshot_url(self, snapshot_path: str) -> Optional[str]:
+        """
+        Build a presigned download URL (valid for two hours) for a snapshot.
+
+        Args:
+            snapshot_path: "<bucket>/<object_name>" as returned when the
+                snapshot was saved
+
+        Returns:
+            The presigned URL, or None when the path has no "/" separator or
+            URL generation fails (failures are logged as warnings)
+        """
+        try:
+            # Split at the first "/" only: object names contain slashes too
+            bucket, separator, object_name = snapshot_path.partition("/")
+            if not separator:
+                return None
+            return self.minio_client.presigned_get_object(
+                bucket, object_name, expires=timedelta(hours=2)
+            )
+        except Exception as e:
+            logger.warning(f"Failed to generate snapshot URL: {e}")
+            return None
+    
+    def _persist_alert(self, alert: RealTimeAlert):
+        """
+        Insert the alert into the alerts collection.
+
+        The stored document is alert.to_dict() plus a created_at UTC
+        timestamp. Failures are logged and swallowed.
+
+        Args:
+            alert: Alert to store
+        """
+        try:
+            record = alert.to_dict()
+            record.update(created_at=datetime.utcnow())
+            self.alerts_collection.insert_one(record)
+            logger.debug(f"Persisted alert to MongoDB: {alert.alert_id}")
+        except Exception as e:
+            logger.error(f"Failed to persist alert: {e}")
+    
+    def _update_alert_in_db(self, alert_id: str, update_data: Dict):
+        """
+        Apply a partial update to one alert document in MongoDB.
+
+        An updated_at UTC timestamp is added to the fields being set. The
+        caller's dict is left untouched. Failures are logged and swallowed.
+
+        Args:
+            alert_id: Identifier of the alert document to update
+            update_data: Fields to write with $set
+        """
+        try:
+            # Work on a copy: writing updated_at into the caller's dict would
+            # leak into whatever else still holds a reference to it.
+            fields_to_set = {**update_data, "updated_at": datetime.utcnow()}
+            self.alerts_collection.update_one(
+                {"alert_id": alert_id},
+                {"$set": fields_to_set}
+            )
+        except Exception as e:
+            logger.error(f"Failed to update alert in DB: {e}")
+    
+    def _update_linked_event(self, event_id: str, is_verified: bool, is_false_positive: bool):
+        """
+        Copy the review verdict onto the matching document in the event collection.
+
+        Args:
+            event_id: Identifier of the event document to update
+            is_verified: Value written to the is_verified field
+            is_false_positive: Value written to the is_false_positive field
+
+        A verified_at UTC timestamp is written as well. Failures are logged
+        and swallowed.
+        """
+        try:
+            verdict = {
+                "is_verified": is_verified,
+                "is_false_positive": is_false_positive,
+                "verified_at": datetime.utcnow(),
+            }
+            events = self.db_manager.db.event
+            events.update_one({"event_id": event_id}, {"$set": verdict})
+        except Exception as e:
+            logger.error(f"Failed to update linked event: {e}")
+    
+    def _load_alert_from_db(self, alert_id: str) -> Optional[RealTimeAlert]:
+        """
+        Rebuild a RealTimeAlert from its MongoDB document.
+
+        Args:
+            alert_id: Identifier of the alert to load
+
+        Returns:
+            The reconstructed alert, or None when no document matches or an
+            error occurs (errors are logged)
+        """
+        try:
+            stored = self.alerts_collection.find_one({"alert_id": alert_id})
+            if not stored:
+                return None
+
+            # Drop the MongoDB id and the bookkeeping timestamps
+            for bookkeeping_key in ("_id", "created_at", "updated_at"):
+                stored.pop(bookkeeping_key, None)
+
+            # Pass only keys that are RealTimeAlert dataclass fields
+            known_fields = RealTimeAlert.__dataclass_fields__
+            init_kwargs = {name: value for name, value in stored.items() if name in known_fields}
+            return RealTimeAlert(**init_kwargs)
+        except Exception as e:
+            logger.error(f"Failed to load alert from DB: {e}")
+            return None
+
+
+# ========================================
+# Module-level convenience functions
+# ========================================
+
+def get_alert_engine() -> RealTimeAlertEngine:
+    """
+    Return the alert engine by calling the RealTimeAlertEngine constructor.
+
+    NOTE(review): described as a singleton; presumably the class itself
+    returns the same instance on every call - confirm in the class definition.
+    """
+    engine = RealTimeAlertEngine()
+    return engine
+
+
+def process_live_detection(camera_id: str, detection_class: str, confidence: float, 
+                           frame=None, **kwargs) -> Optional[RealTimeAlert]:
+    """
+    Forward one live detection to the alert engine.
+
+    Args:
+        camera_id: Camera identifier
+        detection_class: Detection class name
+        confidence: Detection confidence
+        frame: Frame associated with the detection (optional)
+        **kwargs: Extra keyword arguments passed through unchanged to
+            RealTimeAlertEngine.process_detection
+
+    Returns:
+        Whatever process_detection returns
+    """
+    return get_alert_engine().process_detection(
+        camera_id=camera_id,
+        detection_class=detection_class,
+        confidence=confidence,
+        frame=frame,
+        **kwargs,
+    )
diff --git a/report_generation/README.md b/report_generation/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5ab20dd60c10123173bfb43faa14ee19962ad6b9
--- /dev/null
+++ b/report_generation/README.md
@@ -0,0 +1,290 @@
+# DetectifAI Report Generation Module
+
+Automatically generates professional forensic incident reports from detected surveillance events using a local LLM.
+
+## 📋 Overview
+
+This module takes already-processed event data (detections, timestamps, captions, keyframes) and uses a local instruction-tuned LLM to generate structured, professional reports exportable as PDF or HTML.
+
+### Key Features
+
+- **Offline Operation**: Uses local LLM (Qwen2.5-3B-Instruct or Phi-3-mini)
+- **Deterministic Output**: No hallucinations - only uses provided data
+- **Professional Reports**: Structured Markdown converted to PDF/HTML
+- **Evidence Integration**: Embeds keyframes and face crops
+- **Zero Cloud Dependencies**: Everything runs locally
+
+## 🛠️ Prerequisites
+
+### System Requirements
+
+| Component | Minimum | Recommended |
+|-----------|---------|-------------|
+| RAM | 8 GB | 16 GB |
+| CPU | 4 cores | 8+ cores |
+| Disk Space | 5 GB | 10 GB |
+| GPU (optional) | None | NVIDIA with CUDA |
+
+### Software Requirements
+
+1. **Python 3.9+** (already installed for DetectifAI)
+2. **GTK3 Runtime** (for WeasyPrint PDF export on Windows)
+
+## 📦 Installation
+
+### Step 1: Install Python Dependencies
+
+```bash
+# Navigate to backend directory
+cd backend
+
+# Install report generation dependencies
+pip install llama-cpp-python huggingface_hub jinja2 markdown weasyprint reportlab Pillow
+```
+
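+**Note for GPU acceleration (optional):** the commands below use the older `-DLLAMA_CUBLAS=on` flag; recent `llama-cpp-python` releases expect `-DGGML_CUDA=on` instead, so use whichever matches your installed version.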
+```bash
+# For NVIDIA CUDA support
+CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python --force-reinstall --no-cache-dir
+
+# For Windows with CUDA
+set CMAKE_ARGS=-DLLAMA_CUBLAS=on
+pip install llama-cpp-python --force-reinstall --no-cache-dir
+```
+
+### Step 2: Install GTK3 (Required for PDF Export on Windows)
+
+WeasyPrint requires GTK3 runtime. Choose one method:
+
+**Option A: Using MSYS2 (Recommended)**
+1. Download MSYS2 from: https://www.msys2.org/
+2. Install and open MSYS2 terminal
+3. Run: `pacman -S mingw-w64-x86_64-gtk3`
+4. Add to PATH: `C:\msys64\mingw64\bin`
+
+**Option B: Standalone GTK3**
+1. Download from: https://github.com/nicothin/MSYS2-GTK-Windows
+2. Extract to `C:\GTK3`
+3. Add `C:\GTK3\bin` to system PATH
+
+**Option C: Skip PDF (Use HTML only)**
+- If GTK3 is problematic, use HTML export or the `SimplePDFExporter` (reportlab-based)
+
+### Step 3: Download the LLM Model
+
+The module auto-downloads the model on first use, but you can pre-download it:
+
+```bash
+# Run the download script
+python -c "from report_generation.llm_engine import LLMEngine; e = LLMEngine(); e.download_model()"
+```
+
+**Or manually download:**
+
+| Model | Size | License | Download |
+|-------|------|---------|----------|
+| **Qwen2.5-3B-Instruct** (Primary) | ~2 GB | Qwen Research License (verify on the model card) | [HuggingFace](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF) |
+| **Phi-3-mini-4k-instruct** (Alt) | ~2.3 GB | MIT | [HuggingFace](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf) |
+
+Download the `q4_k_m.gguf` quantized version and place it in:
+```
+backend/report_generation/models/qwen2.5-3b-instruct-q4_k_m.gguf
+```
+
+## 🚀 Usage
+
+### Basic Usage
+
+```python
+from report_generation import ReportGenerator
+
+# Initialize generator
+generator = ReportGenerator()
+
+# Generate report for a video
+report = generator.generate_report(
+    video_id="video_20240101_120000_abc123"
+)
+
+# Export as HTML
+html_path = generator.export_html(report)
+print(f"HTML report: {html_path}")
+
+# Export as PDF
+pdf_path = generator.export_pdf(report)
+print(f"PDF report: {pdf_path}")
+```
+
+### With Time Range Filter
+
+```python
+from datetime import datetime
+
+report = generator.generate_report(
+    video_id="video_20240101_120000_abc123",
+    time_range=(
+        datetime(2024, 1, 1, 12, 0, 0),
+        datetime(2024, 1, 1, 13, 0, 0)
+    )
+)
+```
+
+### Selective Sections
+
+```python
+# Generate only specific sections
+report = generator.generate_report(
+    video_id="video_123",
+    include_sections=['header', 'executive_summary', 'timeline', 'conclusion']
+)
+```
+
+### Without LLM (Fallback Mode)
+
+If the LLM fails to load, the module automatically uses fallback templates:
+
+```python
+from report_generation.config import ReportConfig
+
+# Reduce LLM resource usage for slow systems (note: these settings do not disable the LLM)
+config = ReportConfig()
+config.llm.n_gpu_layers = 0  # CPU only
+config.llm.n_threads = 2     # Reduce for slow systems
+
+generator = ReportGenerator(config)
+```
+
+## 📁 Module Structure
+
+```
+report_generation/
+├── __init__.py              # Package initialization
+├── config.py                # Configuration (LLM, paths, settings)
+├── llm_engine.py            # LLM loading and inference
+├── prompt_templates.py      # Prompt engineering templates
+├── data_collector.py        # MongoDB data collection
+├── report_builder.py        # Main orchestration
+├── html_renderer.py         # Jinja2 HTML generation
+├── pdf_exporter.py          # WeasyPrint/reportlab PDF export
+├── templates/               # HTML/CSS templates
+│   ├── report_base.html
+│   └── report_styles.css
+├── models/                  # LLM model files (.gitignored)
+│   └── .gitkeep
+└── README.md
+```
+
+## 📊 Report Sections
+
+| Section | Description | LLM Generated |
+|---------|-------------|---------------|
+| **Header** | Report ID, metadata, video info | ❌ |
+| **Executive Summary** | Overview of findings | ✅ |
+| **Incident Timeline** | Chronological event list | ✅ |
+| **Evidence Catalog** | Keyframes and face crops | ✅ |
+| **Observations** | Pattern analysis | ✅ |
+| **Conclusion** | Summary and recommendations | ✅ |
+
+## ⚙️ Configuration
+
+Edit `config.py` or pass custom config:
+
+```python
+from report_generation.config import ReportConfig, LLMConfig
+
+# Custom LLM settings
+llm_config = LLMConfig(
+    n_threads=8,           # More CPU threads
+    n_gpu_layers=35,       # Offload to GPU
+    temperature=0.1,       # Low for determinism
+    max_tokens=2048        # Max output length
+)
+
+config = ReportConfig(llm=llm_config)
+config.organization_name = "My Security Company"
+config.report_classification = "INTERNAL"
+
+generator = ReportGenerator(config)
+```
+
+## 🔧 Troubleshooting
+
+### LLM Won't Load
+
+```
+Error: Model not found
+```
+**Solution:** Download the model manually or check path in `config.py`
+
+### PDF Export Fails on Windows
+
+```
+OSError: cannot load library 'gobject-2.0-0'
+```
+**Solution:** Install GTK3 runtime and add to PATH (see Step 2)
+
+### Out of Memory
+
+```
+RuntimeError: CUDA out of memory
+```
+**Solutions:**
+- Set `n_gpu_layers=0` for CPU-only
+- Use smaller context: `n_ctx=2048`
+- Close other applications
+
+### Slow Generation
+
+**Solutions:**
+- Increase `n_threads` (up to CPU core count)
+- Enable GPU with `n_gpu_layers > 0`
+- Use smaller model (Phi-3 instead of Qwen)
+
+## 📝 API Reference
+
+### ReportGenerator
+
+```python
+class ReportGenerator:
+    def __init__(config: ReportConfig = None)
+    def initialize() -> bool
+    def generate_report(
+        video_id: str,
+        time_range: Tuple[datetime, datetime] = None,
+        include_sections: List[str] = None
+    ) -> GeneratedReport
+    def export_html(report: GeneratedReport, output_path: str = None) -> str
+    def export_pdf(report: GeneratedReport, output_path: str = None) -> str
+```
+
+### GeneratedReport
+
+```python
+@dataclass
+class GeneratedReport:
+    report_id: str
+    video_id: str
+    title: str
+    generated_at: datetime
+    time_range: Tuple[datetime, datetime]
+    sections: List[ReportSection]
+    metadata: Dict[str, Any]
+    statistics: Dict[str, Any]
+```
+
+## 🔒 Security Notes
+
+1. **Local Processing**: All data stays on your machine
+2. **No Cloud Calls**: LLM runs entirely offline
+3. **Fact-Based**: Reports only contain provided data
+4. **Confidential Marking**: Reports are marked CONFIDENTIAL by default
+
+## 📄 License
+
+This module is part of DetectifAI and follows the project license.
+
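+The recommended LLM models have the following licenses:
+- **Qwen2.5-3B-Instruct**: Qwen Research License (unlike most other Qwen2.5 sizes, the 3B variant is not released under Apache 2.0)
+- **Phi-3**: MIT
+
+Phi-3 is free for commercial use. Check the license on the Qwen2.5-3B-Instruct model card before any commercial deployment.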
diff --git a/report_generation/__init__.py b/report_generation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..090028fa65c7b425a57a66f8b65083b354976055
--- /dev/null
+++ b/report_generation/__init__.py
@@ -0,0 +1,34 @@
+"""
+DetectifAI Report Generation Module
+
+Automatically generates professional forensic incident reports from 
+detected surveillance events using a local LLM.
+
+Features:
+- Offline operation with local LLM (Qwen2.5-3B-Instruct)
+- Structured report generation (Markdown/JSON)
+- PDF and HTML export
+- Evidence image embedding
+- Deterministic, fact-based output (no hallucinations)
+
+Usage:
+    from datetime import datetime
+    from report_generation import ReportGenerator
+    
+    generator = ReportGenerator()
+    report = generator.generate_report(
+        video_id="video_20240101_120000_abc123",
+        # The package README documents time_range as a (start, end) tuple of
+        # datetime objects - NOTE(review): confirm against generate_report.
+        time_range=(datetime(2024, 1, 1, 12, 0, 0), datetime(2024, 1, 1, 13, 0, 0))
+    )
+    
+    # Export as PDF
+    generator.export_pdf(report, "incident_report.pdf")
+    
+    # Export as HTML
+    generator.export_html(report, "incident_report.html")
+"""
+
+# Public API of the package: the report generator and its configuration.
+from .report_builder import ReportGenerator
+from .config import ReportConfig
+
+__all__ = ['ReportGenerator', 'ReportConfig']
+__version__ = '1.0.0'
diff --git a/report_generation/config.py b/report_generation/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f25856b23f750757ffd477e3c928493da6f9546
--- /dev/null
+++ b/report_generation/config.py
@@ -0,0 +1,133 @@
+"""
+Report Generation Configuration
+
+Defines all configuration parameters for the report generation module,
+including LLM settings, paths, and report formatting options.
+"""
+
+import os
+from dataclasses import dataclass, field
+from typing import Optional, List, Dict
+from pathlib import Path
+
+
+@dataclass
+class LLMConfig:
+    """
+    Configuration for the local LLM engine.
+
+    Holds the primary and alternative model file names, the HuggingFace
+    repositories they can be downloaded from, the local models directory and
+    the inference parameters. The model_path / alt_model_path properties join
+    models_dir with the corresponding file name.
+    """
+    
+    # Model selection - Qwen2.5-3B-Instruct recommended for speed + quality balance
+    # Alternative: Phi-3-mini-4k-instruct (MIT license, slightly larger)
+    # NOTE(review): check the license on the Qwen2.5-3B model card before
+    # commercial use - it may differ from other Qwen2.5 sizes.
+    model_name: str = "qwen2.5-3b-instruct-q4_k_m.gguf"
+    
+    # HuggingFace repo for downloading
+    hf_repo: str = "Qwen/Qwen2.5-3B-Instruct-GGUF"
+    hf_filename: str = "qwen2.5-3b-instruct-q4_k_m.gguf"  # ~2GB quantized
+    
+    # Alternative model (MIT license, more permissive)
+    alt_model_name: str = "Phi-3-mini-4k-instruct-q4.gguf"
+    alt_hf_repo: str = "microsoft/Phi-3-mini-4k-instruct-gguf"
+    alt_hf_filename: str = "Phi-3-mini-4k-instruct-q4.gguf"  # ~2.3GB
+    
+    # Local model path: defaults to the "models" directory next to this file
+    models_dir: str = field(default_factory=lambda: os.path.join(
+        os.path.dirname(__file__), 'models'
+    ))
+    
+    # LLM inference parameters
+    n_ctx: int = 4096  # Context window size
+    n_threads: int = 4  # CPU threads (adjust based on your system)
+    n_gpu_layers: int = 0  # Set > 0 if you have GPU with CUDA
+    # Sampling temperature. NOTE(review): temperature controls output
+    # randomness, not generation speed; lower it for more repeatable reports.
+    temperature: float = 0.3
+    top_p: float = 0.9
+    max_tokens: int = 512  # Reduced for faster generation
+    repeat_penalty: float = 1.1
+    
+    # Timeout settings
+    # NOTE(review): enforcement of this limit is not visible in this file -
+    # confirm that the LLM engine actually applies it.
+    timeout_seconds: int = 60  # Max time for LLM generation
+    
+    @property
+    def model_path(self) -> str:
+        """Get full path to model file (models_dir joined with model_name)."""
+        return os.path.join(self.models_dir, self.model_name)
+    
+    @property
+    def alt_model_path(self) -> str:
+        """Get full path to alternative model file (models_dir joined with alt_model_name)."""
+        return os.path.join(self.models_dir, self.alt_model_name)
+
+
+@dataclass
+class ReportConfig:
+    """
+    Settings controlling report content, layout, export and storage.
+
+    Creating an instance also creates the output, models, templates and
+    prompts directories when they do not exist yet (see __post_init__).
+    """
+
+    # Nested settings for the local LLM engine
+    llm: LLMConfig = field(default_factory=LLMConfig)
+
+    # Where generated reports are written:
+    # <parent of this package>/video_processing_outputs/reports
+    output_dir: str = field(default_factory=lambda: os.path.join(
+        os.path.dirname(os.path.dirname(__file__)), 'video_processing_outputs', 'reports'
+    ))
+
+    # Template and prompt directories, both located next to this file
+    templates_dir: str = field(default_factory=lambda: os.path.join(
+        os.path.dirname(__file__), 'templates'
+    ))
+    prompts_dir: str = field(default_factory=lambda: os.path.join(
+        os.path.dirname(__file__), 'prompts'
+    ))
+
+    # Which parts of the report to produce, and how much evidence to include
+    include_executive_summary: bool = True
+    include_timeline: bool = True
+    include_evidence_images: bool = True
+    include_observations: bool = True
+    include_face_crops: bool = True
+    max_images_per_event: int = 3
+    max_events_in_report: int = 50
+
+    # Thumbnail rendering
+    thumbnail_width: int = 400
+    thumbnail_quality: int = 85
+
+    # PDF layout
+    pdf_page_size: str = "A4"
+    pdf_margin_mm: int = 20
+
+    # Text shown in the report metadata
+    organization_name: str = "DetectifAI Security System"
+    report_classification: str = "CONFIDENTIAL"
+
+    # Whether to read data from MongoDB (uses existing DetectifAI config)
+    use_database: bool = True
+
+    def __post_init__(self):
+        """Make sure every directory this configuration points at exists."""
+        required_dirs = (
+            self.output_dir,
+            self.llm.models_dir,
+            self.templates_dir,
+            self.prompts_dir,
+        )
+        for directory in required_dirs:
+            os.makedirs(directory, exist_ok=True)
+
+
+# Default configuration instance, created when this module is imported.
+# Instantiating ReportConfig runs __post_init__, so importing this module
+# creates the output, models, templates and prompts directories as a side
+# effect.
+default_config = ReportConfig()
+
+
+def get_report_config(**kwargs) -> ReportConfig:
+    """
+    Get report configuration with optional overrides.
+
+    Overrides are passed to the dataclass constructors instead of being set
+    afterwards, so directory overrides (output_dir, templates_dir,
+    prompts_dir, models_dir) are the directories that actually get created.
+
+    Args:
+        **kwargs: Override any config parameter. Names of ReportConfig fields
+            are applied to the report configuration; names of LLMConfig
+            fields are applied to the nested `llm` configuration. When a
+            name exists on both, the ReportConfig field wins. Unknown names
+            are ignored.
+
+    Returns:
+        ReportConfig instance
+    """
+    # Local import: the module-level import only brings in dataclass/field.
+    from dataclasses import fields, replace
+
+    # Match against declared dataclass fields only; hasattr() would also
+    # match methods and read-only properties such as model_path.
+    report_field_names = {f.name for f in fields(ReportConfig)}
+    llm_field_names = {f.name for f in fields(LLMConfig)}
+
+    report_overrides = {
+        key: value for key, value in kwargs.items() if key in report_field_names
+    }
+    llm_overrides = {
+        key: value for key, value in kwargs.items()
+        if key in llm_field_names and key not in report_field_names
+    }
+
+    if llm_overrides:
+        # Start from a caller-supplied LLMConfig when there is one; replace()
+        # builds a new object, so the caller's instance is not modified.
+        base_llm = report_overrides.get("llm") or LLMConfig()
+        report_overrides["llm"] = replace(base_llm, **llm_overrides)
+
+    return ReportConfig(**report_overrides)
diff --git a/report_generation/data_collector.py b/report_generation/data_collector.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ac16350b70a11180f7072d00e8bd8f6226cca0
--- /dev/null
+++ b/report_generation/data_collector.py
@@ -0,0 +1,600 @@
+"""
+Data Collector Module
+
+Gathers all required data from MongoDB and file system for report generation:
+- Events (object detection, behavior analysis)
+- Keyframes and their captions
+- Face detections and crops
+- Video metadata
+- Processing statistics
+"""
+
+import os
+import logging
+from datetime import datetime
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+from datetime import timedelta
+
+try:
+    from ..database.config import DatabaseManager, get_presigned_url
+except ImportError:
+    # Fallback for when running directly or in different context
+    import sys
+    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))
+    from backend.database.config import DatabaseManager, get_presigned_url
+
+from .config import ReportConfig
+
+logger = logging.getLogger(__name__)
+
+
+class DataCollector:
+    """
+    Collects all data required for report generation from
+    MongoDB and the file system.
+    """
+    
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """
+        Set up the collector and attempt to open the database connection.
+
+        Args:
+            config: Report configuration; a fresh ReportConfig is built when
+                none is supplied
+        """
+        self.config = config or ReportConfig()
+        # Both handles stay None unless _connect_database() succeeds; the
+        # collect_* methods check self.db before querying.
+        self.db = self.db_manager = None
+        self._connect_database()
+    
+    def _connect_database(self):
+        """
+        Open the MongoDB connection through DatabaseManager.
+
+        Nothing is raised: when the database is disabled in the config a
+        warning is logged, and when connecting fails the error is logged and
+        self.db is left as None.
+        """
+        if self.config.use_database:
+            try:
+                manager = DatabaseManager()
+                self.db_manager = manager
+                self.db = manager.db
+                logger.info("✅ Connected to MongoDB via DatabaseManager")
+
+                # Probe the MinIO client; a failure here is only a warning and
+                # does not undo the database connection.
+                try:
+                    if manager.minio_client:
+                        logger.info("✅ MinIO client available for Report generation")
+                except Exception as minio_error:
+                    logger.warning(f"⚠️ MinIO client not available: {minio_error}")
+
+            except Exception as connect_error:
+                logger.error(f"Failed to connect to database: {connect_error}")
+                self.db = None
+        else:
+            logger.warning("Database disabled in config")
+    
+    def collect_video_metadata(self, video_id: str) -> Dict[str, Any]:
+        """
+        Collect metadata for a video.
+
+        The video is looked up in the ``video_file`` collection first and in
+        ``video_metadata`` as a fallback. When a MinIO client is available, a
+        presigned URL valid for 24 hours is added under ``video_url``.
+
+        Args:
+            video_id: Video identifier
+
+        Returns:
+            Video metadata dictionary. It keeps its placeholder defaults when
+            the database is unavailable, the video is unknown, or the lookup
+            fails (the failure is logged, not raised).
+        """
+        metadata = {
+            'video_id': video_id,
+            'camera_id': 'Unknown',
+            'location': 'Not specified',
+            'fps': 0,
+            'duration': 0,
+            'resolution': 'Unknown',
+            'upload_time': None,
+            'processed_time': None
+        }
+
+        if self.db is None:
+            return metadata
+
+        try:
+            # Try video_file collection (DetectifAI schema) first, then video_metadata
+            video_doc = self.db.video_file.find_one({'video_id': video_id})
+            if not video_doc:
+                video_doc = self.db.video_metadata.find_one({'video_id': video_id})
+
+            if not video_doc:
+                # Unknown video: nothing to presign and nothing to merge.
+                return metadata
+
+            # The URL is generated for whichever collection supplied the
+            # document. Previously this block only ran after the primary
+            # lookup had failed, so videos found in video_file never got a URL.
+            if self.db_manager and self.db_manager.minio_client:
+                try:
+                    # Determine bucket and key
+                    bucket = video_doc.get('minio_bucket') or self.db_manager.config.minio_video_bucket
+                    key = video_doc.get('minio_object_key')
+                    if not key and video_doc.get('video_id'):
+                        # Try constructing standard key if not saved
+                        key = f"original/{video_doc.get('video_id')}/video.mp4"
+
+                    if key:
+                        url = get_presigned_url(
+                            self.db_manager.minio_client,
+                            bucket,
+                            key,
+                            expires=timedelta(hours=24)  # 24 hour validity for reports
+                        )
+                        if url:
+                            metadata['video_url'] = url
+                            logger.info(f"Generated presigned URL for video {video_id}")
+                except Exception as e:
+                    # A missing URL must not prevent the rest of the metadata
+                    # from being reported.
+                    logger.warning(f"Failed to generate video URL: {e}")
+
+            # video_file has duration_secs, fps in meta_data; video_metadata has duration, fps directly
+            nested_meta = video_doc.get('meta_data') or {}
+            duration = video_doc.get('duration_secs') or video_doc.get('duration', 0)
+            fps = video_doc.get('fps') or nested_meta.get('fps', 0)
+            res = nested_meta.get('resolution') or f"{video_doc.get('width', 0)}x{video_doc.get('height', 0)}"
+            metadata.update({
+                'camera_id': video_doc.get('camera_id', 'Unknown'),
+                'location': video_doc.get('location', 'Not specified'),
+                'fps': float(fps) if fps else 0,
+                'duration': float(duration) if duration else 0,
+                'resolution': str(res) if res else 'Unknown',
+                'upload_time': video_doc.get('upload_date') or video_doc.get('upload_time'),
+                'processed_time': video_doc.get('processed_time'),
+                'original_filename': video_doc.get('original_filename', video_doc.get('filename', 'Unknown'))
+            })
+
+        except Exception as e:
+            logger.error(f"Error collecting video metadata: {e}")
+
+        return metadata
+    
+    def collect_events(
+        self,
+        video_id: str,
+        time_range: Optional[Tuple[datetime, datetime]] = None,
+        event_types: Optional[List[str]] = None,
+        min_threat_level: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Collect events for a video within optional time range.
+
+        Args:
+            video_id: Video identifier
+            time_range: Optional (start, end) datetime tuple
+            event_types: Optional filter for event types
+            min_threat_level: Minimum threat level to include
+                ('low', 'medium', 'high' or 'critical'; anything else
+                disables the filter)
+
+        Returns:
+            List of event dictionaries ordered by ``start_timestamp_ms``.
+            Empty when there is no database; on a query error the events
+            gathered so far are returned and the error is logged.
+        """
+        events = []
+
+        if self.db is None:
+            return events
+
+        try:
+            # Build query - use event collection (DetectifAI schema: event_id, video_id, start_timestamp_ms, event_type)
+            query = {'video_id': video_id}
+
+            if time_range:
+                start_ms = int(time_range[0].timestamp() * 1000) if hasattr(time_range[0], 'timestamp') else 0
+                end_ms = int(time_range[1].timestamp() * 1000) if hasattr(time_range[1], 'timestamp') else 0
+                query['start_timestamp_ms'] = {'$gte': start_ms, '$lte': end_ms}
+
+            if event_types:
+                query['event_type'] = {'$in': event_types}
+
+            # Query event collection (not canonical_events)
+            cursor = self.db.event.find(query).sort('start_timestamp_ms', 1)
+
+            threat_order = {'critical': 4, 'high': 3, 'medium': 2, 'low': 1}
+            min_level = threat_order.get(str(min_threat_level).lower(), 0) if min_threat_level else 0
+
+            for doc in cursor:
+                start_ms = doc.get('start_timestamp_ms', 0)
+                ts = datetime.utcfromtimestamp(start_ms / 1000.0) if start_ms else None
+                desc = doc.get('description', '')
+                if not desc and doc.get('event_type'):
+                    desc = f"Event: {doc.get('event_type', 'unknown')}"
+
+                # Honour a stored threat level when the document carries one.
+                # NOTE(review): the event schema visible here does not list
+                # this field; when it is absent or unrecognised the previous
+                # fixed value 'medium' is used.
+                threat_level = str(doc.get('threat_level') or 'medium').lower()
+                if threat_level not in threat_order:
+                    threat_level = 'medium'
+
+                # bounding_boxes is either a plain list or a dict wrapping the
+                # list under 'detections'; a null/other value means none.
+                boxes = doc.get('bounding_boxes')
+                if isinstance(boxes, list):
+                    detections = boxes
+                elif isinstance(boxes, dict):
+                    detections = boxes.get('detections', [])
+                else:
+                    detections = []
+
+                event = {
+                    'event_id': str(doc.get('event_id', doc.get('_id', ''))),
+                    'event_type': doc.get('event_type', 'unknown'),
+                    'timestamp': ts,
+                    'frame_number': 0,
+                    'threat_level': threat_level,
+                    # `or 0` keeps an explicit null score from raising in float()
+                    'confidence': float(doc.get('confidence_score') or 0),
+                    'caption': desc,
+                    'description': desc,
+                    'keyframe_id': None,
+                    'keyframe_path': None,
+                    'detections': detections,
+                    'metadata': {}
+                }
+
+                if threat_order[threat_level] >= min_level:
+                    events.append(event)
+
+            logger.info(f"Collected {len(events)} events for video {video_id}")
+
+        except Exception as e:
+            logger.error(f"Error collecting events: {e}")
+
+        return events
+    
+    def collect_keyframes(
+        self,
+        video_id: str,
+        event_ids: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Collect keyframes and their captions.
+
+        Args:
+            video_id: Video identifier
+            event_ids: Optional list of event IDs to filter by
+
+        Returns:
+            List of keyframe dictionaries ordered by ``frame_number``. Each
+            carries ``image_url`` (a 24-hour presigned URL) when a MinIO client
+            is available and the document has a ``minio_path``; otherwise
+            ``image_url`` is None.
+        """
+        keyframes = []
+
+        if self.db is None:
+            return keyframes
+
+        try:
+            query = {'video_id': video_id}
+
+            if event_ids:
+                query['event_id'] = {'$in': event_ids}
+
+            # Sort by frame_number or created_at (DetectifAI keyframes use frame_number, created_at)
+            cursor = self.db.keyframes.find(query).sort('frame_number', 1)
+            for doc in cursor:
+                ts = doc.get('timestamp') or doc.get('created_at')
+                if isinstance(ts, (int, float)):
+                    ts = datetime.utcfromtimestamp(ts)
+                frame_num = doc.get('frame_number') or doc.get('frame_index', 0)
+                image_path = doc.get('image_path') or doc.get('minio_path', '')
+                minio_path = doc.get('minio_path')
+
+                # Resolved for every document: the keyframe dict below always
+                # reports a bucket, so it must be defined even when no MinIO
+                # client is configured (it used to be unbound in that case).
+                bucket = doc.get('bucket') or doc.get('minio_bucket') or 'detectifai-keyframes'
+
+                # Generate MinIO URL if available
+                image_url = None
+                if minio_path and self.db_manager and self.db_manager.minio_client:
+                    try:
+                        image_url = get_presigned_url(
+                            self.db_manager.minio_client,
+                            bucket,
+                            minio_path,
+                            expires=timedelta(hours=24)
+                        )
+                    except Exception as e:
+                        # One failed URL should not discard the other keyframes.
+                        logger.warning(f"Failed to generate keyframe URL: {e}")
+
+                keyframe = {
+                    'keyframe_id': str(doc.get('_id', doc.get('keyframe_id', ''))),
+                    'video_id': doc.get('video_id'),
+                    'timestamp': ts,
+                    'frame_number': int(frame_num) if frame_num is not None else 0,
+                    'caption': doc.get('caption', ''),
+                    'image_path': image_path,
+                    'image_url': image_url,  # Add URL
+                    'bucket': bucket,
+                    'minio_path': minio_path,
+                    'event_id': doc.get('event_id'),
+                    'detections': doc.get('objects_detected', doc.get('detections', []))
+                }
+                keyframes.append(keyframe)
+
+            logger.info(f"Collected {len(keyframes)} keyframes for video {video_id}")
+
+        except Exception as e:
+            logger.error(f"Error collecting keyframes: {e}")
+
+        return keyframes
+    
+    def collect_face_detections(
+        self,
+        video_id: str,
+        include_crops: bool = True
+    ) -> List[Dict[str, Any]]:
+        """
+        Gather the face detections stored for a video, sorted by timestamp.
+
+        Args:
+            video_id: Video identifier
+            include_crops: When False, ``crop_path`` is reported as None
+
+        Returns:
+            List of face detection dictionaries. Empty without a database; on
+            a query error the entries gathered so far are returned and the
+            error is logged.
+        """
+        faces = []
+        if self.db is None:
+            return faces
+
+        try:
+            # DetectifAI uses detected_faces collection
+            records = self.db.detected_faces.find({'video_id': video_id}).sort('timestamp', 1)
+
+            for record in records:
+                # Object key of the crop; face_image_path is the fallback and
+                # is not presigned when it is an absolute file-system path.
+                object_key = record.get('minio_object_key') or record.get('face_image_path')
+
+                crop_url = None
+                if (
+                    self.db_manager
+                    and self.db_manager.minio_client
+                    and object_key
+                    and not os.path.isabs(object_key)
+                ):
+                    # Faces often in keyframes bucket in subdir
+                    face_bucket = record.get('minio_bucket') or 'detectifai-keyframes'
+                    crop_url = get_presigned_url(
+                        self.db_manager.minio_client,
+                        face_bucket,
+                        object_key,
+                        expires=timedelta(hours=24)
+                    )
+
+                # Numeric timestamps become datetimes; anything else is
+                # passed through untouched.
+                seen_at = record.get('timestamp')
+                if isinstance(seen_at, (int, float)):
+                    seen_at = datetime.utcfromtimestamp(seen_at)
+
+                faces.append({
+                    'face_id': str(record.get('_id', record.get('face_id', ''))),
+                    'video_id': record.get('video_id'),
+                    'timestamp': seen_at,
+                    'frame_number': record.get('frame_number', 0),
+                    'confidence': record.get('confidence', 0),
+                    'bbox': record.get('bbox', {}),
+                    'person_id': record.get('person_id'),
+                    'crop_path': record.get('crop_path', '') if include_crops else None,
+                    'minio_path': object_key,
+                    'crop_url': crop_url
+                })
+
+            logger.info(f"Collected {len(faces)} face detections for video {video_id}")
+
+        except Exception as e:
+            logger.error(f"Error collecting face detections: {e}")
+
+        return faces
+    
+    def collect_captions(self, video_id: str) -> List[Dict[str, Any]]:
+        """
+        Read the caption records stored for a video, sorted by timestamp.
+
+        Args:
+            video_id: Video identifier
+
+        Returns:
+            List of caption dictionaries. Empty without a database; on a query
+            error the entries gathered so far are returned and the error is
+            logged.
+        """
+        captions = []
+        if self.db is None:
+            return captions
+
+        try:
+            # Try video_captions collection
+            records = self.db.video_captions.find({'video_id': video_id}).sort('timestamp', 1)
+
+            for record in records:
+                # Numeric timestamps become datetimes; anything else is
+                # passed through untouched.
+                captured_at = record.get('timestamp')
+                if isinstance(captured_at, (int, float)):
+                    captured_at = datetime.utcfromtimestamp(captured_at)
+
+                captions.append({
+                    'caption_id': str(record.get('_id', '')),
+                    'video_id': record.get('video_id'),
+                    'timestamp': captured_at,
+                    'frame_number': record.get('frame_number', 0),
+                    'caption': record.get('caption', ''),
+                    'keyframe_id': record.get('keyframe_id'),
+                    'confidence': record.get('confidence', 0)
+                })
+
+            logger.info(f"Collected {len(captions)} captions for video {video_id}")
+
+        except Exception as e:
+            logger.error(f"Error collecting captions: {e}")
+
+        return captions
+    
+    def collect_all_report_data(
+        self,
+        video_id: str,
+        time_range: Optional[Tuple[datetime, datetime]] = None
+    ) -> Dict[str, Any]:
+        """
+        Run every collector for a video and bundle the results with summary
+        statistics and detected patterns.
+
+        Args:
+            video_id: Video identifier
+            time_range: Optional time range filter (applied to events only)
+
+        Returns:
+            Dictionary with all collected data
+        """
+        logger.info(f"Collecting all report data for video: {video_id}")
+
+        # Collect all data types
+        metadata = self.collect_video_metadata(video_id)
+        events = self.collect_events(video_id, time_range)
+        keyframes = self.collect_keyframes(video_id)
+        faces = self.collect_face_detections(video_id)
+        captions = self.collect_captions(video_id)
+
+        # Fill in missing keyframe captions from the caption records that
+        # reference the keyframe by id (later records win on duplicate ids).
+        caption_by_keyframe = {}
+        for entry in captions:
+            if entry.get('keyframe_id'):
+                caption_by_keyframe[entry.get('keyframe_id')] = entry.get('caption')
+        for keyframe in keyframes:
+            if keyframe.get('caption'):
+                continue
+            if keyframe.get('keyframe_id') in caption_by_keyframe:
+                keyframe['caption'] = caption_by_keyframe[keyframe['keyframe_id']]
+
+        # Tally events per threat level (the four standard levels are always
+        # present, even at zero) and per event type.
+        threat_levels = {'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
+        event_types = {}
+        for event in events:
+            level = event.get('threat_level', 'low')
+            threat_levels.setdefault(level, 0)
+            threat_levels[level] += 1
+
+            kind = event.get('event_type', 'unknown')
+            event_types.setdefault(kind, 0)
+            event_types[kind] += 1
+
+        # Compute patterns for observations
+        patterns = self._compute_patterns(events, faces)
+
+        # Without an explicit range, report the span covered by the events
+        # that have a timestamp.
+        if not time_range and events:
+            event_times = [event.get('timestamp') for event in events if event.get('timestamp')]
+            if event_times:
+                time_range = (min(event_times), max(event_times))
+
+        statistics = {
+            'total_events': len(events),
+            'threat_levels': threat_levels,
+            'event_types': event_types,
+            'total_keyframes': len(keyframes),
+            'total_faces': len(faces),
+            'duration_minutes': metadata.get('duration', 0) / 60
+        }
+
+        report_data = {
+            'video_id': video_id,
+            'metadata': metadata,
+            'events': events,
+            'keyframes': keyframes,
+            'faces': faces,
+            'captions': captions,
+            'statistics': statistics,
+            'patterns': patterns,
+            'time_range': time_range,
+            'collection_time': datetime.utcnow()
+        }
+
+        logger.info(f"Report data collection complete: {len(events)} events, "
+                   f"{len(keyframes)} keyframes, {len(faces)} faces")
+
+        return report_data
+    
+    def _compute_patterns(
+        self,
+        events: List[Dict[str, Any]],
+        faces: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """
+        Compute patterns from events and faces for observations.
+
+        Args:
+            events: List of events
+            faces: List of face detections
+
+        Returns:
+            Dictionary of computed patterns:
+            - ``repeated_faces``: person/face id -> count, for ids seen more
+              than once
+            - ``time_clusters``: runs of two or more events where each event
+              is within 60 seconds of the previous one
+            - ``escalation``: 'increasing', 'decreasing' or 'stable' when at
+              least three events carry a timestamp and a threat level,
+              otherwise None
+            - ``event_correlations``: always an empty list (not computed here)
+        """
+        patterns = {
+            'time_clusters': [],
+            'escalation': None,
+            'repeated_faces': {},
+            'event_correlations': []
+        }
+
+        # Count face appearances, keyed by person id when known
+        face_counts = {}
+        for face in faces:
+            pid = face.get('person_id') or face.get('face_id', 'unknown')
+            face_counts[pid] = face_counts.get(pid, 0) + 1
+
+        # Find repeated faces (appearing more than once)
+        patterns['repeated_faces'] = {
+            fid: count for fid, count in face_counts.items() if count > 1
+        }
+
+        # Events without a timestamp cannot be ordered, so they take no part
+        # in clustering or escalation detection.
+        sorted_events = sorted(
+            (e for e in events if e.get('timestamp')),
+            key=lambda x: x['timestamp']
+        )
+
+        # Detect time clusters (events within 60 seconds of each other)
+        clusters = []
+
+        def close_cluster(members):
+            # A single isolated event is not a cluster.
+            if len(members) >= 2:
+                clusters.append({
+                    'start': members[0]['timestamp'],
+                    'end': members[-1]['timestamp'],
+                    'event_count': len(members)
+                })
+
+        current_cluster = []
+        for event in sorted_events:
+            if current_cluster:
+                gap = (event['timestamp'] - current_cluster[-1]['timestamp']).total_seconds()
+                if gap > 60:
+                    close_cluster(current_cluster)
+                    current_cluster = []
+            current_cluster.append(event)
+        close_cluster(current_cluster)
+
+        patterns['time_clusters'] = clusters
+
+        # Detect escalation (threat level trend over time)
+        if len(events) >= 3:
+            threat_order = {'low': 1, 'medium': 2, 'high': 3, 'critical': 4}
+            threat_sequence = [
+                threat_order.get(e.get('threat_level', 'low'), 1)
+                for e in sorted_events if e.get('threat_level')
+            ]
+
+            if len(threat_sequence) >= 3:
+                transitions = len(threat_sequence) - 1
+                steps = list(zip(threat_sequence, threat_sequence[1:]))
+                # Only strict changes count. Unchanged steps used to be
+                # counted as increases, so a constant threat level was
+                # reported as 'increasing'.
+                rises = sum(1 for before, after in steps if after > before)
+                falls = sum(1 for before, after in steps if after < before)
+
+                if rises / transitions > 0.6:
+                    patterns['escalation'] = 'increasing'
+                elif falls / transitions > 0.6:
+                    patterns['escalation'] = 'decreasing'
+                else:
+                    patterns['escalation'] = 'stable'
+
+        return patterns
+    
+    def get_image_path(
+        self,
+        image_id: str,
+        image_type: str = 'keyframe'
+    ) -> Optional[str]:
+        """
+        Resolve an image identifier to something a report can load.
+
+        A 24-hour presigned MinIO URL is preferred when a MinIO client is
+        available and the record has an object key; otherwise the stored
+        file path is returned.
+
+        Args:
+            image_id: Image identifier (matched against ``keyframe_id`` or
+                ``face_id`` depending on the type)
+            image_type: 'keyframe' or 'face'
+
+        Returns:
+            URL or file path, or None if the record is not found, the type is
+            not recognised, there is no database, or the lookup fails
+        """
+        if self.db is None:
+            return None
+
+        try:
+            if image_type == 'keyframe':
+                record = self.db.keyframes.find_one({'keyframe_id': image_id})
+                if record:
+                    has_client = self.db_manager and self.db_manager.minio_client
+                    if has_client and record.get('minio_path'):
+                        presigned = get_presigned_url(
+                            self.db_manager.minio_client,
+                            record.get('bucket') or record.get('minio_bucket') or 'detectifai-keyframes',
+                            record['minio_path'],
+                            timedelta(hours=24)
+                        )
+                        if presigned:
+                            return presigned
+                    # No usable URL: fall back to the stored path
+                    return record.get('image_path')
+            elif image_type == 'face':
+                record = self.db.detected_faces.find_one({'face_id': image_id})
+                if record:
+                    object_key = record.get('minio_object_key') or record.get('face_image_path')
+                    has_client = self.db_manager and self.db_manager.minio_client
+                    # Absolute paths are local files, not object keys
+                    if has_client and object_key and not os.path.isabs(object_key):
+                        presigned = get_presigned_url(
+                            self.db_manager.minio_client,
+                            record.get('minio_bucket') or 'detectifai-keyframes',
+                            object_key,
+                            timedelta(hours=24)
+                        )
+                        if presigned:
+                            return presigned
+                    # No usable URL: fall back to the stored crop path
+                    return record.get('crop_path')
+        except Exception as e:
+            logger.error(f"Error getting image path: {e}")
+
+        return None
diff --git a/report_generation/html_renderer.py b/report_generation/html_renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d7c5cbe1997069a94d5f9bf2f0378220dca709d
--- /dev/null
+++ b/report_generation/html_renderer.py
@@ -0,0 +1,869 @@
+"""
+HTML Renderer for Report Generation
+
+Renders GeneratedReport objects to HTML using Jinja2 templates.
+Handles image embedding, Markdown conversion, and styling.
+"""
+
+import os
+import base64
+import logging
+import markdown
+from datetime import datetime
+from typing import Optional, Dict, Any
+from pathlib import Path
+
+from .config import ReportConfig
+
+logger = logging.getLogger(__name__)
+
+
+class HTMLRenderer:
+    """
+    Renders reports to HTML format using Jinja2 templates.
+    """
+    
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """
+        Create a renderer with its template engine and Markdown converter ready.
+
+        Args:
+            config: Report configuration; a fresh ReportConfig is built when
+                none is supplied
+        """
+        self.config = config or ReportConfig()
+        # Jinja2 first, then Markdown - same order as the setup has always run.
+        for prepare in (self._setup_jinja, self._setup_markdown):
+            prepare()
+    
+    def _setup_jinja(self):
+        """
+        Build the Jinja2 environment used to render reports.
+
+        When ``report_base.html`` is missing from the templates directory the
+        built-in default template is written first. The ``markdown`` and
+        ``format_datetime`` filters are then registered on the environment.
+
+        Raises:
+            ImportError: Logged and re-raised, e.g. when Jinja2 is not installed
+        """
+        try:
+            from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+            # Check if templates directory exists, create default template if not
+            template_root = self.config.templates_dir
+            base_template = os.path.join(template_root, 'report_base.html')
+            if not os.path.exists(base_template):
+                self._create_default_template()
+
+            environment = Environment(
+                loader=FileSystemLoader(template_root),
+                autoescape=select_autoescape(['html', 'xml'])
+            )
+
+            # Add custom filters
+            environment.filters['markdown'] = self._markdown_filter
+            environment.filters['format_datetime'] = self._format_datetime
+            self.jinja_env = environment
+
+        except ImportError:
+            logger.error("Jinja2 not installed. Install with: pip install Jinja2")
+            raise
+    
+    def _setup_markdown(self):
+        """Create the shared Markdown converter used by the ``markdown`` filter."""
+        enabled_extensions = ['tables', 'fenced_code', 'nl2br', 'toc']
+        self.md = markdown.Markdown(extensions=enabled_extensions, output_format='html5')
+    
+    def _markdown_filter(self, text: str) -> str:
+        """
+        Jinja2 filter: render Markdown text as HTML.
+
+        Empty or missing text yields an empty string. The shared converter is
+        reset before each conversion so state from a previous document does
+        not carry over.
+        """
+        if text:
+            self.md.reset()
+            return self.md.convert(text)
+        return ''
+    
+    def _format_datetime(self, dt, format_str: str = '%Y-%m-%d %H:%M:%S') -> str:
+        """
+        Jinja2 filter: format a datetime with ``format_str``.
+
+        Values that are not datetime instances are returned as ``str(value)``.
+        """
+        return dt.strftime(format_str) if isinstance(dt, datetime) else str(dt)
+    
+    def _create_default_template(self):
+        """Create default HTML template if not exists."""
+        template_path = os.path.join(self.config.templates_dir, 'report_base.html')
+        css_path = os.path.join(self.config.templates_dir, 'report_styles.css')
+        
+        os.makedirs(self.config.templates_dir, exist_ok=True)
+        
+        # Default HTML template with improved structure
+        html_template = '''<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{{ report.title }}</title>
+    <style>
+        {% include 'report_styles.css' %}
+    </style>
+</head>
+<body>
+    <div class="report-container">
+        <!-- Report Header -->
+        <header class="report-header">
+            <div class="logo">
+                <h1>🛡️ DetectifAI</h1>
+                <p class="subtitle">AI-Powered Surveillance Analysis Report</p>
+            </div>
+            <div class="report-meta">
+                <span class="classification {{ report.metadata.classification|default('CONFIDENTIAL')|lower }}">
+                    {{ report.metadata.classification|default('CONFIDENTIAL') }}
+                </span>
+            </div>
+        </header>
+
+        <!-- Report Content -->
+        <main class="report-content">
+            {% for section in report.sections|sort(attribute='order') %}
+            <section class="report-section section-{{ section.name }}" id="section-{{ section.name }}">
+                <div class="section-content">
+                    {{ section.content|markdown|safe }}
+                </div>
+                
+                {% if section.images %}
+                <div class="evidence-gallery">
+                    <h3 class="gallery-title">Evidence Images</h3>
+                    <div class="gallery-grid">
+                        {% for img in section.images[:max_images] %}
+                        <figure class="evidence-item">
+                            {% if img.embedded_data %}
+                            <img src="data:image/jpeg;base64,{{ img.embedded_data }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image">
+                            {% elif img.path %}
+                            <img src="{{ img.path }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image">
+                            {% elif img.url %}
+                            <img src="{{ img.url }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image">
+                            {% else %}
+                            <div class="image-placeholder">
+                                <span>📷 Image: {{ img.id }}</span>
+                            </div>
+                            {% endif %}
+                            <figcaption>{{ img.caption|default('Evidence ' + loop.index|string) }}</figcaption>
+                        </figure>
+                        {% endfor %}
+                    </div>
+                </div>
+                {% endif %}
+            </section>
+            <div class="section-divider"></div>
+            {% endfor %}
+        </main>
+
+        <!-- Report Footer -->
+        <footer class="report-footer">
+            <div class="footer-content">
+                <div class="footer-info">
+                    <p><strong>Report ID:</strong> {{ report.report_id }}</p>
+                    <p><strong>Generated:</strong> {{ report.generated_at|format_datetime }}</p>
+                </div>
+                <p class="disclaimer">
+                    ⚠️ This report was automatically generated by DetectifAI using AI analysis. 
+                    All findings should be verified by qualified security personnel before taking action.
+                </p>
+            </div>
+        </footer>
+    </div>
+</body>
+</html>'''
+
+        # Default CSS styles with improved readability
+        css_styles = '''/* DetectifAI Report Styles - Enhanced Readability */
+:root {
+    --primary-color: #1a365d;
+    --secondary-color: #2d3748;
+    --accent-color: #3182ce;
+    --danger-color: #e53e3e;
+    --warning-color: #dd6b20;
+    --success-color: #38a169;
+    --bg-color: #ffffff;
+    --text-color: #2d3748;
+    --border-color: #e2e8f0;
+    --section-bg: #f8fafc;
+}
+
+* {
+    box-sizing: border-box;
+    margin: 0;
+    padding: 0;
+}
+
+body {
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    font-size: 12pt;
+    line-height: 1.8;
+    color: var(--text-color);
+    background-color: #f7fafc;
+}
+
+.report-container {
+    max-width: 210mm;
+    margin: 20px auto;
+    background: var(--bg-color);
+    box-shadow: 0 4px 30px rgba(0,0,0,0.15);
+}
+
+/* Header Styles */
+.report-header {
+    background: linear-gradient(135deg, var(--primary-color) 0%, #2c5282 100%);
+    color: white;
+    padding: 40px 50px;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    border-bottom: 4px solid var(--accent-color);
+}
+
+.report-header .logo h1 {
+    font-size: 32pt;
+    margin-bottom: 8px;
+    font-weight: 700;
+}
+
+.report-header .subtitle {
+    font-size: 12pt;
+    opacity: 0.95;
+    font-weight: 300;
+}
+
+.classification {
+    padding: 10px 20px;
+    border-radius: 6px;
+    font-weight: bold;
+    text-transform: uppercase;
+    font-size: 10pt;
+    letter-spacing: 1px;
+}
+
+.classification.confidential {
+    background: var(--danger-color);
+}
+
+.classification.internal {
+    background: var(--warning-color);
+}
+
+.classification.public {
+    background: var(--success-color);
+}
+
+/* Content Styles */
+.report-content {
+    padding: 50px;
+}
+
+.report-section {
+    margin-bottom: 50px;
+    page-break-inside: avoid;
+}
+
+.section-content {
+    background: var(--section-bg);
+    padding: 30px;
+    border-radius: 8px;
+    border-left: 4px solid var(--accent-color);
+}
+
+.section-divider {
+    height: 2px;
+    background: linear-gradient(to right, transparent, var(--border-color), transparent);
+    margin: 40px 0;
+}
+
+h1, h2, h3, h4 {
+    color: var(--primary-color);
+    margin-top: 30px;
+    margin-bottom: 20px;
+    font-weight: 600;
+}
+
+h1 { 
+    font-size: 28pt; 
+    border-bottom: 3px solid var(--accent-color); 
+    padding-bottom: 15px;
+    margin-top: 0;
+}
+
+h2 { 
+    font-size: 20pt; 
+    border-bottom: 2px solid var(--border-color); 
+    padding-bottom: 12px;
+    margin-top: 0;
+}
+
+h3 { 
+    font-size: 16pt;
+    color: var(--secondary-color);
+}
+
+h4 { 
+    font-size: 13pt;
+    color: var(--secondary-color);
+}
+
+p {
+    margin-bottom: 16px;
+    text-align: justify;
+}
+
+strong {
+    color: var(--primary-color);
+    font-weight: 600;
+}
+
+/* Table Styles - Enhanced for Professional Reports */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 25px 0;
+    font-size: 11pt;
+    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+    border-radius: 8px;
+    overflow: hidden;
+    background: white;
+}
+
+thead {
+    background: var(--primary-color);
+    color: white;
+}
+
+th, td {
+    border: 1px solid var(--border-color);
+    padding: 14px 16px;
+    text-align: left;
+    vertical-align: top;
+}
+
+th {
+    background: var(--primary-color);
+    color: white;
+    font-weight: 600;
+    text-transform: uppercase;
+    font-size: 10pt;
+    letter-spacing: 0.5px;
+    position: sticky;
+    top: 0;
+    z-index: 10;
+}
+
+tbody tr:nth-child(odd) {
+    background: white;
+}
+
+tbody tr:nth-child(even) {
+    background: #f8fafc;
+}
+
+tbody tr:hover {
+    background: #edf2f7;
+    transition: background 0.2s ease;
+}
+
+td:first-child {
+    font-weight: 600;
+    color: var(--secondary-color);
+}
+
+/* Table caption */
+table caption {
+    caption-side: top;
+    padding: 10px;
+    font-weight: 600;
+    color: var(--primary-color);
+    text-align: left;
+    font-size: 12pt;
+}
+
+/* Evidence Gallery */
+.evidence-gallery {
+    margin: 30px 0;
+    padding: 25px;
+    background: white;
+    border-radius: 8px;
+    border: 1px solid var(--border-color);
+}
+
+.gallery-title {
+    font-size: 14pt;
+    color: var(--primary-color);
+    margin-bottom: 20px;
+    padding-bottom: 10px;
+    border-bottom: 2px solid var(--accent-color);
+}
+
+.gallery-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+    gap: 25px;
+}
+
+.evidence-item {
+    border: 2px solid var(--border-color);
+    border-radius: 10px;
+    overflow: hidden;
+    background: #f8fafc;
+    transition: transform 0.2s ease, box-shadow 0.2s ease;
+}
+
+.evidence-item:hover {
+    transform: translateY(-4px);
+    box-shadow: 0 6px 20px rgba(0,0,0,0.15);
+}
+
+.evidence-image {
+    width: 100%;
+    height: 180px;
+    object-fit: cover;
+    border-bottom: 2px solid var(--border-color);
+}
+
+.image-placeholder {
+    width: 100%;
+    height: 180px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: linear-gradient(135deg, #edf2f7 0%, #e2e8f0 100%);
+    color: #718096;
+    font-size: 11pt;
+    border-bottom: 2px solid var(--border-color);
+}
+
+.evidence-item figcaption {
+    padding: 12px;
+    font-size: 10pt;
+    color: #4a5568;
+    background: white;
+    font-weight: 500;
+    text-align: center;
+    line-height: 1.4;
+    border-top: 1px solid var(--border-color);
+}
+
+.evidence-item figcaption::before {
+    content: "📷 ";
+    color: var(--accent-color);
+}
+
+/* Image counter for evidence */
+.evidence-gallery {
+    counter-reset: evidence-counter;
+}
+
+.evidence-item {
+    counter-increment: evidence-counter;
+}
+
+.evidence-item figcaption::before {
+    content: "Evidence #" counter(evidence-counter) ": ";
+    font-weight: 600;
+    color: var(--primary-color);
+    display: block;
+    margin-bottom: 4px;
+}
+
+/* Lists - Enhanced Formatting */
+ul, ol {
+    margin: 16px 0;
+    padding-left: 30px;
+}
+
+ul {
+    list-style-type: disc;
+}
+
+ul ul {
+    list-style-type: circle;
+    margin-top: 8px;
+}
+
+ol {
+    list-style-type: decimal;
+}
+
+ol ol {
+    list-style-type: lower-alpha;
+    margin-top: 8px;
+}
+
+li {
+    margin-bottom: 10px;
+    line-height: 1.6;
+    padding-left: 8px;
+}
+
+li::marker {
+    color: var(--accent-color);
+    font-weight: 600;
+}
+
+/* Blockquotes for important notes */
+blockquote {
+    margin: 20px 0;
+    padding: 20px 25px;
+    border-left: 4px solid var(--accent-color);
+    background: #f8fafc;
+    border-radius: 0 8px 8px 0;
+    font-style: italic;
+    color: var(--secondary-color);
+}
+
+blockquote p {
+    margin-bottom: 0;
+}
+
+blockquote strong {
+    font-style: normal;
+    color: var(--primary-color);
+}
+
+/* Code blocks */
+code {
+    background: #edf2f7;
+    padding: 3px 8px;
+    border-radius: 4px;
+    font-size: 10pt;
+    font-family: 'Courier New', monospace;
+    color: #c53030;
+}
+
+pre {
+    background: #2d3748;
+    color: #e2e8f0;
+    padding: 20px;
+    border-radius: 8px;
+    overflow-x: auto;
+    font-size: 10pt;
+    margin: 20px 0;
+    line-height: 1.4;
+}
+
+pre code {
+    background: transparent;
+    padding: 0;
+    color: inherit;
+}
+
+/* Definition Lists for Metadata */
+dl {
+    margin: 20px 0;
+    display: grid;
+    grid-template-columns: max-content auto;
+    gap: 12px 20px;
+}
+
+dt {
+    font-weight: 600;
+    color: var(--primary-color);
+    text-align: right;
+}
+
+dt::after {
+    content: ":";
+}
+
+dd {
+    margin: 0;
+    color: var(--text-color);
+}
+
+/* Horizontal spacing improvements */
+.section-content > *:first-child {
+    margin-top: 0;
+}
+
+.section-content > *:last-child {
+    margin-bottom: 0;
+}
+
+/* Footer Styles */
+.report-footer {
+    background: var(--section-bg);
+    padding: 30px 50px;
+    border-top: 3px solid var(--accent-color);
+    font-size: 10pt;
+    color: #718096;
+}
+
+.footer-content {
+    max-width: 100%;
+}
+
+.footer-info {
+    margin-bottom: 20px;
+    padding-bottom: 20px;
+    border-bottom: 1px solid var(--border-color);
+}
+
+.footer-info p {
+    margin-bottom: 8px;
+    text-align: left;
+}
+
+.disclaimer {
+    margin-top: 20px;
+    font-style: italic;
+    padding: 15px;
+    background: #fff3cd;
+    border-left: 4px solid var(--warning-color);
+    border-radius: 4px;
+    color: #856404;
+    text-align: left;
+}
+
+/* Print/PDF Styles - Professional Multi-Page Layout */
+@media print, (min-width: 0) {
+    body {
+        background: white;
+        font-size: 11pt;
+    }
+    
+    .report-container {
+        box-shadow: none;
+        max-width: 100%;
+        margin: 0;
+    }
+    
+    .report-header {
+        page-break-after: avoid;
+    }
+    
+    .report-section {
+        page-break-inside: avoid;
+        orphans: 3;
+        widows: 3;
+    }
+    
+    h1, h2, h3, h4, h5, h6 {
+        page-break-after: avoid;
+        page-break-inside: avoid;
+    }
+    
+    .section-divider {
+        page-break-before: avoid;
+        page-break-after: avoid;
+    }
+    
+    table {
+        page-break-inside: avoid;
+    }
+    
+    thead {
+        display: table-header-group;
+    }
+    
+    tfoot {
+        display: table-footer-group;
+    }
+    
+    .evidence-gallery {
+        page-break-inside: avoid;
+    }
+    
+    .evidence-item {
+        page-break-inside: avoid;
+    }
+    
+    /* Force proper page breaks for long sections */
+    .section-metadata,
+    .section-details,
+    .section-summary {
+        page-break-inside: avoid;
+    }
+    
+    /* Ensure images don't break awkwardly */
+    img {
+        max-width: 100%;
+        page-break-inside: avoid;
+    }
+    
+    a {
+        text-decoration: none;
+        color: var(--primary-color);
+    }
+    
+    /* Print URLs for important links */
+    a[href^="http"]::after {
+        content: " (" attr(href) ")";
+        font-size: 8pt;
+        color: #718096;
+    }
+}
+
+/* Threat Level Indicators */
+.threat-critical { 
+    color: #c53030; 
+    font-weight: bold;
+    background: #fff5f5;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-high { 
+    color: #dd6b20; 
+    font-weight: bold;
+    background: #fffaf0;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-medium { 
+    color: #d69e2e;
+    background: #fffff0;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-low { 
+    color: #38a169;
+    background: #f0fff4;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+/* Horizontal rules */
+hr {
+    border: none;
+    border-top: 2px solid var(--border-color);
+    margin: 30px 0;
+}'''
+
+        with open(template_path, 'w', encoding='utf-8') as f:
+            f.write(html_template)
+        
+        with open(css_path, 'w', encoding='utf-8') as f:
+            f.write(css_styles)
+        
+        logger.info(f"Created default templates at {self.config.templates_dir}")
+    
+    def _embed_images(self, report) -> None:
+        """
+        Attach base64-encoded file contents to each image that exists on disk.
+
+        Every image dict whose 'path' entry points at an existing file gains
+        an 'embedded_data' key holding the file bytes as a base64 string.
+        Images without a usable path are skipped; a failure while reading or
+        encoding one image is logged as a warning and the loop continues.
+
+        Args:
+            report: GeneratedReport object (modified in place)
+        """
+        for section in report.sections:
+            for img in section.images:
+                path = img.get('path')
+                # Skip entries with no path or whose file is missing.
+                if not path or not os.path.exists(path):
+                    continue
+                try:
+                    with open(path, 'rb') as f:
+                        encoded = base64.b64encode(f.read())
+                    img['embedded_data'] = encoded.decode('utf-8')
+                except Exception as e:
+                    logger.warning(f"Failed to embed image {path}: {e}")
+    
+    def _process_image_placeholders(self, content: str, data_collector) -> str:
+        """
+        Replace image placeholders with actual image references.
+
+        ``[[IMAGE:id]]`` becomes a Markdown image labelled "Keyframe id" and
+        ``[[FACE:id]]`` one labelled "Face id". The target is looked up via
+        ``data_collector.get_image_path(id, kind)`` with kind 'keyframe' or
+        'face'. A path is used only if it starts with 'http' or exists on
+        disk; otherwise an italic "not available" note is emitted instead.
+
+        Args:
+            content: Report content with placeholders
+            data_collector: DataCollector instance (may be None, in which
+                case every placeholder resolves to the "not available" note)
+
+        Returns:
+            Processed content; an empty string when content is empty or None
+        """
+        import re
+
+        # Nothing to scan; also avoids re.sub raising TypeError on None.
+        if not content:
+            return ''
+
+        def make_replacer(kind, found_label, missing_label):
+            """Build the re.sub callback for one placeholder family."""
+            def substitute(match):
+                item_id = match.group(1)
+                path = data_collector.get_image_path(item_id, kind) if data_collector else None
+                # Accept remote URLs as-is; local paths must exist.
+                if path and (path.startswith('http') or os.path.exists(path)):
+                    return f'![{found_label} {item_id}]({path})'
+                return f'*[{missing_label} {item_id} not available]*'
+            return substitute
+
+        # IMAGE placeholders are resolved before FACE placeholders.
+        content = re.sub(
+            r'\[\[IMAGE:([^\]]+)\]\]',
+            make_replacer('keyframe', 'Keyframe', 'Image'),
+            content
+        )
+        content = re.sub(
+            r'\[\[FACE:([^\]]+)\]\]',
+            make_replacer('face', 'Face', 'Face'),
+            content
+        )
+
+        return content
+    
+    def _cleanup_remaining_placeholders(self, report) -> None:
+        """
+        Strip leftover [[IMAGE:...]] and [[FACE:...]] placeholders.
+
+        Each section's content is rewritten in place so that any placeholder
+        still present is replaced by an italic 'not available' note and no
+        raw placeholder text reaches the final report. Sections with empty
+        or missing content are left untouched.
+        """
+        import re
+
+        # (pattern, replacement) pairs, applied in this order.
+        substitutions = (
+            (r'\[\[IMAGE:[^\]]+\]\]', '*[Image not available]*'),
+            (r'\[\[FACE:[^\]]+\]\]', '*[Face image not available]*'),
+        )
+        for section in report.sections:
+            text = section.content
+            if not text:
+                continue
+            for pattern, fallback in substitutions:
+                text = re.sub(pattern, fallback, text)
+            section.content = text
+    
+    def render(
+        self,
+        report,
+        output_path: Optional[str] = None,
+        embed_images: bool = True
+    ) -> str:
+        """
+        Render report to HTML.
+
+        Steps: optionally embed images as base64, replace leftover image/face
+        placeholders, render the 'report_base.html' template, and write the
+        result to disk as UTF-8.
+
+        Args:
+            report: GeneratedReport object (its sections may be modified in place)
+            output_path: Output file path (auto-generated as
+                <config.output_dir>/<report_id>.html if None)
+            embed_images: Whether to embed images as base64; only takes
+                effect when config.include_evidence_images is also set
+
+        Returns:
+            Path to generated HTML file
+
+        Raises:
+            Exception: any error from template loading, rendering or writing
+                the file is logged and re-raised unchanged
+        """
+        logger.info(f"Rendering HTML report: {report.report_id}")
+
+        # Embed images if requested
+        if embed_images and self.config.include_evidence_images:
+            self._embed_images(report)
+
+        # Clean up any remaining [[IMAGE:...]] and [[FACE:...]] placeholders in section content
+        self._cleanup_remaining_placeholders(report)
+
+        # Generate output path if not provided
+        if not output_path:
+            output_path = os.path.join(
+                self.config.output_dir,
+                f"{report.report_id}.html"
+            )
+
+        # A bare filename has an empty dirname, and os.makedirs('') raises
+        # FileNotFoundError; only create the directory when there is one.
+        output_dir = os.path.dirname(output_path)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+
+        # Render template
+        try:
+            template = self.jinja_env.get_template('report_base.html')
+
+            html_content = template.render(
+                report=report,
+                config=self.config,
+                max_images=self.config.max_images_per_event * 10,
+                generated_at=datetime.utcnow()
+            )
+
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write(html_content)
+
+            logger.info(f"✅ HTML report saved to: {output_path}")
+            return output_path
+
+        except Exception as e:
+            logger.error(f"Error rendering HTML: {e}")
+            raise
diff --git a/report_generation/llm_engine.py b/report_generation/llm_engine.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbc14361a47c1c110f854b9c7412ceecf5de6d2c
--- /dev/null
+++ b/report_generation/llm_engine.py
@@ -0,0 +1,337 @@
+"""
+Local LLM Engine for Report Generation
+
+Handles loading and inference with local GGUF models using llama-cpp-python.
+Supports Qwen2.5-3B-Instruct as the primary model, with Phi-3-mini as a fallback.
+"""
+
+import os
+import logging
+from typing import Optional, Dict, Any, List
+from pathlib import Path
+
+from .config import ReportConfig, LLMConfig
+
+logger = logging.getLogger(__name__)
+
+
+class LLMEngine:
+    """
+    Local LLM engine using llama-cpp-python.
+
+    Provides deterministic, instruction-following text generation
+    for structured report content.
+
+    The model is loaded lazily: ``generate`` calls ``load_model`` when no
+    model is loaded yet. ``_model_type`` ('qwen' or 'phi') selects the chat
+    template and the default stop sequences.
+    """
+
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """
+        Initialize the LLM engine.
+
+        No model is loaded here; see ``load_model``.
+
+        Args:
+            config: Report configuration (uses default if None)
+        """
+        self.config = config or ReportConfig()
+        self.llm_config = self.config.llm
+        self.model = None
+        self._is_loaded = False
+        self._model_type = None  # 'qwen' or 'phi'; set once a load succeeds
+
+    @property
+    def is_loaded(self) -> bool:
+        """Check if model is loaded."""
+        return self._is_loaded
+
+    def download_model(self, use_alternative: bool = False) -> str:
+        """
+        Download model from HuggingFace Hub.
+
+        The file is placed in ``llm_config.models_dir``.
+
+        Args:
+            use_alternative: If True, download Phi-3 instead of Qwen
+
+        Returns:
+            Path to downloaded model
+
+        Raises:
+            ImportError: if huggingface_hub is not installed
+        """
+        try:
+            from huggingface_hub import hf_hub_download
+        except ImportError as err:
+            raise ImportError(
+                "huggingface_hub is required to download models. "
+                "Install with: pip install huggingface_hub"
+            ) from err
+
+        if use_alternative:
+            repo_id = self.llm_config.alt_hf_repo
+            filename = self.llm_config.alt_hf_filename
+        else:
+            repo_id = self.llm_config.hf_repo
+            filename = self.llm_config.hf_filename
+
+        logger.info(f"Downloading model from {repo_id}/{filename}...")
+
+        # Download to models directory
+        downloaded_path = hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            local_dir=self.llm_config.models_dir,
+            local_dir_use_symlinks=False
+        )
+
+        logger.info(f"Model downloaded to: {downloaded_path}")
+        return downloaded_path
+
+    def load_model(self, force_reload: bool = False) -> bool:
+        """
+        Load the LLM model into memory.
+
+        The primary model file is preferred; if it is missing the alternative
+        file is used; if neither exists the primary model is downloaded.
+
+        Args:
+            force_reload: Force reload even if already loaded
+
+        Returns:
+            True if successful, False otherwise (download or load failure)
+
+        Raises:
+            ImportError: if llama-cpp-python is not installed
+        """
+        if self._is_loaded and not force_reload:
+            logger.info("Model already loaded")
+            return True
+
+        try:
+            from llama_cpp import Llama
+        except ImportError as err:
+            raise ImportError(
+                "llama-cpp-python is required. Install with:\n"
+                "pip install llama-cpp-python\n"
+                "For GPU support: CMAKE_ARGS='-DLLAMA_CUBLAS=on' pip install llama-cpp-python"
+            ) from err
+
+        # Try primary model first, then alternative. The type is tracked in a
+        # local and only committed to the instance once loading succeeds, so a
+        # failed attempt cannot leave a misleading _model_type behind.
+        model_path = self.llm_config.model_path
+        model_type = 'qwen'
+        if not os.path.exists(model_path):
+            model_path = self.llm_config.alt_model_path
+            model_type = 'phi'
+
+        if not os.path.exists(model_path):
+            logger.warning("No model found. Attempting to download...")
+            try:
+                # download_model() without arguments fetches the primary model
+                model_path = self.download_model()
+                model_type = 'qwen'
+            except Exception as e:
+                logger.error(f"Failed to download model: {e}")
+                return False
+
+        logger.info(f"Loading model from: {model_path}")
+        logger.info(f"Model type: {model_type}")
+
+        try:
+            self.model = Llama(
+                model_path=model_path,
+                n_ctx=self.llm_config.n_ctx,
+                n_threads=self.llm_config.n_threads,
+                n_gpu_layers=self.llm_config.n_gpu_layers,
+                verbose=False
+            )
+        except Exception as e:
+            logger.error(f"Failed to load model: {e}")
+            self._is_loaded = False
+            return False
+
+        self._model_type = model_type
+        self._is_loaded = True
+        logger.info("✅ Model loaded successfully")
+        return True
+
+    def _format_prompt(self, system_prompt: str, user_prompt: str) -> str:
+        """
+        Format prompt according to model's chat template.
+
+        Args:
+            system_prompt: System instructions
+            user_prompt: User's request
+
+        Returns:
+            Formatted prompt string, ending with the opening assistant tag
+        """
+        if self._model_type == 'qwen':
+            # Qwen2.5 chat format
+            return (
+                f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
+                f"<|im_start|>user\n{user_prompt}<|im_end|>\n"
+                "<|im_start|>assistant\n"
+            )
+        # Phi-3 chat format (also used when no model type is set)
+        return (
+            f"<|system|>\n{system_prompt}<|end|>\n"
+            f"<|user|>\n{user_prompt}<|end|>\n"
+            "<|assistant|>\n"
+        )
+
+    def generate(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None
+    ) -> Dict[str, Any]:
+        """
+        Generate text using the loaded LLM.
+
+        Loads the model first if it is not loaded yet. Errors are reported
+        through the returned dict instead of being raised.
+
+        Args:
+            system_prompt: System instructions for the model
+            user_prompt: The actual prompt/request
+            max_tokens: Override max tokens (uses config default if None or 0)
+            temperature: Override temperature (uses config default if None;
+                an explicit 0.0 is honoured)
+            stop_sequences: Custom stop sequences
+
+        Returns:
+            Dict with 'text', 'tokens_used', 'finish_reason', 'error'
+        """
+        if not self._is_loaded and not self.load_model():
+            return {
+                'text': '',
+                'tokens_used': 0,
+                'finish_reason': 'error',
+                'error': 'Model not loaded'
+            }
+
+        # Format the prompt
+        formatted_prompt = self._format_prompt(system_prompt, user_prompt)
+
+        # Set parameters
+        max_tokens = max_tokens or self.llm_config.max_tokens
+        # Compare against None: 0.0 is a valid temperature and must not be
+        # replaced by the configured default.
+        if temperature is None:
+            temperature = self.llm_config.temperature
+
+        # Default stop sequences based on model type
+        if stop_sequences is None:
+            if self._model_type == 'qwen':
+                stop_sequences = ["<|im_end|>", "<|im_start|>"]
+            else:
+                stop_sequences = ["<|end|>", "<|user|>"]
+
+        logger.debug(f"Generating with max_tokens={max_tokens}, temp={temperature}")
+
+        try:
+            output = self.model(
+                formatted_prompt,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=self.llm_config.top_p,
+                repeat_penalty=self.llm_config.repeat_penalty,
+                stop=stop_sequences,
+                echo=False
+            )
+
+            first_choice = output['choices'][0]
+            return {
+                'text': first_choice['text'].strip(),
+                'tokens_used': output.get('usage', {}).get('total_tokens', 0),
+                'finish_reason': first_choice.get('finish_reason', 'stop'),
+                'error': None
+            }
+
+        except Exception as e:
+            logger.error(f"Generation error: {e}")
+            return {
+                'text': '',
+                'tokens_used': 0,
+                'finish_reason': 'error',
+                'error': str(e)
+            }
+
+    @staticmethod
+    def _extract_json(text: str):
+        """
+        Parse the outermost ``{...}`` span of *text* as JSON.
+
+        Returns:
+            (parsed, error): parsed value and None on success, otherwise
+            None and a message describing why parsing failed
+        """
+        import json
+
+        # Find JSON boundaries: first '{' through last '}'
+        start = text.find('{')
+        end = text.rfind('}') + 1
+        if start == -1 or end <= start:
+            return None, 'No JSON object found in response'
+        try:
+            return json.loads(text[start:end]), None
+        except json.JSONDecodeError as e:
+            return None, str(e)
+
+    def generate_structured(
+        self,
+        system_prompt: str,
+        user_prompt: str,
+        output_format: str = 'markdown'
+    ) -> Dict[str, Any]:
+        """
+        Generate structured output (Markdown or JSON).
+
+        Args:
+            system_prompt: System instructions
+            user_prompt: User request
+            output_format: 'markdown' or 'json' (any other value is treated
+                as markdown)
+
+        Returns:
+            The dict from ``generate``. For 'json' it always carries a
+            'parsed' key (None when parsing failed) and, on failure, a
+            'parse_error' message.
+        """
+        # Add format instructions to system prompt
+        if output_format == 'json':
+            format_instruction = "\nYou MUST respond with valid JSON only. No explanations outside the JSON."
+        else:
+            format_instruction = "\nYou MUST respond with properly formatted Markdown only."
+
+        result = self.generate(system_prompt + format_instruction, user_prompt)
+
+        # Parse JSON if requested. 'parsed' is set even for an empty response
+        # so callers can read result['parsed'] without a KeyError.
+        if output_format == 'json':
+            parsed, parse_error = self._extract_json(result['text'] or '')
+            result['parsed'] = parsed
+            if parse_error is not None:
+                result['parse_error'] = parse_error
+
+        return result
+
+    def unload_model(self):
+        """Unload model from memory and reset the loaded/type state."""
+        # Dropping the reference is what releases the model object.
+        self.model = None
+        self._is_loaded = False
+        self._model_type = None
+        logger.info("Model unloaded")
+
+    def get_model_info(self) -> Dict[str, Any]:
+        """
+        Get information about the loaded model.
+
+        Note: 'model_path' is the configured path for the current model type;
+        when the type is not 'qwen' (including when nothing is loaded) the
+        alternative model path is reported.
+        """
+        if self._model_type == 'qwen':
+            configured_path = self.llm_config.model_path
+        else:
+            configured_path = self.llm_config.alt_model_path
+        return {
+            'is_loaded': self._is_loaded,
+            'model_type': self._model_type,
+            'model_path': configured_path,
+            'context_size': self.llm_config.n_ctx,
+            'gpu_layers': self.llm_config.n_gpu_layers,
+            'threads': self.llm_config.n_threads
+        }
+
+
+# Module-level engine shared between callers of get_llm_engine()
+_engine_instance: Optional[LLMEngine] = None
+
+
+def get_llm_engine(config: Optional[ReportConfig] = None) -> LLMEngine:
+    """
+    Return the shared LLM engine, creating it when needed.
+
+    A new engine is built when none exists yet or whenever a config is
+    passed; in the latter case it replaces the previously shared engine.
+
+    Args:
+        config: Optional configuration override
+
+    Returns:
+        LLMEngine instance
+    """
+    global _engine_instance
+
+    needs_new_engine = config is not None or _engine_instance is None
+    if needs_new_engine:
+        _engine_instance = LLMEngine(config)
+
+    return _engine_instance
diff --git a/report_generation/models/.gitkeep b/report_generation/models/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..43d127182f6de41073f911cea456e14d774c6ad6
--- /dev/null
+++ b/report_generation/models/.gitkeep
@@ -0,0 +1,3 @@
+# Keep this directory for LLM model files
+# Model files (.gguf) should be placed here
+# Recommended: qwen2.5-3b-instruct-q4_k_m.gguf (~2GB)
diff --git a/report_generation/pdf_exporter.py b/report_generation/pdf_exporter.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a7887bfaace41de75cc8cadb766dcec2ee44eab
--- /dev/null
+++ b/report_generation/pdf_exporter.py
@@ -0,0 +1,760 @@
+"""
+PDF Exporter for Report Generation
+
+Exports HTML reports to PDF using WeasyPrint.
+Handles page setup, headers/footers, and print styling.
+"""
+
+import os
+import logging
+from datetime import datetime
+from typing import Optional
+
+from .config import ReportConfig
+from .html_renderer import HTMLRenderer
+
+logger = logging.getLogger(__name__)
+
+
+class PDFExporter:
+    """
+    Exports reports to PDF format using WeasyPrint.
+    """
+    
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """
+        Set up the exporter, its HTML renderer, and the WeasyPrint probe.
+        
+        Args:
+            config: Report configuration; a default ReportConfig is used when omitted
+        """
+        self.config = ReportConfig() if not config else config
+        self.html_renderer = HTMLRenderer(config)
+        self._weasyprint_available = self._check_weasyprint()
+    
+    def _check_weasyprint(self) -> bool:
+        """Probe for WeasyPrint by importing it; log install hints on failure."""
+        try:
+            import weasyprint  # noqa: F401 - imported only to test availability
+        except ImportError:
+            logger.warning(
+                "WeasyPrint not installed. PDF export will not be available.\n"
+                "Install with: pip install weasyprint\n"
+                "Note: WeasyPrint requires GTK libraries. On Windows, install GTK3:\n"
+                "https://github.com/nicothin/MSYS2-GTK-Windows"
+            )
+            return False
+        except OSError as e:
+            logger.warning(
+                f"WeasyPrint dependencies not found: {e}\n"
+                "On Windows, GTK3 runtime is required. Install from:\n"
+                "https://github.com/nicothin/MSYS2-GTK-Windows"
+            )
+            return False
+        return True
+    
+    def export(
+        self,
+        report,
+        output_path: Optional[str] = None,
+        embed_images: bool = True
+    ) -> str:
+        """
+        Export report to PDF, writing an intermediate HTML file alongside it.
+        
+        Args:
+            report: GeneratedReport object
+            output_path: Output file path (auto-generated if None)
+            embed_images: Whether to embed images
+            
+        Returns:
+            Path to generated PDF file
+        
+        Raises:
+            RuntimeError: If WeasyPrint is not available
+        """
+        if not self._weasyprint_available:
+            raise RuntimeError(
+                "WeasyPrint is not available. Cannot export to PDF.\n"
+                "Install with: pip install weasyprint"
+            )
+        
+        logger.info(f"Exporting PDF report: {report.report_id}")
+        # Generate output path if not provided
+        if not output_path:
+            output_path = os.path.join(
+                self.config.output_dir,
+                f"{report.report_id}.pdf"
+            )
+        
+        # A bare filename has no directory part, and os.makedirs('') raises
+        output_dir = os.path.dirname(output_path)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+        
+        # First render to HTML. Swap only the extension: str.replace would also rewrite
+        # '.pdf' inside directory names and leave a path without '.pdf' unchanged.
+        html_path = os.path.splitext(output_path)[0] + '.html'
+        self.html_renderer.render(report, html_path, embed_images)
+        
+        from datetime import timezone
+        css_report_id = (str(report.report_id).replace('\\', '\\\\')
+                         .replace('"', '\\"').replace('\n', ' '))  # safe inside a quoted CSS string
+        # The footer text is labelled UTC, so take the time in UTC rather than local time
+        generated_utc = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
+        
+        # Convert to PDF using WeasyPrint
+        try:
+            from weasyprint import HTML, CSS
+            from weasyprint.text.fonts import FontConfiguration
+            font_config = FontConfiguration()
+            # Additional PDF-specific CSS for professional formatting
+            pdf_css = CSS(string='''
+                @page {
+                    size: A4;
+                    margin: 20mm 20mm 25mm 20mm;
+                    
+                    @top-center {
+                        content: "DETECTIFAI INCIDENT REPORT | CONFIDENTIAL";
+                        font-size: 8pt;
+                        font-weight: bold;
+                        color: #1a365d;
+                        text-transform: uppercase;
+                        letter-spacing: 1px;
+                        border-bottom: 1px solid #e2e8f0;
+                        padding-bottom: 3mm;
+                    }
+                    
+                    @bottom-left {
+                        content: "Report ID: ''' + css_report_id + '''";
+                        font-size: 8pt;
+                        color: #718096;
+                        font-family: monospace;
+                    }
+                    
+                    @bottom-center {
+                        content: "Generated: ''' + generated_utc + '''";
+                        font-size: 7pt;
+                        color: #a0aec0;
+                    }
+                    
+                    @bottom-right {
+                        content: "Page " counter(page) " of " counter(pages);
+                        font-size: 8pt;
+                        color: #718096;
+                        font-weight: bold;
+                    }
+                }
+                
+                @page :first {
+                    @top-center { content: none; }
+                    margin-top: 15mm;
+                }
+                
+                /* Better page break control */
+                h1, h2, h3 {
+                    page-break-after: avoid;
+                    page-break-inside: avoid;
+                }
+                
+                table {
+                    page-break-inside: avoid;
+                }
+                
+                .evidence-gallery {
+                    page-break-inside: avoid;
+                }
+                
+                .report-section {
+                    orphans: 3;
+                    widows: 3;
+                }
+                
+                /* Ensure good typography */
+                body {
+                    font-size: 10pt;
+                    line-height: 1.6;
+                }
+                
+                p, li {
+                    text-align: justify;
+                    hyphens: auto;
+                }
+            ''', font_config=font_config)
+            
+            # Generate PDF (the intermediate HTML file is left in place)
+            html = HTML(filename=html_path)
+            html.write_pdf(output_path, stylesheets=[pdf_css], font_config=font_config)
+            logger.info(f"✅ PDF report saved to: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Error exporting PDF: {e}")
+            raise
+    
+    def export_from_html(
+        self,
+        html_path: str,
+        output_path: Optional[str] = None,
+        report_id: str = "UNKNOWN"
+    ) -> str:
+        """
+        Export an existing HTML file to PDF.
+        
+        Args:
+            html_path: Path to HTML file
+            output_path: Output PDF path (defaults to html_path with a .pdf extension)
+            report_id: Report ID for footer
+            
+        Returns:
+            Path to generated PDF
+        
+        Raises:
+            RuntimeError: If WeasyPrint is not available
+            FileNotFoundError: If html_path does not exist
+        """
+        if not self._weasyprint_available:
+            raise RuntimeError("WeasyPrint is not available")
+        if not os.path.exists(html_path):
+            raise FileNotFoundError(f"HTML file not found: {html_path}")
+        if not output_path:
+            # Swap the extension; str.replace maps a path without '.html' onto itself (PDF overwrites source)
+            output_path = os.path.splitext(html_path)[0] + '.pdf'
+        # Escape characters that would terminate the quoted CSS string below
+        css_report_id = str(report_id).replace('\\', '\\\\').replace('"', '\\"')
+        
+        try:
+            from weasyprint import HTML, CSS
+            from weasyprint.text.fonts import FontConfiguration
+            font_config = FontConfiguration()
+            pdf_css = CSS(string=f'''
+                @page {{
+                    size: A4;
+                    margin: 20mm;
+                    
+                    @bottom-left {{
+                        content: "Report ID: {css_report_id}";
+                        font-size: 8pt;
+                        color: #718096;
+                    }}
+                    
+                    @bottom-right {{
+                        content: "Page " counter(page);
+                        font-size: 8pt;
+                        color: #718096;
+                    }}
+                }}
+            ''', font_config=font_config)
+            html = HTML(filename=html_path)
+            html.write_pdf(output_path, stylesheets=[pdf_css], font_config=font_config)
+            logger.info(f"✅ PDF exported to: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Error exporting PDF from HTML: {e}")
+            raise
+
+
+class SimplePDFExporter:
+    """
+    Fallback PDF exporter using reportlab (simpler, fewer dependencies).
+    Use this if WeasyPrint installation is problematic.
+    """
+    
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """Store the config (a default ReportConfig if omitted) and probe for reportlab."""
+        self.config = ReportConfig() if not config else config
+        self._check_reportlab()
+    
+    def _check_reportlab(self) -> bool:
+        """Report whether the reportlab modules probed here can be imported."""
+        try:
+            from reportlab.lib.pagesizes import A4  # noqa: F401
+            from reportlab.platypus import SimpleDocTemplate  # noqa: F401
+        except ImportError:
+            logger.warning(
+                "reportlab not installed. Install with: pip install reportlab"
+            )
+            return False
+        return True
+    
+    def _convert_inline_markdown(self, text: str) -> str:
+        """Convert inline markdown (bold, italic, links) to ReportLab XML tags.
+
+        XML special characters are escaped before any tag is generated, so the
+        only markup in the result is the <b>/<i> pairs produced here; tags typed
+        literally in the input come out as visible text.
+        """
+        import re
+        # Remove image markdown references (they are handled separately)
+        text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', r'[\1]', text)
+        # Convert markdown links to just text
+        text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
+        # Escape first ('&' before '<'/'>' so the new entities are not re-escaped)
+        text = text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
+        # Convert **bold** to <b>bold</b>
+        text = re.sub(r'\*\*([^*]+)\*\*', r'<b>\1</b>', text)
+        # Convert *italic* to <i>italic</i>
+        text = re.sub(r'\*([^*]+)\*', r'<i>\1</i>', text)
+        # Strip remaining placeholder markers
+        text = re.sub(r'\[\[IMAGE:[^\]]*\]\]', '[Image not available]', text)
+        text = re.sub(r'\[\[FACE:[^\]]*\]\]', '[Face image not available]', text)
+        return text
+    
+    def _parse_markdown_table(self, lines):
+        """Turn markdown table lines into rows of stripped cell strings, dropping separator rows."""
+        separator_chars = {'-', ':', ' '}
+        parsed = []
+        for raw in lines:
+            text = raw.strip()
+            if '|' not in text:
+                continue
+            if text.startswith('|') and text.endswith('|'):
+                # Fully delimited row: empty cells are kept
+                cells = [part.strip() for part in text[1:-1].split('|')]
+            else:
+                # Loosely delimited row: empty fragments are discarded
+                cells = [part.strip() for part in text.split('|') if part.strip()]
+            # Rows made only of dashes/colons (e.g. | --- | :-: |) are separators
+            if all(set(cell) <= separator_chars for cell in cells):
+                continue
+            parsed.append(cells)
+        return parsed
+    
+    def _download_image(self, url: str, max_width: float = 400, max_height: float = 250):
+        """Download an image from URL and return a ReportLab Image element.
+
+        The bytes are held in memory and handed to ReportLab as a file-like
+        object, so no temporary file is left behind on success or failure.
+        Args:
+            url: Address of the image to fetch
+            max_width: Upper bound for the drawn width
+            max_height: Upper bound for the drawn height
+
+        Returns:
+            A ReportLab Image scaled down (never up) to fit the bounds, or None
+            if the download or image setup fails
+        """
+        try:
+            import io
+            import urllib.request
+            from reportlab.platypus import Image
+            req = urllib.request.Request(url, headers={'User-Agent': 'DetectifAI-Report/1.0'})
+            with urllib.request.urlopen(req, timeout=10) as response:
+                img = Image(io.BytesIO(response.read()))
+            # Scale to fit within max dimensions while maintaining aspect ratio
+            iw, ih = img.drawWidth, img.drawHeight
+            if iw > 0 and ih > 0:
+                ratio = min(max_width / iw, max_height / ih, 1.0)
+                img.drawWidth, img.drawHeight = iw * ratio, ih * ratio
+            return img
+        except Exception as e:
+            logger.debug(f"Could not download image from {url}: {e}")
+            return None
+    
+    def _parse_section_content(self, content: str, styles: dict) -> list:
+        """Parse markdown content line by line into a list of ReportLab flowables (rules, headings, images, tables, lists, paragraphs)."""
+        import re
+        from reportlab.platypus import Paragraph, Spacer, Table, TableStyle, HRFlowable
+        from reportlab.lib import colors
+        from reportlab.lib.units import mm
+        
+        elements = []
+        lines = content.split('\n')
+        i = 0  # index of the next unconsumed line; every branch below advances it
+        
+        while i < len(lines):
+            line = lines[i].rstrip()
+            stripped = line.strip()
+            
+            # Skip empty lines
+            if not stripped:
+                i += 1
+                continue
+            
+            # Horizontal rule
+            if stripped in ('---', '***', '___'):
+                elements.append(Spacer(1, 6))
+                elements.append(HRFlowable(
+                    width="100%", thickness=1.5,
+                    color=colors.HexColor('#e2e8f0'),
+                    spaceBefore=6, spaceAfter=6
+                ))
+                i += 1
+                continue
+            
+            # Headings: '# ', '## ' and '### ' use the ReportTitle, SectionTitle and SubsectionTitle styles
+            if stripped.startswith('# ') and not stripped.startswith('## '):
+                text = self._convert_inline_markdown(stripped[2:].strip())
+                elements.append(Spacer(1, 12))
+                elements.append(Paragraph(text, styles['ReportTitle']))
+                elements.append(Spacer(1, 8))
+                i += 1
+                continue
+            
+            if stripped.startswith('## '):
+                text = self._convert_inline_markdown(stripped[3:].strip())
+                elements.append(Spacer(1, 10))
+                elements.append(Paragraph(text, styles['SectionTitle']))
+                elements.append(Spacer(1, 6))
+                i += 1
+                continue
+            
+            if stripped.startswith('### '):
+                text = self._convert_inline_markdown(stripped[4:].strip())
+                elements.append(Spacer(1, 8))
+                elements.append(Paragraph(text, styles['SubsectionTitle']))
+                elements.append(Spacer(1, 4))
+                i += 1
+                continue
+            
+            # Image references: a line consisting solely of ![alt](url)
+            img_match = re.match(r'^!\[([^\]]*)\]\(([^)]+)\)\s*$', stripped)
+            if img_match:
+                alt_text = img_match.group(1)
+                img_url = img_match.group(2)
+                img_element = self._download_image(img_url)
+                if img_element:
+                    elements.append(Spacer(1, 4))
+                    elements.append(img_element)
+                    if alt_text:
+                        elements.append(Paragraph(
+                            f"<i>{self._convert_inline_markdown(alt_text)}</i>",
+                            styles['ImageCaption']
+                        ))
+                    elements.append(Spacer(1, 6))
+                else:
+                    # Download failed: emit a caption-style note instead (alt text is inserted as-is)
+                    elements.append(Paragraph(
+                        f"<i>[{alt_text or 'Image'} — could not be loaded]</i>",
+                        styles['ImageCaption']
+                    ))
+                i += 1
+                continue
+            
+            # Table (collect consecutive lines with |)
+            if '|' in stripped and (stripped.startswith('|') or stripped.count('|') >= 2):
+                table_lines = []
+                while i < len(lines) and ('|' in lines[i].strip()):
+                    table_lines.append(lines[i])
+                    i += 1
+                
+                rows = self._parse_markdown_table(table_lines)
+                if rows and len(rows) >= 1:
+                    # Convert cells to Paragraphs for text wrapping
+                    col_count = max(len(r) for r in rows)
+                    # Normalize all rows to same column count
+                    for r_idx in range(len(rows)):
+                        while len(rows[r_idx]) < col_count:
+                            rows[r_idx].append('')
+                    
+                    table_data = []
+                    for r_idx, row in enumerate(rows):
+                        table_row = []
+                        for cell in row:
+                            cell_text = self._convert_inline_markdown(cell)
+                            style_name = 'TableHeader' if r_idx == 0 else 'TableCell'  # first row is the header
+                            table_row.append(Paragraph(cell_text, styles[style_name]))
+                        table_data.append(table_row)
+                    
+                    # Calculate column widths (distribute evenly)
+                    available_width = 155 * mm  # A4 width minus margins
+                    col_widths = [available_width / col_count] * col_count
+                    
+                    table = Table(table_data, colWidths=col_widths, repeatRows=1)
+                    table.setStyle(TableStyle([
+                        # Header row
+                        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#1a365d')),
+                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
+                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                        ('FONTSIZE', (0, 0), (-1, 0), 9),
+                        # Body rows
+                        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
+                        ('FONTSIZE', (0, 1), (-1, -1), 9),
+                        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [colors.white, colors.HexColor('#f8fafc')]),
+                        # Grid
+                        ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#e2e8f0')),
+                        ('TOPPADDING', (0, 0), (-1, -1), 6),
+                        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
+                        ('LEFTPADDING', (0, 0), (-1, -1), 8),
+                        ('RIGHTPADDING', (0, 0), (-1, -1), 8),
+                        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+                    ]))
+                    elements.append(Spacer(1, 6))
+                    elements.append(table)
+                    elements.append(Spacer(1, 8))
+                continue  # i already points past the collected table lines
+            
+            # Bullet list items
+            if stripped.startswith('- ') or stripped.startswith('* '):
+                bullet_text = self._convert_inline_markdown(stripped[2:].strip())
+                elements.append(Paragraph(
+                    f"\u2022  {bullet_text}",
+                    styles['BulletItem']
+                ))
+                elements.append(Spacer(1, 3))
+                i += 1
+                continue
+            
+            # Numbered list items (rendered with the same BulletItem style)
+            num_match = re.match(r'^(\d+)\.\s+(.*)', stripped)
+            if num_match:
+                num = num_match.group(1)
+                item_text = self._convert_inline_markdown(num_match.group(2).strip())
+                elements.append(Paragraph(
+                    f"{num}.  {item_text}",
+                    styles['BulletItem']
+                ))
+                elements.append(Spacer(1, 3))
+                i += 1
+                continue
+            
+            # Regular paragraph — collect consecutive non-special lines
+            para_lines = [stripped]
+            i += 1
+            while i < len(lines):
+                next_stripped = lines[i].strip()
+                if not next_stripped:
+                    i += 1  # consume the blank line that ends the paragraph
+                    break
+                # Stop if next line is a special element
+                if (next_stripped.startswith('#') or next_stripped.startswith('- ') or
+                    next_stripped.startswith('* ') or next_stripped.startswith('|') or
+                    next_stripped in ('---', '***', '___') or
+                    re.match(r'^\d+\.\s+', next_stripped) or
+                    re.match(r'^!\[', next_stripped)):
+                    break
+                para_lines.append(next_stripped)
+                i += 1
+            
+            para_text = ' '.join(para_lines)
+            para_text = self._convert_inline_markdown(para_text)
+            if para_text.strip():
+                elements.append(Paragraph(para_text, styles['BodyText']))
+                elements.append(Spacer(1, 6))
+        
+        return elements
+    
+    def export(self, report, output_path: Optional[str] = None) -> str:
+        """
+        Export report to PDF using reportlab with proper formatting.
+        
+        Handles markdown headings, tables, bold text, bullet lists,
+        images from URLs, and proper section structure.
+        
+        Args:
+            report: Report object to render
+            output_path: Output file path (auto-generated if None)
+        
+        Returns:
+            Path to generated PDF file
+        
+        Raises:
+            RuntimeError: If reportlab is not installed
+        """
+        try:
+            from reportlab.lib.pagesizes import A4
+            from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+            from reportlab.lib.units import mm
+            from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY
+            from reportlab.platypus import (
+                SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, HRFlowable
+            )
+            from reportlab.lib import colors
+        except ImportError:
+            raise RuntimeError("reportlab is not installed")
+        
+        if not output_path:
+            output_path = os.path.join(
+                self.config.output_dir,
+                f"{report.report_id}.pdf"
+            )
+        # A bare filename has no directory part, and os.makedirs('') raises
+        output_dir = os.path.dirname(output_path)
+        if output_dir:
+            os.makedirs(output_dir, exist_ok=True)
+        
+        # Create the document (A4, 20mm side margins, 25mm top/bottom margins)
+        doc = SimpleDocTemplate(
+            output_path,
+            pagesize=A4,
+            rightMargin=20*mm,
+            leftMargin=20*mm,
+            topMargin=25*mm,
+            bottomMargin=25*mm,
+            title=f"DetectifAI Incident Report - {report.video_id}",
+            author="DetectifAI Security System"
+        )
+        # Define custom styles
+        styles = getSampleStyleSheet()
+        styles.add(ParagraphStyle(
+            name='ReportTitle',
+            parent=styles['Heading1'],
+            fontSize=22,
+            spaceAfter=16,
+            spaceBefore=0,
+            textColor=colors.HexColor('#1a365d'),
+            leading=28
+        ))
+        styles.add(ParagraphStyle(
+            name='SectionTitle',
+            parent=styles['Heading2'],
+            fontSize=15,
+            spaceBefore=16,
+            spaceAfter=8,
+            textColor=colors.HexColor('#1a365d'),
+            leading=20,
+            borderWidth=0,
+            borderColor=colors.HexColor('#3182ce'),
+            borderPadding=(0, 0, 4, 0)
+        ))
+        styles.add(ParagraphStyle(
+            name='SubsectionTitle',
+            parent=styles['Heading3'],
+            fontSize=12,
+            spaceBefore=10,
+            spaceAfter=6,
+            textColor=colors.HexColor('#2d3748'),
+            leading=16
+        ))
+        # getSampleStyleSheet() already defines 'BodyText', and styles.add()
+        # raises KeyError on a duplicate name, so restyle the existing entry
+        body_text = styles['BodyText']
+        body_text.fontSize = 10
+        body_text.leading = 15
+        body_text.alignment = TA_JUSTIFY
+        body_text.spaceBefore = 2
+        body_text.spaceAfter = 4
+        body_text.textColor = colors.HexColor('#2d3748')
+        styles.add(ParagraphStyle(
+            name='BulletItem',
+            parent=styles['Normal'],
+            fontSize=10,
+            leading=14,
+            leftIndent=20,
+            spaceBefore=2,
+            spaceAfter=2,
+            textColor=colors.HexColor('#2d3748')
+        ))
+        styles.add(ParagraphStyle(
+            name='TableHeader',
+            parent=styles['Normal'],
+            fontSize=9,
+            leading=12,
+            textColor=colors.white,
+            fontName='Helvetica-Bold'
+        ))
+        styles.add(ParagraphStyle(
+            name='TableCell',
+            parent=styles['Normal'],
+            fontSize=9,
+            leading=12,
+            textColor=colors.HexColor('#2d3748')
+        ))
+        styles.add(ParagraphStyle(
+            name='ImageCaption',
+            parent=styles['Normal'],
+            fontSize=8,
+            leading=11,
+            alignment=TA_CENTER,
+            textColor=colors.HexColor('#718096'),
+            spaceBefore=2,
+            spaceAfter=6
+        ))
+        styles.add(ParagraphStyle(
+            name='FooterText',
+            parent=styles['Normal'],
+            fontSize=8,
+            leading=10,
+            textColor=colors.HexColor('#718096'),
+            alignment=TA_CENTER
+        ))
+        # Build story (list of flowable elements)
+        story = []
+        # --- Title Banner ---
+        story.append(Paragraph("DetectifAI", styles['ReportTitle']))
+        story.append(Paragraph(
+            "<i>AI-Powered Surveillance Analysis Report</i>",
+            styles['BodyText']
+        ))
+        story.append(Spacer(1, 6))
+        story.append(HRFlowable(
+            width="100%", thickness=2,
+            color=colors.HexColor('#3182ce'),
+            spaceBefore=4, spaceAfter=12
+        ))
+        
+        # --- Report Metadata Table ---
+        meta_data = [
+            ['Report ID:', report.report_id],
+            ['Video ID:', report.video_id],
+            ['Classification:', report.metadata.get('classification', 'CONFIDENTIAL')],
+            ['Generated:', report.generated_at.strftime('%Y-%m-%d %H:%M:%S UTC')],
+        ]
+        meta_table = Table(meta_data, colWidths=[35*mm, 120*mm])
+        meta_table.setStyle(TableStyle([
+            ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
+            ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
+            ('FONTSIZE', (0, 0), (-1, -1), 10),
+            ('TEXTCOLOR', (0, 0), (0, -1), colors.HexColor('#1a365d')),
+            ('TEXTCOLOR', (1, 0), (1, -1), colors.HexColor('#2d3748')),
+            ('TOPPADDING', (0, 0), (-1, -1), 4),
+            ('BOTTOMPADDING', (0, 0), (-1, -1), 4),
+            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
+        ]))
+        story.append(meta_table)
+        story.append(Spacer(1, 12))
+        story.append(HRFlowable(
+            width="100%", thickness=1,
+            color=colors.HexColor('#e2e8f0'),
+            spaceBefore=6, spaceAfter=12
+        ))
+        
+        # --- Sections ---
+        for section in sorted(report.sections, key=lambda x: x.order):
+            content = section.content or ''
+            # Parse section content markdown into ReportLab elements
+            section_elements = self._parse_section_content(content, styles)
+            story.extend(section_elements)
+            # Add evidence gallery images if any
+            if section.images:
+                for img_data in section.images:
+                    url = img_data.get('url')
+                    caption = img_data.get('caption', '')
+                    if url:
+                        img_el = self._download_image(url)
+                        if img_el:
+                            story.append(Spacer(1, 4))
+                            story.append(img_el)
+                            if caption:
+                                story.append(Paragraph(
+                                    f"<i>{self._convert_inline_markdown(caption)}</i>",
+                                    styles['ImageCaption']
+                                ))
+                            story.append(Spacer(1, 6))
+            # Section separator
+            story.append(Spacer(1, 8))
+            story.append(HRFlowable(
+                width="80%", thickness=0.5,
+                color=colors.HexColor('#e2e8f0'),
+                spaceBefore=4, spaceAfter=8
+            ))
+        
+        # --- Footer ---
+        story.append(Spacer(1, 20))
+        story.append(HRFlowable(
+            width="100%", thickness=1.5,
+            color=colors.HexColor('#3182ce'),
+            spaceBefore=6, spaceAfter=8
+        ))
+        story.append(Paragraph(
+            f"Report ID: {report.report_id} | Generated: {report.generated_at.strftime('%Y-%m-%d %H:%M:%S UTC')}",
+            styles['FooterText']
+        ))
+        story.append(Spacer(1, 4))
+        story.append(Paragraph(
+            "<i>This report was automatically generated by DetectifAI. "
+            "All findings are based on AI analysis and should be verified by qualified personnel.</i>",
+            styles['FooterText']
+        ))
+        # Build PDF
+        doc.build(story)
+        logger.info(f"✅ Simple PDF report saved to: {output_path}")
+        return output_path
diff --git a/report_generation/prompt_templates.py b/report_generation/prompt_templates.py
new file mode 100644
index 0000000000000000000000000000000000000000..6876c5bc46a5f7fbf650441ffbf5f4feee851a0b
--- /dev/null
+++ b/report_generation/prompt_templates.py
@@ -0,0 +1,388 @@
+"""
+Prompt Templates for Report Generation
+
+Contains all prompt templates used by the LLM to generate
+structured report content. Templates are designed for:
+- Deterministic, fact-based output
+- Professional forensic tone
+- Structured Markdown format
+- No hallucinations or assumptions
+"""
+
+from typing import Dict, Any, List
+from dataclasses import dataclass
+from datetime import datetime
+
+
+@dataclass
+class PromptTemplate:
+    """A named pairing of a system prompt with a reference to its user-prompt formatter."""
+    name: str  # registry key, e.g. 'timeline'
+    system_prompt: str  # full system-prompt text (one of the SYSTEM_PROMPT_* constants)
+    user_template: str  # NAME of the format_*_prompt function, stored as a string (not a callable)
+    description: str  # one-line human-readable summary of what the template generates
+
+
+# =============================================================================
+# SYSTEM PROMPTS (sent as the system message; each is paired with a format_*_prompt user prompt)
+# =============================================================================
+
+SYSTEM_PROMPT_REPORT = """You are a professional forensic report writer for a CCTV surveillance system called DetectifAI.
+
+Your role is to convert raw AI detection data into formal, professional incident reports.
+
+CRITICAL RULES:
+1. ONLY use information explicitly provided in the input data
+2. NEVER invent, assume, or hallucinate any facts
+3. Use neutral, professional language - no emotions or opinions
+4. If data is missing, state "Data not available" - do not guess
+5. Use precise timestamps and measurements
+6. Refer to detected persons as "Individual A", "Individual B", etc.
+7. Do not make legal judgments or accusations
+8. Write in third person, past tense
+9. Use Markdown formatting for structure
+10. Do NOT include a section heading (## or **Title**) at the start of your output - it will be added automatically
+11. Do NOT use [[IMAGE:...]] or [[FACE:...]] placeholder syntax - images are handled separately
+12. If no evidence or faces exist, clearly state that in bold (e.g., **No faces detected**)
+
+OUTPUT FORMAT:
+- Use ### for sub-headings within the section if needed
+- Use bullet points for lists
+- Use tables for structured data where appropriate
+- Do NOT start your output with the section title
+"""
+
+SYSTEM_PROMPT_SUMMARY = """You are a professional forensic report writer creating executive summaries.
+
+RULES:
+1. Summarize ONLY the facts provided - no assumptions
+2. Keep summaries concise (2-3 paragraphs)
+3. Highlight key events and their timestamps
+4. Use neutral, professional tone
+5. Do not speculate on intent or future actions
+6. Output in Markdown format
+7. Do NOT include a heading like "## Executive Summary" or "**Executive Summary**" - it will be added automatically
+"""
+
+SYSTEM_PROMPT_TIMELINE = """You are creating a chronological incident timeline from surveillance data.
+
+RULES:
+1. List events in strict chronological order
+2. Include precise timestamps (HH:MM:SS format)
+3. Describe events factually using provided captions
+4. Use consistent terminology
+5. Output as Markdown table or list
+6. Do NOT include a heading like "## Incident Timeline" - it will be added automatically
+7. Do NOT use [[IMAGE:...]] placeholder syntax
+"""
+
+SYSTEM_PROMPT_OBSERVATIONS = """You are analyzing surveillance detection patterns for a forensic report.
+
+RULES:
+1. Identify patterns ONLY from provided data
+2. Note repeated appearances of same individuals (by face ID)
+3. Note escalation patterns in event severity
+4. Note correlations between events (time proximity, location)
+5. Do NOT speculate on intent or motivation
+6. Use hedging language: "appears to", "data suggests", "observed pattern"
+7. Output in Markdown format
+8. Do NOT include a heading like "## Observations" - it will be added automatically
+"""
+
+
+# =============================================================================
+# USER PROMPT TEMPLATES
+# =============================================================================
+
+def format_executive_summary_prompt(
+    video_id: str,
+    camera_info: Dict[str, Any],
+    time_range: tuple,
+    event_summary: Dict[str, Any],
+    total_events: int,
+    threat_levels: Dict[str, int]
+) -> str:
+    """
+    Return the user prompt that asks the LLM for an executive summary.
+
+    Args:
+        video_id: Video identifier
+        camera_info: Camera metadata; 'camera_id' and 'location' are read
+        time_range: Indexable pair; items 0 and 1 bound the analysis period
+        event_summary: Event counts keyed by event type
+        total_events / threat_levels: Overall count and counts per threat level
+    """
+    camera_id = camera_info.get('camera_id', 'Unknown')
+    location = camera_info.get('location', 'Not specified')
+    period_start, period_end = time_range[0], time_range[1]
+    breakdown = _format_event_summary(event_summary)
+    return f"""Generate an Executive Summary for the following surveillance analysis:
+
+VIDEO ANALYSIS DATA:
+- Video ID: {video_id}
+- Camera: {camera_id}
+- Location: {location}
+- Analysis Period: {period_start} to {period_end}
+- Total Events Detected: {total_events}
+
+EVENT BREAKDOWN:
+{breakdown}
+
+THREAT LEVEL DISTRIBUTION:
+- Critical: {threat_levels.get('critical', 0)} events
+- High: {threat_levels.get('high', 0)} events
+- Medium: {threat_levels.get('medium', 0)} events
+- Low: {threat_levels.get('low', 0)} events
+
+Write a professional 2-3 paragraph executive summary covering:
+1. Overview of the analyzed footage
+2. Key findings and notable events
+3. Overall security assessment based on the data
+
+Use ONLY the information provided above. Do not invent additional details."""
+
+
+def format_timeline_prompt(events: List[Dict[str, Any]]) -> str:
+    """
+    Build the user prompt requesting a chronological Markdown timeline.
+
+    Args:
+        events: Event dicts; missing keys are rendered as placeholders
+
+    Returns:
+        Formatted user prompt
+    """
+    rendered = []
+    for event in events:
+        rendered.append(
+            f"- [{event.get('timestamp', 'Unknown')}] Type: {event.get('event_type', 'Unknown')} | "
+            f"Caption: {event.get('caption', 'No caption')} | Threat: {event.get('threat_level', 'Unknown')} | Keyframe: {event.get('keyframe_id', 'None')}"
+        )
+    events_text = "\n".join(rendered)
+
+    return f"""Create a detailed incident timeline from the following detected events:
+
+DETECTED EVENTS:
+{events_text}
+
+Generate a chronological timeline in Markdown format with:
+1. Each event on its own line with timestamp
+2. Brief factual description based on the caption
+3. Threat level indicator
+4. Do NOT include a section heading - it will be added automatically
+
+Format as a Markdown table:
+| Time | Event Type | Description | Threat Level |
+|------|------------|-------------|--------------|
+"""
+
+
+def format_observations_prompt(
+    events: List[Dict[str, Any]],
+    faces_detected: List[Dict[str, Any]],
+    patterns: Dict[str, Any]
+) -> str:
+    """
+    Build the user prompt for the Observations (pattern analysis) section.
+
+    Args:
+        events: List of events; only its length is included in the prompt
+        faces_detected: Face dicts; appearances are tallied per 'face_id'
+        patterns: 'time_clusters' and 'escalation' are inserted as-is (placeholder if absent)
+
+    Returns:
+        Formatted user prompt
+    """
+    # Tally appearances per face ID, keeping first-seen order
+    if not faces_detected:
+        face_summary = "No faces detected"
+    else:
+        appearances = {}
+        for detection in faces_detected:
+            face_id = detection.get('face_id', 'unknown')
+            appearances.setdefault(face_id, 0)
+            appearances[face_id] += 1
+        summary_lines = []
+        for face_id, times_seen in appearances.items():
+            summary_lines.append(f"- Face ID {face_id}: appeared {times_seen} time(s)")
+        face_summary = "\n".join(summary_lines)
+
+    # Pattern data is passed through verbatim; placeholders cover absent keys
+    clusters = patterns.get('time_clusters', 'No clustering data')
+    escalation = patterns.get('escalation', 'No escalation data')
+    event_total = len(events)
+
+    return f"""Analyze the following surveillance data and identify observable patterns:
+
+FACE DETECTION SUMMARY:
+{face_summary}
+
+EVENT CLUSTERING:
+{clusters}
+
+ESCALATION PATTERN:
+{escalation}
+
+TOTAL EVENTS: {event_total}
+
+Based ONLY on the data above, write an Observations section that:
+1. Notes any individuals appearing multiple times
+2. Identifies time periods with concentrated activity
+3. Notes any escalation in event severity over time
+4. Highlights correlations between different event types
+
+Use hedging language ("appears to", "data suggests") and cite specific data points.
+Do NOT speculate on intent or make accusations."""
+
+
+def format_evidence_section_prompt(
+    keyframes: List[Dict[str, Any]],
+    face_crops: List[Dict[str, Any]]
+) -> str:
+    """
+    Format prompt for evidence section.
+    
+    Args:
+        keyframes: Keyframe metadata dicts; only the first 20 are listed
+        face_crops: Face crop metadata dicts; only the first 10 are listed
+        
+    Returns:
+        Formatted user prompt; an empty list is rendered as an explicit "none" line
+    """
+    keyframe_list = "\n".join([
+        f"- Keyframe {kf.get('keyframe_id', 'unknown')}: "
+        f"Time {kf.get('timestamp', 'unknown')}, "
+        f"Caption: {kf.get('caption', 'No caption')}"
+        for kf in keyframes[:20]  # Limit to 20
+    ]) or "No keyframes available"
+    
+    face_list = "\n".join([
+        f"- Face {fc.get('face_id', 'unknown')}: "
+        f"Time {fc.get('timestamp', 'unknown')}, "
+        f"Confidence: {fc.get('confidence', 'unknown')}"
+        for fc in face_crops[:10]  # Limit to 10
+    ]) or "No faces detected"
+    # ^ without these fallbacks the model would see a blank under the heading
+    return f"""Create an Evidence Section cataloging the following visual evidence:
+
+KEYFRAMES:
+{keyframe_list}
+
+FACE DETECTIONS:
+{face_list}
+
+Generate a Markdown Evidence Section that:
+1. Lists each piece of evidence with a brief description
+2. Groups related evidence together
+3. Notes the timestamp and relevance of each item
+4. Do NOT include a heading like "## Evidence" - it will be added automatically
+5. Do NOT use [[IMAGE:...]] or [[FACE:...]] placeholders - images are handled separately
+6. If no keyframes or faces exist, state that clearly in bold
+
+Format with clear sub-headers and organized presentation."""
+
+
+def format_conclusion_prompt(
+    total_events: int,
+    critical_events: int,
+    high_events: int,
+    duration_minutes: float,
+    key_findings: List[str]
+) -> str:
+    """
+    Build the user prompt for the report's Conclusion section.
+
+    Args:
+        total_events / critical_events / high_events: Event counts shown verbatim
+        duration_minutes: Footage length; rendered with one decimal place
+        key_findings: Finding strings, one bullet each (placeholder bullet if empty)
+
+    Returns: Formatted user prompt
+    """
+    if key_findings:
+        findings_text = "\n".join(f"- {finding}" for finding in key_findings)
+    else:
+        findings_text = "- No specific findings to highlight"
+
+    return f"""Write a factual Conclusion section based on the following analysis summary:
+
+ANALYSIS SUMMARY:
+- Total Events Detected: {total_events}
+- Critical Threat Events: {critical_events}
+- High Threat Events: {high_events}
+- Footage Duration: {duration_minutes:.1f} minutes
+
+KEY FINDINGS:
+{findings_text}
+
+Write a 2-paragraph conclusion that:
+1. Summarizes the overall security status based on the data
+2. Notes any areas that may warrant attention
+3. Closes with a factual statement about the analysis
+
+RULES:
+- Do NOT make legal judgments
+- Do NOT recommend specific actions unless critical events exist
+- Do NOT speculate on future events
+- Keep tone professional and neutral"""
+
+
+# =============================================================================
+# HELPER FUNCTIONS
+# =============================================================================
+
+def _format_event_summary(event_summary: Dict[str, Any]) -> str:
+    """Render per-type event counts as a bullet list, one event type per line.
+
+    Returns the literal "No events detected" when the mapping is empty or None.
+    """
+    if event_summary:
+        return "\n".join(
+            f"- {kind}: {tally} event(s)" for kind, tally in event_summary.items()
+        )
+    return "No events detected"
+
+
+# =============================================================================
+# TEMPLATE REGISTRY
+# =============================================================================
+
+PROMPT_TEMPLATES = {  # section name -> PromptTemplate; read by get_template()
+    'executive_summary': PromptTemplate(
+        name='executive_summary',
+        system_prompt=SYSTEM_PROMPT_SUMMARY,
+        user_template='format_executive_summary_prompt',  # formatter function name, kept as a string
+        description='Generate executive summary for the report'
+    ),
+    'timeline': PromptTemplate(
+        name='timeline',
+        system_prompt=SYSTEM_PROMPT_TIMELINE,
+        user_template='format_timeline_prompt',
+        description='Generate chronological incident timeline'
+    ),
+    'observations': PromptTemplate(
+        name='observations',
+        system_prompt=SYSTEM_PROMPT_OBSERVATIONS,
+        user_template='format_observations_prompt',
+        description='Generate pattern observations section'
+    ),
+    'evidence': PromptTemplate(
+        name='evidence',
+        system_prompt=SYSTEM_PROMPT_REPORT,  # evidence and conclusion share the general report prompt
+        user_template='format_evidence_section_prompt',
+        description='Generate evidence catalog section'
+    ),
+    'conclusion': PromptTemplate(
+        name='conclusion',
+        system_prompt=SYSTEM_PROMPT_REPORT,
+        user_template='format_conclusion_prompt',
+        description='Generate conclusion section'
+    )
+}
+
+
+def get_template(name: str) -> PromptTemplate:
+    """Return the registered PromptTemplate for `name`; raise ValueError if it is unknown."""
+    if name in PROMPT_TEMPLATES:
+        return PROMPT_TEMPLATES[name]
+    raise ValueError(f"Unknown template: {name}. Available: {list(PROMPT_TEMPLATES.keys())}")
diff --git a/report_generation/prompts/.gitkeep b/report_generation/prompts/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..d01d0d45b8826330b50b23c36a3bcbf5b0c92671
--- /dev/null
+++ b/report_generation/prompts/.gitkeep
@@ -0,0 +1 @@
+# Keep this directory for prompt template files
diff --git a/report_generation/report_builder.py b/report_generation/report_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..706539680be05a96f09b1ccbc6d11dba9c43f9a5
--- /dev/null
+++ b/report_generation/report_builder.py
@@ -0,0 +1,728 @@
+"""
+Report Builder - Main Orchestrator
+
+Coordinates all components to generate complete incident reports:
+1. Collects data from database
+2. Generates content using LLM
+3. Assembles the report structure
+4. Exports to PDF/HTML
+"""
+
+import os
+import logging
+import uuid
+from datetime import datetime
+from typing import Optional, Dict, Any, List, Tuple
+from dataclasses import dataclass, field
+
+from .config import ReportConfig
+from .llm_engine import LLMEngine, get_llm_engine
+from .data_collector import DataCollector
+from .prompt_templates import (
+    SYSTEM_PROMPT_REPORT,
+    SYSTEM_PROMPT_SUMMARY,
+    SYSTEM_PROMPT_TIMELINE,
+    SYSTEM_PROMPT_OBSERVATIONS,
+    format_executive_summary_prompt,
+    format_timeline_prompt,
+    format_observations_prompt,
+    format_evidence_section_prompt,
+    format_conclusion_prompt
+)
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ReportSection:
+    """One rendered section of a report: Markdown content plus any associated images."""
+    name: str  # machine-readable section key, e.g. 'timeline'
+    title: str  # human-readable title, e.g. 'Incident Timeline'
+    content: str  # Markdown body; the section generators in this file include the heading in it
+    images: List[Dict[str, Any]] = field(default_factory=list)  # image metadata dicts; keys differ per section
+    order: int = 0  # sort position used by GeneratedReport.to_dict
+
+
+@dataclass
+class GeneratedReport:
+    """Complete generated report: identity, rendered sections, and the data they were built from."""
+    report_id: str
+    video_id: str
+    title: str
+    generated_at: datetime
+    time_range: Optional[Tuple[datetime, datetime]]  # bounds may also arrive as epoch seconds
+    sections: List[ReportSection]
+    metadata: Dict[str, Any]
+    statistics: Dict[str, Any]
+    raw_data: Dict[str, Any]  # not included in to_dict()
+    
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a dict with ISO-8601 timestamps and sections sorted by `order`; epoch bounds accepted."""
+        return {
+            'report_id': self.report_id,
+            'video_id': self.video_id,
+            'title': self.title,
+            'generated_at': self.generated_at.isoformat(),
+            'time_range': [
+                (datetime.utcfromtimestamp(t) if isinstance(t, (int, float)) else t).isoformat()
+                if t is not None else None for t in (self.time_range or (None, None))[:2]
+            ],
+            'sections': [
+                {
+                    'name': s.name,
+                    'title': s.title,
+                    'content': s.content,
+                    'images': s.images,
+                    'order': s.order
+                }
+                for s in sorted(self.sections, key=lambda x: x.order)
+            ],
+            'metadata': self.metadata,
+            'statistics': self.statistics
+        }
+
+
+class ReportGenerator:
+    """
+    Main report generator class that orchestrates the entire
+    report generation pipeline.
+    """
+    
+    def __init__(self, config: Optional[ReportConfig] = None):
+        """Create an un-initialized generator; components are built later by initialize().
+
+        Args:
+            config: Report configuration; a default ReportConfig() is used when falsy
+        """
+        # Nothing is loaded yet: initialize() fills in the collector and the engine.
+        self._initialized = False
+        self.data_collector: Optional[DataCollector] = None
+        self.llm_engine: Optional[LLMEngine] = None
+        self.config = config if config else ReportConfig()
+    
+    def initialize(self) -> bool:
+        """
+        Build the data collector and LLM engine, then try to load the model.
+
+        Returns:
+            True when set up (also if the model did not load and fallback
+            content will be used); False if any step raised an exception
+        """
+        # Repeated calls after a successful setup skip straight to the final return
+        if not self._initialized:
+            try:
+                # Data source first, then the engine that turns the data into prose
+                self.data_collector = DataCollector(self.config)
+                self.llm_engine = get_llm_engine(self.config)
+
+                # A model that fails to load is tolerated: it is only warned about
+                model_ready = self.llm_engine.load_model()
+                if not model_ready:
+                    logger.warning("LLM model not loaded - will generate fallback content")
+
+                self._initialized = True
+                logger.info("✅ Report generator initialized successfully")
+            except Exception as exc:
+                # Broad catch: any failure is logged and reported to the caller as False
+                logger.error(f"Failed to initialize report generator: {exc}")
+                return False
+
+        return True
+    
+    def generate_report(
+        self,
+        video_id: str,
+        time_range: Optional[Tuple[datetime, datetime]] = None,
+        include_sections: Optional[List[str]] = None
+    ) -> GeneratedReport:
+        """
+        Generate a complete incident report for a video.
+
+        Args:
+            video_id: Video identifier
+            time_range: Optional time range to filter events
+            include_sections: Section names to include, from 'header',
+                'executive_summary', 'timeline', 'evidence', 'observations',
+                'conclusion' (None = all); unknown names are ignored
+
+        Returns:
+            GeneratedReport object
+
+        Raises:
+            RuntimeError: If the data collector could not be initialized
+        """
+        if not self._initialized:
+            self.initialize()
+        # initialize() logs and returns False on failure; without a collector there
+        # is nothing to report on, so fail clearly instead of on a None dereference.
+        if self.data_collector is None:
+            raise RuntimeError("Report generator is not initialized: data collector unavailable")
+
+        logger.info(f"Generating report for video: {video_id}")
+
+        if include_sections is None:
+            include_sections = ['header', 'executive_summary', 'timeline', 
+                              'evidence', 'observations', 'conclusion']
+
+        report_data = self.data_collector.collect_all_report_data(video_id, time_range)
+
+        # UTC here so the ID's timestamp agrees with generated_at below
+        report_id = f"RPT-{datetime.utcnow().strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:6].upper()}"
+
+        # Sections are appended in fixed report order
+        sections = []
+        if 'header' in include_sections:
+            sections.append(self._generate_header_section(report_id, report_data))
+
+        if 'executive_summary' in include_sections:
+            logger.info("📝 Generating executive summary...")
+            sections.append(self._generate_executive_summary(report_data))
+            logger.info("✅ Executive summary complete")
+
+        if 'timeline' in include_sections:
+            logger.info("📝 Generating timeline...")
+            sections.append(self._generate_timeline(report_data))
+            logger.info("✅ Timeline complete")
+
+        if 'evidence' in include_sections:
+            logger.info("📝 Generating evidence section...")
+            sections.append(self._generate_evidence_section(report_data))
+            logger.info("✅ Evidence section complete")
+
+        if 'observations' in include_sections:
+            logger.info("📝 Generating observations...")
+            sections.append(self._generate_observations(report_data))
+            logger.info("✅ Observations complete")
+
+        if 'conclusion' in include_sections:
+            logger.info("📝 Generating conclusion...")
+            sections.append(self._generate_conclusion(report_data))
+            logger.info("✅ Conclusion complete")
+
+        # raw_data keeps the collected input alongside the rendered sections
+        report = GeneratedReport(
+            report_id=report_id,
+            video_id=video_id,
+            title=f"Incident Report - {video_id}",
+            generated_at=datetime.utcnow(),
+            time_range=report_data.get('time_range'),
+            sections=sections,
+            metadata=report_data.get('metadata', {}),
+            statistics=report_data.get('statistics', {}),
+            raw_data=report_data
+        )
+
+        logger.info(f"Report generated: {report_id} with {len(sections)} sections")
+
+        return report
+    
+    @staticmethod
+    def _clean_llm_output(content: str, section_title: str) -> str:
+        """Drop heading / bold-title lines from LLM output that repeat the section title.
+
+        A `#`..`###` heading or a `**bold**`-only line is removed when its text equals,
+        contains, or is contained in the title (case-insensitive, `_` treated as a
+        space); one blank line directly after a removed line is dropped as well.
+        """
+        import re
+        if not content:
+            return content
+        wanted = section_title.lower().replace('_', ' ').strip()
+        title_patterns = (r'^#{1,3}\s+(.*)', r'^\*\*([^*]+)\*\*$')
+
+        def repeats_title(text: str) -> bool:
+            for pattern in title_patterns:
+                found = re.match(pattern, text)
+                if found:
+                    label = found.group(1).strip().lower().replace('_', ' ')
+                    if label == wanted or wanted in label or label in wanted:
+                        return True
+            return False
+
+        kept = []
+        just_removed = False
+        for raw in content.strip().split('\n'):
+            text = raw.strip()
+            if repeats_title(text):
+                just_removed = True
+            elif just_removed and text == '':
+                just_removed = False
+            else:
+                just_removed = False
+                kept.append(raw)
+        return '\n'.join(kept).strip()
+    
+    def _generate_header_section(
+        self,
+        report_id: str,
+        data: Dict[str, Any]
+    ) -> ReportSection:
+        """Build the LLM-free header: report identity lines plus a Markdown details table."""
+        metadata = data.get('metadata', {})
+        stats = data.get('statistics', {})
+        time_range = data.get('time_range')
+        
+        time_range_str = "Not specified"
+        if time_range:
+            # Bounds given as epoch seconds (int/float) are converted to datetime first
+            start_dt = time_range[0]
+            if isinstance(start_dt, (int, float)):
+                start_dt = datetime.utcfromtimestamp(start_dt)
+            end_dt = time_range[1]
+            if isinstance(end_dt, (int, float)):
+                end_dt = datetime.utcfromtimestamp(end_dt)
+            
+            start = start_dt.strftime('%Y-%m-%d %H:%M:%S') if start_dt else 'N/A'
+            end = end_dt.strftime('%Y-%m-%d %H:%M:%S') if end_dt else 'N/A'
+            time_range_str = f"{start} to {end}"
+        
+        content = f"""# INCIDENT REPORT
+
+**Report ID:** {report_id}  
+**Classification:** {self.config.report_classification}  
+**Generated:** {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S UTC')}  
+**Organization:** {self.config.organization_name}
+
+---
+
+## Report Details
+
+| Field | Value |
+|-------|-------|
+| Video ID | {data.get('video_id', 'Unknown')} |
+| Camera ID | {metadata.get('camera_id', 'Unknown')} |
+| Location | {metadata.get('location', 'Not specified')} |
+| Analysis Period | {time_range_str} |
+| Total Events | {stats.get('total_events', 0)} |
+| Total Keyframes | {stats.get('total_keyframes', 0)} |
+| Faces Detected | {stats.get('total_faces', 0)} |
+"""
+        # Add Video Link if available
+        if metadata.get('video_url'):
+            content += f"\n**[Download/View Video]({metadata.get('video_url')})**\n"
+        
+        content += "\n---\n"
+        return ReportSection(
+            name='header',
+            title='Report Header',
+            content=content,
+            order=0
+        )
+    
+    def _generate_executive_summary(self, data: Dict[str, Any]) -> ReportSection:
+        """Generate the executive summary via the LLM, or via the fallback when it is unavailable or returns nothing."""
+        metadata = data.get('metadata', {})
+        stats = data.get('statistics', {})
+        time_range = data.get('time_range', (None, None))
+        
+        # Format time range for prompt; epoch-second bounds are converted to datetime first
+        time_range_formatted = ('Start', 'End')
+        if time_range:
+            start_dt = time_range[0]
+            if isinstance(start_dt, (int, float)):
+                start_dt = datetime.utcfromtimestamp(start_dt)
+            end_dt = time_range[1]
+            if isinstance(end_dt, (int, float)):
+                end_dt = datetime.utcfromtimestamp(end_dt)
+            time_range_formatted = (
+                start_dt.strftime('%Y-%m-%d %H:%M:%S') if start_dt else 'Start',
+                end_dt.strftime('%Y-%m-%d %H:%M:%S') if end_dt else 'End'
+            )
+        
+        # Create prompt
+        user_prompt = format_executive_summary_prompt(
+            video_id=data.get('video_id', 'Unknown'),
+            camera_info={
+                'camera_id': metadata.get('camera_id', 'Unknown'),
+                'location': metadata.get('location', 'Not specified')
+            },
+            time_range=time_range_formatted,
+            event_summary=stats.get('event_types', {}),
+            total_events=stats.get('total_events', 0),
+            threat_levels=stats.get('threat_levels', {})
+        )
+        
+        # Generate with LLM
+        if self.llm_engine and self.llm_engine.is_loaded:
+            logger.info("🤖 Calling LLM for executive summary...")
+            result = self.llm_engine.generate(
+                system_prompt=SYSTEM_PROMPT_SUMMARY,
+                user_prompt=user_prompt,
+                max_tokens=400  # Shorter for executive summary
+            )
+            logger.info(f"🤖 LLM response received ({result.get('tokens_used', 0)} tokens)")
+            # Empty (or heading-only) output would leave a blank section: use the fallback instead
+            content = (self._clean_llm_output(result.get('text', ''), 'Executive Summary')
+                       or self._fallback_executive_summary(data))
+        else:
+            logger.info("⚠️ Using fallback executive summary (LLM not loaded)")
+            content = self._fallback_executive_summary(data)
+        
+        return ReportSection(
+            name='executive_summary',
+            title='Executive Summary',
+            content=f"## Executive Summary\n\n{content}",
+            order=1
+        )
+    
+    def _generate_timeline(self, data: Dict[str, Any]) -> ReportSection:
+        """Generate the incident timeline via the LLM, or via the fallback when it is unavailable or returns nothing."""
+        events = data.get('events', [])
+        if not events:
+            content = "*No events detected during the analysis period.*"
+        else:
+            # Prepare events for prompt; timestamps without strftime (numbers, strings) are shown as-is
+            events_for_prompt = [
+                {
+                    'timestamp': (e['timestamp'].strftime('%H:%M:%S') if hasattr(e.get('timestamp'), 'strftime')
+                                  else str(e.get('timestamp') or 'Unknown')),
+                    'event_type': e.get('event_type', 'Unknown'),
+                    'caption': e.get('caption') or e.get('description', 'No description'),
+                    'threat_level': e.get('threat_level', 'low'),
+                    'keyframe_id': e.get('keyframe_id', 'N/A')
+                }
+                for e in events[:self.config.max_events_in_report]
+            ]
+            user_prompt = format_timeline_prompt(events_for_prompt)
+            if self.llm_engine and self.llm_engine.is_loaded:
+                logger.info("🤖 Calling LLM for timeline...")
+                result = self.llm_engine.generate(
+                    system_prompt=SYSTEM_PROMPT_TIMELINE,
+                    user_prompt=user_prompt,
+                    max_tokens=600  # Longer for timeline
+                )
+                logger.info(f"🤖 LLM response received ({result.get('tokens_used', 0)} tokens)")
+                # Empty (or heading-only) output would leave a blank section: use the fallback instead
+                content = (self._clean_llm_output(result.get('text', ''), 'Incident Timeline')
+                           or self._fallback_timeline(events_for_prompt))
+            else:
+                logger.info("⚠️ Using fallback timeline (LLM not loaded)")
+                content = self._fallback_timeline(events_for_prompt)
+        
+        # Collect images for this section
+        images = [
+            {'keyframe_id': e.get('keyframe_id'), 'timestamp': e.get('timestamp')}
+            for e in events if e.get('keyframe_id')
+        ][:self.config.max_images_per_event * 5]
+        
+        return ReportSection(
+            name='timeline',
+            title='Incident Timeline',
+            content=f"## Incident Timeline\n\n{content}",
+            images=images,
+            order=2
+        )
+    
+    def _generate_evidence_section(self, data: Dict[str, Any]) -> ReportSection:
+        """Generate the evidence catalog section directly from collected data (no LLM call).
+        
+        Lists up to 20 keyframes and up to 10 faces:
+        - Embeds a keyframe's 'image_url' / a face's 'crop_url' as a Markdown image when present
+        - Adds every listed keyframe (and faces, if config.include_face_crops) to the gallery
+        - Displays bold 'not found' messages when no data exists
+        """
+        keyframes = data.get('keyframes', [])
+        faces = data.get('faces', [])
+        
+        content_parts = []
+        images = []
+        
+        # --- Keyframes subsection ---
+        if keyframes:
+            content_parts.append("### Keyframes\n")
+            for i, kf in enumerate(keyframes[:20], 1):
+                ts = kf.get('timestamp')
+                ts_str = ts.strftime('%H:%M:%S') if hasattr(ts, 'strftime') else str(ts or 'Unknown')
+                caption = kf.get('caption') or 'No caption available'
+                url = kf.get('image_url')
+                
+                content_parts.append(f"**Keyframe {i}** — {ts_str}")
+                content_parts.append(f"{caption}\n")
+                if url:
+                    content_parts.append(f"![Keyframe {i}]({url})\n")
+                
+                # Add to gallery images
+                images.append({
+                    'type': 'keyframe',
+                    'id': kf.get('keyframe_id'),
+                    'path': kf.get('image_path'),
+                    'url': url,
+                    'caption': caption
+                })
+        else:
+            content_parts.append("**No keyframes were captured for this video.**\n")
+        
+        # --- Face Detections subsection ---
+        if faces:
+            content_parts.append("\n### Face Detections\n")
+            for i, f in enumerate(faces[:10], 1):
+                ts = f.get('timestamp')
+                ts_str = ts.strftime('%H:%M:%S') if hasattr(ts, 'strftime') else str(ts or 'Unknown')
+                conf = f.get('confidence') or 0  # None/missing -> 0 so the :.2f formats below cannot fail
+                person_id = f.get('person_id') or 'Unidentified'
+                url = f.get('crop_url')
+                
+                content_parts.append(f"**Face {i}** — Detected at {ts_str} (confidence: {conf:.2f}, ID: {person_id})")
+                if url:
+                    content_parts.append(f"\n![Face {i}]({url})\n")
+                else:
+                    content_parts.append("")
+                
+                # Add to gallery images
+                if self.config.include_face_crops:
+                    images.append({
+                        'type': 'face',
+                        'id': f.get('face_id'),
+                        'path': f.get('crop_path'),
+                        'url': url,
+                        'caption': f"Face {i} at {ts_str} (conf: {conf:.2f})"
+                    })
+        else:
+            content_parts.append("\n**No faces were detected in this video.**\n")
+        
+        evidence_content = "\n".join(content_parts)
+        
+        logger.info(f"📸 Evidence section built: {len(keyframes)} keyframes, {len(faces)} faces")
+        
+        return ReportSection(
+            name='evidence',
+            title='Evidence Catalog',
+            content=f"## Evidence Catalog\n\n{evidence_content}",
+            images=images,
+            order=3
+        )
+    
+    def _generate_observations(self, data: Dict[str, Any]) -> ReportSection:
+        """
+        Build the 'Observations' report section.
+
+        With a loaded LLM engine, the events, faces and a text rendering of
+        the detected patterns are sent to the model and its cleaned reply is
+        used; otherwise ``_fallback_observations`` supplies the body.
+        """
+        event_list = data.get('events', [])
+        face_list = data.get('faces', [])
+        pattern_info = data.get('patterns', {})
+        engine = self.llm_engine
+        if not (engine and engine.is_loaded):
+            logger.info("⚠️ Using fallback observations (LLM not loaded)")
+            body = self._fallback_observations(data)
+        else:
+            logger.info("🤖 Calling LLM for observations...")
+            # One bullet per time cluster, or a stock sentence when there are none
+            clusters = pattern_info.get('time_clusters', [])
+            if clusters:
+                bullets = (
+                    f"- Cluster: {c.get('start')} to {c.get('end')} ({c.get('event_count')} events)"
+                    for c in clusters
+                )
+                cluster_summary = "\n".join(bullets)
+            else:
+                cluster_summary = "No significant time clusters identified"
+            prompt_patterns = {
+                'time_clusters': cluster_summary,
+                'escalation': pattern_info.get('escalation', 'No clear escalation pattern'),
+            }
+            prompt = format_observations_prompt(
+                events=event_list, faces_detected=face_list, patterns=prompt_patterns
+            )
+            # Observations are kept short: the reply is capped at 400 tokens
+            reply = engine.generate(
+                system_prompt=SYSTEM_PROMPT_OBSERVATIONS, user_prompt=prompt, max_tokens=400
+            )
+            logger.info(f"🤖 LLM response received ({reply.get('tokens_used', 0)} tokens)")
+            body = self._clean_llm_output(reply.get('text', ''), 'Observations')
+
+        return ReportSection(
+            name='observations', title='Observations',
+            content=f"## Observations\n\n{body}", order=4
+        )
+    
+    def _generate_conclusion(self, data: Dict[str, Any]) -> ReportSection:
+        """
+        Build the 'Conclusion' report section.
+
+        Key findings are derived from the threat-level counts and detected
+        patterns, then either handed to the LLM (when loaded) or rendered by
+        ``_fallback_conclusion``.
+        """
+        stats = data.get('statistics', {})
+        levels = stats.get('threat_levels', {})
+        patterns = data.get('patterns', {})
+
+        # One finding per non-zero severity bucket, most severe first
+        findings = [
+            f"{levels[severity]} {severity} threat event(s) detected"
+            for severity in ('critical', 'high')
+            if levels.get(severity, 0) > 0
+        ]
+        repeated = patterns.get('repeated_faces')
+        if repeated:
+            findings.append(f"{len(repeated)} individual(s) appeared multiple times")
+        if patterns.get('escalation') == 'increasing':
+            findings.append("Escalating threat pattern observed")
+        findings = findings or ["No significant security concerns identified"]
+
+        engine = self.llm_engine
+        if engine and engine.is_loaded:
+            logger.info("🤖 Calling LLM for conclusion...")
+            prompt = format_conclusion_prompt(
+                total_events=stats.get('total_events', 0),
+                critical_events=levels.get('critical', 0),
+                high_events=levels.get('high', 0),
+                duration_minutes=stats.get('duration_minutes', 0),
+                key_findings=findings,
+            )
+            # The conclusion is kept brief: the reply is capped at 300 tokens
+            reply = engine.generate(
+                system_prompt=SYSTEM_PROMPT_REPORT, user_prompt=prompt, max_tokens=300
+            )
+            logger.info(f"🤖 LLM response received ({reply.get('tokens_used', 0)} tokens)")
+            body = self._clean_llm_output(reply.get('text', ''), 'Conclusion')
+        else:
+            logger.info("⚠️ Using fallback conclusion (LLM not loaded)")
+            body = self._fallback_conclusion(stats, findings)
+
+        return ReportSection(
+            name='conclusion',
+            title='Conclusion',
+            content=f"## Conclusion\n\n{body}",
+            order=5,
+        )
+    
+    # =========================================================================
+    # FALLBACK METHODS (used when LLM is not available)
+    # =========================================================================
+    
+    def _fallback_executive_summary(self, data: Dict[str, Any]) -> str:
+        """Compose a templated executive summary (no LLM) from metadata and statistics."""
+        stats = data.get('statistics', {})
+        meta = data.get('metadata', {})
+        levels = stats.get('threat_levels', {})
+        camera, place = meta.get('camera_id', 'Unknown'), meta.get('location', 'unspecified location')
+        return (
+            "This report summarizes the automated security analysis of video footage \n"
+            f"from camera {camera} located at {place}.\n\n"
+            f"During the analysis period, the system detected a total of {stats.get('total_events', 0)} events, \n"
+            f"including {levels.get('critical', 0)} critical and \n{levels.get('high', 0)} high-priority incidents.\n\n"
+            f"{stats.get('total_faces', 0)} face detections were recorded for potential identification purposes."
+        )
+    
+    def _fallback_timeline(self, events: List[Dict[str, Any]]) -> str:
+        """Render events as a Markdown table without the LLM ('*No events detected.*' if empty).
+
+        A ``None``/empty caption becomes 'No description' instead of raising on the slice; captions
+        are cut to 50 chars, then newlines and pipes are neutralised to keep one event per row.
+        """
+        if not events:
+            return "*No events detected.*"
+        lines = ["| Time | Event Type | Description | Threat Level |",
+                "| ---- | ---------- | ----------- | ------------ |"]
+        for e in events:
+            caption = str(e.get('caption') or 'No description')[:50].replace("\n", " ").replace("|", "\\|")
+            lines.append(f"| {e.get('timestamp', 'N/A')} | {e.get('event_type', 'Unknown')} | "
+                         f"{caption} | {e.get('threat_level', 'low')} |")
+        return "\n".join(lines)
+    
+    def _fallback_evidence_section(
+        self,
+        keyframes: List[Dict[str, Any]],
+        faces: List[Dict[str, Any]]
+    ) -> str:
+        """
+        Render the evidence catalog as plain Markdown without the LLM.
+
+        Args:
+            keyframes: Keyframe records; reads 'keyframe_id', 'timestamp', 'caption'.
+            faces: Face records; reads 'face_id', 'timestamp', 'confidence'.
+
+        Returns:
+            Markdown with a bullet per item, or a bold notice for each empty list.
+        """
+        if not keyframes and not faces:
+            return "**No keyframes were captured for this video.**\n\n**No faces were detected in this video.**"
+        parts = ["### Keyframes\n\n"] if keyframes else ["**No keyframes were captured for this video.**\n\n"]
+        # `or` rather than a .get default: a stored caption of None must not print as "None"
+        parts += [f"- **{kf.get('keyframe_id')}** ({kf.get('timestamp')}): {kf.get('caption') or 'No caption'}\n\n"
+                  for kf in keyframes or []]
+        parts.append("### Face Detections\n\n" if faces else "**No faces were detected in this video.**\n\n")
+        parts += [f"- **{f.get('face_id')}** at {f.get('timestamp')} (confidence: {f.get('confidence')})\n\n"
+                  for f in faces or []]
+        return "".join(parts)
+    
+    def _fallback_observations(self, data: Dict[str, Any]) -> str:
+        """
+        Summarise detected patterns as Markdown bullets when no LLM is available.
+
+        Emits one bullet each for repeated faces, time clusters and the escalation
+        trend when present, or a single 'no significant patterns' bullet otherwise.
+        """
+        found = data.get('patterns', {})
+        repeated, clusters, trend = (found.get(k) for k in ('repeated_faces', 'time_clusters', 'escalation'))
+        bullets = []
+        if repeated:
+            bullets.append(f"- {len(repeated)} individual(s) appeared multiple times during the analysis period\n")
+        if clusters:
+            bullets.append(f"- {len(clusters)} time period(s) showed concentrated activity\n")
+        if trend:
+            bullets.append(f"- Threat level trend: {trend}\n")
+        if not bullets:
+            bullets.append("- No significant patterns identified in the analyzed footage\n")
+        return "Based on the analyzed data:\n\n" + "".join(bullets)
+    
+    def _fallback_conclusion(
+        self,
+        stats: Dict[str, Any],
+        key_findings: List[str]
+    ) -> str:
+        """
+        Write a templated conclusion when the LLM is unavailable.
+
+        Args:
+            stats: Statistics dict; reads 'total_events' and 'threat_levels'.
+            key_findings: Sentences rendered as a bulleted 'Key findings' list.
+
+        Returns:
+            Conclusion text ending with the DetectifAI attribution line.
+        """
+        levels = stats.get('threat_levels', {})
+        critical, high = levels.get('critical', 0), levels.get('high', 0)
+        pieces = [f"The automated analysis detected {stats.get('total_events', 0)} events during the review period. "]
+        if critical > 0 or high > 0:
+            pieces.append(f"Of these, {critical + high} were classified as high-priority incidents requiring attention. ")
+        else:
+            pieces.append("No high-priority security incidents were detected. ")
+        pieces += ["\n\nKey findings:\n", *(f"- {finding}\n" for finding in key_findings)]
+        return "".join(pieces) + "\n*This report was generated automatically by DetectifAI.*"
+    
+    def export_html(self, report: GeneratedReport, output_path: Optional[str] = None) -> str:
+        """
+        Render a generated report to an HTML file.
+
+        Args:
+            report: The report object to render
+            output_path: Destination file path (auto-generated if None)
+
+        Returns:
+            Path to the HTML file that was generated
+        """
+        # Function-scope import: the renderer module is loaded on the first HTML export
+        from .html_renderer import HTMLRenderer
+
+        return HTMLRenderer(self.config).render(report, output_path)
+    
+    def export_pdf(self, report: GeneratedReport, output_path: Optional[str] = None) -> str:
+        """
+        Write a generated report out as a PDF file.
+
+        Args:
+            report: The report object to export
+            output_path: Destination file path (auto-generated if None)
+
+        Returns:
+            Path to the PDF file that was generated
+        """
+        # Function-scope import: the exporter module is loaded on the first PDF export
+        from .pdf_exporter import PDFExporter
+
+        return PDFExporter(self.config).export(report, output_path)
diff --git a/report_generation/setup.py b/report_generation/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..f026f92b6ffff0e7fc7cefd248faebab30e76acf
--- /dev/null
+++ b/report_generation/setup.py
@@ -0,0 +1,245 @@
+"""
+Report Generation Module Setup Script
+
+Run this script to:
+1. Check the Python version and create necessary directories
+2. Install dependencies
+3. Verify the installation
+4. Optionally download the LLM model
+"""
+
+import os
+import sys
+import subprocess
+
+
+def check_python_version():
+    """Report the running interpreter version; return True only for Python 3.9 or newer."""
+    print("🔍 Checking Python version...")
+    major, minor, micro = sys.version_info[:3]
+    if (major, minor) >= (3, 9):
+        print(f"✅ Python {major}.{minor}.{micro}")
+        return True
+    print(f"❌ Python 3.9+ required. Found: {major}.{minor}")
+    return False
+
+
+def _pip_install(package):
+    """Run ``pip install -q <package>``; return (succeeded, last stderr line or '')."""
+    proc = subprocess.run(
+        [sys.executable, "-m", "pip", "install", package, "-q"],
+        stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True, errors="replace",
+    )
+    tail = proc.stderr.strip().splitlines()[-1:]
+    return proc.returncode == 0, (tail[0] if tail else "")
+
+
+def install_dependencies():
+    """
+    Install the Python packages the report module needs.
+
+    Required packages are installed in order and the first failure aborts the
+    step; the optional ``weasyprint`` (needs GTK3 on Windows) only warns.
+    pip's last error line is echoed on failure instead of being discarded.
+
+    Returns:
+        True when every required package installed, False otherwise.
+    """
+    print("\n📦 Installing Python dependencies...")
+
+    packages = [
+        "llama-cpp-python",
+        "huggingface_hub",
+        "jinja2",
+        "markdown",
+        "Pillow",
+        "reportlab",
+    ]
+    optional_packages = ["weasyprint"]  # Requires GTK3 on Windows
+
+    for package in packages:
+        print(f"  Installing {package}...")
+        ok, error = _pip_install(package)
+        if not ok:
+            print(f"  ❌ Failed to install {package}" + (f": {error}" if error else ""))
+            return False
+        print(f"  ✅ {package}")
+
+    for package in optional_packages:
+        print(f"  Installing {package} (optional)...")
+        ok, _ = _pip_install(package)
+        print(f"  ✅ {package}" if ok else f"  ⚠️ {package} not installed (PDF export may not work)")
+    return True
+
+
+def download_model():
+    """
+    Fetch the quantized Qwen2.5-3B-Instruct GGUF file into ``models/``.
+
+    Returns True on success; any failure is printed with the manual download URL and yields False.
+    """
+    print("\n🤖 Downloading LLM model...")
+
+    try:
+        from huggingface_hub import hf_hub_download
+
+        target_dir = os.path.join(os.path.dirname(__file__), 'models')
+        os.makedirs(target_dir, exist_ok=True)
+
+        print("  Downloading Qwen2.5-3B-Instruct (Q4_K_M, ~2GB)...")
+        print("  This may take several minutes depending on your connection...")
+        fetch_args = dict(
+            repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
+            filename="qwen2.5-3b-instruct-q4_k_m.gguf",
+            local_dir=target_dir,
+            local_dir_use_symlinks=False,
+        )
+        print(f"  ✅ Model downloaded to: {hf_hub_download(**fetch_args)}")
+    except Exception as e:
+        print(f"  ❌ Failed to download model: {e}")
+        print("  You can manually download from:")
+        print("  https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF")
+        return False
+    return True
+
+
+def create_directories():
+    """
+    Create the module's working directories (existing ones are left alone).
+
+    Paths are anchored on this file's absolute location so the project-level
+    ``video_processing_outputs/reports`` lands beside the package even when
+    ``__file__`` is a bare relative name (as for scripts on Python < 3.9).
+    Always returns True; ``os.makedirs`` errors propagate to the caller.
+    """
+    print("\n📁 Creating directories...")
+
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    subdirs = [os.path.join(base_dir, name) for name in ('models', 'templates', 'prompts')]
+    for directory in subdirs + [os.path.join(os.path.dirname(base_dir), 'video_processing_outputs', 'reports')]:
+        os.makedirs(directory, exist_ok=True)
+        print(f"  ✅ {directory}")
+    return True
+
+
+def verify_installation():
+    """
+    Check that required modules import and report optional components.
+
+    Only a missing required module makes the result False; weasyprint and
+    the model file are reported as warnings.
+    """
+    print("\n🔬 Verifying installation...")
+
+    # (import name, display name) for every required dependency
+    required = (
+        ('llama_cpp', 'llama-cpp-python'),
+        ('huggingface_hub', 'huggingface_hub'),
+        ('jinja2', 'Jinja2'),
+        ('markdown', 'markdown'),
+        ('PIL', 'Pillow'),
+        ('reportlab', 'reportlab'),
+    )
+    missing = 0
+    for import_name, label in required:
+        try:
+            __import__(import_name)
+        except ImportError:
+            print(f"  ❌ {label} not found")
+            missing += 1
+        else:
+            print(f"  ✅ {label}")
+
+    # weasyprint is optional: failing to load it is only a warning
+    try:
+        import weasyprint
+    except (ImportError, OSError):
+        print("  ⚠️ weasyprint not available (PDF export disabled, use HTML)")
+    else:
+        print("  ✅ weasyprint (PDF export available)")
+
+    model_file = os.path.join(os.path.dirname(__file__), 'models', 'qwen2.5-3b-instruct-q4_k_m.gguf')
+    if not os.path.exists(model_file):
+        print("  ⚠️ LLM model not found - will download on first use")
+    else:
+        megabytes = os.path.getsize(model_file) / (1024 * 1024)
+        print(f"  ✅ LLM model found ({megabytes:.0f} MB)")
+    return missing == 0
+
+
+def test_generation():
+    """
+    Smoke-test the module by importing and instantiating ``ReportGenerator``.
+
+    No report is produced. Returns True when construction succeeds; any
+    exception is printed and turned into False.
+    """
+    print("\n🧪 Testing report generation...")
+    try:
+        from report_generation import ReportGenerator
+
+        generator = ReportGenerator()
+    except Exception as e:
+        print(f"  ❌ Test failed: {e}")
+        return False
+    print("  ✅ ReportGenerator initialized")
+    print("  ✅ Module imports successful")
+    print("\n  Note: Full test requires a processed video in the database")
+    return True
+
+
+def main():
+    """
+    Run the interactive setup; failing steps are reported and later steps still run.
+
+    An unreadable stdin at the download prompt counts as "no" (was an unhandled
+    EOFError); a failed download downgrades the final summary to warnings.
+    """
+    rule = "=" * 60
+    print(rule, "🛡️ DetectifAI Report Generation Module Setup", rule, sep="\n")
+
+    steps = [
+        ("Check Python version", check_python_version),
+        ("Create directories", create_directories),
+        ("Install dependencies", install_dependencies),
+        ("Verify installation", verify_installation),
+    ]
+
+    all_passed = True
+    for step_name, step_func in steps:
+        if not step_func():
+            all_passed = False
+            print(f"\n⚠️ Step '{step_name}' had issues. Continuing...")
+
+    print("\n" + rule, "📥 Model Download", rule, sep="\n")
+    print("\nThe LLM model (~2GB) is required for AI-generated report content.")
+    print("It will be automatically downloaded on first use, or you can download now.")
+
+    try:
+        response = input("\nDownload model now? [y/N]: ").strip().lower()
+    except EOFError:  # no terminal attached (piped input, CI, container build)
+        response = ""
+    if response in ("y", "yes"):
+        # A failed download is not fatal, but it must not be reported as a clean setup
+        all_passed = download_model() and all_passed
+    else:
+        print("Skipping model download. Will download on first use.")
+
+    print("\n" + rule, "📋 Setup Summary", rule, sep="\n")
+    if all_passed:
+        print("\n✅ Setup completed successfully!")
+        print("\nNext steps:")
+        print("1. Process a video through DetectifAI pipeline")
+        print("2. Generate a report:")
+        print("   from report_generation import ReportGenerator")
+        print("   generator = ReportGenerator()")
+        print("   report = generator.generate_report('video_id_here')")
+        print("   generator.export_html(report)")
+    else:
+        print("\n⚠️ Setup completed with some warnings.")
+        print("Check the messages above for details.")
+
+    print("\n" + rule)
+
+if __name__ == "__main__":
+    main()
diff --git a/report_generation/templates/report_base.html b/report_generation/templates/report_base.html
new file mode 100644
index 0000000000000000000000000000000000000000..15da93c37eec0da803fc815a9a0f9614e815009a
--- /dev/null
+++ b/report_generation/templates/report_base.html
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{{ report.title }}</title>
+    <style>
+        {% include 'report_styles.css' %}
+    </style>
+</head>
+<body>
+    <div class="report-container">
+        <!-- Report Header -->
+        <header class="report-header">
+            <div class="logo">
+                <h1>🛡️ DetectifAI</h1>
+                <p class="subtitle">AI-Powered Surveillance System</p>
+            </div>
+            <div class="report-meta">
+                <span class="classification {{ report.metadata.classification|default('CONFIDENTIAL')|lower }}">
+                    {{ report.metadata.classification|default('CONFIDENTIAL') }}
+                </span>
+            </div>
+        </header>
+
+        <!-- Report Content -->
+        <main class="report-content">
+            {% for section in report.sections|sort(attribute='order') %}
+            <section class="report-section" id="section-{{ section.name }}">
+                {{ section.content|markdown|safe }}
+                
+                {% if section.images %}
+                <div class="evidence-gallery">
+                    <h3 class="gallery-title">Evidence Images</h3>
+                    <div class="gallery-grid">
+                        {% for img in section.images[:max_images] %}
+                        <figure class="evidence-item">
+                            {% if img.url %}
+                            <img src="{{ img.url }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image"
+                                 loading="lazy"
+                                 onerror="this.onerror=null; this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22200%22 height=%22150%22%3E%3Crect fill=%22%23edf2f7%22 width=%22200%22 height=%22150%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 text-anchor=%22middle%22 fill=%22%23718096%22%3EImage unavailable%3C/text%3E%3C/svg%3E';">
+                            {% elif img.embedded_data %}
+                            <img src="data:image/jpeg;base64,{{ img.embedded_data }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image">
+                            {% elif img.path %}
+                            <img src="{{ img.path }}" 
+                                 alt="{{ img.caption|default('Evidence image') }}"
+                                 class="evidence-image"
+                                 onerror="this.onerror=null; this.src='data:image/svg+xml,%3Csvg xmlns=%22http://www.w3.org/2000/svg%22 width=%22200%22 height=%22150%22%3E%3Crect fill=%22%23edf2f7%22 width=%22200%22 height=%22150%22/%3E%3Ctext x=%2250%25%22 y=%2250%25%22 text-anchor=%22middle%22 fill=%22%23718096%22%3EImage unavailable%3C/text%3E%3C/svg%3E';">
+                            {% else %}
+                            <div class="image-placeholder">
+                                <span>📷 {{ img.caption|default('Image: ' + (img.id|default('N/A')|string)) }}</span>
+                            </div>
+                            {% endif %}
+                            <figcaption>{{ img.caption|default('Evidence ' + loop.index|string) }}</figcaption>
+                        </figure>
+                        {% endfor %}
+                    </div>
+                </div>
+                {% endif %}
+            </section>
+            {% endfor %}
+        </main>
+
+        <!-- Report Footer -->
+        <footer class="report-footer">
+            <p>Report ID: {{ report.report_id }}</p>
+            <p>Generated: {{ report.generated_at|format_datetime }}</p>
+            <p class="disclaimer">
+                This report was automatically generated by DetectifAI. 
+                All findings are based on AI analysis and should be verified by qualified personnel.
+            </p>
+        </footer>
+    </div>
+</body>
+</html>
\ No newline at end of file
diff --git a/report_generation/templates/report_styles.css b/report_generation/templates/report_styles.css
new file mode 100644
index 0000000000000000000000000000000000000000..96c2656028cc7df966b021cad3ab64f332b2e1b0
--- /dev/null
+++ b/report_generation/templates/report_styles.css
@@ -0,0 +1,412 @@
+/* DetectifAI Report Styles - Enhanced Readability */
+:root {
+    --primary-color: #1a365d;
+    --secondary-color: #2d3748;
+    --accent-color: #3182ce;
+    --danger-color: #e53e3e;
+    --warning-color: #dd6b20;
+    --success-color: #38a169;
+    --bg-color: #ffffff;
+    --text-color: #1a202c;
+    --border-color: #e2e8f0;
+    --section-bg: #f8fafc;
+}
+
+* {
+    box-sizing: border-box;
+    margin: 0;
+    padding: 0;
+}
+
+body {
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+    font-size: 12pt;
+    line-height: 1.8;
+    color: var(--text-color);
+    background-color: #f7fafc;
+}
+
+.report-container {
+    max-width: 210mm;
+    margin: 20px auto;
+    background: var(--bg-color);
+    box-shadow: 0 4px 30px rgba(0,0,0,0.15);
+}
+
+/* Header Styles */
+.report-header {
+    background: linear-gradient(135deg, var(--primary-color) 0%, #2c5282 100%);
+    color: white;
+    padding: 40px 50px;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+    border-bottom: 4px solid var(--accent-color);
+}
+
+.report-header .logo h1 {
+    font-size: 32pt;
+    margin-bottom: 8px;
+    font-weight: 700;
+}
+
+.report-header .subtitle {
+    font-size: 12pt;
+    opacity: 0.95;
+    font-weight: 300;
+}
+
+.classification {
+    padding: 10px 20px;
+    border-radius: 6px;
+    font-weight: bold;
+    text-transform: uppercase;
+    font-size: 10pt;
+    letter-spacing: 1px;
+}
+
+.classification.confidential {
+    background: var(--danger-color);
+}
+
+.classification.internal {
+    background: var(--warning-color);
+}
+
+.classification.public {
+    background: var(--success-color);
+}
+
+/* Content Styles */
+.report-content {
+    padding: 50px;
+    background: white;
+}
+
+.report-section {
+    margin-bottom: 50px;
+    page-break-inside: avoid;
+}
+
+.section-content {
+    background: white;
+    padding: 30px;
+    border-radius: 8px;
+    border-left: 4px solid var(--accent-color);
+    color: var(--text-color);
+}
+
+.section-divider {
+    height: 2px;
+    background: linear-gradient(to right, transparent, var(--border-color), transparent);
+    margin: 40px 0;
+}
+
+h1, h2, h3, h4 {
+    color: var(--primary-color);
+    margin-top: 30px;
+    margin-bottom: 20px;
+    font-weight: 600;
+}
+
+h1 { 
+    font-size: 28pt; 
+    border-bottom: 3px solid var(--accent-color); 
+    padding-bottom: 15px;
+    margin-top: 0;
+}
+
+h2 { 
+    font-size: 20pt; 
+    border-bottom: 2px solid var(--border-color); 
+    padding-bottom: 12px;
+    margin-top: 0;
+}
+
+h3 { 
+    font-size: 16pt;
+    color: var(--secondary-color);
+}
+
+h4 { 
+    font-size: 13pt;
+    color: var(--secondary-color);
+}
+
+p {
+    margin-bottom: 16px;
+    color: var(--text-color);
+    line-height: 1.8;
+}
+
+strong {
+    color: var(--primary-color);
+    font-weight: 600;
+}
+
+/* Table Styles */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 25px 0;
+    font-size: 11pt;
+    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+    border-radius: 8px;
+    overflow: hidden;
+    background: white;
+}
+
+th, td {
+    border: 1px solid var(--border-color);
+    padding: 14px 16px;
+    text-align: left;
+    color: var(--text-color);
+}
+
+th {
+    background: var(--primary-color);
+    color: white;
+    font-weight: 600;
+    text-transform: uppercase;
+    font-size: 10pt;
+    letter-spacing: 0.5px;
+}
+
+tr:nth-child(even) {
+    background: #f8fafc;
+}
+
+tr:hover {
+    background: #edf2f7;
+    transition: background 0.2s ease;
+}
+
+/* Evidence Gallery */
+.evidence-gallery {
+    margin: 30px 0;
+    padding: 25px;
+    background: white;
+    border-radius: 8px;
+    border: 1px solid var(--border-color);
+}
+
+.gallery-title {
+    font-size: 14pt;
+    color: var(--primary-color);
+    margin-bottom: 20px;
+    padding-bottom: 10px;
+    border-bottom: 2px solid var(--accent-color);
+}
+
+.gallery-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));
+    gap: 25px;
+}
+
+.evidence-item {
+    border: 2px solid var(--border-color);
+    border-radius: 10px;
+    overflow: hidden;
+    background: white;
+    transition: transform 0.2s ease, box-shadow 0.2s ease;
+}
+
+.evidence-item:hover {
+    transform: translateY(-4px);
+    box-shadow: 0 6px 20px rgba(0,0,0,0.15);
+}
+
+.evidence-image {
+    width: 100%;
+    height: 180px;
+    object-fit: cover;
+    border-bottom: 2px solid var(--border-color);
+    background: #f8fafc;
+}
+
+.image-placeholder {
+    width: 100%;
+    height: 180px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: linear-gradient(135deg, #edf2f7 0%, #e2e8f0 100%);
+    color: #4a5568;
+    font-size: 11pt;
+    border-bottom: 2px solid var(--border-color);
+    font-weight: 500;
+}
+
+.evidence-item figcaption {
+    padding: 12px;
+    font-size: 10pt;
+    color: #2d3748;
+    background: white;
+    font-weight: 500;
+    min-height: 50px;
+}
+
+/* Lists */
+ul, ol {
+    margin: 16px 0;
+    padding-left: 30px;
+}
+
+li {
+    margin-bottom: 10px;
+    line-height: 1.6;
+    color: var(--text-color);
+}
+
+/* Code blocks */
+code {
+    background: #edf2f7;
+    padding: 3px 8px;
+    border-radius: 4px;
+    font-size: 10pt;
+    font-family: 'Courier New', monospace;
+    color: #2d3748;
+}
+
+pre {
+    background: #2d3748;
+    color: #e2e8f0;
+    padding: 20px;
+    border-radius: 8px;
+    overflow-x: auto;
+    font-size: 10pt;
+    margin: 20px 0;
+}
+
+pre code {
+    background: transparent;
+    color: #e2e8f0;
+    padding: 0;
+}
+
+/* Footer Styles */
+.report-footer {
+    background: var(--section-bg);
+    padding: 30px 50px;
+    border-top: 3px solid var(--accent-color);
+    font-size: 10pt;
+    color: #4a5568;
+}
+
+.footer-content {
+    max-width: 100%;
+}
+
+.footer-info {
+    margin-bottom: 20px;
+    padding-bottom: 20px;
+    border-bottom: 1px solid var(--border-color);
+}
+
+.footer-info p {
+    margin-bottom: 8px;
+    color: #2d3748;
+}
+
+.disclaimer {
+    margin-top: 20px;
+    font-style: italic;
+    padding: 15px;
+    background: #fff3cd;
+    border-left: 4px solid var(--warning-color);
+    border-radius: 4px;
+    color: #856404;
+}
+
+/* Print Styles */
+@media print {
+    body {
+        background: white;
+    }
+    
+    .report-container {
+        box-shadow: none;
+        max-width: 100%;
+        margin: 0;
+    }
+    
+    .report-section {
+        page-break-inside: avoid;
+    }
+    
+    h2, h3 {
+        page-break-after: avoid;
+    }
+    
+    .section-divider {
+        page-break-after: avoid;
+    }
+}
+
+/* Threat Level Indicators */
+.threat-critical { 
+    color: #c53030; 
+    font-weight: bold;
+    background: #fff5f5;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-high { 
+    color: #dd6b20; 
+    font-weight: bold;
+    background: #fffaf0;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-medium { 
+    color: #d69e2e;
+    background: #fffff0;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+.threat-low { 
+    color: #38a169;
+    background: #f0fff4;
+    padding: 2px 6px;
+    border-radius: 3px;
+}
+
+/* Horizontal rules */
+hr {
+    border: none;
+    border-top: 2px solid var(--border-color);
+    margin: 30px 0;
+}
+
+/* Scrollable sections - ensure text is visible */
+.scrollable-section {
+    max-height: 400px;
+    overflow-y: auto;
+    padding: 20px;
+    background: white;
+    border: 1px solid var(--border-color);
+    border-radius: 8px;
+    color: var(--text-color);
+}
+
+.scrollable-section::-webkit-scrollbar {
+    width: 10px;
+}
+
+.scrollable-section::-webkit-scrollbar-track {
+    background: #f1f1f1;
+    border-radius: 10px;
+}
+
+.scrollable-section::-webkit-scrollbar-thumb {
+    background: var(--accent-color);
+    border-radius: 10px;
+}
+
+.scrollable-section::-webkit-scrollbar-thumb:hover {
+    background: var(--primary-color);
+}
\ No newline at end of file
diff --git a/requirements-docker.txt b/requirements-docker.txt
new file mode 100644
index 0000000000000000000000000000000000000000..81af226bd44072ce516b10531a417add2156d6b6
--- /dev/null
+++ b/requirements-docker.txt
@@ -0,0 +1,35 @@
+# ---------- DetectifAI Backend — Docker requirements ----------
+# torch + torchvision are installed separately (CPU-only) in Dockerfile
+# Do NOT add them here to avoid overwriting with the CUDA build.
+
+Flask==2.3.3
+Flask-CORS==4.0.0
+Werkzeug==3.0.0
+PyJWT==2.8.0
+pymongo>=4.6.3,<5.0
+python-multipart==0.0.6
+minio==7.1.11
+opencv-python-headless==4.8.0.74
+python-dotenv==1.0.0
+stripe==7.10.0
+gunicorn==21.2.0
+faiss-cpu
+numpy<2
+Pillow
+scikit-learn
+sentence-transformers
+bcrypt
+ultralytics
+facenet-pytorch
+matplotlib
+tqdm
+scikit-image
+imagehash
+
+# Report Generation Module Dependencies
+llama-cpp-python
+weasyprint
+reportlab
+markdown
+huggingface_hub
+jinja2
diff --git a/start_detectifai.py b/start_detectifai.py
new file mode 100644
index 0000000000000000000000000000000000000000..a96cc0d0ef4e56d0a7d8f6866b5ea4188561952a
--- /dev/null
+++ b/start_detectifai.py
@@ -0,0 +1,200 @@
+"""
+DetectifAI API Startup Script
+
+Quick script to launch the DetectifAI API server with proper environment setup
+and preliminary checks for the surveillance system.
+"""
+
+import sys
+import os
+import subprocess
+import time
+import logging
+
+def check_python_environment():
+    """Report whether every required third-party package can be imported.
+
+    Prints one status line per package and an install hint when any is missing.
+    """
+    print("🐍 Checking Python environment...")
+    wanted = ('flask', 'flask_cors', 'opencv-python', 'numpy',
+              'ultralytics', 'pillow', 'matplotlib')
+    not_found = []
+    for name in wanted:
+        try:
+            # Some entries are checked through a differently named module or symbol
+            if name == 'opencv-python':
+                import cv2
+            elif name == 'flask_cors':
+                from flask_cors import CORS
+            elif name == 'ultralytics':
+                from ultralytics import YOLO
+            elif name == 'pillow':
+                from PIL import Image
+            else:
+                __import__(name)
+        except ImportError:
+            not_found.append(name)
+            print(f"  ❌ {name}")
+        else:
+            print(f"  ✅ {name}")
+
+    if not not_found:
+        return True
+
+    print(f"\n⚠️ Missing packages: {', '.join(not_found)}")
+    print("💡 Install with: pip install " + " ".join(not_found))
+    return False
+
+def check_model_files():
+    """Report which expected YOLO model files exist under models/.
+
+    Returns True when nothing is missing or at least one model was found,
+    and False when every expected model file is absent.
+    """
+    print("\n🤖 Checking AI model files...")
+
+    expected = ('models/fire_YOLO11.pt', 'models/weapon_YOLO11.pt')
+    present, absent = [], []
+
+    for path in expected:
+        if not os.path.exists(path):
+            absent.append(path)
+            print(f"  ❌ {path}")
+            continue
+        megabytes = os.path.getsize(path) / (1024 * 1024)
+        print(f"  ✅ {path} ({megabytes:.1f} MB)")
+        present.append(path)
+
+    if not absent:
+        return True
+
+    print(f"\n⚠️ Missing model files: {', '.join(absent)}")
+    if present:
+        names = ', '.join([os.path.basename(f) for f in present])
+        print(f"✅ Found {len(present)} model(s): {names}")
+        print("💡 DetectifAI will work with available models")
+    else:
+        print("💡 DetectifAI will work with reduced functionality")
+    return bool(present)
+
+def check_test_videos():
+    """Return the expected test videos that exist in the current directory."""
+    print("\n🎬 Checking test videos...")
+    candidates = ('rob.mp4', 'fire.avi')
+    found = []
+
+    for clip in candidates:
+        if not os.path.exists(clip):
+            print(f"  ❌ {clip}")
+            continue
+        # Size is reported in units of 1024 * 1024 bytes
+        megabytes = os.path.getsize(clip) / (1024 * 1024)
+        print(f"  ✅ {clip} ({megabytes:.1f} MB)")
+        found.append(clip)
+
+    print(f"\n📊 {len(found)}/{len(candidates)} test videos available")
+    return found
+
+def setup_directories():
+    """Ensure the expected working directories exist.
+
+    Missing directories are created relative to the current working
+    directory; existing ones are only reported.
+    """
+    print("\n📁 Setting up directories...")
+
+    needed = (
+        'uploads', 'video_processing_outputs', 'logs',
+        'core', 'docs', 'models',
+    )
+
+    for folder in needed:
+        if os.path.exists(folder):
+            print(f"  ✅ {folder}/ exists")
+            continue
+        os.makedirs(folder)
+        print(f"  ✅ Created {folder}/")
+
+def start_detectifai_api():
+    """Start the DetectifAI API server and wait for it to exit.
+
+    Returns True when app.py exits with status 0 or is stopped with Ctrl+C,
+    False when app.py is missing, cannot be launched, or exits with an error.
+    """
+    print("\n🚀 Starting DetectifAI API server...")
+    print("=" * 50)
+    try:
+        # app.py is looked up relative to the current working directory
+        if not os.path.exists('app.py'):
+            print("❌ app.py not found in current directory")
+            print("💡 Make sure you're in the backend directory")
+            return False
+        
+        print("🌐 API will be available at: http://localhost:5000")
+        print("📋 API endpoints:")
+        print("  • Health: GET /api/health")
+        print("  • Upload: POST /api/upload")
+        print("  • Status: GET /api/status/<video_id>")
+        print("  • Results: GET /api/results/<video_id>")
+        print("  • Demo: GET /api/detectifai/demo")
+        print("  • DetectifAI Events: GET /api/detectifai/events/<video_id>")
+        print("  • Keyframes: GET /api/keyframes/<video_id>")
+        print("")
+        print("🔧 To test the API, run: python test_detectifai_integration.py")
+        print("🌐 For frontend integration, ensure CORS is enabled")
+        print("")
+        print("Press Ctrl+C to stop the server")
+        print("=" * 50)
+        # Report the server's exit status instead of implicitly returning None
+        return subprocess.run([sys.executable, 'app.py']).returncode == 0
+    except KeyboardInterrupt:
+        print("\n\n🛑 DetectifAI API server stopped")
+        return True
+    except Exception as e:
+        print(f"\n❌ Error starting API server: {e}")
+        return False
+
+def main():
+    """Run the pre-flight checks and, once confirmed, launch the API server.
+
+    Returns False when required packages are missing, True when the user
+    declines or interrupts the prompt, else the result of start_detectifai_api().
+    """
+    print("🔧 DetectifAI API Startup")
+    print("========================")
+    # System checks
+    env_ok = check_python_environment()
+    models_ok = check_model_files()
+    videos = check_test_videos()
+    setup_directories()
+    
+    print("\n📋 System Status Summary:")
+    print(f"  🐍 Python Environment: {'✅' if env_ok else '⚠️'}")
+    print(f"  🤖 AI Models: {'✅' if models_ok else '⚠️'}")
+    print(f"  🎬 Test Videos: {len(videos)} available")
+    
+    if not env_ok:
+        print("\n❌ Cannot start API - missing required Python packages")
+        return False
+    
+    print("\n🎯 DetectifAI System Ready")
+    if videos:
+        print(f"💡 Demo videos available: {', '.join(videos)}")
+    
+    try:
+        try:
+            response = input("\n🚀 Start DetectifAI API server? (y/n): ").lower().strip()
+        except EOFError:
+            response = ''  # stdin is closed (non-interactive run): take the default answer
+        if response in ('y', 'yes', ''):
+            return start_detectifai_api()
+        print("👋 Startup cancelled")
+        return True
+    except KeyboardInterrupt:
+        print("\n👋 Startup cancelled")
+        return True
+
+if __name__ == "__main__":
+    success = main()  # truthy on success or a user-initiated cancel
+    sys.exit(0 if success else 1)  # exit status 0 for a truthy result, 1 otherwise
\ No newline at end of file
diff --git a/stripe_service.py b/stripe_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..665514b84626e463718e1900fbf0183ce6257321
--- /dev/null
+++ b/stripe_service.py
@@ -0,0 +1,370 @@
+"""
+Stripe Service Module for DetectifAI Subscription Management
+
+This module provides a wrapper around Stripe API for:
+- Creating checkout sessions
+- Managing customer subscriptions
+- Handling subscription updates
+- Processing webhooks
+"""
+
+import stripe
+import os
+from typing import Dict, Optional, List
+from datetime import datetime
+from dotenv import load_dotenv
+import logging
+
+load_dotenv()  # runs at import time, before the os.getenv() lookups below
+
+logger = logging.getLogger(__name__)
+
+# Configure the Stripe SDK's module-level API key (None when STRIPE_SECRET_KEY is unset)
+stripe.api_key = os.getenv('STRIPE_SECRET_KEY')
+
+class StripeService:
+    """Wrapper around the Stripe SDK for checkout, billing-portal, subscription and webhook calls"""
+    
+    def __init__(self):
+        self.stripe_secret_key = os.getenv('STRIPE_SECRET_KEY')
+        self.webhook_secret = os.getenv('STRIPE_WEBHOOK_SECRET')
+        self.frontend_url = os.getenv('FRONTEND_URL', 'https://detectif-ai-fyp.vercel.app')
+        
+        # Product IDs
+        self.basic_product_id = os.getenv('STRIPE_BASIC_PRODUCT_ID')
+        self.pro_product_id = os.getenv('STRIPE_PRO_PRODUCT_ID')
+        
+        # Price IDs, keyed as '<plan>_<billing period>' (see get_price_id)
+        self.price_ids = {
+            'basic_monthly': os.getenv('STRIPE_BASIC_MONTHLY_PRICE_ID'),
+            'basic_yearly': os.getenv('STRIPE_BASIC_YEARLY_PRICE_ID'),
+            'pro_monthly': os.getenv('STRIPE_PRO_MONTHLY_PRICE_ID'),
+            'pro_yearly': os.getenv('STRIPE_PRO_YEARLY_PRICE_ID'),
+        }
+        
+        # Validate configuration: without a secret key the service is flagged as disabled
+        if not self.stripe_secret_key:
+            logger.warning("⚠️ STRIPE_SECRET_KEY not set — Stripe payments DISABLED (demo mode)")
+            self.enabled = False
+        else:
+            self.enabled = True
+            logger.info("✅ Stripe service initialized successfully")
+    
+    def create_checkout_session(
+        self,
+        user_id: str,
+        user_email: str,
+        price_id: str,
+        plan_name: str,
+        billing_period: str
+    ) -> Dict:
+        """
+        Create a Stripe Checkout session for subscription (raises Exception on Stripe errors)
+        
+        Args:
+            user_id: DetectifAI user ID
+            user_email: User's email address
+            price_id: Stripe price ID
+            plan_name: Plan name (Basic or Pro)
+            billing_period: Billing period (monthly or yearly)
+        
+        Returns:
+            Dictionary with checkout session details ('session_id', 'url', 'status')
+        """
+        try:
+            # Create checkout session; the same metadata is attached to the session and the subscription
+            checkout_session = stripe.checkout.Session.create(
+                customer_email=user_email,
+                payment_method_types=['card'],
+                line_items=[{
+                    'price': price_id,
+                    'quantity': 1,
+                }],
+                mode='subscription',
+                success_url=f'{self.frontend_url}/dashboard/subscription?success=true&session_id={{CHECKOUT_SESSION_ID}}',
+                cancel_url=f'{self.frontend_url}/pricing?canceled=true',
+                metadata={
+                    'user_id': user_id,
+                    'plan_name': plan_name,
+                    'billing_period': billing_period
+                },
+                subscription_data={
+                    'metadata': {
+                        'user_id': user_id,
+                        'plan_name': plan_name,
+                        'billing_period': billing_period
+                    }
+                }
+            )
+            
+            logger.info(f"✅ Created checkout session for user {user_id}: {checkout_session.id}")
+            
+            return {
+                'session_id': checkout_session.id,
+                'url': checkout_session.url,
+                'status': 'created'
+            }
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error creating checkout session: {str(e)}")
+            raise Exception(f"Failed to create checkout session: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"❌ Error creating checkout session: {str(e)}")
+            raise
+    
+    def create_customer_portal_session(
+        self,
+        customer_id: str,
+        return_url: Optional[str] = None
+    ) -> Dict:
+        """
+        Create a customer portal session for subscription management (raises Exception on Stripe errors)
+        
+        Args:
+            customer_id: Stripe customer ID
+            return_url: URL to return to after portal session (defaults to the frontend subscription page)
+        
+        Returns:
+            Dictionary with portal session URL ('url', 'status')
+        """
+        try:
+            if not return_url:
+                return_url = f'{self.frontend_url}/dashboard/subscription'
+            
+            portal_session = stripe.billing_portal.Session.create(
+                customer=customer_id,
+                return_url=return_url,
+            )
+            
+            logger.info(f"✅ Created portal session for customer {customer_id}")
+            
+            return {
+                'url': portal_session.url,
+                'status': 'created'
+            }
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error creating portal session: {str(e)}")
+            raise Exception(f"Failed to create portal session: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"❌ Error creating portal session: {str(e)}")
+            raise
+    
+    def get_subscription(self, subscription_id: str) -> Optional[Dict]:
+        """
+        Retrieve subscription details from Stripe
+        
+        Args:
+            subscription_id: Stripe subscription ID
+        
+        Returns:
+            Subscription details (timestamps as naive UTC datetimes), or None on any error
+        """
+        try:
+            subscription = stripe.Subscription.retrieve(subscription_id)
+            
+            return {
+                'id': subscription.id,
+                'customer': subscription.customer,
+                'status': subscription.status,
+                'current_period_start': datetime.utcfromtimestamp(subscription.current_period_start),
+                'current_period_end': datetime.utcfromtimestamp(subscription.current_period_end),
+                'cancel_at_period_end': subscription.cancel_at_period_end,
+                'canceled_at': datetime.utcfromtimestamp(subscription.canceled_at) if subscription.canceled_at else None,
+                'plan': {
+                    'id': subscription.plan.id,
+                    'amount': subscription.plan.amount / 100,  # Convert from cents
+                    'currency': subscription.plan.currency,
+                    'interval': subscription.plan.interval,
+                }
+            }
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error retrieving subscription: {str(e)}")
+            return None
+        except Exception as e:
+            logger.error(f"❌ Error retrieving subscription: {str(e)}")
+            return None
+    
+    def cancel_subscription(
+        self,
+        subscription_id: str,
+        at_period_end: bool = True
+    ) -> Dict:
+        """
+        Cancel a subscription (raises Exception on Stripe errors)
+        
+        Args:
+            subscription_id: Stripe subscription ID
+            at_period_end: If True, cancel at period end; if False, cancel immediately
+        
+        Returns:
+            Updated subscription details ('canceled_at' is a naive UTC datetime or None)
+        """
+        try:
+            if at_period_end:
+                # Cancel at period end
+                subscription = stripe.Subscription.modify(
+                    subscription_id,
+                    cancel_at_period_end=True
+                )
+                logger.info(f"✅ Subscription {subscription_id} will cancel at period end")
+            else:
+                # Cancel immediately; Subscription.cancel replaces the deprecated Subscription.delete
+                subscription = stripe.Subscription.cancel(subscription_id)
+                logger.info(f"✅ Subscription {subscription_id} canceled immediately")
+            
+            return {
+                'id': subscription.id,
+                'status': subscription.status,
+                'cancel_at_period_end': subscription.cancel_at_period_end,
+                'canceled_at': datetime.utcfromtimestamp(subscription.canceled_at) if subscription.canceled_at else None,
+            }
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error canceling subscription: {str(e)}")
+            raise Exception(f"Failed to cancel subscription: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"❌ Error canceling subscription: {str(e)}")
+            raise
+    
+    def update_subscription(
+        self,
+        subscription_id: str,
+        new_price_id: str
+    ) -> Dict:
+        """
+        Update subscription to a new plan/price (raises Exception on Stripe errors)
+        
+        Args:
+            subscription_id: Stripe subscription ID
+            new_price_id: New Stripe price ID
+        
+        Returns:
+            Updated subscription details (period bounds as naive UTC datetimes)
+        """
+        try:
+            # Get current subscription; its first item is the one moved to the new price
+            subscription = stripe.Subscription.retrieve(subscription_id)
+            
+            # Update subscription with new price
+            updated_subscription = stripe.Subscription.modify(
+                subscription_id,
+                items=[{
+                    'id': subscription['items']['data'][0].id,
+                    'price': new_price_id,
+                }],
+                proration_behavior='create_prorations',  # Prorate the change
+            )
+            
+            logger.info(f"✅ Updated subscription {subscription_id} to price {new_price_id}")
+            
+            return {
+                'id': updated_subscription.id,
+                'status': updated_subscription.status,
+                'current_period_start': datetime.utcfromtimestamp(updated_subscription.current_period_start),
+                'current_period_end': datetime.utcfromtimestamp(updated_subscription.current_period_end),
+            }
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error updating subscription: {str(e)}")
+            raise Exception(f"Failed to update subscription: {str(e)}") from e
+        except Exception as e:
+            logger.error(f"❌ Error updating subscription: {str(e)}")
+            raise
+    
+    def get_customer_subscriptions(self, customer_id: str) -> List[Dict]:
+        """
+        Get subscriptions for a customer (at most 10 are requested)
+        
+        Args:
+            customer_id: Stripe customer ID
+        
+        Returns:
+            List of subscription details (naive UTC datetimes); empty list on any error
+        """
+        try:
+            subscriptions = stripe.Subscription.list(
+                customer=customer_id,
+                limit=10
+            )
+            
+            return [{
+                'id': sub.id,
+                'status': sub.status,
+                'current_period_start': datetime.utcfromtimestamp(sub.current_period_start),
+                'current_period_end': datetime.utcfromtimestamp(sub.current_period_end),
+                'cancel_at_period_end': sub.cancel_at_period_end,
+                'plan': {
+                    'id': sub.plan.id,
+                    'amount': sub.plan.amount / 100,  # Convert from cents
+                    'currency': sub.plan.currency,
+                    'interval': sub.plan.interval,
+                }
+            } for sub in subscriptions.data]
+            
+        except stripe.error.StripeError as e:
+            logger.error(f"❌ Stripe error retrieving customer subscriptions: {str(e)}")
+            return []
+        except Exception as e:
+            logger.error(f"❌ Error retrieving customer subscriptions: {str(e)}")
+            return []
+    
+    def construct_webhook_event(self, payload: bytes, signature: str):
+        """
+        Construct and verify webhook event from Stripe (verification errors are logged and re-raised)
+        
+        Args:
+            payload: Raw request body
+            signature: Stripe signature header
+        
+        Returns:
+            Verified Stripe event object
+        """
+        try:
+            event = stripe.Webhook.construct_event(
+                payload, signature, self.webhook_secret
+            )
+            logger.info(f"✅ Verified webhook event: {event['type']}")
+            return event
+            
+        except ValueError as e:
+            logger.error(f"❌ Invalid webhook payload: {str(e)}")
+            raise
+        except stripe.error.SignatureVerificationError as e:
+            logger.error(f"❌ Invalid webhook signature: {str(e)}")
+            raise
+    
+    def get_price_id(self, plan_name: str, billing_period: str) -> Optional[str]:
+        """
+        Get Stripe price ID for a plan and billing period (both matched case-insensitively)
+        
+        Args:
+            plan_name: 'basic' or 'pro'
+            billing_period: 'monthly' or 'yearly'
+        
+        Returns:
+            Stripe price ID or None if not found
+        """
+        key = f"{plan_name.lower()}_{billing_period.lower()}"
+        return self.price_ids.get(key)
+
+
+# Singleton instance
+_stripe_service = None
+
+def get_stripe_service() -> StripeService:
+    """Return the module-wide StripeService, constructing it on first use"""
+    global _stripe_service
+    # Build the instance lazily so that importing this module does not create it
+    _stripe_service = StripeService() if _stripe_service is None else _stripe_service
+    return _stripe_service
+
+
+def is_stripe_enabled() -> bool:
+    """Report the shared Stripe service's 'enabled' flag"""
+    try:
+        # Any failure while obtaining the service is treated as "not enabled"
+        return get_stripe_service().enabled
+    except Exception:
+        return False
diff --git a/subscription_middleware.py b/subscription_middleware.py
new file mode 100644
index 0000000000000000000000000000000000000000..737f42bab69b9cbeff05edf529480bbe01209fdb
--- /dev/null
+++ b/subscription_middleware.py
@@ -0,0 +1,617 @@
+"""
+Subscription Middleware - Feature Gating & Usage Limits
+Enforces subscription plan restrictions and tracks usage
+"""
+
+from functools import wraps
+from flask import request, jsonify
+from datetime import datetime
+from bson import ObjectId
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+class SubscriptionMiddleware:
+    """Middleware for subscription-based feature gating"""
+    
+    def __init__(self, db):
+        """
+        Bind the middleware to a database and keep handles to its collections
+        
+        Args:
+            db: MongoDB database instance
+        """
+        self.db = db
+        # Each collection is exposed as an attribute named after the collection
+        for name in ('user_subscriptions', 'subscription_plans', 'subscription_usage'):
+            setattr(self, name, db[name])
+        
+    
+    def get_user_subscription(self, user_id):
+        """
+        Get the active, unexpired subscription for a user, with its plan attached
+        
+        Args:
+            user_id: User's unique identifier (could be database user_id or Google ID)
+            
+        Returns:
+            dict: Subscription document (plus 'plan_details' when the plan exists) or None
+        """
+        try:
+            logger.info(f"🔍 get_user_subscription: Looking for subscription with user_id: {user_id}")
+            
+            # First, try direct lookup with the provided user_id (active and not past current_period_end)
+            subscription = self.user_subscriptions.find_one({
+                'user_id': user_id,
+                'status': 'active',
+                'current_period_end': {'$gte': datetime.utcnow()}
+            })
+            
+            if subscription:
+                logger.info(f"✅ Found active subscription with direct user_id lookup: {subscription.get('subscription_id')}")
+                # Get plan details and attach them under 'plan_details'
+                plan = self.subscription_plans.find_one({
+                    'plan_id': subscription.get('plan_id')
+                })
+                
+                if plan:
+                    subscription['plan_details'] = plan
+                else:
+                    logger.warning(f"⚠️ Plan not found for plan_id: {subscription.get('plan_id')}")
+                return subscription
+            
+            # If not found, try to find the user in the users collection and get their actual user_id
+            # This handles the case where user_id might be a Google ID instead of database user_id
+            logger.info(f"⚠️ No subscription found with user_id {user_id}, trying to find user in database...")
+            try:
+                users_collection = self.db['users']
+                user_doc = users_collection.find_one({'user_id': user_id})
+                if not user_doc:
+                    # Try finding by Google ID if user_id looks like a Google ID (numeric string)
+                    if user_id and isinstance(user_id, str) and user_id.isdigit():
+                        logger.info(f"🔍 user_id looks like Google ID, searching by google_id...")
+                        user_doc = users_collection.find_one({'google_id': user_id})
+                
+                if user_doc:
+                    actual_user_id = user_doc.get('user_id')
+                    logger.info(f"✅ Found user in database, actual user_id: {actual_user_id}")
+                    
+                    # Now try to find subscription with the actual user_id (same active/unexpired filter)
+                    subscription = self.user_subscriptions.find_one({
+                        'user_id': actual_user_id,
+                        'status': 'active',
+                        'current_period_end': {'$gte': datetime.utcnow()}
+                    })
+                    
+                    if subscription:
+                        logger.info(f"✅ Found active subscription with actual user_id: {subscription.get('subscription_id')}")
+                        # Get plan details (a missing plan is not logged on this path)
+                        plan = self.subscription_plans.find_one({
+                            'plan_id': subscription.get('plan_id')
+                        })
+                        
+                        if plan:
+                            subscription['plan_details'] = plan
+                        return subscription
+                    else:
+                        logger.warning(f"⚠️ No active subscription found for actual user_id: {actual_user_id}")
+                else:
+                    logger.warning(f"⚠️ User not found in database with user_id or google_id: {user_id}")
+            except Exception as e:
+                # A failed user lookup is logged only; execution continues to the debug listing below
+                logger.error(f"❌ Error looking up user: {str(e)}")
+            
+            # Debug: log every subscription stored for this user_id, whatever its status
+            all_subscriptions = list(self.user_subscriptions.find({'user_id': user_id}))
+            logger.info(f"📊 Found {len(all_subscriptions)} total subscription(s) for user_id {user_id}")
+            for sub in all_subscriptions:
+                logger.info(f"  - Subscription ID: {sub.get('subscription_id')}, Status: {sub.get('status')}, Plan: {sub.get('plan_id')}")
+                    
+            return None
+            
+        except Exception as e:
+            logger.error(f"❌ Error getting user subscription: {str(e)}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    
+    
+    def get_user_plan_name(self, user_id):
+        """
+        Resolve the plan identifier of a user's active subscription
+        
+        Args:
+            user_id: User's unique identifier
+            
+        Returns:
+            str: The subscription's plan_id, or 'free' when the user has no
+                active subscription or the subscription carries no plan_id
+        """
+        active = self.get_user_subscription(user_id)
+        
+        # A missing subscription and a missing plan_id both map to 'free'
+        plan_id = active.get('plan_id', 'free') if active else 'free'
+        return plan_id
+    
+    
+    def check_feature_access(self, user_id, feature_name):
+        """
+        Tell whether the user's active plan lists a given feature
+        
+        Args:
+            user_id: User's unique identifier
+            feature_name: Feature to check (e.g., 'behavior_analysis', 'nlp_search')
+            
+        Returns:
+            bool: True when feature_name appears in the plan's 'features',
+                False otherwise (including users without a subscription)
+        """
+        active = self.get_user_subscription(user_id)
+        
+        if active:
+            # Plans without details or without a feature list grant nothing
+            granted = active.get('plan_details', {}).get('features', [])
+            return feature_name in granted
+        
+        # Free tier - no features
+        return False
+    
+    
+    def check_usage_limit(self, user_id, limit_type):
+        """
+        Compare a user's usage in the current billing period with the plan limit
+        
+        Args:
+            user_id: User's unique identifier
+            limit_type: Type of limit (e.g., 'video_processing', 'searches')
+            
+        Returns:
+            dict: {'allowed': bool, 'current': int, 'limit': int, 'remaining': int,
+                'message': str}. A limit_type the plan does not define counts as
+                unlimited; a missing subscription or any error yields allowed=False.
+        """
+        def refusal(message):
+            # Shared answer shape for "no subscription" and "lookup failed"
+            return {
+                'allowed': False,
+                'current': 0,
+                'limit': 0,
+                'remaining': 0,
+                'message': message
+            }
+        
+        try:
+            active = self.get_user_subscription(user_id)
+            if not active:
+                # Free tier - no access
+                return refusal('Subscription required')
+            
+            plan_limits = active.get('plan_details', {}).get('limits', {})
+            ceiling = plan_limits.get(limit_type, float('inf'))
+            
+            # Usage documents are looked up by user, subscription and billing period
+            period_filter = {
+                'user_id': user_id,
+                'subscription_id': str(active['_id']),
+                'period_start': active.get('current_period_start'),
+                'period_end': active.get('current_period_end')
+            }
+            usage_doc = self.subscription_usage.find_one(period_filter)
+            
+            if usage_doc:
+                used = usage_doc.get('usage', {}).get(limit_type, 0)
+            else:
+                used = 0
+            
+            within_limit = used < ceiling
+            headroom = max(0, ceiling - used)
+            
+            return {
+                'allowed': within_limit,
+                'current': used,
+                'limit': ceiling,
+                'remaining': headroom,
+                'message': 'OK' if within_limit else f'{limit_type} limit exceeded'
+            }
+            
+        except Exception as e:
+            logger.error(f"Error checking usage limit: {str(e)}")
+            return refusal(f'Error: {str(e)}')
+    
+    
+    def increment_usage(self, user_id, limit_type, amount=1):
+        """
+        Add to a user's usage counter for the current billing period
+        
+        Args:
+            user_id: User's unique identifier
+            limit_type: Type of usage to increment
+            amount: Amount to increment by (default: 1)
+            
+        Returns:
+            bool: True after the usage document was upserted, False when the
+                user has no active subscription or the update raised
+        """
+        try:
+            active = self.get_user_subscription(user_id)
+            if not active:
+                return False
+            
+            # One usage document per user, subscription and billing period
+            period_filter = {
+                'user_id': user_id,
+                'subscription_id': str(active['_id']),
+                'period_start': active.get('current_period_start'),
+                'period_end': active.get('current_period_end')
+            }
+            
+            changes = {
+                '$inc': {f'usage.{limit_type}': amount},
+                '$set': {'last_updated': datetime.utcnow()},
+                # Written only when the upsert has to create the document
+                '$setOnInsert': {
+                    'user_id': user_id,
+                    'subscription_id': str(active['_id']),
+                    'plan_id': active.get('plan_id'),
+                    'period_start': active.get('current_period_start'),
+                    'period_end': active.get('current_period_end'),
+                    'created_at': datetime.utcnow()
+                }
+            }
+            
+            # upsert=True creates the document on the first increment of a period
+            self.subscription_usage.update_one(period_filter, changes, upsert=True)
+            
+        except Exception as e:
+            logger.error(f"Error incrementing usage: {str(e)}")
+            return False
+        else:
+            return True
+    
+    
+    def get_usage_summary(self, user_id):
+        """
+        Get complete usage summary for a user's current billing period
+        
+        Args:
+            user_id: User's unique identifier
+            
+        Returns:
+            dict: Plan info, 'limits' and per-limit 'usage' entries; a stub with 'has_subscription': False if none/error
+        """
+        try:
+            subscription = self.get_user_subscription(user_id)
+            
+            if not subscription:
+                return {
+                    'has_subscription': False,
+                    'plan': 'free',
+                    'message': 'No active subscription'
+                }
+            # Limits come from the plan_details embedded in the subscription document
+            plan_details = subscription.get('plan_details', {})
+            limits = plan_details.get('limits', {})
+            
+            # Get current usage for this subscription's billing period
+            usage_doc = self.subscription_usage.find_one({
+                'user_id': user_id,
+                'subscription_id': str(subscription['_id']),
+                'period_start': subscription.get('current_period_start'),
+                'period_end': subscription.get('current_period_end')
+            })
+            
+            current_usage = {}
+            if usage_doc:
+                current_usage = usage_doc.get('usage', {})
+            
+            # Build summary
+            summary = {
+                'has_subscription': True,
+                'plan': subscription.get('plan_id'),
+                'plan_name': plan_details.get('plan_name'),
+                'status': subscription.get('status'),
+                'period_start': subscription.get('current_period_start'),
+                'period_end': subscription.get('current_period_end'),
+                'usage': {},
+                'limits': limits
+            }
+            # NOTE(review): assumes every limit value is numeric; a None/str limit raises and lands in the except path
+            # Calculate remaining for each limit; limit types with no recorded usage count as 0 used
+            for limit_type, limit_value in limits.items():
+                used = current_usage.get(limit_type, 0)
+                summary['usage'][limit_type] = {
+                    'used': used,
+                    'limit': limit_value,
+                    'remaining': max(0, limit_value - used),
+                    'percentage': (used / limit_value * 100) if limit_value > 0 else 0
+                }
+            
+            return summary
+            
+        except Exception as e:
+            logger.error(f"Error getting usage summary: {str(e)}")
+            return {
+                'has_subscription': False,
+                'error': str(e)
+            }
+
+
+# Decorator for requiring subscription
+def require_subscription(plan_required=None):
+    """
+    Decorator to require an active subscription (optionally a minimum plan) for an endpoint
+    user_id is read from the query string, form data or JSON body; failures return 401/500/403 JSON
+    Args:
+        plan_required: Minimum plan required ('basic' or 'pro'), None for any plan
+        
+    Usage:
+        @app.route('/api/process-video')
+        @require_subscription('basic')
+        def process_video():
+            ...
+    """
+    def decorator(f):
+        @wraps(f)
+        def decorated_function(*args, **kwargs):
+            from flask import current_app
+            
+            # Get user_id from request (adjust based on your auth)
+            user_id = request.args.get('user_id')
+            
+            # Try getting from form data (for multipart/form-data)
+            if not user_id:
+                user_id = request.form.get('user_id')
+                
+            # Try getting from JSON if not found yet (silent=True prevents 415 error)
+            if not user_id:
+                try:
+                    json_data = request.get_json(silent=True)
+                    if json_data:
+                        user_id = json_data.get('user_id')
+                except Exception:
+                    pass  # best effort: a body that is not a JSON object simply yields no user_id
+            
+            if not user_id:
+                logger.warning("⚠️ require_subscription: user_id not found in request")
+                return jsonify({
+                    'success': False,
+                    'error': 'user_id required'
+                }), 401
+            
+            logger.info(f"🔍 require_subscription: Checking subscription for user_id: {user_id} (type: {type(user_id).__name__})")
+            
+            # Initialize middleware
+            db = current_app.config.get('DETECTIFAI_DB')
+            if db is None:
+                logger.error("❌ DETECTIFAI_DB not found in app config")
+                return jsonify({
+                    'success': False,
+                    'error': 'Database configuration error'
+                }), 500
+            
+            middleware = SubscriptionMiddleware(db)
+            
+            # If user_id looks like a Google ID (all numeric), try to find the actual database user_id
+            actual_user_id = user_id
+            if user_id and isinstance(user_id, str) and user_id.isdigit():
+                logger.info("🔍 user_id appears to be a Google ID, looking up actual user_id...")
+                try:
+                    users_collection = db['users']
+                    user_doc = users_collection.find_one({'google_id': user_id})
+                    if user_doc:
+                        actual_user_id = user_doc.get('user_id') or user_id  # keep the given id if the doc has none
+                        logger.info(f"✅ Found user, actual user_id: {actual_user_id}")
+                    else:
+                        # Also try by user_id in case it's already the database ID
+                        user_doc = users_collection.find_one({'user_id': user_id})
+                        if user_doc:
+                            actual_user_id = user_id
+                            logger.info("✅ User found with provided user_id")
+                except Exception as e:
+                    logger.error(f"❌ Error looking up user: {str(e)}")
+            
+            # Check subscription with actual_user_id
+            subscription = middleware.get_user_subscription(actual_user_id)
+            
+            if not subscription:
+                logger.warning(f"⚠️ require_subscription: No active subscription found for user_id: {user_id}")
+                # Check if user exists at all (uses the resolved id, the one the subscription lookup used)
+                try:
+                    users_collection = db['users']
+                    user_exists = users_collection.find_one({'user_id': actual_user_id})
+                    if not user_exists:
+                        # Diagnostic only: the 403 response below is the same either way
+                        logger.warning(f"⚠️ User with user_id {actual_user_id} not found in users collection")
+                except Exception as e:
+                    logger.error(f"❌ Error checking user existence: {str(e)}")
+                
+                return jsonify({
+                    'success': False,
+                    'error': 'Active subscription required',
+                    'message': 'Please subscribe to a plan to access this feature',
+                    'upgrade_url': '/pricing',
+                    'user_id_received': user_id
+                }), 403
+            
+            logger.info(f"✅ require_subscription: Active subscription found for user_id: {user_id}, plan: {subscription.get('plan_id')}")
+            
+            # Check plan level if specified
+            if plan_required:
+                plan_id = subscription.get('plan_id', '')
+                
+                # Define plan hierarchy
+                plan_hierarchy = {
+                    'detectifai_basic': 1,
+                    'detectifai_pro': 2
+                }
+                # NOTE: an unrecognised plan_required maps to level 0, i.e. it imposes no restriction
+                required_level = plan_hierarchy.get(f'detectifai_{plan_required}', 0)
+                user_level = plan_hierarchy.get(plan_id, 0)
+                
+                if user_level < required_level:
+                    return jsonify({
+                        'success': False,
+                        'error': f'{plan_required.title()} plan required',
+                        'message': f'This feature requires {plan_required.title()} or higher plan',
+                        'current_plan': plan_id,
+                        'required_plan': f'detectifai_{plan_required}',
+                        'upgrade_url': '/pricing'
+                    }), 403
+            
+            # Add subscription to request context
+            request.subscription = subscription
+            request.subscription_middleware = middleware
+            
+            return f(*args, **kwargs)
+        
+        return decorated_function
+    return decorator
+
+
+# Decorator for requiring specific feature
+def require_feature(feature_name):
+    """
+    Decorator to require specific feature access
+    Responds 401 without a user_id, 500 without DETECTIFAI_DB, 403 when the feature is not available
+    Args:
+        feature_name: Feature required (e.g., 'behavior_analysis', 'nlp_search')
+        
+    Usage:
+        @app.route('/api/behavior-analysis')
+        @require_feature('behavior_analysis')
+        def behavior_analysis():
+            ...
+    """
+    def decorator(f):
+        @wraps(f)
+        def decorated_function(*args, **kwargs):
+            from flask import current_app
+            
+            user_id = request.args.get('user_id')
+            
+            # Try getting from form data (for multipart/form-data)
+            if not user_id:
+                user_id = request.form.get('user_id')
+                
+            # Try getting from JSON if not found yet (silent=True prevents 415 error)
+            if not user_id:
+                try:
+                    json_data = request.get_json(silent=True)
+                    if json_data:
+                        user_id = json_data.get('user_id')
+                except Exception:
+                    pass  # best effort: a body that is not a JSON object simply yields no user_id
+            
+            if not user_id:
+                return jsonify({'success': False, 'error': 'user_id required'}), 401
+            
+            db = current_app.config.get('DETECTIFAI_DB')
+            if db is None:  # same guard as require_subscription; never hand None to the middleware
+                logger.error("❌ DETECTIFAI_DB not found in app config")
+                return jsonify({'success': False, 'error': 'Database configuration error'}), 500
+            middleware = SubscriptionMiddleware(db)
+            
+            # Check feature access (NOTE: unlike require_subscription, a numeric Google ID is not resolved here)
+            has_access = middleware.check_feature_access(user_id, feature_name)
+            
+            if not has_access:
+                subscription = middleware.get_user_subscription(user_id)
+                current_plan = subscription.get('plan_id') if subscription else 'free'
+                
+                return jsonify({
+                    'success': False,
+                    'error': f'Feature not available: {feature_name}',
+                    'message': f'Your {current_plan} plan does not include {feature_name}',
+                    'current_plan': current_plan,
+                    'upgrade_url': '/pricing'
+                }), 403
+            
+            request.subscription_middleware = middleware
+            
+            return f(*args, **kwargs)
+        
+        return decorated_function
+    return decorator
+
+
+# Decorator for checking usage limits
+def check_usage_limit(limit_type, auto_increment=True):
+    """
+    Decorator to check and optionally increment usage limits
+    Responds 401 without a user_id, 500 without DETECTIFAI_DB, 429 when the limit check is not allowed
+    Args:
+        limit_type: Type of limit to check (e.g., 'video_processing')
+        auto_increment: Whether to increment the counter after a response with status < 400 (default: True)
+        
+    Usage:
+        @app.route('/api/process-video')
+        @require_subscription()
+        @check_usage_limit('video_processing')
+        def process_video():
+            ...
+    """
+    def decorator(f):
+        @wraps(f)
+        def decorated_function(*args, **kwargs):
+            from flask import current_app
+            
+            user_id = request.args.get('user_id')
+            
+            # Try getting from form data (for multipart/form-data)
+            if not user_id:
+                user_id = request.form.get('user_id')
+                
+            # Try getting from JSON if not found yet (silent=True prevents 415 error)
+            if not user_id:
+                try:
+                    json_data = request.get_json(silent=True)
+                    if json_data:
+                        user_id = json_data.get('user_id')
+                except Exception:
+                    pass  # best effort: a body that is not a JSON object simply yields no user_id
+            
+            if not user_id:
+                return jsonify({'success': False, 'error': 'user_id required'}), 401
+            
+            db = current_app.config.get('DETECTIFAI_DB')
+            if db is None:  # same guard as require_subscription; never hand None to the middleware
+                logger.error("❌ DETECTIFAI_DB not found in app config")
+                return jsonify({'success': False, 'error': 'Database configuration error'}), 500
+            middleware = SubscriptionMiddleware(db)
+            
+            # Check limit
+            limit_check = middleware.check_usage_limit(user_id, limit_type)
+            
+            if not limit_check['allowed']:
+                return jsonify({
+                    'success': False,
+                    'error': 'Usage limit exceeded',
+                    'message': limit_check['message'],
+                    'usage': {
+                        'current': limit_check['current'],
+                        'limit': limit_check['limit'],
+                        'remaining': limit_check['remaining']
+                    },
+                    'upgrade_url': '/pricing'
+                }), 429  # Too Many Requests
+            
+            # Execute function
+            result = f(*args, **kwargs)
+            
+            # Auto-increment if successful and enabled
+            if auto_increment:
+                # A view may return a Response, or a tuple whose 2nd item is a status or headers
+                status_code = getattr(result, 'status_code', 200)
+                if isinstance(result, tuple):
+                    status_code = getattr(result[0], 'status_code', 200) if result else 200
+                    if len(result) > 1 and isinstance(result[1], int):
+                        status_code = result[1]
+                
+                # Only increment on success
+                if status_code < 400:
+                    middleware.increment_usage(user_id, limit_type)
+            
+            return result
+        
+        return decorated_function
+    return decorator
diff --git a/subscription_routes.py b/subscription_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e3d6b327e6f4150a7fce59afbf2cd5bdfa2842d
--- /dev/null
+++ b/subscription_routes.py
@@ -0,0 +1,827 @@
+"""
+Subscription Routes for DetectifAI Payment Management
+
+API endpoints for:
+- Creating checkout sessions
+- Managing subscriptions
+- Accessing customer portal
+- Retrieving subscription status
+- Handling webhooks
+"""
+
+from flask import Blueprint, request, jsonify
+from stripe_service import get_stripe_service, is_stripe_enabled
+from pymongo import MongoClient
+from datetime import datetime, timedelta
+from uuid import uuid4
+import os
+import logging
+import json
+from dotenv import load_dotenv
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+# Demo mode flag: DEMO_MODE env var set to 'true', '1' or 'yes' (case-insensitive)
+DEMO_MODE = os.getenv('DEMO_MODE', 'false').lower() in ('true', '1', 'yes')
+if DEMO_MODE:
+    logger.info("🎯 DEMO_MODE enabled — all users get Pro features, Stripe bypassed")
+
+# Create Blueprint; every route in this module is served under /api/subscriptions
+subscription_bp = Blueprint('subscriptions', __name__, url_prefix='/api/subscriptions')
+
+# MongoDB connection, created at import time; the database is the default one of MONGO_URI
+MONGO_URI = os.getenv('MONGO_URI')
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+
+# Collection handles shared by the route handlers in this module
+subscription_plans = db.subscription_plans
+user_subscriptions = db.user_subscriptions
+subscription_events = db.subscription_events
+payment_history = db.payment_history
+users = db.users
+
+# Lazy-init Stripe service: built on the first _get_stripe() call, then reused (avoid crash if env vars not yet set)
+stripe_service = None
+
+def _get_stripe():
+    global stripe_service
+    if stripe_service is None:
+        stripe_service = get_stripe_service()
+    return stripe_service
+
+
+@subscription_bp.route('/create-checkout-session', methods=['POST'])
+def create_checkout_session():
+    """
+    Create a Stripe Checkout session for subscription purchase
+    Returns 200 with session_id and url; 400 in demo mode or on invalid input; 500 on any other error
+    Request body:
+    {
+        "user_id": "user123",
+        "user_email": "user@example.com",
+        "plan_name": "basic",  # or "pro"
+        "billing_period": "monthly"  # or "yearly"
+    }
+    """
+    try:
+        data = request.get_json(silent=True)
+        data = data if isinstance(data, dict) else {}  # missing/non-object JSON falls through to the 400s below
+        user_id = data.get('user_id')
+        user_email = data.get('user_email')
+        plan_name = str(data.get('plan_name', 'basic')).lower()  # str(): a non-string value becomes a 400, not a 500
+        billing_period = str(data.get('billing_period', 'monthly')).lower()
+        # If DEMO_MODE or Stripe disabled, reject checkout gracefully
+        if DEMO_MODE or not is_stripe_enabled():
+            return jsonify({
+                'error': 'Payments are disabled in demo mode. All Pro features are already unlocked.'
+            }), 400
+        
+        # Validate input
+        if not user_id or not user_email:
+            return jsonify({'error': 'user_id and user_email are required'}), 400
+        
+        if plan_name not in ['basic', 'pro']:
+            return jsonify({'error': 'Invalid plan_name. Must be "basic" or "pro"'}), 400
+        
+        if billing_period not in ['monthly', 'yearly']:
+            return jsonify({'error': 'Invalid billing_period. Must be "monthly" or "yearly"'}), 400
+        
+        # Get price ID
+        price_id = _get_stripe().get_price_id(plan_name, billing_period)
+        if not price_id:
+            return jsonify({'error': 'Price ID not found for selected plan'}), 400
+        
+        # Create checkout session
+        session = _get_stripe().create_checkout_session(
+            user_id=user_id,
+            user_email=user_email,
+            price_id=price_id,
+            plan_name=plan_name,
+            billing_period=billing_period
+        )
+        
+        logger.info(f"✅ Created checkout session for user {user_id}: {session['session_id']}")
+        
+        return jsonify({
+            'success': True,
+            'session_id': session['session_id'],
+            'url': session['url']
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error creating checkout session: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/sync-from-stripe', methods=['POST'])
+def sync_subscription_from_stripe():
+    """
+    Manually sync subscription from Stripe (for development when webhooks don't reach localhost)
+    The Stripe price decides the plan; subscription metadata is used only when the price is not recognised
+    Request body:
+    {
+        "user_id": "user123",
+        "user_email": "user@example.com" (required for finding customer)
+    }
+    """
+    # NOTE(review): user_id and user_email are taken from the request body as-is; confirm an auth layer guards this route
+    try:
+        data = request.get_json(silent=True)
+        data = data if isinstance(data, dict) else {}  # missing/non-object JSON falls through to the 400s below
+        user_id = data.get('user_id')
+        user_email = data.get('user_email')
+        if not user_id:
+            return jsonify({'error': 'user_id is required'}), 400
+        
+        if not user_email:
+            return jsonify({'error': 'user_email is required'}), 400
+        
+        import stripe
+        stripe.api_key = os.getenv('STRIPE_SECRET_KEY')
+        
+        # Find customer by email
+        customers = stripe.Customer.list(email=user_email, limit=1)
+        if not customers.data:
+            return jsonify({'error': f'No Stripe customer found with email: {user_email}'}), 404
+        
+        customer = customers.data[0]
+        customer_id = customer.id
+        
+        logger.info(f"✅ Found customer: {customer_id} for email: {user_email}")
+        
+        # Get latest subscription for this customer
+        subscriptions = stripe.Subscription.list(customer=customer_id, limit=1, status='active')
+        if not subscriptions.data:
+            # Try to get any subscription (including past_due, etc)
+            subscriptions = stripe.Subscription.list(customer=customer_id, limit=1)
+            if not subscriptions.data:
+                return jsonify({'error': 'No subscriptions found for this customer'}), 404
+        
+        subscription_data = subscriptions.data[0]
+        
+        logger.info(f"✅ Found subscription: {subscription_data.id} with status: {subscription_data.status}")
+        
+        # Get plan details from subscription
+        # Try multiple ways to get price info
+        price_id = None
+        try:
+            # Method 1: Direct attribute access
+            if hasattr(subscription_data, 'items') and subscription_data.items and len(subscription_data.items.data) > 0:
+                price_id = subscription_data.items.data[0].price.id
+                logger.info(f"✅ Got price_id via attribute: {price_id}")
+        except Exception as e:
+            logger.warning(f"⚠️ Attribute access failed: {e}")
+        
+        if not price_id:
+            try:
+                # Method 2: Dictionary access
+                price_id = subscription_data['items']['data'][0]['price']['id']
+                logger.info(f"✅ Got price_id via dict: {price_id}")
+            except Exception as e:
+                logger.warning(f"⚠️ Dict access failed: {e}")
+        
+        # Determine plan and billing period
+        plan_name = 'basic'  # default
+        billing_period = 'monthly'  # default
+        plan_info = None  # stays None when the price cannot be mapped to a known plan
+        if price_id:
+            # Map price_id to plan
+            price_ids = {
+                os.getenv('STRIPE_BASIC_MONTHLY_PRICE_ID'): ('basic', 'monthly'),
+                os.getenv('STRIPE_BASIC_YEARLY_PRICE_ID'): ('basic', 'yearly'),
+                os.getenv('STRIPE_PRO_MONTHLY_PRICE_ID'): ('pro', 'monthly'),
+                os.getenv('STRIPE_PRO_YEARLY_PRICE_ID'): ('pro', 'yearly'),
+            }
+            
+            plan_info = price_ids.get(price_id)
+            if plan_info:
+                plan_name, billing_period = plan_info
+        
+        # Metadata is a fallback only: it must not override a plan resolved from the current price
+        if not plan_info and hasattr(subscription_data, 'metadata'):
+            plan_name = subscription_data.metadata.get('plan_name', plan_name)
+            billing_period = subscription_data.metadata.get('billing_period', billing_period)
+        
+        logger.info(f"✅ Detected plan: {plan_name}, billing: {billing_period}")
+        
+        # Get plan from database
+        plan = subscription_plans.find_one({
+            'plan_id': f'detectifai_{plan_name}'
+        })
+        
+        if not plan:
+            logger.error(f"❌ Plan not found in database: detectifai_{plan_name}")
+            return jsonify({'error': f'Plan not found: {plan_name}'}), 404
+        
+        # Check if subscription already exists
+        existing = user_subscriptions.find_one({
+            'stripe_subscription_id': subscription_data.id
+        })
+        
+        if existing:
+            # Update existing (Stripe epoch seconds -> naive UTC, matching the utcnow() values stored alongside)
+            user_subscriptions.update_one(
+                {'stripe_subscription_id': subscription_data.id},
+                {'$set': {
+                    'user_id': user_id,  # Update user_id
+                    'plan_id': plan['plan_id'],  # the plan may have changed since the last sync
+                    'status': subscription_data.status,
+                    'billing_period': billing_period,
+                    'current_period_start': datetime.utcfromtimestamp(subscription_data.current_period_start) if hasattr(subscription_data, 'current_period_start') else datetime.utcnow(),
+                    'current_period_end': datetime.utcfromtimestamp(subscription_data.current_period_end) if hasattr(subscription_data, 'current_period_end') else datetime.utcnow(),
+                    'end_date': datetime.utcfromtimestamp(subscription_data.current_period_end) if hasattr(subscription_data, 'current_period_end') else datetime.utcnow(),
+                    'cancel_at_period_end': subscription_data.cancel_at_period_end if hasattr(subscription_data, 'cancel_at_period_end') else False,
+                    'updated_at': datetime.utcnow()
+                }}
+            )
+            logger.info(f"✅ Updated existing subscription for user {user_id}")
+            message = "Subscription updated successfully"
+        else:
+            # Delete any old subscriptions for this user first
+            user_subscriptions.delete_many({'user_id': user_id})
+            
+            # Create new subscription
+            subscription_id = str(uuid4())
+            
+            # Safely get timestamps as naive UTC datetimes (Stripe sends epoch seconds)
+            created_at = datetime.utcfromtimestamp(subscription_data.created) if hasattr(subscription_data, 'created') else datetime.utcnow()
+            period_start = datetime.utcfromtimestamp(subscription_data.current_period_start) if hasattr(subscription_data, 'current_period_start') else datetime.utcnow()
+            period_end = datetime.utcfromtimestamp(subscription_data.current_period_end) if hasattr(subscription_data, 'current_period_end') else datetime.utcnow() + timedelta(days=30)
+            
+            user_subscriptions.insert_one({
+                'subscription_id': subscription_id,
+                'user_id': user_id,
+                'plan_id': plan['plan_id'],
+                'start_date': created_at,
+                'end_date': period_end,
+                'stripe_customer_id': customer_id,
+                'stripe_subscription_id': subscription_data.id,
+                'billing_period': billing_period,
+                'status': subscription_data.status,
+                'current_period_start': period_start,
+                'current_period_end': period_end,
+                'cancel_at_period_end': subscription_data.cancel_at_period_end if hasattr(subscription_data, 'cancel_at_period_end') else False,
+                'created_at': datetime.utcnow(),
+                'updated_at': datetime.utcnow()
+            })
+            logger.info(f"✅ Created subscription for user {user_id}")
+            message = "Subscription synced successfully"
+        
+        return jsonify({
+            'success': True,
+            'message': message,
+            'subscription': {
+                'subscription_id': subscription_data.id,
+                'status': subscription_data.status,
+                'plan': plan['plan_name'],
+                'billing_period': billing_period,
+                'customer_email': user_email
+            }
+        }), 200
+        
+    except Exception as e:
+        logger.exception(f"❌ Error syncing subscription: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/create-portal-session', methods=['POST'])
+def create_portal_session():
+    """
+    Create a Stripe Customer Portal session for subscription management
+    Returns 200 with the portal url; 400 for a missing user_id or customer id; 404 without a subscription
+    Request body:
+    {
+        "user_id": "user123"
+    }
+    """
+    try:
+        data = request.get_json(silent=True)
+        data = data if isinstance(data, dict) else {}  # missing/non-object JSON falls through to the 400 below
+        user_id = data.get('user_id')
+        if not user_id:
+            return jsonify({'error': 'user_id is required'}), 400
+        
+        # Get user's subscription
+        subscription = user_subscriptions.find_one({'user_id': user_id})
+        if not subscription:
+            return jsonify({'error': 'No subscription found for user'}), 404
+        
+        stripe_customer_id = subscription.get('stripe_customer_id')
+        if not stripe_customer_id:
+            return jsonify({'error': 'No Stripe customer ID found'}), 400
+        
+        # Create portal session
+        portal_session = _get_stripe().create_customer_portal_session(
+            customer_id=stripe_customer_id
+        )
+        
+        logger.info(f"✅ Created portal session for user {user_id}")
+        
+        return jsonify({
+            'success': True,
+            'url': portal_session['url']
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error creating portal session: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/status', methods=['GET'])
+def get_subscription_status():
+    """
+    Get current subscription status for a user
+    Returns 200 with has_subscription True/False, 400 without user_id, 500 on any other error
+    Query params:
+    - user_id: User ID
+    """
+    try:
+        user_id = request.args.get('user_id')
+        
+        if not user_id:
+            return jsonify({'error': 'user_id is required'}), 400
+        
+        # DEMO MODE (or Stripe disabled): return a fixed Pro plan for any user_id, without a database lookup
+        if DEMO_MODE or not is_stripe_enabled():
+            logger.info(f"🎯 Demo mode — returning Pro access for user {user_id}")
+            return jsonify({
+                'success': True,
+                'has_subscription': True,
+                'subscription_id': 'demo_mode',
+                'plan_name': 'DetectifAI Pro',
+                'plan_id': 'detectifai_pro',
+                'billing_period': 'monthly',
+                'status': 'active',
+                'current_period_start': datetime.utcnow().isoformat(),
+                'current_period_end': (datetime.utcnow() + timedelta(days=365)).isoformat(),
+                'cancel_at_period_end': False,
+                'stripe_customer_id': None,
+                'features': [
+                    'single_video', 'object_detection', 'face_recognition',
+                    'event_history_7day', 'dashboard', 'basic_reports', 'video_clips',
+                    'behavior_analysis', 'nlp_search', 'person_tracking',
+                    'image_search', 'custom_reports', 'priority_queue', 'event_history_30day',
+                ],
+                'current_plan': 'dev_mode',
+            }), 200
+        
+        # Get user's subscription
+        subscription = user_subscriptions.find_one({'user_id': user_id})
+        
+        if not subscription:
+            return jsonify({
+                'success': True,
+                'has_subscription': False,
+                'plan': None
+            }), 200
+        
+        # Get plan details
+        plan = subscription_plans.find_one({'plan_id': subscription['plan_id']})
+        
+        # Format subscription data
+        # Handle features - accept either a list or a comma-separated string (split without trimming)
+        features = []
+        if plan:
+            plan_features = plan.get('features', '')
+            if isinstance(plan_features, list):
+                features = plan_features
+            elif isinstance(plan_features, str):
+                features = plan_features.split(',') if plan_features else []
+        # subscription_id / plan_id are read with [] and raise KeyError (-> 500 below) if the document lacks them
+        subscription_data = {
+            'has_subscription': True,
+            'subscription_id': subscription['subscription_id'],
+            'plan_name': plan['plan_name'] if plan else 'Unknown',
+            'plan_id': subscription['plan_id'],
+            'billing_period': subscription.get('billing_period', 'monthly'),
+            'status': subscription.get('status', 'active'),
+            'current_period_start': subscription.get('current_period_start').isoformat() if subscription.get('current_period_start') else None,
+            'current_period_end': subscription.get('current_period_end').isoformat() if subscription.get('current_period_end') else None,
+            'cancel_at_period_end': subscription.get('cancel_at_period_end', False),
+            'stripe_customer_id': subscription.get('stripe_customer_id'),
+            'features': features
+        }
+        
+        logger.info(f"✅ Retrieved subscription status for user {user_id}")
+        
+        return jsonify({
+            'success': True,
+            **subscription_data
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error getting subscription status: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/cancel', methods=['POST'])
+def cancel_subscription():
+    """
+    Cancel a subscription in Stripe, mirror the change in MongoDB and record a subscription event
+    Returns 200 on success; 400 for a missing user_id or Stripe id; 404 without a subscription; 500 otherwise
+    Request body:
+    {
+        "user_id": "user123",
+        "immediate": false  # If true, cancel immediately; otherwise at period end
+    }
+    """
+    try:
+        data = request.get_json(silent=True)
+        data = data if isinstance(data, dict) else {}  # missing/non-object JSON falls through to the 400 below
+        user_id = data.get('user_id')
+        immediate = data.get('immediate', False)  # NOTE: any truthy value (even the string "false") cancels immediately
+        if not user_id:
+            return jsonify({'error': 'user_id is required'}), 400
+        
+        # Get user's subscription
+        subscription = user_subscriptions.find_one({'user_id': user_id})
+        if not subscription:
+            return jsonify({'error': 'No subscription found for user'}), 404
+        
+        stripe_subscription_id = subscription.get('stripe_subscription_id')
+        if not stripe_subscription_id:
+            return jsonify({'error': 'No Stripe subscription ID found'}), 400
+        
+        # Cancel subscription
+        result = _get_stripe().cancel_subscription(
+            subscription_id=stripe_subscription_id,
+            at_period_end=not immediate
+        )
+        
+        # Update database: target the exact document read above, not whichever one matches user_id first
+        user_subscriptions.update_one(
+            {'_id': subscription['_id']},
+            {
+                '$set': {
+                    'status': 'canceled' if immediate else 'active',
+                    'cancel_at_period_end': not immediate,
+                    'updated_at': datetime.utcnow()
+                }
+            }
+        )
+        
+        # Log event (.get: a document without subscription_id must not fail after Stripe already cancelled)
+        subscription_events.insert_one({
+            'event_id': str(uuid4()),
+            'subscription_id': subscription.get('subscription_id'),
+            'event_type': 'subscription_canceled',
+            'event_data': {
+                'immediate': immediate,
+                'canceled_at': result.get('canceled_at').isoformat() if result.get('canceled_at') else None
+            },
+            'created_at': datetime.utcnow()
+        })
+        
+        logger.info(f"✅ Canceled subscription for user {user_id} (immediate: {immediate})")
+        
+        return jsonify({
+            'success': True,
+            'message': 'Subscription canceled successfully',
+            'cancel_at_period_end': not immediate
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error canceling subscription: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/upgrade', methods=['POST'])
+def upgrade_subscription():
+    """
+    Upgrade/downgrade subscription to a different plan
+    
+    Request body:
+    {
+        "user_id": "user123",
+        "new_plan_name": "pro",  # or "basic"
+        "new_billing_period": "monthly"  # or "yearly"
+    }
+    """
+    try:
+        data = request.json
+        user_id = data.get('user_id')
+        new_plan_name = data.get('new_plan_name', 'pro').lower()
+        new_billing_period = data.get('new_billing_period', 'monthly').lower()
+        
+        if not user_id:
+            return jsonify({'error': 'user_id is required'}), 400
+        
+        # Get user's subscription
+        subscription = user_subscriptions.find_one({'user_id': user_id})
+        if not subscription:
+            return jsonify({'error': 'No subscription found for user'}), 404
+        
+        stripe_subscription_id = subscription.get('stripe_subscription_id')
+        if not stripe_subscription_id:
+            return jsonify({'error': 'No Stripe subscription ID found'}), 400
+        
+        # Get new price ID
+        new_price_id = _get_stripe().get_price_id(new_plan_name, new_billing_period)
+        if not new_price_id:
+            return jsonify({'error': 'Price ID not found for new plan'}), 400
+        
+        # Get new plan from database
+        new_plan = subscription_plans.find_one({
+            'plan_name': f'DetectifAI {new_plan_name.capitalize()}'
+        })
+        if not new_plan:
+            return jsonify({'error': 'Plan not found in database'}), 404
+        
+        # Update subscription
+        result = _get_stripe().update_subscription(
+            subscription_id=stripe_subscription_id,
+            new_price_id=new_price_id
+        )
+        
+        # Update database
+        user_subscriptions.update_one(
+            {'user_id': user_id},
+            {
+                '$set': {
+                    'plan_id': new_plan['plan_id'],
+                    'billing_period': new_billing_period,
+                    'updated_at': datetime.utcnow()
+                }
+            }
+        )
+        
+        # Log event
+        subscription_events.insert_one({
+            'event_id': str(uuid4()),
+            'subscription_id': subscription['subscription_id'],
+            'event_type': 'subscription_updated',
+            'event_data': {
+                'old_plan': subscription.get('plan_id'),
+                'new_plan': new_plan['plan_id'],
+                'new_billing_period': new_billing_period
+            },
+            'created_at': datetime.utcnow()
+        })
+        
+        logger.info(f"✅ Updated subscription for user {user_id} to {new_plan_name}")
+        
+        return jsonify({
+            'success': True,
+            'message': 'Subscription updated successfully',
+            'new_plan': new_plan_name,
+            'billing_period': new_billing_period
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error updating subscription: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/plans', methods=['GET'])
+def get_subscription_plans():
+    """
+    Get all available subscription plans
+    """
+    try:
+        plans = list(subscription_plans.find({'is_active': True}, {'_id': 0}))
+        
+        # Format plans
+        formatted_plans = []
+        for plan in plans:
+            # Handle features field
+            plan_features = plan.get('features', '')
+            if isinstance(plan_features, list):
+                features = plan_features
+            elif isinstance(plan_features, str):
+                features = plan_features.split(',') if plan_features else []
+            else:
+                features = []
+            
+            formatted_plans.append({
+                'plan_id': plan['plan_id'],
+                'plan_name': plan['plan_name'],
+                'description': plan.get('description', ''),
+                'price': float(plan['price']),
+                'features': features,
+                'billing_periods': plan.get('billing_periods', ['monthly']),
+                'stripe_product_id': plan.get('stripe_product_id'),
+                'stripe_price_ids': plan.get('stripe_price_ids', {})
+            })
+        
+        logger.info(f"✅ Retrieved {len(formatted_plans)} subscription plans")
+        
+        return jsonify({
+            'success': True,
+            'plans': formatted_plans
+        }), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error retrieving plans: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+
+
+@subscription_bp.route('/webhook', methods=['POST'])
+def stripe_webhook():
+    """
+    Handle Stripe webhook events
+    """
+    try:
+        payload = request.data
+        sig_header = request.headers.get('Stripe-Signature')
+        
+        # Development mode: Allow webhooks without signature if webhook secret is placeholder
+        webhook_secret = os.getenv('STRIPE_WEBHOOK_SECRET', '')
+        dev_mode = webhook_secret == 'whsec_xxxxx' or not webhook_secret
+        
+        if dev_mode:
+            # Development: Parse without verification
+            logger.warning("⚠️ Development mode: Webhook signature verification DISABLED")
+            event = json.loads(payload)
+        else:
+            # Production: Verify signature
+            if not sig_header:
+                return jsonify({'error': 'Missing Stripe signature'}), 400
+            event = _get_stripe().construct_webhook_event(payload, sig_header)
+        
+        # Handle different event types
+        event_type = event['type']
+        event_data = event['data']['object']
+        
+        logger.info(f"📬 Received webhook event: {event_type}")
+        
+        if event_type == 'customer.subscription.created':
+            handle_subscription_created(event_data)
+        elif event_type == 'customer.subscription.updated':
+            handle_subscription_updated(event_data)
+        elif event_type == 'customer.subscription.deleted':
+            handle_subscription_deleted(event_data)
+        elif event_type == 'invoice.payment_succeeded':
+            handle_payment_succeeded(event_data)
+        elif event_type == 'invoice.payment_failed':
+            handle_payment_failed(event_data)
+        else:
+            logger.info(f"ℹ️ Unhandled webhook event type: {event_type}")
+        
+        return jsonify({'success': True}), 200
+        
+    except Exception as e:
+        logger.error(f"❌ Error processing webhook: {str(e)}")
+        return jsonify({'error': str(e)}), 400
+
+
+def handle_subscription_created(subscription_data):
+    """Handle subscription.created event"""
+    try:
+        user_id = subscription_data['metadata'].get('user_id')
+        plan_name = subscription_data['metadata'].get('plan_name')
+        billing_period = subscription_data['metadata'].get('billing_period')
+        
+        if not user_id:
+            logger.warning("⚠️ No user_id in subscription metadata")
+            return
+        
+        # Get plan from database
+        plan = subscription_plans.find_one({
+            'plan_name': f'DetectifAI {plan_name.capitalize()}'
+        })
+        
+        if not plan:
+            logger.error(f"❌ Plan not found: {plan_name}")
+            return
+        
+        # Create subscription in database
+        subscription_id = str(uuid4())
+        user_subscriptions.insert_one({
+            'subscription_id': subscription_id,
+            'user_id': user_id,
+            'plan_id': plan['plan_id'],
+            'start_date': datetime.utcnow(),
+            'end_date': datetime.fromtimestamp(subscription_data['current_period_end']),
+            'stripe_customer_id': subscription_data['customer'],
+            'stripe_subscription_id': subscription_data['id'],
+            'billing_period': billing_period,
+            'status': subscription_data['status'],
+            'current_period_start': datetime.fromtimestamp(subscription_data['current_period_start']),
+            'current_period_end': datetime.fromtimestamp(subscription_data['current_period_end']),
+            'cancel_at_period_end': False,
+            'created_at': datetime.utcnow(),
+            'updated_at': datetime.utcnow()
+        })
+        
+        # Log event
+        subscription_events.insert_one({
+            'event_id': str(uuid4()),
+            'subscription_id': subscription_id,
+            'event_type': 'subscription_created',
+            'stripe_event_id': subscription_data['id'],
+            'event_data': {'plan_name': plan_name, 'billing_period': billing_period},
+            'created_at': datetime.utcnow()
+        })
+        
+        logger.info(f"✅ Created subscription for user {user_id}")
+        
+    except Exception as e:
+        logger.error(f"❌ Error handling subscription created: {str(e)}")
+
+
+def handle_subscription_updated(subscription_data):
+    """Handle subscription.updated event"""
+    try:
+        stripe_subscription_id = subscription_data['id']
+        
+        # Update subscription in database
+        user_subscriptions.update_one(
+            {'stripe_subscription_id': stripe_subscription_id},
+            {
+                '$set': {
+                    'status': subscription_data['status'],
+                    'current_period_start': datetime.fromtimestamp(subscription_data['current_period_start']),
+                    'current_period_end': datetime.fromtimestamp(subscription_data['current_period_end']),
+                    'cancel_at_period_end': subscription_data.get('cancel_at_period_end', False),
+                    'updated_at': datetime.utcnow()
+                }
+            }
+        )
+        
+        logger.info(f"✅ Updated subscription {stripe_subscription_id}")
+        
+    except Exception as e:
+        logger.error(f"❌ Error handling subscription updated: {str(e)}")
+
+
+def handle_subscription_deleted(subscription_data):
+    """Handle subscription.deleted event"""
+    try:
+        stripe_subscription_id = subscription_data['id']
+        
+        # Update subscription status
+        user_subscriptions.update_one(
+            {'stripe_subscription_id': stripe_subscription_id},
+            {
+                '$set': {
+                    'status': 'canceled',
+                    'updated_at': datetime.utcnow()
+                }
+            }
+        )
+        
+        logger.info(f"✅ Marked subscription as canceled: {stripe_subscription_id}")
+        
+    except Exception as e:
+        logger.error(f"❌ Error handling subscription deleted: {str(e)}")
+
+
+def handle_payment_succeeded(invoice_data):
+    """Handle invoice.payment_succeeded event"""
+    try:
+        customer_id = invoice_data['customer']
+        amount = invoice_data['amount_paid'] / 100  # Convert from cents
+        
+        # Get user subscription
+        subscription = user_subscriptions.find_one({'stripe_customer_id': customer_id})
+        if not subscription:
+            logger.warning(f"⚠️ No subscription found for customer {customer_id}")
+            return
+        
+        # Log payment
+        payment_history.insert_one({
+            'payment_id': str(uuid4()),
+            'user_id': subscription['user_id'],
+            'stripe_payment_intent_id': invoice_data.get('payment_intent'),
+            'amount': amount,
+            'currency': invoice_data['currency'].upper(),
+            'status': 'succeeded',
+            'payment_method': 'card',
+            'created_at': datetime.utcnow()
+        })
+        
+        logger.info(f"✅ Recorded payment of ${amount} for user {subscription['user_id']}")
+        
+    except Exception as e:
+        logger.error(f"❌ Error handling payment succeeded: {str(e)}")
+
+
+def handle_payment_failed(invoice_data):
+    """Handle invoice.payment_failed event"""
+    try:
+        customer_id = invoice_data['customer']
+        
+        # Get user subscription
+        subscription = user_subscriptions.find_one({'stripe_customer_id': customer_id})
+        if not subscription:
+            logger.warning(f"⚠️ No subscription found for customer {customer_id}")
+            return
+        
+        # Update subscription status
+        user_subscriptions.update_one(
+            {'stripe_customer_id': customer_id},
+            {
+                '$set': {
+                    'status': 'past_due',
+                    'updated_at': datetime.utcnow()
+                }
+            }
+        )
+        
+        logger.warning(f"⚠️ Payment failed for user {subscription['user_id']}")
+        
+    except Exception as e:
+        logger.error(f"❌ Error handling payment failed: {str(e)}")
diff --git a/video_captioning/__init__.py b/video_captioning/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c01749a2d5a7026e238ef46a77a8c3eb9561db3
--- /dev/null
+++ b/video_captioning/__init__.py
@@ -0,0 +1,22 @@
+"""
+Video Captioning Module - Parent Package
+
+Re-exports CaptioningService, Frame, CaptionRecord and CaptioningConfig from
+the nested ``video_captioning`` sub-package so callers can import them from
+the top-level package.
+"""
+
+# This allows importing from video_captioning
+try:
+    # Preferred path: package-relative imports from the nested sub-package
+    from .video_captioning.captioning_service import CaptioningService
+    from .video_captioning.models import Frame, CaptionRecord
+    from .video_captioning.config import CaptioningConfig
+    
+    __all__ = ["CaptioningService", "Frame", "CaptionRecord", "CaptioningConfig"]
+except ImportError as e:
+    # Fallback for direct imports
+    # NOTE: any ImportError raised while importing the modules above lands
+    # here, and the caught exception `e` is not used or logged.
+    import sys
+    import os
+    # Put the nested package directory first on sys.path so its modules can be
+    # imported by their bare names below.
+    sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'video_captioning'))
+    
+    from captioning_service import CaptioningService
+    from models import Frame, CaptionRecord
+    from config import CaptioningConfig
+    
+    __all__ = ["CaptioningService", "Frame", "CaptionRecord", "CaptioningConfig"]
diff --git a/video_captioning/video_captioning/README.md b/video_captioning/video_captioning/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..04889d35c4b8622c0009e0e6d487420101cba0a5
--- /dev/null
+++ b/video_captioning/video_captioning/README.md
@@ -0,0 +1,203 @@
+# Video Captioning Module
+
+A comprehensive vision-language captioning system for surveillance applications that generates neutral, policy-safe captions from video frames and stores them with semantic embeddings for retrieval.
+
+## Features
+
+- **Vision-Language Captioning**: Uses BLIP/similar models to generate descriptive captions
+- **Rule-based Sanitization**: Replaces or removes identifying terms so captions stay neutral and policy-compliant
+- **Semantic Embeddings**: Sentence-BERT embeddings for semantic search
+- **Dual Storage**: Relational database for metadata, vector database for embeddings
+- **Async Processing**: Non-blocking operations for high throughput
+- **Safety-First**: Built-in content filtering and audit logging
+
+## Architecture
+
+```
+Frame Input → Vision Model → LLM Sanitizer → Embedding Generator → Storage
+     ↓              ↓             ↓               ↓              ↓
+  PIL Images    Raw Captions  Safe Captions   Embeddings    DB + Vector Store
+```
+
+## Quick Start
+
+```python
+from video_captioning import CaptioningService, Frame, CaptioningConfig
+from PIL import Image
+from datetime import datetime
+
+# Configure the service
+config = CaptioningConfig(
+    vision_model_name="Salesforce/blip-image-captioning-base",
+    embedding_model_name="sentence-transformers/all-MiniLM-L6-v2"
+)
+
+# Initialize service
+service = CaptioningService(config)
+
+# Create frame objects
+frame = Frame(
+    frame_id="frame_001",
+    timestamp=datetime.now(),
+    video_id="video_001",
+    image=Image.open("frame.jpg")
+)
+
+# Process frames
+result = service.process_frames([frame])
+
+# Search captions
+results = service.search_captions("person walking", top_k=5)
+```
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+## Configuration
+
+The `CaptioningConfig` class provides comprehensive configuration options:
+
+```python
+config = CaptioningConfig(
+    # Vision model settings
+    vision_model_name="Salesforce/blip-image-captioning-base",
+    vision_device="cpu",  # or "cuda"
+    vision_batch_size=4,
+    
+    # Embedding settings
+    embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+    embedding_normalize=True,
+    
+    # Processing settings
+    enable_async_processing=True,
+    max_concurrent_requests=10,
+    log_rejected_captions=True
+)
+```
+
+## Safety Features
+
+The module implements strict safety measures:
+
+- **Content Filtering**: Removes references to gender, race, age, appearance
+- **Neutral Language**: Focuses only on observable actions and objects
+- **Audit Logging**: Tracks all rejected/modified captions
+- **Policy Compliance**: Built-in safety prompt templates
+
+## API Reference
+
+### CaptioningService
+
+Main service class that orchestrates the entire pipeline.
+
+#### Methods
+
+- `process_frames(frames: List[Frame]) -> ProcessingResult`
+- `process_frames_async(frames: List[Frame]) -> ProcessingResult`
+- `search_captions(query: str, top_k: int = 5) -> List[dict]`
+- `get_video_captions(video_id: str) -> List[dict]`
+- `get_statistics() -> dict`
+
+### Frame
+
+Input data structure for video frames.
+
+```python
+@dataclass
+class Frame:
+    frame_id: str
+    timestamp: datetime
+    video_id: str
+    image: Image.Image
+```
+
+### CaptionRecord
+
+Output data structure for processed captions.
+
+```python
+@dataclass
+class CaptionRecord:
+    caption_id: str
+    video_id: str
+    frame_id: str
+    timestamp: datetime
+    raw_caption: str
+    sanitized_caption: str
+    embedding: np.ndarray
+    created_at: datetime
+```
+
+## Storage
+
+The module uses a dual storage approach:
+
+1. **Relational Database** (SQLite): Stores caption metadata
+2. **Vector Database** (File-based): Stores embeddings for similarity search
+
+### Database Schema
+
+```sql
+-- Captions table
+CREATE TABLE captions (
+    caption_id TEXT PRIMARY KEY,
+    video_id TEXT NOT NULL,
+    frame_id TEXT NOT NULL,
+    timestamp TEXT NOT NULL,
+    raw_caption TEXT NOT NULL,
+    sanitized_caption TEXT NOT NULL,
+    created_at TEXT NOT NULL
+);
+
+-- Audit table
+CREATE TABLE caption_audit (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    raw_caption TEXT NOT NULL,
+    sanitized_caption TEXT,
+    rejection_reason TEXT,
+    created_at TEXT NOT NULL
+);
+```
+
+## Performance
+
+- **Batch Processing**: Optimized for multiple frames
+- **Async Support**: Non-blocking operations
+- **Memory Efficient**: Streaming processing for large datasets
+- **GPU Acceleration**: CUDA support for models
+
+## Examples
+
+See `example_usage.py` for comprehensive usage examples including:
+- Basic frame processing
+- Async processing
+- Search functionality
+- Configuration options
+
+## Integration
+
+This module is designed to integrate with larger surveillance systems:
+
+```python
+# In your surveillance pipeline
+from video_captioning import CaptioningService
+
+# Initialize once
+captioning_service = CaptioningService(config)
+
+# Process frames from video stream
+def process_video_segment(frames):
+    result = captioning_service.process_frames(frames)
+    return result.caption_records
+
+# Search historical data
+def search_events(query):
+    return captioning_service.search_captions(query)
+```
+
+## License
+
+This module is designed for surveillance and security applications with built-in privacy and safety measures.
\ No newline at end of file
diff --git a/video_captioning/video_captioning/__init__.py b/video_captioning/video_captioning/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2be6dd82d41f807a7d8e9f8fd96039d8e58bedb2
--- /dev/null
+++ b/video_captioning/video_captioning/__init__.py
@@ -0,0 +1,13 @@
+"""
+Video Captioning Module for Surveillance System
+
+This module provides vision-language captioning capabilities for video frames,
+including caption generation, sanitization, embedding, and storage.
+"""
+
+from .captioning_service import CaptioningService
+from .models import Frame, CaptionRecord
+from .config import CaptioningConfig
+
+__version__ = "1.0.0"
+__all__ = ["CaptioningService", "Frame", "CaptionRecord", "CaptioningConfig"]
\ No newline at end of file
diff --git a/video_captioning/video_captioning/caption_sanitizer.py b/video_captioning/video_captioning/caption_sanitizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..14968c9758bbe8ac6cbed4ab6a04bd8b02440a56
--- /dev/null
+++ b/video_captioning/video_captioning/caption_sanitizer.py
@@ -0,0 +1,143 @@
+"""
+Caption sanitization for policy compliance.
+
+Uses efficient rule-based sanitization to remove sensitive/identifying terms
+from captions while preserving descriptive quality for NLP search.
+
+Note: DialoGPT LLM-based sanitization was removed because:
+  1. It was extremely slow on CPU (~1.5s per caption vs 0ms rule-based)
+  2. It produced worse captions (e.g., "a parking lot with cars" → "a car")
+  3. It consumed ~1.5GB RAM for a conversational model misused for text rewriting
+  4. Its outputs always failed safety checks and fell back to rule-based anyway
+"""
+
+import logging
+from typing import List
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+
+try:
+    from .config import CaptioningConfig
+except ImportError:
+    from config import CaptioningConfig
+
+
+class CaptionSanitizer:
+    """Handles caption sanitization using efficient rule-based approach"""
+    
+    def __init__(self, config: CaptioningConfig):
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+        
+        # No LLM model needed — rule-based sanitization is faster and more accurate
+        self.model = None
+        self.tokenizer = None
+        
+        # Thread pool for async processing
+        self.executor = ThreadPoolExecutor(max_workers=config.max_concurrent_requests)
+        
+        # Audit log for rejected captions
+        self.rejected_captions = []
+        
+        self.logger.info("Caption sanitizer initialized (rule-based mode)")
+    
+    def sanitize_caption(self, raw_caption: str) -> str:
+        """Sanitize a single caption using rule-based approach"""
+        try:
+            return self._rule_based_sanitization(raw_caption)
+        except Exception as e:
+            self.logger.error(f"Failed to sanitize caption: {e}")
+            return raw_caption  # Return original rather than losing the caption
+    
+    def _rule_based_sanitization(self, caption: str) -> str:
+        """Efficient rule-based sanitization that preserves descriptive quality.
+        
+        Replaces identifying terms (gender, age) with neutral alternatives
+        while preserving object descriptions useful for NLP search.
+        """
+        # Terms to replace with 'person'
+        person_terms = {
+            'man', 'woman', 'boy', 'girl', 'guy', 'lady', 'gentleman',
+            'male', 'female'
+        }
+        # Terms to replace with 'individual'  
+        age_terms = {
+            'elderly', 'teenager', 'toddler'
+        }
+        # Terms to skip entirely (too identifying for people)
+        skip_terms = {
+            'blonde', 'brunette', 'bald', 'redhead'
+        }
+        
+        words = caption.lower().split()
+        filtered_words = []
+        
+        for word in words:
+            clean_word = word.strip('.,!?;:')
+            if clean_word in person_terms:
+                filtered_words.append('person')
+            elif clean_word in age_terms:
+                filtered_words.append('individual')
+            elif clean_word in skip_terms:
+                continue  # Remove hair/appearance descriptors
+            else:
+                filtered_words.append(word)
+        
+        sanitized = ' '.join(filtered_words)
+        
+        # Ensure we have meaningful content
+        if len(sanitized.strip()) < 5:
+            return "Activity detected in scene"
+        
+        return sanitized.capitalize()
+    
+    def _is_caption_safe(self, caption: str) -> bool:
+        """Validate that caption meets safety requirements"""
+        caption_lower = caption.lower()
+        
+        # Check for prohibited terms
+        prohibited_terms = [
+            'gender', 'race', 'skin', 'color', 'age', 'appearance',
+            'man', 'woman', 'male', 'female', 'boy', 'girl',
+            'black', 'white', 'asian', 'hispanic', 'latino',
+            'young', 'old', 'elderly', 'child', 'teenager'
+        ]
+        
+        for term in prohibited_terms:
+            if term in caption_lower:
+                return False
+        
+        return True
+    
+    def sanitize_captions_batch(self, raw_captions: List[str]) -> List[str]:
+        """Sanitize a batch of captions"""
+        return [self.sanitize_caption(caption) for caption in raw_captions]
+    
+    async def sanitize_captions_async(self, raw_captions: List[str]) -> List[str]:
+        """Sanitize captions asynchronously"""
+        if not self.config.enable_async_processing:
+            return self.sanitize_captions_batch(raw_captions)
+        
+        loop = asyncio.get_event_loop()
+        
+        # Run in thread pool
+        sanitized_captions = await loop.run_in_executor(
+            self.executor,
+            self.sanitize_captions_batch,
+            raw_captions
+        )
+        
+        return sanitized_captions
+    
+    def get_rejected_captions(self) -> List[dict]:
+        """Get audit log of rejected captions"""
+        return self.rejected_captions.copy()
+    
+    def clear_rejected_captions(self):
+        """Clear the rejected captions log"""
+        self.rejected_captions.clear()
+    
+    def __del__(self):
+        """Cleanup resources"""
+        if hasattr(self, 'executor'):
+            self.executor.shutdown(wait=True)
\ No newline at end of file
diff --git a/video_captioning/video_captioning/captioning_service.py b/video_captioning/video_captioning/captioning_service.py
new file mode 100644
index 0000000000000000000000000000000000000000..d4380ac9c71a939f40653d0e0a66d77d689085e6
--- /dev/null
+++ b/video_captioning/video_captioning/captioning_service.py
@@ -0,0 +1,307 @@
+"""
+Main captioning service that orchestrates the entire pipeline
+"""
+
+import logging
+import asyncio
+import uuid
+from datetime import datetime
+from typing import List
+import time
+
+try:
+    from .models import Frame, CaptionRecord, ProcessingResult
+    from .config import CaptioningConfig
+    from .vision_captioner import VisionCaptioner
+    from .caption_sanitizer import CaptionSanitizer
+    from .embedding_generator import EmbeddingGenerator
+    from .mongodb_storage import MongoDBCaptionStorage
+except ImportError:
+    from models import Frame, CaptionRecord, ProcessingResult
+    from config import CaptioningConfig
+    from vision_captioner import VisionCaptioner
+    from caption_sanitizer import CaptionSanitizer
+    from embedding_generator import EmbeddingGenerator
+    from mongodb_storage import MongoDBCaptionStorage
+
+
+class CaptioningService:
+    """Orchestrate captioning, sanitization, embedding and storage of frames.
+
+    The synchronous and asynchronous entry points share the record-building,
+    storage and rejection-audit helpers so both paths behave the same way.
+    """
+
+    def __init__(self, config: CaptioningConfig = None, db_manager=None):
+        """Create the service and eagerly load every pipeline component.
+
+        Args:
+            config: Pipeline settings; defaults to ``CaptioningConfig()``.
+            db_manager: Optional handle forwarded to ``MongoDBCaptionStorage``.
+
+        Raises:
+            Exception: Any component constructor failure (logged, re-raised).
+        """
+        self.config = config or CaptioningConfig()
+        self.logger = logging.getLogger(__name__)
+        self.db_manager = db_manager
+        # Count of sanitizer rejections already written to storage, so later
+        # batches do not persist the same audit entries again.
+        self._rejections_persisted = 0
+
+        self._init_components()
+
+    def _init_components(self):
+        """Instantiate captioner, sanitizer, embedder and storage, in that order.
+
+        Raises:
+            Exception: Re-raises any constructor failure after logging it.
+        """
+        try:
+            self.logger.info("Initializing captioning service components...")
+            self.logger.info("Loading vision captioner...")
+            self.vision_captioner = VisionCaptioner(self.config)
+            self.logger.info("✅ Vision captioner loaded")
+
+            self.logger.info("Loading caption sanitizer...")
+            self.caption_sanitizer = CaptionSanitizer(self.config)
+            self.logger.info("✅ Caption sanitizer loaded")
+
+            self.logger.info("Loading embedding generator...")
+            self.embedding_generator = EmbeddingGenerator(self.config)
+            self.logger.info("✅ Embedding generator loaded")
+
+            self.logger.info("Initializing MongoDB storage...")
+            self.storage = MongoDBCaptionStorage(self.config, db_manager=self.db_manager)
+            self.logger.info("✅ MongoDB storage initialized")
+
+            self.logger.info("✅ All components initialized successfully (MongoDB + FAISS)")
+        except Exception as e:
+            self.logger.error(f"Failed to initialize components: {e}")
+            raise
+
+    def _build_records(self, frames, raw_captions, sanitized_captions,
+                       embeddings, caption_records, errors):
+        """Append one ``CaptionRecord`` per frame to ``caption_records``.
+
+        A frame whose record cannot be built is skipped and the failure is
+        logged and appended to ``errors``.
+        """
+        for i, frame in enumerate(frames):
+            try:
+                caption_records.append(CaptionRecord(
+                    caption_id=str(uuid.uuid4()),
+                    video_id=frame.video_id,
+                    frame_id=frame.frame_id,
+                    timestamp=frame.timestamp,
+                    raw_caption=raw_captions[i],
+                    sanitized_caption=sanitized_captions[i],
+                    embedding=embeddings[i],
+                    created_at=datetime.now()
+                ))
+            except Exception as e:
+                error_msg = f"Failed to create record for frame {frame.frame_id}: {e}"
+                self.logger.error(error_msg)
+                errors.append(error_msg)
+
+    def _store_records(self, caption_records, errors):
+        """Persist ``caption_records``; note any shortfall in ``errors``."""
+        if not caption_records:
+            return
+        self.logger.debug("Storing caption records...")
+        stored_count = self.storage.store_caption_records_batch(caption_records)
+        if stored_count != len(caption_records):
+            error_msg = f"Only stored {stored_count}/{len(caption_records)} records"
+            self.logger.warning(error_msg)
+            errors.append(error_msg)
+
+    def _persist_rejections(self):
+        """Write sanitizer rejections not yet persisted to storage, if enabled."""
+        if not self.config.log_rejected_captions:
+            return
+        rejected = self.caption_sanitizer.get_rejected_captions()
+        # The log may have been cleared since the last batch; clamp the cursor.
+        self._rejections_persisted = min(self._rejections_persisted, len(rejected))
+        for rejection in rejected[self._rejections_persisted:]:
+            self.storage.log_rejected_caption(
+                rejection['raw'],
+                rejection['sanitized'],
+                rejection['reason']
+            )
+            self._rejections_persisted += 1
+
+    def process_frames(self, frames: List[Frame]) -> ProcessingResult:
+        """Run frames through caption -> sanitize -> embed -> store.
+
+        Args:
+            frames: Frames to caption; ``image``, ``video_id``, ``frame_id``
+                and ``timestamp`` are read from each.
+
+        Returns:
+            A ``ProcessingResult`` whose ``success`` is true only when no
+            error was recorded. Failures are reported in ``errors`` rather
+            than raised.
+        """
+        start_time = time.time()
+        errors = []
+        caption_records = []
+
+        try:
+            self.logger.info(f"Processing {len(frames)} frames")
+
+            # Step 1: Generate raw captions
+            self.logger.debug("Generating raw captions...")
+            raw_captions = self.vision_captioner.generate_captions_batch(
+                [frame.image for frame in frames]
+            )
+
+            # Step 2: Sanitize captions
+            self.logger.debug("Sanitizing captions...")
+            sanitized_captions = self.caption_sanitizer.sanitize_captions_batch(
+                raw_captions
+            )
+
+            # Step 3: Generate embeddings
+            self.logger.debug("Generating embeddings...")
+            embeddings = self.embedding_generator.generate_embeddings_batch(
+                sanitized_captions
+            )
+
+            # Steps 4-5: Build records, store them, then audit rejections
+            self.logger.debug("Creating caption records...")
+            self._build_records(frames, raw_captions, sanitized_captions,
+                                embeddings, caption_records, errors)
+            self._store_records(caption_records, errors)
+            self._persist_rejections()
+
+            processing_time = time.time() - start_time
+            self.logger.info(
+                f"Processed {len(frames)} frames in {processing_time:.2f}s, "
+                f"created {len(caption_records)} records, {len(errors)} errors"
+            )
+            return ProcessingResult(
+                success=len(errors) == 0,
+                caption_records=caption_records,
+                errors=errors,
+                processing_time=processing_time
+            )
+        except Exception as e:
+            error_msg = f"Pipeline processing failed: {e}"
+            self.logger.error(error_msg)
+            errors.append(error_msg)
+            return ProcessingResult(
+                success=False,
+                caption_records=caption_records,
+                errors=errors,
+                processing_time=time.time() - start_time
+            )
+
+    async def process_frames_async(self, frames: List[Frame]) -> ProcessingResult:
+        """Async variant of ``process_frames`` using the components' async APIs.
+
+        The three model stages depend on each other, so they are awaited in
+        sequence. Record building, storage and rejection auditing reuse the
+        synchronous helpers and run on the calling thread.
+
+        Returns:
+            A ``ProcessingResult``; failures are captured in ``errors``
+            rather than raised.
+        """
+        start_time = time.time()
+        errors = []
+        caption_records = []
+
+        try:
+            self.logger.info(f"Processing {len(frames)} frames asynchronously")
+
+            # NOTE(review): the async captioner receives the Frame objects while
+            # the sync path passes ``frame.image`` -- confirm this is intended.
+            raw_captions = await self.vision_captioner.generate_captions_async(frames)
+            sanitized_captions = await self.caption_sanitizer.sanitize_captions_async(
+                raw_captions
+            )
+            embeddings = await self.embedding_generator.generate_embeddings_async(
+                sanitized_captions
+            )
+
+            self._build_records(frames, raw_captions, sanitized_captions,
+                                embeddings, caption_records, errors)
+            self._store_records(caption_records, errors)
+            self._persist_rejections()
+
+            processing_time = time.time() - start_time
+            self.logger.info(
+                f"Async processed {len(frames)} frames in {processing_time:.2f}s"
+            )
+            return ProcessingResult(
+                success=len(errors) == 0,
+                caption_records=caption_records,
+                errors=errors,
+                processing_time=processing_time
+            )
+        except Exception as e:
+            error_msg = f"Async pipeline processing failed: {e}"
+            self.logger.error(error_msg)
+            errors.append(error_msg)
+            return ProcessingResult(
+                success=False,
+                caption_records=caption_records,
+                errors=errors,
+                processing_time=time.time() - start_time
+            )
+
+    def search_captions(self, query: str, top_k: int = 5) -> List[dict]:
+        """Embed ``query`` and ask storage for its ``top_k`` most similar captions.
+
+        Any failure is logged and reported as an empty list.
+        """
+        try:
+            query_embedding = self.embedding_generator.generate_embedding(query)
+            return self.storage.search_similar_captions(query_embedding, top_k)
+        except Exception as e:
+            self.logger.error(f"Failed to search captions: {e}")
+            return []
+
+    def get_video_captions(self, video_id: str) -> List[dict]:
+        """Return the captions storage holds for ``video_id``."""
+        return self.storage.get_captions_by_video(video_id)
+
+    def get_statistics(self) -> dict:
+        """Return storage statistics extended with model and config details."""
+        stats = self.storage.get_statistics()
+        stats.update({
+            'embedding_dimension': self.embedding_generator.get_embedding_dimension(),
+            'vision_model': self.config.vision_model_name,
+            'embedding_model': self.config.embedding_model_name,
+            'async_enabled': self.config.enable_async_processing
+        })
+        return stats
+
+    def get_rejected_captions(self) -> List[dict]:
+        """Return the sanitizer's in-memory audit log of rejected captions."""
+        return self.caption_sanitizer.get_rejected_captions()
+
+    def clear_rejected_captions(self):
+        """Clear the sanitizer's audit log and reset the persistence cursor."""
+        self.caption_sanitizer.clear_rejected_captions()
+        self._rejections_persisted = 0
+
+    def close(self):
+        """Close the storage backend; failures are logged, never raised."""
+        try:
+            if hasattr(self, 'storage') and self.storage is not None:
+                self.storage.close()
+                self.logger.info("Captioning service closed")
+        except Exception as e:
+            self.logger.error(f"Failed to close service: {e}")
+
+    def __del__(self):
+        """Best-effort cleanup on destruction."""
+        try:
+            # sys.meta_path becomes None while the interpreter is shutting down
+            import sys
+            if sys.meta_path is not None and hasattr(self, 'storage'):
+                self.close()
+        except Exception:
+            # Cleanup is best-effort; never raise out of a finalizer
+            pass
\ No newline at end of file
diff --git a/video_captioning/video_captioning/config.py b/video_captioning/video_captioning/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..483c02379dea089d2bb2772589e6c0c0b8f7ed39
--- /dev/null
+++ b/video_captioning/video_captioning/config.py
@@ -0,0 +1,49 @@
+"""
+Configuration settings for video captioning module
+"""
+
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class CaptioningConfig:
+    """Settings shared by the components of the captioning pipeline."""
+
+    # --- Vision captioning model ---
+    vision_model_name: str = "Salesforce/blip-image-captioning-base"
+    vision_device: str = "cpu"  # or "cuda" when available
+    vision_batch_size: int = 4
+
+    # --- LLM used for caption sanitization ---
+    llm_model_name: str = "microsoft/DialoGPT-medium"
+    llm_device: str = "cpu"
+    llm_max_tokens: int = 150
+    llm_temperature: float = 0.1
+
+    # --- Sentence embedding model ---
+    embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
+    embedding_device: str = "cpu"
+    embedding_normalize: bool = True
+
+    # --- Database / vector store locations ---
+    db_connection_string: Optional[str] = None
+    vector_db_path: Optional[str] = "./vector_store"
+
+    # --- Processing behaviour ---
+    max_concurrent_requests: int = 10
+    enable_async_processing: bool = True
+    log_rejected_captions: bool = True
+
+    # Safety prompt; ``{raw_caption}`` marks where the caption text goes
+    safety_prompt_template: str = (
+        "You are a surveillance captioning assistant. "
+        "Rewrite the following caption to be neutral, objective, and safe.\n\n"
+        "Rules:\n"
+        "- Do NOT mention gender, race, skin color, clothing, age, or physical appearance.\n"
+        "- Do NOT make identity assumptions.\n"
+        "- Only describe observable actions, movements, interactions, and objects.\n"
+        "- Keep the caption concise (1–2 sentences).\n\n"
+        "Caption: {raw_caption}\n\n"
+        "Rewritten caption:"
+    )
\ No newline at end of file
diff --git a/video_captioning/video_captioning/data_flow_diagram.md b/video_captioning/video_captioning/data_flow_diagram.md
new file mode 100644
index 0000000000000000000000000000000000000000..ad2a73f89654a1425c8ed355e3be48a8a9623a73
--- /dev/null
+++ b/video_captioning/video_captioning/data_flow_diagram.md
@@ -0,0 +1,286 @@
+# Data Flow Diagram - Video Captioning Module
+
+## High-Level Architecture
+
+```
+┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
+│   Video Frames  │───▶│  Captioning      │───▶│   Storage &     │
+│   (PIL Images)  │    │   Pipeline       │    │   Retrieval     │
+└─────────────────┘    └──────────────────┘    └─────────────────┘
+```
+
+## Detailed Pipeline Flow
+
+```
+┌─────────────────┐
+│  Input Frames   │
+│  - frame_id     │
+│  - timestamp    │
+│  - video_id     │
+│  - PIL Image    │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Vision Captioner│
+│ (BLIP Model)    │
+│ - Batch process │
+│ - GPU support   │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Raw Captions    │
+│ "A man walking  │
+│  in blue shirt" │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Caption         │
+│ Sanitizer (LLM) │
+│ - Safety prompt │
+│ - Rule-based    │
+│ - Audit logging │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Safe Captions   │
+│ "Person walking │
+│  in outdoor     │
+│  environment"   │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Embedding       │
+│ Generator       │
+│ (Sentence-BERT) │
+│ - Normalized    │
+│ - Deterministic │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Caption Records │
+│ - Metadata      │
+│ - Embeddings    │
+│ - Timestamps    │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐    ┌─────────────────┐
+│ Relational DB   │    │  Vector Store   │
+│ - SQLite        │    │ - Embeddings    │
+│ - Metadata      │    │ - Similarity    │
+│ - Audit logs    │    │ - Search index  │
+└─────────────────┘    └─────────────────┘
+```
+
+## Component Interactions
+
+### 1. Frame Input Processing
+```
+Frame Object
+├── frame_id: str
+├── timestamp: datetime
+├── video_id: str
+└── image: PIL.Image
+    └── Validation
+        ├── Format check
+        ├── Size validation
+        └── Error handling
+```
+
+### 2. Vision Model Processing
+```
+Vision Captioner
+├── Model Loading
+│   ├── BLIP Processor
+│   ├── BLIP Model
+│   └── Device allocation
+├── Batch Processing
+│   ├── Image preprocessing
+│   ├── Tensor conversion
+│   ├── Model inference
+│   └── Caption decoding
+└── Output: Raw captions
+```
+
+### 3. Caption Sanitization
+```
+Caption Sanitizer
+├── LLM Processing
+│   ├── Safety prompt template
+│   ├── Token generation
+│   └── Response parsing
+├── Rule-based Fallback
+│   ├── Sensitive term filtering
+│   ├── Generic replacements
+│   └── Content validation
+├── Safety Validation
+│   ├── Prohibited term check
+│   ├── Policy compliance
+│   └── Audit logging
+└── Output: Safe captions
+```
+
+### 4. Embedding Generation
+```
+Embedding Generator
+├── Model Loading
+│   ├── Sentence-BERT
+│   └── Device allocation
+├── Text Processing
+│   ├── Tokenization
+│   ├── Encoding
+│   └── Normalization
+└── Output: Vector embeddings
+```
+
+### 5. Storage Layer
+```
+Storage System
+├── Relational Database (SQLite)
+│   ├── Captions table
+│   │   ├── caption_id (PK)
+│   │   ├── video_id
+│   │   ├── frame_id
+│   │   ├── timestamp
+│   │   ├── raw_caption
+│   │   ├── sanitized_caption
+│   │   └── created_at
+│   └── Audit table
+│       ├── id (PK)
+│       ├── raw_caption
+│       ├── sanitized_caption
+│       ├── rejection_reason
+│       └── created_at
+└── Vector Database (File-based)
+    ├── embeddings.pkl
+    │   └── List[np.ndarray]
+    └── metadata.json
+        └── List[dict] (caption_id, video_id, etc.)
+```
+
+## Data Transformations
+
+### Input → Raw Caption
+```
+PIL Image (224x224x3) → Tensor → BLIP Model → "A person walking down a street"
+```
+
+### Raw Caption → Safe Caption
+```
+"A man in blue shirt walking" → LLM/Rules → "Person walking in outdoor area"
+```
+
+### Safe Caption → Embedding
+```
+"Person walking in outdoor area" → Sentence-BERT → [0.1, -0.3, 0.7, ...] (384-dim)
+```
+
+## Async Processing Flow
+
+```
+┌─────────────────┐
+│  Async Request  │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Thread Pool     │
+│ Executor        │
+└─────────┬───────┘
+          │
+    ┌─────┴─────┐
+    ▼           ▼
+┌─────────┐ ┌─────────┐
+│ Vision  │ │ Other   │
+│ Task    │ │ Tasks   │
+└─────────┘ └─────────┘
+    │           │
+    └─────┬─────┘
+          ▼
+┌─────────────────┐
+│ Await Results   │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Continue        │
+│ Pipeline        │
+└─────────────────┘
+```
+
+## Error Handling Flow
+
+```
+┌─────────────────┐
+│ Processing Step │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Try Operation   │
+└─────────┬───────┘
+          │
+    ┌─────┴─────┐
+    ▼           ▼
+┌─────────┐ ┌─────────┐
+│Success  │ │ Error   │
+└─────────┘ └─────┬───┘
+    │             │
+    │             ▼
+    │     ┌─────────────────┐
+    │     │ Log Error       │
+    │     │ Add to errors[] │
+    │     │ Use fallback    │
+    │     └─────────┬───────┘
+    │               │
+    └───────┬───────┘
+            ▼
+┌─────────────────┐
+│ Continue or     │
+│ Return Result   │
+└─────────────────┘
+```
+
+## Search and Retrieval Flow
+
+```
+┌─────────────────┐
+│ Search Query    │
+│ "person walking"│
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Generate Query  │
+│ Embedding       │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Vector Search   │
+│ - Cosine sim    │
+│ - Top-K results │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Fetch Metadata  │
+│ from Relational │
+│ Database        │
+└─────────┬───────┘
+          │
+          ▼
+┌─────────────────┐
+│ Return Results  │
+│ with Similarity │
+│ Scores          │
+└─────────────────┘
+```
+
+This data flow ensures efficient, safe, and scalable processing of video frames into searchable, policy-compliant captions.
\ No newline at end of file
diff --git a/video_captioning/video_captioning/embedding_generator.py b/video_captioning/video_captioning/embedding_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..25bfa02e9f222c657ec193953f06bb649d35a376
--- /dev/null
+++ b/video_captioning/video_captioning/embedding_generator.py
@@ -0,0 +1,141 @@
+"""
+Sentence-BERT embedding generation for captions
+"""
+
+import logging
+from typing import List
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+
+try:
+    from .config import CaptioningConfig
+except ImportError:
+    from config import CaptioningConfig
+
+
+class EmbeddingGenerator:
+    """Generate Sentence-BERT embeddings for caption text."""
+
+    def __init__(self, config: CaptioningConfig):
+        """Load the embedding model and create the async worker pool.
+
+        Raises:
+            Exception: Re-raised from ``_load_model`` if the model cannot load.
+        """
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+
+        self._load_model()
+
+        # Thread pool for async processing
+        self.executor = ThreadPoolExecutor(max_workers=config.max_concurrent_requests)
+
+    def _load_model(self):
+        """Load ``config.embedding_model_name`` onto ``config.embedding_device``."""
+        try:
+            self.logger.info(f"Loading embedding model: {self.config.embedding_model_name}")
+            self.model = SentenceTransformer(
+                self.config.embedding_model_name,
+                device=self.config.embedding_device
+            )
+            self.logger.info("Embedding model loaded successfully")
+        except Exception as e:
+            self.logger.error(f"Failed to load embedding model: {e}")
+            raise
+
+    def generate_embedding(self, text: str) -> np.ndarray:
+        """Embed one text; on failure log it and return an all-zero vector."""
+        try:
+            return self.model.encode(
+                text,
+                convert_to_numpy=True,
+                normalize_embeddings=self.config.embedding_normalize
+            )
+        except Exception as e:
+            self.logger.error(f"Failed to generate embedding: {e}")
+            # Return zero vector as fallback
+            return np.zeros(self.model.get_sentence_embedding_dimension())
+
+    def generate_embeddings_batch(self, texts: List[str]) -> List[np.ndarray]:
+        """Embed ``texts`` in one model call, returning one array per text.
+
+        On failure the error is logged and zero vectors are returned instead.
+        """
+        try:
+            embeddings = self.model.encode(
+                texts,
+                convert_to_numpy=True,
+                normalize_embeddings=self.config.embedding_normalize,
+                batch_size=32,  # Optimize batch size for memory
+                show_progress_bar=False
+            )
+            return list(embeddings)
+        except Exception as e:
+            self.logger.error(f"Failed to generate batch embeddings: {e}")
+            dim = self.model.get_sentence_embedding_dimension()
+            return [np.zeros(dim) for _ in texts]
+
+    async def generate_embeddings_async(self, texts: List[str]) -> List[np.ndarray]:
+        """Embed ``texts`` on the worker pool without blocking the event loop.
+
+        Runs synchronously when ``config.enable_async_processing`` is false.
+        """
+        if not self.config.enable_async_processing:
+            return self.generate_embeddings_batch(texts)
+
+        # get_running_loop() is the supported way to get the loop in a coroutine
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(
+            self.executor,
+            self.generate_embeddings_batch,
+            texts
+        )
+
+    def get_embedding_dimension(self) -> int:
+        """Return the model's sentence-embedding dimension."""
+        return self.model.get_sentence_embedding_dimension()
+
+    def compute_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
+        """Return the cosine similarity of two embeddings.
+
+        When ``config.embedding_normalize`` is true the inputs are taken to
+        be unit-length already and only their dot product is computed.
+        A zero-length vector has no direction, so it scores 0.0 rather than
+        producing NaN. Any other failure is logged and also yields 0.0.
+        """
+        try:
+            if not self.config.embedding_normalize:
+                norm1 = np.linalg.norm(embedding1)
+                norm2 = np.linalg.norm(embedding2)
+                if norm1 == 0 or norm2 == 0:
+                    return 0.0
+                embedding1 = embedding1 / norm1
+                embedding2 = embedding2 / norm2
+
+            return float(np.dot(embedding1, embedding2))
+        except Exception as e:
+            self.logger.error(f"Failed to compute similarity: {e}")
+            return 0.0
+
+    def find_similar_embeddings(self, query_embedding: np.ndarray,
+                              embeddings: List[np.ndarray],
+                              top_k: int = 5) -> List[tuple]:
+        """Return the ``top_k`` ``(index, similarity)`` pairs, best match first."""
+        try:
+            similarities = [
+                (i, self.compute_similarity(query_embedding, embedding))
+                for i, embedding in enumerate(embeddings)
+            ]
+            # Sort by similarity (descending); ties keep their index order
+            similarities.sort(key=lambda x: x[1], reverse=True)
+            return similarities[:top_k]
+        except Exception as e:
+            self.logger.error(f"Failed to find similar embeddings: {e}")
+            return []
+
+    def __del__(self):
+        """Release the thread pool without joining workers (finalizers must not block)."""
+        if hasattr(self, 'executor'):
+            self.executor.shutdown(wait=False)
\ No newline at end of file
diff --git a/video_captioning/video_captioning/example_usage.py b/video_captioning/video_captioning/example_usage.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ef03354878b5e70ebafc3a885490dc07d845e37
--- /dev/null
+++ b/video_captioning/video_captioning/example_usage.py
@@ -0,0 +1,120 @@
+"""
+Example usage of the video captioning module
+"""
+
+import asyncio
+from datetime import datetime
+from PIL import Image
+import logging
+
+from video_captioning import CaptioningService, Frame, CaptioningConfig
+
+
+def setup_logging():
+    """Configure root logging: INFO level, timestamped record format."""
+    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    logging.basicConfig(
+        format=record_format, level=logging.INFO
+    )
+
+
+def create_sample_frames():
+    """Build three solid-colour 224x224 frames that stand in for video frames.
+
+    Returns:
+        A list of ``Frame`` objects, all tagged with video id
+        ``test_video_001`` and ids ``frame_0000`` .. ``frame_0002``.
+    """
+    def _make_frame(index):
+        # Vary the red channel so each dummy image is distinct
+        image = Image.new('RGB', (224, 224), color=(100 + index*50, 150, 200))
+        return Frame(
+            frame_id=f"frame_{index:04d}",
+            timestamp=datetime.now(),
+            video_id="test_video_001",
+            image=image
+        )
+
+    return [_make_frame(index) for index in range(3)]
+
+
+def main():
+    """Run the synchronous example: caption frames, search, print statistics.
+
+    The service is closed even if one of the steps raises.
+    """
+    setup_logging()
+
+    # Create configuration
+    config = CaptioningConfig(
+        vision_model_name="Salesforce/blip-image-captioning-base",
+        embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+        enable_async_processing=True,
+        log_rejected_captions=True
+    )
+
+    service = CaptioningService(config)
+
+    try:
+        frames = create_sample_frames()
+        print(f"Processing {len(frames)} frames...")
+
+        # Process frames synchronously
+        result = service.process_frames(frames)
+
+        print("Processing completed:")
+        print(f"  Success: {result.success}")
+        print(f"  Records created: {len(result.caption_records)}")
+        print(f"  Processing time: {result.processing_time:.2f}s")
+        print(f"  Errors: {len(result.errors)}")
+        if result.errors:
+            for error in result.errors:
+                print(f"    - {error}")
+
+        # Display results
+        for record in result.caption_records:
+            print(f"\nFrame {record.frame_id}:")
+            print(f"  Raw caption: {record.raw_caption}")
+            print(f"  Sanitized: {record.sanitized_caption}")
+            print(f"  Embedding shape: {record.embedding.shape}")
+
+        # Test search functionality
+        print("\n--- Testing Search ---")
+        search_results = service.search_captions("person walking", top_k=3)
+        print(f"Found {len(search_results)} similar captions")
+
+        # Get statistics
+        stats = service.get_statistics()
+        print("\n--- Statistics ---")
+        for key, value in stats.items():
+            print(f"  {key}: {value}")
+    finally:
+        # Always release storage, even when a step above failed
+        service.close()
+
+
+async def async_example():
+    """Run the asynchronous example and report its processing time.
+
+    The service is closed even if processing raises.
+    """
+    setup_logging()
+
+    service = CaptioningService(CaptioningConfig(enable_async_processing=True))
+    try:
+        frames = create_sample_frames()
+        print("Processing frames asynchronously...")
+        result = await service.process_frames_async(frames)
+        print(f"Async processing completed in {result.processing_time:.2f}s")
+    finally:
+        service.close()
+
+
+if __name__ == "__main__":
+    # Synchronous walkthrough first
+    main()
+    
+    # Then the asynchronous variant, after a separator line in the output
+    print("\n" + "="*50)
+    print("Running async example...")
+    asyncio.run(async_example())
\ No newline at end of file
diff --git a/video_captioning/video_captioning/install_requirements.py b/video_captioning/video_captioning/install_requirements.py
new file mode 100644
index 0000000000000000000000000000000000000000..32f414724ef2d389251f3ffa77905fc92d9ccab4
--- /dev/null
+++ b/video_captioning/video_captioning/install_requirements.py
@@ -0,0 +1,85 @@
+"""
+Install minimal requirements for video captioning module
+"""
+
+import subprocess
+import sys
+import os
+
+
+def install_package(package):
+    """Pip-install ``package`` for this interpreter; return True on success."""
+    print(f"Installing {package}...")
+    try:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
+    except subprocess.CalledProcessError as e:
+        print(f"✗ Failed to install {package}: {e}")
+        return False
+    print(f"✓ {package} installed successfully")
+    return True
+
+
+def check_package(package):
+    """Report whether the module named ``package`` can be imported."""
+    try:
+        __import__(package)
+    except ImportError:
+        return False
+    return True
+
+
+def main():
+    """Install whichever required packages cannot currently be imported.
+
+    pip distribution names and import names differ for some packages, so
+    availability is checked by import name and installation uses the pip
+    name.
+
+    Progress is printed to stdout; nothing is returned.
+    """
+    print("Checking and installing requirements for video captioning module...")
+
+    # pip name -> module name to import when checking availability
+    packages = {
+        "torch": "torch",
+        "torchvision": "torchvision",
+        "transformers": "transformers",
+        # Hyphenated on PyPI, underscored as a module; importing the
+        # hyphenated name always failed and forced a reinstall.
+        "sentence-transformers": "sentence_transformers",
+        "Pillow": "PIL",
+        "opencv-python": "cv2",
+        "numpy": "numpy",
+    }
+
+    # Check what's already installed
+    to_install = []
+    for package, import_name in packages.items():
+        if check_package(import_name):
+            print(f"✓ {package} already installed")
+        else:
+            to_install.append(package)
+
+    if not to_install:
+        print("\nAll required packages are already installed!")
+        return
+
+    print(f"\nNeed to install: {', '.join(to_install)}")
+
+    # Install missing packages
+    failed = []
+    for package in to_install:
+        if not install_package(package):
+            failed.append(package)
+
+    # Summarise the outcome
+    if failed:
+        print(f"\n⚠️  Failed to install: {', '.join(failed)}")
+        print("You may need to install these manually or check your internet connection.")
+    else:
+        print("\n🎉 All packages installed successfully!")
+        print("You can now run the test with: python test_runner.py")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/integration_example.py b/video_captioning/video_captioning/integration_example.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f46a34a3f9c471574139536a01a689411ec73d5
--- /dev/null
+++ b/video_captioning/video_captioning/integration_example.py
@@ -0,0 +1,243 @@
+"""
+Integration example showing how to use the video captioning module
+with an existing surveillance system
+"""
+
+import sys
+import os
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import cv2
+import numpy as np
+
+# Add the parent directory to path to import from backend
+sys.path.append(str(Path(__file__).parent.parent))
+
+from video_captioning import CaptioningService, Frame, CaptioningConfig
+
+
+class SurveillanceIntegration:
+    """Example integration of the captioning service with a surveillance system.
+
+    Wraps a ``CaptioningService`` and adds console-oriented helpers for
+    captioning a video file, searching stored captions, summarising one video
+    and printing service statistics.  Call ``close()`` when finished.
+    """
+
+    def __init__(self):
+        """Build the captioning configuration and start the service."""
+        # Configure captioning service
+        self.config = CaptioningConfig(
+            vision_model_name="Salesforce/blip-image-captioning-base",
+            embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+            db_connection_string="surveillance_captions.db",
+            vector_db_path="./surveillance_vectors",
+            enable_async_processing=True,
+            log_rejected_captions=True,
+            vision_batch_size=8  # Process more frames at once
+        )
+
+        self.captioning_service = CaptioningService(self.config)
+        print("Captioning service initialized")
+
+    def extract_frames_from_video(self, video_path: str,
+                                 frame_interval: int = 30) -> list:
+        """Extract every ``frame_interval``-th frame of a video as ``Frame`` objects.
+
+        Args:
+            video_path: Path of the video file to read.
+            frame_interval: Keep one frame out of this many (frame 0 is kept).
+
+        Returns:
+            The extracted frames in video order.  The list is empty when the
+            video cannot be opened and may be partial when reading fails
+            midway; errors are printed, never raised.
+        """
+        # Local import: only ``datetime`` itself is imported at module level.
+        from datetime import timedelta
+
+        frames = []
+        cap = None
+
+        try:
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                print(f"Error: Could not open video {video_path}")
+                return frames
+
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            # One shared base time plus the frame's offset into the video keeps
+            # the timestamps ordered by position.  Overwriting only the
+            # microsecond field of now() dropped the whole seconds.
+            base_time = datetime.now()
+            frame_count = 0
+
+            while True:
+                ret, cv_frame = cap.read()
+                if not ret:
+                    break
+
+                # Extract frame at intervals
+                if frame_count % frame_interval == 0:
+                    # Convert BGR to RGB
+                    rgb_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
+                    pil_image = Image.fromarray(rgb_frame)
+
+                    # A non-positive (or NaN) reported frame rate gives no usable
+                    # offset; fall back to zero instead of dividing by it.
+                    offset_seconds = frame_count / fps if fps > 0 else 0.0
+
+                    frames.append(Frame(
+                        frame_id=f"frame_{frame_count:06d}",
+                        timestamp=base_time + timedelta(seconds=offset_seconds),
+                        video_id=Path(video_path).stem,
+                        image=pil_image
+                    ))
+
+                frame_count += 1
+
+            print(f"Extracted {len(frames)} frames from {video_path}")
+
+        except Exception as e:
+            print(f"Error extracting frames: {e}")
+
+        finally:
+            # Release the capture on every path, including errors and the
+            # early return above.
+            if cap is not None:
+                cap.release()
+
+        return frames
+
+    def process_video_file(self, video_path: str):
+        """Caption one video file and print a short report.
+
+        Frames are sampled every 60 frames and sent to the captioning service
+        in batches of 10.  Failed batches are reported and skipped.
+
+        Args:
+            video_path: Path of the video file to process.
+        """
+        print(f"\nProcessing video: {video_path}")
+
+        # Extract frames
+        frames = self.extract_frames_from_video(video_path, frame_interval=60)
+
+        if not frames:
+            print("No frames extracted")
+            return
+
+        # Process frames in batches
+        batch_size = 10
+        all_records = []
+
+        for i in range(0, len(frames), batch_size):
+            batch = frames[i:i + batch_size]
+            print(f"Processing batch {i//batch_size + 1}/{(len(frames)-1)//batch_size + 1}")
+
+            result = self.captioning_service.process_frames(batch)
+
+            if result.success:
+                all_records.extend(result.caption_records)
+                print(f"  Processed {len(result.caption_records)} frames")
+            else:
+                print(f"  Batch failed with {len(result.errors)} errors")
+                for error in result.errors:
+                    print(f"    - {error}")
+
+        print(f"Total processed: {len(all_records)} caption records")
+
+        # Show sample results
+        if all_records:
+            print("\nSample captions:")
+            for record in all_records[:3]:
+                print(f"  Frame {record.frame_id}:")
+                print(f"    Raw: {record.raw_caption}")
+                print(f"    Safe: {record.sanitized_caption}")
+                print()
+
+    def search_events(self, query: str, top_k: int = 5):
+        """Search stored captions with a natural-language query and print the hits.
+
+        Args:
+            query: Free-text description of the event to look for.
+            top_k: Maximum number of results requested from the service.
+        """
+        print(f"\nSearching for: '{query}'")
+
+        results = self.captioning_service.search_captions(query, top_k=top_k)
+
+        if results:
+            print(f"Found {len(results)} similar events:")
+            for i, result in enumerate(results, 1):
+                print(f"  {i}. Video: {result['video_id']}")
+                print(f"     Frame: {result['frame_id']}")
+                print(f"     Caption: {result['sanitized_caption']}")
+                print(f"     Similarity: {result.get('similarity', 0):.3f}")
+                print(f"     Time: {result['timestamp']}")
+                print()
+        else:
+            print("No similar events found")
+
+    def get_video_summary(self, video_id: str):
+        """Print the caption timeline of one video (first 10 entries).
+
+        Args:
+            video_id: Identifier the captions were stored under.
+        """
+        print(f"\nVideo summary for: {video_id}")
+
+        captions = self.captioning_service.get_video_captions(video_id)
+
+        if captions:
+            print(f"Total frames: {len(captions)}")
+            print("Timeline:")
+            for caption in captions[:10]:  # Show first 10
+                print(f"  {caption['timestamp']}: {caption['sanitized_caption']}")
+
+            if len(captions) > 10:
+                print(f"  ... and {len(captions) - 10} more frames")
+        else:
+            print("No captions found for this video")
+
+    def show_statistics(self):
+        """Print the service statistics and up to three rejected captions."""
+        stats = self.captioning_service.get_statistics()
+
+        print("\n=== System Statistics ===")
+        for key, value in stats.items():
+            print(f"{key}: {value}")
+
+        # Show rejected captions if any
+        rejected = self.captioning_service.get_rejected_captions()
+        if rejected:
+            print(f"\nRejected captions: {len(rejected)}")
+            for rejection in rejected[:3]:  # Show first 3
+                print(f"  Raw: {rejection['raw']}")
+                print(f"  Reason: {rejection['reason']}")
+
+    def close(self):
+        """Close the underlying captioning service."""
+        self.captioning_service.close()
+        print("Integration closed")
+
+
+def main():
+    """Run the integration demo end to end.
+
+    Captions the first sample video that exists on disk, or five synthetic
+    solid-colour frames when none is found, then demonstrates search, the
+    per-video summary and the statistics output.  Errors are printed and the
+    integration is always closed.
+    """
+    integration = SurveillanceIntegration()
+
+    try:
+        # Example 1: Process a video file (if available)
+        video_files = [
+            "../backend/fight_0002.mp4",
+            "../backend/fire.mp4",
+            "../backend/rob.mp4"
+        ]
+
+        # video_id of whatever gets captioned below, so that the summary in
+        # example 3 looks up the video that was actually processed instead of
+        # always asking for "demo_video".
+        summary_video_id = "demo_video"
+
+        processed_any = False
+        for video_file in video_files:
+            if os.path.exists(video_file):
+                integration.process_video_file(video_file)
+                # Frames extracted from a file carry the file stem as video_id
+                summary_video_id = Path(video_file).stem
+                processed_any = True
+                break
+
+        if not processed_any:
+            print("No video files found, creating sample data...")
+            # Create sample frames for demonstration
+            sample_frames = []
+            for i in range(5):
+                # Create test images with different colors
+                image = Image.new('RGB', (640, 480),
+                                color=(50 + i*40, 100 + i*30, 150 + i*20))
+                frame = Frame(
+                    frame_id=f"demo_frame_{i:03d}",
+                    timestamp=datetime.now(),
+                    video_id="demo_video",
+                    image=image
+                )
+                sample_frames.append(frame)
+
+            result = integration.captioning_service.process_frames(sample_frames)
+            print(f"Processed {len(result.caption_records)} demo frames")
+
+        # Example 2: Search functionality
+        integration.search_events("person walking")
+        integration.search_events("movement in scene")
+
+        # Example 3: Video summary
+        integration.get_video_summary(summary_video_id)
+
+        # Example 4: Statistics
+        integration.show_statistics()
+
+    except Exception as e:
+        print(f"Error in main: {e}")
+
+    finally:
+        integration.close()
+
+
+# Run the demo only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/models.py b/video_captioning/video_captioning/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..644f6af61dfd4c1b3f6a6e2e464850659fb6e4ed
--- /dev/null
+++ b/video_captioning/video_captioning/models.py
@@ -0,0 +1,56 @@
+"""
+Data models for video captioning module
+"""
+
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional, List
+import numpy as np
+from PIL import Image
+
+
+@dataclass
+class Frame:
+    """A single video frame together with its identifying metadata."""
+    frame_id: str
+    timestamp: datetime
+    video_id: str
+    image: Image.Image
+
+    def __post_init__(self):
+        """Reject any ``image`` that is not a PIL ``Image.Image``.
+
+        Raises:
+            ValueError: If ``image`` is of any other type.
+        """
+        image_is_pil = isinstance(self.image, Image.Image)
+        if image_is_pil:
+            return
+        raise ValueError("image must be a PIL Image object")
+
+
+@dataclass
+class CaptionRecord:
+    """A caption produced for one frame, plus the embedding computed for it."""
+    caption_id: str
+    video_id: str
+    frame_id: str
+    timestamp: datetime
+    raw_caption: str
+    sanitized_caption: str
+    embedding: np.ndarray
+    created_at: datetime
+
+    def to_dict(self) -> dict:
+        """Return the storable fields as a plain dict.
+
+        ``timestamp`` and ``created_at`` are rendered with ``isoformat()``;
+        ``embedding`` is not part of the result.
+        """
+        frame_time = self.timestamp.isoformat()
+        creation_time = self.created_at.isoformat()
+        return dict(
+            caption_id=self.caption_id,
+            video_id=self.video_id,
+            frame_id=self.frame_id,
+            timestamp=frame_time,
+            raw_caption=self.raw_caption,
+            sanitized_caption=self.sanitized_caption,
+            created_at=creation_time,
+        )
+
+
+@dataclass
+class ProcessingResult:
+    """Result of frame processing operation.
+
+    Plain value object: it only groups the four fields below and defines no
+    behaviour of its own.
+    """
+    # Overall success flag of the operation
+    success: bool
+    # Caption records produced by the operation
+    caption_records: List[CaptionRecord]
+    # Error messages collected during the operation
+    errors: List[str]
+    # Duration of the operation (presumably seconds -- TODO confirm with the producer)
+    processing_time: float
\ No newline at end of file
diff --git a/video_captioning/video_captioning/mongodb_storage.py b/video_captioning/video_captioning/mongodb_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbeaa6d419e570e1c40dd12a45c659018367f1d9
--- /dev/null
+++ b/video_captioning/video_captioning/mongodb_storage.py
@@ -0,0 +1,368 @@
+"""
+MongoDB-based storage layer for captions and embeddings
+Replaces SQLite with MongoDB Atlas integration
+"""
+
+import logging
+import json
+import os
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+import numpy as np
+import pickle
+from pathlib import Path
+from pymongo import MongoClient
+from pymongo.collection import Collection
+
+try:
+    from .models import CaptionRecord
+    from .config import CaptioningConfig
+except ImportError:
+    from models import CaptionRecord
+    from config import CaptioningConfig
+
+
+class MongoDBCaptionStorage:
+    """Stores caption metadata in MongoDB and caption embeddings on disk.
+
+    Caption documents live in the ``video_captions`` collection and rejected
+    captions in ``caption_audit``.  Embeddings are kept in an in-memory list
+    that is persisted as a pickle file next to a JSON metadata file, and
+    similarity search is a brute-force scan over that list.
+
+    NOTE: the log messages mention "FAISS", but this class does not use a
+    FAISS index anywhere.
+    """
+
+    def __init__(self, config: CaptioningConfig, db_manager=None):
+        """Connect to MongoDB and load any persisted embeddings.
+
+        Args:
+            config: Supplies ``db_connection_string`` and ``vector_db_path``.
+            db_manager: Optional object exposing an already connected
+                database as ``.db``; when given, no new client is created.
+
+        Raises:
+            Exception: Whatever the MongoDB or vector-store setup raised
+                (logged first, then re-raised).
+        """
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+        self.db_manager = db_manager
+        # Client created (and therefore owned) by this instance; stays None
+        # when the db_manager's connection is reused.
+        self._client = None
+
+        # Initialize databases
+        self._init_mongodb()
+        self._init_vector_db()
+
+    def _init_mongodb(self):
+        """Initialize MongoDB connection, collections and indexes.
+
+        Raises:
+            ValueError: If no db_manager is given and neither the config nor
+                the ``MONGO_URI`` environment variable provides a URI.
+            Exception: Any connection or index-creation error.
+        """
+        try:
+            if self.db_manager:
+                # Use existing database manager
+                self.db = self.db_manager.db
+                self.logger.info("Using existing MongoDB connection from db_manager")
+            else:
+                # Create new connection.  The URI has to come from the
+                # configuration or the environment: credentials must never be
+                # embedded in source code as a fallback.
+                mongo_uri = self.config.db_connection_string or os.getenv('MONGO_URI')
+                if not mongo_uri:
+                    raise ValueError(
+                        "MongoDB connection string missing: set "
+                        "config.db_connection_string or the MONGO_URI "
+                        "environment variable"
+                    )
+                self._client = MongoClient(mongo_uri)
+                self.db = self._client['detectifai']
+                self.logger.info("Created new MongoDB connection")
+
+            # Get or create captions collection
+            self.captions_collection = self.db['video_captions']
+
+            # Create indexes for efficient querying
+            self.captions_collection.create_index("caption_id", unique=True)
+            self.captions_collection.create_index("video_id")
+            self.captions_collection.create_index("frame_id")
+            self.captions_collection.create_index("timestamp")
+
+            # Create audit collection for rejected captions
+            self.audit_collection = self.db['caption_audit']
+            self.audit_collection.create_index("created_at")
+
+            self.logger.info("✅ MongoDB caption storage initialized")
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to initialize MongoDB: {e}")
+            raise
+
+    def _init_vector_db(self):
+        """Create the vector store directory and load persisted embeddings.
+
+        Raises:
+            Exception: If the directory cannot be created.
+        """
+        try:
+            # Create vector store directory
+            vector_path = Path(self.config.vector_db_path or "./video_captioning_store")
+            vector_path.mkdir(exist_ok=True, parents=True)
+
+            self.vector_db_path = vector_path
+            self.embeddings_file = vector_path / "caption_embeddings.pkl"
+            self.metadata_file = vector_path / "caption_metadata.json"
+
+            # Load existing data if available
+            self._load_vector_data()
+
+            self.logger.info("✅ FAISS vector database initialized")
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to initialize vector database: {e}")
+            raise
+
+    def _load_vector_data(self):
+        """Load embeddings and their metadata from disk into memory.
+
+        Falls back to an empty store when the files are absent or unreadable.
+        Also (re)builds ``_caption_positions``, the caption_id -> list position
+        lookup used to replace an entry instead of duplicating it.
+        """
+        try:
+            if self.embeddings_file.exists() and self.metadata_file.exists():
+                # SECURITY: pickle.load can execute arbitrary code contained in
+                # the file.  The vector store directory must only ever hold
+                # files written by this class.
+                with open(self.embeddings_file, 'rb') as f:
+                    self.embeddings = pickle.load(f)
+
+                # Load metadata
+                with open(self.metadata_file, 'r') as f:
+                    self.vector_metadata = json.load(f)
+
+                # Search pairs embeddings and metadata by position, so the two
+                # lists must have the same length (they can drift apart if a
+                # save was interrupted between the two files).
+                common = min(len(self.embeddings), len(self.vector_metadata))
+                if len(self.embeddings) != len(self.vector_metadata):
+                    self.logger.warning(
+                        f"Vector store mismatch: {len(self.embeddings)} embeddings vs "
+                        f"{len(self.vector_metadata)} metadata entries; keeping first {common}"
+                    )
+                    self.embeddings = list(self.embeddings[:common])
+                    self.vector_metadata = list(self.vector_metadata[:common])
+
+                self.logger.info(f"📦 Loaded {len(self.embeddings)} existing embeddings from FAISS")
+            else:
+                self.embeddings = []
+                self.vector_metadata = []
+                self.logger.info("🆕 Initialized empty FAISS vector store")
+
+            self._caption_positions = {
+                meta['caption_id']: position
+                for position, meta in enumerate(self.vector_metadata)
+            }
+
+        except Exception as e:
+            self.logger.error(f"⚠️ Failed to load vector data: {e}")
+            self.embeddings = []
+            self.vector_metadata = []
+            self._caption_positions = {}
+
+    def _save_vector_data(self):
+        """Persist the in-memory embeddings (pickle) and metadata (JSON).
+
+        Does nothing while the interpreter is shutting down; other failures
+        are logged and swallowed.
+        """
+        try:
+            # Check if Python is shutting down
+            import sys
+            if sys.meta_path is None:
+                return  # Python is shutting down, skip save
+
+            # Ensure directory exists
+            self.vector_db_path.mkdir(exist_ok=True, parents=True)
+
+            # Save embeddings
+            import builtins
+            with builtins.open(self.embeddings_file, 'wb') as f:
+                pickle.dump(self.embeddings, f)
+
+            # Save metadata
+            with builtins.open(self.metadata_file, 'w') as f:
+                json.dump(self.vector_metadata, f, indent=2)
+
+            self.logger.debug(f"💾 Saved {len(self.embeddings)} embeddings to FAISS")
+
+        except Exception as e:
+            # Ignore shutdown errors
+            if "sys.meta_path is None" not in str(e) and "Python is likely shutting down" not in str(e):
+                self.logger.error(f"❌ Failed to save vector data: {e}")
+                import traceback
+                self.logger.error(traceback.format_exc())
+
+    @staticmethod
+    def _record_to_document(record: CaptionRecord) -> Dict[str, Any]:
+        """Build the MongoDB document for one caption record."""
+        return {
+            "caption_id": record.caption_id,
+            "video_id": record.video_id,
+            "frame_id": record.frame_id,
+            "timestamp": record.timestamp.isoformat() if isinstance(record.timestamp, datetime) else str(record.timestamp),
+            "raw_caption": record.raw_caption,
+            "sanitized_caption": record.sanitized_caption,
+            "created_at": record.created_at.isoformat() if isinstance(record.created_at, datetime) else datetime.now().isoformat()
+        }
+
+    def _put_embedding(self, caption_doc: Dict[str, Any], embedding: np.ndarray):
+        """Insert or replace the in-memory vector entry of one caption.
+
+        MongoDB upserts by ``caption_id``; the vector lists mirror that, so
+        storing the same caption twice does not leave a duplicate embedding.
+        """
+        metadata = {
+            key: caption_doc[key]
+            for key in ('caption_id', 'video_id', 'frame_id', 'timestamp')
+        }
+        position = self._caption_positions.get(metadata['caption_id'])
+        if position is None:
+            self._caption_positions[metadata['caption_id']] = len(self.embeddings)
+            self.embeddings.append(embedding)
+            self.vector_metadata.append(metadata)
+        else:
+            self.embeddings[position] = embedding
+            self.vector_metadata[position] = metadata
+
+    def store_caption_record(self, record: CaptionRecord) -> bool:
+        """Store a single caption record (document + embedding).
+
+        Args:
+            record: The caption to upsert, keyed by ``caption_id``.
+
+        Returns:
+            True on success, False if anything failed (the error is logged).
+        """
+        try:
+            caption_doc = self._record_to_document(record)
+
+            # Store in MongoDB (upsert to avoid duplicates)
+            self.captions_collection.update_one(
+                {"caption_id": record.caption_id},
+                {"$set": caption_doc},
+                upsert=True
+            )
+
+            # Store embedding in the vector store and persist it
+            self._put_embedding(caption_doc, record.embedding)
+            self._save_vector_data()
+
+            self.logger.debug(f"✅ Stored caption: {record.caption_id}")
+            return True
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to store caption record: {e}")
+            return False
+
+    def store_caption_records_batch(self, records: List[CaptionRecord]) -> int:
+        """Store multiple caption records with one bulk upsert.
+
+        Args:
+            records: Captions to upsert; may be empty.
+
+        Returns:
+            ``len(records)`` on success, 0 for an empty batch or on failure
+            (the error is logged).
+        """
+        # bulk_write() rejects an empty operation list, so an empty batch is
+        # handled up front instead of being logged as a failure.
+        if not records:
+            return 0
+
+        stored_count = 0
+
+        try:
+            from pymongo import UpdateOne
+
+            caption_docs = [self._record_to_document(record) for record in records]
+
+            # Batch insert into MongoDB (using bulk write for upserts)
+            operations = [
+                UpdateOne(
+                    {"caption_id": doc["caption_id"]},
+                    {"$set": doc},
+                    upsert=True
+                )
+                for doc in caption_docs
+            ]
+            self.captions_collection.bulk_write(operations)
+
+            # Mirror the upserts in the vector store, then persist once
+            for record, doc in zip(records, caption_docs):
+                self._put_embedding(doc, record.embedding)
+            self._save_vector_data()
+
+            stored_count = len(records)
+            self.logger.info(f"✅ Stored {stored_count} caption records in MongoDB + FAISS")
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to store caption records batch: {e}")
+
+        return stored_count
+
+    def get_caption_by_id(self, caption_id: str) -> Optional[Dict[str, Any]]:
+        """Retrieve one caption document by ID.
+
+        Returns:
+            The document without MongoDB's ``_id``, or None when it does not
+            exist or the lookup failed (the error is logged).
+        """
+        try:
+            doc = self.captions_collection.find_one({"caption_id": caption_id})
+            if doc:
+                # Remove MongoDB _id field
+                doc.pop('_id', None)
+                return doc
+            return None
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to get caption by ID: {e}")
+            return None
+
+    def get_captions_by_video(self, video_id: str) -> List[Dict[str, Any]]:
+        """Retrieve all caption documents of a video, sorted by ``timestamp``.
+
+        Returns:
+            The documents without ``_id``; an empty list on failure.
+        """
+        try:
+            cursor = self.captions_collection.find({"video_id": video_id}).sort("timestamp", 1)
+
+            captions = []
+            for doc in cursor:
+                doc.pop('_id', None)
+                captions.append(doc)
+
+            return captions
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to get captions by video: {e}")
+            return []
+
+    def search_similar_captions(self, query_embedding: np.ndarray,
+                              top_k: int = 5) -> List[Dict[str, Any]]:
+        """Return the stored captions most similar to ``query_embedding``.
+
+        Similarity is the cosine of the angle between the query and each
+        stored embedding, computed by brute force over all embeddings.
+
+        Args:
+            query_embedding: Vector with the same length as the stored ones.
+            top_k: Maximum number of results; values <= 0 yield no results.
+
+        Returns:
+            Caption documents (best match first), each extended with a
+            ``similarity`` float.  Entries whose document is missing from
+            MongoDB are skipped.  An empty list is returned on failure.
+        """
+        try:
+            if not self.embeddings:
+                self.logger.warning("No embeddings available for search")
+                return []
+
+            if top_k <= 0:
+                return []
+
+            query = np.asarray(query_embedding, dtype=float).ravel()
+            matrix = np.asarray(self.embeddings, dtype=float)
+            matrix = matrix.reshape(len(self.embeddings), -1)
+
+            # cosine = dot / (|query| * |embedding|).  A plain dot product is
+            # only a cosine for unit-length vectors, which nothing here
+            # guarantees.  Zero-length vectors get similarity 0.
+            dots = matrix @ query
+            norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
+            similarities = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)
+
+            # Descending order; the stable sort keeps insertion order on ties
+            ranking = np.argsort(-similarities, kind='stable')[:top_k]
+
+            # Get top results
+            results = []
+            for position in ranking:
+                metadata = self.vector_metadata[int(position)]
+                caption_data = self.get_caption_by_id(metadata['caption_id'])
+                if caption_data:
+                    caption_data['similarity'] = float(similarities[position])
+                    results.append(caption_data)
+
+            self.logger.info(f"🔍 Found {len(results)} similar captions")
+            return results
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to search similar captions: {e}")
+            return []
+
+    def log_rejected_caption(self, raw_caption: str, sanitized_caption: str,
+                           reason: str):
+        """Record a rejected caption in the audit collection.
+
+        Failures are logged and swallowed.
+        """
+        try:
+            audit_doc = {
+                "raw_caption": raw_caption,
+                "sanitized_caption": sanitized_caption,
+                "rejection_reason": reason,
+                "created_at": datetime.now().isoformat()
+            }
+
+            self.audit_collection.insert_one(audit_doc)
+            self.logger.debug(f"📝 Logged rejected caption")
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to log rejected caption: {e}")
+
+    def get_statistics(self) -> Dict[str, Any]:
+        """Return caption, video, rejection and embedding counts.
+
+        Returns:
+            A dict with ``total_captions``, ``unique_videos``,
+            ``rejected_captions`` and ``vector_embeddings``; an empty dict on
+            failure.
+        """
+        try:
+            total_captions = self.captions_collection.count_documents({})
+
+            unique_videos = len(self.captions_collection.distinct("video_id"))
+
+            rejected_captions = self.audit_collection.count_documents({})
+
+            return {
+                'total_captions': total_captions,
+                'unique_videos': unique_videos,
+                'rejected_captions': rejected_captions,
+                'vector_embeddings': len(self.embeddings)
+            }
+
+        except Exception as e:
+            self.logger.error(f"❌ Failed to get statistics: {e}")
+            return {}
+
+    def close(self):
+        """Save the vector data and close the MongoDB client this instance owns.
+
+        A connection borrowed from ``db_manager`` is left open.  Safe to call
+        more than once; the storage must not be used afterwards.
+        """
+        try:
+            # Check if Python is shutting down
+            import sys
+            if sys.meta_path is None:
+                return  # Python is shutting down, skip cleanup
+
+            # Nothing to save if initialisation failed before the vector
+            # store was set up (close() is also reached through __del__).
+            if hasattr(self, 'vector_db_path'):
+                self._save_vector_data()
+
+            client = getattr(self, '_client', None)
+            if client is not None:
+                client.close()
+                self._client = None
+
+            self.logger.info("💾 Caption storage closed and saved")
+        except Exception as e:
+            # Ignore errors during shutdown
+            if "sys.meta_path is None" not in str(e):
+                self.logger.error(f"❌ Failed to close storage: {e}")
+
+    def __del__(self):
+        """Best-effort cleanup on destruction; never raises."""
+        try:
+            # Check if Python is shutting down
+            import sys
+            if sys.meta_path is not None:
+                self.close()
+        except Exception:
+            # Silently ignore errors during shutdown
+            pass
diff --git a/video_captioning/video_captioning/quick_test.py b/video_captioning/video_captioning/quick_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3acc4f41aa705d4d2b33c2f74de63afa8d822b8
--- /dev/null
+++ b/video_captioning/video_captioning/quick_test.py
@@ -0,0 +1,190 @@
+"""
+Quick test without heavy model downloads - uses mock data
+"""
+
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import cv2
+
+# Add current directory to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+from models import Frame
+
+
+def create_test_frames_from_video(video_path, num_frames=3):
+    """Read up to ``num_frames`` evenly spaced frames from a video.
+
+    The frames are converted to RGB PIL images, shrunk to 320x240 and wrapped
+    in ``Frame`` objects; no captioning is performed.
+
+    Args:
+        video_path: Path of the video file to read.
+        num_frames: Number of frames to sample; values below 1 yield no frames.
+
+    Returns:
+        The frames that could be read, in video order.  The list is empty when
+        the video cannot be opened; errors are printed, never raised.
+    """
+    frames = []
+
+    # Nothing to sample; this also avoids dividing by zero for the interval.
+    if num_frames < 1:
+        return frames
+
+    cap = None
+
+    try:
+        print(f"Reading video: {video_path}")
+        cap = cv2.VideoCapture(video_path)
+
+        if not cap.isOpened():
+            print(f"Error: Could not open {video_path}")
+            return frames
+
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+
+        print(f"Video: {total_frames} frames, {fps:.1f} FPS")
+
+        # Extract frames at intervals
+        interval = max(1, total_frames // num_frames)
+
+        for i in range(num_frames):
+            cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
+
+            ret, cv_frame = cap.read()
+            if not ret:
+                break
+
+            # Convert to RGB and PIL
+            rgb_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
+            pil_image = Image.fromarray(rgb_frame)
+            pil_image = pil_image.resize((320, 240))  # Smaller for testing
+
+            frames.append(Frame(
+                frame_id=f"test_frame_{i:03d}",
+                timestamp=datetime.now(),
+                video_id=Path(video_path).stem,
+                image=pil_image
+            ))
+            print(f"✓ Extracted frame {i+1}/{num_frames}")
+
+    except Exception as e:
+        print(f"Error: {e}")
+
+    finally:
+        # Release the capture on every path, including errors and the early
+        # return above.
+        if cap is not None:
+            cap.release()
+
+    return frames
+
+
+def test_basic_functionality():
+    """Smoke-test the module with mock captions instead of real models.
+
+    Runs four steps in order -- frame extraction from a sample video, module
+    imports, building mock caption records, and a storage round trip inside a
+    temporary directory.  Each step prints a ✓/✗ line and the function returns
+    at the first failure.
+    """
+    banner = "="*50
+    print(banner)
+    print("QUICK TEST - Video Captioning Module")
+    print(banner)
+
+    # Find a test video: the first candidate that exists wins
+    candidates = (
+        "../backend/fight_0002.mp4",
+        "../backend/fire.mp4",
+        "../backend/rob.mp4",
+    )
+    video_path = next((path for path in candidates if os.path.exists(path)), None)
+
+    if not video_path:
+        print("No test videos found!")
+        print("Available videos should be in ../backend/")
+        return
+
+    print(f"Using video: {video_path}")
+
+    # Test 1: Frame extraction
+    print("\n1. Testing frame extraction...")
+    frames = create_test_frames_from_video(video_path, num_frames=2)
+
+    if not frames:
+        print("✗ Failed to extract frames")
+        return
+
+    print(f"✓ Successfully extracted {len(frames)} frames")
+    for extracted in frames:
+        print(f"  - {extracted.frame_id}: {extracted.image.size} pixels")
+
+    # Test 2: Basic model imports
+    print("\n2. Testing module imports...")
+    try:
+        from config import CaptioningConfig
+        config = CaptioningConfig()
+        print("✓ Configuration loaded")
+
+        from models import CaptionRecord
+        print("✓ Models imported")
+
+        from storage import CaptionStorage
+        print("✓ Storage module imported")
+
+    except Exception as import_error:
+        print(f"✗ Import error: {import_error}")
+        return
+
+    # Test 3: Mock caption processing
+    print("\n3. Testing mock caption processing...")
+    try:
+        import uuid
+        import numpy as np
+
+        # One mock record per extracted frame
+        mock_records = [
+            CaptionRecord(
+                caption_id=str(uuid.uuid4()),
+                video_id=source.video_id,
+                frame_id=source.frame_id,
+                timestamp=source.timestamp,
+                raw_caption=f"Mock raw caption for {source.frame_id}",
+                sanitized_caption=f"Person performing activity in scene {source.frame_id[-1]}",
+                embedding=np.random.rand(384),  # Mock embedding
+                created_at=datetime.now()
+            )
+            for source in frames
+        ]
+
+        print(f"✓ Created {len(mock_records)} mock caption records")
+
+        for mock in mock_records:
+            print(f"  - {mock.frame_id}: {mock.sanitized_caption}")
+
+    except Exception as mock_error:
+        print(f"✗ Mock processing error: {mock_error}")
+        return
+
+    # Test 4: Storage test
+    print("\n4. Testing storage...")
+    try:
+        import tempfile
+        with tempfile.TemporaryDirectory() as scratch_dir:
+            db_file = os.path.join(scratch_dir, "test.db")
+            vector_dir = os.path.join(scratch_dir, "vectors")
+            test_config = CaptioningConfig(
+                db_connection_string=db_file,
+                vector_db_path=vector_dir
+            )
+
+            storage = CaptionStorage(test_config)
+
+            # Store mock records
+            stored = storage.store_caption_records_batch(mock_records)
+            print(f"✓ Stored {stored} records in database")
+
+            # Test retrieval
+            video_captions = storage.get_captions_by_video(frames[0].video_id)
+            print(f"✓ Retrieved {len(video_captions)} captions for video")
+
+            storage.close()
+
+    except Exception as storage_error:
+        print(f"✗ Storage test error: {storage_error}")
+        return
+
+    print("\n" + banner)
+    print("✅ QUICK TEST PASSED!")
+    print(banner)
+    print("\nNext steps:")
+    print("1. Install full requirements: python install_requirements.py")
+    print("2. Run full test: python test_runner.py")
+    print("3. Or run example: python example_usage.py")
+
+
+# Run the quick test only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    test_basic_functionality()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/requirements.txt b/video_captioning/video_captioning/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b43e98e3bb69b65d8aa768819dd63146dfdd0149
--- /dev/null
+++ b/video_captioning/video_captioning/requirements.txt
@@ -0,0 +1,34 @@
+# Core dependencies
+torch>=1.9.0
+torchvision>=0.10.0
+transformers>=4.20.0
+sentence-transformers>=2.2.0
+Pillow>=8.3.0
+numpy>=1.21.0
+
+# Optional GPU support
+# torch>=1.9.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+
+# Database and storage (standard library only; pip must not see these names)
+# sqlite3      - standard library, do not install
+# pickle       - standard library, do not install
+
+# Async support
+# asyncio      - standard library, do not install
+
+# Logging and utilities
+# logging      - standard library, do not install
+# pathlib      - standard library, do not install
+# dataclasses  - standard library (Python 3.7+), do not install
+# typing       - standard library, do not install
+# uuid         - standard library, do not install
+# datetime     - standard library, do not install
+# time         - standard library, do not install
+# json         - standard library, do not install
+# os           - standard library, do not install
+
+# Development and testing (optional)
+pytest>=6.0.0
+pytest-asyncio>=0.18.0
+black>=21.0.0
+flake8>=3.9.0
\ No newline at end of file
diff --git a/video_captioning/video_captioning/run_video_test.py b/video_captioning/video_captioning/run_video_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..f16aba66786f2b9620397113ba8bd50235c67422
--- /dev/null
+++ b/video_captioning/video_captioning/run_video_test.py
@@ -0,0 +1,201 @@
+"""
+Working video test with dependency handling
+"""
+
+import os
+import sys
+import subprocess
+from pathlib import Path
+
+# Add current directory to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+
+def install_dependencies():
+    """Pip-install each package the video test needs, one at a time.
+
+    pip is run through the current interpreter; a package whose install
+    exits non-zero is reported and the remaining ones are still attempted.
+    """
+    print("Installing required dependencies...")
+    pip_install = [sys.executable, "-m", "pip", "install"]
+    wanted = (
+        "numpy<2",  # Fix NumPy compatibility
+        "opencv-python", "Pillow", "torch",
+        "transformers", "sentence-transformers",
+    )
+
+    for name in wanted:
+        print(f"Installing {name}...")
+        try:
+            subprocess.check_call(pip_install + [name, "--quiet"])
+        except subprocess.CalledProcessError:
+            print(f"⚠️  Failed to install {name}")
+        else:
+            print(f"✓ {name} installed")
+
+    print("Dependencies installation completed.")
+
+
+def test_video_processing():
+    """Caption a few frames of a sample video using the real models.
+
+    Picks the first sample video that exists, grabs up to three frames,
+    runs them through CaptioningService on CPU and prints the captions,
+    search hits and statistics. Failures are printed with a traceback
+    rather than raised; the capture and the service are always released.
+    """
+    print("\n" + "="*60)
+    print("TESTING VIDEO CAPTIONING WITH REAL MODELS")
+    print("="*60)
+
+    try:
+        # Import after dependencies are installed
+        from datetime import datetime
+        from PIL import Image
+        import cv2
+
+        from models import Frame
+        from config import CaptioningConfig
+        from captioning_service import CaptioningService
+
+        # Find test video (relative paths: resolved against the working dir)
+        video_files = [
+            "../backend/fight_0002.mp4",
+            "../backend/fire.mp4",
+            "../backend/rob.mp4"
+        ]
+        test_video = next((v for v in video_files if os.path.exists(v)), None)
+        if not test_video:
+            print("No test video found!")
+            return
+        print(f"Using video: {test_video}")
+
+        # Extract a few frames
+        print("Extracting frames...")
+        cap = cv2.VideoCapture(test_video)
+        frames = []
+        try:
+            if not cap.isOpened():
+                print(f"Could not open video: {test_video}")
+                return
+            for i in range(3):  # Extract 3 frames
+                cap.set(cv2.CAP_PROP_POS_FRAMES, i * 30)  # Every 30 frames
+                ret, cv_frame = cap.read()
+                if not ret:
+                    break
+
+                # Convert to RGB and PIL
+                rgb_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
+                pil_image = Image.fromarray(rgb_frame)
+                pil_image = pil_image.resize((224, 224))  # Standard size
+
+                frames.append(Frame(
+                    frame_id=f"test_frame_{i:03d}",
+                    timestamp=datetime.now(),
+                    video_id=Path(test_video).stem,
+                    image=pil_image
+                ))
+                print(f"✓ Extracted frame {i+1}")
+        finally:
+            # Release the capture even when decoding or conversion raises
+            cap.release()
+
+        if not frames:
+            print("No frames extracted!")
+            return
+
+        # Configure service for CPU (safer for testing)
+        config = CaptioningConfig(
+            vision_model_name="Salesforce/blip-image-captioning-base",
+            embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+            vision_device="cpu",
+            embedding_device="cpu",
+            vision_batch_size=1,  # Process one at a time
+            enable_async_processing=False,
+            log_rejected_captions=True
+        )
+
+        print("\nInitializing captioning service...")
+        print("(This may take a while to download models on first run)")
+
+        service = CaptioningService(config)
+        print("✓ Service initialized")
+        try:
+            # Process frames
+            print(f"\nProcessing {len(frames)} frames...")
+            result = service.process_frames(frames)
+
+            # Show results
+            print(f"\n{'='*50}")
+            print("RESULTS")
+            print(f"{'='*50}")
+            print(f"Success: {result.success}")
+            print(f"Processing time: {result.processing_time:.2f}s")
+            print(f"Records created: {len(result.caption_records)}")
+            print(f"Errors: {len(result.errors)}")
+
+            if result.errors:
+                print("\nErrors:")
+                for error in result.errors:
+                    print(f"  - {error}")
+
+            if result.caption_records:
+                print(f"\nGenerated Captions:")
+                for i, record in enumerate(result.caption_records, 1):
+                    print(f"\n{i}. Frame: {record.frame_id}")
+                    print(f"   Raw: {record.raw_caption}")
+                    print(f"   Safe: {record.sanitized_caption}")
+                    print(f"   Embedding: {record.embedding.shape}")
+
+            # Test search
+            print(f"\n{'='*50}")
+            print("TESTING SEARCH")
+            print(f"{'='*50}")
+
+            for query in ("person", "activity", "movement"):
+                results = service.search_captions(query, top_k=2)
+                print(f"\nSearch '{query}': {len(results)} results")
+                for result_item in results:
+                    similarity = result_item.get('similarity', 0)
+                    print(f"  - {result_item['sanitized_caption']} ({similarity:.3f})")
+
+            # Statistics
+            stats = service.get_statistics()
+            print(f"\n{'='*50}")
+            print("STATISTICS")
+            print(f"{'='*50}")
+            for key, value in stats.items():
+                print(f"{key}: {value}")
+        finally:
+            # Close the service even if one of the steps above fails
+            service.close()
+
+        print(f"\n{'='*60}")
+        print("🎉 VIDEO TEST COMPLETED SUCCESSFULLY!")
+        print(f"{'='*60}")
+    except Exception as e:
+        print(f"Error during video test: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+def main():
+    """Ask for confirmation, then install dependencies and run the video test."""
+    print("Video Captioning Module - Full Test")
+    print("This will install dependencies and test with real video")
+
+    try:
+        response = input("\nProceed with installation and testing? (y/n): ")
+    except EOFError:
+        response = ""  # no interactive stdin: treat as "no" instead of crashing
+    # Tolerate surrounding whitespace and a spelled-out "yes"
+    if response.strip().lower() not in ("y", "yes"):
+        print("Test cancelled.")
+        return
+    install_dependencies()
+    test_video_processing()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/simple_test.py b/video_captioning/video_captioning/simple_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..bde9be8a2f9869db926d85292dc9ee62bbc16745
--- /dev/null
+++ b/video_captioning/video_captioning/simple_test.py
@@ -0,0 +1,166 @@
+"""
+Simple test without OpenCV dependency
+"""
+
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+
+# Add current directory to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+
+def test_imports():
+    """Check that the models, config and storage modules can be imported.
+
+    Returns True when every import (and a default CaptioningConfig) works,
+    False after printing the error otherwise.
+    """
+    print("Testing module imports...")
+    try:
+        from models import Frame, CaptionRecord
+        print("✓ Models imported successfully")
+        from config import CaptioningConfig
+        CaptioningConfig()  # instantiated only to prove the defaults load
+        print("✓ Configuration loaded")
+        from storage import CaptionStorage
+        print("✓ Storage module imported")
+    except Exception as exc:
+        print(f"✗ Import failed: {exc}")
+        return False
+    else:
+        return True
+
+
+def test_with_dummy_data():
+    """Store and read back one synthetic caption record in a temp directory.
+
+    Returns True on success; prints the error plus a traceback and returns
+    False otherwise. The storage is closed before the temp dir is removed.
+    """
+    print("\nTesting with dummy data...")
+    try:
+        from PIL import Image
+        from models import Frame, CaptionRecord
+        from config import CaptioningConfig
+        from storage import CaptionStorage
+        import numpy as np
+        import uuid
+        import tempfile
+
+        # Create dummy image
+        dummy_image = Image.new('RGB', (320, 240), color=(100, 150, 200))
+        # Create frame
+        frame = Frame(
+            frame_id="test_frame_001",
+            timestamp=datetime.now(),
+            video_id="test_video",
+            image=dummy_image
+        )
+        print(f"✓ Created test frame: {frame.frame_id}")
+        print(f"  Image size: {frame.image.size}")
+        print(f"  Video ID: {frame.video_id}")
+
+        # Test storage with dummy data
+        with tempfile.TemporaryDirectory() as temp_dir:
+            config = CaptioningConfig(
+                db_connection_string=os.path.join(temp_dir, "test.db"),
+                vector_db_path=os.path.join(temp_dir, "vectors")
+            )
+            storage = CaptionStorage(config)
+            try:
+                # Create dummy caption record
+                record = CaptionRecord(
+                    caption_id=str(uuid.uuid4()),
+                    video_id=frame.video_id,
+                    frame_id=frame.frame_id,
+                    timestamp=frame.timestamp,
+                    raw_caption="A test scene with colors",
+                    sanitized_caption="Scene with various objects",
+                    embedding=np.random.rand(384),
+                    created_at=datetime.now()
+                )
+
+                # Store record
+                success = storage.store_caption_record(record)
+                print(f"✓ Stored caption record: {success}")
+
+                # Retrieve record
+                retrieved = storage.get_caption_by_id(record.caption_id)
+                print(f"✓ Retrieved record: {retrieved is not None}")
+
+                # Get statistics
+                stats = storage.get_statistics()
+                print(f"✓ Statistics: {stats}")
+            finally:
+                # Close first so the SQLite file is not open during dir cleanup
+                storage.close()
+
+        return True
+    except Exception as e:
+        print(f"✗ Test failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def check_video_files():
+    """Report which of the known sample videos exist and return those paths."""
+    print("\nChecking for video files...")
+
+    candidates = (
+        "../backend/fight_0002.mp4",
+        "../backend/fire.mp4",
+        "../backend/rob.mp4",
+        "../backend/fire+weapon.mp4",
+    )
+
+    present = []
+    for candidate in candidates:
+        if not os.path.exists(candidate):
+            print(f"✗ Not found: {candidate}")
+            continue
+        megabytes = os.path.getsize(candidate) / 1024 / 1024
+        print(f"✓ Found: {candidate} ({megabytes:.1f} MB)")
+        present.append(candidate)
+
+    return present
+
+
+def main():
+    """Run the import, dummy-data and video-file checks, stopping at the first failure."""
+    banner = "="*50
+    print(banner)
+    print("SIMPLE TEST - Video Captioning Module")
+    print(banner)
+
+    # The first two checks gate everything that follows.
+    imports_ok = test_imports()
+    if not imports_ok:
+        print("Basic imports failed. Check your Python environment.")
+        return
+    dummy_ok = test_with_dummy_data()
+    if not dummy_ok:
+        print("Dummy data test failed.")
+        return
+
+    found = check_video_files()
+
+    print("\n" + banner)
+    print("✅ SIMPLE TEST COMPLETED!")
+    print(banner)
+
+    if not found:
+        print("\nNo video files found in ../backend/")
+        print("Make sure video files are in the backend directory.")
+        return
+    print(f"\nFound {len(found)} video files ready for testing.")
+    print("\nNext steps:")
+    print("1. Fix NumPy compatibility: pip install 'numpy<2'")
+    print("2. Install OpenCV: pip install opencv-python")
+    print("3. Run full test: python test_runner.py")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/storage.py b/video_captioning/video_captioning/storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5f1ccf75498b56d10ac2bfcda0f99bb2f690871
--- /dev/null
+++ b/video_captioning/video_captioning/storage.py
@@ -0,0 +1,341 @@
+"""
+Storage layer for captions and embeddings
+"""
+
+import logging
+import sqlite3
+import json
+import os
+from datetime import datetime
+from typing import List, Optional, Dict, Any
+import numpy as np
+import pickle
+from pathlib import Path
+
+try:
+    from .models import CaptionRecord
+    from .config import CaptioningConfig
+except ImportError:
+    from models import CaptionRecord
+    from config import CaptioningConfig
+
+
+class CaptionStorage:
+    """Handles storage of captions and embeddings"""
+    
+    def __init__(self, config: CaptioningConfig):
+        """Keep `config`, create a logger, and open both backing stores."""
+        self.config = config
+        self.logger = logging.getLogger(__name__)
+        # Open the SQLite metadata store first, then the on-disk vector store.
+        self._init_relational_db()
+        self._init_vector_db()
+    
+    def _init_relational_db(self):
+        """Open the SQLite database and create the caption tables if missing.
+
+        Any failure is logged and re-raised to the caller.
+        """
+        try:
+            # Use default path if not specified
+            db_path = self.config.db_connection_string or "captions.db"
+            # check_same_thread=False lets threads other than the creator use it
+            self.conn = sqlite3.connect(db_path, check_same_thread=False)
+            self.conn.row_factory = sqlite3.Row  # Enable dict-like access
+
+            # Create captions table
+            self.conn.execute("""
+                CREATE TABLE IF NOT EXISTS captions (
+                    caption_id TEXT PRIMARY KEY,
+                    video_id TEXT NOT NULL,
+                    frame_id TEXT NOT NULL,
+                    timestamp TEXT NOT NULL,
+                    raw_caption TEXT NOT NULL,
+                    sanitized_caption TEXT NOT NULL,
+                    created_at TEXT NOT NULL
+                )
+            """)
+
+            # Create indexes separately
+            self.conn.execute("CREATE INDEX IF NOT EXISTS idx_video_id ON captions(video_id)")
+            self.conn.execute("CREATE INDEX IF NOT EXISTS idx_frame_id ON captions(frame_id)")
+            self.conn.execute("CREATE INDEX IF NOT EXISTS idx_timestamp ON captions(timestamp)")
+            # Create audit table for rejected captions
+            self.conn.execute("""
+                CREATE TABLE IF NOT EXISTS caption_audit (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    raw_caption TEXT NOT NULL,
+                    sanitized_caption TEXT,
+                    rejection_reason TEXT,
+                    created_at TEXT NOT NULL
+                )
+            """)
+            self.conn.commit()
+            self.logger.info("Relational database initialized")
+        except Exception as e:
+            self.logger.error(f"Failed to initialize relational database: {e}")
+            raise
+    
+    def _init_vector_db(self):
+        """Create the vector-store directory and load any saved embeddings.
+
+        Any failure is logged and re-raised to the caller.
+        """
+        try:
+            vector_path = Path(self.config.vector_db_path or "./vector_store")
+            # parents=True: a nested path must not fail on a missing parent
+            vector_path.mkdir(parents=True, exist_ok=True)
+
+            self.vector_db_path = vector_path
+            self.embeddings_file = vector_path / "embeddings.pkl"
+            self.metadata_file = vector_path / "metadata.json"
+            # Load existing data if available
+            self._load_vector_data()
+            self.logger.info("Vector database initialized")
+        except Exception as e:
+            self.logger.error(f"Failed to initialize vector database: {e}")
+            raise
+    
+    def _load_vector_data(self):
+        """Load saved embeddings and metadata, or start empty if unavailable.
+
+        The two lists are index-aligned; if the files disagree in length only
+        their common prefix is kept so index lookups stay valid.
+        """
+        self.embeddings, self.vector_metadata = [], []
+        try:
+            if self.embeddings_file.exists() and self.metadata_file.exists():
+                # SECURITY: pickle.load can execute code; only load trusted files.
+                with open(self.embeddings_file, 'rb') as f:
+                    vecs = pickle.load(f)
+                with open(self.metadata_file, 'r') as f:
+                    meta = json.load(f)
+                n = min(len(vecs), len(meta))
+                if len(vecs) != len(meta):
+                    self.logger.warning("Embedding/metadata length mismatch; truncating")
+                self.embeddings, self.vector_metadata = vecs[:n], meta[:n]
+                self.logger.info(f"Loaded {len(self.embeddings)} existing embeddings")
+        except Exception as e:
+            self.logger.error(f"Failed to load vector data: {e}")
+    
+    def _save_vector_data(self):
+        """Write the in-memory embeddings (pickle) and metadata (JSON) to disk.
+
+        Errors are logged, not raised, so callers cannot detect a failed save.
+        """
+        try:
+            with open(self.embeddings_file, 'wb') as f:
+                pickle.dump(self.embeddings, f)
+            # Save metadata
+            with open(self.metadata_file, 'w') as f:
+                json.dump(self.vector_metadata, f, indent=2)
+        except Exception as e:
+            self.logger.error(f"Failed to save vector data: {e}")
+    
+    def store_caption_record(self, record: CaptionRecord) -> bool:
+        """Store one caption row in SQLite and its embedding in the vector store.
+
+        Returns True on success; on failure logs, rolls back and returns False.
+        """
+        try:
+            self.conn.execute("""
+                INSERT OR REPLACE INTO captions
+                (caption_id, video_id, frame_id, timestamp, raw_caption,
+                 sanitized_caption, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            """, (
+                record.caption_id, record.video_id, record.frame_id,
+                record.timestamp.isoformat(), record.raw_caption,
+                record.sanitized_caption, record.created_at.isoformat()
+            ))
+            self.conn.commit()
+            # Vectors change only after the commit; a re-stored id reuses its slot
+            entry = {'caption_id': record.caption_id, 'video_id': record.video_id,
+                     'frame_id': record.frame_id, 'timestamp': record.timestamp.isoformat()}
+            ids = [m['caption_id'] for m in self.vector_metadata]
+            if record.caption_id in ids:
+                slot = ids.index(record.caption_id)
+                self.embeddings[slot], self.vector_metadata[slot] = record.embedding, entry
+            else:
+                self.embeddings.append(record.embedding)
+                self.vector_metadata.append(entry)
+            self._save_vector_data()
+            return True
+        except Exception as e:
+            self.logger.error(f"Failed to store caption record: {e}")
+            try:
+                self.conn.rollback()  # discard the uncommitted insert, if any
+            except Exception:
+                pass  # best effort: the original error is already logged
+            return False
+    
+    def store_caption_records_batch(self, records: List[CaptionRecord]) -> int:
+        """Store several caption records in one transaction.
+
+        Rows are committed first; embeddings are added afterwards, overwriting
+        the vector of any caption_id already present. On failure the SQL write
+        is rolled back and logged. Returns len(records) on success, else 0.
+        """
+        try:
+            # Prepare data for batch insert
+            relational_data = []
+            vector_entries = []
+            for record in records:
+                stamp = record.timestamp.isoformat()
+                relational_data.append((
+                    record.caption_id, record.video_id, record.frame_id, stamp,
+                    record.raw_caption, record.sanitized_caption,
+                    record.created_at.isoformat()
+                ))
+                vector_entries.append((record.embedding, {
+                    'caption_id': record.caption_id, 'video_id': record.video_id,
+                    'frame_id': record.frame_id, 'timestamp': stamp
+                }))
+
+            # Batch insert into relational database
+            self.conn.executemany("""
+                INSERT OR REPLACE INTO captions
+                (caption_id, video_id, frame_id, timestamp, raw_caption,
+                 sanitized_caption, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+            """, relational_data)
+            self.conn.commit()
+            # Vectors change only after the commit; known ids are overwritten
+            slots = {m['caption_id']: i for i, m in enumerate(self.vector_metadata)}
+            for embedding, entry in vector_entries:
+                slot = slots.get(entry['caption_id'])
+                if slot is None:
+                    slots[entry['caption_id']] = len(self.embeddings)
+                    self.embeddings.append(embedding)
+                    self.vector_metadata.append(entry)
+                else:
+                    self.embeddings[slot], self.vector_metadata[slot] = embedding, entry
+            self._save_vector_data()
+            stored_count = len(records)
+            self.logger.info(f"Stored {stored_count} caption records")
+            return stored_count
+        except Exception as e:
+            self.logger.error(f"Failed to store caption records batch: {e}")
+            try:
+                self.conn.rollback()  # discard the uncommitted inserts, if any
+            except Exception:
+                pass  # best effort: the original error is already logged
+        return 0
+    
+    def get_caption_by_id(self, caption_id: str) -> Optional[Dict[str, Any]]:
+        """Fetch a single caption row by its primary key.
+
+        Returns the row as a plain dict, or None when no row matches or the
+        query fails (the failure is logged).
+        """
+        try:
+            match = self.conn.execute("""
+                SELECT * FROM captions WHERE caption_id = ?
+            """, (caption_id,)).fetchone()
+            found = dict(match) if match else None
+        except Exception as e:
+            self.logger.error(f"Failed to get caption by ID: {e}")
+            found = None
+        return found
+    
+    def get_captions_by_video(self, video_id: str) -> List[Dict[str, Any]]:
+        """Return the caption rows of `video_id` as dicts, ordered by timestamp."""
+        try:
+            rows = self.conn.execute("""
+                SELECT * FROM captions WHERE video_id = ? 
+                ORDER BY timestamp
+            """, (video_id,)).fetchall()
+            # Convert sqlite3.Row objects into plain dicts for the caller
+            return list(map(dict, rows))
+        except Exception as e:
+            self.logger.error(f"Failed to get captions by video: {e}")
+            # A failed query degrades to "no captions" instead of raising
+            return []
+    
+    def search_similar_captions(self, query_embedding: np.ndarray, 
+                              top_k: int = 5) -> List[Dict[str, Any]]:
+        """Return the stored captions most similar to `query_embedding`.
+
+        Scores are true cosine similarities (dot product divided by both norms;
+        0.0 for a zero vector), so unnormalised embeddings rank correctly.
+
+        Returns up to `top_k` caption dicts with an added float 'similarity',
+        best first; [] if nothing is stored, top_k <= 0, or on error (logged).
+        """
+        try:
+            if not self.embeddings or top_k <= 0:
+                return []
+
+            query = np.asarray(query_embedding, dtype=float).ravel()
+            matrix = np.asarray(self.embeddings, dtype=float).reshape(len(self.embeddings), -1)
+            norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
+            dots = matrix @ query
+            scores = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)
+
+            results = []
+            # Stable sort on the negated scores keeps insertion order on ties
+            for i in np.argsort(-scores, kind="stable")[:top_k]:
+                caption_data = self.get_caption_by_id(self.vector_metadata[i]['caption_id'])
+                if caption_data:
+                    caption_data['similarity'] = float(scores[i])
+                    results.append(caption_data)
+            return results
+        except Exception as e:
+            self.logger.error(f"Failed to search similar captions: {e}")
+            return []
+    
+    def log_rejected_caption(self, raw_caption: str, sanitized_caption: str, 
+                           reason: str):
+        """Append a rejected caption and its reason to the audit table.
+
+        The row is stamped with the current local time; errors are only logged.
+        """
+        try:
+            audit_row = (raw_caption, sanitized_caption, reason,
+                         datetime.now().isoformat())
+            self.conn.execute("""
+                INSERT INTO caption_audit 
+                (raw_caption, sanitized_caption, rejection_reason, created_at)
+                VALUES (?, ?, ?, ?)
+            """, audit_row)
+            # Commit right away so the audit entry is persisted with this call
+            self.conn.commit()
+        except Exception as e:
+            self.logger.error(f"Failed to log rejected caption: {e}")
+    
+    def get_statistics(self) -> Dict[str, Any]:
+        """Count stored captions, distinct videos, rejections and vectors.
+
+        Returns {} after logging if any of the queries fails.
+        """
+        def first_value(sql, column):
+            # Run a single-row aggregate query and pick one named column
+            return self.conn.execute(sql).fetchone()[column]
+
+        try:
+            stats = {}
+            stats['total_captions'] = first_value(
+                "SELECT COUNT(*) as total FROM captions", 'total')
+            stats['unique_videos'] = first_value("""
+                SELECT COUNT(DISTINCT video_id) as unique_videos FROM captions
+            """, 'unique_videos')
+            stats['rejected_captions'] = first_value(
+                "SELECT COUNT(*) as rejected FROM caption_audit", 'rejected')
+            # The vector count comes from memory, not from SQLite
+            stats['vector_embeddings'] = len(self.embeddings)
+            return stats
+        except Exception as e:
+            self.logger.error(f"Failed to get statistics: {e}")
+            return {}
+    
+    def close(self):
+        """Close the connection and save vectors once; later calls do nothing."""
+        try:
+            if not getattr(self, '_closed', False):
+                self._closed = True  # set first so __del__ cannot re-save stale data
+                self.conn.close()
+                self._save_vector_data()
+        except Exception as e:
+            self.logger.error(f"Failed to close storage: {e}")
+
+    def __del__(self):
+        self.close()  # cleanup on destruction; a no-op after an explicit close()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/test_runner.py b/video_captioning/video_captioning/test_runner.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc6d168842ccbab21285baf71a7b93aa48b4882d
--- /dev/null
+++ b/video_captioning/video_captioning/test_runner.py
@@ -0,0 +1,241 @@
+"""
+Simple test runner for video captioning module
+"""
+
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import cv2
+import logging
+
+# Add current directory to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+from captioning_service import CaptioningService
+from models import Frame
+from config import CaptioningConfig
+
+
+def setup_logging():
+    """Configure logging via basicConfig: INFO level, time/level/message format."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s'
+    )
+
+
+def extract_sample_frames(video_path, max_frames=5):
+    """Extract up to `max_frames` evenly spaced frames from a video.
+
+    Each frame is converted to RGB, resized to 640x480 and wrapped in a Frame
+    whose timestamp is the extraction start time plus the frame's offset into
+    the video, so timestamps keep increasing past the one-minute mark.
+
+    Returns the frames read so far (possibly none) if opening or decoding fails.
+    """
+    from datetime import timedelta
+
+    frames = []
+    cap = None
+    # Image.Resampling only exists in Pillow >= 9.1; older releases expose
+    # the same filter as Image.LANCZOS.
+    lanczos = getattr(Image, "Resampling", Image).LANCZOS
+
+    try:
+        print(f"Opening video: {video_path}")
+        cap = cv2.VideoCapture(video_path)
+
+        if not cap.isOpened():
+            print(f"Error: Could not open video {video_path}")
+            return frames
+
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        duration = total_frames / fps if fps > 0 else 0
+        print(f"Video info: {total_frames} frames, {fps:.2f} FPS, {duration:.2f}s duration")
+
+        # Extract frames at regular intervals
+        frame_interval = max(1, total_frames // max(1, max_frames))
+        started_at = datetime.now()
+        frame_count = 0
+
+        while len(frames) < max_frames:
+            ret, cv_frame = cap.read()
+            if not ret:
+                break
+            if frame_count % frame_interval == 0:
+                # Convert BGR to RGB
+                rgb_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
+                pil_image = Image.fromarray(rgb_frame)
+                pil_image = pil_image.resize((640, 480), lanczos)
+
+                # Offset into the video (falls back to the frame ordinal)
+                timestamp_seconds = frame_count / fps if fps > 0 else len(frames)
+                frames.append(Frame(
+                    frame_id=f"frame_{frame_count:06d}",
+                    timestamp=started_at + timedelta(seconds=timestamp_seconds),
+                    video_id=Path(video_path).stem,
+                    image=pil_image
+                ))
+                print(f"Extracted frame {len(frames)}/{max_frames} at {timestamp_seconds:.2f}s")
+
+            frame_count += 1
+        print(f"Successfully extracted {len(frames)} frames")
+    except Exception as e:
+        print(f"Error extracting frames: {e}")
+    finally:
+        if cap is not None:
+            cap.release()
+
+    return frames
+
+
+def test_video_captioning(video_path):
+    """Run the captioning pipeline on a few frames of `video_path` and print results.
+
+    Errors are printed with a traceback instead of raised; once the service
+    has been created it is closed on every exit path.
+    """
+    print(f"\n{'='*60}")
+    print(f"TESTING VIDEO CAPTIONING MODULE")
+    print(f"Video: {video_path}")
+    print(f"{'='*60}")
+
+    # Check if video exists
+    if not os.path.exists(video_path):
+        print(f"Error: Video file not found: {video_path}")
+        return
+
+    try:
+        # Create configuration
+        config = CaptioningConfig(
+            vision_model_name="Salesforce/blip-image-captioning-base",
+            embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+            vision_device="cpu",  # Use CPU for compatibility
+            embedding_device="cpu",
+            vision_batch_size=2,  # Small batch for testing
+            enable_async_processing=False,  # Sync for simplicity
+            log_rejected_captions=True
+        )
+        print("Initializing captioning service...")
+        service = CaptioningService(config)
+        print("✓ Service initialized successfully")
+        try:
+            # Extract frames
+            print("\nExtracting frames from video...")
+            frames = extract_sample_frames(video_path, max_frames=3)
+
+            if not frames:
+                print("No frames extracted. Exiting.")
+                return
+
+            # Process frames
+            print(f"\nProcessing {len(frames)} frames...")
+            result = service.process_frames(frames)
+
+            # Display results
+            print(f"\n{'='*40}")
+            print("PROCESSING RESULTS")
+            print(f"{'='*40}")
+            print(f"Success: {result.success}")
+            print(f"Processing time: {result.processing_time:.2f} seconds")
+            print(f"Records created: {len(result.caption_records)}")
+            print(f"Errors: {len(result.errors)}")
+
+            if result.errors:
+                print("\nErrors encountered:")
+                for error in result.errors:
+                    print(f"  - {error}")
+
+            # Show captions
+            if result.caption_records:
+                print(f"\n{'='*40}")
+                print("GENERATED CAPTIONS")
+                print(f"{'='*40}")
+                for i, record in enumerate(result.caption_records, 1):
+                    print(f"\nFrame {i} ({record.frame_id}):")
+                    print(f"  Timestamp: {record.timestamp}")
+                    print(f"  Raw caption: {record.raw_caption}")
+                    print(f"  Safe caption: {record.sanitized_caption}")
+                    print(f"  Embedding shape: {record.embedding.shape}")
+
+            # Test search functionality
+            print(f"\n{'='*40}")
+            print("TESTING SEARCH")
+            print(f"{'='*40}")
+
+            search_queries = ["person", "movement", "activity", "scene"]
+            for query in search_queries:
+                print(f"\nSearching for: '{query}'")
+                results = service.search_captions(query, top_k=3)
+                if results:
+                    for j, result_item in enumerate(results, 1):
+                        similarity = result_item.get('similarity', 0)
+                        print(f"  {j}. {result_item['sanitized_caption']} (similarity: {similarity:.3f})")
+                else:
+                    print("  No results found")
+
+            # Show statistics
+            stats = service.get_statistics()
+            print(f"\n{'='*40}")
+            print("STATISTICS")
+            print(f"{'='*40}")
+            for key, value in stats.items():
+                print(f"{key}: {value}")
+
+            # Check for rejected captions
+            rejected = service.get_rejected_captions()
+            if rejected:
+                print(f"\nRejected captions: {len(rejected)}")
+                for rejection in rejected:
+                    print(f"  Raw: {rejection['raw']}")
+                    print(f"  Reason: {rejection['reason']}")
+
+            print(f"\n{'='*60}")
+            print("TEST COMPLETED SUCCESSFULLY!")
+            print(f"{'='*60}")
+        finally:
+            # Runs on the early return and on errors as well
+            service.close()
+    except Exception as e:
+        print(f"Error during testing: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+def main():
+    """List the known sample videos and run the captioning test on the first one found."""
+    setup_logging()
+
+    candidates = (
+        "../backend/fight_0002.mp4",
+        "../backend/fire.mp4",
+        "../backend/rob.mp4",
+        "../backend/fire+weapon.mp4",
+    )
+
+    print("Available test videos:")
+    present = []
+    # Every candidate is listed; missing ones are flagged rather than hidden.
+    for number, path in enumerate(candidates, start=1):
+        exists = os.path.exists(path)
+        suffix = "" if exists else " (NOT FOUND)"
+        print(f"  {number}. {path}{suffix}")
+        if exists:
+            present.append(path)
+
+    if not present:
+        print("No test videos found!")
+        return
+
+    # Only the first available video is exercised.
+    chosen = present[0]
+    print(f"\nTesting with: {chosen}")
+
+    test_video_captioning(chosen)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning/video_captioning/vision_captioner.py b/video_captioning/video_captioning/vision_captioner.py
new file mode 100644
index 0000000000000000000000000000000000000000..938b57fe40d8245cf9bf52916a05a9a8d994484c
--- /dev/null
+++ b/video_captioning/video_captioning/vision_captioner.py
@@ -0,0 +1,144 @@
+"""
+Vision-language model for generating captions from frames
+"""
+
+import logging
+from typing import List, Union
+from PIL import Image
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+
+try:
+    from .models import Frame
+    from .config import CaptioningConfig
+except ImportError:
+    from models import Frame
+    from config import CaptioningConfig
+
+
+class VisionCaptioner:
+    """Handles vision-language model for frame captioning"""
+    
+    def __init__(self, config: CaptioningConfig):
+        """Pick the device, load the model, then start the worker pool.
+
+        Args:
+            config: Captioning settings. ``vision_device`` and
+                ``max_concurrent_requests`` are read here; ``_load_model``
+                reads the model name.
+
+        Raises:
+            Exception: Whatever ``_load_model`` re-raises when loading fails.
+        """
+        self.logger = logging.getLogger(__name__)
+        self.config = config
+        self.device = torch.device(config.vision_device)
+
+        # Load first: if this raises, no executor has been created yet.
+        self._load_model()
+
+        # Worker threads used by generate_captions_async.
+        pool_size = config.max_concurrent_requests
+        self.executor = ThreadPoolExecutor(max_workers=pool_size)
+    
+    def _load_model(self):
+        """Load the BLIP processor and model named in the config.
+
+        Sets ``self.processor`` and ``self.model`` (moved to ``self.device``).
+
+        Raises:
+            Exception: Any loading error is logged and re-raised unchanged.
+        """
+        try:
+            model_name = self.config.vision_model_name
+            self.logger.info(f"Loading vision model: {model_name}")
+            self.processor = BlipProcessor.from_pretrained(model_name)
+            blip = BlipForConditionalGeneration.from_pretrained(model_name)
+            self.model = blip.to(self.device)
+            self.logger.info("Vision model loaded successfully")
+        except Exception as e:
+            self.logger.error(f"Failed to load vision model: {e}")
+            raise
+    
+    def generate_caption(self, image: Image.Image) -> str:
+        """Caption a single image using beam search.
+
+        Args:
+            image: The PIL image to describe.
+
+        Returns:
+            The decoded caption, or the placeholder string
+            ``"Unable to generate caption"`` when anything fails (the error
+            is logged instead of raised).
+        """
+        try:
+            encoded = self.processor(image, return_tensors="pt").to(self.device)
+            pixels = encoded['pixel_values']
+
+            # Only pixel_values are passed to generate(); no attention mask.
+            with torch.no_grad():
+                token_ids = self.model.generate(
+                    pixel_values=pixels,
+                    max_length=50,
+                    num_beams=3,
+                )
+
+            # One image in, so the first sequence is the caption.
+            return self.processor.decode(token_ids[0], skip_special_tokens=True)
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate caption: {e}")
+            return "Unable to generate caption"
+    
+    def generate_captions_batch(self, images: List[Image.Image]) -> List[str]:
+        """Caption a list of images in chunks of ``config.vision_batch_size``.
+
+        Args:
+            images: PIL images to caption.
+
+        Returns:
+            The captions of every chunk, concatenated in input order. If an
+            exception escapes the loop, one ``"Unable to generate caption"``
+            placeholder per input image is returned instead.
+        """
+        try:
+            captions = []
+            chunk_size = self.config.vision_batch_size
+            n_images = len(images)
+            # Round up so a partial final chunk still counts as a batch.
+            total_batches = (n_images + chunk_size - 1) // chunk_size
+
+            self.logger.info(f"🔄 Processing {n_images} images in {total_batches} batches of {chunk_size}")
+
+            for batch_num, start in enumerate(range(0, n_images, chunk_size), 1):
+                chunk = images[start:start + chunk_size]
+                self.logger.info(f"⏳ Processing batch {batch_num}/{total_batches} ({len(chunk)} images)...")
+                captions += self._process_batch(chunk)
+                self.logger.info(f"✅ Batch {batch_num}/{total_batches} complete")
+
+            return captions
+
+        except Exception as e:
+            self.logger.error(f"Failed to generate batch captions: {e}")
+            return ["Unable to generate caption"] * len(images)
+    
+    def _process_batch(self, images: List[Image.Image]) -> List[str]:
+        """Caption one batch of images with a single ``generate`` call.
+
+        Args:
+            images: The images that make up this batch.
+
+        Returns:
+            One decoded caption per generated sequence, or one
+            ``"Unable to generate caption"`` placeholder per input image when
+            the batch fails (the error is logged, not raised).
+        """
+        try:
+            encoded = self.processor(images, return_tensors="pt", padding=True).to(self.device)
+
+            # Only pixel_values are forwarded. Per the original author's note,
+            # passing attention_mask here caused shape errors.
+            with torch.no_grad():
+                sequences = self.model.generate(
+                    pixel_values=encoded['pixel_values'],
+                    max_length=50,
+                    num_beams=3,  # lowered from 5 to speed up inference
+                    do_sample=False,
+                )
+
+            return [
+                self.processor.decode(sequence, skip_special_tokens=True)
+                for sequence in sequences
+            ]
+
+        except Exception as e:
+            self.logger.error(f"Failed to process batch: {e}")
+            return ["Unable to generate caption"] * len(images)
+    
+    async def generate_captions_async(self, frames: List[Frame]) -> List[str]:
+        """Caption frames, off-loading the work to the thread pool when enabled.
+
+        Args:
+            frames: Frames whose ``image`` attribute is captioned.
+
+        Returns:
+            The list produced by ``generate_captions_batch`` for the frames'
+            images.
+
+        When ``config.enable_async_processing`` is false the batch runs
+        inline, which blocks the event loop until captioning finishes.
+        """
+        images = [frame.image for frame in frames]
+
+        if not self.config.enable_async_processing:
+            return self.generate_captions_batch(images)
+
+        # Inside a coroutine get_running_loop() is the preferred way to reach
+        # the loop; get_event_loop() has policy-dependent behaviour.
+        loop = asyncio.get_running_loop()
+
+        # Run the blocking model call on the executor created in __init__.
+        return await loop.run_in_executor(
+            self.executor,
+            self.generate_captions_batch,
+            images,
+        )
+    
+    def __del__(self):
+        """Shut the worker pool down when the captioner is garbage-collected.
+
+        Does nothing if the instance never got an ``executor`` attribute
+        (for example when ``__init__`` failed while loading the model).
+        """
+        try:
+            pool = self.executor
+        except AttributeError:
+            return
+        # wait=True: block until work already submitted has finished.
+        pool.shutdown(wait=True)
\ No newline at end of file
diff --git a/video_captioning/video_captioning/working_test.py b/video_captioning/video_captioning/working_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2d9af0c781bebc28d38fade1c7e3b1f21dfc5ce
--- /dev/null
+++ b/video_captioning/video_captioning/working_test.py
@@ -0,0 +1,144 @@
+"""
+Working test for video captioning
+"""
+
+import os
+import sys
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import cv2
+
+# Add current directory to path
+sys.path.insert(0, str(Path(__file__).parent))
+
+# Import modules
+from models import Frame
+from config import CaptioningConfig
+from captioning_service import CaptioningService
+
+
+def main():
+    """Smoke-test the captioning service on two frames of a local video.
+
+    Steps: pick the first candidate video that exists, read the frames at
+    positions 0 and 100, resize them to 224x224, run them through
+    ``CaptioningService`` on CPU, print the captions, then run three sample
+    searches. Any error is printed with a traceback instead of being raised.
+    The video capture and the service are released even when a step fails.
+    """
+    print("="*60)
+    print("VIDEO CAPTIONING TEST")
+    print("="*60)
+
+    # Find test video
+    video_files = [
+        "../backend/fight_0002.mp4",
+        "../backend/fire.mp4",
+        "../backend/rob.mp4"
+    ]
+
+    test_video = next((video for video in video_files if os.path.exists(video)), None)
+    if not test_video:
+        print("No test video found!")
+        return
+    print(f"Found video: {test_video}")
+
+    service = None
+    try:
+        # Extract 2 frames from video
+        print("\nExtracting frames...")
+        frames = []
+        cap = cv2.VideoCapture(test_video)
+        try:
+            for i in range(2):
+                cap.set(cv2.CAP_PROP_POS_FRAMES, i * 100)  # Every 100 frames
+                ret, cv_frame = cap.read()
+                if not ret:
+                    break
+
+                # OpenCV decodes to BGR; convert to RGB before handing to PIL
+                rgb_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
+                pil_image = Image.fromarray(rgb_frame).resize((224, 224))
+
+                frames.append(Frame(
+                    frame_id=f"frame_{i:03d}",
+                    timestamp=datetime.now(),
+                    video_id=Path(test_video).stem,
+                    image=pil_image
+                ))
+                print(f"✓ Frame {i+1} extracted")
+        finally:
+            # Release the capture even if decoding or conversion raised.
+            cap.release()
+
+        if not frames:
+            print("No frames extracted!")
+            return
+
+        # Configure service
+        print("\nConfiguring captioning service...")
+        config = CaptioningConfig(
+            vision_model_name="Salesforce/blip-image-captioning-base",
+            embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
+            vision_device="cpu",
+            embedding_device="cpu",
+            vision_batch_size=1,
+            enable_async_processing=False
+        )
+
+        # Initialize service
+        print("Initializing service (downloading models if needed)...")
+        service = CaptioningService(config)
+        print("✓ Service ready")
+
+        # Process frames
+        print(f"\nProcessing {len(frames)} frames...")
+        result = service.process_frames(frames)
+
+        # Show results
+        print("\n" + "="*50)
+        print("RESULTS")
+        print("="*50)
+        print(f"Success: {result.success}")
+        print(f"Time: {result.processing_time:.2f}s")
+        print(f"Records: {len(result.caption_records)}")
+        print(f"Errors: {len(result.errors)}")
+
+        if result.errors:
+            print("\nErrors:")
+            for error in result.errors:
+                print(f"  - {error}")
+
+        if result.caption_records:
+            print("\nCaptions Generated:")
+            for i, record in enumerate(result.caption_records, 1):
+                print(f"\n{i}. Frame: {record.frame_id}")
+                print(f"   Raw: {record.raw_caption}")
+                print(f"   Safe: {record.sanitized_caption}")
+
+        # Test search
+        print("\n" + "="*50)
+        print("SEARCH TEST")
+        print("="*50)
+
+        queries = ["person", "activity", "scene"]
+        for query in queries:
+            results = service.search_captions(query, top_k=2)
+            print(f"\nSearch '{query}': {len(results)} results")
+            for res in results:
+                sim = res.get('similarity', 0)
+                print(f"  - {res['sanitized_caption']} ({sim:.3f})")
+
+        service.close()
+        service = None  # closed cleanly; nothing left for the finally block
+
+        print("\n" + "="*60)
+        print("🎉 TEST COMPLETED SUCCESSFULLY!")
+        print("="*60)
+
+    except Exception as e:
+        print(f"\nError: {e}")
+        import traceback
+        traceback.print_exc()
+    finally:
+        # Reached with a live service only on the error path: best-effort close.
+        if service is not None:
+            try:
+                service.close()
+            except Exception as close_error:
+                print(f"\nError: {close_error}")
+
+
+# Script entry point: run the test only when this file is executed directly.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/video_captioning_integrator.py b/video_captioning_integrator.py
new file mode 100644
index 0000000000000000000000000000000000000000..acfba46f97d77a958921727c5c9ad29eb99bda70
--- /dev/null
+++ b/video_captioning_integrator.py
@@ -0,0 +1,404 @@
+"""
+Video Captioning Integrator for DetectifAI
+
+This module integrates video captioning into the video processing pipeline.
+It generates neutral, policy-safe captions from keyframes and stores them with semantic embeddings.
+"""
+
+import os
+import sys
+import logging
+from typing import List, Dict, Any
+from datetime import datetime
+from PIL import Image
+import cv2
+
+# Import from video_captioning package
+try:
+    # Try direct import from package
+    from video_captioning import CaptioningService, Frame, CaptioningConfig
+except ImportError:
+    # Fallback to explicit path
+    from video_captioning.video_captioning.captioning_service import CaptioningService
+    from video_captioning.video_captioning.models import Frame
+    from video_captioning.video_captioning.config import CaptioningConfig
+
+logger = logging.getLogger(__name__)
+
+
+class VideoCaptioningIntegrator:
+    """Integration layer between video captioning and DetectifAI pipeline"""
+    
+    def __init__(self, config, db_manager=None):
+        """Store the settings and, when enabled, build the captioning service.
+
+        Args:
+            config: DetectifAI configuration object. ``enable_video_captioning``
+                turns the feature on (default off); the ``captioning_*``
+                attributes override the defaults used below when present.
+            db_manager: Optional database manager; stored on the instance and
+                forwarded to ``CaptioningService``.
+
+        A failure while building the service is logged and switches the
+        integrator off (``self.enabled = False``) instead of raising.
+        """
+        self.config = config
+        self.db_manager = db_manager
+        self.enabled = getattr(config, 'enable_video_captioning', False)
+        self.captioning_service = None
+
+        logger.info(f"🎬 Initializing VideoCaptioningIntegrator - enabled: {self.enabled}")
+
+        if not self.enabled:
+            logger.info("Video captioning disabled in config")
+            return
+
+        def setting(name, default):
+            # Optional config attribute with a fallback value.
+            return getattr(config, name, default)
+
+        try:
+            # Translate DetectifAI settings into the captioning package's config.
+            captioning_config = CaptioningConfig(
+                vision_model_name=setting('captioning_vision_model', "Salesforce/blip-image-captioning-base"),
+                vision_device=setting('captioning_device', "cpu"),
+                vision_batch_size=setting('captioning_batch_size', 4),
+                embedding_model_name=setting('captioning_embedding_model', "sentence-transformers/all-MiniLM-L6-v2"),
+                db_connection_string=setting('captioning_db_path', None),
+                vector_db_path=setting('captioning_vector_db_path', "./video_captioning_store"),
+                enable_async_processing=setting('captioning_async', True),
+                log_rejected_captions=True,
+            )
+
+            # The db_manager is passed through to the service.
+            self.captioning_service = CaptioningService(captioning_config, db_manager=db_manager)
+            logger.info("✅ Video captioning service initialized successfully (MongoDB + FAISS)")
+
+        except Exception as e:
+            logger.error(f"❌ Failed to initialize video captioning service: {e}")
+            self.enabled = False
+    
+    def _download_keyframe_from_minio(self, bucket, minio_path, local_path):
+        """Fetch one object from MinIO into a local file.
+
+        Args:
+            bucket: Name of the bucket to read from.
+            minio_path: Object key inside the bucket.
+            local_path: Destination path on the local filesystem.
+
+        Returns:
+            True when ``fget_object`` completes; False when no MinIO client is
+            available or any exception occurs (the error is logged).
+        """
+        try:
+            manager = self.db_manager
+            if not manager or not manager.minio_client:
+                logger.error("MinIO client not available")
+                return False
+
+            import socket
+
+            # Temporarily install a 30 second default socket timeout so the
+            # download cannot hang forever; the previous value is restored below.
+            # NOTE(review): setdefaulttimeout() is process-wide, and whether the
+            # MinIO client honours it is not visible here - confirm.
+            previous_timeout = socket.getdefaulttimeout()
+            socket.setdefaulttimeout(30)
+
+            try:
+                manager.minio_client.fget_object(bucket, minio_path, local_path)
+                logger.debug(f"✅ Downloaded {minio_path} to {local_path}")
+                return True
+            finally:
+                socket.setdefaulttimeout(previous_timeout)
+
+        except Exception as e:
+            logger.error(f"❌ Failed to download {minio_path} from MinIO: {e}")
+            return False
+    
+    # Limits applied by process_keyframes_with_captioning.
+    _MAX_KEYFRAMES = 10  # keyframes converted per call (kept low for speed)
+    _MAX_PROCESSING_SECONDS = 300  # 5 minutes
+
+    def process_keyframes_with_captioning(self, keyframes: List, video_id: str = None) -> Dict[str, Any]:
+        """
+        Process keyframes to generate captions
+
+        At most ``_MAX_KEYFRAMES`` keyframes are converted to ``Frame`` objects
+        (downloading images from MinIO into a temporary directory when they
+        are not on local disk) and passed to the captioning service. The time
+        budget ``_MAX_PROCESSING_SECONDS`` is checked once, after frame
+        preparation and before the service is called. The temporary directory
+        is always removed before returning.
+
+        Args:
+            keyframes: List of KeyframeResult objects
+            video_id: Optional video identifier
+
+        Returns:
+            Dictionary containing captioning results: ``enabled``,
+            ``total_captions`` and ``captions`` are always present;
+            ``errors`` is present whenever captioning was attempted, and
+            ``processing_time`` when the service ran to completion.
+        """
+        if not self.enabled or not self.captioning_service:
+            logger.info("🚫 Video captioning disabled, skipping...")
+            return {
+                'enabled': False,
+                'total_captions': 0,
+                'captions': []
+            }
+
+        logger.info(f"🎬 Starting video captioning on {len(keyframes)} keyframes")
+
+        import shutil
+        import tempfile
+
+        start_time = datetime.now()
+        max_processing_time = self._MAX_PROCESSING_SECONDS
+
+        def elapsed_seconds():
+            return (datetime.now() - start_time).total_seconds()
+
+        def failure(message):
+            # Result shape shared by every early-exit path after start-up.
+            return {
+                'enabled': True,
+                'total_captions': 0,
+                'captions': [],
+                'errors': [message]
+            }
+
+        # Create temporary directory for downloaded keyframes
+        temp_dir = tempfile.mkdtemp(prefix="keyframes_")
+        logger.info(f"📁 Created temporary directory for keyframes: {temp_dir}")
+
+        try:
+            keyframe_bucket = self._resolve_keyframe_bucket()
+            logger.info(f"🪣 Using MinIO bucket: {keyframe_bucket}")
+
+            selected = keyframes[:self._MAX_KEYFRAMES]
+            logger.info(f"📊 Processing up to {len(selected)} keyframes (limited for performance)")
+
+            # Convert keyframes to Frame objects; one bad keyframe never
+            # aborts the others.
+            frames = []
+            for idx, keyframe in enumerate(selected):
+                try:
+                    frame = self._keyframe_to_frame(keyframe, idx, video_id, temp_dir, keyframe_bucket)
+                except Exception as e:
+                    logger.error(f"❌ Error converting keyframe {idx}: {e}", exc_info=True)
+                    continue
+                if frame is not None:
+                    frames.append(frame)
+                    logger.debug(f"✅ Successfully converted keyframe {idx}")
+
+            if not frames:
+                logger.warning("⚠️ No frames could be converted for captioning")
+                logger.warning(f"Keyframe sample: {keyframes[0] if keyframes else 'No keyframes'}")
+                return failure('No frames could be converted - check keyframe structure or MinIO access')
+
+            logger.info(f"📝 Processing {len(frames)} frames for captioning...")
+            elapsed = elapsed_seconds()
+            logger.info(f"⏱️  Time elapsed: {elapsed:.1f}s")
+
+            # Check if we've exceeded time limit before processing
+            if elapsed > max_processing_time:
+                logger.error(f"❌ Exceeded time limit before caption generation: {elapsed:.1f}s")
+                return failure(f'Timeout: Exceeded {max_processing_time}s before caption generation')
+
+            # Process frames through captioning pipeline with error handling
+            try:
+                logger.info("🤖 Calling captioning service to process frames...")
+                result = self.captioning_service.process_frames(frames)
+                logger.info(f"✅ Captioning service completed in {elapsed_seconds():.1f}s")
+            except Exception as caption_error:
+                logger.error(f"❌ Caption generation failed: {caption_error}", exc_info=True)
+                return failure(f'Caption generation error: {str(caption_error)}')
+
+            # Extract caption records and print debugging info
+            captions = []
+            logger.info("=" * 80)
+            logger.info("🎬 VIDEO CAPTIONING RESULTS - KEYFRAME CAPTIONS")
+            logger.info("=" * 80)
+
+            for idx, record in enumerate(result.caption_records, 1):
+                captions.append({
+                    'caption_id': record.caption_id,
+                    'frame_id': record.frame_id,
+                    'timestamp': record.timestamp.isoformat(),
+                    'raw_caption': record.raw_caption,
+                    'sanitized_caption': record.sanitized_caption,
+                    'created_at': record.created_at.isoformat()
+                })
+
+                # DEBUG: Print caption for each keyframe
+                logger.info(f"\n📸 Keyframe #{idx} - {record.frame_id}")
+                logger.info(f"   ⏱️  Timestamp: {record.timestamp}")
+                logger.info(f"   🔤 Raw Caption: {record.raw_caption}")
+                logger.info(f"   ✨ Sanitized Caption: {record.sanitized_caption}")
+                logger.info(f"   🆔 Caption ID: {record.caption_id}")
+
+                # Also print to console for immediate visibility
+                print(f"\n{'='*60}")
+                print(f"📸 Keyframe #{idx}: {record.frame_id}")
+                print(f"⏱️  Time: {record.timestamp}")
+                print(f"🔤 Caption: {record.sanitized_caption}")
+                print(f"{'='*60}")
+
+            logger.info("\n" + "=" * 80)
+            logger.info(f"✅ Video captioning complete: {len(captions)} captions generated and saved to MongoDB")
+            logger.info("💾 Embeddings saved to FAISS vector database")
+            logger.info("=" * 80)
+
+            return {
+                'enabled': True,
+                'total_captions': len(captions),
+                'captions': captions,
+                'processing_time': result.processing_time,
+                'errors': result.errors
+            }
+
+        except Exception as e:
+            logger.error(f"❌ Video captioning failed: {e}", exc_info=True)
+            return failure(str(e))
+        finally:
+            # Cleanup: remove the temporary directory and everything in it
+            try:
+                if os.path.exists(temp_dir):
+                    shutil.rmtree(temp_dir)
+                    logger.info(f"🧹 Cleaned up temporary directory: {temp_dir}")
+            except Exception as e:
+                logger.warning(f"⚠️ Failed to cleanup temporary directory: {e}")
+
+    def _resolve_keyframe_bucket(self):
+        """Return the MinIO bucket name for keyframes, falling back to a default."""
+        manager = self.db_manager
+        if manager and hasattr(manager, 'keyframe_repo'):
+            return manager.keyframe_repo.bucket
+        if manager:
+            return getattr(manager, 'keyframe_bucket', 'detectifai-keyframes')
+        return 'detectifai-keyframes'
+
+    @staticmethod
+    def _read_keyframe_fields(keyframe, default_index):
+        """Read path, MinIO location, timestamp and index from a keyframe.
+
+        Each field is looked up on the keyframe itself first and then on its
+        ``frame_data`` attribute, because keyframes arrive in several shapes.
+
+        Returns:
+            Tuple ``(frame_path, minio_path, minio_bucket, timestamp,
+            frame_index)``. Fields that cannot be found are None, except
+            ``frame_index`` which falls back to ``default_index``.
+        """
+        frame_data = getattr(keyframe, 'frame_data', None)
+
+        if hasattr(keyframe, 'frame_path'):
+            frame_path = keyframe.frame_path
+        elif hasattr(keyframe, 'path'):
+            frame_path = keyframe.path
+        else:
+            frame_path = getattr(frame_data, 'frame_path', None)
+
+        if hasattr(keyframe, 'minio_path'):
+            minio_path = keyframe.minio_path
+            minio_bucket = getattr(keyframe, 'minio_bucket', None)
+        elif hasattr(frame_data, 'minio_path'):
+            minio_path = frame_data.minio_path
+            minio_bucket = getattr(frame_data, 'minio_bucket', None)
+        else:
+            minio_path = minio_bucket = None
+
+        if hasattr(keyframe, 'timestamp'):
+            timestamp = keyframe.timestamp
+        else:
+            timestamp = getattr(frame_data, 'timestamp', None)
+
+        if hasattr(keyframe, 'frame_index'):
+            frame_index = keyframe.frame_index
+        elif hasattr(keyframe, 'frame_number'):
+            frame_index = keyframe.frame_number
+        else:
+            frame_index = getattr(frame_data, 'frame_number', default_index)
+
+        return frame_path, minio_path, minio_bucket, timestamp, frame_index
+
+    def _keyframe_to_frame(self, keyframe, idx, video_id, temp_dir, default_bucket):
+        """Build a ``Frame`` from one keyframe, downloading its image if needed.
+
+        Returns:
+            The ``Frame``, or None when no image could be located or
+            downloaded (a warning is logged in that case).
+        """
+        logger.debug(f"Processing keyframe {idx}: type={type(keyframe)}")
+        frame_path, minio_path, minio_bucket, timestamp, frame_index = (
+            self._read_keyframe_fields(keyframe, idx)
+        )
+
+        missing_locally = bool(frame_path) and not os.path.exists(frame_path)
+        # A keyframe that carries only MinIO metadata (no local path at all)
+        # must be downloaded too; it used to be skipped.
+        if missing_locally or (not frame_path and minio_path):
+            if missing_locally:
+                logger.debug(f"⚠️ Frame path doesn't exist locally: {frame_path}")
+
+            if not minio_path and video_id:
+                # Fallback: construct MinIO path from video_id and frame_index
+                minio_path = f"{video_id}/keyframes/frame_{frame_index:06d}.jpg"
+
+            if not minio_path:
+                logger.warning("⚠️ No MinIO path available and no video_id to construct path")
+                return None
+
+            logger.debug(f"🔍 Attempting to download from MinIO: {minio_path}")
+            bucket_to_use = minio_bucket or default_bucket
+            local_temp_path = os.path.join(temp_dir, f"frame_{frame_index:06d}.jpg")
+
+            if not self._download_keyframe_from_minio(bucket_to_use, minio_path, local_temp_path):
+                logger.warning(f"❌ Failed to download keyframe from MinIO: {minio_path}")
+                return None
+            frame_path = local_temp_path
+            logger.debug(f"✅ Downloaded keyframe to: {frame_path}")
+
+        if not (frame_path and os.path.exists(frame_path)):
+            logger.warning(f"⚠️ Keyframe {idx} has no valid frame_path or file doesn't exist: {frame_path}")
+            return None
+
+        logger.debug(f"📸 Loading image from: {frame_path}")
+        # Image.open() is lazy and keeps the file open. copy() pulls the pixel
+        # data into memory so the handle is closed right away and the
+        # temporary directory can be deleted afterwards.
+        with Image.open(frame_path) as opened:
+            pil_image = opened.copy()
+
+        # Only a missing timestamp falls back to "now"; 0.0 is a valid value.
+        if timestamp is None:
+            frame_time = datetime.now()
+        else:
+            frame_time = datetime.fromtimestamp(timestamp)
+
+        return Frame(
+            frame_id=f"frame_{frame_index:06d}",
+            timestamp=frame_time,
+            video_id=video_id or "unknown",
+            image=pil_image
+        )
+    
+    def search_captions(self, query: str, video_id: str = None, top_k: int = 5) -> List[Dict[str, Any]]:
+        """
+        Search captions using semantic similarity
+
+        Args:
+            query: Search query text
+            video_id: Optional video ID to filter results
+            top_k: Maximum number of results to return
+
+        Returns:
+            List of matching caption records with similarity scores; empty
+            when captioning is disabled or the search fails (the failure is
+            logged).
+        """
+        if not self.enabled or not self.captioning_service:
+            return []
+
+        try:
+            if not video_id:
+                return self.captioning_service.search_captions(query, top_k=top_k)
+
+            # The service call takes no video filter, so filtering only its
+            # top_k hits could drop every match for this video. Request a
+            # larger candidate pool, filter it, then trim back to top_k.
+            # NOTE(review): assumes the service returns hits best-first - confirm.
+            pool_size = top_k * 5
+            candidates = self.captioning_service.search_captions(query, top_k=pool_size)
+            matches = [r for r in candidates if r.get('video_id') == video_id]
+            return matches[:top_k]
+
+        except Exception as e:
+            logger.error(f"Caption search failed: {e}")
+            return []
+    
+    def get_video_captions(self, video_id: str) -> List[Dict[str, Any]]:
+        """
+        Fetch every stored caption of one video from the captioning service
+
+        Args:
+            video_id: Video identifier
+
+        Returns:
+            Whatever the service returns for that video, or an empty list
+            when captioning is disabled or the lookup raises (the error is
+            logged)
+        """
+        if not (self.enabled and self.captioning_service):
+            return []
+
+        try:
+            captions = self.captioning_service.get_video_captions(video_id)
+        except Exception as e:
+            logger.error(f"Failed to get video captions: {e}")
+            captions = []
+        return captions
+    
+    def get_statistics(self) -> Dict[str, Any]:
+        """Get captioning service statistics
+
+        Returns:
+            ``{'enabled': False}`` when captioning is off; otherwise a copy of
+            the service's statistics with ``'enabled': True`` added, or
+            ``{'enabled': True, 'error': ...}`` when the service call fails.
+        """
+        if not self.enabled or not self.captioning_service:
+            return {'enabled': False}
+
+        try:
+            stats = self.captioning_service.get_statistics()
+            # Build a new dict so the object owned by the service is not mutated.
+            return {**stats, 'enabled': True}
+        except Exception as e:
+            logger.error(f"Failed to get statistics: {e}")
+            return {'enabled': True, 'error': str(e)}
diff --git a/video_compression.py b/video_compression.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3e554edcfff87cf079d89b292dc084712601286
--- /dev/null
+++ b/video_compression.py
@@ -0,0 +1,397 @@
+"""
+Video Compression Module
+
+This module handles:
+- Video compression with configurable quality settings
+- Resolution scaling
+- Format conversion
+- Compression statistics and reporting
+"""
+
+import os
+import subprocess
+import json
+import cv2
+import logging
+from typing import Dict, Any, Tuple, Optional
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+class VideoCompressor:
+    """Compress videos with FFmpeg, falling back to OpenCV when it is missing."""
+
+    # Frame size (width, height) for each named output resolution.
+    _TARGET_SIZES = {"720p": (1280, 720), "1080p": (1920, 1080), "480p": (854, 480)}
+
+    def __init__(self, config):
+        """Create ``<config.output_base_dir>/compressed`` and probe for FFmpeg."""
+        self.config = config
+        self.compressed_dir = os.path.join(config.output_base_dir, "compressed")
+        os.makedirs(self.compressed_dir, exist_ok=True)
+        # Probed once here; decides which backend every later call uses.
+        self.ffmpeg_available = self._check_ffmpeg()
+
+    def _check_ffmpeg(self) -> bool:
+        """Return True when ``ffmpeg -version`` runs and exits with status 0.
+
+        Any failure to launch it is logged as a warning and yields False."""
+        try:
+            result = subprocess.run(
+                ['ffmpeg', '-version'],
+                capture_output=True, text=True, timeout=10
+            )
+        except Exception as e:  # also covers FileNotFoundError / TimeoutExpired
+            logger.warning(f"FFmpeg not available: {e}")
+            return False
+        available = result.returncode == 0
+        logger.info(f"FFmpeg available: {available}")
+        return available
+
+    def compress_video(self, input_path: str, output_filename: Optional[str] = None) -> str:
+        """
+        Compress video with configured settings
+
+        Args:
+            input_path: Path to input video
+            output_filename: Optional custom output filename; defaults to
+                ``<input stem>_compressed.<config.video_output_format>``
+
+        Returns:
+            Path to compressed video, or "" when compression fails
+
+        Raises:
+            FileNotFoundError: If input_path does not exist
+        """
+        if not os.path.exists(input_path):
+            raise FileNotFoundError(f"Input video not found: {input_path}")
+
+        if output_filename is None:
+            base_name = os.path.splitext(os.path.basename(input_path))[0]
+            output_filename = f"{base_name}_compressed.{self.config.video_output_format}"
+        output_path = os.path.join(self.compressed_dir, output_filename)
+
+        logger.info(f"Compressing video: {input_path} -> {output_path}")
+        if self.ffmpeg_available:
+            return self._compress_with_ffmpeg(input_path, output_path)
+        return self._compress_with_opencv(input_path, output_path)
+
+    def _compress_with_ffmpeg(self, input_path: str, output_path: str) -> str:
+        """Compress one file with FFmpeg.
+
+        Args:
+            input_path: Source video path
+            output_path: Destination path (overwritten, FFmpeg runs with -y)
+
+        Returns:
+            output_path on success; "" on a non-zero exit status, a missing
+            output file, the 5-minute timeout, or any other exception
+        """
+        try:
+            cmd = self._build_ffmpeg_command(input_path, output_path)
+            logger.info(f"Running FFmpeg command: {' '.join(cmd)}")
+
+            result = subprocess.run(
+                cmd, capture_output=True, text=True,
+                timeout=300  # 5 minute timeout
+            )
+        except subprocess.TimeoutExpired:
+            logger.error("FFmpeg compression timed out")
+            return ""
+        except Exception as e:
+            logger.error(f"FFmpeg compression failed: {e}")
+            return ""
+
+        if result.returncode != 0:
+            logger.error(f"FFmpeg error: {result.stderr}")
+            return ""
+        if not os.path.exists(output_path):
+            logger.error("FFmpeg completed but output file not found")
+            return ""
+        stats = self._get_compression_stats(input_path, output_path)
+        logger.info(f"✅ Compression successful: {stats}")
+        return output_path
+
+    def _build_ffmpeg_command(self, input_path: str, output_path: str) -> list:
+        """Build the FFmpeg argument list from the configured parameters.
+
+        Video is encoded with libx264 using the configured preset and CRF;
+        a scale/pad filter is added for a known non-"original" resolution.
+        """
+        cmd = ['ffmpeg', '-y', '-i', input_path]
+        # Video codec and quality settings
+        cmd.extend(['-c:v', 'libx264'])
+        cmd.extend(['-preset', self.config.compression_preset])
+        cmd.extend(['-crf', str(self.config.compression_crf)])
+
+        # Resolution scaling (unknown resolution names add no filter)
+        if self.config.output_resolution != "original":
+            scale_filter = self._get_scale_filter()
+            if scale_filter:
+                cmd.extend(['-vf', scale_filter])
+
+        # Audio is re-encoded to AAC at 128 kbit/s (not stream-copied)
+        cmd.extend(['-c:a', 'aac', '-b:a', '128k'])
+        # Output optimizations
+        cmd.extend(['-movflags', '+faststart'])
+        cmd.append(output_path)
+        return cmd
+
+    def _get_scale_filter(self) -> str:
+        """Return the scale+pad filter for the configured resolution, or ""."""
+        size = self._TARGET_SIZES.get(self.config.output_resolution)
+        if size is None:
+            return ""
+        w, h = size
+        # Fit inside w x h keeping aspect ratio, then centre on a w x h canvas.
+        return f"scale={w}:{h}:force_original_aspect_ratio=decrease,pad={w}:{h}:(ow-iw)/2:(oh-ih)/2"
+
+    def _compress_with_opencv(self, input_path: str, output_path: str) -> str:
+        """Fallback compression: re-encode frame by frame with OpenCV.
+
+        Frames are resized to the configured resolution and written with the
+        first codec the local OpenCV build can open. The capture and writer
+        are released on every exit path, including errors.
+
+        Returns:
+            output_path on success, "" on any failure
+        """
+        logger.info("Using OpenCV for video compression (fallback)")
+        cap = None
+        out = None
+        try:
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                logger.error(f"Cannot open input video: {input_path}")
+                return ""
+
+            # Get video properties
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+            # Adjust resolution if needed
+            output_size = self._get_output_dimensions(width, height)
+            out = self._open_video_writer(output_path, fps, output_size)
+            if out is None:
+                logger.error("Cannot create output video writer with any codec")
+                return ""
+
+            needs_resize = output_size != (width, height)
+            frame_count = 0
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                if needs_resize:
+                    frame = cv2.resize(frame, output_size)
+                out.write(frame)
+                frame_count += 1
+
+                # Progress logging. Some containers report a frame count of 0
+                # (or less); skip the percentage rather than divide by zero.
+                if frame_count % 100 == 0 and total_frames > 0:
+                    progress = (frame_count / total_frames) * 100
+                    logger.info(f"Compression progress: {progress:.1f}%")
+        except Exception as e:
+            logger.error(f"OpenCV compression failed: {e}")
+            return ""
+        finally:
+            # Release both handles before the output file is inspected below.
+            if cap is not None:
+                cap.release()
+            if out is not None:
+                out.release()
+
+        if not os.path.exists(output_path):
+            logger.error("OpenCV compression failed - output file not created")
+            return ""
+        stats = self._get_compression_stats(input_path, output_path)
+        logger.info(f"✅ OpenCV compression successful: {stats}")
+        return output_path
+
+    def _open_video_writer(self, output_path: str, fps: float, frame_size: Tuple[int, int]):
+        """Return a writer opened with the first usable codec, or None."""
+        # H.264 variants first (best browser support), MPEG-4 as last resort.
+        for codec in ('avc1', 'H264', 'X264', 'mp4v'):
+            try:
+                fourcc = cv2.VideoWriter_fourcc(*codec)
+                writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
+                if writer.isOpened():
+                    logger.info(f"Using codec: {codec}")
+                    return writer
+                writer.release()
+            except Exception as e:
+                logger.warning(f"Codec {codec} failed: {e}")
+        return None
+
+    def _get_output_dimensions(self, input_width: int, input_height: int) -> Tuple[int, int]:
+        """Calculate output dimensions based on configuration.
+
+        Fits the input inside the configured target size while keeping its
+        aspect ratio, then rounds both sides down to even numbers.
+
+        Returns:
+            (width, height) to encode at. The input size is returned as-is
+            for "original" and for a non-positive input height.
+        """
+        if self.config.output_resolution == "original":
+            return input_width, input_height
+        # A capture that could not report its size yields 0; there is no
+        # aspect ratio to preserve and dividing by the height would raise.
+        if input_height <= 0:
+            return input_width, input_height
+        # An unrecognised resolution name keeps the input size as the target.
+        target_width, target_height = self._TARGET_SIZES.get(
+            self.config.output_resolution, (input_width, input_height)
+        )
+        aspect_ratio = input_width / input_height
+        if aspect_ratio > target_width / target_height:
+            # Width-constrained
+            output_width = target_width
+            output_height = int(target_width / aspect_ratio)
+        else:
+            # Height-constrained
+            output_height = target_height
+            output_width = int(target_height * aspect_ratio)
+        # Ensure even dimensions (required for some codecs)
+        return (output_width // 2) * 2, (output_height // 2) * 2
+
+    def _get_compression_stats(self, input_path: str, output_path: str) -> Dict[str, Any]:
+        """Get compression statistics.
+
+        Returns sizes, ratio, reduction, resolutions and FPS; {} on failure."""
+        input_cap = None
+        output_cap = None
+        try:
+            input_size = os.path.getsize(input_path)
+            output_size = os.path.getsize(output_path)
+            # Guard both divisions: an empty file on either side gives 0.
+            compression_ratio = input_size / output_size if output_size > 0 else 0
+            size_reduction = ((input_size - output_size) / input_size) * 100 if input_size > 0 else 0
+            input_cap = cv2.VideoCapture(input_path)
+            output_cap = cv2.VideoCapture(output_path)
+            return {
+                'input_size_mb': round(input_size / (1024*1024), 2),
+                'output_size_mb': round(output_size / (1024*1024), 2),
+                'compression_ratio': round(compression_ratio, 2),
+                'size_reduction_percent': round(size_reduction, 1),
+                'input_resolution': f"{int(input_cap.get(cv2.CAP_PROP_FRAME_WIDTH))}x{int(input_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))}",
+                'output_resolution': f"{int(output_cap.get(cv2.CAP_PROP_FRAME_WIDTH))}x{int(output_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))}",
+                'input_fps': round(input_cap.get(cv2.CAP_PROP_FPS), 2),
+                'output_fps': round(output_cap.get(cv2.CAP_PROP_FPS), 2)
+            }
+        except Exception as e:
+            logger.error(f"Failed to get compression stats: {e}")
+            return {}
+        finally:
+            # Release whichever captures were opened, on every exit path.
+            for cap in (input_cap, output_cap):
+                if cap is not None:
+                    cap.release()
+
+    def batch_compress(self, input_directory: str,
+                       output_directory: Optional[str] = None) -> Dict[str, str]:
+        """
+        Compress multiple videos in a directory
+
+        Args:
+            input_directory: Directory containing videos to compress
+            output_directory: Optional output directory (uses compressed_dir by default)
+
+        Returns:
+            Dictionary mapping input paths to output paths; failed videos are
+            logged and left out
+        """
+        if output_directory is None:
+            output_directory = self.compressed_dir
+        os.makedirs(output_directory, exist_ok=True)
+
+        # Find video files (sorted: os.listdir order is arbitrary)
+        video_extensions = ('.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv')
+        video_files = [
+            os.path.join(input_directory, filename)
+            for filename in sorted(os.listdir(input_directory))
+            if filename.lower().endswith(video_extensions)
+        ]
+        logger.info(f"Found {len(video_files)} videos to compress")
+
+        compress = self._compress_with_ffmpeg if self.ffmpeg_available else self._compress_with_opencv
+        results = {}
+        for video_path in video_files:
+            video_name = os.path.basename(video_path)
+            try:
+                # NOTE: inputs that share a stem (clip.mp4, clip.avi) map to
+                # the same output name, so the later one overwrites the earlier.
+                base_name = os.path.splitext(video_name)[0]
+                output_filename = f"{base_name}_compressed.{self.config.video_output_format}"
+                output_path = os.path.join(output_directory, output_filename)
+                compressed_path = compress(video_path, output_path)
+                if compressed_path:
+                    results[video_path] = compressed_path
+                    logger.info(f"✅ Compressed: {video_name}")
+                else:
+                    logger.error(f"❌ Failed to compress: {video_name}")
+            except Exception as e:
+                logger.error(f"Error compressing {video_path}: {e}")
+
+        logger.info(f"Batch compression complete: {len(results)}/{len(video_files)} successful")
+        return results
+
+    def save_compression_report(self, compression_results: Dict[str, Any],
+                              output_path: str) -> bool:
+        """Write the configured settings and *compression_results* as JSON.
+
+        Returns True on success, False (after logging) on any failure."""
+        try:
+            report = {
+                'compression_info': {
+                    'timestamp': datetime.now().isoformat(),
+                    'config': {
+                        'output_resolution': self.config.output_resolution,
+                        'compression_crf': self.config.compression_crf,
+                        'compression_preset': self.config.compression_preset,
+                        'video_output_format': self.config.video_output_format
+                    }
+                },
+                'results': compression_results
+            }
+
+            with open(output_path, 'w') as f:
+                json.dump(report, f, indent=2)
+            logger.info(f"Compression report saved: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save compression report: {e}")
+            return False
+
+    def estimate_compression_time(self, input_path: str) -> Optional[float]:
+        """Estimate compression time based on video properties.
+
+        Returns:
+            Estimated seconds (video duration times a per-preset factor), or
+            None when the video cannot be opened or reports no usable FPS.
+        """
+        cap = None
+        try:
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                return None
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        except Exception as e:
+            logger.error(f"Failed to estimate compression time: {e}")
+            return None
+        finally:
+            if cap is not None:
+                cap.release()
+
+        if fps <= 0:  # duration is undefined without a positive frame rate
+            logger.error(f"Failed to estimate compression time: invalid FPS {fps}")
+            return None
+        duration = total_frames / fps
+        # Rough estimation: 0.1-0.5x realtime depending on preset
+        preset_multipliers = {'ultrafast': 0.1, 'fast': 0.2, 'medium': 0.3, 'slow': 0.5}
+        multiplier = preset_multipliers.get(self.config.compression_preset, 0.3)
+        return duration * multiplier
\ No newline at end of file
diff --git a/video_segmentation.py b/video_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..b127f620806830bcff84f21ad85951f946ff95a8
--- /dev/null
+++ b/video_segmentation.py
@@ -0,0 +1,324 @@
+"""
+Video Segmentation Module
+
+This module handles:
+- Temporal video segmentation
+- Segment-wise keyframe extraction
+- Segment metadata generation
+- Segment-based event detection
+"""
+
+import os
+import json
+import cv2
+import numpy as np
+from typing import List, Dict, Any, Tuple
+from dataclasses import dataclass, asdict
+import logging
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class VideoSegment:
+    """A temporal video segment, as built by VideoSegmentationEngine.create_video_segments."""
+    segment_id: int  # zero-based position of the segment in the video
+    start_timestamp: float  # seconds from the start of the video
+    end_timestamp: float  # seconds; the last segment is clamped to the video duration
+    duration: float  # end_timestamp - start_timestamp
+    start_frame: int  # int(start_timestamp * fps)
+    end_frame: int  # int(end_timestamp * fps)
+    keyframes: List[Dict[str, Any]]  # serialized keyframes of this segment, ordered by timestamp
+    segment_type: str  # 'empty', 'burst_activity', 'high_motion', 'moderate_motion', 'high_quality' or 'static'
+    activity_level: str  # 'none', 'very_high', 'high', 'moderate', 'low' or 'very_low'
+    motion_statistics: Dict[str, float]  # min/max/mean/std of the keyframes' motion scores
+    quality_statistics: Dict[str, float]  # min/max/mean/std of the keyframes' quality scores
+
+class VideoSegmentationEngine:
+    """Split a video into fixed-length segments and analyse their keyframes."""
+
+    def __init__(self, config):
+        """Store *config* and create ``<config.output_base_dir>/segments``."""
+        self.config = config
+        self.segments_dir = os.path.join(config.output_base_dir, "segments")
+        os.makedirs(self.segments_dir, exist_ok=True)
+
+    def create_video_segments(self, video_path: str, keyframes: List) -> List[VideoSegment]:
+        """
+        Create temporal segments from video and associated keyframes
+
+        Args:
+            video_path: Path to source video
+            keyframes: List of extracted keyframes
+
+        Returns:
+            List of VideoSegment objects; empty when the video cannot be opened,
+            reports no usable FPS, or config.segment_duration is not positive
+        """
+        logger.info(f"Creating video segments from: {video_path}")
+
+        # Get video properties
+        cap = cv2.VideoCapture(video_path)
+        opened = cap.isOpened()
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        cap.release()
+
+        # An unreadable file reports 0 FPS; stop before dividing by it.
+        if not opened or fps <= 0:
+            logger.error(f"Cannot read video properties from: {video_path}")
+            return []
+        segment_duration = self.config.segment_duration
+        if segment_duration <= 0:
+            logger.error(f"Invalid segment duration: {segment_duration}")
+            return []
+
+        duration = total_frames / fps
+        logger.info(f"Video duration: {duration:.2f}s, FPS: {fps:.2f}")
+
+        # Create temporal segments
+        segments = []
+        num_segments = int(np.ceil(duration / segment_duration))
+        logger.info(f"Creating {num_segments} segments of {segment_duration}s each")
+
+        for i in range(num_segments):
+            start_time = i * segment_duration
+            end_time = min((i + 1) * segment_duration, duration)
+
+            # Find keyframes in this segment, then analyze them
+            segment_keyframes = self._get_keyframes_in_segment(
+                keyframes, start_time, end_time
+            )
+            segment_analysis = self._analyze_segment(segment_keyframes)
+
+            segments.append(VideoSegment(
+                segment_id=i,
+                start_timestamp=start_time,
+                end_timestamp=end_time,
+                duration=end_time - start_time,
+                start_frame=int(start_time * fps),
+                end_frame=int(end_time * fps),
+                keyframes=segment_keyframes,
+                segment_type=segment_analysis['segment_type'],
+                activity_level=segment_analysis['activity_level'],
+                motion_statistics=segment_analysis['motion_statistics'],
+                quality_statistics=segment_analysis['quality_statistics']
+            ))
+
+        logger.info(f"Created {len(segments)} video segments")
+        return segments
+
+    def _get_keyframes_in_segment(self, keyframes: List, start_time: float,
+                                end_time: float) -> List[Dict[str, Any]]:
+        """Get keyframes that fall within a segment's time range.
+
+        Keyframes with start_time <= timestamp < end_time are converted to plain
+        dicts. If there are more than config.keyframes_per_segment, only the
+        highest-scoring ones are kept. The result is ordered by timestamp.
+        """
+        segment_keyframes = []
+        for kf in keyframes:
+            frame = kf.frame_data
+            if not (start_time <= frame.timestamp < end_time):
+                continue
+            # Convert keyframe to serializable format
+            segment_keyframes.append({
+                'frame_data': {
+                    'frame_path': frame.frame_path,
+                    'timestamp': frame.timestamp,
+                    'frame_number': frame.frame_number,
+                    'quality_score': frame.quality_score,
+                    'motion_score': frame.motion_score,
+                    'burst_active': frame.burst_active,
+                    'enhancement_applied': frame.enhancement_applied
+                },
+                'keyframe_score': kf.keyframe_score,
+                'selection_reason': kf.selection_reason
+            })
+
+        # Sort by timestamp
+        segment_keyframes.sort(key=lambda x: x['frame_data']['timestamp'])
+
+        limit = self.config.keyframes_per_segment
+        if len(segment_keyframes) > limit:
+            # Keep the top scorers (stable sort: ties stay in timestamp order).
+            segment_keyframes.sort(key=lambda x: x['keyframe_score'], reverse=True)
+            segment_keyframes = segment_keyframes[:limit]
+            segment_keyframes.sort(key=lambda x: x['frame_data']['timestamp'])
+        return segment_keyframes
+
+    @staticmethod
+    def _summary_stats(values) -> Dict[str, float]:
+        """Return min/max/mean/std of *values* as plain Python floats."""
+        return {
+            'min': float(np.min(values)),
+            'max': float(np.max(values)),
+            'mean': float(np.mean(values)),
+            'std': float(np.std(values))
+        }
+
+    def _analyze_segment(self, keyframes: List[Dict[str, Any]]) -> Dict[str, Any]:
+        """Analyze segment characteristics.
+
+        Args:
+            keyframes: Keyframe dicts as built by _get_keyframes_in_segment
+
+        Returns:
+            Dict with 'segment_type', 'activity_level', 'motion_statistics' and
+            'quality_statistics' (each statistics dict has min/max/mean/std).
+            A segment without keyframes is typed 'empty' with all-zero stats.
+        """
+        if not keyframes:
+            zero_stats = {'min': 0, 'max': 0, 'mean': 0, 'std': 0}
+            return {
+                'segment_type': 'empty',
+                'activity_level': 'none',
+                'motion_statistics': dict(zero_stats),
+                'quality_statistics': dict(zero_stats)
+            }
+
+        # Extract metrics
+        frames = [kf['frame_data'] for kf in keyframes]
+        motion_stats = self._summary_stats([f['motion_score'] for f in frames])
+        quality_stats = self._summary_stats([f['quality_score'] for f in frames])
+        burst_count = sum(1 for f in frames if f['burst_active'])
+
+        # Both classifications are derived from the statistics above and the
+        # number of keyframes captured while a burst was active.
+        return {
+            'segment_type': self._classify_segment_type(motion_stats, quality_stats, burst_count),
+            'activity_level': self._classify_activity_level(motion_stats, burst_count),
+            'motion_statistics': motion_stats,
+            'quality_statistics': quality_stats
+        }
+
+    def _classify_segment_type(self, motion_stats: Dict, quality_stats: Dict,
+                             burst_count: int) -> str:
+        """Classify segment type based on characteristics.
+
+        Rules are checked in priority order: 'burst_activity', 'high_motion',
+        'moderate_motion', 'high_quality', else 'static'.
+        """
+        motion_threshold = self.config.motion_threshold
+        if burst_count >= 2:
+            return 'burst_activity'
+        if motion_stats['max'] > motion_threshold * 2:
+            return 'high_motion'
+        if motion_stats['mean'] > motion_threshold:
+            return 'moderate_motion'
+        if quality_stats['mean'] > self.config.base_quality_threshold * 1.2:
+            return 'high_quality'
+        return 'static'
+
+    def _classify_activity_level(self, motion_stats: Dict, burst_count: int) -> str:
+        """Classify activity level of segment.
+
+        Returns 'very_high', 'high', 'moderate', 'low' or 'very_low'.
+        """
+        avg_motion = motion_stats['mean']
+        max_motion = motion_stats['max']
+        threshold = self.config.motion_threshold
+        if burst_count >= 3 or max_motion > threshold * 3:
+            return 'very_high'
+        if burst_count >= 2 or max_motion > threshold * 2:
+            return 'high'
+        if burst_count >= 1 or avg_motion > threshold:
+            return 'moderate'
+        if avg_motion > threshold * 0.5:
+            return 'low'
+        return 'very_low'
+
+    def save_segments_metadata(self, segments: List[VideoSegment], output_path: str) -> bool:
+        """Save segment metadata to JSON file.
+
+        Returns:
+            True if the file was written, False (after logging) on any failure
+        """
+        try:
+            segments_data = {
+                'metadata': {
+                    'total_segments': len(segments),
+                    'segment_duration': self.config.segment_duration,
+                    'keyframes_per_segment': self.config.keyframes_per_segment,
+                    'generation_timestamp': datetime.now().isoformat()
+                },
+                'segments': [asdict(segment) for segment in segments]
+            }
+            with open(output_path, 'w') as f:
+                json.dump(segments_data, f, indent=2)
+            logger.info(f"Segments metadata saved to: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save segments metadata: {e}")
+            return False
+
+    def save_individual_segment_files(self, segments: List[VideoSegment]) -> bool:
+        """Save individual JSON files for each segment.
+
+        Files are named ``segment_NNN.json`` inside ``self.segments_dir``. Stops
+        at the first failure, logs it and returns False; True otherwise.
+        """
+        try:
+            for segment in segments:
+                segment_file = os.path.join(
+                    self.segments_dir,
+                    f"segment_{segment.segment_id:03d}.json"
+                )
+                segment_data = {
+                    'segment_info': asdict(segment),
+                    'keyframe_details': segment.keyframes
+                }
+                with open(segment_file, 'w') as f:
+                    json.dump(segment_data, f, indent=2)
+
+            logger.info(f"Individual segment files saved to: {self.segments_dir}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save individual segment files: {e}")
+            return False
+
+    def generate_segment_summary(self, segments: List[VideoSegment]) -> Dict[str, Any]:
+        """Generate summary statistics for all segments.
+
+        Args:
+            segments: Segments to summarize
+
+        Returns:
+            {} for an empty list; otherwise keyframe totals, per-level and
+            per-type segment counts, and min/max/mean/std of the per-segment
+            mean motion and mean quality scores.
+        """
+        if not segments:
+            return {}
+
+        def count_values(values):
+            """Count occurrences, keeping first-seen order."""
+            counts = {}
+            for value in values:
+                counts[value] = counts.get(value, 0) + 1
+            return counts
+
+        total_keyframes = sum(len(seg.keyframes) for seg in segments)
+        # Statistics below are taken over the per-segment means, so every
+        # segment weighs the same regardless of how many keyframes it has.
+        motion_means = [seg.motion_statistics['mean'] for seg in segments]
+        quality_means = [seg.quality_statistics['mean'] for seg in segments]
+        return {
+            'total_segments': len(segments),
+            'total_keyframes': total_keyframes,
+            'average_keyframes_per_segment': total_keyframes / len(segments),
+            'activity_level_distribution': count_values(seg.activity_level for seg in segments),
+            'segment_type_distribution': count_values(seg.segment_type for seg in segments),
+            'overall_motion_statistics': self._summary_stats(motion_means),
+            'overall_quality_statistics': self._summary_stats(quality_means)
+        }
+
+    def get_high_activity_segments(self, segments: List[VideoSegment]) -> List[VideoSegment]:
+        """Get segments whose activity level is 'high' or 'very_high'."""
+        high_activity_levels = {'high', 'very_high'}
+        return [seg for seg in segments if seg.activity_level in high_activity_levels]
+
+    def get_segments_by_type(self, segments: List[VideoSegment],
+                           segment_type: str) -> List[VideoSegment]:
+        """Get segments whose segment_type equals *segment_type*."""
+        return [seg for seg in segments if seg.segment_type == segment_type]
\ No newline at end of file