Spaces:

blacksinisterx
/

DetectifAI-Backend

Sleeping

Commit

2278049

1 Parent(s): 23769bb

feat: Full DetectifAI backend with B2 storage, DEMO_MODE, Stripe bypass

- Complete Flask backend with video processing, object detection, facial recognition
- Backblaze B2 cloud storage (replaced MinIO)
- DEMO_MODE support: set DEMO_MODE=true for Pro features without Stripe
- Lazy Stripe initialization (no crash if keys missing)
- Subscription routes with demo fallback
- Docker-ready deployment for HF Spaces

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.dockerignore +74 -0
DetectifAI_db/app_integrated.py +1250 -0
DetectifAI_db/caption_search.py +209 -0
DetectifAI_db/check_minio.py +26 -0
DetectifAI_db/check_video_storage.py +191 -0
DetectifAI_db/create_admin.py +120 -0
DetectifAI_db/database_seed.py +212 -0
DetectifAI_db/database_setup.py +375 -0
DetectifAI_db/env.example +19 -0
DetectifAI_db/faiss_captions.index +0 -0
DetectifAI_db/faiss_captions_idmap.json +12 -0
DetectifAI_db/migrate_stripe_integration.py +209 -0
DetectifAI_db/minio_config.py +37 -0
DetectifAI_db/requirements.txt +14 -0
DetectifAI_db/reset_minio.py +104 -0
DetectifAI_db/reset_users_collection.py +29 -0
DetectifAI_db/seed_stripe_plans.py +141 -0
DetectifAI_db/setup_database.py +44 -0
DetectifAI_db/setup_minio.py +91 -0
DetectifAI_db/setup_nlp_bucket.py +61 -0
DetectifAI_db/upload_caption_images.py +264 -0
DetectifAI_db/upload_captions.py +349 -0
DetectifAI_db/vector_index.py +348 -0
Dockerfile +92 -0
README.md +27 -6
alert_routes.py +361 -0
app.py +0 -0
behavior_analysis/action_recognition.py +381 -0
behavior_analysis/wallclimb.pt +3 -0
behavior_analysis/yolov11_wallclimb.pt +3 -0
behavior_analysis_integrator.py +580 -0
config.py +369 -0
core/video_processing.py +384 -0
database/config.py +173 -0
database/keyframe_repository.py +243 -0
database/models.py +432 -0
database/models_backup.py +330 -0
database/repositories.py +516 -0
database/repositories_old.py +653 -0
database/storage_logger.py +41 -0
database/video_compression_service.py +379 -0
database_video_service.py +1804 -0
detectifai_events.py +577 -0
event_aggregation.py +819 -0
event_clip_generator.py +390 -0
extract_upload_keyframes.py +240 -0
facial_recognition.py +926 -0
highlight_reel.py +542 -0
json_reports.py +575 -0
live_stream_processor.py +866 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,74 @@

+# Ignore everything we don't need in the Docker image
+__pycache__/
+*.pyc
+*.pyo
+venv/
+.venv/
+.env
+.env.example
+# Videos & test files
+*.mp4
+*.avi
+*.mov
+*.mkv
+*.mpeg
+*.wmv
+*.flv
+*.jpeg
+*.jpg
+download.jpeg
+images.jpeg
+# Large model files — downloaded at build time from HF Hub instead
+behavior_analysis/accident_detection.pt
+behavior_analysis/fight_detection.pt
+report_generation/models/qwen2.5-3b-instruct-q4_k_m.gguf
+report_generation/models/.cache/
+# Output directories
+video_processing_outputs/
+logs/
+uploads/
+temp_faces/
+# Test & debug files
+test_*.py
+check_*.py
+debug_*.py
+verify_*.py
+reproduce_issue.py
+fix_*.py
+clear_cache_and_test.py
+simple_test_report.py
+quick_fix_reports.py
+scan_imports_temp.py
+protected_api_example.py
+# Misc
+output*.txt
+verify_log.txt
+*.zip
+*.bat
+README.md
+BUCKET_NAMES.md
+VIDEO_CAPTIONING_MONGODB_INTEGRATION.md
+video_captioning_store/
+backfill_*.py
+create_subscriptions.py
+# Unnecessary sub-items
+behavior_analysis/action_recognition_outputs/
+video_captioning/video_captioning/captions.db
+video_captioning/video_captioning/tests/
+video_captioning/video_captioning/vector_store/
+video_captioning/video_captioning/example_usage.py
+video_captioning/video_captioning/install_requirements.py
+video_captioning/video_captioning/integration_example.py
+video_captioning/video_captioning/quick_test.py
+video_captioning/video_captioning/run_video_test.py
+video_captioning/video_captioning/simple_test.py
+video_captioning/video_captioning/test_runner.py
+video_captioning/video_captioning/working_test.py
+video_captioning/video_captioning/data_flow_diagram.md
+video_captioning/video_captioning/README.md

DetectifAI_db/app_integrated.py ADDED Viewed

	@@ -0,0 +1,1250 @@

+"""
+DetectifAI Flask Backend - AI-Powered CCTV Surveillance System with Database Integration
+Enhanced Flask API for:
+- Video upload and processing with DetectifAI security focus
+- Real-time processing status and results
+- Object detection with fire/weapon recognition
+- Security event analysis and threat assessment
+- Database integration with MongoDB and FAISS vector search
+- User authentication and authorization
+- Frontend integration for surveillance dashboard
+"""
+import os
+from datetime import datetime, timedelta, timezone
+from uuid import uuid4
+from flask import Flask, request, jsonify, send_file, send_from_directory, g
+from flask_cors import CORS
+from werkzeug.utils import secure_filename
+import threading
+import json
+import logging
+import jwt
+from dotenv import load_dotenv
+import numpy as np
+# Import DetectifAI components
+from main_pipeline import CompleteVideoProcessingPipeline
+from config import get_security_focused_config, VideoProcessingConfig
+# Import database components
+from pymongo import MongoClient
+from minio import Minio
+from minio.error import S3Error
+from vector_index import get_faiss_manager, generate_text_embedding, generate_visual_embedding
+# Try to import caption search (optional - may not be available)
+try:
+    from caption_search import get_caption_search_engine
+    CAPTION_SEARCH_AVAILABLE = True
+except ImportError as e:
+    logger.warning(f"Caption search not available: {e}")
+    CAPTION_SEARCH_AVAILABLE = False
+    get_caption_search_engine = None
+# Try to import DetectifAI-specific components
+try:
+    from detectifai_events import DetectifAIEventType, ThreatLevel
+    DETECTIFAI_EVENTS_AVAILABLE = True
+except ImportError:
+    DETECTIFAI_EVENTS_AVAILABLE = False
+    logging.warning("DetectifAI events module not available - using basic functionality")
+# === Load Environment ===
+# load_dotenv() runs before the os.getenv() lookups below
+# (presumably so values from a local .env file are visible to them).
+load_dotenv()
+# Connection settings; each is None when the variable is not set.
+MONGO_URI = os.getenv("MONGO_URI")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY")
+MINIO_BUCKET = os.getenv("MINIO_BUCKET")
+# NOTE(review): when JWT_SECRET is unset the tokens are signed with the
+# hard-coded string "defaultsecret", which is visible in this source file.
+# Confirm every deployment sets JWT_SECRET explicitly.
+JWT_SECRET = os.getenv("JWT_SECRET", "defaultsecret")
+# Initialize Flask app
+app = Flask(__name__)
+CORS(app, resources={r"/api/*": {"origins": "*"}})
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler('logs/detectifai_api.log')
+    ]
+)
+logger = logging.getLogger(__name__)
+# Configuration
+UPLOAD_FOLDER = 'uploads'
+OUTPUT_FOLDER = 'video_processing_outputs'
+ALLOWED_EXTENSIONS = {'mp4', 'avi', 'mov', 'mkv', 'wmv', 'flv'}
+MAX_CONTENT_LENGTH = 500 * 1024 * 1024  # 500MB max file size
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+app.config['MAX_CONTENT_LENGTH'] = MAX_CONTENT_LENGTH
+# Create necessary directories
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+os.makedirs(OUTPUT_FOLDER, exist_ok=True)
+os.makedirs('logs', exist_ok=True)
+# === MongoDB Atlas Setup ===
+# A single client is created at import time and shared by all request handlers.
+mongo = MongoClient(MONGO_URI)
+# presumably resolves to the database named in MONGO_URI - TODO confirm
+db = mongo.get_default_database()
+# Collections from schema
+admin = db.admin
+# `user` and `users` are two names for the same `users` collection.
+user = db.users  # Use 'users' to match database_setup.py
+users = db.users  # Alias for clarity
+video_file = db.video_file
+event = db.event
+event_clip = db.event_clip
+detected_faces = db.detected_faces
+face_matches = db.face_matches
+event_description = db.event_description
+event_caption = db.event_caption
+query = db.query
+query_result = db.query_result
+subscription_plan = db.subscription_plan
+user_subscription = db.user_subscription
+# === MinIO Setup ===
+# NOTE(review): secure=False is hard-coded - presumably this means the
+# connection is made without TLS; confirm this is intended outside local dev.
+minio_client = Minio(
+    MINIO_ENDPOINT,
+    access_key=MINIO_ACCESS_KEY,
+    secret_key=MINIO_SECRET_KEY,
+    secure=False
+)
+# Make sure the configured bucket exists. The two "already exists" error codes
+# are ignored; every other S3Error aborts start-up.
+try:
+    if not minio_client.bucket_exists(MINIO_BUCKET):
+        minio_client.make_bucket(MINIO_BUCKET)
+except S3Error as err:
+    if err.code != "BucketAlreadyOwnedByYou" and err.code != "BucketAlreadyExists":
+        raise
+# === FAISS Setup ===
+faiss_manager = get_faiss_manager()
+# Store processing status in memory (use Redis in production)
+# Keyed by video_id; entries are created on upload and mutated by the
+# background processing thread.
+processing_status = {}
+# === Auth Helpers ===
+def generate_jwt(user):
+    payload = {
+        "user_id": user["user_id"],
+        "email": user["email"],
+        "role": user.get("role", "user"),
+        "exp": datetime.now(timezone.utc) + timedelta(hours=24)
+    }
+    return jwt.encode(payload, JWT_SECRET, algorithm="HS256")
+def decode_jwt(token):
+    try:
+        return jwt.decode(token, JWT_SECRET, algorithms=["HS256"])
+    except jwt.ExpiredSignatureError:
+        return None
+    except jwt.InvalidTokenError:
+        return None
+def auth_required(role=None):
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            token = request.headers.get("Authorization", "").replace("Bearer ", "")
+            if not token:
+                return jsonify({"error": "missing token"}), 401
+            decoded = decode_jwt(token)
+            if not decoded:
+                return jsonify({"error": "invalid or expired token"}), 401
+            if role and decoded.get("role") != role:
+                return jsonify({"error": "unauthorized"}), 403
+            g.user = decoded
+            return func(*args, **kwargs)
+        wrapper.__name__ = func.__name__
+        return wrapper
+    return decorator
+def allowed_file(filename):
+    """Check if file extension is allowed"""
+    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+def extract_detectifai_results(pipeline_results):
+    """Extract DetectifAI-specific results from pipeline output"""
+    try:
+        detectifai_results = {
+            # Basic video metrics
+            'video_info': {
+                'total_keyframes': pipeline_results['outputs'].get('total_keyframes', 0),
+                'processing_time': pipeline_results['processing_stats'].get('total_processing_time', 0),
+                'output_directory': pipeline_results['outputs'].get('output_directory', '')
+            },
+            # Security detection results
+            'security_detection': {
+                'total_object_detections': pipeline_results['outputs'].get('total_object_detections', 0),
+                'total_object_events': pipeline_results['outputs'].get('total_object_events', 0),
+                'detectifai_events': pipeline_results['outputs'].get('detectifai_events', 0),
+                'fire_detections': 0,  # Will be populated from actual results
+                'weapon_detections': 0,
+                'security_alerts': []
+            },
+            # Event analysis
+            'event_analysis': {
+                'canonical_events': pipeline_results['outputs'].get('canonical_events', 0),
+                'total_motion_events': pipeline_results['outputs'].get('total_motion_events', 0),
+                'high_priority_events': 0,
+                'critical_events': 0
+            },
+            # Output files
+            'output_files': {
+                'keyframes_directory': os.path.join(pipeline_results['outputs'].get('output_directory', ''), 'frames'),
+                'reports': pipeline_results['outputs'].get('reports', {}),
+                'highlight_reels': pipeline_results['outputs'].get('highlight_reels', {}),
+                'compressed_video': pipeline_results['outputs'].get('compressed_video', '')
+            },
+            # System performance
+            'performance': {
+                'frames_processed': pipeline_results['processing_stats'].get('frames_processed', 0),
+                'frames_enhanced': pipeline_results['processing_stats'].get('frames_enhanced', 0),
+                'gpu_acceleration': pipeline_results['processing_stats'].get('gpu_used', False)
+            }
+        }
+        return detectifai_results
+    except Exception as e:
+        logger.error(f"Error extracting DetectifAI results: {e}")
+        return {'error': 'Failed to extract results'}
+def process_video_async(video_id, video_path, config_type='detectifai', user_id=None):
+    """Process video in background thread with DetectifAI focus and database integration
+    Runs the processing pipeline on *video_path*, stores the outcome in MongoDB
+    and FAISS, and reports progress through ``processing_status[video_id]``.
+    Args:
+        video_id: key of the existing ``processing_status`` entry and of the
+            ``video_file`` record to update.
+        video_path: path of the uploaded video on local disk.
+        config_type: 'detectifai' / 'security', 'high_recall' or 'balanced';
+            any other value selects the default VideoProcessingConfig.
+        user_id: accepted for the caller's convenience; not used in this body.
+    Pipeline, extraction and database failures are each caught, recorded in
+    ``processing_errors`` and do not stop the run; only an unexpected error
+    outside those stages marks the status as 'failed'.
+    """
+    try:
+        # The status entry is expected to exist already (created by the upload handler).
+        processing_status[video_id]['status'] = 'processing'
+        processing_status[video_id]['progress'] = 0
+        processing_status[video_id]['message'] = 'Initializing DetectifAI processing...'
+        # Select configuration with DetectifAI optimizations
+        if config_type == 'detectifai' or config_type == 'security':
+            config = get_security_focused_config()
+        # Removed robbery detection - using security focused config
+        elif config_type == 'high_recall':
+            # The specialised presets are optional; fall back when config.py lacks them.
+            try:
+                from config import get_high_recall_config
+                config = get_high_recall_config()
+            except ImportError:
+                config = get_security_focused_config()
+        elif config_type == 'balanced':
+            try:
+                from config import get_balanced_config
+                config = get_balanced_config()
+            except ImportError:
+                config = VideoProcessingConfig()
+        else:
+            config = VideoProcessingConfig()
+        # DetectifAI-specific configuration enhancements
+        # (these override whatever the selected preset specified)
+        config.enable_object_detection = True
+        config.enable_facial_recognition = True
+        config.keyframe_extraction_fps = 1.0  # Extract 1 frame per second for surveillance
+        config.enable_adaptive_processing = True
+        # Set custom output directory for this video
+        config.output_base_dir = os.path.join(OUTPUT_FOLDER, video_id)
+        # Initialize pipeline
+        pipeline = CompleteVideoProcessingPipeline(config)
+        # Update progress
+        processing_status[video_id]['progress'] = 10
+        processing_status[video_id]['message'] = 'Extracting keyframes for security analysis...'
+        # Process video with DetectifAI (with error tolerance)
+        output_name = os.path.splitext(os.path.basename(video_path))[0]
+        results = None
+        processing_errors = []
+        try:
+            results = pipeline.process_video_complete(video_path, output_name)
+            logger.info(f"✅ Core pipeline processing completed for {video_id}")
+        except Exception as pipeline_error:
+            logger.error(f"⚠️ Pipeline error (but continuing): {str(pipeline_error)}")
+            processing_errors.append(f"Pipeline: {str(pipeline_error)}")
+            # Create minimal results structure
+            # (it carries every key that the code below reads without a default)
+            results = {
+                'outputs': {
+                    'total_keyframes': 0,
+                    'total_events': 0,
+                    'total_motion_events': 0,
+                    'total_object_events': 0,
+                    'total_object_detections': 0,
+                    'canonical_events': [],
+                    'total_segments': 1,
+                    'highlight_reels': {},
+                    'reports': {},
+                    'compressed_video': ''
+                },
+                'processing_stats': {'total_processing_time': 0}
+            }
+        # Extract DetectifAI-specific results (with error tolerance)
+        detectifai_results = {}
+        try:
+            detectifai_results = extract_detectifai_results(results)
+        except Exception as extract_error:
+            logger.error(f"⚠️ Result extraction error (but continuing): {str(extract_error)}")
+            processing_errors.append(f"Extraction: {str(extract_error)}")
+            detectifai_results = {'security_detection': {}, 'event_analysis': {}, 'performance': {}}
+        # Store results in database
+        try:
+            # Update video file record with processing results
+            # NOTE(review): the record is marked "completed" even when the
+            # pipeline itself failed and the minimal fallback results are in use.
+            video_file.update_one(
+                {"video_id": video_id},
+                {
+                    "$set": {
+                        "processing_status": "completed",
+                        "processing_results": {
+                            "total_keyframes": results['outputs']['total_keyframes'],
+                            "total_events": results['outputs']['total_events'],
+                            "processing_time": results['processing_stats']['total_processing_time'],
+                            "detectifai_results": detectifai_results
+                        },
+                        "updated_at": datetime.now(timezone.utc)
+                    }
+                }
+            )
+            # Create events in database
+            # One `event` document plus one `event_description` document per canonical event.
+            for i, canonical_event in enumerate(results['outputs'].get('canonical_events', [])):
+                event_doc = {
+                    "event_id": str(uuid4()),
+                    "video_id": video_id,
+                    # start_time / end_time are multiplied by 1000 - presumably seconds; TODO confirm
+                    "start_timestamp_ms": int(canonical_event.get('start_time', 0) * 1000),
+                    "end_timestamp_ms": int(canonical_event.get('end_time', 0) * 1000),
+                    "confidence_score": canonical_event.get('importance', 0.0),
+                    "is_verified": False,
+                    "is_false_positive": False,
+                    "verified_at": None,
+                    "verified_by": None,
+                    # NOTE(review): called without arguments, so nothing about this
+                    # event is passed in - confirm what embedding is returned.
+                    "visual_embedding": generate_visual_embedding(),
+                    "bounding_boxes": canonical_event.get('bounding_boxes', {}),
+                    "event_type": canonical_event.get('event_type', 'motion_detection')
+                }
+                event.insert_one(event_doc)
+                # Add to FAISS index
+                faiss_manager.add_visual_embedding(event_doc["event_id"], event_doc["visual_embedding"])
+                # Create event description
+                description_doc = {
+                    "description_id": str(uuid4()),
+                    "event_id": event_doc["event_id"],
+                    "text_embedding": generate_text_embedding(f"Event {i+1}: {canonical_event.get('description', 'Motion detected')}"),
+                    "caption": canonical_event.get('description', f'Motion detected at {canonical_event.get("start_time", 0):.2f}s'),
+                    "confidence": canonical_event.get('importance', 0.0),
+                    "created_at": datetime.now(timezone.utc),
+                    "updated_at": datetime.now(timezone.utc)
+                }
+                event_description.insert_one(description_doc)
+                # Add to FAISS text index
+                faiss_manager.add_text_embedding(description_doc["description_id"], description_doc["text_embedding"])
+            logger.info(f"✅ Database integration completed for {video_id}")
+        except Exception as db_error:
+            # A failure part-way through the loop leaves the documents inserted so far in place.
+            logger.error(f"⚠️ Database integration error (but continuing): {str(db_error)}")
+            processing_errors.append(f"Database: {str(db_error)}")
+        # Always mark as completed (even with errors)
+        processing_status[video_id]['status'] = 'completed'
+        processing_status[video_id]['progress'] = 100
+        completion_message = 'DetectifAI processing completed successfully!'
+        if processing_errors:
+            completion_message = f'DetectifAI processing completed with warnings: {len(processing_errors)} non-critical errors'
+        processing_status[video_id]['message'] = completion_message
+        processing_status[video_id]['results'] = {
+            # Original results for backward compatibility
+            'total_keyframes': results['outputs']['total_keyframes'],
+            'total_events': results['outputs']['total_events'],
+            'total_motion_events': results['outputs'].get('total_motion_events', 0),
+            'total_object_events': results['outputs'].get('total_object_events', 0),
+            'total_object_detections': results['outputs'].get('total_object_detections', 0),
+            'canonical_events': results['outputs']['canonical_events'],
+            'total_segments': results['outputs']['total_segments'],
+            'processing_time': results['processing_stats']['total_processing_time'],
+            'highlight_reels': results['outputs'].get('highlight_reels', {}),
+            'reports': results['outputs'].get('reports', {}),
+            'compressed_video': results['outputs'].get('compressed_video', ''),
+            'output_directory': config.output_base_dir,
+            'object_detection_enabled': config.enable_object_detection,
+            # DetectifAI-specific results
+            'detectifai_results': detectifai_results,
+            'security_detection': detectifai_results.get('security_detection', {}),
+            'event_analysis': detectifai_results.get('event_analysis', {}),
+            'performance': detectifai_results.get('performance', {}),
+            # Processing status
+            'processing_errors': processing_errors,
+            'has_warnings': len(processing_errors) > 0
+        }
+        logger.info(f"Video {video_id} processed successfully")
+    except Exception as e:
+        # Reached for anything not handled by the per-stage handlers above,
+        # e.g. configuration or pipeline construction errors.
+        logger.error(f"Error processing video {video_id}: {str(e)}")
+        processing_status[video_id]['status'] = 'failed'
+        processing_status[video_id]['message'] = f'Error: {str(e)}'
+        processing_status[video_id]['error'] = str(e)
+# === API Endpoints ===
+@app.route('/')
+def index():
+    return jsonify({"message": "DetectifAI backend running with database integration"})
+@app.route('/api/health', methods=['GET'])
+def health_check():
+    """Health check endpoint"""
+    return jsonify({'status': 'healthy', 'timestamp': datetime.now().isoformat()})
+# === Authentication Endpoints ===
+@app.route("/api/register", methods=["POST"])
+def register():
+    data = request.json or {}
+    email = data.get("email")
+    password = data.get("password")
+    username = data.get("username", email.split("@")[0] if email else None)
+    if not email or not password:
+        return jsonify({"error": "email and password required"}), 400
+    if user.find_one({"email": email}):
+        return jsonify({"error": "email exists"}), 400
+    user_doc = {
+        "user_id": str(uuid4()),
+        "username": username,
+        "email": email,
+        "password": password,  # TODO: hash properly
+        "role": "user",
+        "created_at": datetime.now(timezone.utc),
+        "updated_at": datetime.now(timezone.utc),
+        "last_login": None
+    }
+    user.insert_one(user_doc)
+    token = generate_jwt(user_doc)
+    return jsonify({"token": token})
+@app.route("/api/login", methods=["POST", "OPTIONS"])
+def login():
+    if request.method == "OPTIONS":
+        return '', 200  # Handle preflight CORS request
+    data = request.json or {}
+    email = data.get("email")
+    password = data.get("password")
+    if not email or not password:
+        return jsonify({"error": "email and password required"}), 400
+    # Check against Mongo
+    user_doc = user.find_one({"email": email})
+    if not user_doc or user_doc.get("password") != password:
+        return jsonify({"error": "invalid credentials"}), 401
+    token = generate_jwt(user_doc)
+    return jsonify({
+        "message": "login successful",
+        "token": token,
+        "user": {
+            "user_id": user_doc["user_id"],
+            "username": user_doc.get("username"),
+            "email": user_doc["email"]
+        }
+    })
+# === Admin User Management Endpoints ===
+@app.route("/api/admin/users", methods=["GET"])
+@auth_required(role="admin")
+def get_all_users():
+    """Get all users - Admin only"""
+    try:
+        # Get query parameters for pagination and filtering
+        page = int(request.args.get("page", 1))
+        limit = int(request.args.get("limit", 50))
+        search = request.args.get("search", "")
+        role_filter = request.args.get("role", "")
+        status_filter = request.args.get("status", "")
+        # Build query
+        query = {}
+        if search:
+            query["$or"] = [
+                {"email": {"$regex": search, "$options": "i"}},
+                {"username": {"$regex": search, "$options": "i"}}
+            ]
+        if role_filter:
+            query["role"] = role_filter
+        if status_filter:
+            if status_filter == "active":
+                query["is_active"] = True
+            elif status_filter == "inactive":
+                query["is_active"] = False
+        # Get total count
+        total = users.count_documents(query)
+        # Get users with pagination
+        skip = (page - 1) * limit
+        user_list = list(users.find(query).skip(skip).limit(limit).sort("created_at", -1))
+        # Remove sensitive data
+        for u in user_list:
+            u["_id"] = str(u["_id"])
+            u.pop("password", None)
+            u.pop("password_hash", None)
+        return jsonify({
+            "users": user_list,
+            "total": total,
+            "page": page,
+            "limit": limit,
+            "pages": (total + limit - 1) // limit
+        })
+    except Exception as e:
+        logger.error(f"Error fetching users: {str(e)}")
+        return jsonify({"error": "Failed to fetch users"}), 500
+@app.route("/api/admin/users", methods=["POST"])
+@auth_required(role="admin")
+def create_user():
+    """Create a new user - Admin only"""
+    try:
+        data = request.json or {}
+        email = data.get("email")
+        password = data.get("password")
+        username = data.get("username") or data.get("name")
+        role = data.get("role", "user")
+        if not email or not password:
+            return jsonify({"error": "email and password required"}), 400
+        # Check if user already exists
+        if users.find_one({"email": email}):
+            return jsonify({"error": "User with this email already exists"}), 400
+        # Create user document
+        user_doc = {
+            "user_id": str(uuid4()),
+            "username": username or email.split("@")[0],
+            "email": email,
+            "password": password,  # TODO: hash properly with bcrypt
+            "password_hash": password,  # For compatibility
+            "role": role,
+            "is_active": True,
+            "profile_data": {},
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc),
+            "last_login": None
+        }
+        users.insert_one(user_doc)
+        # Remove sensitive data before returning
+        user_doc["_id"] = str(user_doc["_id"])
+        user_doc.pop("password", None)
+        user_doc.pop("password_hash", None)
+        return jsonify({
+            "message": "User created successfully",
+            "user": user_doc
+        }), 201
+    except Exception as e:
+        logger.error(f"Error creating user: {str(e)}")
+        return jsonify({"error": "Failed to create user"}), 500
+@app.route("/api/admin/users/<user_id>", methods=["GET"])
+@auth_required(role="admin")
+def get_user(user_id):
+    """Get a specific user by ID - Admin only"""
+    try:
+        user_doc = users.find_one({"user_id": user_id})
+        if not user_doc:
+            return jsonify({"error": "User not found"}), 404
+        # Remove sensitive data
+        user_doc["_id"] = str(user_doc["_id"])
+        user_doc.pop("password", None)
+        user_doc.pop("password_hash", None)
+        return jsonify({"user": user_doc})
+    except Exception as e:
+        logger.error(f"Error fetching user: {str(e)}")
+        return jsonify({"error": "Failed to fetch user"}), 500
+@app.route("/api/admin/users/<user_id>", methods=["PUT"])
+@auth_required(role="admin")
+def update_user(user_id):
+    """Update a user - Admin only"""
+    try:
+        data = request.json or {}
+        user_doc = users.find_one({"user_id": user_id})
+        if not user_doc:
+            return jsonify({"error": "User not found"}), 404
+        # Update allowed fields
+        update_data = {}
+        if "username" in data or "name" in data:
+            update_data["username"] = data.get("username") or data.get("name")
+        if "email" in data:
+            # Check if new email already exists
+            existing = users.find_one({"email": data["email"], "user_id": {"$ne": user_id}})
+            if existing:
+                return jsonify({"error": "Email already in use"}), 400
+            update_data["email"] = data["email"]
+        if "role" in data:
+            update_data["role"] = data["role"]
+        if "is_active" in data:
+            update_data["is_active"] = data["is_active"]
+        if "password" in data and data["password"]:
+            update_data["password"] = data["password"]
+            update_data["password_hash"] = data["password"]
+        if not update_data:
+            return jsonify({"error": "No valid fields to update"}), 400
+        update_data["updated_at"] = datetime.now(timezone.utc)
+        users.update_one({"user_id": user_id}, {"$set": update_data})
+        # Fetch updated user
+        updated_user = users.find_one({"user_id": user_id})
+        updated_user["_id"] = str(updated_user["_id"])
+        updated_user.pop("password", None)
+        updated_user.pop("password_hash", None)
+        return jsonify({
+            "message": "User updated successfully",
+            "user": updated_user
+        })
+    except Exception as e:
+        logger.error(f"Error updating user: {str(e)}")
+        return jsonify({"error": "Failed to update user"}), 500
+@app.route("/api/admin/users/<user_id>", methods=["DELETE"])
+@auth_required(role="admin")
+def delete_user(user_id):
+    """Delete a user - Admin only"""
+    try:
+        user_doc = users.find_one({"user_id": user_id})
+        if not user_doc:
+            return jsonify({"error": "User not found"}), 404
+        # Prevent deleting yourself
+        current_user = g.user
+        if current_user.get("user_id") == user_id:
+            return jsonify({"error": "Cannot delete your own account"}), 400
+        users.delete_one({"user_id": user_id})
+        return jsonify({"message": "User deleted successfully"})
+    except Exception as e:
+        logger.error(f"Error deleting user: {str(e)}")
+        return jsonify({"error": "Failed to delete user"}), 500
+# === Video Processing Endpoints ===
+@app.route('/api/video/upload', methods=['POST'])
+@app.route('/api/upload', methods=['POST'])
+@auth_required()
+def upload_video():
+    """Upload video endpoint with database integration"""
+    try:
+        # Check if file is present
+        if 'video' not in request.files:
+            return jsonify({'error': 'No video file provided'}), 400
+        file = request.files['video']
+        if file.filename == '':
+            return jsonify({'error': 'No file selected'}), 400
+        if not allowed_file(file.filename):
+            return jsonify({'error': 'Invalid file type. Allowed: mp4, avi, mov, mkv, wmv, flv'}), 400
+        # Get processing configuration (default to DetectifAI optimized)
+        config_type = request.form.get('config_type', 'detectifai')
+        # Generate unique video ID
+        video_id = f"video_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{os.urandom(4).hex()}"
+        # Save uploaded file
+        filename = secure_filename(file.filename)
+        video_path = os.path.join(app.config['UPLOAD_FOLDER'], f"{video_id}_{filename}")
+        file.save(video_path)
+        # Get file size
+        file.seek(0, os.SEEK_END)
+        file_size = file.tell()
+        file.seek(0)
+        # Store in MinIO using standardized paths
+        from minio_config import VIDEOS_BUCKET, get_minio_paths
+        minio_paths = get_minio_paths(video_id, filename)
+        object_name = minio_paths["original"]
+        try:
+            with open(video_path, 'rb') as file_data:
+                minio_client.put_object(
+                    VIDEOS_BUCKET,
+                    object_name,
+                    file_data,
+                    file_size,
+                    content_type='video/mp4'
+                )
+                logger.info(f"✅ Video uploaded to MinIO: {object_name}")
+        except Exception as e:
+            logger.error(f"❌ MinIO upload failed: {e}")
+            raise
+        # Create video record in database
+        video_doc = {
+            "video_id": video_id,
+            "user_id": g.user.get("user_id"),
+            "file_path": video_path,
+            "minio_object_key": object_name,
+            "minio_bucket": MINIO_BUCKET,
+            "codec": None,
+            "fps": None,
+            "upload_date": datetime.now(timezone.utc),
+            "duration_secs": None,
+            "file_size_bytes": file_size,
+            "meta_data": {},
+            "processing_status": "uploaded"
+        }
+        video_file.insert_one(video_doc)
+        # Initialize processing status
+        processing_status[video_id] = {
+            'video_id': video_id,
+            'filename': filename,
+            'status': 'queued',
+            'progress': 0,
+            'message': 'Video uploaded successfully. Processing queued.',
+            'uploaded_at': datetime.now().isoformat(),
+            'config_type': config_type
+        }
+        # Start background processing
+        thread = threading.Thread(
+            target=process_video_async,
+            args=(video_id, video_path, config_type, g.user.get("user_id"))
+        )
+        thread.daemon = True
+        thread.start()
+        return jsonify({
+            'success': True,
+            'video_id': video_id,
+            'message': 'Video uploaded successfully. Processing started.',
+            'status_url': f'/api/status/{video_id}'
+        }), 200
+    except Exception as e:
+        logger.error(f"Upload error: {str(e)}")
+        return jsonify({'error': str(e)}), 500
+@app.route('/api/video/status/<video_id>', methods=['GET'])
+@app.route('/api/status/<video_id>', methods=['GET'])
+@auth_required()
+def get_status(video_id):
+    """Get processing status for a video"""
+    # Check memory first
+    if video_id in processing_status:
+        return jsonify(processing_status[video_id]), 200
+    # Check database for video record
+    video_doc = video_file.find_one({"video_id": video_id})
+    if video_doc:
+        status = {
+            'video_id': video_id,
+            'filename': video_doc.get('file_path', '').split('/')[-1],
+            'status': video_doc.get('processing_status', 'unknown'),
+            'progress': 100 if video_doc.get('processing_status') == 'completed' else 0,
+            'message': f"Video status: {video_doc.get('processing_status', 'unknown')}",
+            'uploaded_at': video_doc.get('upload_date', '').isoformat() if video_doc.get('upload_date') else '',
+            'results': video_doc.get('processing_results', {})
+        }
+        return jsonify(status), 200
+    return jsonify({'error': 'Video not found'}), 404
+# === Database Query Endpoints ===
+@app.route("/api/videos", methods=["GET"])
+@auth_required()
+def list_videos():
+    """List all videos for the authenticated user"""
+    user_id = g.user.get("user_id")
+    vids = list(video_file.find({"user_id": user_id}, {"_id": 0}))
+    return jsonify(vids)
+@app.route("/api/video/<video_id>", methods=["GET"])
+@auth_required()
+def get_video(video_id):
+    """Get specific video details"""
+    user_id = g.user.get("user_id")
+    vid = video_file.find_one({"video_id": video_id, "user_id": user_id}, {"_id": 0})
+    if not vid:
+        return jsonify({"error": "not found"}), 404
+    return jsonify(vid)
+@app.route("/api/video/<video_id>/events", methods=["GET"])
+@auth_required()
+def get_video_events(video_id):
+    """Get events for a specific video"""
+    user_id = g.user.get("user_id")
+    # Verify user owns the video
+    video_doc = video_file.find_one({"video_id": video_id, "user_id": user_id})
+    if not video_doc:
+        return jsonify({"error": "video not found or access denied"}), 404
+    events_list = list(event.find({"video_id": video_id}, {"_id": 0}))
+    return jsonify(events_list)
+@app.route("/api/event/<event_id>", methods=["GET"])
+@auth_required()
+def get_event_details(event_id):
+    """Get event details with descriptions"""
+    event_doc = event.find_one({"event_id": event_id}, {"_id": 0})
+    if not event_doc:
+        return jsonify({"error": "event not found"}), 404
+    # Get descriptions for this event
+    descriptions = list(event_description.find({"event_id": event_id}, {"_id": 0}))
+    event_doc["descriptions"] = descriptions
+    return jsonify(event_doc)
+# === Search Endpoints ===
+@app.route("/api/search", methods=["GET"])
+@auth_required()
+def search():
+    """Simple text search in event descriptions"""
+    q = request.args.get("q", "")
+    user_id = g.user.get("user_id")
+    # Get user's videos first
+    user_videos = [v["video_id"] for v in video_file.find({"user_id": user_id}, {"video_id": 1})]
+    # Search in descriptions for user's videos
+    matches = list(event_description.find({
+        "caption": {"$regex": q, "$options": "i"},
+        "event_id": {"$in": [e["event_id"] for e in event.find({"video_id": {"$in": user_videos}}, {"event_id": 1})]}
+    }, {"_id": 0}))
+    return jsonify(matches)
+@app.route("/api/search-vector", methods=["POST"])
+@auth_required()
+def search_vector():
+    """Vector search for similar text embeddings using FAISS"""
+    data = request.json or {}
+    query_text = data.get("query_text")
+    k = data.get("k", 10)  # Number of results to return
+    if not query_text:
+        return jsonify({"error": "query_text is required"}), 400
+    try:
+        # Generate embedding for the query text
+        query_embedding = generate_text_embedding(query_text)
+        # Search FAISS index
+        results = faiss_manager.search_text_embeddings(query_embedding, k)
+        return jsonify({
+            "query_text": query_text,
+            "results": results,
+            "total_results": len(results)
+        })
+    except Exception as e:
+        return jsonify({"error": f"Search failed: {str(e)}"}), 500
+@app.route("/api/search-visual", methods=["POST"])
+@auth_required()
+def search_visual():
+    """Vector search for similar visual embeddings using FAISS"""
+    data = request.json or {}
+    query_embedding = data.get("query_embedding")
+    k = data.get("k", 10)  # Number of results to return
+    if not query_embedding:
+        return jsonify({"error": "query_embedding is required"}), 400
+    if not isinstance(query_embedding, list):
+        return jsonify({"error": "query_embedding must be a list of floats"}), 400
+    try:
+        # Search FAISS index
+        results = faiss_manager.search_visual_embeddings(query_embedding, k)
+        return jsonify({
+            "query_embedding_dim": len(query_embedding),
+            "results": results,
+            "total_results": len(results)
+        })
+    except Exception as e:
+        return jsonify({"error": f"Visual search failed: {str(e)}"}), 500
+@app.route("/api/search/captions", methods=["POST"])
+@auth_required()
+def search_captions():
+    """Search captions using FAISS index and sentence transformers"""
+    try:
+        if not CAPTION_SEARCH_AVAILABLE:
+            return jsonify({
+                "error": "Caption search not available",
+                "message": "Caption search module not installed or not available"
+            }), 503
+        data = request.json or {}
+        query_text = data.get("query", "").strip()
+        top_k = data.get("top_k", 10)
+        min_score = data.get("min_score", 0.0)
+        if not query_text:
+            return jsonify({"error": "query is required"}), 400
+        # Get caption search engine
+        search_engine = get_caption_search_engine()
+        if not search_engine or not search_engine.is_ready():
+            return jsonify({
+                "error": "Caption search engine not ready",
+                "stats": search_engine.get_stats() if search_engine else {}
+            }), 503
+        # Perform search
+        results = search_engine.search(query_text, top_k=top_k, min_score=min_score)
+        # Format results for frontend
+        formatted_results = []
+        for result in results:
+            video_ref = result.get("video_reference", {})
+            minio_path = video_ref.get("minio_path", "")
+            object_name = video_ref.get("object_name", "")
+            # Generate MinIO URL for the image/video
+            image_url = None
+            if object_name:
+                try:
+                    bucket = video_ref.get("bucket", "nlp-images")
+                    # Create bucket if it doesn't exist
+                    try:
+                        if not minio_client.bucket_exists(bucket):
+                            logger.info(f"Creating MinIO bucket: {bucket}")
+                            minio_client.make_bucket(bucket)
+                    except S3Error as e:
+                        if e.code != "BucketAlreadyOwnedByYou" and e.code != "BucketAlreadyExists":
+                            logger.warning(f"Could not create bucket {bucket}: {e}")
+                    # Generate presigned URL for MinIO object (valid for 1 hour)
+                    from datetime import timedelta
+                    image_url = minio_client.presigned_get_object(
+                        bucket,
+                        object_name,
+                        expires=timedelta(hours=1)
+                    )
+                except Exception as e:
+                    logger.warning(f"Could not generate MinIO URL: {e}")
+                    # Fallback: use unified image serving endpoint
+                    bucket = video_ref.get("bucket", "nlp-images")
+                    image_url = f"/api/minio/image/{bucket}/{object_name}"
+            formatted_result = {
+                "id": result.get("description_id"),
+                "event_id": result.get("event_id"),
+                "description": result.get("caption", ""),
+                "caption": result.get("caption", ""),
+                "confidence": result.get("confidence", 0.0),
+                "similarity_score": result.get("similarity_score", 0.0),
+                "thumbnail": image_url,
+                "video_reference": video_ref,
+                "timestamp": result.get("created_at"),
+                "zone": "N/A"  # Can be enhanced with actual zone data
+            }
+            formatted_results.append(formatted_result)
+        return jsonify({
+            "query": query_text,
+            "results": formatted_results,
+            "total_results": len(formatted_results),
+            "stats": search_engine.get_stats()
+        })
+    except Exception as e:
+        logger.error(f"Error in caption search: {e}")
+        return jsonify({"error": f"Search failed: {str(e)}"}), 500
+# === FAISS Management Endpoints ===
+@app.route("/api/rebuild-indices", methods=["POST"])
+@auth_required()
+def rebuild_indices():
+    """Rebuild FAISS indices from MongoDB data"""
+    try:
+        # Rebuild both indices
+        faiss_manager.rebuild_text_index()
+        faiss_manager.rebuild_visual_index()
+        # Get updated stats
+        stats = faiss_manager.get_index_stats()
+        return jsonify({
+            "message": "Indices rebuilt successfully",
+            "stats": stats
+        })
+    except Exception as e:
+        return jsonify({"error": f"Failed to rebuild indices: {str(e)}"}), 500
+@app.route("/api/index-stats", methods=["GET"])
+@auth_required()
+def get_index_stats():
+    """Get statistics about FAISS indices"""
+    try:
+        stats = faiss_manager.get_index_stats()
+        return jsonify(stats)
+    except Exception as e:
+        return jsonify({"error": f"Failed to get index stats: {str(e)}"}), 500
+# === Legacy DetectifAI Endpoints (for backward compatibility) ===
+@app.route('/api/results/<video_id>', methods=['GET'])
+@auth_required()
+def get_results(video_id):
+    """Get processing results for a video"""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({
+            'error': 'Processing not completed',
+            'current_status': status['status']
+        }), 400
+    return jsonify(status.get('results', {})), 200
+@app.route('/api/video/results/<video_id>', methods=['GET'])
+@auth_required()
+def get_video_results(video_id):
+    """Get video processing results with availability flags"""
+    # First check if video is in memory status
+    if video_id in processing_status:
+        status = processing_status[video_id]
+        if status['status'] != 'completed':
+            return jsonify({
+                'error': 'Processing not completed',
+                'current_status': status['status']
+            }), 400
+        # Check if status has results structure (normal processing)
+        if 'results' in status and 'output_directory' in status['results']:
+            output_dir = status['results']['output_directory']
+        else:
+            # Fallback to standard directory structure
+            output_dir = os.path.join('video_processing_outputs', video_id)
+    else:
+        # Check database for video record
+        video_doc = video_file.find_one({"video_id": video_id})
+        if not video_doc:
+            return jsonify({'error': 'Video not found'}), 404
+        output_dir = os.path.join('video_processing_outputs', video_id)
+        if not os.path.exists(output_dir):
+            return jsonify({'error': 'Video processing results not found'}), 404
+        logger.info(f"📁 Found video files on disk for {video_id}, recovering results")
+    # Check for compressed video
+    compressed_dir = os.path.join(output_dir, 'compressed')
+    compressed_video_available = False
+    compressed_video_url = None
+    if os.path.exists(compressed_dir):
+        video_files = [f for f in os.listdir(compressed_dir) if f.endswith('.mp4')]
+        if video_files:
+            compressed_video_available = True
+            compressed_video_url = f'/api/video/compressed/{video_id}'
+    # Check for keyframes
+    frames_dir = os.path.join(output_dir, 'frames')
+    keyframes_available = os.path.exists(frames_dir) and len([f for f in os.listdir(frames_dir) if f.endswith('.jpg')]) > 0
+    keyframes_count = len([f for f in os.listdir(frames_dir) if f.endswith('.jpg')]) if keyframes_available else 0
+    # Check for reports
+    reports_dir = os.path.join(output_dir, 'reports')
+    reports_available = os.path.exists(reports_dir)
+    report_files = []
+    if reports_available:
+        report_files = [f for f in os.listdir(reports_dir) if f.endswith('.json')]
+    return jsonify({
+        'video_id': video_id,
+        'compressed_video_available': compressed_video_available,
+        'compressed_video_url': compressed_video_url,
+        'keyframes_available': keyframes_available,
+        'keyframes_count': keyframes_count,
+        'keyframes_url': f'/api/video/keyframes/{video_id}',
+        'reports_available': reports_available,
+        'reports': report_files
+    }), 200
+# === File Serving Endpoints ===
+@app.route('/api/video/keyframes/<video_id>', methods=['GET'])
+@app.route('/api/keyframes/<video_id>', methods=['GET'])
+@auth_required()
+def get_keyframes(video_id):
+    """Get list of extracted keyframes with DetectifAI annotations"""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    output_dir = status['results']['output_directory']
+    frames_dir = os.path.join(output_dir, 'frames')
+    if not os.path.exists(frames_dir):
+        return jsonify({'error': 'Frames directory not found'}), 404
+    # Load detection metadata if available
+    detection_metadata = {}
+    detection_metadata_path = os.path.join(output_dir, 'detection_metadata.json')
+    if os.path.exists(detection_metadata_path):
+        try:
+            with open(detection_metadata_path, 'r') as f:
+                detection_metadata = json.load(f)
+        except Exception as e:
+            logger.warning(f"Could not load detection metadata: {e}")
+    # Get filter parameter
+    filter_detections = request.args.get('filter_detections', 'false').lower() == 'true'
+    keyframes = []
+    frames_with_detections = {item['original_path']: item for item in detection_metadata.get('detection_summary', [])}
+    for filename in sorted(os.listdir(frames_dir)):
+        if filename.endswith('.jpg') and not filename.endswith('_annotated.jpg'):
+            # Extract timestamp from filename
+            timestamp = 0.0
+            try:
+                if '_' in filename:
+                    timestamp_part = filename.split('_')[1].replace('s', '').replace('.jpg', '')
+                    timestamp = float(timestamp_part)
+            except:
+                pass
+            frame_path = os.path.join(frames_dir, filename)
+            has_detections = frame_path in frames_with_detections
+            # Skip frames without detections if filtering is enabled
+            if filter_detections and not has_detections:
+                continue
+            keyframe_data = {
+                'filename': filename,
+                'timestamp': timestamp,
+                'url': f'/api/keyframe/{video_id}/{filename}',
+                'has_detections': has_detections
+            }
+            # Add detection details if available
+            if has_detections:
+                detection_info = frames_with_detections[frame_path]
+                keyframe_data.update({
+                    'detection_count': detection_info.get('detection_count', 0),
+                    'objects': detection_info.get('objects', []),
+                    'confidence_avg': detection_info.get('confidence_avg', 0.0)
+                })
+            keyframes.append(keyframe_data)
+    return jsonify({
+        'video_id': video_id,
+        'total_keyframes': detection_metadata.get('total_keyframes', len(keyframes)),
+        'keyframes_with_detections': detection_metadata.get('frames_with_detections', 0),
+        'keyframes': keyframes,
+        'objects_detected': detection_metadata.get('objects_detected', {}),
+        'filter_applied': filter_detections
+    }), 200
+@app.route('/api/keyframe/<video_id>/<filename>', methods=['GET'])
+@auth_required()
+def get_keyframe_image(video_id, filename):
+    """Serve keyframe image"""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    output_dir = status['results']['output_directory']
+    frames_dir = os.path.join(output_dir, 'frames')
+    return send_from_directory(frames_dir, filename)
+@app.route('/api/video/compressed/<video_id>', methods=['GET'])
+@auth_required()
+def get_compressed_video(video_id):
+    """Serve compressed video"""
+    if video_id not in processing_status:
+        return jsonify({'error': 'Video not found'}), 404
+    status = processing_status[video_id]
+    if status['status'] != 'completed':
+        return jsonify({'error': 'Processing not completed'}), 400
+    output_dir = status['results']['output_directory']
+    compressed_dir = os.path.join(output_dir, 'compressed')
+    if not os.path.exists(compressed_dir):
+        return jsonify({'error': 'Compressed video directory not found'}), 404
+    # Find the compressed video file
+    video_files = [f for f in os.listdir(compressed_dir) if f.endswith('.mp4')]
+    if not video_files:
+        return jsonify({'error': 'Compressed video file not found'}), 404
+    # Use the first video file found (should only be one)
+    video_filename = video_files[0]
+    return send_from_directory(compressed_dir, video_filename)
+if __name__ == '__main__':
+    logger.info("Starting DetectifAI Flask API server with database integration...")
+    app.run(host='0.0.0.0', port=5000, debug=True)

DetectifAI_db/caption_search.py ADDED Viewed

	@@ -0,0 +1,209 @@

+"""
+Caption Search Module for DetectifAI
+This module provides caption-based search functionality using FAISS index
+and MongoDB for retrieving video descriptions based on text queries.
+"""
+import os
+import json
+import logging
+import numpy as np
+import faiss
+from typing import List, Dict, Optional, Tuple
+from pymongo import MongoClient
+from dotenv import load_dotenv
+# Optional import for sentence transformers
+try:
+    from sentence_transformers import SentenceTransformer
+    SENTENCE_TRANSFORMERS_AVAILABLE = True
+except ImportError:
+    SENTENCE_TRANSFORMERS_AVAILABLE = False
+    logging.warning("sentence-transformers not available - caption search will not work")
+load_dotenv()
+logger = logging.getLogger(__name__)
+# Paths for FAISS index and id map (both are looked up next to this module)
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_captions.index")
+FAISS_IDMAP_PATH = os.path.join(BASE_DIR, "faiss_captions_idmap.json")
+# MongoDB connection string; overridable through the MONGO_URI environment
+# variable. The database named in the URI is the one the engine opens via
+# get_default_database().
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+# Embedding model name passed to SentenceTransformer for query embeddings
+EMBEDDING_MODEL = "all-mpnet-base-v2"
+EMBEDDING_DIM = 768  # Dimension for all-mpnet-base-v2; reported by get_stats()
+class CaptionSearchEngine:
+    """Search engine for caption-based video search using FAISS"""
+    def __init__(self):
+        """Initialize the caption search engine"""
+        self.faiss_index = None
+        self.id_map = {}  # Maps FAISS index -> description_id
+        self.embedding_model = None
+        self.mongo_client = None
+        self.db = None
+        self.collection = None
+        # Initialize components
+        self._load_faiss_index()
+        self._load_embedding_model()
+        self._connect_mongodb()
+    def _load_faiss_index(self):
+        """Load FAISS index and id map from disk"""
+        try:
+            if os.path.exists(FAISS_INDEX_PATH):
+                self.faiss_index = faiss.read_index(FAISS_INDEX_PATH)
+                logger.info(f"✅ Loaded FAISS index from {FAISS_INDEX_PATH}")
+                logger.info(f"   Index size: {self.faiss_index.ntotal} vectors")
+            else:
+                logger.warning(f"⚠️ FAISS index not found at {FAISS_INDEX_PATH}")
+                return
+            if os.path.exists(FAISS_IDMAP_PATH):
+                with open(FAISS_IDMAP_PATH, 'r', encoding='utf-8') as f:
+                    id_map_list = json.load(f)
+                    # Convert list to dict: index -> description_id
+                    self.id_map = {i: desc_id for i, desc_id in enumerate(id_map_list)}
+                logger.info(f"✅ Loaded FAISS id map from {FAISS_IDMAP_PATH}")
+                logger.info(f"   Mapped {len(self.id_map)} indices")
+            else:
+                logger.warning(f"⚠️ FAISS id map not found at {FAISS_IDMAP_PATH}")
+        except Exception as e:
+            logger.error(f"❌ Error loading FAISS index: {e}")
+            self.faiss_index = None
+    def _load_embedding_model(self):
+        """Load sentence transformer model for generating query embeddings"""
+        if not SENTENCE_TRANSFORMERS_AVAILABLE:
+            logger.warning("⚠️ sentence-transformers not available - cannot generate embeddings")
+            return
+        try:
+            logger.info(f"Loading embedding model: {EMBEDDING_MODEL}...")
+            self.embedding_model = SentenceTransformer(EMBEDDING_MODEL)
+            logger.info(f"✅ Loaded embedding model: {EMBEDDING_MODEL}")
+        except Exception as e:
+            logger.error(f"❌ Error loading embedding model: {e}")
+            self.embedding_model = None
+    def _connect_mongodb(self):
+        """Connect to MongoDB"""
+        try:
+            self.mongo_client = MongoClient(MONGO_URI)
+            self.db = self.mongo_client.get_default_database()
+            self.collection = self.db["event_descriptions"]
+            logger.info("✅ Connected to MongoDB")
+        except Exception as e:
+            logger.error(f"❌ Error connecting to MongoDB: {e}")
+            self.mongo_client = None
+    def is_ready(self) -> bool:
+        """Check if the search engine is ready to use"""
+        return (
+            self.faiss_index is not None and
+            self.embedding_model is not None and
+            self.mongo_client is not None and
+            self.faiss_index.ntotal > 0
+        )
+    def search(self, query_text: str, top_k: int = 10, min_score: float = 0.0) -> List[Dict]:
+        """
+        Search for captions similar to the query text
+        Args:
+            query_text: Text query to search for
+            top_k: Number of results to return
+            min_score: Minimum similarity score threshold
+        Returns:
+            List of result dictionaries with caption, video reference, and similarity score
+        """
+        if not self.is_ready():
+            logger.warning("⚠️ Search engine not ready - missing components")
+            return []
+        try:
+            # Generate query embedding
+            query_embedding = self.embedding_model.encode(
+                query_text,
+                normalize_embeddings=True,
+                show_progress_bar=False
+            ).astype("float32")
+            # Reshape for FAISS (1, dim)
+            query_embedding = query_embedding.reshape(1, -1)
+            # Search FAISS index
+            k = min(top_k, self.faiss_index.ntotal)
+            scores, indices = self.faiss_index.search(query_embedding, k)
+            # Process results
+            results = []
+            for score, idx in zip(scores[0], indices[0]):
+                if idx < 0 or idx not in self.id_map:
+                    continue
+                if score < min_score:
+                    continue
+                description_id = self.id_map[idx]
+                # Fetch document from MongoDB
+                doc = self.collection.find_one(
+                    {"description_id": description_id},
+                    {"_id": 0}
+                )
+                if doc:
+                    result = {
+                        "description_id": doc.get("description_id"),
+                        "event_id": doc.get("event_id"),
+                        "caption": doc.get("caption"),
+                        "confidence": doc.get("confidence", 0.0),
+                        "similarity_score": float(score),
+                        "video_reference": doc.get("video_reference", {}),
+                        "created_at": doc.get("created_at").isoformat() if doc.get("created_at") else None
+                    }
+                    results.append(result)
+            logger.info(f"✅ Found {len(results)} results for query: '{query_text[:50]}...'")
+            return results
+        except Exception as e:
+            logger.error(f"❌ Error during search: {e}")
+            return []
+    def get_stats(self) -> Dict:
+        """Get statistics about the search engine"""
+        return {
+            "faiss_index_loaded": self.faiss_index is not None,
+            "faiss_index_size": self.faiss_index.ntotal if self.faiss_index else 0,
+            "id_map_size": len(self.id_map),
+            "embedding_model_loaded": self.embedding_model is not None,
+            "embedding_model": EMBEDDING_MODEL if self.embedding_model else None,
+            "embedding_dim": EMBEDDING_DIM,
+            "mongodb_connected": self.mongo_client is not None,
+            "ready": self.is_ready()
+        }
+# Global instance
+_caption_search_engine = None
+def get_caption_search_engine() -> CaptionSearchEngine:
+    """Get the global caption search engine instance"""
+    global _caption_search_engine
+    if _caption_search_engine is None:
+        _caption_search_engine = CaptionSearchEngine()
+    return _caption_search_engine

DetectifAI_db/check_minio.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from minio import Minio
+from dotenv import load_dotenv
+import os
+# Load environment variables
+load_dotenv()
+# MinIO client setup
+client = Minio(
+    os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com"),
+    access_key=os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001"),
+    secret_key=os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA"),
+    secure=os.getenv("MINIO_SECURE", "true").lower() == "true",
+    region=os.getenv("MINIO_REGION", "eu-central-003")
+)
+# Check if bucket exists
+bucket_name = "detectifai-videos"
+found = client.bucket_exists(bucket_name)
+print(f"Bucket '{bucket_name}' exists: {found}")
+if found:
+    print("\nListing objects in bucket:")
+    objects = client.list_objects(bucket_name, recursive=True)
+    for obj in objects:
+        print(f"- {obj.object_name} (size: {obj.size} bytes)")

DetectifAI_db/check_video_storage.py ADDED Viewed

	@@ -0,0 +1,191 @@

+"""
+Utility script to validate and fix video storage
+"""
+import os
+import sys
+from datetime import datetime
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from database.config import DatabaseManager
+from database.models import VideoFileModel
+import logging
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def check_video_storage():
+    """Check and validate video storage in MongoDB and MinIO"""
+    db_manager = DatabaseManager()
+    # 1. Check MongoDB video records
+    logger.info("Checking MongoDB video records...")
+    video_collection = db_manager.db.video_file
+    videos = list(video_collection.find({}))
+    logger.info(f"Found {len(videos)} video records in MongoDB")
+    # 2. Check MinIO storage
+    logger.info("\nChecking MinIO storage...")
+    try:
+        # Check video bucket
+        video_objects = list(db_manager.minio_client.list_objects(
+            db_manager.config.minio_video_bucket,
+            recursive=True
+        ))
+        logger.info(f"Found {len(video_objects)} objects in video bucket")
+        # Check keyframe bucket
+        keyframe_objects = list(db_manager.minio_client.list_objects(
+            db_manager.config.minio_keyframe_bucket,
+            recursive=True
+        ))
+        logger.info(f"Found {len(keyframe_objects)} objects in keyframe bucket")
+        # Map MinIO objects to video IDs
+        minio_video_ids = set()
+        minio_keyframe_video_ids = set()
+        for obj in video_objects:
+            parts = obj.object_name.split('/')
+            if len(parts) > 1:
+                minio_video_ids.add(parts[1])  # original/{video_id}/video.mp4
+        for obj in keyframe_objects:
+            parts = obj.object_name.split('/')
+            if len(parts) > 0:
+                minio_keyframe_video_ids.add(parts[0])  # {video_id}/keyframes/...
+        # 3. Cross-reference and find inconsistencies
+        logger.info("\nCross-referencing storage...")
+        mongo_video_ids = {str(v['video_id']) for v in videos}
+        # Find mismatches
+        missing_in_minio = mongo_video_ids - minio_video_ids
+        missing_keyframes = mongo_video_ids - minio_keyframe_video_ids
+        orphaned_in_minio = minio_video_ids - mongo_video_ids
+        if missing_in_minio:
+            logger.warning(f"\n⚠️ Found {len(missing_in_minio)} videos missing in MinIO:")
+            for vid in missing_in_minio:
+                logger.warning(f"- {vid}")
+        if missing_keyframes:
+            logger.warning(f"\n⚠️ Found {len(missing_keyframes)} videos missing keyframes:")
+            for vid in missing_keyframes:
+                logger.warning(f"- {vid}")
+        if orphaned_in_minio:
+            logger.warning(f"\n⚠️ Found {len(orphaned_in_minio)} orphaned videos in MinIO:")
+            for vid in orphaned_in_minio:
+                logger.warning(f"- {vid}")
+        # 4. Check MongoDB metadata completeness
+        logger.info("\nChecking metadata completeness...")
+        incomplete_metadata = []
+        for video in videos:
+            if not video.get('meta_data'):
+                incomplete_metadata.append(video['video_id'])
+                continue
+            meta = video['meta_data']
+            required_fields = ['filename', 'processing_status', 'upload_date']
+            missing_fields = [f for f in required_fields if f not in meta]
+            if missing_fields:
+                incomplete_metadata.append({
+                    'video_id': video['video_id'],
+                    'missing_fields': missing_fields
+                })
+        if incomplete_metadata:
+            logger.warning(f"\n⚠️ Found {len(incomplete_metadata)} videos with incomplete metadata:")
+            for item in incomplete_metadata:
+                if isinstance(item, dict):
+                    logger.warning(f"- {item['video_id']} (missing: {', '.join(item['missing_fields'])})")
+                else:
+                    logger.warning(f"- {item} (missing entire meta_data object)")
+        return {
+            'mongodb_videos': len(videos),
+            'minio_videos': len(video_objects),
+            'minio_keyframes': len(keyframe_objects),
+            'missing_in_minio': list(missing_in_minio),
+            'missing_keyframes': list(missing_keyframes),
+            'orphaned_in_minio': list(orphaned_in_minio),
+            'incomplete_metadata': incomplete_metadata
+        }
+    except Exception as e:
+        logger.error(f"Error checking storage: {e}")
+        raise
+def fix_metadata():
+    """Fix incomplete metadata in MongoDB records"""
+    db_manager = DatabaseManager()
+    video_collection = db_manager.db.video_file
+    logger.info("Fixing incomplete metadata...")
+    fixed_count = 0
+    for video in video_collection.find({}):
+        needs_update = False
+        update_fields = {}
+        # Ensure meta_data exists
+        if 'meta_data' not in video:
+            update_fields['meta_data'] = {
+                'processing_status': 'unknown',
+                'upload_date': video.get('upload_date', datetime.utcnow()),
+                'filename': f"video_{video['video_id']}.mp4"
+            }
+            needs_update = True
+        else:
+            meta = video['meta_data']
+            # Check and fix required fields
+            if 'processing_status' not in meta:
+                meta['processing_status'] = 'unknown'
+                needs_update = True
+            if 'upload_date' not in meta and 'upload_date' in video:
+                meta['upload_date'] = video['upload_date']
+                needs_update = True
+            if 'filename' not in meta:
+                meta['filename'] = f"video_{video['video_id']}.mp4"
+                needs_update = True
+            if needs_update:
+                update_fields['meta_data'] = meta
+        # Apply updates if needed
+        if needs_update:
+            try:
+                video_collection.update_one(
+                    {'_id': video['_id']},
+                    {'$set': update_fields}
+                )
+                fixed_count += 1
+                logger.info(f"Fixed metadata for video {video['video_id']}")
+            except Exception as e:
+                logger.error(f"Failed to fix metadata for {video['video_id']}: {e}")
+    logger.info(f"\n✅ Fixed metadata for {fixed_count} videos")
+    return fixed_count
+if __name__ == "__main__":
+    try:
+        # First check storage
+        results = check_video_storage()
+        # If there are metadata issues, fix them
+        if results['incomplete_metadata']:
+            if input("\nFix incomplete metadata? (y/n): ").lower() == 'y':
+                fixed = fix_metadata()
+                print(f"\nFixed {fixed} video records")
+        print("\nStorage check complete!")
+    except Exception as e:
+        print(f"Error: {e}")
+        sys.exit(1)

DetectifAI_db/create_admin.py ADDED Viewed

	@@ -0,0 +1,120 @@

+#!/usr/bin/env python3
+"""
+Script to create an admin user in the DetectifAI database
+"""
+from pymongo import MongoClient
+from uuid import uuid4
+from datetime import datetime, timezone
+import bcrypt
+import os
+import sys
+from dotenv import load_dotenv
+load_dotenv()
+def create_admin_user():
+    """Create an admin user in the database"""
+    # Get MongoDB connection
+    mongo_uri = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+    client = MongoClient(mongo_uri)
+    db = client.get_default_database()
+    users = db.users
+    # Admin credentials (change these!)
+    admin_email = "admin@detectifai.com"
+    admin_password = "admin123"  # ⚠️ CHANGE THIS PASSWORD!
+    admin_username = "admin"
+    # Check if admin already exists
+    existing_admin = users.find_one({"email": admin_email})
+    if existing_admin:
+        print(f"⚠️  Admin user with email '{admin_email}' already exists!")
+        update = input("Do you want to update the password? (y/n): ").lower().strip()
+        if update == 'y':
+            new_password = input("Enter new password: ").strip()
+            if not new_password:
+                print("❌ Password cannot be empty")
+                sys.exit(1)
+            # Hash new password
+            password_hash = bcrypt.hashpw(new_password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
+            # Update admin user
+            users.update_one(
+                {"email": admin_email},
+                {
+                    "$set": {
+                        "password_hash": password_hash,
+                        "password": new_password,  # For Flask backend compatibility
+                        "role": "admin",
+                        "is_active": True,
+                        "updated_at": datetime.now(timezone.utc)
+                    }
+                }
+            )
+            print(f"✅ Admin password updated successfully!")
+            print(f"   Email: {admin_email}")
+            print(f"   Password: {new_password}")
+        else:
+            print("ℹ️  Keeping existing admin user")
+        client.close()
+        return
+    # Create new admin user
+    print(f"Creating admin user...")
+    print(f"   Email: {admin_email}")
+    print(f"   Username: {admin_username}")
+    # Hash password
+    password_hash = bcrypt.hashpw(admin_password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
+    admin_user = {
+        "user_id": str(uuid4()),
+        "username": admin_username,
+        "email": admin_email,
+        "password_hash": password_hash,
+        "password": admin_password,  # For Flask backend compatibility (plain text - TODO: remove in production)
+        "role": "admin",
+        "is_active": True,
+        "profile_data": {},
+        "created_at": datetime.now(timezone.utc),
+        "updated_at": datetime.now(timezone.utc),
+        "last_login": None
+    }
+    try:
+        users.insert_one(admin_user)
+        print("\n✅ Admin user created successfully!")
+        print(f"\n📋 Login Credentials:")
+        print(f"   Email: {admin_email}")
+        print(f"   Password: {admin_password}")
+        print(f"\n⚠️  IMPORTANT: Change this password after first login!")
+        print(f"\n🌐 Access the admin panel at: http://localhost:3000/admin/signin")
+    except Exception as e:
+        print(f"❌ Error creating admin user: {e}")
+        sys.exit(1)
+    finally:
+        client.close()
+if __name__ == "__main__":
+    print("=" * 60)
+    print("DetectifAI - Admin User Creation Script")
+    print("=" * 60)
+    print()
+    # Check if MONGO_URI is set
+    if not os.getenv("MONGO_URI"):
+        print("❌ Error: MONGO_URI environment variable not set")
+        print("Please create a .env file with your MongoDB connection string")
+        print("Example: MONGO_URI=mongodb://localhost:27017/detectifai")
+        sys.exit(1)
+    create_admin_user()
+    print("\n" + "=" * 60)
+    print("✅ Script completed!")
+    print("=" * 60)

DetectifAI_db/database_seed.py ADDED Viewed

	@@ -0,0 +1,212 @@

+from pymongo import MongoClient
+from uuid import uuid4
+from dotenv import load_dotenv
+from datetime import datetime, timezone
+import os
+load_dotenv()
+client = MongoClient(os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai"))
+db = client.get_default_database()
+users = db.users
+video_files = db.video_files
+event_descriptions = db.event_descriptions
+subscription_plans = db.subscription_plans
+events = db.events
+# Add sample user if not exists
+sample_user = {
+    "user_id": str(uuid4()),
+    "username": "testuser",
+    "email": "user@detectifai.test",
+    "password": "userpass",
+    "role": "user",
+    "created_at": datetime.now(timezone.utc),
+    "updated_at": datetime.now(timezone.utc),
+    "last_login": None
+}
+if users.count_documents({"email": "user@detectifai.test"}) == 0:
+    users.insert_one(sample_user)
+    print("Added sample user: user@detectifai.test / userpass")
+else:
+    print("Sample user already exists")
+# Add sample subscription plans
+sample_plans = [
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Basic",
+        "description": "Basic surveillance features",
+        "price": 9.99,
+        "features": "basic_ai,email_support",
+        "storage_limit": 10,
+        "is_active": True
+    },
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Pro",
+        "description": "Advanced AI features with priority support",
+        "price": 29.99,
+        "features": "advanced_ai,priority_support,face_recognition",
+        "storage_limit": 100,
+        "is_active": True
+    },
+    {
+        "plan_id": str(uuid4()),
+        "plan_name": "Enterprise",
+        "description": "Full enterprise features with 24/7 support",
+        "price": 99.99,
+        "features": "premium_ai,24_7_support,face_recognition,custom_integrations",
+        "storage_limit": 1000,
+        "is_active": True
+    }
+]
+for plan in sample_plans:
+    if subscription_plans.count_documents({"plan_id": plan["plan_id"]}) == 0:
+        subscription_plans.insert_one(plan)
+        print(f"Added subscription plan: {plan['plan_name']}")
+    else:
+        print(f"Subscription plan {plan['plan_name']} already exists")
+# Get existing video files to add sample events and descriptions
+existing_videos = list(video_files.find({}))
+if not existing_videos:
+    print("No video files found. Upload some videos first, then run this script.")
+else:
+    # Add sample events and descriptions to the first video
+    video = existing_videos[0]
+    video_id = video["video_id"]
+    # Create sample events
+    sample_events = [
+        {
+            "event_id": str(uuid4()),
+            "video_id": video_id,
+            "event_type": "person_detection",
+            "confidence_score": 0.95,
+            "start_timestamp_ms": 0,
+            "end_timestamp_ms": 5000,
+            "bounding_boxes": {"x": 100, "y": 150, "width": 200, "height": 300},
+            "visual_embedding": [],
+            "is_verified": False,
+            "is_false_positive": False,
+            "verified_by": None,
+            "verified_at": None
+        },
+        {
+            "event_id": str(uuid4()),
+            "video_id": video_id,
+            "event_type": "object_detection",
+            "confidence_score": 0.87,
+            "start_timestamp_ms": 5200,
+            "end_timestamp_ms": 12800,
+            "bounding_boxes": {"x": 300, "y": 200, "width": 150, "height": 100},
+            "visual_embedding": [],
+            "is_verified": False,
+            "is_false_positive": False,
+            "verified_by": None,
+            "verified_at": None
+        }
+    ]
+    # Insert events
+    for event in sample_events:
+        if events.count_documents({"event_id": event["event_id"]}) == 0:
+            events.insert_one(event)
+            print(f"Added event: {event['event_type']}")
+    # Add sample descriptions for the events
+    sample_descriptions = [
+        {
+            "description_id": str(uuid4()),
+            "event_id": sample_events[0]["event_id"],
+            "caption": "Person walking into the room carrying a briefcase",
+            "text_embedding": [],
+            "confidence": 0.92,
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc)
+        },
+        {
+            "description_id": str(uuid4()),
+            "event_id": sample_events[1]["event_id"],
+            "caption": "Individual sits down at desk and opens laptop computer",
+            "text_embedding": [],
+            "confidence": 0.88,
+            "created_at": datetime.now(timezone.utc),
+            "updated_at": datetime.now(timezone.utc)
+        }
+    ]
+    # Insert descriptions
+    for desc in sample_descriptions:
+        if event_descriptions.count_documents({"description_id": desc["description_id"]}) == 0:
+            event_descriptions.insert_one(desc)
+            print(f"Added description: {desc['caption'][:50]}...")
+    # If there are more videos, add different events to the second one
+    if len(existing_videos) > 1:
+        video2 = existing_videos[1]
+        video2_id = video2["video_id"]
+        sample_events2 = [
+            {
+                "event_id": str(uuid4()),
+                "video_id": video2_id,
+                "event_type": "security_patrol",
+                "confidence_score": 0.93,
+                "start_timestamp_ms": 2100,
+                "end_timestamp_ms": 15400,
+                "bounding_boxes": {"x": 50, "y": 100, "width": 180, "height": 250},
+                "visual_embedding": [],
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_by": None,
+                "verified_at": None
+            }
+        ]
+        for event in sample_events2:
+            if events.count_documents({"event_id": event["event_id"]}) == 0:
+                events.insert_one(event)
+                print(f"Added event: {event['event_type']}")
+        sample_descriptions2 = [
+            {
+                "description_id": str(uuid4()),
+                "event_id": sample_events2[0]["event_id"],
+                "caption": "Security guard patrolling the hallway with flashlight",
+                "text_embedding": [],
+                "confidence": 0.91,
+                "created_at": datetime.now(timezone.utc),
+                "updated_at": datetime.now(timezone.utc)
+            }
+        ]
+        for desc in sample_descriptions2:
+            if event_descriptions.count_documents({"description_id": desc["description_id"]}) == 0:
+                event_descriptions.insert_one(desc)
+                print(f"Added description: {desc['caption'][:50]}...")
+print("\n--- Database Seeding Complete ---")
+print("You can now test search functionality with terms like:")
+print("- 'briefcase' or 'laptop'")
+print("- 'security' or 'guard'")
+print("- 'person' or 'detection'")
+print("- 'desk' or 'computer'")
+print("- 'patrol' or 'hallway'")
+# Show summary
+total_videos = video_files.count_documents({})
+total_events = events.count_documents({})
+total_descriptions = event_descriptions.count_documents({})
+total_users = users.count_documents({})
+total_plans = subscription_plans.count_documents({})
+print(f"\nDatabase Summary:")
+print(f"Total users: {total_users}")
+print(f"Total subscription plans: {total_plans}")
+print(f"Total video files: {total_videos}")
+print(f"Total events: {total_events}")
+print(f"Total event descriptions: {total_descriptions}")

DetectifAI_db/database_setup.py ADDED Viewed

	@@ -0,0 +1,375 @@

+from pymongo import MongoClient, ASCENDING
+import os
+from dotenv import load_dotenv
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+def create_collection_if_not_exists(name, validator=None, indexes=None):
+    """Create collection if it doesn't exist, otherwise skip"""
+    try:
+        if validator:
+            db.create_collection(name, validator=validator)
+        else:
+            db.create_collection(name)
+        print(f"Created collection: {name}")
+    except Exception as e:
+        if "already exists" in str(e):
+            print(f"Collection {name} already exists, skipping...")
+        else:
+            print(f"Error creating collection {name}: {e}")
+            return False
+    # Create indexes if specified
+    if indexes:
+        for index in indexes:
+            try:
+                if isinstance(index, tuple):
+                    # Index with options
+                    db[name].create_index(index[0], **index[1])
+                else:
+                    # Simple index
+                    db[name].create_index(index)
+                print(f"  Created index on {name}")
+            except Exception as e:
+                if "already exists" in str(e) or "duplicate key" in str(e):
+                    print(f"  Index on {name} already exists")
+                else:
+                    print(f"  Error creating index on {name}: {e}")
+    return True
+# === ADMIN ===
+# Admin accounts: admin_id, username, email and password are mandatory strings.
+# Unique index on email, plain index on username.
+# NOTE(review): this schema declares a "password" string while "users" below
+# declares "password_hash" - confirm admin passwords are hashed before storage.
+create_collection_if_not_exists("admin", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["admin_id", "username", "email", "password"],
+        "properties": {
+            "admin_id": {"bsonType": "string"},
+            "username": {"bsonType": "string"},
+            "email": {"bsonType": "string"},
+            "password": {"bsonType": "string"},
+            "role": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"},
+            "last_login": {"bsonType": ["date", "null"]}
+        }
+    }
+}, indexes=[([("email", ASCENDING)], {"unique": True}), "username"])
+# === USERS ===
+# Application users: only user_id and email are required; last_login may be null.
+# The schema does not set additionalProperties, so fields not listed here are
+# not rejected by it. Unique index on email, plain index on username.
+create_collection_if_not_exists("users", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["user_id", "email"],
+        "properties": {
+            "user_id": {"bsonType": "string"},
+            "username": {"bsonType": "string"},
+            "email": {"bsonType": "string"},
+            "password_hash": {"bsonType": "string"},
+            "role": {"bsonType": "string"},
+            "profile_data": {"bsonType": "object"},
+            "is_active": {"bsonType": "bool"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"},
+            "last_login": {"bsonType": ["date", "null"]}
+        }
+    }
+}, indexes=[([("email", ASCENDING)], {"unique": True}), "username"])
+# === VIDEO FILES ===
+# Uploaded videos: video_id, user_id and file_path are required; storage keys,
+# codec details and the free-form meta_data object are optional.
+# Indexed on user_id and upload_date.
+# NOTE(review): fps is "double", duration_secs is "int" and file_size_bytes is
+# "long". PyMongo presumably stores small Python ints as int32, so writers may
+# need bson.int64.Int64 for file_size_bytes and a float for fps - confirm.
+create_collection_if_not_exists("video_files", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["video_id", "user_id", "file_path"],
+        "properties": {
+            "video_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "file_path": {"bsonType": "string"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "codec": {"bsonType": "string"},
+            "fps": {"bsonType": "double"},
+            "upload_date": {"bsonType": "date"},
+            "duration_secs": {"bsonType": "int"},
+            "file_size_bytes": {"bsonType": "long"},
+            "meta_data": {"bsonType": "object"}
+        }
+    }
+}, indexes=["user_id", "upload_date"])
+# === EVENTS ===
+create_collection_if_not_exists("events", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["event_id", "video_id", "start_timestamp_ms", "end_timestamp_ms"],
+        "properties": {
+            "event_id": {"bsonType": "string"},
+            "video_id": {"bsonType": "string"},
+            "start_timestamp_ms": {"bsonType": "long"},
+            "end_timestamp_ms": {"bsonType": "long"},
+            "confidence_score": {"bsonType": "double"},
+            "is_verified": {"bsonType": "bool"},
+            "is_false_positive": {"bsonType": "bool"},
+            "verified_at": {"bsonType": ["date", "null"]},
+            "verified_by": {"bsonType": ["string", "null"]},
+            "visual_embedding": {"bsonType": "array"},
+            "bounding_boxes": {"bsonType": "object"},
+            "event_type": {"bsonType": "string"}
+        }
+    }
+}, indexes=["video_id", "event_type", "is_verified"])
+# === EVENT CLIPS ===
+# Video clips cut for an event: clip_id, event_id and clip_path are required.
+# Indexed on event_id.
+# NOTE(review): duration_ms and file_size_bytes are "long"; small Python ints are
+# presumably stored as int32 by PyMongo - confirm writers use Int64.
+create_collection_if_not_exists("event_clips", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["clip_id", "event_id", "clip_path"],
+        "properties": {
+            "clip_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "clip_path": {"bsonType": "string"},
+            "thumbnail_path": {"bsonType": "string"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "duration_ms": {"bsonType": "long"},
+            "extracted_at": {"bsonType": "date"},
+            "file_size_bytes": {"bsonType": "long"}
+        }
+    }
+}, indexes=["event_id"])
+# === DETECTED FACES ===
+# Faces found within an event: face_id, event_id and detected_at are required;
+# the embedding is an array and the bounding box an object.
+# Indexed on event_id and detected_at.
+create_collection_if_not_exists("detected_faces", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["face_id", "event_id", "detected_at"],
+        "properties": {
+            "face_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "detected_at": {"bsonType": "date"},
+            "confidence_score": {"bsonType": "double"},
+            "face_embedding": {"bsonType": "array"},
+            "minio_object_key": {"bsonType": "string"},
+            "minio_bucket": {"bsonType": "string"},
+            "face_image_path": {"bsonType": "string"},
+            "bounding_boxes": {"bsonType": "object"}
+        }
+    }
+}, indexes=["event_id", "detected_at"])
+# === FACE MATCHES ===
+# Pairwise face comparisons: both face ids and the similarity score are required.
+# Indexed on face_id_1, face_id_2 and similarity_score (three separate indexes).
+create_collection_if_not_exists("face_matches", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["match_id", "face_id_1", "face_id_2", "similarity_score"],
+        "properties": {
+            "match_id": {"bsonType": "string"},
+            "face_id_1": {"bsonType": "string"},
+            "face_id_2": {"bsonType": "string"},
+            "similarity_score": {"bsonType": "double"},
+            "matched_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["face_id_1", "face_id_2", "similarity_score"])
+# === EVENT DESCRIPTIONS ===
+# Captions attached to events: description_id, event_id and text_embedding are
+# required (an empty array satisfies the "array" type).
+# Indexed on event_id and created_at.
+create_collection_if_not_exists("event_descriptions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["description_id", "event_id", "text_embedding"],
+        "properties": {
+            "description_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "text_embedding": {"bsonType": "array"},
+            "caption": {"bsonType": "string"},
+            "confidence": {"bsonType": "double"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["event_id", "created_at"])
+# === EVENT CAPTIONS ===
+# Plain description text keyed by description_id; both fields are required.
+# Indexed on description_id.
+create_collection_if_not_exists("event_captions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["description_id", "description"],
+        "properties": {
+            "description_id": {"bsonType": "string"},
+            "description": {"bsonType": "string"}
+        }
+    }
+}, indexes=["description_id"])
+# === QUERY ===
+# Search queries issued by users: query_id, user_id and query_text are required.
+# Indexed on user_id and executed_at.
+create_collection_if_not_exists("query", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["query_id", "user_id", "query_text"],
+        "properties": {
+            "query_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "query_text": {"bsonType": "string"},
+            "query_embedding": {"bsonType": "array"},
+            "executed_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "executed_at"])
+# === QUERY RESULT ===
+# One row per (query, event) hit: result_id, query_id and event_id are required.
+# Indexed on query_id, event_id and relevance_score.
+create_collection_if_not_exists("query_result", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["result_id", "query_id", "event_id"],
+        "properties": {
+            "result_id": {"bsonType": "string"},
+            "query_id": {"bsonType": "string"},
+            "event_id": {"bsonType": "string"},
+            "relevance_score": {"bsonType": "double"},
+            "match_details": {"bsonType": "object"},
+            "returned_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["query_id", "event_id", "relevance_score"])
+# === SUBSCRIPTION PLANS ===
+create_collection_if_not_exists("subscription_plans", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["plan_id", "plan_name", "price"],
+        "properties": {
+            "plan_id": {"bsonType": "string"},
+            "plan_name": {"bsonType": "string"},
+            "description": {"bsonType": "string"},
+            "price": {"bsonType": "decimal"},
+            "features": {"bsonType": "string"},
+            "storage_limit": {"bsonType": "int"},
+            "is_active": {"bsonType": "bool"},
+            "stripe_product_id": {"bsonType": "string"},
+            "stripe_price_ids": {"bsonType": "object"},
+            "billing_periods": {"bsonType": "array"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=[([("plan_id", ASCENDING)], {"unique": True}), "is_active", "stripe_product_id"])
+# === USER SUBSCRIPTIONS ===
+# Links a user to a plan: subscription_id, user_id and plan_id are required;
+# Stripe identifiers, billing period and status fields are optional.
+# Indexed on user_id, plan_id, start_date, both Stripe ids and status.
+create_collection_if_not_exists("user_subscriptions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["subscription_id", "user_id", "plan_id"],
+        "properties": {
+            "subscription_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "plan_id": {"bsonType": "string"},
+            "start_date": {"bsonType": "date"},
+            "end_date": {"bsonType": "date"},
+            "stripe_customer_id": {"bsonType": "string"},
+            "stripe_subscription_id": {"bsonType": "string"},
+            "billing_period": {"bsonType": "string"},
+            "status": {"bsonType": "string"},
+            "current_period_start": {"bsonType": "date"},
+            "current_period_end": {"bsonType": "date"},
+            "cancel_at_period_end": {"bsonType": "bool"},
+            "created_at": {"bsonType": "date"},
+            "updated_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "plan_id", "start_date", "stripe_customer_id", "stripe_subscription_id", "status"])
+# === SUBSCRIPTION EVENTS === (NEW - for audit trail)
+# One document per subscription event: event_id, subscription_id and event_type
+# are required. Indexed on subscription_id, event_type, created_at and
+# stripe_event_id (the stripe_event_id index is not unique).
+create_collection_if_not_exists("subscription_events", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["event_id", "subscription_id", "event_type"],
+        "properties": {
+            "event_id": {"bsonType": "string"},
+            "subscription_id": {"bsonType": "string"},
+            "event_type": {"bsonType": "string"},
+            "stripe_event_id": {"bsonType": "string"},
+            "event_data": {"bsonType": "object"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["subscription_id", "event_type", "created_at", "stripe_event_id"])
+# === PAYMENT HISTORY === (NEW - for transaction records)
+# Payment records: payment_id, user_id and amount are required.
+# Indexed on user_id, created_at, status and stripe_payment_intent_id.
+# NOTE(review): amount is "double" here; whole-number Python ints would
+# presumably be stored as an integer type and rejected - confirm writers pass
+# floats.
+create_collection_if_not_exists("payment_history", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["payment_id", "user_id", "amount"],
+        "properties": {
+            "payment_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "stripe_payment_intent_id": {"bsonType": "string"},
+            "amount": {"bsonType": "double"},
+            "currency": {"bsonType": "string"},
+            "status": {"bsonType": "string"},
+            "payment_method": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "created_at", "status", "stripe_payment_intent_id"])
+# === SUBSCRIPTION USAGE === (NEW - for analytics and limits)
+# Usage measurements per user: usage_id, user_id and usage_type are required.
+# Indexed on user_id, usage_type and usage_date.
+create_collection_if_not_exists("subscription_usage", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["usage_id", "user_id", "usage_type"],
+        "properties": {
+            "usage_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "usage_type": {"bsonType": "string"},
+            "usage_value": {"bsonType": "double"},
+            "usage_date": {"bsonType": "date"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=["user_id", "usage_type", "usage_date"])
+# === USER SESSIONS ===
+# Login sessions: session_id, user_id, session_token and expires_at are required.
+# Unique index on session_token, plain indexes on user_id and expires_at.
+# NOTE(review): the expires_at index is a regular index (no expireAfterSeconds),
+# so expired sessions are not removed by this index - confirm cleanup elsewhere.
+create_collection_if_not_exists("user_sessions", validator={
+    "$jsonSchema": {
+        "bsonType": "object",
+        "required": ["session_id", "user_id", "session_token", "expires_at"],
+        "properties": {
+            "session_id": {"bsonType": "string"},
+            "user_id": {"bsonType": "string"},
+            "session_token": {"bsonType": "string"},
+            "expires_at": {"bsonType": "date"},
+            "ip_address": {"bsonType": "string"},
+            "user_agent": {"bsonType": "string"},
+            "created_at": {"bsonType": "date"}
+        }
+    }
+}, indexes=[
+    ([("session_token", ASCENDING)], {"unique": True}),
+    "user_id",
+    "expires_at"
+])
+# NOTE: these messages are printed unconditionally; the return values of the
+# create_collection_if_not_exists calls above are never inspected.
+print("\nDatabase schema setup completed successfully.")
+print("All collections are ready with validation and indexes.")

DetectifAI_db/env.example ADDED Viewed

	@@ -0,0 +1,19 @@

+# MongoDB Configuration
+MONGO_URI=mongodb://localhost:27017/detectifai
+# S3-compatible Storage (Backblaze B2)
+MINIO_ENDPOINT=s3.eu-central-003.backblazeb2.com
+MINIO_ACCESS_KEY=your-b2-key-id
+MINIO_SECRET_KEY=your-b2-application-key
+MINIO_VIDEO_BUCKET=detectifai-videos
+MINIO_KEYFRAME_BUCKET=detectifai-keyframes
+MINIO_REPORTS_BUCKET=detectifai-reports
+MINIO_SECURE=true
+MINIO_REGION=eu-central-003
+# JWT Configuration
+JWT_SECRET=your-super-secret-jwt-key-here
+# Flask Configuration
+FLASK_ENV=development
+FLASK_DEBUG=True

DetectifAI_db/faiss_captions.index ADDED Viewed

Binary file (30.8 kB). View file

DetectifAI_db/faiss_captions_idmap.json ADDED Viewed

	@@ -0,0 +1,12 @@

+[
+  "desc_fe5f4141f350",
+  "desc_6683c8f65ca9",
+  "desc_93f7c560626c",
+  "desc_02ac022c7621",
+  "desc_9fc4ce829b64",
+  "desc_3b45f7543394",
+  "desc_49df9ce76beb",
+  "desc_e119f53298d0",
+  "desc_e6a2154fb826",
+  "desc_3e3ca6f4637d"
+]

DetectifAI_db/migrate_stripe_integration.py ADDED Viewed

	@@ -0,0 +1,209 @@

+"""
+Database Migration Script: Add Stripe Integration to Subscription Plans
+This script updates existing subscription_plans and prepares the database
+for Stripe payment integration.
+Run this script ONCE after updating the database schema.
+"""
+from pymongo import MongoClient
+from datetime import datetime
+import os
+from dotenv import load_dotenv
+from uuid import uuid4
+load_dotenv()
+# Connect to MongoDB
+MONGO_URI = os.getenv("MONGO_URI")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+subscription_plans = db.subscription_plans
+user_subscriptions = db.user_subscriptions
+print("🔄 Starting Stripe integration migration...")
+# ========================================
+# Step 1: Update existing subscription plans with Stripe data
+# ========================================
+print("\n📋 Step 1: Updating subscription plans with Stripe data...")
+# DetectifAI Basic Plan
+basic_plan = subscription_plans.find_one({"plan_name": "Basic"})
+if basic_plan:
+    subscription_plans.update_one(
+        {"_id": basic_plan["_id"]},
+        {
+            "$set": {
+                "stripe_product_id": "prod_TqIuL76gNG4hxu",
+                "stripe_price_ids": {
+                    "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+                    "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+                },
+                "billing_periods": ["monthly", "yearly"],
+                "price": 19.00,
+                "description": "Essential AI-powered security monitoring",
+                "features": "single_video,object_detection,face_recognition,7day_history,dashboard,basic_reports",
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+    print("✅ Updated Basic plan with Stripe integration")
+else:
+    # Create Basic plan if it doesn't exist
+    basic_plan_data = {
+        "plan_id": str(uuid4()),
+        "plan_name": "Basic",
+        "description": "Essential AI-powered security monitoring",
+        "price": 19.00,
+        "features": "single_video,object_detection,face_recognition,7day_history,dashboard,basic_reports",
+        "storage_limit": 50,
+        "is_active": True,
+        "stripe_product_id": "prod_TqIuL76gNG4hxu",
+        "stripe_price_ids": {
+            "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+            "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+        },
+        "billing_periods": ["monthly", "yearly"],
+        "created_at": datetime.utcnow(),
+        "updated_at": datetime.utcnow()
+    }
+    subscription_plans.insert_one(basic_plan_data)
+    print("✅ Created Basic plan with Stripe integration")
+# DetectifAI Pro Plan
+pro_plan = subscription_plans.find_one({"plan_name": "Pro"})
+if pro_plan:
+    subscription_plans.update_one(
+        {"_id": pro_plan["_id"]},
+        {
+            "$set": {
+                "stripe_product_id": "prod_TqIyhR08zDDa2B",
+                "stripe_price_ids": {
+                    "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+                    "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+                },
+                "billing_periods": ["monthly", "yearly"],
+                "price": 49.00,
+                "description": "Advanced security intelligence with extended capabilities",
+                "features": "everything_basic,30day_history,behavior_analysis,person_tracking,nlp_search,image_search,custom_reports,priority_queue",
+                "updated_at": datetime.utcnow()
+            }
+        }
+    )
+    print("✅ Updated Pro plan with Stripe integration")
+else:
+    # Create Pro plan if it doesn't exist
+    pro_plan_data = {
+        "plan_id": str(uuid4()),
+        "plan_name": "Pro",
+        "description": "Advanced security intelligence with extended capabilities",
+        "price": 49.00,
+        "features": "everything_basic,30day_history,behavior_analysis,person_tracking,nlp_search,image_search,custom_reports,priority_queue",
+        "storage_limit": 200,
+        "is_active": True,
+        "stripe_product_id": "prod_TqIyhR08zDDa2B",
+        "stripe_price_ids": {
+            "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+            "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+        },
+        "billing_periods": ["monthly", "yearly"],
+        "created_at": datetime.utcnow(),
+        "updated_at": datetime.utcnow()
+    }
+    subscription_plans.insert_one(pro_plan_data)
+    print("✅ Created Pro plan with Stripe integration")
+# Deactivate (do not delete) the Enterprise plan if it exists - it is not part of the current offering
+enterprise_plan = subscription_plans.find_one({"plan_name": "Enterprise"})
+if enterprise_plan:
+    subscription_plans.update_one(
+        {"_id": enterprise_plan["_id"]},
+        {"$set": {"is_active": False, "updated_at": datetime.utcnow()}}
+    )
+    print("✅ Deactivated Enterprise plan (not in current offering)")
+# ========================================
+# Step 2: Add Stripe fields to existing user subscriptions
+# ========================================
+print("\n📋 Step 2: Adding Stripe fields to existing user subscriptions...")
+existing_subscriptions = user_subscriptions.find({})
+updated_count = 0
+for sub in existing_subscriptions:
+    # Check if Stripe fields already exist
+    if "stripe_customer_id" not in sub:
+        user_subscriptions.update_one(
+            {"_id": sub["_id"]},
+            {
+                "$set": {
+                    "stripe_customer_id": None,
+                    "stripe_subscription_id": None,
+                    "billing_period": "monthly",
+                    "status": "active",
+                    "current_period_start": sub.get("start_date"),
+                    "current_period_end": sub.get("end_date"),
+                    "cancel_at_period_end": False,
+                    "updated_at": datetime.utcnow()
+                }
+            }
+        )
+        updated_count += 1
+if updated_count > 0:
+    print(f"✅ Updated {updated_count} existing subscriptions with Stripe fields")
+else:
+    print("✅ No existing subscriptions to update")
+# ========================================
+# Step 3: Verify collections exist
+# ========================================
+print("\n📋 Step 3: Verifying new collections...")
+collections_to_check = [
+    "subscription_events",
+    "payment_history",
+    "subscription_usage"
+]
+for collection_name in collections_to_check:
+    if collection_name in db.list_collection_names():
+        count = db[collection_name].count_documents({})
+        print(f"✅ Collection '{collection_name}' exists (documents: {count})")
+    else:
+        print(f"⚠️  Collection '{collection_name}' not found - run database_setup.py first")
+# ========================================
+# Step 4: Display summary
+# ========================================
+print("\n" + "="*60)
+print("📊 MIGRATION SUMMARY")
+print("="*60)
+all_plans = list(subscription_plans.find({"is_active": True}))
+print(f"\n✅ Active Subscription Plans: {len(all_plans)}")
+for plan in all_plans:
+    print(f"   • {plan['plan_name']}: ${plan['price']}/month")
+    print(f"     Stripe Product: {plan.get('stripe_product_id', 'NOT SET')}")
+    print(f"     Billing: {', '.join(plan.get('billing_periods', []))}")
+all_subs = user_subscriptions.count_documents({})
+print(f"\n✅ Total User Subscriptions: {all_subs}")
+print("\n" + "="*60)
+print("✅ Migration completed successfully!")
+print("="*60)
+print("\nNext steps:")
+print("1. Test Stripe integration endpoints")
+print("2. Create webhook endpoint for Stripe events")
+print("3. Test checkout flow with test cards")
+print("4. Update frontend pricing components")
+client.close()

DetectifAI_db/minio_config.py ADDED Viewed

	@@ -0,0 +1,37 @@

+"""
+S3-compatible storage configuration for DetectifAI (Backblaze B2)
+"""
+# S3 bucket names (matching actual Backblaze B2 buckets)
+VIDEOS_BUCKET = "detectifai-videos"
+KEYFRAMES_BUCKET = "detectifai-keyframes"
+COMPRESSED_BUCKET = "detectifai-compressed"
+NLP_IMAGES_BUCKET = "nlp-images"
+REPORTS_BUCKET = "detectifai-reports"
+# Object prefixes/paths
+ORIGINAL_VIDEO_PREFIX = "original"
+COMPRESSED_VIDEO_PREFIX = "compressed"
+KEYFRAME_PREFIX = "keyframes"
+# S3-compatible storage default configuration (Backblaze B2)
+MINIO_CONFIG = {
+    "endpoint": "s3.eu-central-003.backblazeb2.com",
+    "access_key": "00367479ffb7e4e0000000001",
+    "secret_key": "K003opTvf92ijRj5dM7H1dgrlwcGTdA",
+    "secure": True,
+    "region": "eu-central-003"
+}
+# Function to generate MinIO paths
+def get_minio_paths(video_id: str, filename: str = None):
+    """Generate standardized MinIO paths for a video"""
+    if filename is None:
+        filename = f"{video_id}.mp4"
+    return {
+        "original": f"{ORIGINAL_VIDEO_PREFIX}/{video_id}/{filename}",
+        "compressed": f"{COMPRESSED_VIDEO_PREFIX}/{video_id}/{filename}",
+        "keyframes": f"{KEYFRAME_PREFIX}/{video_id}",
+        "reports": f"reports/{video_id}"
+    }

DetectifAI_db/requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+Flask==2.3.3
+Flask-CORS==4.0.0
+Werkzeug==3.0.0
+PyJWT==2.8.0
+pymongo>=4.6.3,<5.0
+python-multipart==0.0.6
+minio==7.1.11
+opencv-python==4.8.0.74
+python-dotenv==1.0.0
+faiss-cpu
+numpy
+Pillow
+scikit-learn
+sentence-transformers

DetectifAI_db/reset_minio.py ADDED Viewed

	@@ -0,0 +1,104 @@

+"""
+Reset MinIO buckets and test storage paths for DetectifAI.
+This script ensures that all required MinIO buckets and storage paths
+are properly configured for video processing.
+"""
+from minio import Minio
+from minio.error import S3Error
+import os
+from datetime import datetime
+from dotenv import load_dotenv
+import logging
+# Load environment variables
+load_dotenv()
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# MinIO configuration
+MINIO_CONFIG = {
+    "endpoint": os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com"),
+    "access_key": os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001"),
+    "secret_key": os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA"),
+    "secure": os.getenv("MINIO_SECURE", "true").lower() == "true",
+    "region": os.getenv("MINIO_REGION", "eu-central-003")
+}
+# Bucket configuration with descriptions
+BUCKETS = {
+    "detectifai-videos": {
+        "description": "Main bucket for video storage",
+        "prefixes": {
+            "original": "Original uploaded videos",
+            "compressed": "Compressed video versions"
+        }
+    },
+    "detectifai-keyframes": {
+        "description": "Storage for extracted video frames",
+        "prefixes": {
+            "keyframes": "Extracted keyframes and annotated frames"
+        }
+    }
+}
+def reset_minio_storage():
+    """Reset and verify MinIO storage configuration"""
+    client = Minio(**MINIO_CONFIG)
+    print("Checking MinIO connection and buckets...")
+    for bucket_name, config in BUCKETS.items():
+        try:
+            # Check if bucket exists
+            found = client.bucket_exists(bucket_name)
+            if not found:
+                print(f"Creating bucket: {bucket_name}")
+                client.make_bucket(bucket_name)
+            # Test each prefix path
+            for prefix in config["prefixes"]:
+                test_object = f"{prefix}/test.txt"
+                test_data = f"Test data for {bucket_name}/{prefix}"
+                print(f"\nTesting path: {bucket_name}/{test_object}")
+                # Upload test object
+                test_bytes = bytes(test_data, 'utf-8')
+                from io import BytesIO
+                test_stream = BytesIO(test_bytes)
+                client.put_object(
+                    bucket_name,
+                    test_object,
+                    test_stream,
+                    len(test_bytes)
+                )
+                # Verify upload
+                try:
+                    client.stat_object(bucket_name, test_object)
+                    print(f"✅ Test file uploaded successfully")
+                    # Clean up test file
+                    client.remove_object(bucket_name, test_object)
+                    print(f"✅ Test file removed")
+                except:
+                    print(f"❌ Could not verify test file")
+            print(f"\nListing objects in {bucket_name}:")
+            objects = client.list_objects(bucket_name, recursive=True)
+            for obj in objects:
+                print(f"- {obj.object_name} (size: {obj.size} bytes)")
+        except S3Error as e:
+            print(f"❌ Error with bucket {bucket_name}: {e}")
+            continue
+if __name__ == "__main__":
+    reset_minio_storage()

DetectifAI_db/reset_users_collection.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+def reset_users_collection():
+    try:
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        # Drop the existing users collection
+        print("Dropping existing users collection...")
+        db.users.drop()
+        # Run database_setup.py to recreate with new schema
+        print("Creating users collection with new schema...")
+        import database_setup
+        print("✅ Users collection reset successfully!")
+    except Exception as e:
+        print(f"❌ Error: {e}")
+    finally:
+        client.close()
+if __name__ == "__main__":
+    reset_users_collection()

DetectifAI_db/seed_stripe_plans.py ADDED Viewed

	@@ -0,0 +1,141 @@

+"""
+Seed Stripe-Integrated Subscription Plans
+This script populates the subscription_plans collection with accurate
+DetectifAI Basic and Pro plans connected to Stripe.
+"""
+from pymongo import MongoClient
+from datetime import datetime
+import os
+from dotenv import load_dotenv
+from uuid import uuid4
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI")
+client = MongoClient(MONGO_URI)
+db = client.get_default_database()
+subscription_plans = db.subscription_plans
+print("🌱 Seeding Stripe-integrated subscription plans...")
+# DetectifAI Basic Plan
+basic_plan = {
+    "plan_id": "detectifai_basic",
+    "plan_name": "DetectifAI Basic",
+    "description": "Essential AI-powered security monitoring for single installations",
+    "price": 19.00,
+    "features": [
+        "single_video",
+        "object_detection",
+        "face_recognition",
+        "event_history_7day",
+        "dashboard",
+        "basic_reports",
+        "video_clips"
+    ],
+    "limits": {
+        "video_processing": 10,  # Videos per month
+        "history_retention_days": 7,
+        "nlp_searches": 0,  # Not available in Basic
+        "image_searches": 0,  # Not available in Basic
+        "concurrent_streams": 1
+    },
+    "is_active": True,
+    "stripe_product_id": "prod_TqIuL76gNG4hxu",
+    "stripe_price_ids": {
+        "monthly": "price_1SscIsBC7V4mGo7rR4T0YZIc",
+        "yearly": "price_1SscMQBC7V4mGo7rigJ4bFFE"
+    },
+    "billing_periods": ["monthly", "yearly"],
+    "created_at": datetime.utcnow(),
+    "updated_at": datetime.utcnow()
+}
+# DetectifAI Pro Plan
+pro_plan = {
+    "plan_id": "detectifai_pro",
+    "plan_name": "DetectifAI Pro",
+    "description": "Advanced security intelligence with extended capabilities",
+    "price": 49.00,
+    "features": [
+        "single_video",
+        "object_detection",
+        "face_recognition",
+        "event_history_30day",
+        "dashboard",
+        "basic_reports",
+        "video_clips",
+        "behavior_analysis",
+        "person_tracking",
+        "nlp_search",
+        "image_search",
+        "custom_reports",
+        "priority_queue"
+    ],
+    "limits": {
+        "video_processing": 999999,  # Unlimited videos per month for Pro
+        "history_retention_days": 30,
+        "nlp_searches": 200,  # NLP searches per month
+        "image_searches": 100,  # Image searches per month
+        "concurrent_streams": 1
+    },
+    "is_active": True,
+    "stripe_product_id": "prod_TqIyhR08zDDa2B",
+    "stripe_price_ids": {
+        "monthly": "price_1SscMwBC7V4mGo7rmmRPTTOz",
+        "yearly": "price_1SscNXBC7V4mGo7rdGgYAYRs"
+    },
+    "billing_periods": ["monthly", "yearly"],
+    "created_at": datetime.utcnow(),
+    "updated_at": datetime.utcnow()
+}
+# Upsert plans
+for plan in [basic_plan, pro_plan]:
+    result = subscription_plans.update_one(
+        {"plan_id": plan["plan_id"]},
+        {"$set": plan},
+        upsert=True
+    )
+    if result.upserted_id:
+        print(f"✅ Created plan: {plan['plan_name']}")
+    else:
+        print(f"✅ Updated plan: {plan['plan_name']}")
+# Display summary
+print("\n" + "="*60)
+print("📊 SUBSCRIPTION PLANS")
+print("="*60)
+all_plans = list(subscription_plans.find({"is_active": True}))
+for plan in all_plans:
+    print(f"\n{plan['plan_name']} - ${plan['price']}/month")
+    print(f"  Description: {plan['description']}")
+    # Only print if exists (for compatibility with old plans)
+    if 'stripe_product_id' in plan:
+        print(f"  Stripe Product: {plan['stripe_product_id']}")
+    if 'stripe_price_ids' in plan:
+        monthly_price = plan['stripe_price_ids'].get('monthly', 'N/A')
+        yearly_price = plan['stripe_price_ids'].get('yearly', 'N/A')
+        print(f"  Monthly Price ID: {monthly_price}")
+        print(f"  Yearly Price ID: {yearly_price}")
+    if 'features' in plan:
+        features = plan['features']
+        if isinstance(features, list):
+            print(f"  Features: {', '.join(features)}")
+        else:
+            print(f"  Features: {features}")
+    if 'limits' in plan:
+        print(f"  Limits:")
+        for limit_name, limit_value in plan['limits'].items():
+            print(f"    - {limit_name}: {limit_value}")
+print("\n✅ Subscription plans seeded successfully!")
+client.close()

DetectifAI_db/setup_database.py ADDED Viewed

	@@ -0,0 +1,44 @@

+#!/usr/bin/env python3
+"""
+Database setup script for DetectifAI backend
+This script initializes the MongoDB database with the required collections and indexes.
+"""
+import os
+import sys
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Check if MONGO_URI is set
+if not os.getenv("MONGO_URI"):
+    print("❌ Error: MONGO_URI environment variable not set")
+    print("Please create a .env file with your MongoDB connection string")
+    print("Example: MONGO_URI=mongodb://localhost:27017/detectifai")
+    sys.exit(1)
+try:
+    # Import and run database setup
+    from database_setup import *
+    print("\n✅ Database setup completed successfully!")
+    # Ask if user wants to seed the database
+    seed_choice = input("\nWould you like to seed the database with sample data? (y/n): ").lower().strip()
+    if seed_choice in ['y', 'yes']:
+        print("\n🌱 Seeding database with sample data...")
+        from database_seed import *
+        print("\n✅ Database seeding completed!")
+    else:
+        print("\n⏭️  Skipping database seeding")
+    print("\n🎉 Database initialization complete!")
+    print("\nNext steps:")
+    print("1. Start the integrated Flask app: python app_integrated.py")
+    print("2. Or start the original app: python app.py")
+    print("3. Test the API endpoints at http://localhost:5000")
+except Exception as e:
+    print(f"❌ Error during database setup: {e}")
+    sys.exit(1)

DetectifAI_db/setup_minio.py ADDED Viewed

	@@ -0,0 +1,91 @@

+"""
+S3-compatible Storage Setup and Test Script for DetectifAI (Backblaze B2)
+"""
+from minio import Minio
+from dotenv import load_dotenv
+import os
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
+def setup_minio():
+    """Setup S3-compatible storage (Backblaze B2)"""
+    try:
+        endpoint = os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com')
+        access_key = os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001')
+        secret_key = os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA')
+        secure = os.getenv('MINIO_SECURE', 'true').lower() == 'true'
+        region = os.getenv('MINIO_REGION', 'eu-central-003')
+        # S3 client setup
+        client = Minio(
+            endpoint,
+            access_key=access_key,
+            secret_key=secret_key,
+            secure=secure,
+            region=region or None
+        )
+        # Define required buckets
+        buckets = [
+            "detectifai-videos",       # Original and compressed videos
+            "detectifai-keyframes",    # Extracted keyframes
+            "detectifai-reports"       # Generated reports (HTML/PDF)
+        ]
+        # Verify buckets exist (don't create — buckets managed in B2 dashboard)
+        for bucket in buckets:
+            found = client.bucket_exists(bucket)
+            if found:
+                logger.info(f"✅ Bucket exists: {bucket}")
+            else:
+                logger.warning(f"⚠️ Bucket NOT found: {bucket} — create it in Backblaze B2 dashboard")
+        # Test upload to each bucket
+        test_data = b"DetectifAI Test Data"
+        for bucket in buckets:
+            try:
+                test_object = f"test_{bucket}.txt"
+                client.put_object(
+                    bucket,
+                    test_object,
+                    bytes(test_data),
+                    len(test_data)
+                )
+                logger.info(f"✅ Test upload successful to {bucket}")
+                # Clean up test file
+                client.remove_object(bucket, test_object)
+            except Exception as bucket_error:
+                logger.error(f"❌ Failed to upload test file to {bucket}: {str(bucket_error)}")
+        # List objects in each bucket
+        logger.info("\nCurrent bucket contents:")
+        for bucket in buckets:
+            logger.info(f"\nBucket: {bucket}")
+            try:
+                objects = client.list_objects(bucket, recursive=True)
+                for obj in objects:
+                    logger.info(f"- {obj.object_name} (size: {obj.size} bytes)")
+            except Exception as list_error:
+                logger.error(f"❌ Failed to list objects in {bucket}: {str(list_error)}")
+        return True, "MinIO setup completed successfully"
+    except Exception as e:
+        error_message = f"MinIO setup failed: {str(e)}"
+        logger.error(f"❌ {error_message}")
+        return False, error_message
+if __name__ == "__main__":
+    success, message = setup_minio()
+    if success:
+        logger.info("✅ MinIO setup completed successfully!")
+    else:
+        logger.error(f"❌ MinIO setup failed: {message}")

DetectifAI_db/setup_nlp_bucket.py ADDED Viewed

	@@ -0,0 +1,61 @@

+"""
+Setup script to create the nlp-images bucket in MinIO
+"""
+import os
+from dotenv import load_dotenv
+from minio import Minio
+from minio.error import S3Error
+import logging
+load_dotenv()
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA")
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+NLP_IMAGES_BUCKET = "nlp-images"
+def setup_nlp_bucket():
+    """Create the nlp-images bucket if it doesn't exist"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        if client.bucket_exists(NLP_IMAGES_BUCKET):
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' already exists")
+            return True
+        else:
+            logger.info(f"Creating MinIO bucket '{NLP_IMAGES_BUCKET}'...")
+            client.make_bucket(NLP_IMAGES_BUCKET)
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' created successfully")
+            return True
+    except S3Error as e:
+        if e.code == "BucketAlreadyOwnedByYou" or e.code == "BucketAlreadyExists":
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' already exists")
+            return True
+        else:
+            logger.error(f"❌ Error creating bucket: {e}")
+            return False
+    except Exception as e:
+        logger.error(f"❌ Error connecting to MinIO: {e}")
+        return False
+if __name__ == "__main__":
+    logger.info("Setting up nlp-images bucket...")
+    success = setup_nlp_bucket()
+    if success:
+        logger.info("✅ Setup complete!")
+    else:
+        logger.error("❌ Setup failed!")
+        exit(1)

DetectifAI_db/upload_caption_images.py ADDED Viewed

	@@ -0,0 +1,264 @@

+"""
+Upload Caption Images to MinIO
+This script uploads the image files referenced in the captions to the MinIO nlp-images bucket.
+The images should be in a local directory (e.g., 'caption_images' folder).
+Usage:
+    python upload_caption_images.py [--image-dir <directory>]
+"""
+import os
+import sys
+from pathlib import Path
+from dotenv import load_dotenv
+from minio import Minio
+from minio.error import S3Error
+import logging
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA")
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+NLP_IMAGES_BUCKET = "nlp-images"
+# Expected image files from upload_captions.py
+EXPECTED_IMAGES = [
+    "img1.webp",
+    "img2.jpg",
+    "img3.png",
+    "img4.png",
+    "img5.jpg",
+    "img6.webp",
+    "img7.webp",
+    "img8.webp",
+    "img9.jpg",
+    "img10.png"
+]
+def setup_minio_client():
+    """Initialize MinIO client"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        return client
+    except Exception as e:
+        logger.error(f"❌ Error connecting to MinIO: {e}")
+        return None
+def ensure_bucket_exists(client, bucket_name):
+    """Ensure the bucket exists, create if it doesn't"""
+    try:
+        if not client.bucket_exists(bucket_name):
+            logger.info(f"Creating bucket: {bucket_name}")
+            client.make_bucket(bucket_name)
+            logger.info(f"✅ Created bucket: {bucket_name}")
+        else:
+            logger.info(f"✅ Bucket '{bucket_name}' already exists")
+        return True
+    except S3Error as e:
+        if e.code == "BucketAlreadyOwnedByYou" or e.code == "BucketAlreadyExists":
+            logger.info(f"✅ Bucket '{bucket_name}' already exists")
+            return True
+        logger.error(f"❌ Error creating bucket: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Unexpected error: {e}")
+        return False
+def upload_image(client, bucket_name, image_path, object_name):
+    """Upload a single image file to MinIO"""
+    try:
+        if not os.path.exists(image_path):
+            logger.warning(f"⚠️ Image file not found: {image_path}")
+            return False
+        file_size = os.path.getsize(image_path)
+        # Determine content type based on extension
+        ext = image_path.lower().split('.')[-1]
+        content_type_map = {
+            'jpg': 'image/jpeg',
+            'jpeg': 'image/jpeg',
+            'png': 'image/png',
+            'webp': 'image/webp',
+            'gif': 'image/gif'
+        }
+        content_type = content_type_map.get(ext, 'application/octet-stream')
+        with open(image_path, 'rb') as file_data:
+            client.put_object(
+                bucket_name,
+                object_name,
+                file_data,
+                length=file_size,
+                content_type=content_type
+            )
+        logger.info(f"✅ Uploaded: {object_name} ({file_size} bytes)")
+        return True
+    except S3Error as e:
+        logger.error(f"❌ S3Error uploading {object_name}: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Error uploading {object_name}: {e}")
+        return False
+def find_image_directory():
+    """Try to find the directory containing caption images"""
+    # Common locations to check
+    possible_dirs = [
+        Path(__file__).parent / "caption_images",
+        Path(__file__).parent.parent / "caption_images",
+        Path(__file__).parent / "images",
+        Path(__file__).parent.parent / "images",
+        Path(__file__).parent / "DetectifAI_db" / "caption_images",
+    ]
+    for dir_path in possible_dirs:
+        if dir_path.exists() and dir_path.is_dir():
+            # Check if it contains any of the expected images
+            files = [f.name for f in dir_path.iterdir() if f.is_file()]
+            if any(img in files for img in EXPECTED_IMAGES):
+                return dir_path
+    return None
+def upload_all_images(image_dir=None):
+    """Upload all caption images to MinIO"""
+    logger.info("🚀 Starting Caption Image Upload Process")
+    logger.info("=" * 80)
+    # Initialize MinIO client
+    client = setup_minio_client()
+    if not client:
+        logger.error("❌ Failed to initialize MinIO client")
+        return False
+    # Ensure bucket exists
+    if not ensure_bucket_exists(client, NLP_IMAGES_BUCKET):
+        logger.error("❌ Failed to ensure bucket exists")
+        return False
+    # Find image directory
+    if image_dir is None:
+        image_dir = find_image_directory()
+    if image_dir is None:
+        logger.error("❌ Could not find image directory")
+        logger.info("💡 Please provide the image directory path:")
+        logger.info("   python upload_caption_images.py --image-dir <path>")
+        logger.info("")
+        logger.info("Expected image files:")
+        for img in EXPECTED_IMAGES:
+            logger.info(f"   - {img}")
+        return False
+    image_dir = Path(image_dir)
+    if not image_dir.exists():
+        logger.error(f"❌ Image directory does not exist: {image_dir}")
+        return False
+    logger.info(f"📁 Using image directory: {image_dir}")
+    logger.info("")
+    # Upload each image
+    uploaded_count = 0
+    failed_count = 0
+    missing_count = 0
+    for image_name in EXPECTED_IMAGES:
+        image_path = image_dir / image_name
+        if not image_path.exists():
+            logger.warning(f"⚠️ Image not found: {image_name}")
+            missing_count += 1
+            continue
+        if upload_image(client, NLP_IMAGES_BUCKET, str(image_path), image_name):
+            uploaded_count += 1
+        else:
+            failed_count += 1
+    # Summary
+    logger.info("")
+    logger.info("=" * 80)
+    logger.info("📊 Upload Summary:")
+    logger.info(f"   ✅ Successfully uploaded: {uploaded_count}")
+    logger.info(f"   ❌ Failed: {failed_count}")
+    logger.info(f"   ⚠️ Missing: {missing_count}")
+    logger.info(f"   📦 Total expected: {len(EXPECTED_IMAGES)}")
+    logger.info("=" * 80)
+    if uploaded_count > 0:
+        logger.info("✅ Image upload process completed!")
+        return True
+    else:
+        logger.error("❌ No images were uploaded")
+        return False
+def list_bucket_contents(client, bucket_name):
+    """List all objects in the bucket"""
+    try:
+        logger.info(f"\n📦 Contents of '{bucket_name}' bucket:")
+        objects = client.list_objects(bucket_name, recursive=True)
+        count = 0
+        for obj in objects:
+            logger.info(f"   - {obj.object_name} ({obj.size} bytes)")
+            count += 1
+        if count == 0:
+            logger.info("   (bucket is empty)")
+        return count
+    except Exception as e:
+        logger.error(f"❌ Error listing bucket contents: {e}")
+        return 0
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Upload caption images to MinIO")
+    parser.add_argument(
+        "--image-dir",
+        type=str,
+        help="Directory containing the caption images"
+    )
+    parser.add_argument(
+        "--list",
+        action="store_true",
+        help="List current contents of nlp-images bucket"
+    )
+    args = parser.parse_args()
+    if args.list:
+        client = setup_minio_client()
+        if client:
+            list_bucket_contents(client, NLP_IMAGES_BUCKET)
+    else:
+        success = upload_all_images(args.image_dir)
+        sys.exit(0 if success else 1)

DetectifAI_db/upload_captions.py ADDED Viewed

	@@ -0,0 +1,349 @@

+"""
+Upload Captions to MongoDB
+This script uploads 10 hardcoded captions linked to videos stored in the
+MinIO 'nlp-images' bucket. The captions are inserted into the MongoDB
+'event_descriptions' collection.
+Usage:
+    python upload_captions.py
+"""
+import os
+import uuid
+from datetime import datetime
+from dotenv import load_dotenv
+from pymongo import MongoClient
+from minio import Minio
+import logging
+import numpy as np
+import json
+# Optional imports for embeddings and FAISS
+try:
+    from sentence_transformers import SentenceTransformer
+    import faiss
+    SENTER_AVAILABLE = True
+except Exception:
+    SENTER_AVAILABLE = False
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/detectifai")
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "s3.eu-central-003.backblazeb2.com")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "00367479ffb7e4e0000000001")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "K003opTvf92ijRj5dM7H1dgrlwcGTdA")
+MINIO_SECURE = os.getenv("MINIO_SECURE", "true").lower() == "true"
+MINIO_REGION = os.getenv("MINIO_REGION", "eu-central-003")
+# MinIO bucket for NLP images/videos
+NLP_IMAGES_BUCKET = "nlp-images"
+# Hardcoded captions with video references
+HARDCODED_CAPTIONS = [
+    {
+        "video_filename": "img1.webp",
+        "caption": "Forty story building reported to be on fire with smoke visible from several floors",
+        "confidence": 0.95
+    },
+    {
+        "video_filename": "img2.jpg",
+        "caption": "Smoke seen to be coming from a building next to tower by the road",
+        "confidence": 0.87
+    },
+    {
+        "video_filename": "img3.png",
+        "caption": "Large flames visible on a local high-rise building with fire department on the scene",
+        "confidence": 0.92
+    },
+    {
+        "video_filename": "img4.png",
+        "caption": "Wide parking of local school building with many parked cars",
+        "confidence": 0.92
+    },
+    {
+        "video_filename": "img5.jpg",
+        "caption": "Smoke coming from skyscraper fire brigade on scene trying to extinguish the flames",
+        "confidence": 0.89
+    },
+    {
+        "video_filename": "img6.webp",
+        "caption": "dog sitting on grass",
+        "confidence": 0.91
+    },
+    {
+        "video_filename": "img7.webp",
+        "caption": "dog sitting infront of tree trunk in park",
+        "confidence": 0.88
+    },
+    {
+        "video_filename": "img8.webp",
+        "caption": "dog out on a hike with owner",
+        "confidence": 0.84
+    },
+    {
+        "video_filename": "img9.jpg",
+        "caption": "dog jumping over obstacle",
+        "confidence": 0.96
+    },
+    {
+        "video_filename": "img10.png",
+        "caption": "puppy sleeping while hugging stuffed animal",
+        "confidence": 0.79
+    }
+]
+# Paths for FAISS index and id map
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+FAISS_INDEX_PATH = os.path.join(BASE_DIR, "faiss_captions.index")
+FAISS_IDMAP_PATH = os.path.join(BASE_DIR, "faiss_captions_idmap.json")
+def verify_minio_bucket():
+    """Verify that the nlp-images bucket exists in MinIO"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        if client.bucket_exists(NLP_IMAGES_BUCKET):
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' exists")
+            return True
+        else:
+            logger.warning(f"⚠️ MinIO bucket '{NLP_IMAGES_BUCKET}' does not exist")
+            logger.info(f"Creating bucket '{NLP_IMAGES_BUCKET}'...")
+            client.make_bucket(NLP_IMAGES_BUCKET)
+            logger.info(f"✅ MinIO bucket '{NLP_IMAGES_BUCKET}' created")
+            return True
+    except Exception as e:
+        logger.error(f"❌ Error connecting to MinIO: {e}")
+        return False
+def list_objects_in_bucket():
+    """List all objects in the nlp-images bucket"""
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=MINIO_SECURE,
+            region=MINIO_REGION
+        )
+        objects = client.list_objects(NLP_IMAGES_BUCKET)
+        object_list = [obj.object_name for obj in objects]
+        if object_list:
+            logger.info(f"📁 Objects in '{NLP_IMAGES_BUCKET}' bucket:")
+            for obj in object_list:
+                logger.info(f"   - {obj}")
+            return object_list
+        else:
+            logger.warning(f"⚠️ No objects found in '{NLP_IMAGES_BUCKET}' bucket")
+            return []
+    except Exception as e:
+        logger.error(f"❌ Error listing objects: {e}")
+        return []
+def upload_captions_to_mongodb():
+    """Upload captions to MongoDB event_descriptions collection"""
+    try:
+        # Connect to MongoDB
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        collection = db["event_descriptions"]
+        logger.info(f"📊 Connected to MongoDB database")
+        logger.info(f"📝 Uploading {len(HARDCODED_CAPTIONS)} captions to 'event_descriptions' collection...")
+        inserted_count = 0
+        inserted_documents = []
+        # Prepare embedding model and lists for FAISS
+        embeddings = []
+        id_map = []  # maps faiss idx -> description_id
+        if not SENTER_AVAILABLE:
+            logger.warning("⚠️ sentence-transformers or faiss not available; captions will be stored without embeddings")
+        else:
+            # Load model once
+            try:
+                embed_model = SentenceTransformer("all-mpnet-base-v2")
+                embed_dim = 768
+                logger.info("✅ Loaded SentenceTransformer 'all-mpnet-base-v2' for embeddings")
+            except Exception as e:
+                logger.error(f"❌ Failed to load embedding model: {e}")
+                embed_model = None
+        for i, caption_data in enumerate(HARDCODED_CAPTIONS, 1):
+            # Generate unique IDs
+            description_id = f"desc_{uuid.uuid4().hex[:12]}"
+            event_id = f"event_{uuid.uuid4().hex[:12]}"
+            # Compute embedding if available
+            text_emb_list = []
+            if SENTER_AVAILABLE and embed_model is not None:
+                try:
+                    emb = embed_model.encode(caption_data["caption"], normalize_embeddings=True).astype("float32")
+                    text_emb_list = emb.tolist()
+                    embeddings.append(emb)
+                    id_map.append(description_id)
+                except Exception as e:
+                    logger.warning(f"⚠️ Failed to compute embedding for caption {i}: {e}")
+            # Create caption document
+            caption_doc = {
+                "description_id": description_id,
+                "event_id": event_id,
+                "caption": caption_data["caption"],
+                "confidence": caption_data["confidence"],
+                "text_embedding": text_emb_list,
+                "video_reference": {
+                    "bucket": NLP_IMAGES_BUCKET,
+                    "object_name": caption_data["video_filename"],
+                    "minio_path": f"{NLP_IMAGES_BUCKET}/{caption_data['video_filename']}"
+                },
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+            # Insert into MongoDB
+            result = collection.insert_one(caption_doc)
+            inserted_count += 1
+            inserted_documents.append({
+                "index": i,
+                "description_id": description_id,
+                "event_id": event_id,
+                "video": caption_data["video_filename"],
+                "confidence": caption_data["confidence"]
+            })
+            logger.info(f"✅ [{i}/10] Inserted caption: {description_id}")
+        logger.info(f"\n🎉 Successfully uploaded {inserted_count} captions to MongoDB")
+        logger.info("\n📋 Inserted Captions Summary:")
+        logger.info("=" * 80)
+        for doc in inserted_documents:
+            logger.info(
+                f"[{doc['index']:2d}] ID: {doc['description_id']} | "
+                f"Event: {doc['event_id']} | "
+                f"Video: {doc['video']} | "
+                f"Confidence: {doc['confidence']:.2f}"
+            )
+        logger.info("=" * 80)
+        # Display summary statistics
+        total_captions = collection.count_documents({})
+        logger.info(f"\n📊 Total captions in collection: {total_captions}")
+        # Build and persist FAISS index if embeddings were computed
+        if SENTER_AVAILABLE and embeddings:
+            try:
+                emb_matrix = np.stack(embeddings, axis=0).astype("float32")
+                dim = emb_matrix.shape[1]
+                index = faiss.IndexFlatIP(dim)
+                # Add embeddings
+                index.add(emb_matrix)
+                # Write index to disk
+                faiss.write_index(index, FAISS_INDEX_PATH)
+                # Save id map (index -> description_id)
+                with open(FAISS_IDMAP_PATH, "w", encoding="utf-8") as f:
+                    json.dump(id_map, f, indent=2)
+                logger.info(f"✅ FAISS index saved to: {FAISS_INDEX_PATH}")
+                logger.info(f"✅ FAISS id map saved to: {FAISS_IDMAP_PATH}")
+            except Exception as e:
+                logger.error(f"❌ Failed to build/save FAISS index: {e}")
+        return True
+    except Exception as e:
+        logger.error(f"❌ Error uploading captions to MongoDB: {e}")
+        return False
+def verify_uploaded_captions():
+    """Verify that captions were successfully uploaded"""
+    try:
+        client = MongoClient(MONGO_URI)
+        db = client.get_default_database()
+        collection = db["event_descriptions"]
+        # Find recently uploaded captions
+        captions = list(collection.find(
+            {"video_reference": {"$exists": True}},
+            {"_id": 0, "description_id": 1, "caption": 1, "confidence": 1, "video_reference": 1}
+        ).limit(10))
+        if captions:
+            logger.info(f"\n✅ Verification: Found {len(captions)} captions with video references")
+            logger.info("\n📝 Sample Captions:")
+            logger.info("=" * 80)
+            for cap in captions[:3]:
+                logger.info(f"ID: {cap['description_id']}")
+                logger.info(f"Caption: {cap['caption']}")
+                logger.info(f"Confidence: {cap['confidence']:.2f}")
+                logger.info(f"Video: {cap['video_reference']['object_name']}")
+                logger.info("-" * 80)
+            return True
+        else:
+            logger.warning("⚠️ No captions found with video references")
+            return False
+    except Exception as e:
+        logger.error(f"❌ Error verifying captions: {e}")
+        return False
+def main():
+    """Main execution function"""
+    logger.info("🚀 Starting Caption Upload Process")
+    logger.info("=" * 80)
+    # Step 1: Verify MinIO bucket
+    logger.info("\n[Step 1/4] Verifying MinIO bucket...")
+    if not verify_minio_bucket():
+        logger.error("❌ Failed to verify MinIO bucket. Exiting.")
+        return False
+    # Step 2: List objects in bucket
+    logger.info("\n[Step 2/4] Listing objects in MinIO bucket...")
+    objects = list_objects_in_bucket()
+    # Step 3: Upload captions to MongoDB
+    logger.info("\n[Step 3/4] Uploading captions to MongoDB...")
+    if not upload_captions_to_mongodb():
+        logger.error("❌ Failed to upload captions. Exiting.")
+        return False
+    # Step 4: Verify upload
+    logger.info("\n[Step 4/4] Verifying uploaded captions...")
+    if not verify_uploaded_captions():
+        logger.warning("⚠️ Verification encountered issues")
+    logger.info("\n" + "=" * 80)
+    logger.info("🎉 Caption Upload Process Completed Successfully!")
+    logger.info("=" * 80)
+    return True
+if __name__ == "__main__":
+    success = main()
+    exit(0 if success else 1)

DetectifAI_db/vector_index.py ADDED Viewed

	@@ -0,0 +1,348 @@

+import faiss
+import numpy as np
+from pymongo import MongoClient
+import os
+from dotenv import load_dotenv
+import pickle
+from typing import List, Dict, Tuple, Optional
+import logging
+load_dotenv()
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class FAISSIndexManager:
+    """Manages FAISS indices for text and visual embeddings"""
+    def __init__(self, mongo_uri: str, db_name: str = None):
+        self.mongo_client = MongoClient(mongo_uri)
+        self.db = self.mongo_client.get_default_database() if not db_name else self.mongo_client[db_name]
+        # Collection references
+        self.event_descriptions = self.db.event_description
+        self.events = self.db.event
+        # FAISS indices
+        self.text_index = None
+        self.visual_index = None
+        # Index metadata
+        self.text_index_metadata = {}  # Maps FAISS ID to MongoDB document ID
+        self.visual_index_metadata = {}  # Maps FAISS ID to MongoDB document ID
+        # Embedding dimensions (adjust based on your embedding model)
+        self.text_embedding_dim = 384  # Common for sentence-transformers
+        self.visual_embedding_dim = 512  # Common for visual embeddings
+        # Index file paths
+        self.text_index_path = "faiss_text_index.bin"
+        self.visual_index_path = "faiss_visual_index.bin"
+        self.text_metadata_path = "faiss_text_metadata.pkl"
+        self.visual_metadata_path = "faiss_visual_metadata.pkl"
+        self._initialize_indices()
+    def _initialize_indices(self):
+        """Initialize or load existing FAISS indices"""
+        try:
+            # Try to load existing indices
+            if os.path.exists(self.text_index_path) and os.path.exists(self.text_metadata_path):
+                self._load_text_index()
+                logger.info("Loaded existing text index")
+            else:
+                self._create_text_index()
+                logger.info("Created new text index")
+            if os.path.exists(self.visual_index_path) and os.path.exists(self.visual_metadata_path):
+                self._load_visual_index()
+                logger.info("Loaded existing visual index")
+            else:
+                self._create_visual_index()
+                logger.info("Created new visual index")
+        except Exception as e:
+            logger.error(f"Error initializing indices: {e}")
+            # Fallback to creating new indices
+            self._create_text_index()
+            self._create_visual_index()
+    def _create_text_index(self):
+        """Create a new FAISS index for text embeddings"""
+        self.text_index = faiss.IndexFlatIP(self.text_embedding_dim)  # Inner product for cosine similarity
+        self.text_index_metadata = {}
+        self._save_text_index()
+    def _create_visual_index(self):
+        """Create a new FAISS index for visual embeddings"""
+        self.visual_index = faiss.IndexFlatIP(self.visual_embedding_dim)  # Inner product for cosine similarity
+        self.visual_index_metadata = {}
+        self._save_visual_index()
+    def _load_text_index(self):
+        """Load text index from disk"""
+        self.text_index = faiss.read_index(self.text_index_path)
+        with open(self.text_metadata_path, 'rb') as f:
+            self.text_index_metadata = pickle.load(f)
+    def _load_visual_index(self):
+        """Load visual index from disk"""
+        self.visual_index = faiss.read_index(self.visual_index_path)
+        with open(self.visual_metadata_path, 'rb') as f:
+            self.visual_index_metadata = pickle.load(f)
+    def _save_text_index(self):
+        """Save text index to disk"""
+        if self.text_index is not None:
+            faiss.write_index(self.text_index, self.text_index_path)
+            with open(self.text_metadata_path, 'wb') as f:
+                pickle.dump(self.text_index_metadata, f)
+    def _save_visual_index(self):
+        """Save visual index to disk"""
+        if self.visual_index is not None:
+            faiss.write_index(self.visual_index, self.visual_index_path)
+            with open(self.visual_metadata_path, 'wb') as f:
+                pickle.dump(self.visual_index_metadata, f)
+    def rebuild_text_index(self):
+        """Rebuild text index from MongoDB data"""
+        logger.info("Rebuilding text index from MongoDB...")
+        # Create new index
+        self._create_text_index()
+        # Fetch all event descriptions with embeddings
+        cursor = self.event_descriptions.find(
+            {"text_embedding": {"$exists": True, "$ne": []}},
+            {"_id": 0, "description_id": 1, "text_embedding": 1}
+        )
+        embeddings = []
+        metadata = {}
+        for doc in cursor:
+            embedding = np.array(doc["text_embedding"], dtype=np.float32)
+            if len(embedding) == self.text_embedding_dim:
+                faiss_id = len(embeddings)
+                embeddings.append(embedding)
+                metadata[faiss_id] = doc["description_id"]
+        if embeddings:
+            embeddings_array = np.vstack(embeddings)
+            self.text_index.add(embeddings_array)
+            self.text_index_metadata = metadata
+            self._save_text_index()
+            logger.info(f"Rebuilt text index with {len(embeddings)} embeddings")
+        else:
+            logger.warning("No text embeddings found in MongoDB")
+    def rebuild_visual_index(self):
+        """Rebuild visual index from MongoDB data"""
+        logger.info("Rebuilding visual index from MongoDB...")
+        # Create new index
+        self._create_visual_index()
+        # Fetch all events with visual embeddings
+        cursor = self.events.find(
+            {"visual_embedding": {"$exists": True, "$ne": []}},
+            {"_id": 0, "event_id": 1, "visual_embedding": 1}
+        )
+        embeddings = []
+        metadata = {}
+        for doc in cursor:
+            embedding = np.array(doc["visual_embedding"], dtype=np.float32)
+            if len(embedding) == self.visual_embedding_dim:
+                faiss_id = len(embeddings)
+                embeddings.append(embedding)
+                metadata[faiss_id] = doc["event_id"]
+        if embeddings:
+            embeddings_array = np.vstack(embeddings)
+            self.visual_index.add(embeddings_array)
+            self.visual_index_metadata = metadata
+            self._save_visual_index()
+            logger.info(f"Rebuilt visual index with {len(embeddings)} embeddings")
+        else:
+            logger.warning("No visual embeddings found in MongoDB")
+    def add_text_embedding(self, description_id: str, embedding: List[float]) -> bool:
+        """Add a text embedding to the index"""
+        try:
+            embedding_array = np.array(embedding, dtype=np.float32).reshape(1, -1)
+            if embedding_array.shape[1] != self.text_embedding_dim:
+                logger.error(f"Text embedding dimension mismatch: expected {self.text_embedding_dim}, got {embedding_array.shape[1]}")
+                return False
+            faiss_id = self.text_index.ntotal
+            self.text_index.add(embedding_array)
+            self.text_index_metadata[faiss_id] = description_id
+            self._save_text_index()
+            logger.info(f"Added text embedding for description_id: {description_id}")
+            return True
+        except Exception as e:
+            logger.error(f"Error adding text embedding: {e}")
+            return False
+    def add_visual_embedding(self, event_id: str, embedding: List[float]) -> bool:
+        """Add a visual embedding to the index"""
+        try:
+            embedding_array = np.array(embedding, dtype=np.float32).reshape(1, -1)
+            if embedding_array.shape[1] != self.visual_embedding_dim:
+                logger.error(f"Visual embedding dimension mismatch: expected {self.visual_embedding_dim}, got {embedding_array.shape[1]}")
+                return False
+            faiss_id = self.visual_index.ntotal
+            self.visual_index.add(embedding_array)
+            self.visual_index_metadata[faiss_id] = event_id
+            self._save_visual_index()
+            logger.info(f"Added visual embedding for event_id: {event_id}")
+            return True
+        except Exception as e:
+            logger.error(f"Error adding visual embedding: {e}")
+            return False
+    def search_text_embeddings(self, query_embedding: List[float], k: int = 10) -> List[Dict]:
+        """Search for similar text embeddings"""
+        try:
+            if self.text_index.ntotal == 0:
+                return []
+            query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+            if query_array.shape[1] != self.text_embedding_dim:
+                logger.error(f"Query embedding dimension mismatch: expected {self.text_embedding_dim}, got {query_array.shape[1]}")
+                return []
+            # Search FAISS
+            scores, indices = self.text_index.search(query_array, min(k, self.text_index.ntotal))
+            # Fetch corresponding documents from MongoDB
+            results = []
+            for score, idx in zip(scores[0], indices[0]):
+                if idx in self.text_index_metadata:
+                    description_id = self.text_index_metadata[idx]
+                    doc = self.event_descriptions.find_one(
+                        {"description_id": description_id},
+                        {"_id": 0}
+                    )
+                    if doc:
+                        doc["similarity_score"] = float(score)
+                        results.append(doc)
+            return results
+        except Exception as e:
+            logger.error(f"Error searching text embeddings: {e}")
+            return []
+    def search_visual_embeddings(self, query_embedding: List[float], k: int = 10) -> List[Dict]:
+        """Search for similar visual embeddings"""
+        try:
+            if self.visual_index.ntotal == 0:
+                return []
+            query_array = np.array(query_embedding, dtype=np.float32).reshape(1, -1)
+            if query_array.shape[1] != self.visual_embedding_dim:
+                logger.error(f"Query embedding dimension mismatch: expected {self.visual_embedding_dim}, got {query_array.shape[1]}")
+                return []
+            # Search FAISS
+            scores, indices = self.visual_index.search(query_array, min(k, self.visual_index.ntotal))
+            # Fetch corresponding documents from MongoDB
+            results = []
+            for score, idx in zip(scores[0], indices[0]):
+                if idx in self.visual_index_metadata:
+                    event_id = self.visual_index_metadata[idx]
+                    doc = self.events.find_one(
+                        {"event_id": event_id},
+                        {"_id": 0}
+                    )
+                    if doc:
+                        doc["similarity_score"] = float(score)
+                        results.append(doc)
+            return results
+        except Exception as e:
+            logger.error(f"Error searching visual embeddings: {e}")
+            return []
+    def get_index_stats(self) -> Dict:
+        """Get statistics about the indices"""
+        return {
+            "text_index_size": self.text_index.ntotal if self.text_index else 0,
+            "visual_index_size": self.visual_index.ntotal if self.visual_index else 0,
+            "text_embedding_dim": self.text_embedding_dim,
+            "visual_embedding_dim": self.visual_embedding_dim
+        }
+    def close(self):
+        """Close the index manager and save indices"""
+        self._save_text_index()
+        self._save_visual_index()
+        self.mongo_client.close()
+# Global instance
+faiss_manager = None
+def get_faiss_manager() -> FAISSIndexManager:
+    """Get the global FAISS manager instance"""
+    global faiss_manager
+    if faiss_manager is None:
+        mongo_uri = os.getenv("MONGO_URI")
+        faiss_manager = FAISSIndexManager(mongo_uri)
+    return faiss_manager
+def generate_text_embedding(text: str) -> List[float]:
+    """
+    Generate text embeddings using SentenceTransformer.
+    Uses all-mpnet-base-v2 for compatibility with NLP search (query_retreival.py).
+    Model is lazy-loaded and cached on first call.
+    """
+    global _text_embedding_model
+    if '_text_embedding_model' not in globals() or _text_embedding_model is None:
+        try:
+            from sentence_transformers import SentenceTransformer
+            _text_embedding_model = SentenceTransformer('all-mpnet-base-v2')
+            logger.info("✅ Loaded SentenceTransformer (all-mpnet-base-v2) for text embeddings")
+        except Exception as e:
+            logger.error(f"Failed to load SentenceTransformer: {e}")
+            # Fallback to deterministic random for graceful degradation
+            np.random.seed(hash(text) % 2**32)
+            return np.random.randn(768).astype(np.float32).tolist()
+    try:
+        embedding = _text_embedding_model.encode(text, normalize_embeddings=True)
+        return embedding.astype(np.float32).tolist()
+    except Exception as e:
+        logger.error(f"Failed to generate embedding for text: {e}")
+        np.random.seed(hash(text) % 2**32)
+        return np.random.randn(768).astype(np.float32).tolist()
+# Global model cache
+_text_embedding_model = None
+def generate_visual_embedding(image_data: bytes = None) -> List[float]:
+    """
+    Placeholder function to generate visual embeddings.
+    Replace this with your actual visual embedding model.
+    """
+    # For now, return a random embedding of the correct dimension
+    # In production, use a proper visual embedding model
+    np.random.seed(42)  # Fixed seed for demo
+    return np.random.randn(512).astype(np.float32).tolist()

Dockerfile ADDED Viewed

	@@ -0,0 +1,92 @@

+# ============================================================
+# DetectifAI Backend — Hugging Face Spaces (Docker SDK, CPU)
+# ============================================================
+FROM python:3.11-slim
+# ---- Non-interactive, UTF-8 ----
+ENV DEBIAN_FRONTEND=noninteractive \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PORT=7860
+WORKDIR /app
+# ---- System deps (OpenCV, WeasyPrint, ffmpeg) ----
+# libgl1 and libgdk-pixbuf-2.0-0 are the real package names; the transitional
+# aliases libgl1-mesa-glx / libgdk-pixbuf2.0-0 are not installable on the recent
+# Debian releases that python:3.11-slim is built on, which breaks the build.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    libgl1 libglib2.0-0 libsm6 libxext6 libxrender-dev \
+    libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf-2.0-0 \
+    libffi-dev shared-mime-info \
+    ffmpeg \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# ---- Install PyTorch CPU-only first (saves ~1 GB vs CUDA) ----
+RUN pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+# ---- Python deps (torch excluded — installed above as CPU-only) ----
+COPY requirements-docker.txt .
+RUN pip install --no-cache-dir -r requirements-docker.txt
+# ---- Copy application code ----
+# Core application files
+COPY app.py config.py main_pipeline.py database_video_service.py \
+     object_detection.py behavior_analysis_integrator.py \
+     video_captioning_integrator.py event_aggregation.py \
+     video_segmentation.py highlight_reel.py video_compression.py \
+     json_reports.py detectifai_events.py facial_recognition.py \
+     stripe_service.py subscription_middleware.py subscription_routes.py \
+     alert_routes.py real_time_alerts.py event_clip_generator.py \
+     extract_upload_keyframes.py live_stream_processor.py \
+     start_detectifai.py ./
+# Sub-packages
+COPY core/ core/
+COPY database/ database/
+COPY report_generation/ report_generation/
+COPY video_captioning/ video_captioning/
+COPY behavior_analysis/ behavior_analysis/
+COPY nlp_search/ nlp_search/
+COPY DetectifAI_db/ DetectifAI_db/
+# Small model files (<50 MB each) — ship in image
+COPY models/fire_YOLO11.pt models/fire_YOLO11.pt
+COPY models/weapon_YOLO11.pt models/weapon_YOLO11.pt
+COPY models/merged_fire_knife_gun.pt models/merged_fire_knife_gun.pt
+COPY "models/best (2).pt" "models/best (2).pt"
+COPY models/classifier_svm.pkl models/classifier_svm.pkl
+COPY models/label_encoder.pkl models/label_encoder.pkl
+COPY models/metadata.json models/metadata.json
+# Copy the top-level model/ directory (FAISS/SVM face index)
+COPY model/ /app/model/
+# ---- Pre-create writable directories ----
+RUN mkdir -p /app/uploads /app/video_processing_outputs /app/logs \
+    /app/temp_faces /app/report_generation/models \
+    && chmod -R 777 /app/uploads /app/video_processing_outputs /app/logs /app/temp_faces
+# ---- Download large models at build time (cached in Docker layer) ----
+# fight_detection.pt & accident_detection.pt (~127 MB each)
+# Qwen2.5-3B GGUF (~2 GB)
+# This runs once during build; layer is cached on HF Spaces.
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+print('Downloading fight_detection.pt...'); \
+hf_hub_download('blacksinisterx/detectifai-models', 'fight_detection.pt', local_dir='/app/behavior_analysis', local_dir_use_symlinks=False); \
+print('Downloading accident_detection.pt...'); \
+hf_hub_download('blacksinisterx/detectifai-models', 'accident_detection.pt', local_dir='/app/behavior_analysis', local_dir_use_symlinks=False); \
+print('Done with behavior models.'); \
+" || echo "WARNING: Could not download behavior models — will retry at startup"
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+print('Downloading Qwen2.5-3B GGUF (~2 GB)...'); \
+hf_hub_download('Qwen/Qwen2.5-3B-Instruct-GGUF', 'qwen2.5-3b-instruct-q4_k_m.gguf', local_dir='/app/report_generation/models', local_dir_use_symlinks=False); \
+print('Done with LLM model.'); \
+" || echo "WARNING: Could not download LLM model — report generation will download on first use"
+EXPOSE 7860
+# ---- Start Flask ----
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,31 @@
 ---
-title: Detectifai Backend
-emoji: 📚
-colorFrom: yellow
-colorTo: indigo
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: DetectifAI Backend
+emoji: "\U0001F50D"
+colorFrom: blue
+colorTo: red
 sdk: docker
+app_port: 7860
 ---
+# DetectifAI Backend API
+AI-powered CCTV surveillance system backend. Runs Flask + PyTorch + YOLO on CPU.
+## Features
+- Video upload & processing (object detection, action recognition)
+- Fire / weapon / fight / accident / wall-climbing detection
+- Video captioning with BLIP
+- Facial recognition with FaceNet
+- Forensic report generation with local LLM (Qwen2.5-3B)
+- Stripe subscription management
+## Environment Variables (set in Space Settings → Secrets)
+- `MONGO_URI` — MongoDB Atlas connection string
+- `MINIO_ENDPOINT` — Cloud object storage endpoint (Cloudflare R2 recommended)
+- `MINIO_ACCESS_KEY` — Storage access key
+- `MINIO_SECRET_KEY` — Storage secret key
+- `MINIO_SECURE` — `true` for HTTPS
+- `JWT_SECRET` — JWT signing secret
+- `STRIPE_SECRET_KEY` — Stripe secret key
+- `FRONTEND_URL` — Vercel frontend URL (for CORS)
+- `CORS_ORIGINS` — Comma-separated allowed origins

alert_routes.py ADDED Viewed

	@@ -0,0 +1,361 @@

+"""
+Alert API Routes for DetectifAI
+Flask Blueprint providing:
+- SSE (Server-Sent Events) endpoint for real-time alert streaming
+- REST endpoints for alert confirmation/dismissal
+- Alert history and statistics
+- Alert snapshot image serving
+"""
+import json
+import time
+import logging
+import queue
+from datetime import datetime
+from flask import Blueprint, request, jsonify, Response, stream_with_context
+from real_time_alerts import get_alert_engine
+logger = logging.getLogger(__name__)
+alert_bp = Blueprint('alerts', __name__, url_prefix='/api/alerts')
+# ========================================
+# SSE Stream Endpoint
+# ========================================
+@alert_bp.route('/stream', methods=['GET'])
+def alert_stream():
+    """
+    SSE (Server-Sent Events) endpoint for real-time alert streaming.
+    Frontend connects to this endpoint and receives push notifications
+    whenever a new alert is generated by the live stream pipeline.
+    Response format (SSE):
+        event: alert
+        data: {"alert_id": "...", "severity": "critical", ...}
+        event: alert_update
+        data: {"alert_id": "...", "status": "confirmed", ...}
+        event: heartbeat
+        data: {"time": 1234567890}
+    """
+    engine = get_alert_engine()
+    subscriber_queue = engine.subscribe()
+    def event_stream():
+        try:
+            # Send initial connection event
+            yield f"event: connected\ndata: {json.dumps({'message': 'Connected to alert stream', 'timestamp': time.time()})}\n\n"
+            # Send any active pending alerts immediately
+            active = engine.get_active_alerts()
+            if active:
+                yield f"event: active_alerts\ndata: {json.dumps(active)}\n\n"
+            heartbeat_interval = 15  # seconds
+            last_heartbeat = time.time()
+            while True:
+                try:
+                    # Wait for alert with timeout (for heartbeat)
+                    alert_data = subscriber_queue.get(timeout=heartbeat_interval)
+                    if alert_data is None:
+                        # Poison pill — disconnect
+                        break
+                    # Determine event type
+                    event_type = alert_data.pop("type", "alert") if isinstance(alert_data, dict) and "type" in alert_data else "alert"
+                    yield f"event: {event_type}\ndata: {json.dumps(alert_data)}\n\n"
+                except queue.Empty:
+                    # Send heartbeat to keep connection alive
+                    now = time.time()
+                    if now - last_heartbeat >= heartbeat_interval:
+                        stats = engine.get_stats()
+                        yield f"event: heartbeat\ndata: {json.dumps({'time': now, 'pending': stats.get('active_pending_count', 0)})}\n\n"
+                        last_heartbeat = now
+        except GeneratorExit:
+            logger.info("SSE client disconnected")
+        except Exception as e:
+            logger.error(f"SSE stream error: {e}")
+        finally:
+            engine.unsubscribe(subscriber_queue)
+    return Response(
+        stream_with_context(event_stream()),
+        mimetype='text/event-stream',
+        headers={
+            'Cache-Control': 'no-cache',
+            'X-Accel-Buffering': 'no',
+            'Connection': 'keep-alive',
+            'Access-Control-Allow-Origin': '*',
+        }
+    )
+# ========================================
+# Alert Actions
+# ========================================
+@alert_bp.route('/confirm/<alert_id>', methods=['POST'])
+def confirm_alert(alert_id):
+    """
+    Confirm an alert as a real threat.
+    Body (JSON):
+        user_id: str (optional)
+        note: str (optional)
+    """
+    try:
+        data = request.json or {}
+        user_id = data.get('user_id', 'anonymous')
+        note = data.get('note', '')
+        engine = get_alert_engine()
+        result = engine.confirm_alert(alert_id, user_id=user_id, note=note)
+        if result:
+            return jsonify({
+                'success': True,
+                'message': f'Alert {alert_id} confirmed as real threat',
+                'alert': result
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'error': f'Alert {alert_id} not found'
+            }), 404
+    except Exception as e:
+        logger.error(f"Error confirming alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+@alert_bp.route('/dismiss/<alert_id>', methods=['POST'])
+def dismiss_alert(alert_id):
+    """
+    Dismiss an alert as a false positive.
+    Body (JSON):
+        user_id: str (optional)
+        note: str (optional)
+    """
+    try:
+        data = request.json or {}
+        user_id = data.get('user_id', 'anonymous')
+        note = data.get('note', '')
+        engine = get_alert_engine()
+        result = engine.dismiss_alert(alert_id, user_id=user_id, note=note)
+        if result:
+            return jsonify({
+                'success': True,
+                'message': f'Alert {alert_id} dismissed as false positive',
+                'alert': result
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'error': f'Alert {alert_id} not found'
+            }), 404
+    except Exception as e:
+        logger.error(f"Error dismissing alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+# ========================================
+# Alert Queries
+# ========================================
+@alert_bp.route('/active', methods=['GET'])
+def get_active_alerts():
+    """Get all active (pending) alerts"""
+    try:
+        camera_id = request.args.get('camera_id')
+        engine = get_alert_engine()
+        alerts = engine.get_active_alerts(camera_id=camera_id)
+        return jsonify({
+            'success': True,
+            'count': len(alerts),
+            'alerts': alerts
+        })
+    except Exception as e:
+        logger.error(f"Error getting active alerts: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+@alert_bp.route('/history', methods=['GET'])
+def get_alert_history():
+    """
+    Get alert history with optional filters.
+    Query params:
+        limit: int (default 50)
+        camera_id: str (optional)
+        severity: str (optional) - critical, high, medium, low
+        status: str (optional) - pending, confirmed, dismissed
+    """
+    try:
+        limit = int(request.args.get('limit', 50))
+        camera_id = request.args.get('camera_id')
+        severity = request.args.get('severity')
+        status = request.args.get('status')
+        engine = get_alert_engine()
+        # Try to get from DB for persistence across restarts
+        try:
+            query = {}
+            if camera_id:
+                query["camera_id"] = camera_id
+            if severity:
+                query["severity"] = severity
+            if status:
+                query["status"] = status
+            db_alerts = list(
+                engine.alerts_collection.find(query)
+                .sort("timestamp", -1)
+                .limit(limit)
+            )
+            # Convert ObjectId to string
+            for alert in db_alerts:
+                alert["_id"] = str(alert["_id"])
+            return jsonify({
+                'success': True,
+                'count': len(db_alerts),
+                'alerts': db_alerts
+            })
+        except Exception:
+            # Fallback to in-memory
+            alerts = engine.get_alert_history(
+                limit=limit, camera_id=camera_id,
+                severity=severity, status=status
+            )
+            return jsonify({
+                'success': True,
+                'count': len(alerts),
+                'alerts': alerts
+            })
+    except Exception as e:
+        logger.error(f"Error getting alert history: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+@alert_bp.route('/<alert_id>', methods=['GET'])
+def get_alert(alert_id):
+    """Get a single alert by ID"""
+    try:
+        engine = get_alert_engine()
+        alert = engine.get_alert_by_id(alert_id)
+        if alert:
+            return jsonify({'success': True, 'alert': alert})
+        else:
+            return jsonify({'success': False, 'error': 'Alert not found'}), 404
+    except Exception as e:
+        logger.error(f"Error getting alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+@alert_bp.route('/stats', methods=['GET'])
+def get_alert_stats():
+    """Get alert statistics"""
+    try:
+        engine = get_alert_engine()
+        stats = engine.get_stats()
+        return jsonify({'success': True, 'stats': stats})
+    except Exception as e:
+        logger.error(f"Error getting alert stats: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+@alert_bp.route('/snapshot/<alert_id>', methods=['GET'])
+def get_alert_snapshot(alert_id):
+    """Get the frame snapshot for an alert (proxied from MinIO)"""
+    try:
+        engine = get_alert_engine()
+        alert = engine.get_alert_by_id(alert_id)
+        if not alert:
+            return jsonify({'success': False, 'error': 'Alert not found'}), 404
+        snapshot_path = alert.get('frame_snapshot_path')
+        if not snapshot_path:
+            return jsonify({'success': False, 'error': 'No snapshot available'}), 404
+        # Generate fresh presigned URL
+        url = engine._get_snapshot_url(snapshot_path)
+        if url:
+            return jsonify({'success': True, 'url': url})
+        else:
+            return jsonify({'success': False, 'error': 'Failed to generate snapshot URL'}), 500
+    except Exception as e:
+        logger.error(f"Error getting snapshot: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500
+# ========================================
+# Test Endpoint (for development)
+# ========================================
+@alert_bp.route('/test', methods=['POST'])
+def test_alert():
+    """
+    Send a test alert for development/testing.
+    Body (JSON):
+        detection_class: str (e.g., 'fire', 'gun', 'fighting')
+        confidence: float (0.0-1.0)
+        camera_id: str (optional, default 'webcam_01')
+    """
+    try:
+        data = request.json or {}
+        detection_class = data.get('detection_class', 'fire')
+        confidence = float(data.get('confidence', 0.85))
+        camera_id = data.get('camera_id', 'webcam_01')
+        engine = get_alert_engine()
+        alert = engine.process_detection(
+            camera_id=camera_id,
+            detection_class=detection_class,
+            confidence=confidence,
+            timestamp=time.time(),
+        )
+        if alert:
+            return jsonify({
+                'success': True,
+                'message': f'Test alert created: {alert.display_name}',
+                'alert': alert.to_sse_payload()
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'message': 'Alert was suppressed (cooldown or low confidence)'
+            })
+    except Exception as e:
+        logger.error(f"Error creating test alert: {e}")
+        return jsonify({'success': False, 'error': str(e)}), 500

app.py ADDED Viewed

The diff for this file is too large to render. See raw diff

behavior_analysis/action_recognition.py ADDED Viewed

	@@ -0,0 +1,381 @@

+# ============================================================
+# FULLY FIXED ACTION RECOGNITION PIPELINE
+# Supports:
+#   - fight_detection.pt (3D ResNet18, state_dict)
+#   - accident_detection.pt (3D ResNet18, state_dict; registered as "road_accident")
+#   - wallclimb.pt       (YOLO, Ultralytics)
+# ============================================================
+from dataclasses import dataclass, asdict
+import multiprocessing as mp
+import torch
+import cv2
+import numpy as np
+import os
+import time
+import json
+import logging
+from typing import List, Optional, Dict, Any
+from torchvision.models.video import r3d_18
+import torch.nn as nn
+# --- YOLO + PyTorch 2.6 compatibility ---
+from ultralytics import YOLO
+import ultralytics
+# Allow-list the Ultralytics DetectionModel class for torch's restricted
+# unpickler so YOLO checkpoints can still be deserialized.
+torch.serialization.add_safe_globals([ultralytics.nn.tasks.DetectionModel])
+# --- Logging ---
+logger = logging.getLogger(__name__)
+# NOTE(review): basicConfig runs as an import side effect; it only takes effect
+# when the root logger has no handlers yet.
+logging.basicConfig(level=logging.INFO)
+# ============================================================
+# FIXED MODEL PATHS
+# ============================================================
+# Directory containing this module; all weights files are resolved relative to it.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Registry name -> absolute path of the weights file.
+# Note that the "road_accident" entry points at accident_detection.pt.
+MODEL_PATHS = {
+    "fight_detection":   os.path.join(BASE_DIR, "fight_detection.pt"),
+    "road_accident":     os.path.join(BASE_DIR, "accident_detection.pt"),
+    "wallclimb":         os.path.join(BASE_DIR, "wallclimb.pt"),
+}
+# Define which models are 3D-ResNet (run separately) vs YOLO
+# The names below are keys of MODEL_PATHS.
+RESNET_MODELS = {"fight_detection", "road_accident"}
+YOLO_MODELS = {"wallclimb"}
+# ============================================================
+#  Dataclasses
+# ============================================================
+@dataclass
+class ActionPrediction:
+    """One positive action detection recorded while scanning a video."""
+    # Position in the video in seconds (frame index divided by the video's FPS).
+    timestamp: float
+    # Index of the frame at which the detection was made.
+    frame_index: int
+    # Action label as returned by interpret_prediction().
+    label: str
+    # Confidence score as returned by interpret_prediction().
+    confidence: float
+# ============================================================
+# MODEL LOADER (YOLO or 3D-ResNet)
+# ============================================================
+def load_model(model_path: str, device: torch.device):
+    name = os.path.basename(model_path).lower()
+    # -------- YOLO MODEL (wallclimb) --------
+    if "wall" in name or "yolo" in name:
+        logger.info(f"Loading YOLO model: {model_path}")
+        return YOLO(model_path)
+    # -------- TRY TorchScript --------
+    try:
+        model = torch.jit.load(model_path, map_location=device)
+        logger.info(f"Loaded TorchScript model")
+        model.eval()
+        return model
+    except:
+        pass
+    # -------- 3D-ResNet --------
+    try:
+        ckpt = torch.load(model_path, map_location=device)
+        if isinstance(ckpt, dict):
+            logger.info(f"Loading 3D-ResNet model: {model_path}")
+            model = r3d_18(weights=None)
+            model.fc = nn.Linear(512, 2)
+            state = ckpt.get("state_dict", ckpt)
+            model.load_state_dict(state)
+            model.to(device)
+            model.eval()
+            return model
+    except Exception as e:
+        logger.error(f"3D-ResNet load failed: {e}")
+    raise RuntimeError(f"Unsupported model format: {model_path}")
+# ============================================================
+# FRAME PREPROCESSING FOR 3D-ResNet
+# ============================================================
+def preprocess_clip(frames: List[np.ndarray], device: torch.device, target_size=None):
+    """
+    frames = list of 16 RGB frames
+    output: tensor (1, 3, 16, H, W)
+    """
+    processed = []
+    # default target size used in your training/preprocessing
+    if not target_size:
+        target_size = (112, 112)
+    for f in frames:
+        img = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
+        if target_size:
+            img = cv2.resize(img, (target_size[1], target_size[0]))
+        img = img / 255.0
+        img = img.transpose(2, 0, 1)
+        processed.append(img)
+    clip = np.stack(processed, axis=1)
+    tensor = torch.from_numpy(clip).float().unsqueeze(0).to(device)
+    return tensor
+# ============================================================
+# INTERPRET MODEL OUTPUT
+# ============================================================
+# Map class indices to action labels
+# NOTE(review): interpret_prediction() does not read this table; it derives the
+# label from the model name and class index instead — confirm it is still needed.
+ACTION_LABELS = {
+    0: "fighting",
+    1: "accident",
+    2: "climbing"
+}
+# Per-action confidence thresholds
+# Looked up by lower-cased label in interpret_prediction(); labels without an
+# entry fall back to 0.5 there.
+ACTION_CONFIDENCE_THRESHOLDS = {
+    "fighting": 0.5,
+    "accident": 0.65,
+    "climbing": 0.8
+}
+def interpret_prediction(model, output, model_name, confidence_threshold=None):
+    """
+    Interpret model output and return one of three actions: "fighting", "accident", or "climbing".
+    If confidence is below 0.5, suppress the prediction and return ("no_action", 0.0).
+    Model-specific handling:
+    - fight_detection: returns "fighting" if class 1, "no_action" for class 0
+    - road_accident: returns "accident" if class 1, "no_action" for class 0
+    - wallclimb (YOLO): returns "climbing" for class 2
+    """
+    # -------- YOLO (wallclimb) --------
+    if hasattr(model, "predict") and isinstance(output, list):
+        logger.info(f"🔍 YOLO prediction for {model_name}")
+        boxes = output[0].boxes
+        if boxes is None or len(boxes) == 0:
+            logger.info("🚫 No boxes detected by YOLO")
+            return ("no_action", 0.0)
+        best = boxes[0]
+        cls_idx = int(best.cls)
+        conf = float(best.conf)
+        # YOLO returns climbing detections
+        label = "climbing" if cls_idx == 0 else "no_action"
+        # Use per-action threshold or provided threshold
+        threshold = confidence_threshold if confidence_threshold is not None else ACTION_CONFIDENCE_THRESHOLDS.get(label, 0.5)
+        logger.info(f"🎯 YOLO detection: class_idx={cls_idx}, confidence={conf:.3f}, threshold={threshold}")
+        # Suppress if confidence < threshold
+        if conf < threshold:
+            logger.info(f"🚫 Confidence {conf:.3f} below threshold {threshold}")
+            return ("no_action", 0.0)
+        logger.info(f"✅ YOLO final result: {label} (conf: {conf:.3f})")
+        return (label, conf)
+    # -------- 3D-ResNet (fight_detection or road_accident) --------
+    if isinstance(output, torch.Tensor):
+        logger.info(f"🔍 3D-ResNet prediction for {model_name}")
+        probs = torch.softmax(output, dim=1)[0]
+        cls_idx = int(torch.argmax(probs).item())
+        conf = float(probs[cls_idx])
+        logger.info(f"📊 Raw probabilities: {probs.tolist()}")
+        # Model-specific mapping (class 0 = negative, class 1 = positive)
+        if "fight" in model_name.lower():
+            label = "fighting" if cls_idx == 1 else "no_action"
+            logger.info(f"🥊 Fight detection: class {cls_idx} -> {label}")
+        elif "accident" in model_name.lower() or "road" in model_name.lower():
+            # match user's naming and capitalization for saved frames
+            label = "Accident" if cls_idx == 1 else "no_action"
+        else:
+            label = "no_action"
+            logger.info(f"❓ Unknown model type, defaulting to no_action")
+        # Use per-action threshold or provided threshold
+        threshold = confidence_threshold if confidence_threshold is not None else ACTION_CONFIDENCE_THRESHOLDS.get(label.lower(), 0.5)
+        logger.info(f"🎯 Predicted class: {cls_idx}, confidence: {conf:.3f}, threshold: {threshold}")
+        # Suppress if confidence < threshold
+        if conf < threshold:
+            logger.info(f"🚫 Confidence {conf:.3f} below threshold {threshold}")
+            return ("no_action", 0.0)
+        logger.info(f"✅ 3D-ResNet final result: {label} (conf: {conf:.3f})")
+        return (label, conf)
+    return ("no_action", 0.0)
+# ============================================================
+# VIDEO PROCESSING
+# ============================================================
+def process_video_with_model(
+        video_path,
+        model_path,
+        output_dir,
+        model_name=None,
+        use_gpu=True,
+        frame_skip=1,
+        target_size=None,
+        annotate=True):
+    device = torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
+    model_name = model_name or os.path.splitext(os.path.basename(model_path))[0]
+    logger.info(f"[{model_name}] Loading model...")
+    model = load_model(model_path, device)
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        logger.error(f"[{model_name}] Could not open video")
+        return
+    fps = cap.get(cv2.CAP_PROP_FPS) or 25
+    frame_buffer = []
+    idx = 0
+    frames_processed = 0
+    predictions = []
+    # annotation folder
+    anno_dir = os.path.join(output_dir, f"{model_name}_annotated")
+    if annotate:
+        os.makedirs(anno_dir, exist_ok=True)
+    start = time.time()
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        if idx % frame_skip != 0:
+            idx += 1
+            continue
+        timestamp = idx / fps
+        try:
+            # -------- YOLO --------
+            if hasattr(model, "predict"):
+                output = model.predict(frame, verbose=False)
+                label, conf = interpret_prediction(model, output, model_name)
+            # -------- 3D-ResNet uses CLIPS of 16 frames --------
+            else:
+                frame_buffer.append(frame)
+                if len(frame_buffer) < 16:
+                    idx += 1
+                    continue
+                clip = preprocess_clip(frame_buffer[-16:], device, target_size)
+                with torch.no_grad():
+                    output = model(clip)
+                label, conf = interpret_prediction(model, output, model_name)
+            # Only record and annotate positive detections
+            if label != "no_action":
+                predictions.append(ActionPrediction(timestamp, idx, label, conf))
+                frames_processed += 1
+                # -------- Annotate output --------
+                if annotate:
+                    anno = frame.copy()
+                    cv2.putText(
+                        anno,
+                        f"{label} {conf:.2f}",
+                        (10, 35),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        1.0,
+                        (0, 255, 0),
+                        2,
+                    )
+                    cv2.imwrite(os.path.join(anno_dir, f"{idx:06}.jpg"), anno)
+        except Exception as e:
+            logger.error(f"[{model_name}] Error on frame {idx}: {e}")
+        idx += 1
+    cap.release()
+    # Save results
+    os.makedirs(output_dir, exist_ok=True)
+    json_path = os.path.join(output_dir, f"{os.path.basename(video_path)}__{model_name}.json")
+    with open(json_path, "w") as f:
+        json.dump({
+            "video": video_path,
+            "model": model_path,
+            "frames_processed": frames_processed,
+            "processing_time": time.time() - start,
+            "predictions": [asdict(p) for p in predictions]
+        }, f, indent=2)
+    logger.info(f"[{model_name}] Finished. Saved: {json_path}")
+# ============================================================
+# MULTI-MODEL EXECUTOR (Windows-safe)
+# ============================================================
+def run_models_on_videos(video_paths, model_paths,
+                         output_dir="./action_recognition_outputs",
+                         use_gpu=True, frame_skip=5,
+                         target_size=None, annotate=True):
+    os.makedirs(output_dir, exist_ok=True)
+    processes = []
+    for model_path in model_paths:
+        model_name = os.path.splitext(os.path.basename(model_path))[0]
+        for video in video_paths:
+            p = mp.Process(target=process_video_with_model,
+                           args=(video, model_path, output_dir, model_name,
+                                 use_gpu, frame_skip, target_size, annotate))
+            p.start()
+            processes.append(p)
+            logger.info(f"Started PID={p.pid} → {model_name}")
+    for p in processes:
+        p.join()
+        logger.info(f"PID={p.pid} finished with code {p.exitcode}")
+# ============================================================
+# MAIN
+# ============================================================
+if __name__ == "__main__":
+    mp.set_start_method("spawn", force=True)   # IMPORTANT FIX ON WINDOWS
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--videos", "-v", nargs="+", required=True)
+    parser.add_argument("--models", "-m", nargs="*", default=list(MODEL_PATHS.values()))
+    parser.add_argument("--output", "-o", default="./action_recognition_outputs")
+    parser.add_argument("--no-gpu", action="store_true")
+    parser.add_argument("--frame-skip", type=int, default=5)
+    parser.add_argument("--no-annotate", action="store_true")
+    args = parser.parse_args()
+    run_models_on_videos(
+        video_paths=args.videos,
+        model_paths=args.models,
+        output_dir=args.output,
+        use_gpu=not args.no_gpu,
+        frame_skip=max(1, args.frame_skip),
+        annotate=not args.no_annotate
+    )

behavior_analysis/wallclimb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b51bb0eec57891debefc3f1c1a53299229b716ac8385dfd759cc469058fe04e
+size 5352882

behavior_analysis/yolov11_wallclimb.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ae0285b20cf8ab66e4ddcf47f300c326c1b972e9bfc909e00f2cf6f65202ff3
+size 5359282

behavior_analysis_integrator.py ADDED Viewed

	@@ -0,0 +1,580 @@

+"""
+Behavior Analysis Integrator for DetectifAI
+This module integrates behavior analysis (action recognition) into the video processing pipeline.
+It processes video segments/keyframes to detect suspicious behaviors like fighting, accidents, and climbing.
+Similar to ObjectDetectionIntegrator, it creates behavior-based events and identifies suspicious frames
+for facial recognition processing.
+"""
+import os
+import cv2
+import time
+import logging
+import json
+from typing import List, Dict, Any, Tuple, Optional
+from dataclasses import dataclass, asdict
+import numpy as np
+# Import behavior analysis module
+from behavior_analysis.action_recognition import (
+    load_model, preprocess_clip, interpret_prediction,
+    MODEL_PATHS, RESNET_MODELS, YOLO_MODELS, ActionPrediction
+)
+logger = logging.getLogger(__name__)
+@dataclass
+class BehaviorDetectionResult:
+    """Result of behavior detection on a frame or segment"""
+    # Path of the frame image the detection refers to.
+    frame_path: str
+    # Position in the video, in seconds.
+    timestamp: float
+    # Frame index number within the video.
+    frame_index: int
+    # NOTE(review): interpret_prediction() returns "Accident" (capitalised) for
+    # accident models — confirm whether consumers expect the lower-case form.
+    behavior_detected: str  # "fighting", "accident", "climbing", or "no_action"
+    # Confidence score reported for the detected behavior.
+    confidence: float
+    # Name of the model that produced this result (a key of MODEL_PATHS).
+    model_used: str
+    # Wall-clock seconds spent producing this result.
+    processing_time: float
+@dataclass
+class BehaviorEvent:
+    """Behavior-based event created from detections"""
+    # Identifier of this event.
+    event_id: str
+    # Kind of behavior the event represents.
+    behavior_type: str
+    # Start and end of the event — presumably seconds into the video, like the
+    # detection timestamps; TODO confirm against the code that builds events.
+    start_timestamp: float
+    end_timestamp: float
+    # Confidence assigned to the event.
+    confidence: float
+    # Indices of the frames that belong to the event.
+    frame_indices: List[int]
+    # Keyframes associated with the event — presumably image paths; TODO confirm.
+    keyframes: List[str]
+    # Name of the model whose detections produced the event.
+    model_used: str
+    # Importance score assigned to the event.
+    importance_score: float
+class BehaviorAnalysisIntegrator:
+    """Integration layer between behavior analysis and video processing pipeline"""
+    def __init__(self, config):
+        self.config = config
+        self.enabled = getattr(config, 'enable_behavior_analysis', False)
+        logger.info(f"🔍 Initializing BehaviorAnalysisIntegrator - enabled: {self.enabled}")
+        # Initialize models if enabled
+        self.models = {}
+        self.device = None
+        if self.enabled:
+            try:
+                import torch
+                self.device = torch.device("cuda" if (torch.cuda.is_available() and getattr(config, 'use_gpu_acceleration', True)) else "cpu")
+                # Load all available models
+                logger.info(f"🔧 Attempting to load models from: {MODEL_PATHS}")
+                for model_name, model_path in MODEL_PATHS.items():
+                    logger.info(f"📁 Checking model {model_name} at: {model_path}")
+                    if os.path.exists(model_path):
+                        try:
+                            logger.info(f"⏳ Loading {model_name}...")
+                            self.models[model_name] = load_model(model_path, self.device)
+                            logger.info(f"✅ Loaded behavior analysis model: {model_name}")
+                        except Exception as e:
+                            logger.error(f"❌ Failed to load {model_name}: {e}")
+                    else:
+                        logger.error(f"❌ Model file not found: {model_path}")
+                if not self.models:
+                    logger.warning("⚠️ No behavior analysis models loaded, disabling behavior analysis")
+                    self.enabled = False
+                else:
+                    logger.info(f"✅ Behavior analysis initialized with {len(self.models)} models")
+            except ImportError:
+                logger.warning("⚠️ PyTorch not available, disabling behavior analysis")
+                self.enabled = False
+        else:
+            logger.info("Behavior analysis disabled in config")
+    def detect_behavior_in_frame(self, frame_path: str, timestamp: float, frame_index: int = 0) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a single frame
+        Args:
+            frame_path: Path to frame image
+            timestamp: Timestamp in seconds
+            frame_index: Frame index number
+        Returns:
+            List of BehaviorDetectionResult objects (one per model)
+        """
+        if not self.enabled or not self.models:
+            return []
+        if not os.path.exists(frame_path):
+            logger.warning(f"Frame not found: {frame_path}")
+            return []
+        results = []
+        frame = cv2.imread(frame_path)
+        if frame is None:
+            logger.warning(f"Failed to read frame: {frame_path}")
+            return []
+        for model_name, model in self.models.items():
+            try:
+                start_time = time.time()
+                # YOLO models (wallclimb)
+                if model_name in YOLO_MODELS:
+                    output = model.predict(frame, verbose=False)
+                    # Use default per-action thresholds from ACTION_CONFIDENCE_THRESHOLDS
+                    label, conf = interpret_prediction(model, output, model_name)
+                    logger.info(f"🔍 YOLO model {model_name} prediction: {label} (confidence: {conf:.3f})")
+                    if label != "no_action":
+                        result = BehaviorDetectionResult(
+                            frame_path=frame_path,
+                            timestamp=timestamp,
+                            frame_index=frame_index,
+                            behavior_detected=label,
+                            confidence=conf,
+                            model_used=model_name,
+                            processing_time=time.time() - start_time
+                        )
+                        results.append(result)
+                # 3D-ResNet models need clips of 16 frames
+                # For single frame detection, we'll need to handle this differently
+                # For now, skip 3D-ResNet models for single frame detection
+                # They should be used with video segments instead
+            except Exception as e:
+                logger.error(f"Error detecting behavior with {model_name}: {e}")
+                continue
+        return results
+    def detect_behavior_in_segment(self, video_path: str, start_time: float, end_time: float,
+                                   frame_indices: List[int] = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a video segment (for 3D-ResNet models that need temporal context)
+        Args:
+            video_path: Path to video file
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            frame_indices: Optional list of frame indices to process
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not self.enabled or not self.models:
+            return []
+        if not os.path.exists(video_path):
+            logger.warning(f"Video not found: {video_path}")
+            return []
+        results = []
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            logger.error(f"Could not open video: {video_path}")
+            return []
+        fps = cap.get(cv2.CAP_PROP_FPS) or 25
+        start_frame = int(start_time * fps)
+        end_frame = int(end_time * fps)
+        # Read frames for the segment
+        frame_buffer = []
+        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+        for idx in range(start_frame, min(end_frame, int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))):
+            ret, frame = cap.read()
+            if not ret:
+                break
+            frame_buffer.append(frame)
+        cap.release()
+        # Calculate mid frame index
+        mid_frame_idx = (start_frame + end_frame) // 2 if end_frame > start_frame else start_frame
+        return self._process_frame_buffer(frame_buffer, start_time, end_time, mid_frame_idx, video_path)
+    def detect_behavior_in_segment_from_buffer(self, frame_buffer: List[np.ndarray],
+                                               start_time: float, end_time: float,
+                                               frame_indices: List[int] = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in a frame buffer (for live streams)
+        Args:
+            frame_buffer: List of frames (numpy arrays)
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            frame_indices: Optional list of frame indices
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not self.enabled or not self.models:
+            return []
+        if len(frame_buffer) < 16:
+            logger.debug(f"Frame buffer too short ({len(frame_buffer)} frames), skipping 3D-ResNet models")
+            return []
+        # Use last 16 frames from buffer
+        frames_to_process = frame_buffer[-16:] if len(frame_buffer) >= 16 else frame_buffer
+        mid_frame_idx = len(frame_buffer) // 2 if frame_indices is None else (frame_indices[len(frame_indices) // 2] if frame_indices else len(frame_buffer) // 2)
+        return self._process_frame_buffer(frames_to_process, start_time, end_time, mid_frame_idx, "live_stream")
+    def _process_frame_buffer(self, frame_buffer: List[np.ndarray], start_time: float,
+                             end_time: float, frame_index: int, video_path: str = "live_stream") -> List[BehaviorDetectionResult]:
+        """
+        Process frame buffer with behavior analysis models
+        Args:
+            frame_buffer: List of frames (numpy arrays)
+            start_time: Start timestamp
+            end_time: End timestamp
+            frame_index: Frame index for result
+            video_path: Path to video file or "live_stream" for live streams
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if len(frame_buffer) < 16:
+            return []
+        results = []
+        # Process with 3D-ResNet models (need 16-frame clips)
+        for model_name, model in self.models.items():
+            if model_name not in RESNET_MODELS:
+                continue
+            try:
+                start_time_proc = time.time()
+                # Process last 16 frames from buffer
+                clip = preprocess_clip(frame_buffer[-16:], self.device)
+                import torch
+                model.eval()
+                with torch.no_grad():
+                    output = model(clip)
+                # Use default per-action thresholds from ACTION_CONFIDENCE_THRESHOLDS
+                label, conf = interpret_prediction(model, output, model_name)
+                logger.info(f"🔍 Model {model_name} prediction: {label} (confidence: {conf:.3f})")
+                if label != "no_action":
+                    # Use middle timestamp of the segment
+                    mid_timestamp = (start_time + end_time) / 2
+                    result = BehaviorDetectionResult(
+                        frame_path="live_stream",  # Live stream identifier
+                        timestamp=mid_timestamp,
+                        frame_index=frame_index,
+                        behavior_detected=label,
+                        confidence=conf,
+                        model_used=model_name,
+                        processing_time=time.time() - start_time_proc
+                    )
+                    results.append(result)
+            except Exception as e:
+                logger.error(f"Error detecting behavior with {model_name} in segment: {e}")
+                continue
+        return results
+    def detect_behavior_in_keyframes(self, keyframes: List, video_path: str = None) -> List[BehaviorDetectionResult]:
+        """
+        Detect behaviors in keyframes
+        Args:
+            keyframes: List of KeyframeResult objects
+            video_path: Optional path to video file (needed for 3D-ResNet models)
+        Returns:
+            List of BehaviorDetectionResult objects
+        """
+        if not self.enabled:
+            logger.info("🚫 Behavior analysis disabled, skipping")
+            return []
+        logger.info(f"🎬 Starting behavior detection on {len(keyframes)} keyframes")
+        logger.info(f"📹 Video path provided: {video_path}")
+        logger.info(f"🤖 Available models: {list(self.models.keys())}")
+        logger.info(f"🔍 Running behavior analysis on {len(keyframes)} keyframes...")
+        all_results = []
+        # Process YOLO models (single frame) - wallclimb
+        yolo_models_available = [m for m in self.models.keys() if m in YOLO_MODELS]
+        logger.info(f"🎯 Processing YOLO models (single frame): {yolo_models_available}")
+        for i, keyframe in enumerate(keyframes):
+            # Extract frame path and timestamp
+            frame_path = None
+            timestamp = 0.0
+            frame_index = i
+            if hasattr(keyframe, 'frame_data'):
+                frame_path = keyframe.frame_data.frame_path if hasattr(keyframe.frame_data, 'frame_path') else None
+                timestamp = keyframe.frame_data.timestamp if hasattr(keyframe.frame_data, 'timestamp') else 0.0
+            elif hasattr(keyframe, 'frame_path'):
+                frame_path = keyframe.frame_path
+                timestamp = getattr(keyframe, 'timestamp', 0.0)
+            if frame_path and os.path.exists(frame_path):
+                # Detect with YOLO models (single frame) - wallclimb
+                frame_results = self.detect_behavior_in_frame(frame_path, timestamp, frame_index)
+                all_results.extend(frame_results)
+        # Process 3D-ResNet models (need 16-frame clips) - fighting, road_accident
+        if video_path and os.path.exists(video_path) and RESNET_MODELS:
+            resnet_models_available = [m for m in self.models.keys() if m in RESNET_MODELS]
+            logger.info(f"🎬 Processing 3D-ResNet models using video segments...")
+            logger.info(f"📊 Available ResNet models: {resnet_models_available}")
+            logger.info(f"📊 Total ResNet models to process: {len(resnet_models_available)}")
+            # Group keyframes into temporal segments for 3D-ResNet processing
+            # Process segments of ~1 second (16 frames at ~30fps) around each keyframe
+            segment_window = 1.0  # 1 second window
+            processed_segments = set()  # Track processed segments to avoid duplicates
+            for keyframe in keyframes:
+                timestamp = 0.0
+                if hasattr(keyframe, 'frame_data'):
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe.frame_data, 'timestamp') else 0.0
+                elif hasattr(keyframe, 'timestamp'):
+                    timestamp = getattr(keyframe, 'timestamp', 0.0)
+                if timestamp > 0:
+                    # Create segment around this keyframe
+                    start_time = max(0, timestamp - segment_window / 2)
+                    end_time = timestamp + segment_window / 2
+                    # Round to avoid processing same segment multiple times
+                    segment_key = (int(start_time * 10), int(end_time * 10))
+                    if segment_key not in processed_segments:
+                        processed_segments.add(segment_key)
+                        try:
+                            logger.info(f"🎥 Processing video segment: {start_time:.1f}s - {end_time:.1f}s")
+                            # Process segment with 3D-ResNet models
+                            segment_results = self.detect_behavior_in_segment(
+                                video_path=video_path,
+                                start_time=start_time,
+                                end_time=end_time,
+                                frame_indices=None
+                            )
+                            logger.info(f"📈 Segment results: {len(segment_results)} detections")
+                            for result in segment_results:
+                                logger.info(f"🔍 Detected: {result.behavior_detected} (conf: {result.confidence:.3f})")
+                            all_results.extend(segment_results)
+                        except Exception as e:
+                            logger.error(f"❌ Error processing segment {start_time:.1f}s-{end_time:.1f}s: {e}")
+                            continue
+        logger.info(f"✅ Behavior analysis complete: {len(all_results)} behaviors detected")
+        return all_results
+    def create_behavior_events(self, detection_results: List[BehaviorDetectionResult],
+                              temporal_window: float = 5.0) -> List[BehaviorEvent]:
+        """
+        Create behavior-based events from detection results
+        Args:
+            detection_results: List of BehaviorDetectionResult objects
+            temporal_window: Time window in seconds for grouping detections
+        Returns:
+            List of BehaviorEvent objects
+        """
+        if not detection_results:
+            return []
+        # Group detections by behavior type and temporal proximity
+        events = []
+        sorted_results = sorted(detection_results, key=lambda x: x.timestamp)
+        current_event = None
+        event_id_counter = 0
+        for result in sorted_results:
+            if result.behavior_detected == "no_action":
+                continue
+            if current_event is None:
+                # Start new event
+                event_id_counter += 1
+                current_event = {
+                    'event_id': f"behavior_{result.behavior_detected}_{event_id_counter}",
+                    'behavior_type': result.behavior_detected,
+                    'start_timestamp': result.timestamp,
+                    'end_timestamp': result.timestamp,
+                    'confidences': [result.confidence],
+                    'frame_indices': [result.frame_index],
+                    'keyframes': [result.frame_path],
+                    'model_used': result.model_used
+                }
+            elif (result.behavior_detected == current_event['behavior_type'] and
+                  result.timestamp - current_event['end_timestamp'] <= temporal_window):
+                # Extend current event
+                current_event['end_timestamp'] = result.timestamp
+                current_event['confidences'].append(result.confidence)
+                current_event['frame_indices'].append(result.frame_index)
+                current_event['keyframes'].append(result.frame_path)
+            else:
+                # Finalize current event and start new one
+                avg_confidence = sum(current_event['confidences']) / len(current_event['confidences'])
+                importance = avg_confidence * (current_event['end_timestamp'] - current_event['start_timestamp'] + 1)
+                behavior_event = BehaviorEvent(
+                    event_id=current_event['event_id'],
+                    behavior_type=current_event['behavior_type'],
+                    start_timestamp=current_event['start_timestamp'],
+                    end_timestamp=current_event['end_timestamp'],
+                    confidence=avg_confidence,
+                    frame_indices=current_event['frame_indices'],
+                    keyframes=current_event['keyframes'],
+                    model_used=current_event['model_used'],
+                    importance_score=importance
+                )
+                events.append(behavior_event)
+                # Start new event
+                event_id_counter += 1
+                current_event = {
+                    'event_id': f"behavior_{result.behavior_detected}_{event_id_counter}",
+                    'behavior_type': result.behavior_detected,
+                    'start_timestamp': result.timestamp,
+                    'end_timestamp': result.timestamp,
+                    'confidences': [result.confidence],
+                    'frame_indices': [result.frame_index],
+                    'keyframes': [result.frame_path],
+                    'model_used': result.model_used
+                }
+        # Finalize last event
+        if current_event:
+            avg_confidence = sum(current_event['confidences']) / len(current_event['confidences'])
+            importance = avg_confidence * (current_event['end_timestamp'] - current_event['start_timestamp'] + 1)
+            behavior_event = BehaviorEvent(
+                event_id=current_event['event_id'],
+                behavior_type=current_event['behavior_type'],
+                start_timestamp=current_event['start_timestamp'],
+                end_timestamp=current_event['end_timestamp'],
+                confidence=avg_confidence,
+                frame_indices=current_event['frame_indices'],
+                keyframes=current_event['keyframes'],
+                model_used=current_event['model_used'],
+                importance_score=importance
+            )
+            events.append(behavior_event)
+        logger.info(f"✅ Created {len(events)} behavior-based events")
+        return events
+    def process_keyframes_with_behavior_analysis(self, keyframes: List, video_path: str = None) -> Tuple[List[BehaviorDetectionResult], List[BehaviorEvent]]:
+        """
+        Process keyframes with behavior analysis and create behavior-based events
+        Args:
+            keyframes: List of KeyframeResult objects
+            video_path: Optional path to video file (needed for 3D-ResNet models)
+        Returns:
+            Tuple of (detection_results, behavior_events)
+        """
+        if not self.enabled:
+            logger.info("🚫 Behavior analysis disabled, skipping...")
+            return [], []
+        logger.info("🚀 ===== STARTING BEHAVIOR ANALYSIS INTEGRATION =====")
+        logger.info(f"📊 Input: {len(keyframes)} keyframes, video_path: {video_path}")
+        logger.info(f"🤖 Loaded models: {list(self.models.keys())}")
+        logger.info(f"⚙️ Confidence thresholds: fighting={getattr(self.config, 'fighting_detection_confidence', 0.5)}, accident={getattr(self.config, 'accident_detection_confidence', 0.6)}, climbing={getattr(self.config, 'climbing_detection_confidence', 0.7)}")
+        logger.info("🔍 Starting behavior analysis integration")
+        # Run behavior detection on keyframes (with video_path for 3D-ResNet models)
+        detection_results = self.detect_behavior_in_keyframes(keyframes, video_path=video_path)
+        # Create behavior-based events
+        temporal_window = getattr(self.config, 'behavior_event_temporal_window', 5.0)
+        logger.info(f"📅 Creating behavior events with temporal window: {temporal_window}s")
+        logger.info(f"📊 Total detections to process: {len(detection_results)}")
+        positive_detections = [r for r in detection_results if r.behavior_detected != "no_action"]
+        logger.info(f"✅ Positive detections: {len(positive_detections)}")
+        for detection in positive_detections:
+            logger.info(f"   🎯 {detection.behavior_detected} at {detection.timestamp:.1f}s (conf: {detection.confidence:.3f})")
+        behavior_events = self.create_behavior_events(detection_results, temporal_window)
+        # Store detection metadata
+        if hasattr(self.config, 'output_base_dir') and detection_results:
+            detection_metadata = {
+                'total_keyframes': len(keyframes),
+                'frames_with_behaviors': len([r for r in detection_results if r.behavior_detected != "no_action"]),
+                'behaviors_detected': {
+                    'fighting': len([r for r in detection_results if r.behavior_detected == "fighting"]),
+                    'accident': len([r for r in detection_results if r.behavior_detected == "accident"]),
+                    'climbing': len([r for r in detection_results if r.behavior_detected == "climbing"])
+                },
+                'total_events': len(behavior_events),
+                'detection_summary': [asdict(r) for r in detection_results[:10]]  # First 10 for summary
+            }
+            metadata_path = os.path.join(self.config.output_base_dir, 'behavior_analysis_metadata.json')
+            os.makedirs(os.path.dirname(metadata_path), exist_ok=True)
+            with open(metadata_path, 'w') as f:
+                json.dump(detection_metadata, f, indent=2, default=str)
+            logger.info(f"📊 Behavior analysis metadata saved: {metadata_path}")
+        logger.info("🏁 ===== BEHAVIOR ANALYSIS INTEGRATION COMPLETE =====")
+        logger.info(f"📈 Summary:")
+        logger.info(f"   📊 Total detections: {len(detection_results)}")
+        logger.info(f"   ✅ Positive detections: {len([r for r in detection_results if r.behavior_detected != 'no_action'])}")
+        logger.info(f"   📅 Events created: {len(behavior_events)}")
+        for event in behavior_events:
+            logger.info(f"   🎬 Event: {event.behavior_type} ({event.start_timestamp:.1f}s-{event.end_timestamp:.1f}s, conf: {event.confidence:.3f})")
+        return detection_results, behavior_events
+    def get_suspicious_frames(self, detection_results: List[BehaviorDetectionResult]) -> List[BehaviorDetectionResult]:
+        """
+        Get frames with suspicious behaviors (for facial recognition processing)
+        Args:
+            detection_results: List of BehaviorDetectionResult objects
+        Returns:
+            List of suspicious BehaviorDetectionResult objects
+        """
+        suspicious = [r for r in detection_results if r.behavior_detected != "no_action"]
+        logger.info(f"🔍 Identified {len(suspicious)} suspicious frames from behavior analysis")
+        return suspicious
+    def get_behavior_analysis_summary(self) -> Dict[str, Any]:
+        """Get summary statistics of behavior analysis"""
+        return {
+            'enabled': self.enabled,
+            'models_loaded': list(self.models.keys()) if self.models else [],
+            'device': str(self.device) if self.device else None
+        }

config.py ADDED Viewed

	@@ -0,0 +1,369 @@

+"""
+Configuration settings for the Video Event Detection and Preprocessing Pipeline.
+This file contains all configurable parameters that can be tweaked to control:
+- Keyframe extraction sensitivity
+- Event detection thresholds
+- Video quality settings
+- Output formats and paths
+"""
+import os
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
+@dataclass
+class VideoProcessingConfig:
+    """
+    Main configuration class for video processing pipeline
+    Every field has a default, so VideoProcessingConfig() yields a usable
+    configuration. Constructing an instance creates output_base_dir on disk and
+    range-checks three thresholds (see __post_init__).
+    """
+    # ===== KEYFRAME EXTRACTION PARAMETERS =====
+    # Control how many keyframes are extracted
+    # Base quality threshold (0.1-0.3): Lower = more keyframes, Higher = fewer but better quality
+    # (range is checked in __post_init__)
+    base_quality_threshold: float = 0.15
+    # Motion detection threshold (0.005-0.02): Lower = more motion-sensitive, Higher = only significant motion
+    # (range is checked in __post_init__)
+    motion_threshold: float = 0.008
+    # Burst sampling rate (1-10): Higher = more frames during high activity periods
+    burst_sampling_rate: int = 3
+    # Frame sampling interval in seconds (0.5-3.0): Lower = more frequent sampling
+    frame_sampling_interval: float = 1.0
+    # ===== EVENT DETECTION PARAMETERS =====
+    # Control how events are detected and prioritized
+    # Event importance threshold (0.2-0.5): Lower = more events detected
+    event_importance_threshold: float = 0.25
+    # Burst activity weight (1.5-3.0): Higher = burst frames get higher priority
+    burst_weight: float = 2.5
+    # Temporal clustering window in seconds (10-30): Frames within this window are clustered
+    temporal_clustering_window: float = 15.0
+    # Scene change detection threshold (0.01-0.05): Lower = more scene changes detected
+    scene_change_threshold: float = 0.02
+    # ===== VIDEO SEGMENTATION PARAMETERS =====
+    # Control how video is divided into segments
+    # Segment duration in seconds (30-60): Length of each temporal segment
+    segment_duration: float = 45.0
+    # Keyframes per segment (3-8): How many keyframes to extract per segment
+    keyframes_per_segment: int = 5
+    # ===== HIGHLIGHT REEL PARAMETERS =====
+    # Control the final summary video creation
+    # Maximum summary duration in seconds (15-60): Total length of highlight reel
+    max_summary_duration: float = 25.0
+    # Frame display duration in seconds (0.5-3.0): How long each frame is shown
+    frame_display_duration: float = 1.5
+    # Maximum frames in summary (10-30): Total number of frames in highlight reel
+    max_summary_frames: int = 18
+    # Summary video FPS (0.4-1.0): Playback speed of summary
+    summary_fps: float = 0.6
+    # ===== DEDUPLICATION PARAMETERS =====
+    # Control duplicate frame removal
+    # Similarity threshold (0.80-0.95): Higher = stricter deduplication
+    # (range is checked in __post_init__)
+    similarity_threshold: float = 0.85
+    # Minimum time gap between frames in seconds (1-5): Prevents frames too close in time
+    min_frame_gap: float = 2.0
+    # ===== COMPRESSION PARAMETERS =====
+    # Control video compression settings
+    # Output resolution (720p, 1080p, or original)
+    output_resolution: str = "720p"
+    # Compression quality (18-28): Lower = better quality, larger files
+    compression_crf: int = 23
+    # Compression preset (ultrafast, fast, medium, slow): Affects encoding speed vs efficiency
+    compression_preset: str = "fast"
+    # ===== ADAPTIVE ENHANCEMENT PARAMETERS =====
+    # Control image enhancement
+    # Enable adaptive histogram equalization
+    enable_clahe: bool = True
+    # CLAHE clip limit (1.0-4.0): Higher = more contrast enhancement
+    clahe_clip_limit: float = 2.0
+    # Enable denoising
+    enable_denoising: bool = True
+    # Denoising strength (3-10): Higher = more denoising
+    denoise_strength: int = 5
+    # ===== OUTPUT SETTINGS =====
+    # Control output files and formats
+    # Base output directory (created by __post_init__ if it does not exist)
+    output_base_dir: str = "video_processing_outputs"
+    # Enable various output formats
+    generate_json_reports: bool = True
+    generate_html_gallery: bool = True
+    generate_compressed_video: bool = True
+    generate_segments: bool = True
+    generate_highlight_reels: bool = False  # Disabled for security focus - saves processing time
+    # Video output format (mp4, avi, mov)
+    video_output_format: str = "mp4"
+    # ===== ADVANCED PARAMETERS =====
+    # Fine-tuning for specific use cases
+    # Enable GPU acceleration if available
+    use_gpu_acceleration: bool = True
+    # Enable face detection for human-centric events
+    enable_face_detection: bool = False
+    # Enable object detection for context-aware processing
+    enable_object_detection: bool = False
+    # Enable facial recognition for suspicious person tracking (FULL implementation with FAISS + MongoDB)
+    enable_facial_recognition: bool = True
+    # Face recognition confidence threshold (0.5-0.95)
+    face_recognition_confidence: float = 0.7
+    # Face detection model to use (MTCNN for detection, FaceNet for embeddings)
+    face_detection_model: str = "mtcnn"
+    # Face recognition model to use (InceptionResnetV1 with FAISS similarity search)
+    face_recognition_model: str = "facenet_faiss"
+    # Enable suspicious person database and tracking
+    suspicious_person_tracking: bool = True
+    # Face database settings
+    face_database_enabled: bool = True
+    # ===== OBJECT DETECTION PARAMETERS =====
+    # Configuration for fire, knife, gun detection
+    # Models directory path: the "models" folder next to this config file
+    # (evaluated once, when the class is defined)
+    models_dir: str = os.path.join(os.path.dirname(__file__), "models")
+    # Object detection confidence threshold (0.1-0.9)
+    object_detection_confidence: float = 0.5
+    # Temporal window for grouping object detections into events (seconds)
+    object_event_temporal_window: float = 5.0
+    # Enable annotation of detected objects on keyframes
+    enable_object_annotation: bool = True
+    # Object detection specific thresholds
+    fire_detection_confidence: float = 0.7     # Same default as weapons; get_security_focused_config lowers it to 0.5
+    weapon_detection_confidence: float = 0.7   # Higher threshold for weapons (reduce false positives)
+    # Enable specific object types
+    enable_fire_detection: bool = True
+    enable_weapon_detection: bool = True
+    # Object event importance multiplier
+    object_event_importance_multiplier: float = 2.0
+    # ===== BEHAVIOR ANALYSIS PARAMETERS =====
+    # Configuration for behavior/action recognition (fighting, accidents, climbing)
+    # Enable behavior analysis
+    enable_behavior_analysis: bool = False
+    # Behavior analysis models directory: the "behavior_analysis" folder next to this config file
+    behavior_models_dir: str = os.path.join(os.path.dirname(__file__), "behavior_analysis")
+    # Behavior detection confidence thresholds per action type (0.3-0.8)
+    fighting_detection_confidence: float = 0.5
+    accident_detection_confidence: float = 0.6
+    climbing_detection_confidence: float = 0.7
+    # Temporal window for grouping behavior detections into events (seconds)
+    behavior_event_temporal_window: float = 5.0
+    # Behavior event importance multiplier
+    behavior_event_importance_multiplier: float = 2.5
+    # Enable specific behavior types
+    enable_fighting_detection: bool = True
+    enable_accident_detection: bool = True
+    enable_climbing_detection: bool = True
+    # ===== VIDEO CAPTIONING PARAMETERS =====
+    # Configuration for video frame captioning with vision-language models
+    # Enable video captioning
+    enable_video_captioning: bool = False
+    # Vision model for caption generation
+    captioning_vision_model: str = "Salesforce/blip-image-captioning-base"
+    # Embedding model for semantic search
+    captioning_embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2"
+    # Device for captioning models (cpu or cuda)
+    captioning_device: str = "cpu"
+    # Batch size for captioning (increased for better throughput)
+    captioning_batch_size: int = 8
+    # Database paths for caption storage
+    captioning_db_path: Optional[str] = None  # Will use default if None
+    captioning_vector_db_path: str = "./video_captioning_store"
+    # Enable async processing for captioning
+    captioning_async: bool = True
+    # Parallel processing workers (1-8): More workers = faster but more memory
+    num_workers: int = 4
+    def __post_init__(self):
+        """
+        Create the output directory and validate configuration parameters
+        Raises:
+            AssertionError: if base_quality_threshold, motion_threshold or
+                similarity_threshold is outside its documented range
+        """
+        # Ensure output directory exists (side effect of every construction)
+        os.makedirs(self.output_base_dir, exist_ok=True)
+        # Validate thresholds
+        # NOTE(review): assert statements are removed when Python runs with -O,
+        # so these checks are skipped in optimized mode.
+        assert 0.1 <= self.base_quality_threshold <= 0.3, "Quality threshold must be between 0.1-0.3"
+        assert 0.005 <= self.motion_threshold <= 0.02, "Motion threshold must be between 0.005-0.02"
+        assert 0.8 <= self.similarity_threshold <= 0.95, "Similarity threshold must be between 0.8-0.95"
+# ===== PRESET CONFIGURATIONS =====
+def get_high_recall_config() -> VideoProcessingConfig:
+    """Configuration optimized for capturing more events (more keyframes)"""
+    return VideoProcessingConfig(
+        base_quality_threshold=0.12,      # Lower quality threshold
+        motion_threshold=0.005,           # Very sensitive motion detection
+        event_importance_threshold=0.20,   # Lower event threshold
+        max_summary_frames=25,            # More frames in summary
+        frame_sampling_interval=0.8,      # More frequent sampling
+        temporal_clustering_window=20.0,   # Wider clustering window
+        burst_weight=3.0,                 # Higher burst priority
+        keyframes_per_segment=6           # More keyframes per segment
+    )
+def get_high_precision_config() -> VideoProcessingConfig:
+    """Configuration optimized for quality over quantity (fewer but better keyframes)"""
+    return VideoProcessingConfig(
+        base_quality_threshold=0.20,      # Higher quality threshold
+        motion_threshold=0.015,           # Less sensitive motion detection
+        event_importance_threshold=0.35,   # Higher event threshold
+        max_summary_frames=12,            # Fewer frames in summary
+        frame_sampling_interval=1.5,      # Less frequent sampling
+        temporal_clustering_window=10.0,   # Tighter clustering
+        burst_weight=2.0,                 # Moderate burst priority
+        keyframes_per_segment=4           # Fewer keyframes per segment
+    )
+def get_balanced_config() -> VideoProcessingConfig:
+    """Balanced configuration for general use"""
+    return VideoProcessingConfig()  # Uses default values
+# Removed robbery detection config - using security_focused_config instead
+def get_security_focused_config() -> VideoProcessingConfig:
+    """Configuration optimized specifically for security and threat detection"""
+    return VideoProcessingConfig(
+        base_quality_threshold=0.12,
+        motion_threshold=0.005,           # Very sensitive
+        event_importance_threshold=0.20,
+        burst_weight=3.0,                 # Highest priority for burst activity
+        temporal_clustering_window=20.0,
+        max_summary_frames=25,
+        frame_display_duration=2.0,
+        similarity_threshold=0.82,
+        enable_clahe=True,
+        clahe_clip_limit=3.0,
+        # Enhanced object detection for security
+        enable_object_detection=True,
+        object_detection_confidence=0.4,  # Lower threshold for better recall
+        fire_detection_confidence=0.5,    # Very sensitive for fire
+        weapon_detection_confidence=0.7,  # Higher threshold for weapons to reduce false positives
+        object_event_temporal_window=8.0, # Longer window for complex events
+        enable_object_annotation=True,
+        object_event_importance_multiplier=3.0,  # High importance for security events
+        # Enhanced behavior analysis for security
+        enable_behavior_analysis=True,
+        fighting_detection_confidence=0.5,
+        accident_detection_confidence=0.6,
+        climbing_detection_confidence=0.7,
+        behavior_event_temporal_window=8.0,  # Longer window for complex events
+        behavior_event_importance_multiplier=3.0,  # High importance for security events
+        # Video captioning for semantic search
+        enable_video_captioning=True,
+        captioning_device="cpu"  # Change to "cuda" if GPU available
+    )
+# ===== PARAMETER ADJUSTMENT GUIDE =====
+# Human-readable tuning cheat sheet consumed by print_parameter_guide().
+# Outer keys are tuning goals; each inner dict maps a configuration parameter name to a
+# free-text hint (direction plus a suggested range or value). The hints are display text
+# only: nothing in this block parses or applies them.
+PARAMETER_GUIDE = {
+    "More Keyframes": {
+        "base_quality_threshold": "Decrease (0.10-0.12)",
+        "motion_threshold": "Decrease (0.005-0.008)",
+        "event_importance_threshold": "Decrease (0.20-0.25)",
+        "max_summary_frames": "Increase (20-30)",
+        "keyframes_per_segment": "Increase (6-8)",
+        "frame_sampling_interval": "Decrease (0.5-1.0)"
+    },
+    "Fewer Keyframes": {
+        "base_quality_threshold": "Increase (0.18-0.25)",
+        "motion_threshold": "Increase (0.012-0.020)",
+        "event_importance_threshold": "Increase (0.30-0.40)",
+        "max_summary_frames": "Decrease (8-15)",
+        "keyframes_per_segment": "Decrease (3-4)",
+        "frame_sampling_interval": "Increase (1.5-2.5)"
+    },
+    "Better Quality": {
+        "base_quality_threshold": "Increase (0.18-0.25)",
+        "compression_crf": "Decrease (18-20)",
+        "enable_clahe": "True",
+        "enable_denoising": "True",
+        "output_resolution": "'1080p'"
+    },
+    "Faster Processing": {
+        "compression_preset": "'ultrafast'",
+        "num_workers": "Increase (6-8)",
+        "enable_face_detection": "False",
+        "enable_object_detection": "False",
+        "keyframes_per_segment": "Decrease (3-4)"
+    },
+    "More Sensitive Event Detection": {
+        "motion_threshold": "Decrease (0.005-0.008)",
+        "burst_weight": "Increase (2.5-3.0)",
+        "event_importance_threshold": "Decrease (0.20-0.25)",
+        "temporal_clustering_window": "Increase (15-25)"
+    }
+}
+def print_parameter_guide():
+    """Print parameter adjustment guide"""
+    print("🔧 VIDEO PROCESSING PARAMETER ADJUSTMENT GUIDE")
+    print("=" * 60)
+    for goal, params in PARAMETER_GUIDE.items():
+        print(f"\n🎯 {goal}:")
+        for param, adjustment in params.items():
+            print(f"   • {param}: {adjustment}")
+    print(f"\n📝 Available Preset Configurations:")
+    print(f"   • get_high_recall_config() - More keyframes, sensitive detection")
+    print(f"   • get_high_precision_config() - Fewer but higher quality keyframes")
+    print(f"   • get_balanced_config() - General purpose settings")
+    print(f"   • get_security_focused_config() - Optimized for security/threat detection")
+# Running this module as a script prints the parameter tuning guide to stdout.
+if __name__ == "__main__":
+    print_parameter_guide()

core/video_processing.py ADDED Viewed

	@@ -0,0 +1,384 @@

+"""
+Optimized Video Processing for DetectifAI
+This module contains optimized video processing components focusing on:
+- Efficient keyframe extraction for security footage
+- Selective frame enhancement only when needed
+- Memory-optimized processing for large surveillance videos
+"""
+import cv2
+import numpy as np
+import os
+import uuid
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass
+import time
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+@dataclass
+class FrameData:
+    """Per-frame record describing one frame that was written to disk."""
+    # Filesystem path of the saved frame image.
+    frame_path: str
+    # Position in the video; the extractor in this module stores frame_number / fps
+    # (seconds) and falls back to the raw frame index when the FPS is unknown.
+    timestamp: float
+    # Zero-based index of the frame within the source video.
+    frame_number: int
+    # Sharpness score; the extractor in this module produces values in [0, 1].
+    quality_score: float
+    # Motion score; the extractor in this module always stores 0.0.
+    motion_score: float
+    # Burst flag; the extractor in this module always stores False.
+    burst_active: bool
+    # True when the saved image is the enhanced version of the frame.
+    enhancement_applied: bool
+    # Not populated by this module - presumably filled in by downstream detectors (confirm).
+    face_count: int = 0
+    object_count: int = 0
+@dataclass
+class KeyframeResult:
+    """One selected keyframe: the frame record plus why and how strongly it was selected."""
+    # Record describing the saved frame.
+    frame_data: FrameData
+    # Selection score; the extractor in this module reuses the frame's quality score.
+    keyframe_score: float
+    # Free-text explanation of why the frame was kept.
+    selection_reason: str
+class OptimizedFrameEnhancer:
+    """Optimized frame enhancement for DetectifAI - only enhance when necessary"""
+    def __init__(self, enable_clahe: bool = True, clahe_clip_limit: float = 2.0):
+        self.enable_clahe = enable_clahe
+        # Initialize CLAHE (skip denoising for performance)
+        if enable_clahe:
+            self.clahe = cv2.createCLAHE(clipLimit=clahe_clip_limit, tileGridSize=(8, 8))
+        logger.info(f"OptimizedFrameEnhancer initialized - CLAHE: {enable_clahe}")
+    def enhance_frame_if_needed(self, frame: np.ndarray) -> Tuple[np.ndarray, bool]:
+        """
+        Enhance frame only if quality is poor (DetectifAI optimization)
+        Args:
+            frame: Input frame as numpy array
+        Returns:
+            Tuple of (enhanced_frame, enhancement_applied)
+        """
+        try:
+            # Quick quality assessment
+            if not self._needs_enhancement(frame):
+                return frame, False
+            enhanced = frame.copy()
+            # Apply CLAHE only to L channel for color frames
+            if len(frame.shape) == 3 and self.enable_clahe:
+                lab = cv2.cvtColor(enhanced, cv2.COLOR_BGR2LAB)
+                l_channel = lab[:, :, 0]
+                l_enhanced = self.clahe.apply(l_channel)
+                lab[:, :, 0] = l_enhanced
+                enhanced = cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)
+                return enhanced, True
+            elif len(frame.shape) == 2 and self.enable_clahe:
+                # Grayscale frame
+                enhanced = self.clahe.apply(enhanced)
+                return enhanced, True
+            return frame, False
+        except Exception as e:
+            logger.error(f"Error enhancing frame: {e}")
+            return frame, False
+    def _needs_enhancement(self, frame: np.ndarray) -> bool:
+        """
+        Quick quality check - only enhance genuinely poor quality frames
+        """
+        try:
+            # Convert to grayscale for analysis
+            if len(frame.shape) == 3:
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = frame
+            # Check brightness and contrast
+            mean_brightness = np.mean(gray)
+            contrast = np.std(gray)
+            # Only enhance if frame has quality issues
+            return (
+                mean_brightness < 50 or    # Too dark
+                mean_brightness > 200 or   # Too bright
+                contrast < 30             # Low contrast
+            )
+        except Exception:
+            return False
+class OptimizedVideoProcessor:
+    """
+    Optimized video processor for DetectifAI surveillance footage
+    """
+    def __init__(self, config=None):
+        self.config = config
+        self.frame_enhancer = OptimizedFrameEnhancer(
+            enable_clahe=getattr(config, 'enable_adaptive_processing', True)
+        )
+        # Processing statistics
+        self.processing_stats = {
+            'frames_processed': 0,
+            'frames_enhanced': 0,
+            'keyframes_extracted': 0,
+            'total_processing_time': 0.0
+        }
+        logger.info("OptimizedVideoProcessor initialized")
+    def extract_keyframes_optimized(self, video_path: str, output_dir: str,
+                                   fps_interval: float = 1.0) -> List[KeyframeResult]:
+        """
+        Extract keyframes with optimized processing for surveillance video
+        Args:
+            video_path: Path to input video
+            output_dir: Directory to save keyframes
+            fps_interval: Seconds between keyframes (default: 1 frame per second)
+        Returns:
+            List of KeyframeResult objects
+        """
+        start_time = time.time()
+        keyframes = []
+        try:
+            # Open video
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return []
+            # Get video properties
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            duration = total_frames / fps if fps > 0 else 0
+            logger.info(f"Video properties: {total_frames} frames, {fps:.2f} FPS, {duration:.2f}s")
+            # Calculate frame interval
+            frame_interval = int(fps * fps_interval) if fps > 0 else 30
+            # Create output directory
+            frames_dir = os.path.join(output_dir, 'frames')
+            os.makedirs(frames_dir, exist_ok=True)
+            frame_count = 0
+            extracted_count = 0
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Extract keyframes at specified intervals
+                if frame_count % frame_interval == 0:
+                    timestamp = frame_count / fps if fps > 0 else frame_count
+                    # Assess frame quality
+                    quality_score = self._assess_frame_quality(frame)
+                    # Enhance frame if needed
+                    enhanced_frame, enhancement_applied = self.frame_enhancer.enhance_frame_if_needed(frame)
+                    # Use consistent naming pattern for MinIO storage
+                    frame_filename = f"frame_{frame_count:06d}.jpg"
+                    frame_path = os.path.join(frames_dir, frame_filename)
+                    cv2.imwrite(frame_path, enhanced_frame)
+                    # Create frame data
+                    frame_data = FrameData(
+                        frame_path=frame_path,
+                        timestamp=timestamp,
+                        frame_number=frame_count,
+                        quality_score=quality_score,
+                        motion_score=0.0,  # Can be calculated if needed
+                        burst_active=False,
+                        enhancement_applied=enhancement_applied
+                    )
+                    keyframe_result = KeyframeResult(
+                        frame_data=frame_data,
+                        keyframe_score=quality_score,
+                        selection_reason="Regular interval extraction"
+                    )
+                    keyframes.append(keyframe_result)
+                    extracted_count += 1
+                    # Update stats
+                    if enhancement_applied:
+                        self.processing_stats['frames_enhanced'] += 1
+                frame_count += 1
+                self.processing_stats['frames_processed'] += 1
+                # Progress logging
+                if frame_count % 1000 == 0:
+                    progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
+                    logger.info(f"Progress: {progress:.1f}% ({frame_count}/{total_frames} frames)")
+            cap.release()
+            # Update final statistics
+            processing_time = time.time() - start_time
+            self.processing_stats['keyframes_extracted'] = extracted_count
+            self.processing_stats['total_processing_time'] = processing_time
+            logger.info(f"✅ Keyframe extraction complete:")
+            logger.info(f"   📊 Extracted {extracted_count} keyframes from {frame_count} frames")
+            logger.info(f"   ⚡ Enhanced {self.processing_stats['frames_enhanced']} frames")
+            logger.info(f"   ⏱️  Processing time: {processing_time:.2f}s")
+            return keyframes
+        except Exception as e:
+            logger.error(f"Error in keyframe extraction: {e}")
+            return []
+    def _assess_frame_quality(self, frame: np.ndarray) -> float:
+        """
+        Quick frame quality assessment for keyframe selection
+        """
+        try:
+            # Convert to grayscale
+            if len(frame.shape) == 3:
+                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            else:
+                gray = frame
+            # Calculate Laplacian variance (focus measure)
+            laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
+            # Normalize to 0-1 scale (higher = better quality)
+            quality_score = min(laplacian_var / 1000.0, 1.0)
+            return quality_score
+        except Exception:
+            return 0.5  # Default quality score
+    def extract_keyframes(self, video_path: str) -> List[KeyframeResult]:
+        """
+        Main keyframe extraction method for DetectifAI pipeline compatibility
+        Args:
+            video_path: Path to input video file
+        Returns:
+            List of KeyframeResult objects
+        """
+        if not self.config:
+            logger.error("No configuration provided for keyframe extraction")
+            return []
+        # Use output directory from config
+        output_dir = getattr(self.config, 'output_base_dir', 'video_processing_outputs')
+        fps_interval = getattr(self.config, 'keyframe_extraction_fps', 1.0)
+        return self.extract_keyframes_optimized(video_path, output_dir, fps_interval)
+    def get_processing_stats(self) -> Dict[str, Any]:
+        """Get processing statistics"""
+        return self.processing_stats.copy()
+class StreamingVideoProcessor:
+    """
+    Streaming processor for large surveillance videos to reduce memory usage
+    """
+    def __init__(self, config=None):
+        self.config = config
+        self.chunk_size = getattr(config, 'video_chunk_size', 1000)  # Process 1000 frames at a time
+    def process_video_in_chunks(self, video_path: str, output_dir: str,
+                               chunk_processor_func) -> Dict[str, Any]:
+        """
+        Process large videos in chunks to manage memory usage
+        Args:
+            video_path: Path to input video
+            output_dir: Output directory
+            chunk_processor_func: Function to process each chunk
+        Returns:
+            Dictionary with processing results
+        """
+        results = {
+            'total_chunks': 0,
+            'processed_chunks': 0,
+            'total_frames': 0,
+            'processing_time': 0.0
+        }
+        start_time = time.time()
+        try:
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return results
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            results['total_frames'] = total_frames
+            results['total_chunks'] = (total_frames + self.chunk_size - 1) // self.chunk_size
+            logger.info(f"Processing video in {results['total_chunks']} chunks of {self.chunk_size} frames")
+            frame_count = 0
+            chunk_count = 0
+            while frame_count < total_frames:
+                # Process chunk
+                chunk_frames = []
+                chunk_start = frame_count
+                # Read chunk frames
+                for i in range(self.chunk_size):
+                    ret, frame = cap.read()
+                    if not ret:
+                        break
+                    chunk_frames.append({
+                        'frame': frame,
+                        'frame_number': frame_count,
+                        'timestamp': frame_count / fps if fps > 0 else frame_count
+                    })
+                    frame_count += 1
+                if chunk_frames:
+                    # Process chunk
+                    chunk_processor_func(chunk_frames, chunk_count, output_dir)
+                    chunk_count += 1
+                    results['processed_chunks'] += 1
+                    # Clear memory
+                    del chunk_frames
+                    logger.info(f"Processed chunk {chunk_count}/{results['total_chunks']}")
+            cap.release()
+            results['processing_time'] = time.time() - start_time
+            logger.info(f"✅ Streaming processing complete in {results['processing_time']:.2f}s")
+        except Exception as e:
+            logger.error(f"Error in streaming processing: {e}")
+        return results
+def create_optimized_processor(config=None):
+    """Factory function to create optimized video processor"""
+    return OptimizedVideoProcessor(config)

database/config.py ADDED Viewed

	@@ -0,0 +1,173 @@

+"""
+Database Configuration for DetectifAI Backend
+This module handles connections to MongoDB Atlas and S3-compatible object storage
+(Backblaze B2) for the DetectifAI system.
+It provides centralized configuration and connection management.
+"""
+import os
+from pymongo import MongoClient
+from minio import Minio
+from minio.error import S3Error
+from dotenv import load_dotenv
+import logging
+from datetime import timedelta
+# Load environment variables
+load_dotenv()
+logger = logging.getLogger(__name__)
+class DatabaseConfig:
+    """Configuration class for database connections"""
+    def __init__(self):
+        # MongoDB Atlas connection (same as frontend)
+        self.mongo_uri = os.getenv(
+            'MONGO_URI',
+            'mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0'
+        )
+        self.mongo_db_name = 'detectifai'
+        # S3-compatible object storage (Backblaze B2)
+        self.minio_endpoint = os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com')
+        self.minio_access_key = os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001')
+        self.minio_secret_key = os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA')
+        self.minio_video_bucket = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')
+        self.minio_keyframe_bucket = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
+        self.minio_reports_bucket = os.getenv('MINIO_REPORTS_BUCKET', 'detectifai-reports')
+        self.minio_secure = os.getenv('MINIO_SECURE', 'true').lower() == 'true'
+        # Extract region from endpoint for S3 signing (e.g. 'eu-central-003')
+        self.minio_region = os.getenv('MINIO_REGION', self._extract_region(self.minio_endpoint))
+    @staticmethod
+    def _extract_region(endpoint: str) -> str:
+        """Extract region from B2 S3 endpoint like s3.eu-central-003.backblazeb2.com"""
+        parts = endpoint.split('.')
+        if len(parts) >= 3 and parts[0] == 's3':
+            return parts[1]  # e.g. 'eu-central-003'
+        return ''
+class DatabaseManager:
+    """Central database manager for MongoDB and MinIO connections"""
+    def __init__(self):
+        self.config = DatabaseConfig()
+        self._mongodb_client = None
+        self._db = None
+        self._minio_client = None
+    @property
+    def mongo_client(self):
+        """Lazy loading MongoDB client"""
+        if self._mongodb_client is None:
+            try:
+                self._mongodb_client = MongoClient(self.config.mongo_uri)
+                # Test connection
+                self._mongodb_client.admin.command('ping')
+                logger.info("✅ MongoDB connection established successfully")
+            except Exception as e:
+                logger.error(f"❌ Failed to connect to MongoDB: {e}")
+                raise
+        return self._mongodb_client
+    @property
+    def db(self):
+        """Get MongoDB database instance"""
+        if self._db is None:
+            self._db = self.mongo_client[self.config.mongo_db_name]
+        return self._db
+    @property
+    def minio_client(self):
+        """Lazy loading S3-compatible storage client — returns None when unavailable"""
+        if self._minio_client is None:
+            try:
+                self._minio_client = Minio(
+                    self.config.minio_endpoint,
+                    access_key=self.config.minio_access_key,
+                    secret_key=self.config.minio_secret_key,
+                    secure=self.config.minio_secure,
+                    region=self.config.minio_region or None
+                )
+                # Test connection and verify buckets exist
+                self._ensure_bucket_exists()
+                logger.info("✅ S3 storage connection established (Backblaze B2)")
+            except Exception as e:
+                logger.warning(f"⚠️ S3 storage unavailable (non-fatal): {e}")
+                self._minio_client = None  # keep it None so we can retry later
+                return None
+        return self._minio_client
+    def _ensure_bucket_exists(self):
+        """Verify that the required S3 buckets exist on Backblaze B2"""
+        try:
+            for bucket_name in [
+                self.config.minio_video_bucket,
+                self.config.minio_keyframe_bucket,
+                self.config.minio_reports_bucket,
+            ]:
+                if self._minio_client.bucket_exists(bucket_name):
+                    logger.info(f"✅ S3 bucket verified: {bucket_name}")
+                else:
+                    logger.warning(f"⚠️ S3 bucket not found: {bucket_name} — create it in Backblaze B2 dashboard")
+        except S3Error as e:
+            logger.error(f"❌ Failed to verify S3 buckets: {e}")
+            raise
+    def test_connections(self):
+        """Test both MongoDB and MinIO connections"""
+        mongodb_success = False
+        minio_success = False
+        try:
+            # Test MongoDB
+            self.mongo_client.admin.command('ping')
+            collections = self.db.list_collection_names()
+            logger.info(f"✅ MongoDB test successful. Collections: {collections}")
+            print(f"✅ MongoDB connected successfully. Collections: {collections}")
+            mongodb_success = True
+        except Exception as e:
+            logger.error(f"❌ MongoDB connection failed: {e}")
+            print(f"❌ MongoDB connection failed: {e}")
+        try:
+            # Test S3 storage (Backblaze B2)
+            buckets = self.minio_client.list_buckets()
+            bucket_names = [bucket.name for bucket in buckets]
+            logger.info(f"✅ S3 storage test successful. Buckets: {bucket_names}")
+            print(f"✅ S3 storage (Backblaze B2) connected successfully. Buckets: {bucket_names}")
+            minio_success = True
+        except Exception as e:
+            logger.error(f"❌ S3 storage connection failed: {e}")
+            print(f"❌ S3 storage connection failed: {e}")
+            print("💡 Check MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY env vars.")
+        return mongodb_success  # At minimum, we need MongoDB working
+    def close_connections(self):
+        """Close database connections"""
+        if self._mongodb_client:
+            self._mongodb_client.close()
+            logger.info("MongoDB connection closed")
+def get_presigned_url(minio_client, bucket_name: str, object_name: str, expires: timedelta = timedelta(hours=1)):
+    """Generate presigned URL for S3 object access (works with Backblaze B2)"""
+    try:
+        return minio_client.presigned_get_object(bucket_name, object_name, expires=expires)
+    except S3Error as e:
+        logger.error(f"Failed to generate presigned URL for {object_name}: {e}")
+        return None
+if __name__ == "__main__":
+    # Manual smoke test: build a manager and probe the configured backends.
+    db_manager = DatabaseManager()
+    # NOTE(review): test_connections() returns only the MongoDB result, so the success
+    # message below can be printed even when the S3 storage probe failed.
+    if db_manager.test_connections():
+        print("✅ All database connections working!")
+    else:
+        print("❌ Database connection issues detected")

database/keyframe_repository.py ADDED Viewed

	@@ -0,0 +1,243 @@

+"""
+Keyframe Repository for DetectifAI Database Operations
+This module provides MinIO storage and database operations for keyframes.
+"""
+import os
+import io
+import cv2
+import numpy as np
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+import logging
+from minio.error import S3Error
+logger = logging.getLogger(__name__)
+class KeyframeRepository:
+    """Repository for keyframe operations with S3 storage and MongoDB"""
+    def __init__(self, db_manager):
+        self._db_manager = db_manager
+        self.db = db_manager.db
+        self.bucket = db_manager.config.minio_keyframe_bucket  # Use dedicated keyframes bucket
+        self.collection = self.db.keyframes  # MongoDB collection for keyframe metadata
+    @property
+    def minio(self):
+        """Lazy access to S3 storage — tolerates unavailable storage"""
+        return self._db_manager.minio_client
+    def save_keyframe_to_minio(self, video_id: str, frame_data: bytes, frame_number: int, timestamp: float) -> Optional[str]:
+        """Save a single keyframe directly to S3 storage"""
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"{video_id}/frame_{frame_number:06d}.jpg"  # Use consistent naming pattern
+            # Upload bytes directly to MinIO using BytesIO
+            from io import BytesIO
+            buffer = BytesIO(frame_data)
+            self.minio.put_object(
+                self.bucket,
+                minio_path,
+                buffer,
+                length=len(frame_data),
+                content_type='image/jpeg'
+            )
+            logger.info(f"✅ Uploaded keyframe to MinIO: {minio_path}")
+            return minio_path
+        except Exception as e:
+            logger.error(f"❌ Failed to upload keyframe to MinIO: {e}")
+            return None
+    def save_keyframes_batch(self, video_id: str, keyframes: List) -> List[Dict]:
+        """Save multiple keyframes directly to MinIO and locally, return their storage info"""
+        keyframe_info = []
+        try:
+            # Create local storage directory
+            local_dir = os.path.join("video_processing_outputs", "keyframes", video_id)
+            os.makedirs(local_dir, exist_ok=True)
+            for keyframe in keyframes:
+                # Handle KeyframeResult objects
+                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                frame = frame_data.get('frame')  # numpy array
+                frame_number = frame_data.get('frame_number', 0)
+                timestamp = frame_data.get('timestamp', 0.0)
+                if frame is not None:
+                    # Convert numpy array to jpg bytes
+                    is_success, buffer = cv2.imencode('.jpg', frame)
+                    if not is_success:
+                        continue
+                    frame_bytes = buffer.tobytes()
+                    # Save locally
+                    local_filename = f"frame_{frame_number:06d}.jpg"
+                    local_path = os.path.join(local_dir, local_filename)
+                    with open(local_path, 'wb') as f:
+                        f.write(frame_bytes)
+                    logger.info(f"✅ Keyframe saved locally: {local_path}")
+                    # Upload bytes directly to MinIO
+                    minio_path = self.save_keyframe_to_minio(
+                        video_id, frame_bytes, frame_number, timestamp
+                    )
+                    if minio_path:
+                        info = {
+                            'frame_number': frame_number,
+                            'timestamp': timestamp,
+                            'minio_path': minio_path,
+                            'local_path': local_path,
+                            'quality_score': frame_data.get('quality_score', 0.0),
+                            'enhancement_applied': frame_data.get('enhancement_applied', False)
+                        }
+                        keyframe_info.append(info)
+            logger.info(f"✅ Uploaded {len(keyframe_info)} keyframes to MinIO and saved locally for video {video_id}")
+            return keyframe_info
+        except Exception as e:
+            logger.error(f"❌ Failed to upload keyframes batch: {e}")
+            return keyframe_info  # Return whatever was successful
+    def get_keyframe_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for keyframe access"""
+        if self.minio is None:
+            return None
+        try:
+            return self.minio.presigned_get_object(self.bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for keyframe: {e}")
+            return None
+    def get_video_keyframes_presigned_urls(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> List[Dict]:
+        """Get presigned URLs for all keyframes of a video"""
+        if self.minio is None:
+            return self._get_keyframes_from_local(video_id) if hasattr(self, '_get_keyframes_from_local') else []
+        try:
+            # Try both storage patterns:
+            #   1) {video_id}/keyframes/frame_*.jpg  (legacy / some pipelines)
+            #   2) {video_id}/frame_*.jpg            (save_keyframe_to_minio pattern)
+            logger.info(f"🔍 Looking for keyframes in bucket '{self.bucket}' for video '{video_id}'")
+            objects = list(self.minio.list_objects(self.bucket, prefix=f"{video_id}/keyframes/", recursive=True))
+            if not objects:
+                # Fallback: flat storage path used by save_keyframe_to_minio
+                objects = list(self.minio.list_objects(self.bucket, prefix=f"{video_id}/", recursive=True))
+            logger.info(f"📦 Found {len(objects)} objects in MinIO for keyframes")
+            keyframes_urls = []
+            for obj in objects:
+                if obj.object_name.endswith('.jpg'):
+                    # Extract frame number and timestamp from filename
+                    filename = obj.object_name.split('/')[-1]  # e.g., "frame_000001.jpg"
+                    frame_number = 0
+                    timestamp = 0.0
+                    try:
+                        # Parse frame number from filename like "frame_000001.jpg"
+                        if 'frame_' in filename:
+                            frame_str = filename.split('_')[1].split('.')[0]
+                            frame_number = int(frame_str)
+                            # Estimate timestamp from frame number (assuming 30 fps)
+                            timestamp = frame_number / 30.0
+                    except (ValueError, IndexError):
+                        pass
+                    # Try to get metadata from MinIO object
+                    try:
+                        obj_stat = self.minio.stat_object(self.bucket, obj.object_name)
+                        if obj_stat.metadata:
+                            # Extract timestamp from metadata if available
+                            if 'timestamp' in obj_stat.metadata:
+                                try:
+                                    timestamp = float(obj_stat.metadata['timestamp'])
+                                except:
+                                    pass
+                            if 'frame_number' in obj_stat.metadata:
+                                try:
+                                    frame_number = int(obj_stat.metadata['frame_number'])
+                                except:
+                                    pass
+                    except:
+                        pass
+                    # Generate presigned URL and API URL
+                    presigned_url = self.get_keyframe_presigned_url(obj.object_name, expires=expires)
+                    # Also provide API endpoint URL for direct serving
+                    api_url = f"/api/minio/image/{self.bucket}/{obj.object_name}"
+                    if presigned_url:
+                        keyframes_urls.append({
+                            'frame_number': frame_number,
+                            'timestamp': timestamp,
+                            'minio_path': obj.object_name,
+                            'presigned_url': presigned_url,
+                            'url': api_url,  # Use API endpoint for better reliability
+                            'api_url': api_url,
+                            'filename': filename
+                        })
+            # Sort by frame number
+            keyframes_urls.sort(key=lambda x: x['frame_number'])
+            logger.info(f"✅ Generated {len(keyframes_urls)} presigned URLs for video {video_id} keyframes")
+            return keyframes_urls
+        except Exception as e:
+            logger.error(f"❌ Failed to get keyframes presigned URLs for video {video_id}: {e}")
+            return []
+    def create_keyframe(self, keyframe_doc: Dict[str, Any]) -> Optional[str]:
+        """
+        Save keyframe metadata to MongoDB
+        Args:
+            keyframe_doc: Dictionary containing keyframe metadata:
+                - camera_id: Camera identifier (for live streams)
+                - video_id: Video identifier (for uploaded videos, optional)
+                - timestamp: Frame timestamp in seconds
+                - timestamp_ms: Frame timestamp in milliseconds
+                - frame_index: Frame number/index
+                - minio_path: Path to keyframe in MinIO
+                - objects_detected: List of detected objects
+                - behaviors_detected: List of detected behaviors
+                - motion_detected: Whether motion was detected
+                - motion_score: Motion detection score
+                - created_at: Creation timestamp
+        Returns:
+            MongoDB document ID or None
+        """
+        try:
+            # Ensure required fields
+            if 'created_at' not in keyframe_doc:
+                keyframe_doc['created_at'] = datetime.utcnow()
+            # Convert numpy types if present
+            try:
+                from database.models import convert_numpy_types, prepare_for_mongodb
+                keyframe_doc = convert_numpy_types(keyframe_doc)
+                keyframe_doc = prepare_for_mongodb(keyframe_doc)
+            except ImportError:
+                # Fallback if models not available
+                pass
+            # Insert into MongoDB
+            result = self.collection.insert_one(keyframe_doc)
+            logger.info(f"✅ Saved keyframe metadata to MongoDB: {keyframe_doc.get('minio_path', 'unknown')}")
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to save keyframe metadata to MongoDB: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None

database/models.py ADDED Viewed

	@@ -0,0 +1,432 @@

+"""
+Data Models for DetectifAI Database Integration
+This module defines data models that map EXACTLY to the MongoDB collections
+defined in DetectifAI_db/database_setup.py schema.
+CRITICAL RULES:
+1. Only use fields defined in the MongoDB schema validators
+2. Extra fields must go in meta_data for video_file or use related collections
+3. Always convert numpy types before MongoDB operations
+4. Timestamps in events must be milliseconds (int/long), not seconds (float)
+"""
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from bson import ObjectId
+from dataclasses import dataclass, asdict
+import json
+import numpy as np
+# ========================================
+# Schema-Compliant Data Models
+# ========================================
@dataclass
class VideoFileModel:
    """Record for the video_file collection (fields follow the MongoDB schema)."""
    # --- required by the schema ---
    video_id: str
    user_id: str
    file_path: str  # MinIO path or local path
    # --- optional in the schema ---
    minio_object_key: Optional[str] = None
    minio_bucket: Optional[str] = None
    codec: Optional[str] = None
    fps: Optional[float] = 30.0  # bsonType: double - stored as float
    upload_date: Optional[datetime] = None
    duration_secs: Optional[int] = None  # bsonType: int - stored as integer, not float
    file_size_bytes: Optional[int] = None  # bsonType: long
    meta_data: Optional[Dict] = None  # catch-all for extra fields (processing_status, resolution, etc.)
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict with defaults filled in and numeric types coerced.

        ``upload_date`` defaults to the current UTC time and ``fps`` to 30.0;
        ``duration_secs`` and ``file_size_bytes`` are cast to int when set,
        and ``fps`` is always returned as a float.
        """
        record = asdict(self)
        if record['upload_date'] is None:
            record['upload_date'] = datetime.utcnow()
        # Integer-typed schema fields: coerce only when a value is present
        for int_field in ('duration_secs', 'file_size_bytes'):
            if record[int_field] is not None:
                record[int_field] = int(record[int_field])
        # fps: fall back to 30.0, otherwise force a float (schema type is double)
        frame_rate = record['fps']
        record['fps'] = 30.0 if frame_rate is None else float(frame_rate)
        return record
@dataclass
class EventModel:
    """Record for the event collection (fields follow the MongoDB schema)."""
    # --- required by the schema ---
    event_id: str
    video_id: str
    start_timestamp_ms: int  # bsonType: long - milliseconds as an integer
    end_timestamp_ms: int    # bsonType: long - milliseconds as an integer
    # --- optional in the schema ---
    event_type: Optional[str] = None  # 'object_detection', 'motion', 'fire', 'weapon', etc.
    confidence_score: Optional[float] = None  # bsonType: double (the field is NOT called 'confidence')
    is_verified: bool = False
    is_false_positive: bool = False
    verified_at: Optional[datetime] = None
    verified_by: Optional[str] = None
    visual_embedding: Optional[List[float]] = None  # For future FAISS integration
    bounding_boxes: Optional[Dict] = None  # detection bboxes, stored as an object
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict with timestamps as ints and empty containers for unset ones.

        Both timestamps are cast to int, ``confidence_score`` to float when set,
        and a missing ``visual_embedding`` / ``bounding_boxes`` becomes ``[]`` / ``{}``.
        """
        record = asdict(self)
        # Millisecond timestamps must be integers for the MongoDB long type
        for ts_field in ('start_timestamp_ms', 'end_timestamp_ms'):
            record[ts_field] = int(record[ts_field])
        score = record['confidence_score']
        if score is not None:
            record['confidence_score'] = float(score)
        # Unset containers are emitted empty rather than as None
        if record['visual_embedding'] is None:
            record['visual_embedding'] = []
        if record['bounding_boxes'] is None:
            record['bounding_boxes'] = {}
        return record
@dataclass
class EventDescriptionModel:
    """Record for the event_description collection."""
    # --- required ---
    description_id: str
    event_id: str
    text_embedding: List[float]  # required; an empty list when not generated yet
    # --- optional ---
    caption: Optional[str] = None
    confidence: Optional[float] = None
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict, defaulting both timestamps to now (UTC) and the embedding to ``[]``."""
        record = asdict(self)
        for stamp_field in ('created_at', 'updated_at'):
            if record[stamp_field] is None:
                record[stamp_field] = datetime.utcnow()
        # text_embedding must always be a list
        if record['text_embedding'] is None:
            record['text_embedding'] = []
        return record
@dataclass
class EventCaptionModel:
    """Record for the event_caption collection."""
    # --- required ---
    description_id: str
    description: str
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a plain dict, unmodified."""
        record = asdict(self)
        return record
@dataclass
class EventClipModel:
    """Record for the event_clip collection."""
    # --- required ---
    clip_id: str
    event_id: str
    clip_path: str
    # --- optional ---
    thumbnail_path: Optional[str] = None
    minio_object_key: Optional[str] = None
    minio_bucket: Optional[str] = None
    duration_ms: Optional[int] = None  # bsonType: long
    extracted_at: Optional[datetime] = None
    file_size_bytes: Optional[int] = None  # bsonType: long
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``extracted_at`` defaults to now (UTC), sizes are cast to int."""
        record = asdict(self)
        if record['extracted_at'] is None:
            record['extracted_at'] = datetime.utcnow()
        # Integer-typed fields: coerce only when a value is present
        for int_field in ('duration_ms', 'file_size_bytes'):
            if record[int_field] is not None:
                record[int_field] = int(record[int_field])
        return record
@dataclass
class DetectedFaceModel:
    """Record for the detected_faces collection."""
    # --- required ---
    face_id: str
    event_id: str
    detected_at: datetime
    # --- optional ---
    confidence_score: Optional[float] = None
    face_embedding: Optional[List[float]] = None
    minio_object_key: Optional[str] = None
    minio_bucket: Optional[str] = None
    face_image_path: Optional[str] = None
    bounding_boxes: Optional[Dict] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict, with an unset ``face_embedding`` emitted as ``[]``."""
        record = asdict(self)
        embedding = record['face_embedding']
        if embedding is None:
            record['face_embedding'] = []
        return record
@dataclass
class FaceMatchModel:
    """Record for the face_matches collection."""
    # --- required ---
    match_id: str
    face_id_1: str
    face_id_2: str
    similarity_score: float
    # --- optional ---
    matched_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``matched_at`` defaults to the current UTC time."""
        record = asdict(self)
        if record['matched_at'] is None:
            record['matched_at'] = datetime.utcnow()
        return record
+# ========================================
+# Helper Functions for Type Safety
+# ========================================
def convert_numpy_types(obj):
    """
    Recursively convert numpy types to native Python types for MongoDB compatibility.

    MongoDB cannot serialize numpy types directly, causing BSON errors.
    Dicts, lists and tuples are walked recursively (tuples come back as plain
    tuples, so numpy scalars inside e.g. a bounding-box tuple are converted
    too); numpy integers become int, numpy floats become float, numpy bools
    become bool and arrays become nested lists. Anything else is returned
    unchanged.

    Args:
        obj: Any value, possibly a container holding numpy values.

    Returns:
        A new container (for dict/list/tuple/ndarray input) or the converted
        scalar; the input itself is never modified.
    """
    if isinstance(obj, dict):
        return {key: convert_numpy_types(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_numpy_types(item) for item in obj]
    if isinstance(obj, tuple):
        # Previously tuples were passed through untouched, leaking numpy scalars
        return tuple(convert_numpy_types(item) for item in obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.bool_):
        return bool(obj)
    return obj
def seconds_to_milliseconds(seconds: float) -> int:
    """Convert seconds (float) to milliseconds (int) for MongoDB long type.

    The product is rounded to the nearest millisecond before the int cast.
    Plain truncation loses a millisecond whenever the float product lands
    just below a whole number (e.g. 1.001 s would become 1000 ms).
    """
    return int(round(seconds * 1000))
def milliseconds_to_seconds(milliseconds: int) -> float:
    """Turn a millisecond count into fractional seconds (for display)."""
    as_float = float(milliseconds)
    return as_float / 1000.0
def prepare_for_mongodb(data: Dict) -> Dict:
    """
    Build an insert-ready copy of ``data``.

    numpy values are first converted to Python natives via
    ``convert_numpy_types``; then an ``_id`` entry whose value is None is
    dropped. All other entries, including other None values, are kept.
    """
    native = convert_numpy_types(data)
    return {
        field: value
        for field, value in native.items()
        if not (field == '_id' and value is None)
    }
def convert_objectid_to_string(doc: Dict) -> Dict:
    """Replace ObjectId values with their string form so the document is JSON-serializable.

    The dict is modified in place and also returned. ObjectIds are converted
    when they are direct values, elements of a list value, or anywhere inside
    nested dicts (including dicts that sit inside a list). A non-dict
    argument is returned unchanged.
    """
    if not isinstance(doc, dict):
        return doc
    # Only existing keys are reassigned, so iterating while updating is safe
    for field in doc:
        current = doc[field]
        if isinstance(current, ObjectId):
            doc[field] = str(current)
        elif isinstance(current, list):
            rendered = []
            for element in current:
                if isinstance(element, dict):
                    rendered.append(convert_objectid_to_string(element))
                elif isinstance(element, ObjectId):
                    rendered.append(str(element))
                else:
                    rendered.append(element)
            doc[field] = rendered
        elif isinstance(current, dict):
            doc[field] = convert_objectid_to_string(current)
    return doc
+# ========================================
+# Subscription & Payment Models
+# ========================================
@dataclass
class SubscriptionPlanModel:
    """Record for the subscription_plans collection, including Stripe identifiers."""
    # --- required ---
    plan_id: str
    plan_name: str
    price: float
    # --- optional ---
    description: Optional[str] = None
    features: Optional[str] = None  # Comma-separated feature list
    storage_limit: Optional[int] = None
    is_active: bool = True
    stripe_product_id: Optional[str] = None
    stripe_price_ids: Optional[Dict[str, str]] = None  # {"monthly": "price_xxx", "yearly": "price_xxx"}
    billing_periods: Optional[List[str]] = None  # ["monthly", "yearly"]
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; timestamps default to now (UTC), Stripe containers to empty."""
        record = asdict(self)
        for stamp_field in ('created_at', 'updated_at'):
            if record[stamp_field] is None:
                record[stamp_field] = datetime.utcnow()
        if record['stripe_price_ids'] is None:
            record['stripe_price_ids'] = {}
        if record['billing_periods'] is None:
            record['billing_periods'] = []
        return record
@dataclass
class UserSubscriptionModel:
    """Record for the user_subscriptions collection, including Stripe identifiers."""
    # --- required ---
    subscription_id: str
    user_id: str
    plan_id: str
    # --- optional ---
    start_date: Optional[datetime] = None
    end_date: Optional[datetime] = None
    stripe_customer_id: Optional[str] = None
    stripe_subscription_id: Optional[str] = None
    billing_period: Optional[str] = None  # "monthly" or "yearly"
    status: Optional[str] = "active"  # 'active', 'canceled', 'past_due', 'trialing'
    current_period_start: Optional[datetime] = None
    current_period_end: Optional[datetime] = None
    cancel_at_period_end: bool = False
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``start_date``, ``created_at`` and ``updated_at`` default to now (UTC)."""
        record = asdict(self)
        # end_date and the current_period_* fields are left as given
        for stamp_field in ('start_date', 'created_at', 'updated_at'):
            if record[stamp_field] is None:
                record[stamp_field] = datetime.utcnow()
        return record
@dataclass
class SubscriptionEventModel:
    """Record for the subscription_events collection (audit trail)."""
    # --- required ---
    event_id: str
    subscription_id: str
    event_type: str  # 'created', 'updated', 'canceled', 'payment_succeeded', etc.
    # --- optional ---
    stripe_event_id: Optional[str] = None
    event_data: Optional[Dict] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to now (UTC), ``event_data`` to ``{}``."""
        record = asdict(self)
        if record['created_at'] is None:
            record['created_at'] = datetime.utcnow()
        payload = record['event_data']
        if payload is None:
            record['event_data'] = {}
        return record
@dataclass
class PaymentHistoryModel:
    """Record for the payment_history collection (transaction records)."""
    # --- required ---
    payment_id: str
    user_id: str
    amount: float
    # --- optional ---
    stripe_payment_intent_id: Optional[str] = None
    currency: str = "USD"
    status: Optional[str] = None  # 'succeeded', 'pending', 'failed'
    payment_method: Optional[str] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to now (UTC) and ``amount`` is cast to float."""
        record = asdict(self)
        if record['created_at'] is None:
            record['created_at'] = datetime.utcnow()
        # amount is required, so it is coerced unconditionally
        raw_amount = record['amount']
        record['amount'] = float(raw_amount)
        return record
@dataclass
class SubscriptionUsageModel:
    """Record for the subscription_usage collection (analytics and limits)."""
    # --- required ---
    usage_id: str
    user_id: str
    usage_type: str  # 'video_processed', 'storage_used', 'searches_performed'
    # --- optional ---
    usage_value: Optional[float] = None
    usage_date: Optional[datetime] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; both dates default to now (UTC), ``usage_value`` is cast to float when set."""
        record = asdict(self)
        for stamp_field in ('usage_date', 'created_at'):
            if record[stamp_field] is None:
                record[stamp_field] = datetime.utcnow()
        measured = record['usage_value']
        if measured is not None:
            record['usage_value'] = float(measured)
        return record

database/models_backup.py ADDED Viewed

	@@ -0,0 +1,330 @@

+"""
+Data Models for DetectifAI Database Integration
+This module defines data models that map EXACTLY to the MongoDB collections
+defined in DetectifAI_db/database_setup.py schema.
+CRITICAL: Only use fields defined in the MongoDB schema validators.
+Extra fields must go in meta_data for video_file or use related collections.
+"""
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from bson import ObjectId
+from dataclasses import dataclass, asdict
+import json
+import numpy as np
@dataclass
class VideoFileModel:
    """Video record laid out after the video_file collection schema."""
    # Schema-required fields
    video_id: str
    user_id: str
    file_path: str  # MinIO path or local path
    # Schema-optional fields
    minio_object_key: Optional[str] = None
    minio_bucket: Optional[str] = None
    codec: Optional[str] = None
    fps: Optional[float] = 30.0  # bsonType: double - kept as float
    upload_date: Optional[datetime] = None
    duration_secs: Optional[int] = None  # bsonType: int - integer, not float
    file_size_bytes: Optional[int] = None  # bsonType: long
    meta_data: Optional[Dict] = None  # holds every extra field (processing_status, resolution, etc.)
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Produce an insert-ready dict: defaults applied, numeric fields coerced to their schema types."""
        payload = asdict(self)
        if payload['upload_date'] is None:
            payload['upload_date'] = datetime.utcnow()
        # duration_secs -> int, file_size_bytes -> int (only when provided)
        seconds = payload['duration_secs']
        if seconds is not None:
            payload['duration_secs'] = int(seconds)
        size = payload['file_size_bytes']
        if size is not None:
            payload['file_size_bytes'] = int(size)
        # fps is always emitted as a float; a missing value falls back to 30.0
        rate = payload['fps']
        payload['fps'] = float(rate) if rate is not None else 30.0
        return payload
@dataclass
class DetectedFaceModel:
    """Face detection record for the detected_faces collection."""
    video_id: str
    frame_timestamp: float
    face_bbox: List[float]  # [x1, y1, x2, y2]
    confidence: float
    face_encoding: Optional[List[float]] = None
    keyframe_minio_path: Optional[str] = None
    keyframe_id: Optional[ObjectId] = None
    person_id: Optional[str] = None
    is_suspicious: bool = False
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a plain dict, with no defaults applied."""
        payload = asdict(self)
        return payload
@dataclass
class EventModel:
    """Event record laid out after the event collection schema."""
    # Schema-required fields
    event_id: str
    video_id: str
    start_timestamp_ms: int  # bsonType: long - integer milliseconds
    end_timestamp_ms: int    # bsonType: long - integer milliseconds
    # Schema-optional fields
    event_type: Optional[str] = None  # 'object_detection', 'motion', 'fire', 'weapon', etc.
    confidence_score: Optional[float] = None  # bsonType: double (field name is NOT 'confidence')
    is_verified: bool = False
    is_false_positive: bool = False
    verified_at: Optional[datetime] = None
    verified_by: Optional[str] = None
    visual_embedding: Optional[List[float]] = None  # For future FAISS integration
    bounding_boxes: Optional[Dict] = None  # detection bboxes kept as an object
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Produce an insert-ready dict: int timestamps, float confidence, empty containers when unset."""
        payload = asdict(self)
        # MongoDB long type needs integer milliseconds
        payload['start_timestamp_ms'] = int(payload['start_timestamp_ms'])
        payload['end_timestamp_ms'] = int(payload['end_timestamp_ms'])
        confidence = payload['confidence_score']
        if confidence is not None:
            payload['confidence_score'] = float(confidence)
        # Missing containers are written as empty values, not None
        empty_defaults = (('visual_embedding', list), ('bounding_boxes', dict))
        for field_name, make_empty in empty_defaults:
            if payload[field_name] is None:
                payload[field_name] = make_empty()
        return payload
@dataclass
class EventCaptionModel:
    """Caption record for the event_caption collection."""
    event_id: ObjectId
    video_id: str
    caption_text: str
    generated_by: str = "system"  # system, user, ai
    confidence: Optional[float] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
@dataclass
class EventClipModel:
    """Clip record for the event_clip collection."""
    event_id: ObjectId
    video_id: str
    clip_start_timestamp: float
    clip_end_timestamp: float
    minio_clip_path: str
    clip_duration: float
    frame_count: int
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
@dataclass
class EventDescriptionModel:
    """Description record for the event_description collection."""
    event_id: ObjectId
    video_id: str
    description_text: str
    description_type: str = "automatic"  # automatic, manual, ai_generated
    tags: Optional[List[str]] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
@dataclass
class FaceMatchModel:
    """Face comparison record for the face_matches collection."""
    video_id: str
    face_1_id: ObjectId
    face_2_id: ObjectId
    similarity_score: float
    match_confidence: float
    is_match: bool
    person_id: Optional[str] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
+# New models for video processing pipeline
@dataclass
class KeyframeModel:
    """Record describing one extracted keyframe."""
    video_id: str
    frame_number: int
    timestamp: float
    quality_score: float
    motion_score: float
    minio_path: str
    enhancement_applied: bool = False
    face_count: int = 0
    object_detections: Optional[List[Dict]] = None
    processing_metadata: Optional[Dict] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to now (UTC), ``object_detections`` to ``[]``."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        detections = payload['object_detections']
        if detections is None:
            payload['object_detections'] = []
        return payload
@dataclass
class VideoSegmentModel:
    """Record describing one segment of a video."""
    video_id: str
    segment_id: int
    start_timestamp: float
    end_timestamp: float
    duration: float
    start_frame: int
    end_frame: int
    keyframe_ids: List[ObjectId]
    activity_level: str  # low, medium, high
    motion_statistics: Optional[Dict] = None
    segment_minio_path: Optional[str] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
@dataclass
class ProcessingJobModel:
    """Record tracking the state of one processing job for a video."""
    video_id: str
    job_type: str = "complete_processing"  # complete_processing, keyframe_extraction, object_detection
    status: str = "queued"  # queued, processing, completed, failed
    progress: int = 0  # 0-100
    message: str = ""
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    processing_stats: Optional[Dict] = None
    error_details: Optional[Dict] = None
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; only ``created_at`` is defaulted (to the current UTC time)."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
@dataclass
class ObjectDetectionModel:
    """Record holding one detailed object detection result."""
    video_id: str
    keyframe_id: ObjectId
    detection_id: str
    class_name: str  # fire, smoke, knife, gun
    confidence: float
    bbox: List[float]  # [x1, y1, x2, y2]
    center_point: List[float]  # [x, y]
    area: float
    frame_timestamp: float
    detection_model: str  # 'fire' for fire_YOLO11.pt, 'weapon' for weapon_YOLO11.pt
    threat_level: str = "low"
    created_at: Optional[datetime] = None
    _id: Optional[ObjectId] = None

    def to_dict(self) -> Dict:
        """Return the fields as a dict; ``created_at`` defaults to the current UTC time."""
        payload = asdict(self)
        if payload['created_at'] is None:
            payload['created_at'] = datetime.utcnow()
        return payload
class ModelFactory:
    """Builds model instances by expanding a document's keys into constructor keyword arguments.

    Each method passes ``doc`` straight to the model's constructor, so every
    key in the document must be a field of that model and all of the model's
    required fields must be present.
    """

    @staticmethod
    def create_video_file(doc: Dict) -> VideoFileModel:
        """Build a VideoFileModel from a document's key/value pairs."""
        video_file = VideoFileModel(**doc)
        return video_file

    @staticmethod
    def create_keyframe(doc: Dict) -> KeyframeModel:
        """Build a KeyframeModel from a document's key/value pairs."""
        keyframe = KeyframeModel(**doc)
        return keyframe

    @staticmethod
    def create_event(doc: Dict) -> EventModel:
        """Build an EventModel from a document's key/value pairs."""
        event = EventModel(**doc)
        return event

    @staticmethod
    def create_processing_job(doc: Dict) -> ProcessingJobModel:
        """Build a ProcessingJobModel from a document's key/value pairs."""
        job = ProcessingJobModel(**doc)
        return job
+# Helper functions for database operations
def prepare_for_mongodb(data: Dict) -> Dict:
    """Return a shallow copy of ``data`` without its ``_id`` entry when that entry is None.

    Every other entry is kept as is, including other None values and an
    ``_id`` that holds a real value.
    """
    return {
        field: value
        for field, value in data.items()
        if not (field == '_id' and value is None)
    }
def convert_objectid_to_string(doc: Dict) -> Dict:
    """Stringify ObjectId values in ``doc`` (in place) for JSON serialization.

    Handles ObjectIds that are direct values, list elements, or located in
    nested dicts (including dicts held inside a list). The same dict object
    is returned; a non-dict argument is handed back untouched.
    """
    def _render(element):
        # One list element: recurse into dicts, stringify ObjectIds, keep the rest
        if isinstance(element, dict):
            return convert_objectid_to_string(element)
        if isinstance(element, ObjectId):
            return str(element)
        return element

    if isinstance(doc, dict):
        for name, held in doc.items():
            if isinstance(held, ObjectId):
                doc[name] = str(held)
            elif isinstance(held, list):
                doc[name] = list(map(_render, held))
            elif isinstance(held, dict):
                doc[name] = convert_objectid_to_string(held)
    return doc

database/repositories.py ADDED Viewed

	@@ -0,0 +1,516 @@

+"""
+Repository Classes for DetectifAI Database Operations
+This module provides data access layer for MongoDB and MinIO operations.
+Each repository handles CRUD operations for specific collections.
+"""
+import os
+import io
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from bson import ObjectId
+from pymongo.collection import Collection
+from minio import Minio
+from minio.error import S3Error
+import logging
+from .models import (
+    VideoFileModel, EventModel, EventDescriptionModel, DetectedFaceModel,
+    prepare_for_mongodb, convert_objectid_to_string, convert_numpy_types,
+    seconds_to_milliseconds
+)
+logger = logging.getLogger(__name__)
class BaseRepository:
    """Common state shared by the repositories: database handle, bucket names, storage client."""

    def __init__(self, db_manager):
        """Keep the manager and copy its database handle and bucket names onto the repository."""
        self._db_manager = db_manager
        self.db = db_manager.db
        storage_config = db_manager.config
        self.video_bucket = storage_config.minio_video_bucket
        self.keyframe_bucket = storage_config.minio_keyframe_bucket

    @property
    def minio(self):
        """Storage client, read from the manager on every access rather than cached at construction."""
        return self._db_manager.minio_client
+class VideoRepository(BaseRepository):
+    """Repository for video_file collection operations"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.video_file
+    def create_video_record(self, video_data: Dict) -> str:
+        """Create new video record matching MongoDB schema exactly"""
+        try:
+            # Extract required fields
+            video_id = video_data.get('video_id')
+            user_id = video_data.get('user_id', 'system')
+            file_path = video_data.get('file_path', f"videos/{video_id}.mp4")
+            # Build schema-compliant record
+            record = {
+                "video_id": video_id,
+                "user_id": user_id,
+                "file_path": file_path,
+                "upload_date": datetime.utcnow()
+            }
+            # Add optional schema fields
+            if 'fps' in video_data:
+                record['fps'] = float(video_data['fps'])  # Ensure double type
+            else:
+                record['fps'] = 30.0  # Default
+            if 'duration' in video_data or 'duration_secs' in video_data:
+                duration = video_data.get('duration_secs') or video_data.get('duration', 0)
+                record['duration_secs'] = int(duration)  # Ensure integer
+            if 'file_size' in video_data or 'file_size_bytes' in video_data:
+                file_size = video_data.get('file_size_bytes') or video_data.get('file_size', 0)
+                record['file_size_bytes'] = int(file_size)  # Ensure long
+            if 'codec' in video_data:
+                record['codec'] = str(video_data['codec'])
+            if 'minio_object_key' in video_data:
+                record['minio_object_key'] = video_data['minio_object_key']
+            if 'minio_bucket' in video_data:
+                record['minio_bucket'] = video_data['minio_bucket']
+            # Build meta_data object for extra fields
+            meta_data = {}
+            extra_fields = [
+                'processing_status', 'resolution', 'filename', 'keyframe_count',
+                'event_count', 'compression_applied', 'enhancement_applied',
+                'error_message', 'processing_config'
+            ]
+            for field in extra_fields:
+                if field in video_data:
+                    meta_data[field] = video_data[field]
+            if meta_data:
+                record['meta_data'] = meta_data
+            # Convert numpy types and prepare for MongoDB
+            record = prepare_for_mongodb(record)
+            result = self.collection.insert_one(record)
+            logger.info(f"✅ Created video record: {video_id}")
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to create video record: {e}")
+            raise
+    def get_video_by_id(self, video_id: str) -> Optional[Dict]:
+        """Get video record by video_id"""
+        try:
+            doc = self.collection.find_one({"video_id": video_id})
+            if doc:
+                return convert_objectid_to_string(doc)
+            return None
+        except Exception as e:
+            logger.error(f"❌ Failed to get video {video_id}: {e}")
+            return None
+    def update_processing_status(self, video_id: str, status: str, metadata: Dict = None):
+        """Update video processing status in meta_data field"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                return
+            current_meta = video.get('meta_data', {})
+            current_meta['processing_status'] = status
+            current_meta['last_updated'] = datetime.utcnow().isoformat()
+            # Add any additional metadata
+            if metadata:
+                current_meta.update(metadata)
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            if result.matched_count > 0:
+                logger.info(f"✅ Updated video status: {video_id} -> {status}")
+            else:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to update video status: {e}")
+            raise
+    def update_metadata(self, video_id: str, metadata: Dict):
+        """Update video meta_data field with processing information"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found: {video_id}")
+                return
+            current_meta = video.get('meta_data', {})
+            current_meta.update(metadata)
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            logger.info(f"✅ Updated video metadata: {video_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to update video metadata: {e}")
+            raise
+    def upload_video_to_minio(self, local_path: str, video_id: str) -> str:
+        """Upload video file to S3 storage"""
+        if self.minio is None:
+            logger.warning("S3 storage unavailable — skipping video upload to object storage")
+            return f"local://{local_path}"
+        try:
+            minio_path = f"original/{video_id}/video.mp4"
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.video_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type='video/mp4'
+                )
+            logger.info(f"✅ Uploaded video to S3: {minio_path}")
+            return minio_path
+        except Exception as e:
+            logger.error(f"❌ Failed to upload video to S3: {e}")
+            raise
+    def get_video_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for video access"""
+        if self.minio is None:
+            return None
+        try:
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL: {e}")
+            return None
+    def get_compressed_video_presigned_url(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for compressed video access"""
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"compressed/{video_id}/video.mp4"
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for compressed video: {e}")
+            return None
+# ========================================
+# Event Repository (Schema-Compliant)
+# ========================================
+class EventRepository(BaseRepository):
+    """Repository for event collection operations - Schema Compliant"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.event
+        self.event_description_collection = self.db.event_description
+    def create_event(self, event_data: Dict) -> str:
+        """Create event - alias for save_event for compatibility"""
+        return self.save_event(event_data)
+    def save_event(self, event_data: Dict) -> str:
+        """Save event matching MongoDB schema exactly"""
+        try:
+            import uuid
+            # Extract required fields
+            event_id = event_data.get('event_id', str(uuid.uuid4()))
+            video_id = event_data.get('video_id', event_data.get('camera_id', 'unknown'))
+            # Convert timestamps: seconds (float) -> milliseconds (int)
+            start_time = event_data.get('start_timestamp', 0.0)
+            end_time = event_data.get('end_timestamp', 0.0)
+            start_timestamp_ms = seconds_to_milliseconds(start_time)
+            end_timestamp_ms = seconds_to_milliseconds(end_time)
+            # Build schema-compliant event document
+            event_doc = {
+                "event_id": event_id,
+                "video_id": video_id,
+                "start_timestamp_ms": int(start_timestamp_ms),
+                "end_timestamp_ms": int(end_timestamp_ms),
+                "event_type": event_data.get('event_type', 'motion'),
+                "confidence_score": float(event_data.get('confidence', 0.0)),
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_at": None,
+                "verified_by": None,
+                "visual_embedding": [],
+                "bounding_boxes": event_data.get('bounding_boxes', {})
+            }
+            # Convert numpy types
+            event_doc = convert_numpy_types(event_doc)
+            event_doc = prepare_for_mongodb(event_doc)
+            result = self.collection.insert_one(event_doc)
+            logger.info(f"✅ Saved event: {event_id} ({event_data.get('event_type')})")
+            # If there's additional description info, save to event_description
+            if event_data.get('description') or event_data.get('caption'):
+                self._save_event_description(event_id, event_data)
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to save event: {e}")
+            raise
+    def save_detection_events(self, video_id: str, detection_groups: List[Dict]) -> List[str]:
+        """Save object detection events with proper schema compliance"""
+        event_ids = []
+        try:
+            for group in detection_groups:
+                # Build bounding_boxes object
+                bboxes = {
+                    "detections": [
+                        {
+                            "class": det.get('class_name', ''),
+                            "confidence": float(det.get('confidence', 0.0)),
+                            "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                            "timestamp": float(det.get('frame_timestamp', 0.0)),
+                            "model": det.get('detection_model', '')
+                        }
+                        for det in group.get('detections', [])
+                    ]
+                }
+                event_data = {
+                    "video_id": video_id,
+                    "start_timestamp": group.get('start_timestamp', 0.0),
+                    "end_timestamp": group.get('end_timestamp', 0.0),
+                    "event_type": f"object_detection_{group.get('class', 'unknown')}",
+                    "confidence": group.get('max_confidence', 0.0),
+                    "bounding_boxes": bboxes,
+                    "description": f"Detected {len(group.get('detections', []))} {group.get('class', 'object')}(s)"
+                }
+                event_id = self.save_event(event_data)
+                event_ids.append(event_id)
+            logger.info(f"✅ Saved {len(event_ids)} detection events for video {video_id}")
+            return event_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection events: {e}")
+            raise
+    def _save_event_description(self, event_id: str, event_data: Dict):
+        """Save detailed event description to event_description collection.
+        Generates real text embeddings using SentenceTransformer (all-mpnet-base-v2)
+        for compatibility with NLP search in query_retreival.py.
+        """
+        try:
+            import uuid
+            description_text = event_data.get('description') or event_data.get('caption', '')
+            if not description_text:
+                return
+            # Generate real text embedding for NLP search
+            text_embedding = self._generate_text_embedding(description_text)
+            description_doc = {
+                "description_id": str(uuid.uuid4()),
+                "event_id": event_id,
+                "caption": description_text,
+                "text_embedding": text_embedding,
+                "confidence": float(event_data.get('confidence', 0.0)),
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+            description_doc = prepare_for_mongodb(description_doc)
+            self.event_description_collection.insert_one(description_doc)
+            logger.info(f"✅ Saved event description for {event_id} (embedding: {len(text_embedding)}-dim)")
+        except Exception as e:
+            logger.error(f"❌ Failed to save event description: {e}")
+    def _generate_text_embedding(self, text: str) -> list:
+        """Generate text embedding using SentenceTransformer.
+        Lazy-loads the model on first call and caches it as a class attribute.
+        Uses all-mpnet-base-v2 (768-dim) for NLP search compatibility.
+        """
+        # Lazy-load and cache the model at class level
+        if not hasattr(EventRepository, '_embedding_model'):
+            EventRepository._embedding_model = None
+        if EventRepository._embedding_model is None:
+            try:
+                from sentence_transformers import SentenceTransformer
+                EventRepository._embedding_model = SentenceTransformer('all-mpnet-base-v2')
+                logger.info("✅ Loaded SentenceTransformer (all-mpnet-base-v2) for event embeddings")
+            except Exception as e:
+                logger.error(f"Failed to load SentenceTransformer: {e}")
+                return []
+        try:
+            import numpy as np
+            embedding = EventRepository._embedding_model.encode(text, normalize_embeddings=True)
+            return embedding.astype(np.float32).tolist()
+        except Exception as e:
+            logger.error(f"Failed to generate text embedding: {e}")
+            return []
+    def get_events_by_video_id(self, video_id: str, event_type: str = None) -> List[Dict]:
+        """Get events for a video with optional type filtering"""
+        try:
+            query = {"video_id": video_id}
+            if event_type:
+                query["event_type"] = event_type
+            events = list(self.collection.find(query).sort("start_timestamp_ms", 1))
+            # Convert ObjectIds to strings
+            for event in events:
+                event = convert_objectid_to_string(event)
+            return events
+        except Exception as e:
+            logger.error(f"❌ Failed to get events for video {video_id}: {e}")
+            return []
+    def mark_as_false_positive(self, event_id: str):
+        """Mark event as false positive (for deduplication)"""
+        try:
+            self.collection.update_one(
+                {"event_id": event_id},
+                {"$set": {"is_false_positive": True}}
+            )
+            logger.info(f"✅ Marked event {event_id} as false positive")
+        except Exception as e:
+            logger.error(f"❌ Failed to mark event as false positive: {e}")
+# ========================================
+# Report Repository
+# ========================================
+class ReportRepository(BaseRepository):
+    """Repository for report storage and retrieval operations"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.reports_bucket = db_manager.config.minio_reports_bucket
+    def upload_report_to_minio(self, local_path: str, video_id: str, filename: str) -> str:
+        """
+        Upload report file to S3 storage
+        Args:
+            local_path: Path to local report file
+            video_id: Video identifier
+            filename: Report filename (e.g., report_20260130_123456.html)
+        Returns:
+            S3 object path
+        """
+        if self.minio is None:
+            logger.warning("S3 storage unavailable — skipping report upload to object storage")
+            return f"local://{local_path}"
+        try:
+            minio_path = f"reports/{video_id}/{filename}"
+            # Determine content type based on file extension
+            content_type = 'text/html' if filename.endswith('.html') else 'application/pdf'
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.reports_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type=content_type
+                )
+            logger.info(f"✅ Uploaded report to S3: {minio_path}")
+            return minio_path
+        except Exception as e:
+            logger.error(f"❌ Failed to upload report to S3: {e}")
+            raise
+    def get_report_presigned_url(self, video_id: str, filename: str, expires: timedelta = timedelta(hours=24)) -> str:
+        """
+        Generate presigned URL for report access
+        """
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"reports/{video_id}/{filename}"
+            url = self.minio.presigned_get_object(self.reports_bucket, minio_path, expires=expires)
+            logger.info(f"✅ Generated presigned URL for report: {filename}")
+            return url
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for report: {e}")
+            return None
+    def list_reports_for_video(self, video_id: str) -> List[Dict[str, Any]]:
+        """
+        List all reports for a video
+        """
+        if self.minio is None:
+            return []
+        try:
+            prefix = f"reports/{video_id}/"
+            objects = self.minio.list_objects(self.reports_bucket, prefix=prefix, recursive=True)
+            reports = []
+            for obj in objects:
+                reports.append({
+                    'filename': obj.object_name.split('/')[-1],
+                    'path': obj.object_name,
+                    'size': obj.size,
+                    'last_modified': obj.last_modified,
+                    'content_type': 'text/html' if obj.object_name.endswith('.html') else 'application/pdf'
+                })
+            logger.info(f"✅ Found {len(reports)} reports for video {video_id}")
+            return reports
+        except Exception as e:
+            logger.error(f"❌ Failed to list reports for video {video_id}: {e}")
+            return []
+# Removed: KeyframeRepository - its collection does not exist in the schema
+# Removed: ProcessingJobRepository - its collection does not exist in the schema
+# Removed: ObjectDetectionRepository - its collection does not exist in the schema
+# Only VideoRepository, EventRepository, and ReportRepository are schema-compliant and remain above

database/repositories_old.py ADDED Viewed

	@@ -0,0 +1,653 @@

+"""
+Repository Classes for DetectifAI Database Operations
+This module provides data access layer for MongoDB and MinIO operations.
+Each repository handles CRUD operations for specific collections.
+"""
+import os
+import io
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from bson import ObjectId
+from pymongo.collection import Collection
+from minio import Minio
+from minio.error import S3Error
+import logging
+from .models import (
+    VideoFileModel, EventModel, EventDescriptionModel, DetectedFaceModel,
+    prepare_for_mongodb, convert_objectid_to_string, convert_numpy_types,
+    seconds_to_milliseconds
+)
+logger = logging.getLogger(__name__)
+class BaseRepository:
+    """Base repository class with common functionality.
+    Copies the database handle, the MinIO client and the video/keyframe bucket
+    names from ``db_manager`` onto the instance for use by subclasses.
+    """
+    def __init__(self, db_manager):
+        # ``db_manager`` must expose ``db``, ``minio_client`` and ``config``
+        # (with ``minio_video_bucket`` and ``minio_keyframe_bucket``).
+        self.db = db_manager.db
+        self.minio = db_manager.minio_client
+        self.video_bucket = db_manager.config.minio_video_bucket
+        self.keyframe_bucket = db_manager.config.minio_keyframe_bucket
+class VideoRepository(BaseRepository):
+    """Repository for video_file collection operations"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.video_file
+    def create_video_record(self, video_data: Dict) -> str:
+        """Create new video record matching MongoDB schema exactly"""
+        try:
+            # Extract required fields
+            video_id = video_data.get('video_id')
+            user_id = video_data.get('user_id', 'system')
+            file_path = video_data.get('file_path', f"videos/{video_id}.mp4")
+            # Build schema-compliant record
+            record = {
+                "video_id": video_id,
+                "user_id": user_id,
+                "file_path": file_path,
+                "upload_date": datetime.utcnow()
+            }
+            # Add optional schema fields
+            if 'fps' in video_data:
+                record['fps'] = float(video_data['fps'])  # Ensure double type
+            else:
+                record['fps'] = 30.0  # Default
+            if 'duration' in video_data or 'duration_secs' in video_data:
+                duration = video_data.get('duration_secs') or video_data.get('duration', 0)
+                record['duration_secs'] = int(duration)  # Ensure integer
+            if 'file_size' in video_data or 'file_size_bytes' in video_data:
+                file_size = video_data.get('file_size_bytes') or video_data.get('file_size', 0)
+                record['file_size_bytes'] = int(file_size)  # Ensure long
+            if 'codec' in video_data:
+                record['codec'] = str(video_data['codec'])
+            if 'minio_object_key' in video_data:
+                record['minio_object_key'] = video_data['minio_object_key']
+            if 'minio_bucket' in video_data:
+                record['minio_bucket'] = video_data['minio_bucket']
+            # Build meta_data object for extra fields
+            meta_data = {}
+            extra_fields = [
+                'processing_status', 'resolution', 'filename', 'keyframe_count',
+                'event_count', 'compression_applied', 'enhancement_applied',
+                'error_message', 'processing_config'
+            ]
+            for field in extra_fields:
+                if field in video_data:
+                    meta_data[field] = video_data[field]
+            if meta_data:
+                record['meta_data'] = meta_data
+            # Convert numpy types and prepare for MongoDB
+            record = prepare_for_mongodb(record)
+            result = self.collection.insert_one(record)
+            logger.info(f"✅ Created video record: {video_id}")
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to create video record: {e}")
+            raise
+    def get_video_by_id(self, video_id: str) -> Optional[Dict]:
+        """Get video record by video_id"""
+        try:
+            doc = self.collection.find_one({"video_id": video_id})
+            if doc:
+                return convert_objectid_to_string(doc)
+            return None
+        except Exception as e:
+            logger.error(f"❌ Failed to get video {video_id}: {e}")
+            return None
+    def update_processing_status(self, video_id: str, status: str, metadata: Dict = None):
+        """Update video processing status in meta_data field"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+                return
+            current_meta = video.get('meta_data', {})
+            current_meta['processing_status'] = status
+            current_meta['last_updated'] = datetime.utcnow().isoformat()
+            # Add any additional metadata
+            if metadata:
+                current_meta.update(metadata)
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            if result.matched_count > 0:
+                logger.info(f"✅ Updated video status: {video_id} -> {status}")
+            else:
+                logger.warning(f"⚠️ Video not found for status update: {video_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to update video status: {e}")
+            raise
+    def update_metadata(self, video_id: str, metadata: Dict):
+        """Update video meta_data field with processing information"""
+        try:
+            # Get current meta_data
+            video = self.collection.find_one({"video_id": video_id})
+            if not video:
+                logger.warning(f"⚠️ Video not found: {video_id}")
+                return
+            current_meta = video.get('meta_data', {})
+            current_meta.update(metadata)
+            result = self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": {"meta_data": current_meta}}
+            )
+            logger.info(f"✅ Updated video metadata: {video_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to update video metadata: {e}")
+            raise
+    def upload_video_to_minio(self, local_path: str, video_id: str) -> str:
+        """Upload video file to MinIO storage"""
+        try:
+            minio_path = f"original/{video_id}/video.mp4"
+            with open(local_path, 'rb') as file_data:
+                file_info = os.stat(local_path)
+                self.minio.put_object(
+                    self.video_bucket,
+                    minio_path,
+                    file_data,
+                    length=file_info.st_size,
+                    content_type='video/mp4'
+                )
+            logger.info(f"✅ Uploaded video to MinIO: {minio_path}")
+            return minio_path
+        except Exception as e:
+            logger.error(f"❌ Failed to upload video to MinIO: {e}")
+            raise
+    def get_video_presigned_url(self, minio_path: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for video access"""
+        try:
+            return self.minio.presigned_get_object(self.video_bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL: {e}")
+            return None
+class KeyframeRepository(BaseRepository):
+    """Repository for keyframes collection operations"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.keyframes
+    def save_keyframes_batch(self, video_id: str, keyframes_data: List[Dict]) -> List[str]:
+        """Save multiple keyframes to MinIO and MongoDB"""
+        keyframe_ids = []
+        try:
+            for i, kf_data in enumerate(keyframes_data):
+                # Extract frame data from keyframe result
+                frame_data = kf_data.frame_data if hasattr(kf_data, 'frame_data') else kf_data
+                # Upload keyframe image to MinIO using correct bucket path structure
+                minio_path = f"{video_id}/frame_{frame_data['frame_number']:06d}.jpg"
+                # Handle both file path and frame data scenarios
+                if 'frame_path' in frame_data:
+                    local_path = frame_data['frame_path']
+                    if os.path.exists(local_path):
+                        with open(local_path, 'rb') as img_file:
+                            file_info = os.stat(local_path)
+                            self.minio.put_object(
+                                self.keyframe_bucket,
+                                minio_path,
+                                img_file,
+                                length=file_info.st_size,
+                                content_type='image/jpeg'
+                            )
+                    else:
+                        logger.warning(f"⚠️ Keyframe file not found: {local_path}")
+                        continue
+                # Create keyframe document
+                keyframe_doc = {
+                    "video_id": video_id,
+                    "frame_number": frame_data.get('frame_number', i),
+                    "timestamp": frame_data.get('timestamp', 0.0),
+                    "quality_score": frame_data.get('quality_score', 0.0),
+                    "motion_score": frame_data.get('motion_score', 0.0),
+                    "minio_path": minio_path,
+                    "enhancement_applied": frame_data.get('enhancement_applied', False),
+                    "face_count": frame_data.get('face_count', 0),
+                    "object_detections": [],
+                    "created_at": datetime.utcnow()
+                }
+                result = self.collection.insert_one(keyframe_doc)
+                keyframe_ids.append(str(result.inserted_id))
+            logger.info(f"✅ Saved {len(keyframe_ids)} keyframes for video {video_id}")
+            return keyframe_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save keyframes batch: {e}")
+            raise
+    def get_keyframes_by_video_id(self, video_id: str, has_detections: bool = False,
+                                limit: int = None) -> List[Dict]:
+        """Get keyframes for a video with optional filtering"""
+        try:
+            query = {"video_id": video_id}
+            if has_detections:
+                query["object_detections"] = {"$exists": True, "$not": {"$size": 0}}
+            cursor = self.collection.find(query).sort("timestamp", 1)
+            if limit:
+                cursor = cursor.limit(limit)
+            keyframes = list(cursor)
+            # Convert ObjectIds to strings and add presigned URLs
+            for kf in keyframes:
+                kf = convert_objectid_to_string(kf)
+                kf['presigned_url'] = self.minio.presigned_get_object(
+                    self.bucket,
+                    kf['minio_path'],
+                    expires=timedelta(hours=1)
+                )
+            return keyframes
+        except Exception as e:
+            logger.error(f"❌ Failed to get keyframes for video {video_id}: {e}")
+            return []
+    def update_keyframe_detections(self, keyframe_id: str, detections: List[Dict]):
+        """Update keyframe with object detection results"""
+        try:
+            self.collection.update_one(
+                {"_id": ObjectId(keyframe_id)},
+                {"$set": {
+                    "object_detections": detections,
+                    "updated_at": datetime.utcnow()
+                }}
+            )
+            logger.info(f"✅ Updated keyframe {keyframe_id} with {len(detections)} detections")
+        except Exception as e:
+            logger.error(f"❌ Failed to update keyframe detections: {e}")
+class EventRepository(BaseRepository):
+    """Repository for event collection operations - Schema Compliant"""
+    def __init__(self, db_manager):
+        """Bind the repository to the ``event`` and ``event_description`` collections."""
+        super().__init__(db_manager)
+        # Events and their captions live in two separate collections.
+        self.collection = self.db.event
+        self.event_description_collection = self.db.event_description
+    def save_event(self, event_data: Dict) -> str:
+        """Save event matching MongoDB schema exactly"""
+        try:
+            import uuid
+            # Extract required fields
+            event_id = event_data.get('event_id', str(uuid.uuid4()))
+            video_id = event_data['video_id']
+            # Convert timestamps: seconds (float) -> milliseconds (int)
+            start_time = event_data.get('start_timestamp', 0.0)
+            end_time = event_data.get('end_timestamp', 0.0)
+            start_timestamp_ms = seconds_to_milliseconds(start_time)
+            end_timestamp_ms = seconds_to_milliseconds(end_time)
+            # Build schema-compliant event document
+            event_doc = {
+                "event_id": event_id,
+                "video_id": video_id,
+                "start_timestamp_ms": int(start_timestamp_ms),
+                "end_timestamp_ms": int(end_timestamp_ms),
+                "event_type": event_data.get('event_type', 'motion'),
+                "confidence_score": float(event_data.get('confidence', 0.0)),
+                "is_verified": False,
+                "is_false_positive": False,
+                "verified_at": None,
+                "verified_by": None,
+                "visual_embedding": [],
+                "bounding_boxes": event_data.get('bounding_boxes', {})
+            }
+            # Convert numpy types
+            event_doc = convert_numpy_types(event_doc)
+            event_doc = prepare_for_mongodb(event_doc)
+            result = self.collection.insert_one(event_doc)
+            logger.info(f"✅ Saved event: {event_id} ({event_data.get('event_type')})")
+            # If there's additional description info, save to event_description
+            if event_data.get('description') or event_data.get('caption'):
+                self._save_event_description(event_id, event_data)
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to save event: {e}")
+            raise
+    def save_detection_events(self, video_id: str, detection_groups: List[Dict]) -> List[str]:
+        """Save object detection events with proper schema compliance"""
+        event_ids = []
+        try:
+            for group in detection_groups:
+                # Build bounding_boxes object
+                bboxes = {
+                    "detections": [
+                        {
+                            "class": det.get('class_name', ''),
+                            "confidence": float(det.get('confidence', 0.0)),
+                            "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                            "timestamp": float(det.get('frame_timestamp', 0.0)),
+                            "model": det.get('detection_model', '')
+                        }
+                        for det in group.get('detections', [])
+                    ]
+                }
+                event_data = {
+                    "video_id": video_id,
+                    "start_timestamp": group.get('start_timestamp', 0.0),
+                    "end_timestamp": group.get('end_timestamp', 0.0),
+                    "event_type": f"object_detection_{group.get('class', 'unknown')}",
+                    "confidence": group.get('max_confidence', 0.0),
+                    "bounding_boxes": bboxes,
+                    "description": f"Detected {len(group.get('detections', []))} {group.get('class', 'object')}(s)"
+                }
+                event_id = self.save_event(event_data)
+                event_ids.append(event_id)
+            logger.info(f"✅ Saved {len(event_ids)} detection events for video {video_id}")
+            return event_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection events: {e}")
+            raise
+    def _save_event_description(self, event_id: str, event_data: Dict):
+        """Save detailed event description to event_description collection"""
+        try:
+            import uuid
+            description_text = event_data.get('description') or event_data.get('caption', '')
+            if not description_text:
+                return
+            description_doc = {
+                "description_id": str(uuid.uuid4()),
+                "event_id": event_id,
+                "caption": description_text,
+                "text_embedding": [],  # TODO: Generate embedding in future
+                "confidence": float(event_data.get('confidence', 0.0)),
+                "created_at": datetime.utcnow(),
+                "updated_at": datetime.utcnow()
+            }
+            description_doc = prepare_for_mongodb(description_doc)
+            self.event_description_collection.insert_one(description_doc)
+            logger.info(f"✅ Saved event description for {event_id}")
+        except Exception as e:
+            logger.error(f"❌ Failed to save event description: {e}")
+    def get_events_by_video_id(self, video_id: str, event_type: str = None) -> List[Dict]:
+        """Get events for a video with optional type filtering"""
+        try:
+            query = {"video_id": video_id}
+            if event_type:
+                query["event_type"] = event_type
+            events = list(self.collection.find(query).sort("start_timestamp_ms", 1))
+            # Convert ObjectIds to strings
+            for event in events:
+                event = convert_objectid_to_string(event)
+            return events
+        except Exception as e:
+            logger.error(f"❌ Failed to get events for video {video_id}: {e}")
+            return []
+    def mark_as_false_positive(self, event_id: str):
+        """Mark event as false positive (for deduplication)"""
+        try:
+            self.collection.update_one(
+                {"event_id": event_id},
+                {"$set": {"is_false_positive": True}}
+            )
+            logger.info(f"✅ Marked event {event_id} as false positive")
+        except Exception as e:
+            logger.error(f"❌ Failed to mark event as false positive: {e}")
+# Remove KeyframeRepository - collection doesn't exist in schema
+# Remove ProcessingJobRepository - collection doesn't exist in schema
+# Remove ObjectDetectionRepository - collection doesn't exist in schema
+# Keeping only repositories for schema-defined collections below:
+        event_ids = []
+        try:
+            for event_data in detection_events:
+                # Calculate threat level based on detected objects
+                threat_level = self._calculate_threat_level(event_data.get('object_class', ''))
+                event_doc = {
+                    "video_id": video_id,
+                    "event_type": "object_detection",
+                    "start_timestamp": event_data.get('start_timestamp', 0.0),
+                    "end_timestamp": event_data.get('end_timestamp', 0.0),
+                    "confidence": event_data.get('confidence', 0.0),
+                    "importance_score": event_data.get('importance_score', 0.0),
+                    "threat_level": threat_level,
+                    "object_detections": event_data.get('detections', []),
+                    "keyframe_paths": event_data.get('keyframe_paths', []),
+                    "is_canonical": False,
+                    "created_at": datetime.utcnow()
+                }
+                result = self.collection.insert_one(event_doc)
+                event_ids.append(str(result.inserted_id))
+            logger.info(f"✅ Saved {len(event_ids)} object detection events for video {video_id}")
+            return event_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save object detection events: {e}")
+            raise
+    def get_events_by_video_id(self, video_id: str, event_type: str = None) -> List[Dict]:
+        """Get events for a video with optional type filtering"""
+        try:
+            query = {"video_id": video_id}
+            if event_type:
+                query["event_type"] = event_type
+            events = list(self.collection.find(query).sort("start_timestamp", 1))
+            # Convert ObjectIds to strings
+            for event in events:
+                event = convert_objectid_to_string(event)
+            return events
+        except Exception as e:
+            logger.error(f"❌ Failed to get events for video {video_id}: {e}")
+            return []
+    def _calculate_threat_level(self, object_class: str) -> str:
+        """Calculate threat level based on detected object class"""
+        threat_map = {
+            'fire': 'critical',
+            'gun': 'critical',
+            'knife': 'high',
+            'smoke': 'medium'
+        }
+        return threat_map.get(object_class.lower(), 'low')
+class ProcessingJobRepository(BaseRepository):
+    """Repository for processing_jobs collection operations"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.processing_jobs
+    def create_processing_job(self, video_id: str, job_type: str = "complete_processing") -> str:
+        """Create new processing job record"""
+        try:
+            job_doc = {
+                "video_id": video_id,
+                "job_type": job_type,
+                "status": "queued",
+                "progress": 0,
+                "message": "Processing job queued",
+                "created_at": datetime.utcnow()
+            }
+            result = self.collection.insert_one(job_doc)
+            logger.info(f"✅ Created processing job: {video_id}")
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"❌ Failed to create processing job: {e}")
+            raise
+    def update_job_progress(self, video_id: str, progress: int, message: str, status: str = None):
+        """Update processing job progress and status"""
+        try:
+            update_data = {
+                "progress": progress,
+                "message": message,
+                "updated_at": datetime.utcnow()
+            }
+            if status:
+                update_data["status"] = status
+                if status == "processing" and not self.collection.find_one({"video_id": video_id, "started_at": {"$exists": True}}):
+                    update_data["started_at"] = datetime.utcnow()
+                elif status in ["completed", "failed"]:
+                    update_data["completed_at"] = datetime.utcnow()
+            self.collection.update_one(
+                {"video_id": video_id},
+                {"$set": update_data}
+            )
+        except Exception as e:
+            logger.error(f"❌ Failed to update job progress: {e}")
+    def get_job_status(self, video_id: str) -> Optional[Dict]:
+        """Get processing job status"""
+        try:
+            job = self.collection.find_one({"video_id": video_id})
+            if job:
+                return convert_objectid_to_string(job)
+            return None
+        except Exception as e:
+            logger.error(f"❌ Failed to get job status: {e}")
+            return None
+class ObjectDetectionRepository(BaseRepository):
+    """Repository for object detection results"""
+    def __init__(self, db_manager):
+        super().__init__(db_manager)
+        self.collection = self.db.object_detections
+    def save_detection_batch(self, video_id: str, detections: List[Dict]) -> List[str]:
+        """Save object detection results"""
+        detection_ids = []
+        try:
+            for detection in detections:
+                detection_doc = {
+                    "video_id": video_id,
+                    "keyframe_id": ObjectId(detection.get('keyframe_id')) if detection.get('keyframe_id') else None,
+                    "detection_id": f"{video_id}_{detection.get('frame_number', 0)}_{len(detection_ids)}",
+                    "class_name": detection.get('class_name', ''),
+                    "confidence": detection.get('confidence', 0.0),
+                    "bbox": detection.get('bbox', [0, 0, 0, 0]),
+                    "center_point": detection.get('center_point', [0, 0]),
+                    "area": detection.get('area', 0.0),
+                    "frame_timestamp": detection.get('frame_timestamp', 0.0),
+                    "detection_model": detection.get('detection_model', ''),
+                    "threat_level": self._calculate_threat_level(detection.get('class_name', '')),
+                    "created_at": datetime.utcnow()
+                }
+                result = self.collection.insert_one(detection_doc)
+                detection_ids.append(str(result.inserted_id))
+            logger.info(f"✅ Saved {len(detection_ids)} detection results for video {video_id}")
+            return detection_ids
+        except Exception as e:
+            logger.error(f"❌ Failed to save detection results: {e}")
+            raise
+    def get_detections_by_video_id(self, video_id: str, class_filter: str = None) -> List[Dict]:
+        """Get object detections for a video"""
+        try:
+            query = {"video_id": video_id}
+            if class_filter:
+                query["class_name"] = class_filter
+            detections = list(self.collection.find(query).sort("frame_timestamp", 1))
+            # Convert ObjectIds to strings
+            for detection in detections:
+                detection = convert_objectid_to_string(detection)
+            return detections
+        except Exception as e:
+            logger.error(f"❌ Failed to get detections for video {video_id}: {e}")
+            return []
+    def _calculate_threat_level(self, class_name: str) -> str:
+        """Calculate threat level based on detected object class"""
+        threat_map = {
+            'fire': 'critical',
+            'gun': 'critical',
+            'knife': 'high',
+            'smoke': 'medium'
+        }
+        return threat_map.get(class_name.lower(), 'low')

database/storage_logger.py ADDED Viewed

	@@ -0,0 +1,41 @@

+"""
+Storage Logging Configuration for MinIO and Database Operations
+"""
+import logging
+import os
+from datetime import datetime
+def setup_storage_logger():
+    """Configure logger for storage operations"""
+    logger = logging.getLogger('storage_operations')
+    logger.setLevel(logging.DEBUG)
+    # Create logs directory if it doesn't exist
+    logs_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'logs')
+    os.makedirs(logs_dir, exist_ok=True)
+    # File handler for storage operations
+    log_file = os.path.join(logs_dir, f'storage_{datetime.now().strftime("%Y%m%d")}.log')
+    file_handler = logging.FileHandler(log_file)
+    file_handler.setLevel(logging.DEBUG)
+    # Console handler
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    # Create formatter
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+    file_handler.setFormatter(formatter)
+    console_handler.setFormatter(formatter)
+    # Add handlers
+    logger.addHandler(file_handler)
+    logger.addHandler(console_handler)
+    return logger
+# Initialize logger
+storage_logger = setup_storage_logger()

database/video_compression_service.py ADDED Viewed

	@@ -0,0 +1,379 @@

+"""
+Video Compression and Storage Service for DetectifAI
+This module handles video compression and MinIO storage for compressed videos.
+"""
+import os
+import cv2
+import subprocess
+import logging
+from io import BytesIO
+from typing import Dict, Optional
+from datetime import timedelta
+from minio.error import S3Error
+logger = logging.getLogger(__name__)
+class VideoCompressionService:
+    """Service for compressing videos and storing in S3-compatible storage"""
+    def __init__(self, db_manager, config=None):
+        self._db_manager = db_manager
+        self.bucket = db_manager.config.minio_video_bucket  # Store compressed videos in the videos bucket
+        self.config = config
+        # Default compression settings
+        self.output_resolution = "720p"  # 720p for web delivery
+        self.compression_crf = 23  # 0-51, lower = better quality (23 is default)
+        self.compression_preset = "medium"  # ultrafast to veryslow
+        # Check if FFmpeg is available
+        self.ffmpeg_available = self._check_ffmpeg_available()
+    @property
+    def minio(self):
+        """Lazy access to S3 storage — tolerates unavailable storage"""
+        return self._db_manager.minio_client
+    def _check_ffmpeg_available(self) -> bool:
+        """Check if FFmpeg is available on the system"""
+        try:
+            result = subprocess.run(
+                ['ffmpeg', '-version'],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            return result.returncode == 0
+        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError):
+            return False
+    def compress_and_store(self, input_path: str, video_id: str) -> Optional[Dict]:
+        """Compress video and store in MinIO and locally"""
+        try:
+            # Create local storage directory
+            local_dir = os.path.join("video_processing_outputs", "compressed", video_id)
+            os.makedirs(local_dir, exist_ok=True)
+            local_path = os.path.join(local_dir, "video.mp4")
+            # Use BytesIO for in-memory compression
+            from io import BytesIO
+            compressed_buffer = BytesIO()
+            # Try FFmpeg first if available, otherwise use OpenCV
+            if self.ffmpeg_available:
+                success = self._compress_with_ffmpeg_to_buffer(input_path, compressed_buffer)
+                if not success:
+                    logger.warning("FFmpeg compression failed, falling back to OpenCV")
+                    compressed_buffer.seek(0)  # Reset buffer position
+                    success = self._compress_with_opencv_to_buffer(input_path, compressed_buffer)
+            else:
+                logger.info("FFmpeg not available, using OpenCV compression")
+                success = self._compress_with_opencv_to_buffer(input_path, compressed_buffer)
+            if not success:
+                logger.error("Both compression methods failed")
+                return None
+            # Get buffer contents
+            compressed_buffer.seek(0)
+            compressed_data = compressed_buffer.getvalue()
+            compressed_size = len(compressed_data)
+            # Save locally
+            with open(local_path, 'wb') as f:
+                f.write(compressed_data)
+            logger.info(f"✅ Video saved locally: {local_path}")
+            # Calculate compression stats
+            original_size = os.path.getsize(input_path)
+            compression_ratio = ((original_size - compressed_size) / original_size) * 100
+            # Upload directly to S3 using consistent path structure (skip if unavailable)
+            minio_path = None
+            if self.minio is not None:
+                try:
+                    minio_path = f"compressed/{video_id}/video.mp4"
+                    compressed_buffer.seek(0)  # Reset buffer for S3 upload
+                    self.minio.put_object(
+                        self.bucket,
+                        minio_path,
+                        compressed_buffer,
+                        length=compressed_size,
+                        content_type='video/mp4'
+                    )
+                except Exception as s3_err:
+                    logger.warning(f"⚠️ S3 upload skipped for compressed video: {s3_err}")
+                    minio_path = None
+            else:
+                logger.info("S3 storage unavailable — compressed video stored locally only")
+            result = {
+                'success': True,
+                'minio_path': minio_path,
+                'local_path': local_path,
+                'original_size': original_size,
+                'compressed_size': compressed_size,
+                'compression_ratio': round(compression_ratio, 2),
+                'output_resolution': self.output_resolution
+            }
+            logger.info(f"✅ Video compressed and stored: {compression_ratio:.1f}% reduction")
+            return result
+        except Exception as e:
+            logger.error(f"❌ Compression and storage failed: {e}")
+            return None
+    def get_compressed_video_presigned_url(self, video_id: str, expires: timedelta = timedelta(hours=1)) -> str:
+        """Generate presigned URL for compressed video access"""
+        if self.minio is None:
+            return None
+        try:
+            minio_path = f"compressed/{video_id}/video.mp4"
+            return self.minio.presigned_get_object(self.bucket, minio_path, expires=expires)
+        except S3Error as e:
+            logger.error(f"❌ Failed to generate presigned URL for compressed video: {e}")
+            return None
+    def _compress_with_ffmpeg(self, input_path: str, output_path: str) -> bool:
+        """Compress video using FFmpeg"""
+        try:
+            # Build FFmpeg command
+            cmd = [
+                'ffmpeg',
+                '-i', input_path,
+                '-c:v', 'libx264',  # H.264 codec
+                '-crf', str(self.compression_crf),
+                '-preset', self.compression_preset,
+                '-movflags', '+faststart',  # Enable web playback
+                '-y'  # Overwrite output file
+            ]
+            # Add resolution scaling if needed
+            if self.output_resolution == "720p":
+                cmd.extend(['-vf', 'scale=1280:720:force_original_aspect_ratio=decrease'])  # Scale to 720p preserving aspect ratio
+            elif self.output_resolution == "480p":
+                cmd.extend(['-vf', 'scale=854:480:force_original_aspect_ratio=decrease'])  # Scale to 480p preserving aspect ratio
+            cmd.append(output_path)
+            # Run FFmpeg
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True
+            )
+            if result.returncode == 0 and os.path.exists(output_path):
+                logger.info("✅ FFmpeg compression successful")
+                return True
+            else:
+                logger.error(f"FFmpeg error: {result.stderr}")
+                return False
+        except Exception as e:
+            logger.error(f"FFmpeg compression failed: {e}")
+            return False
+    def _compress_with_ffmpeg_to_buffer(self, input_path: str, output_buffer: BytesIO) -> bool:
+        """Compress video using FFmpeg with temporary file (more reliable than pipe)"""
+        import tempfile
+        try:
+            # Create temporary file for FFmpeg output
+            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
+                temp_path = temp_file.name
+            # Build FFmpeg command to output to temporary file
+            cmd = [
+                'ffmpeg',
+                '-i', input_path,
+                '-c:v', 'libx264',  # H.264 codec
+                '-crf', str(self.compression_crf),
+                '-preset', self.compression_preset,
+                '-movflags', '+faststart',  # Enable web playback (safe for file output)
+                '-y'  # Overwrite output
+            ]
+            # Add resolution scaling if needed
+            if self.output_resolution == "720p":
+                cmd.extend(['-vf', 'scale=1280:720:force_original_aspect_ratio=decrease'])  # Scale to 720p preserving aspect ratio
+            elif self.output_resolution == "480p":
+                cmd.extend(['-vf', 'scale=854:480:force_original_aspect_ratio=decrease'])  # Scale to 480p preserving aspect ratio
+            # Add output file
+            cmd.append(temp_path)
+            # Run FFmpeg
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=300  # 5 minute timeout
+            )
+            if result.returncode == 0 and os.path.exists(temp_path):
+                # Read temporary file into buffer
+                with open(temp_path, 'rb') as f:
+                    output_buffer.write(f.read())
+                # Clean up temporary file
+                os.unlink(temp_path)
+                logger.info("✅ FFmpeg compression to buffer successful")
+                return True
+            else:
+                # Clean up temporary file on error
+                if os.path.exists(temp_path):
+                    os.unlink(temp_path)
+                logger.error(f"FFmpeg error: {result.stderr}")
+                return False
+        except Exception as e:
+            logger.error(f"FFmpeg compression to buffer failed: {e}")
+            return False
+    def _compress_with_opencv_to_buffer(self, input_path: str, output_buffer: BytesIO) -> bool:
+        """Fallback compression using OpenCV directly to a buffer"""
+        try:
+            # Open input video
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                logger.error(f"Cannot open input video: {input_path}")
+                return False
+            # Get video properties
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            # Calculate new dimensions
+            if self.output_resolution == "720p":
+                new_height = 720
+                new_width = int((width / height) * new_height)
+            elif self.output_resolution == "480p":
+                new_height = 480
+                new_width = int((width / height) * new_height)
+            else:
+                new_width, new_height = width, height
+            # Create temporary file for OpenCV (required for VideoWriter)
+            import tempfile
+            with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as temp_file:
+                temp_path = temp_file.name
+            # Create video writer with best available codec
+            # Prioritize H.264 (avc1) for browser compatibility
+            codecs_to_try = [
+                ('avc1', 'H.264'),
+                ('h264', 'H.264'),
+                ('X264', 'H.264'),
+                ('mp4v', 'MPEG-4')
+            ]
+            out = None
+            used_codec = None
+            for fourcc_code, name in codecs_to_try:
+                try:
+                    fourcc = cv2.VideoWriter_fourcc(*fourcc_code)
+                    out = cv2.VideoWriter(temp_path, fourcc, fps, (new_width, new_height))
+                    if out.isOpened():
+                        used_codec = name
+                        logger.info(f"✅ Using codec: {name} ({fourcc_code})")
+                        break
+                    out.release()
+                except Exception as e:
+                    logger.debug(f"Codec {fourcc_code} failed: {e}")
+            if not out or not out.isOpened():
+                logger.error("❌ No suitable video codec found")
+                return False
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Resize frame if needed
+                if (new_width, new_height) != (width, height):
+                    frame = cv2.resize(frame, (new_width, new_height))
+                out.write(frame)
+            cap.release()
+            out.release()
+            # Read compressed file into buffer
+            if os.path.exists(temp_path):
+                with open(temp_path, 'rb') as f:
+                    output_buffer.write(f.read())
+                os.unlink(temp_path)  # Delete temporary file
+                logger.info("✅ OpenCV compression to buffer successful")
+                return True
+            else:
+                logger.error("OpenCV compression failed - output file not created")
+                return False
+        except Exception as e:
+            logger.error(f"OpenCV compression to buffer failed: {e}")
+            return False
+    def _compress_with_opencv(self, input_path: str, output_path: str) -> bool:
+        """Fallback compression using OpenCV"""
+        try:
+            # Open input video
+            cap = cv2.VideoCapture(input_path)
+            if not cap.isOpened():
+                logger.error(f"Cannot open input video: {input_path}")
+                return False
+            # Get video properties
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            # Calculate new dimensions
+            if self.output_resolution == "720p":
+                new_height = 720
+                new_width = int((width / height) * new_height)
+            elif self.output_resolution == "480p":
+                new_height = 480
+                new_width = int((width / height) * new_height)
+            else:
+                new_width, new_height = width, height
+            # Create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(
+                output_path,
+                fourcc,
+                fps,
+                (new_width, new_height)
+            )
+            while True:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Resize frame
+                if (new_width, new_height) != (width, height):
+                    frame = cv2.resize(frame, (new_width, new_height))
+                out.write(frame)
+            cap.release()
+            out.release()
+            if os.path.exists(output_path):
+                logger.info("✅ OpenCV compression successful")
+                return True
+            else:
+                logger.error("OpenCV compression failed - output file not created")
+                return False
+        except Exception as e:
+            logger.error(f"OpenCV compression failed: {e}")
+            return False

database_video_service.py ADDED Viewed

	@@ -0,0 +1,1804 @@

+"""
+Database-Integrated Video Processing Service
+This service integrates the existing video processing pipeline with MongoDB and MinIO storage.
+It replaces local file storage with database persistence while maintaining all processing capabilities.
+"""
+import os
+import cv2
+import time
+import threading
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+import logging
+import uuid
+import json
+# Import existing processing components
+from config import VideoProcessingConfig
+from main_pipeline import CompleteVideoProcessingPipeline
+from core.video_processing import OptimizedVideoProcessor
+from object_detection import ObjectDetector
+from behavior_analysis_integrator import BehaviorAnalysisIntegrator
+from event_aggregation import EventDetector
+from video_segmentation import VideoSegmentationEngine
+# Import database components
+from database.config import DatabaseManager
+from database.repositories import VideoRepository, EventRepository
+from database.keyframe_repository import KeyframeRepository
+from database.video_compression_service import VideoCompressionService
+from database.models import (
+    convert_numpy_types,
+    seconds_to_milliseconds,
+    milliseconds_to_seconds,
+    prepare_for_mongodb
+)
+logger = logging.getLogger(__name__)
+class DatabaseIntegratedVideoService:
+    """Enhanced video processing service with database integration"""
+    def __init__(self, config: VideoProcessingConfig = None):
+        """Initialize service with database connections and processing components"""
+        self.config = config or VideoProcessingConfig()
+        # Initialize database connections
+        self.db_manager = DatabaseManager()
+        # Initialize repositories (including keyframe and compression)
+        self.video_repo = VideoRepository(self.db_manager)
+        self.event_repo = EventRepository(self.db_manager)
+        self.keyframe_repo = KeyframeRepository(self.db_manager)
+        self.compression_service = VideoCompressionService(self.db_manager, self.config)
+        # Initialize processing components
+        self.video_processor = OptimizedVideoProcessor(self.config)
+        self.event_detector = EventDetector(self.config)
+        self.segmentation_engine = VideoSegmentationEngine(self.config)
+        # Initialize object detector if enabled
+        self.object_detector = None
+        if self.config.enable_object_detection:
+            try:
+                self.object_detector = ObjectDetector(self.config)
+                logger.info("✅ Object detection enabled")
+            except Exception as e:
+                logger.warning(f"⚠️ Object detection initialization failed: {e}")
+                self.config.enable_object_detection = False
+        # Initialize behavior analyzer if enabled
+        self.behavior_analyzer = None
+        if getattr(self.config, 'enable_behavior_analysis', False):
+            try:
+                self.behavior_analyzer = BehaviorAnalysisIntegrator(self.config)
+                logger.info("✅ Behavior analysis enabled")
+            except Exception as e:
+                logger.warning(f"⚠️ Behavior analysis initialization failed: {e}")
+                self.config.enable_behavior_analysis = False
+        # Initialize video captioning if enabled
+        self.video_captioning = None
+        if getattr(self.config, 'enable_video_captioning', False):
+            try:
+                from video_captioning_integrator import VideoCaptioningIntegrator
+                self.video_captioning = VideoCaptioningIntegrator(self.config, db_manager=self.db_manager)
+                logger.info("✅ Video captioning enabled (MongoDB + FAISS)")
+            except Exception as e:
+                logger.warning(f"⚠️ Video captioning initialization failed: {e}")
+                self.config.enable_video_captioning = False
+        logger.info("✅ Database-integrated video service initialized")
+    def process_video_with_database_storage(self, video_path: str, video_id: str, user_id: str = None):
+        """
+        Main processing pipeline with database integration
+        Args:
+            video_path: Path to uploaded video file
+            video_id: Unique identifier for the video
+            user_id: Optional user identifier
+        """
+        logger.info(f"🚀 Starting database-integrated processing for video: {video_id}")
+        try:
+            # Check if MongoDB record already exists (created during upload)
+            existing_video = self.video_repo.get_video_by_id(video_id)
+            if not existing_video:
+                logger.warning(f"⚠️ Video record not found in MongoDB for {video_id}, creating now...")
+                # Fallback: create record if it doesn't exist
+                video_metadata = self._extract_video_metadata(video_path)
+                video_record = {
+                    "video_id": video_id,
+                    "user_id": user_id or "system",
+                    "file_path": f"videos/{video_id}/video.mp4",
+                    "minio_object_key": f"original/{video_id}/video.mp4",
+                    "minio_bucket": self.video_repo.video_bucket,
+                    "codec": "h264",
+                    "fps": float(video_metadata.get("fps", 30.0)),
+                    "upload_date": datetime.utcnow(),
+                    "duration_secs": int(video_metadata.get("duration", 0)),
+                    "file_size_bytes": int(video_metadata.get("file_size", 0)),
+                    "meta_data": {
+                        "filename": os.path.basename(video_path),
+                        "resolution": video_metadata.get("resolution"),
+                        "processing_status": "processing",
+                        "processing_progress": 0,
+                        "processing_message": "Starting processing..."
+                    }
+                }
+                self.video_repo.create_video_record(video_record)
+            else:
+                logger.info(f"✅ MongoDB record already exists for {video_id}, proceeding with processing...")
+            # Update status: processing started
+            self.video_repo.update_metadata(video_id, {
+                "processing_status": "processing",
+                "processing_progress": 10,
+                "processing_message": "Starting video processing pipeline..."
+            })
+            # Step 1: Extract keyframes and upload to MinIO
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 15,
+                "processing_message": "Extracting and uploading keyframes..."
+            })
+            keyframes = self.video_processor.extract_keyframes(video_path)
+            # Process keyframes directly for MinIO upload
+            keyframe_batch = []
+            for kf in keyframes:
+                frame_data = kf.frame_data if hasattr(kf, 'frame_data') else kf
+                # Extract keyframe information consistently
+                keyframe_info = {
+                    'frame_path': frame_data.frame_path if hasattr(frame_data, 'frame_path') else None,
+                    'frame_number': frame_data.frame_number if hasattr(frame_data, 'frame_number') else 0,
+                    'timestamp': frame_data.timestamp if hasattr(frame_data, 'timestamp') else 0.0,
+                    'enhancement_applied': frame_data.enhancement_applied if hasattr(frame_data, 'enhancement_applied') else False
+                }
+                # If we have a numpy frame directly, we might need to save it to a file first
+                if hasattr(frame_data, 'frame') and frame_data.frame is not None:
+                    # Save numpy array to temporary file for upload
+                    import tempfile
+                    import cv2
+                    import numpy as np
+                    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
+                        temp_path = temp_file.name
+                        cv2.imwrite(temp_path, cv2.cvtColor(frame_data.frame, cv2.COLOR_RGB2BGR))
+                        keyframe_info['frame_path'] = temp_path
+                keyframe_batch.append(keyframe_info)
+            # Process and upload keyframes to MinIO
+            logger.info(f"Uploading {len(keyframe_batch)} keyframes to MinIO...")
+            keyframe_info = []
+            for idx, kf_info in enumerate(keyframe_batch):
+                frame_path = kf_info.get('frame_path')
+                if frame_path and os.path.exists(frame_path):
+                    try:
+                        # Create MinIO path
+                        frame_number = kf_info.get('frame_number', idx)
+                        timestamp = kf_info.get('timestamp', 0.0)
+                        minio_path = f"{video_id}/keyframes/frame_{frame_number:06d}.jpg"
+                        # Upload to MinIO with metadata
+                        with open(frame_path, 'rb') as f:
+                            file_size = os.path.getsize(frame_path)
+                            metadata = {
+                                "frame_number": str(frame_number),
+                                "timestamp": str(timestamp),
+                                "enhancement_applied": str(kf_info.get('enhancement_applied', False))
+                            }
+                            self.keyframe_repo.minio.put_object(
+                                self.keyframe_repo.bucket,
+                                minio_path,
+                                f,
+                                file_size,
+                                content_type='image/jpeg',
+                                metadata=metadata
+                            )
+                            keyframe_info.append({
+                                "frame_number": frame_number,
+                                "timestamp": timestamp,
+                                "minio_path": minio_path,
+                                "size_bytes": file_size,
+                                "uploaded_at": datetime.utcnow().isoformat()
+                            })
+                    except Exception as e:
+                        logger.error(f"Failed to upload keyframe {frame_path}: {e}")
+                        continue
+                if (idx + 1) % 10 == 0:
+                    logger.info(f"Uploaded {idx + 1}/{len(keyframe_batch)} keyframes")
+            # Step 2: Update MongoDB with keyframe MinIO paths (link metadata)
+            # Store each keyframe's MinIO path in MongoDB metadata
+            keyframe_metadata = []
+            for kf in keyframe_info:
+                keyframe_metadata.append({
+                    "frame_number": kf["frame_number"],
+                    "timestamp": kf["timestamp"],
+                    "minio_path": kf["minio_path"],
+                    "minio_bucket": self.keyframe_repo.bucket,
+                    "size_bytes": kf["size_bytes"],
+                    "uploaded_at": kf["uploaded_at"]
+                })
+            # Update video metadata with keyframe information and MinIO links
+            self.video_repo.update_metadata(video_id, {
+                "keyframe_info": keyframe_metadata,  # Full metadata with MinIO paths
+                "keyframe_count": len(keyframe_info),
+                "keyframe_bucket": self.keyframe_repo.bucket,
+                "keyframes_minio_paths": [kf["minio_path"] for kf in keyframe_info],  # Quick access list
+                "upload_stats": {
+                    "total_frames": len(keyframe_batch),
+                    "uploaded_frames": len(keyframe_info),
+                    "upload_completed": datetime.utcnow().isoformat()
+                }
+            })
+            logger.info(f"✅ Uploaded {len(keyframe_info)} keyframes to MinIO and linked in MongoDB")
+            # Enrich original keyframe objects with MinIO metadata for downstream processing
+            # This ensures video captioning and other modules can access MinIO paths
+            for idx, kf in enumerate(keyframes):
+                if idx < len(keyframe_metadata):
+                    kf_meta = keyframe_metadata[idx]
+                    # Add MinIO metadata to keyframe object
+                    if hasattr(kf, 'frame_data'):
+                        kf.frame_data.minio_path = kf_meta['minio_path']
+                        kf.frame_data.minio_bucket = kf_meta['minio_bucket']
+                    else:
+                        kf.minio_path = kf_meta['minio_path']
+                        kf.minio_bucket = kf_meta['minio_bucket']
+            logger.info(f"✅ Enriched {len(keyframes)} keyframe objects with MinIO metadata")
+            # Step 2: Generate compressed video and upload to MinIO (MOVED UP - Priority for playback)
+            compressed_minio_path = None
+            if self.config.generate_compressed_video:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 20,
+                    "processing_message": "Generating and uploading compressed video..."
+                })
+                logger.info("📦 ===== STARTING VIDEO COMPRESSION (PRIORITY) ===== ")
+                compressed_minio_path = self._generate_compressed_video(video_path, video_id)
+                if compressed_minio_path:
+                    logger.info(f"✅ Compressed video uploaded to MinIO: {compressed_minio_path}")
+                    # Update metadata immediately so video is playable
+                    self.video_repo.update_metadata(video_id, {
+                        "minio_compressed_path": compressed_minio_path
+                    })
+                    self.video_repo.collection.update_one(
+                        {"video_id": video_id},
+                        {"$set": {"meta_data.minio_compressed_path": compressed_minio_path}}
+                    )
+                else:
+                    logger.warning("⚠️ Video compression failed, continuing with other processing")
+            # Step 3: Object detection (if enabled)
+            detection_results = []
+            if self.config.enable_object_detection and self.object_detector:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 40,
+                    "processing_message": "Running object detection..."
+                })
+                detection_results = self._run_object_detection_on_keyframes(
+                    video_id, keyframes
+                )
+            # Step 4: Behavior analysis (if enabled)
+            behavior_results = []
+            behavior_events = []
+            if self.config.enable_behavior_analysis and self.behavior_analyzer:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 55,
+                    "processing_message": "Running behavior analysis (fight/accident/climbing detection)..."
+                })
+                logger.info("🚀 ===== STARTING BEHAVIOR ANALYSIS ===== ")
+                logger.info(f"📹 Processing video: {video_path}")
+                logger.info(f"🔧 Available models: {list(self.behavior_analyzer.models.keys())}")
+                # Pass video_path for 3D-ResNet models (fighting, road_accident) which need 16-frame clips
+                behavior_results, behavior_events = self.behavior_analyzer.process_keyframes_with_behavior_analysis(keyframes, video_path=video_path)
+                # Store behavior detections in keyframes
+                for i, keyframe in enumerate(keyframes):
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+                    # Find behavior detections for this frame
+                    frame_behaviors = [r for r in behavior_results if r.frame_path == frame_path and abs(r.timestamp - timestamp) < 0.1]
+                    if frame_behaviors:
+                        for behavior in frame_behaviors:
+                            if not hasattr(keyframe, 'behaviors'):
+                                keyframe.behaviors = []
+                            keyframe.behaviors.append({
+                                "type": behavior.behavior_detected,
+                                "confidence": behavior.confidence,
+                                "model": behavior.model_used,
+                                "timestamp": behavior.timestamp
+                            })
+                logger.info(f"✅ Behavior analysis complete: {len(behavior_results)} detections, {len(behavior_events)} events")
+            # Step 5: Event detection and aggregation
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 70,
+                "processing_message": "Detecting and aggregating events..."
+            })
+            # Create events from object detections
+            event_ids = []
+            object_events = []
+            if detection_results:
+                object_events = self._create_object_events_from_detections(detection_results)
+                # Save events using EventRepository
+                for event in object_events:
+                    event['video_id'] = video_id  # Add video_id to event data
+                    event_id = self.event_repo.save_event(event)
+                    event_ids.append(event_id)
+            # Create and save events from behavior analysis
+            if behavior_events:
+                logger.info(f"📅 Creating {len(behavior_events)} behavior-based events...")
+                for behavior_event in behavior_events:
+                    event_dict = {
+                        "video_id": video_id,
+                        "event_type": f"behavior_{behavior_event.behavior_type}",
+                        "start_timestamp": behavior_event.start_timestamp,
+                        "end_timestamp": behavior_event.end_timestamp,
+                        "confidence_score": float(behavior_event.confidence),
+                        "keyframes": behavior_event.keyframes,
+                        "importance_score": float(behavior_event.importance_score),
+                        "description": f"{behavior_event.behavior_type.capitalize()} behavior detected",
+                        "detection_data": {
+                            "model_used": behavior_event.model_used,
+                            "frame_indices": behavior_event.frame_indices,
+                            "behavior_type": behavior_event.behavior_type
+                        }
+                    }
+                    try:
+                        event_id = self.event_repo.save_event(event_dict)
+                        event_ids.append(event_id)
+                        logger.info(f"✅ Saved behavior event: {behavior_event.behavior_type} at {behavior_event.start_timestamp:.1f}s")
+                    except Exception as e:
+                        logger.error(f"❌ Failed to save behavior event: {e}")
+            # Step 5.5: Run facial recognition on frames with detections (if enabled)
+            face_results = []
+            if self.config.enable_facial_recognition and (detection_results or behavior_results) and event_ids:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 75,
+                    "processing_message": "Running facial recognition on suspicious frames..."
+                })
+                try:
+                    from facial_recognition import FacialRecognitionIntegrated
+                    face_detector = FacialRecognitionIntegrated(self.config)
+                    # Get frames that have detections for facial recognition
+                    frames_with_detections = []
+                    for i, keyframe in enumerate(keyframes):
+                        frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                        frame_path = (
+                            frame_data.frame_path if hasattr(frame_data, 'frame_path')
+                            else getattr(frame_data, 'path', None)
+                        )
+                        timestamp = (
+                            frame_data.timestamp if hasattr(frame_data, 'timestamp')
+                            else getattr(frame_data, 'timestamp', 0.0)
+                        )
+                        # Check if this frame has object detections
+                        has_object_detection = any(
+                            abs(d['frame_timestamp'] - timestamp) < 0.5
+                            for d in detection_results
+                        )
+                        # Check if this frame has behavior detections
+                        has_behavior_detection = any(
+                            abs(b.timestamp - timestamp) < 0.5 and b.behavior_detected != "no_action"
+                            for b in behavior_results
+                        )
+                        if (has_object_detection or has_behavior_detection) and frame_path and os.path.exists(frame_path):
+                            frames_with_detections.append((frame_path, timestamp))
+                    # Run facial recognition on suspicious frames
+                    for frame_path, timestamp in frames_with_detections:
+                        try:
+                            # Find associated event_id for this timestamp
+                            associated_event_id = None
+                            for event_id, event in zip(event_ids, object_events):
+                                if (event.get('start_timestamp', 0) <= timestamp <=
+                                    event.get('end_timestamp', float('inf'))):
+                                    associated_event_id = event_id
+                                    break
+                            if not associated_event_id and event_ids:
+                                associated_event_id = event_ids[0]  # Fallback to first event
+                            # Detect faces in frame
+                            face_result = face_detector.detect_faces_in_frame(frame_path, timestamp)
+                            # Convert FaceDetectionResult to list of face info dictionaries
+                            if face_result and face_result.faces_detected > 0:
+                                # Extract face information from FaceDetectionResult
+                                for i in range(face_result.faces_detected):
+                                    face_id = face_result.detected_face_ids[i] if face_result.detected_face_ids and i < len(face_result.detected_face_ids) else f"face_{uuid.uuid4().hex[:8]}"
+                                    bounding_box = face_result.face_bounding_boxes[i] if i < len(face_result.face_bounding_boxes) else [0, 0, 0, 0]
+                                    confidence = face_result.face_confidence_scores[i] if i < len(face_result.face_confidence_scores) else 0.0
+                                    matched_person = face_result.matched_persons[i] if face_result.matched_persons and i < len(face_result.matched_persons) else None
+                                    # Construct face_info dictionary
+                                    face_info = {
+                                        'face_id': face_id,
+                                        'bounding_box': bounding_box,
+                                        'confidence': confidence,
+                                        'person_name': matched_person.split('(')[0].strip() if matched_person else None,
+                                        'face_image_path': None  # Will be set if saved
+                                    }
+                                    # Try to get face image path from MongoDB if it was saved
+                                    try:
+                                        faces_collection = self.db_manager.db.detected_faces
+                                        existing_face = faces_collection.find_one({'face_id': face_id})
+                                        if existing_face:
+                                            face_info['face_image_path'] = existing_face.get('face_image_path')
+                                    except:
+                                        pass
+                                    # Get frame number from frame path if possible
+                                    frame_number = 0
+                                    try:
+                                        # Try to extract frame number from frame_path
+                                        import re
+                                        frame_match = re.search(r'frame_(\d+)', frame_path)
+                                        if frame_match:
+                                            frame_number = int(frame_match.group(1))
+                                        else:
+                                            # Estimate from timestamp (assuming 30 fps)
+                                            frame_number = int(timestamp * 30)
+                                    except:
+                                        frame_number = int(timestamp * 30)  # Fallback estimate
+                                    # Process this face_info - Save face to MongoDB detected_faces collection
+                                    # Convert bounding_box array [x1, y1, x2, y2] to bounding_boxes object {x1, y1, x2, y2}
+                                    bounding_box_array = face_info.get('bounding_box', [])
+                                    bounding_boxes_obj = {}
+                                    if isinstance(bounding_box_array, list) and len(bounding_box_array) >= 4:
+                                        bounding_boxes_obj = {
+                                            'x1': int(bounding_box_array[0]),
+                                            'y1': int(bounding_box_array[1]),
+                                            'x2': int(bounding_box_array[2]),
+                                            'y2': int(bounding_box_array[3])
+                                        }
+                                    face_data = {
+                                        'face_id': face_info.get('face_id', f"face_{uuid.uuid4().hex[:8]}"),
+                                        'event_id': associated_event_id or f"event_{uuid.uuid4().hex[:8]}",
+                                        'detected_at': datetime.utcnow(),
+                                        'confidence_score': float(face_info.get('confidence', 0.0)),
+                                        'bounding_box': bounding_box_array,  # Keep array format for backward compatibility
+                                        'bounding_boxes': bounding_boxes_obj,  # Object format required by MongoDB schema
+                                        'person_name': face_info.get('person_name'),
+                                        'person_confidence': None,
+                                        'face_image_path': '',  # Initialize as empty string (schema requires string)
+                                        'minio_object_key': None,
+                                        'minio_bucket': None,
+                                        'frame_number': frame_number,  # Store frame number to link to keyframes
+                                        'timestamp': float(timestamp),  # Store timestamp in seconds to link to keyframes
+                                        'video_id': video_id  # Store video_id for easier querying
+                                    }
+                                    # Upload face image to MinIO if available
+                                    # First try to save face image from the face detection result
+                                    temp_face_path = None
+                                    try:
+                                        # Get face crop from the detection result
+                                        if i < len(face_result.face_bounding_boxes):
+                                            # Load frame and crop face
+                                            import cv2
+                                            frame_img = cv2.imread(frame_path)
+                                            if frame_img is not None:
+                                                box = face_result.face_bounding_boxes[i]
+                                                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+                                                # Ensure valid coordinates
+                                                x1, y1 = max(0, x1), max(0, y1)
+                                                x2, y2 = min(frame_img.shape[1], x2), min(frame_img.shape[0], y2)
+                                                if x2 > x1 and y2 > y1:
+                                                    face_crop = frame_img[y1:y2, x1:x2]
+                                                    # Create temp directory if it doesn't exist
+                                                    temp_dir = "temp_faces"
+                                                    os.makedirs(temp_dir, exist_ok=True)
+                                                    # Save face crop temporarily
+                                                    temp_face_path = os.path.join(temp_dir, f"{face_data['face_id']}.jpg")
+                                                    cv2.imwrite(temp_face_path, face_crop)
+                                                    # Verify file was created
+                                                    if os.path.exists(temp_face_path):
+                                                        # Upload to MinIO
+                                                        minio_face_path = f"{video_id}/faces/{face_data['face_id']}.jpg"
+                                                        with open(temp_face_path, 'rb') as f:
+                                                            file_size = os.path.getsize(temp_face_path)
+                                                            self.keyframe_repo.minio.put_object(
+                                                                self.keyframe_repo.bucket,
+                                                                minio_face_path,
+                                                                f,
+                                                                file_size,
+                                                                content_type='image/jpeg'
+                                                            )
+                                                        face_data['minio_object_key'] = minio_face_path
+                                                        face_data['minio_bucket'] = self.keyframe_repo.bucket
+                                                        face_data['face_image_path'] = minio_face_path  # Store MinIO path, not temp path
+                                                        logger.info(f"✅ Uploaded face image to MinIO: {minio_face_path}")
+                                                    else:
+                                                        logger.warning(f"Failed to create temp face file: {temp_face_path}")
+                                                else:
+                                                    logger.warning(f"Invalid bounding box coordinates: ({x1}, {y1}, {x2}, {y2})")
+                                    except Exception as e:
+                                        logger.warning(f"Failed to upload face image to MinIO: {e}")
+                                        import traceback
+                                        logger.debug(traceback.format_exc())
+                                    # Clean up temp file AFTER MongoDB save (not before)
+                                    # Save to MongoDB
+                                    try:
+                                        # Ensure face_image_path is a string (not None) for schema validation
+                                        if not face_data.get('face_image_path'):
+                                            face_data['face_image_path'] = ''  # Empty string is valid
+                                        faces_collection = self.db_manager.db.detected_faces
+                                        faces_collection.insert_one(face_data)
+                                        face_results.append(face_data)
+                                        logger.info(f"✅ Saved face to MongoDB: {face_data['face_id']}")
+                                    except Exception as e:
+                                        logger.error(f"Failed to save face to MongoDB: {e}")
+                                        import traceback
+                                        logger.debug(traceback.format_exc())
+                                        # Still add to results even if MongoDB save fails
+                                        face_results.append(face_data)
+                                    # Clean up temp file AFTER MongoDB save
+                                    if temp_face_path and os.path.exists(temp_face_path):
+                                        try:
+                                            os.remove(temp_face_path)
+                                        except Exception as e:
+                                            logger.warning(f"Failed to remove temp face file: {e}")
+                        except Exception as e:
+                            logger.error(f"Facial recognition error for frame {frame_path}: {e}")
+                            continue
+                    logger.info(f"✅ Facial recognition completed: {len(face_results)} faces detected")
+                    # Update metadata with face count
+                    self.video_repo.update_metadata(video_id, {
+                        "face_count": len(face_results),
+                        "facial_recognition_completed": True
+                    })
+                except ImportError:
+                    logger.warning("Facial recognition module not available")
+                except Exception as e:
+                    logger.error(f"Facial recognition failed: {e}")
+            # Step 6: Video Captioning (MOVED TO END - Last step, won't block other processing)
+            captioning_results = {}
+            if self.config.enable_video_captioning and self.video_captioning:
+                self.video_repo.update_metadata(video_id, {
+                    "processing_progress": 90,
+                    "processing_message": "Generating video captions with AI..."
+                })
+                logger.info("🎬 ===== STARTING VIDEO CAPTIONING (FINAL STEP) ===== ")
+                logger.info(f"📹 Processing {len(keyframes)} keyframes for captioning")
+                try:
+                    captioning_results = self.video_captioning.process_keyframes_with_captioning(
+                        keyframes,
+                        video_id=video_id
+                    )
+                    # Update video metadata with captioning info
+                    self.video_repo.update_metadata(video_id, {
+                        "total_captions": captioning_results.get('total_captions', 0),
+                        "captioning_enabled": captioning_results.get('enabled', False)
+                    })
+                    logger.info(f"✅ Video captioning complete: {captioning_results.get('total_captions', 0)} captions generated")
+                    logger.info(f"💾 Captions saved to MongoDB, embeddings saved to FAISS")
+                except Exception as caption_error:
+                    logger.error(f"❌ Video captioning failed (non-fatal): {caption_error}")
+                    # Don't fail the entire pipeline if captioning fails
+                    captioning_results = {'enabled': True, 'total_captions': 0, 'errors': [str(caption_error)]}
+            # Step 7: Finalize processing
+            final_meta_data = {
+                "processing_status": "completed",
+                "processing_progress": 100,
+                "processing_message": "Processing completed successfully!",
+                "keyframe_count": len(keyframes),
+                "detection_count": len(detection_results),
+                "event_count": len(object_events) if detection_results else 0,
+                "face_count": len(face_results) if 'face_results' in locals() else 0,
+                "caption_count": captioning_results.get('total_captions', 0) if captioning_results else 0,
+                "processed_at": datetime.utcnow().isoformat()
+            }
+            # Compressed video path was already set in Step 2
+            # No need to update again here
+            self.video_repo.update_processing_status(video_id, "completed")
+            self.video_repo.update_metadata(video_id, final_meta_data)
+            logger.info(f"✅ Video processing completed successfully: {video_id}")
+            # Cleanup temporary files
+            self._cleanup_temp_files(video_path, keyframes)
+        except Exception as e:
+            logger.error(f"❌ Video processing failed for {video_id}: {e}")
+            # Update status to failed
+            self.video_repo.update_processing_status(video_id, "failed")
+            self.video_repo.update_metadata(video_id, {
+                "processing_progress": 0,
+                "processing_message": f"Processing failed: {str(e)}",
+                "error_message": str(e),
+                "failed_at": datetime.utcnow().isoformat()
+            })
+            raise
+    def _extract_video_metadata(self, video_path: str) -> Dict:
+        """Extract metadata from video file with schema-compliant field names.
+        Args:
+            video_path: Path of the video file to probe with OpenCV.
+        Returns:
+            Dict with "duration" (frame_count / fps, or 0 when fps is not
+            positive), "fps", "resolution" ("<width>x<height>"), "file_size"
+            and "frame_count". When probing fails, the error is logged and
+            only {"file_size": ...} is returned.
+        Raises:
+            OSError: If the file size cannot be read in the fallback path.
+        """
+        try:
+            cap = cv2.VideoCapture(video_path)
+            try:
+                fps = cap.get(cv2.CAP_PROP_FPS)
+                frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            finally:
+                # Release the capture handle even if a property read or conversion raises
+                cap.release()
+            duration = frame_count / fps if fps > 0 else 0
+            file_size = os.path.getsize(video_path)
+            return {
+                "duration": duration,
+                "fps": float(fps),
+                "resolution": f"{width}x{height}",
+                "file_size": int(file_size),
+                "frame_count": int(frame_count)
+            }
+        except Exception as e:
+            logger.error(f"Failed to extract video metadata: {e}")
+            # Fall back to the one field that does not need the video to be decodable
+            return {"file_size": os.path.getsize(video_path)}
+    def _run_object_detection_on_keyframes(self, video_id: str, keyframes: List) -> List[Dict]:
+        """Run object detection on extracted keyframes, create annotated frames, and upload to MinIO.
+        Args:
+            video_id: Identifier used as the storage prefix and for metadata updates.
+            keyframes: Keyframe objects; each either exposes ``frame_data`` or is
+                itself the frame data, carrying ``frame_path`` (or ``path``) and
+                optionally ``timestamp`` and ``frame_number``.
+        Returns:
+            One dict per detected object (class, confidence, bbox, center point,
+            area, timestamp, model and the annotated frame path, which is None
+            when no annotated frame was uploaded). Returns an empty list if
+            detection raises; annotation/upload failures are only logged.
+        """
+        detection_results = []
+        annotated_keyframes_info = []  # Store info about annotated keyframes
+        try:
+            for i, keyframe in enumerate(keyframes):
+                # Get frame data
+                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+                # Get frame path depending on structure
+                frame_path = (
+                    frame_data.frame_path if hasattr(frame_data, 'frame_path')
+                    else getattr(frame_data, 'path', None)
+                )
+                if not (frame_path and os.path.exists(frame_path)):
+                    continue
+                timestamp = getattr(frame_data, 'timestamp', 0.0)
+                frame_number = getattr(frame_data, 'frame_number', i)
+                # Run detection on this keyframe
+                detection_result = self.object_detector.detect_objects_in_frame(
+                    frame_path,
+                    timestamp
+                )
+                if not (detection_result and detection_result.detected_objects):
+                    continue
+                detected_objects = detection_result.detected_objects
+                # Stays None unless the annotated frame is actually uploaded
+                annotated_minio_path = None
+                try:
+                    # Create annotated version of the frame
+                    annotated_path = self.object_detector.annotate_frame_with_detections(
+                        frame_path,
+                        detection_result
+                    )
+                    # Upload annotated frame to MinIO
+                    if annotated_path and os.path.exists(annotated_path):
+                        upload_path = f"{video_id}/keyframes/annotated/frame_{frame_number:06d}_annotated.jpg"
+                        file_size = os.path.getsize(annotated_path)
+                        metadata = {
+                            "frame_number": str(frame_number),
+                            "timestamp": str(timestamp),
+                            "is_annotated": "true",
+                            "detection_count": str(len(detected_objects))
+                        }
+                        with open(annotated_path, 'rb') as f:
+                            self.keyframe_repo.minio.put_object(
+                                self.keyframe_repo.bucket,
+                                upload_path,
+                                f,
+                                file_size,
+                                content_type='image/jpeg',
+                                metadata=metadata
+                            )
+                        # Only link detections to the annotated frame once the upload succeeded
+                        annotated_minio_path = upload_path
+                        annotated_keyframes_info.append({
+                            "frame_number": frame_number,
+                            "timestamp": timestamp,
+                            "minio_path": annotated_minio_path,
+                            "original_minio_path": f"{video_id}/keyframes/frame_{frame_number:06d}.jpg",
+                            "detection_count": len(detected_objects),
+                            "objects": [obj.class_name for obj in detected_objects],
+                            "confidence_avg": sum(obj.confidence for obj in detected_objects) / len(detected_objects)
+                        })
+                        logger.info(f"✅ Uploaded annotated keyframe to MinIO: {annotated_minio_path}")
+                except Exception as e:
+                    # Annotation is best-effort; raw detections are still recorded below
+                    logger.warning(f"Failed to create/upload annotated keyframe: {e}")
+                # Process detected objects for detection_results
+                for obj in detected_objects:
+                    detection_data = {
+                        "frame_number": frame_number,
+                        "class_name": str(obj.class_name),
+                        "confidence": float(obj.confidence),
+                        "bbox": [int(x) for x in obj.bbox[:4]],  # Convert to list of ints
+                        "center_point": [float(x) for x in obj.center_point],
+                        "area": float(obj.area),
+                        "frame_timestamp": float(obj.frame_timestamp),
+                        "detection_model": str(obj.detection_model),
+                        "annotated_minio_path": annotated_minio_path  # Link to annotated frame
+                    }
+                    # Apply numpy type conversion
+                    detection_data = convert_numpy_types(detection_data)
+                    detection_results.append(detection_data)
+            # Store annotated keyframes info in MongoDB metadata
+            if annotated_keyframes_info:
+                self.video_repo.update_metadata(video_id, {
+                    "annotated_keyframes_info": annotated_keyframes_info,
+                    "annotated_keyframes_count": len(annotated_keyframes_info)
+                })
+                logger.info(f"✅ Stored {len(annotated_keyframes_info)} annotated keyframes metadata")
+            logger.info(f"✅ Object detection completed: {len(detection_results)} detections")
+            return detection_results
+        except Exception as e:
+            logger.error(f"Object detection failed: {e}")
+            import traceback
+            logger.debug(traceback.format_exc())
+            return []
+    def _create_object_events_from_detections(self, detection_results: List[Dict]) -> List[Dict]:
+        """Aggregate raw detections into one schema-compliant event per group.
+        Detections are grouped via ``_group_detections_by_class_and_time``; every
+        non-empty group yields an event spanning its earliest to latest
+        ``frame_timestamp``, with the mean confidence, an importance score
+        (mean confidence times a per-class multiplier), per-detection boxes
+        and a threat level. Returns an empty list if anything raises.
+        """
+        # Per-class weight for the importance score; unlisted classes use 1.0
+        multipliers = {'fire': 3.0, 'gun': 3.0, 'knife': 2.0, 'smoke': 1.5}
+        try:
+            built = []
+            groups = self._group_detections_by_class_and_time(detection_results)
+            for class_name, members in groups.items():
+                if not members:
+                    continue
+                timestamps = [item['frame_timestamp'] for item in members]
+                first_seen = min(timestamps)
+                last_seen = max(timestamps)
+                mean_confidence = sum([item['confidence'] for item in members]) / len(members)
+                weight = multipliers.get(class_name, 1.0)
+                boxes = []
+                for item in members:
+                    box = item['bbox']
+                    # bbox is read as [x1, y1, x2, y2]; the schema stores x/y/width/height
+                    boxes.append({
+                        "x": box[0],
+                        "y": box[1],
+                        "width": box[2] - box[0],
+                        "height": box[3] - box[1],
+                        "confidence": item['confidence'],
+                        "class_name": item['class_name']
+                    })
+                built.append({
+                    "event_type": f"object_detection_{class_name}",
+                    "start_timestamp": first_seen,
+                    "end_timestamp": last_seen,
+                    "confidence_score": mean_confidence,
+                    "importance_score": mean_confidence * weight,
+                    "bounding_boxes": boxes,
+                    "detected_object_type": class_name,
+                    "detection_count": len(members),
+                    "threat_level": self._calculate_threat_level(class_name, mean_confidence)
+                })
+            return built
+        except Exception as e:
+            logger.error(f"Failed to create object events: {e}")
+            return []
+    def _calculate_threat_level(self, class_name: str, confidence: float) -> str:
+        """Map an object class and its confidence to a threat label.
+        Returns 'critical' for fire/gun above 0.7, 'high' for fire/gun/knife
+        above 0.5, 'medium' for any remaining smoke/knife, otherwise 'low'.
+        """
+        is_fire_or_gun = class_name in ('fire', 'gun')
+        if is_fire_or_gun and confidence > 0.7:
+            return 'critical'
+        if (is_fire_or_gun or class_name == 'knife') and confidence > 0.5:
+            return 'high'
+        # NOTE(review): fire/gun at or below 0.5 end up 'low' while knife gets 'medium' - confirm this is intended
+        if class_name in ('smoke', 'knife'):
+            return 'medium'
+        return 'low'
+    def _group_detections_by_class_and_time(self, detections: List[Dict], time_window: float = 5.0) -> Dict[str, List[Dict]]:
+        """Bucket detections by ``class_name``, each bucket ordered by ``frame_timestamp``.
+        NOTE: ``time_window`` is accepted but not used by this implementation;
+        detections are grouped by class only, with no temporal splitting.
+        """
+        buckets = {}
+        # Sorting first keeps every bucket in chronological order
+        for item in sorted(detections, key=lambda det: det['frame_timestamp']):
+            buckets.setdefault(item['class_name'], []).append(item)
+        return buckets
+    def _generate_compressed_video(self, video_path: str, video_id: str) -> Optional[str]:
+        """Compress the video through the compression service and record the outcome.
+        On success the size, ratio, resolution and storage paths are written to
+        the video metadata and the MinIO path of the compressed file is
+        returned. Returns None when compression reports failure or raises.
+        """
+        try:
+            outcome = self.compression_service.compress_and_store(video_path, video_id)
+            if not (outcome and outcome.get('success')):
+                logger.error("Video compression failed")
+                return None
+            details = {
+                'original_size_bytes': outcome['original_size'],
+                'compressed_size_bytes': outcome['compressed_size'],
+                'compression_ratio': outcome['compression_ratio'],
+                'output_resolution': outcome['output_resolution'],
+                # Local copy is kept as a fallback next to the MinIO location
+                'local_path': outcome.get('local_path'),
+                'minio_path': outcome.get('minio_path')
+            }
+            # The MinIO path is duplicated at the top level for easy access
+            metadata_patch = {
+                'compression_info': details,
+                'minio_compressed_path': outcome.get('minio_path')
+            }
+            self.video_repo.update_metadata(video_id, metadata_patch)
+            logger.info(f"✅ Stored compression info with local path: {outcome.get('local_path')}")
+            return outcome['minio_path']
+        except Exception as e:
+            logger.error(f"❌ Failed to generate compressed video: {e}")
+            return None
+    def _cleanup_temp_files(self, video_path: str, keyframes: List):
+        """Delete the uploaded video and the keyframe image files, if present.
+        Any failure is logged and swallowed; the first error stops the
+        remaining deletions.
+        """
+        try:
+            # Uploaded source video
+            if os.path.exists(video_path):
+                os.remove(video_path)
+            # Keyframe images written during extraction
+            for entry in keyframes:
+                holder = entry.frame_data if hasattr(entry, 'frame_data') else entry
+                if hasattr(holder, 'frame_path'):
+                    target = holder.frame_path
+                else:
+                    target = getattr(holder, 'path', None)
+                if not target or not os.path.exists(target):
+                    continue
+                os.remove(target)
+            logger.info("✅ Temporary files cleaned up")
+        except Exception as e:
+            logger.error(f"⚠️ Failed to cleanup temp files: {e}")
+    def get_video_status(self, video_id: str) -> Dict:
+        """Get processing status for a video.
+        Args:
+            video_id: Identifier of the video to look up.
+        Returns:
+            {"error": "Video not found"} when the video does not exist.
+            Otherwise a dict with the stored status fields (status, filename,
+            upload date, duration, fps, size, resolution, counts, progress and
+            message) plus, when they can be generated, "original_video_url",
+            "compressed_video_url", "compressed_video_presigned_url" and
+            "keyframes_urls". URL generation failures are logged, not raised.
+        """
+        video = self.video_repo.get_video_by_id(video_id)
+        if not video:
+            return {"error": "Video not found"}
+        meta_data = video.get("meta_data", {})
+        status_data = {
+            "video_id": video_id,
+            "status": meta_data.get("processing_status", "unknown"),
+            "filename": meta_data.get("filename"),
+            "upload_date": video.get("upload_date"),
+            "duration": video.get("duration_secs"),
+            "fps": video.get("fps"),
+            "file_size_bytes": video.get("file_size_bytes"),
+            "resolution": meta_data.get("resolution"),
+            "keyframe_count": meta_data.get("keyframe_count", 0),
+            "detection_count": meta_data.get("detection_count", 0),
+            "event_count": meta_data.get("event_count", 0),
+            "processing_progress": meta_data.get("processing_progress", 0),
+            "processing_message": meta_data.get("processing_message", "")
+        }
+        # Add presigned URLs for accessing content
+        try:
+            # Original video URL
+            minio_original_path = meta_data.get("minio_original_path")
+            if minio_original_path:
+                status_data["original_video_url"] = self.video_repo.get_video_presigned_url(minio_original_path)
+            # Compressed video URL (if available)
+            minio_compressed_path = meta_data.get("minio_compressed_path")
+            if minio_compressed_path:
+                # Always use the API endpoint which will handle MinIO/local fallback
+                status_data["compressed_video_url"] = f"/api/video/compressed/{video_id}"
+                # Also try to get presigned URL as alternative
+                try:
+                    presigned_url = self.compression_service.get_compressed_video_presigned_url(video_id)
+                    if presigned_url:
+                        status_data["compressed_video_presigned_url"] = presigned_url
+                except Exception as e:
+                    # Optional extra URL: stay best-effort, but no longer swallow
+                    # KeyboardInterrupt/SystemExit or hide the cause entirely
+                    logger.debug(f"Presigned URL unavailable for compressed video {video_id}: {e}")
+            elif meta_data.get("processing_status") == "completed":
+                # Compression path not recorded, but processing finished: still
+                # expose the API endpoint and let it resolve the file
+                status_data["compressed_video_url"] = f"/api/video/compressed/{video_id}"
+            # Keyframes URLs (if available)
+            if meta_data.get("keyframe_count", 0) > 0:
+                try:
+                    keyframes_urls = self.keyframe_repo.get_video_keyframes_presigned_urls(video_id)
+                    # If no URLs from MinIO, try to get from MongoDB metadata
+                    if not keyframes_urls and meta_data.get("keyframe_info"):
+                        # Generate URLs from stored metadata
+                        keyframes_urls = []
+                        for kf_info in meta_data.get("keyframe_info", []):
+                            minio_path = kf_info.get("minio_path")
+                            if minio_path:
+                                presigned_url = self.keyframe_repo.get_keyframe_presigned_url(minio_path)
+                                # Also provide API endpoint URL
+                                api_url = f"/api/minio/image/{self.keyframe_repo.bucket}/{minio_path}"
+                                if presigned_url:
+                                    keyframes_urls.append({
+                                        'frame_number': kf_info.get("frame_number", 0),
+                                        'timestamp': kf_info.get("timestamp", 0.0),
+                                        'minio_path': minio_path,
+                                        'presigned_url': presigned_url,
+                                        'url': api_url,  # Use API endpoint for better reliability
+                                        'api_url': api_url,
+                                        'filename': minio_path.split('/')[-1]
+                                    })
+                    status_data["keyframes_urls"] = keyframes_urls
+                except Exception as e:
+                    logger.warning(f"Failed to get keyframes URLs: {e}")
+                    status_data["keyframes_urls"] = []
+        except Exception as e:
+            logger.warning(f"Failed to generate presigned URLs for video {video_id}: {e}")
+        return status_data
+    def get_video_keyframes(self, video_id: str, filter_detections: bool = False, limit: Optional[int] = None) -> Dict:
+        """Get keyframes for a video with optional filtering and presigned URLs.
+        Each keyframe entry from the keyframe repository (or, when that yields
+        nothing, rebuilt from the "keyframe_info" metadata) is enriched with
+        "has_detections", "has_faces", a "url" alias of its presigned URL and,
+        when available, annotated-frame and face details.
+        Args:
+            video_id: Identifier of the video whose keyframes are requested.
+            filter_detections: When True, keep only keyframes flagged "has_detections".
+            limit: Maximum number of keyframes to return; None or a
+                non-positive value disables the cap.
+        Returns:
+            Dict with "video_id", "keyframes", "total_keyframes" (count after
+            filtering and limiting), "filter_applied", "limit_applied" and
+            "keyframe_count" (from stored metadata). For an unknown video or
+            on failure, a dict with a single "error" key.
+        """
+        try:
+            # Get video record to check if it exists
+            video = self.video_repo.get_video_by_id(video_id)
+            if not video:
+                return {"error": "Video not found"}
+            meta_data = video.get("meta_data", {})
+            # Normalise a None result to an empty list so the fallback can append to it
+            keyframes_urls = self.keyframe_repo.get_video_keyframes_presigned_urls(video_id) or []
+            # Fallback: If no keyframes from MinIO, try to get from MongoDB metadata
+            if not keyframes_urls:
+                keyframe_info = meta_data.get("keyframe_info", [])
+                if keyframe_info:
+                    logger.info(f"Using MongoDB metadata for keyframes: {len(keyframe_info)} keyframes")
+                    for kf_info in keyframe_info:
+                        minio_path = kf_info.get("minio_path")
+                        if not minio_path:
+                            continue
+                        try:
+                            presigned_url = self.keyframe_repo.get_keyframe_presigned_url(minio_path)
+                            if presigned_url:
+                                keyframes_urls.append({
+                                    'frame_number': kf_info.get("frame_number", 0),
+                                    'timestamp': kf_info.get("timestamp", 0.0),
+                                    'minio_path': minio_path,
+                                    'presigned_url': presigned_url,
+                                    'url': presigned_url,
+                                    'filename': minio_path.split('/')[-1]
+                                })
+                        except Exception as e:
+                            logger.warning(f"Failed to generate presigned URL for {minio_path}: {e}")
+            # Get events to determine which keyframes have detections
+            events = self.event_repo.get_events_by_video_id(video_id)
+            detection_events = [e for e in events if e.get("event_type", "").startswith("object_detection_")]
+            # Whole seconds covered by any object-detection event
+            detection_timestamps = set()
+            for event in detection_events:
+                # Convert milliseconds to seconds and add 1-second steps across the range
+                start_sec = event.get("start_timestamp_ms", 0) / 1000.0
+                end_sec = event.get("end_timestamp_ms", 0) / 1000.0
+                detection_timestamps.update(range(int(start_sec), int(end_sec) + 1))
+            # Get annotated keyframes info from metadata
+            annotated_keyframes_info = meta_data.get("annotated_keyframes_info", [])
+            annotated_lookup = {kf.get("frame_number"): kf for kf in annotated_keyframes_info}
+            # Get faces for this video to check which keyframes have faces
+            faces_data = self.get_video_faces(video_id)
+            faces = faces_data.get("faces", [])
+            # Frame numbers and timestamps that have faces (falsy values are ignored)
+            frames_with_faces = set()
+            timestamps_with_faces = set()
+            for face in faces:
+                face_frame = face.get('frame_number', 0)
+                face_timestamp = face.get('timestamp', 0)
+                if face_frame:
+                    frames_with_faces.add(face_frame)
+                if face_timestamp:
+                    timestamps_with_faces.add(face_timestamp)
+            # Enhance keyframes with detection info and annotated URLs
+            enhanced_keyframes = []
+            for kf in keyframes_urls:
+                timestamp_sec = kf.get('timestamp', 0)
+                frame_number = kf.get('frame_number', 0)
+                # Check if this timestamp has detections (within 1 second tolerance)
+                has_detections = any(abs(timestamp_sec - dt) < 1.0 for dt in detection_timestamps)
+                # Check if this keyframe has faces (by frame_number or timestamp)
+                has_faces = (
+                    frame_number in frames_with_faces or
+                    any(abs(timestamp_sec - ft) < 0.5 for ft in timestamps_with_faces)
+                )
+                enhanced_kf = {
+                    **kf,
+                    'has_detections': has_detections,
+                    'has_faces': has_faces,  # Add face detection flag
+                    'url': kf.get('presigned_url'),  # Add url alias for compatibility
+                }
+                # Add annotated frame info if available
+                if frame_number in annotated_lookup:
+                    annotated_info = annotated_lookup[frame_number]
+                    # Generate presigned URL for annotated frame
+                    try:
+                        annotated_presigned_url = self.keyframe_repo.get_keyframe_presigned_url(
+                            annotated_info.get("minio_path")
+                        )
+                        if annotated_presigned_url:
+                            enhanced_kf['annotated_url'] = annotated_presigned_url
+                            enhanced_kf['annotated_presigned_url'] = annotated_presigned_url
+                            enhanced_kf['detection_count'] = annotated_info.get("detection_count", 0)
+                            enhanced_kf['objects'] = annotated_info.get("objects", [])
+                            enhanced_kf['confidence_avg'] = annotated_info.get("confidence_avg", 0.0)
+                            enhanced_kf['has_detections'] = True  # Override if annotated frame exists
+                    except Exception as e:
+                        logger.warning(f"Failed to get presigned URL for annotated keyframe: {e}")
+                # If this keyframe has faces, prioritize showing "Face Detected" over object names
+                if has_faces:
+                    # Count faces for this keyframe
+                    face_count = sum(
+                        1 for face in faces
+                        if (face.get('frame_number') == frame_number or
+                            abs(face.get('timestamp', 0) - timestamp_sec) < 0.5)
+                    )
+                    enhanced_kf['face_count'] = face_count
+                    # Put a single "Face Detected" label first and drop any other
+                    # face-like labels so it is never duplicated
+                    other_objects = [
+                        obj for obj in (enhanced_kf.get('objects') or [])
+                        if 'face' not in str(obj).lower()
+                    ]
+                    enhanced_kf['objects'] = ['Face Detected'] + other_objects
+                    # Update detection count to include faces
+                    enhanced_kf['detection_count'] = enhanced_kf.get('detection_count', 0) + face_count
+                enhanced_keyframes.append(enhanced_kf)
+            # Apply filtering if requested
+            if filter_detections:
+                filtered_keyframes = [kf for kf in enhanced_keyframes if kf.get('has_detections', False)]
+            else:
+                filtered_keyframes = enhanced_keyframes
+            # Apply limit if specified
+            if limit and limit > 0:
+                filtered_keyframes = filtered_keyframes[:limit]
+            return {
+                "video_id": video_id,
+                "keyframes": filtered_keyframes,
+                "total_keyframes": len(filtered_keyframes),
+                "filter_applied": filter_detections,
+                "limit_applied": limit if limit and limit > 0 else None,
+                "keyframe_count": meta_data.get("keyframe_count", 0)
+            }
+        except Exception as e:
+            logger.error(f"Failed to get keyframes for video {video_id}: {e}")
+            return {"error": str(e)}
+    def get_video_events(self, video_id: str, event_type: str = None) -> Dict:
+        """Return the events stored for a video, optionally restricted to one type.
+        A falsy ``event_type`` disables filtering. The result carries
+        "video_id", the "events" list and its length as "total_events".
+        """
+        all_events = self.event_repo.get_events_by_video_id(video_id)
+        if event_type:
+            selected = list(filter(lambda evt: evt.get("event_type") == event_type, all_events))
+        else:
+            selected = all_events
+        return dict(video_id=video_id, events=selected, total_events=len(selected))
+    def get_video_detections(self, video_id: str, class_filter: str = None) -> Dict:
+        """Get object detections for a video from events.
+        Only events whose type starts with "object_detection_" are used. Each
+        event contributes one detection per entry found in its
+        "bounding_boxes" (dict with "detections", or a plain list) or
+        "detections" field; an event that yields none contributes a single
+        placeholder detection derived from its event type.
+        Args:
+            video_id: Identifier of the video to inspect.
+            class_filter: When set, keep only "object_detection_<class_filter>" events.
+        Returns:
+            Dict with "video_id", "detections" and "total_detections"; on
+            failure the list is empty and an "error" message is added.
+        """
+        try:
+            # Get all events for this video
+            events = self.event_repo.get_events_by_video_id(video_id)
+            # Filter events that are object detection events
+            detection_events = [e for e in events if e.get("event_type", "").startswith("object_detection_")]
+            # Apply class filter if specified
+            if class_filter:
+                detection_events = [e for e in detection_events if e.get("event_type") == f"object_detection_{class_filter}"]
+            detections = []
+            for event in detection_events:
+                bboxes = event.get("bounding_boxes", {})
+                # Handle different bounding_boxes structures
+                event_detections = []
+                if isinstance(bboxes, dict):
+                    event_detections = bboxes.get("detections", [])
+                elif isinstance(bboxes, list):
+                    # If bounding_boxes is a list directly
+                    event_detections = bboxes
+                # Also check if detections are stored directly in event
+                if not event_detections:
+                    event_detections = event.get("detections", [])
+                # Remember the size so the fallback below is decided per event
+                count_before_event = len(detections)
+                for det in event_detections:
+                    # Handle both dict and list formats
+                    if isinstance(det, dict):
+                        if "bbox" in det:
+                            bbox = det["bbox"]
+                        elif all(key in det for key in ("x", "y", "width", "height")):
+                            # Boxes written by _create_object_events_from_detections use
+                            # x/y/width/height; report them as [x1, y1, x2, y2]
+                            bbox = [det["x"], det["y"], det["x"] + det["width"], det["y"] + det["height"]]
+                        else:
+                            bbox = [0, 0, 0, 0]
+                        detections.append({
+                            "class_name": det.get("class", det.get("class_name", "unknown")),
+                            "confidence": float(det.get("confidence", 0.0)),
+                            "bbox": bbox,
+                            "timestamp": float(det.get("timestamp", event.get("start_timestamp_ms", 0) / 1000.0)),
+                            "event_id": event.get("event_id"),
+                            "model": det.get("model", "unknown")
+                        })
+                    elif isinstance(det, list) and len(det) >= 4:
+                        # Handle list format [x, y, width, height, class, confidence]
+                        detections.append({
+                            "class_name": str(det[4]) if len(det) > 4 else "unknown",
+                            "confidence": float(det[5]) if len(det) > 5 else 0.0,
+                            "bbox": [int(det[0]), int(det[1]), int(det[0] + det[2]), int(det[1] + det[3])],
+                            "timestamp": float(event.get("start_timestamp_ms", 0) / 1000.0),
+                            "event_id": event.get("event_id"),
+                            "model": "unknown"
+                        })
+                # Fall back to the event type when THIS event produced no detections
+                # (previously the check looked at the whole result list, so only
+                # events before the first detection ever got a fallback entry)
+                if len(detections) == count_before_event and event.get("event_type"):
+                    event_type = event.get("event_type", "")
+                    if event_type.startswith("object_detection_"):
+                        detections.append({
+                            "class_name": event_type.replace("object_detection_", ""),
+                            "confidence": float(event.get("confidence_score", 0.0)),
+                            "bbox": [0, 0, 0, 0],  # No bbox info available
+                            "timestamp": float(event.get("start_timestamp_ms", 0) / 1000.0),
+                            "event_id": event.get("event_id"),
+                            "model": "unknown"
+                        })
+            return {
+                "video_id": video_id,
+                "detections": detections,
+                "total_detections": len(detections)
+            }
+        except Exception as e:
+            logger.error(f"Failed to get detections for video {video_id}: {e}")
+            return {
+                "video_id": video_id,
+                "detections": [],
+                "total_detections": 0,
+                "error": str(e)
+            }
+    def get_video_faces(self, video_id: str) -> Dict:
+        """Get detected faces for a video (through events)"""
+        try:
+            # Get all events for this video
+            events = self.event_repo.get_events_by_video_id(video_id)
+            event_ids = [e.get('event_id') for e in events if e.get('event_id')]
+            if not event_ids:
+                return {
+                    "video_id": video_id,
+                    "faces": [],
+                    "total_faces": 0
+                }
+            # Query detected_faces collection for faces associated with these events
+            faces_collection = self.db_manager.db.detected_faces
+            faces = list(faces_collection.find({"event_id": {"$in": event_ids}}))
+            # Convert ObjectIds to strings
+            from database.models import convert_objectid_to_string
+            faces = [convert_objectid_to_string(face) for face in faces]
+            return {
+                "video_id": video_id,
+                "faces": faces,
+                "total_faces": len(faces)
+            }
+        except Exception as e:
+            logger.error(f"Failed to get faces for video {video_id}: {e}")
+            return {
+                "video_id": video_id,
+                "faces": [],
+                "total_faces": 0,
+                "error": str(e)
+            }
+    def process_video_complete(self, video_path: str, video_id: str, user_id: str = None,
+                             upload_to_minio: bool = True, enable_compression: bool = True,
+                             enable_object_detection: bool = True, enable_behavior_analysis: bool = True,
+                             enable_event_aggregation: bool = True,
+                             enable_deduplication: bool = True) -> Dict:
+        """
+        Complete video processing pipeline with all features
+        Args:
+            video_path: Path to the video file
+            video_id: Unique identifier for the video
+            user_id: User identifier
+            upload_to_minio: Whether to upload to MinIO storage
+            enable_compression: Whether to compress the video
+            enable_object_detection: Whether to run object detection
+            enable_event_aggregation: Whether to aggregate events
+            enable_deduplication: Whether to deduplicate similar events
+        Returns:
+            Dict with processing results and statistics
+        """
+        logger.info(f"🔥 Starting complete pipeline processing for {video_id}")
+        start_time = time.time()
+        results = {
+            "video_id": video_id,
+            "status": "processing",
+            "minio_uploaded": False,
+            "processing_stats": {}
+        }
+        try:
+            # Step 1: Create video record with metadata
+            logger.info("📝 Creating video record...")
+            video_metadata = self._extract_video_metadata(video_path)
+            # Create schema-compliant video record
+            video_record = {
+                "video_id": video_id,
+                "user_id": user_id or "system",
+                "file_path": f"videos/{video_id}.mp4",
+                "fps": video_metadata.get("fps", 30.0),
+                "duration_secs": int(video_metadata.get("duration", 0)),
+                "file_size_bytes": video_metadata.get("file_size", 0),
+                "codec": "h264",  # default codec
+                "meta_data": {
+                    "processing_status": "processing",
+                    "filename": os.path.basename(video_path),
+                    "resolution": video_metadata.get("resolution"),
+                    "frame_count": video_metadata.get("frame_count")
+                }
+            }
+            video_doc_id = self.video_repo.create_video_record(video_record)
+            logger.info(f"✅ Created video record: {video_id}")
+            # Step 2: Upload to MinIO (if enabled and available)
+            minio_uploaded = False
+            if upload_to_minio:
+                try:
+                    logger.info("☁️ Uploading to MinIO...")
+                    minio_path = self.video_repo.upload_video_to_minio(video_path, video_id)
+                    minio_uploaded = True
+                    self.video_repo.update_metadata(video_id, {"minio_original_path": minio_path})
+                    logger.info(f"✅ Video uploaded to MinIO: {minio_path}")
+                except Exception as e:
+                    logger.warning(f"⚠️ MinIO upload failed (graceful fallback): {e}")
+            results["minio_uploaded"] = minio_uploaded
+            # Step 3: Process keyframes with object detection
+            logger.info("🔑 Processing keyframes...")
+            keyframes = self.video_processor.extract_keyframes(video_path)
+            logger.info(f"✅ Extracted {len(keyframes)} keyframes")
+            # Run object detection on keyframes if enabled
+            detection_results = []
+            if enable_object_detection and self.object_detector:
+                logger.info("🎯 Running object detection...")
+                for i, keyframe in enumerate(keyframes):
+                    # Handle KeyframeResult objects correctly
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+                    if frame_path and os.path.exists(frame_path):
+                        result = self.object_detector.detect_objects_in_frame(frame_path, timestamp)
+                        detections = []
+                        if result and result.detected_objects:
+                            for obj in result.detected_objects:
+                                detection_dict = {
+                                    "class_name": str(obj.class_name),
+                                    "confidence": float(obj.confidence),
+                                    "bbox": [int(x) for x in obj.bbox[:4]],
+                                    "frame_timestamp": float(timestamp),
+                                    "annotated_path": getattr(obj, 'annotated_path', None)
+                                }
+                                # Apply numpy type conversion
+                                detection_dict = convert_numpy_types(detection_dict)
+                                detections.append(detection_dict)
+                        # Store detections in keyframe (add as attribute)
+                        keyframe.object_detections = detections
+                        detection_results.extend(detections)
+                        # Log fire detections specifically
+                        fire_detections = [d for d in detections if d.get('class_name') == 'fire']
+                        if fire_detections:
+                            logger.info(f"🔥 Fire detected at {timestamp:.1f}s (confidence: {fire_detections[0].get('confidence', 0):.2f})")
+                logger.info(f"✅ Found {len(detection_results)} object detections")
+            # Step 3b: Run behavior analysis on keyframes if enabled
+            behavior_results = []
+            behavior_events = []
+            if enable_behavior_analysis and self.behavior_analyzer:
+                logger.info("🔍 Running behavior analysis...")
+                # Pass video_path for 3D-ResNet models (fighting, road_accident) which need 16-frame clips
+                behavior_results, behavior_events = self.behavior_analyzer.process_keyframes_with_behavior_analysis(keyframes, video_path=video_path)
+                # Store behavior detections in keyframes
+                for i, keyframe in enumerate(keyframes):
+                    frame_path = keyframe.frame_data.frame_path if hasattr(keyframe, 'frame_data') else None
+                    timestamp = keyframe.frame_data.timestamp if hasattr(keyframe, 'frame_data') else 0
+                    # Find behavior detections for this frame
+                    frame_behaviors = [r for r in behavior_results if r.frame_path == frame_path and abs(r.timestamp - timestamp) < 0.1]
+                    if frame_behaviors:
+                        behavior_detections = []
+                        for behavior in frame_behaviors:
+                            behavior_dict = {
+                                "behavior_type": behavior.behavior_detected,
+                                "confidence": float(behavior.confidence),
+                                "frame_timestamp": float(behavior.timestamp),
+                                "model_used": behavior.model_used
+                            }
+                            behavior_dict = convert_numpy_types(behavior_dict)
+                            behavior_detections.append(behavior_dict)
+                        keyframe.behavior_detections = behavior_detections
+                logger.info(f"✅ Found {len(behavior_results)} behavior detections, {len(behavior_events)} behavior events")
+            # Step 4: Event aggregation and deduplication
+            events = []
+            if enable_event_aggregation:
+                logger.info("📅 Performing event aggregation...")
+                # Group detections by type and time proximity
+                detection_events = self._aggregate_detection_events(keyframes, video_id)
+                events.extend(detection_events)
+                # Add behavior events
+                if behavior_events:
+                    for behavior_event in behavior_events:
+                        event_dict = {
+                            "event_type": f"behavior_{behavior_event.behavior_type}",
+                            "start_timestamp": behavior_event.start_timestamp,
+                            "end_timestamp": behavior_event.end_timestamp,
+                            "confidence_score": float(behavior_event.confidence),
+                            "keyframes": behavior_event.keyframes,
+                            "importance_score": float(behavior_event.importance_score),
+                            "description": f"{behavior_event.behavior_type.capitalize()} detected",
+                            "detection_data": {
+                                "model_used": behavior_event.model_used,
+                                "frame_indices": behavior_event.frame_indices
+                            }
+                        }
+                        event_dict = convert_numpy_types(event_dict)
+                        events.append(event_dict)
+                if enable_deduplication:
+                    logger.info("🔄 Deduplicating similar events...")
+                    events = self._deduplicate_events(events)
+                # Store events in database using EventRepository
+                logger.info(f"💾 Saving {len(events)} events to database...")
+                for event in events:
+                    try:
+                        # EventRepository.save_event expects event dict with proper structure
+                        # It will handle timestamp conversion and field mapping
+                        event['video_id'] = video_id  # Add video_id to event data
+                        self.event_repo.save_event(event)
+                    except Exception as e:
+                        logger.error(f"Failed to save event: {e}")
+                logger.info(f"✅ Stored {len(events)} events in database")
+            # Step 5: Create annotated video with bounding boxes (if detections exist)
+            annotated_video_path = None
+            annotated_minio_path = None
+            if enable_object_detection and detection_results and self.object_detector:
+                try:
+                    logger.info("🎨 Creating annotated video with bounding boxes...")
+                    # Convert keyframes to detection results format for annotation
+                    detection_result_objects = []
+                    for keyframe in keyframes:
+                        if hasattr(keyframe, 'object_detections') and keyframe.object_detections:
+                            # Create ObjectDetectionResult-like object
+                            from object_detection import ObjectDetectionResult, DetectedObject
+                            from core.video_processing import FrameData
+                            detected_objects = []
+                            for det in keyframe.object_detections:
+                                detected_objects.append(DetectedObject(
+                                    class_name=det['class_name'],
+                                    confidence=det['confidence'],
+                                    bbox=det['bbox']
+                                ))
+                            if detected_objects:
+                                frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else None
+                                frame_path = frame_data.frame_path if frame_data else None
+                                timestamp = frame_data.timestamp if frame_data else 0
+                                if frame_path:
+                                    detection_result_objects.append(ObjectDetectionResult(
+                                        frame_path=frame_path,
+                                        timestamp=timestamp,
+                                        detected_objects=detected_objects,
+                                        total_detections=len(detected_objects)
+                                    ))
+                    if detection_result_objects:
+                        # Create annotated video
+                        annotated_video_path = f"video_processing_outputs/annotated/{video_id}_annotated.mp4"
+                        os.makedirs(os.path.dirname(annotated_video_path), exist_ok=True)
+                        annotated_path = self.object_detector.create_annotated_video(
+                            video_path,
+                            detection_result_objects,
+                            annotated_video_path
+                        )
+                        if annotated_path and os.path.exists(annotated_path):
+                            annotated_video_path = annotated_path
+                            # Upload annotated video to MinIO
+                            try:
+                                annotated_minio_path = f"annotated/{video_id}/video_annotated.mp4"
+                                with open(annotated_video_path, 'rb') as file_data:
+                                    file_info = os.stat(annotated_video_path)
+                                    self.video_repo.minio.put_object(
+                                        self.video_repo.video_bucket,
+                                        annotated_minio_path,
+                                        file_data,
+                                        length=file_info.st_size,
+                                        content_type='video/mp4'
+                                    )
+                                logger.info(f"✅ Uploaded annotated video to MinIO: {annotated_minio_path}")
+                                # Update metadata with annotated video path
+                                self.video_repo.update_metadata(video_id, {
+                                    "minio_annotated_path": annotated_minio_path,
+                                    "annotated_video_path": annotated_video_path
+                                })
+                            except Exception as e:
+                                logger.warning(f"⚠️ Failed to upload annotated video to MinIO: {e}")
+                            logger.info(f"✅ Annotated video created: {annotated_video_path}")
+                        else:
+                            logger.warning("⚠️ Annotated video creation returned no path")
+                    else:
+                        logger.info("ℹ️ No detections found, skipping annotated video creation")
+                except Exception as e:
+                    logger.warning(f"⚠️ Annotated video creation failed: {e}")
+                    import traceback
+                    logger.error(traceback.format_exc())
+            # Step 6: Video compression (if enabled)
+            compression_info = {}
+            if enable_compression:
+                try:
+                    logger.info("📦 Compressing video...")
+                    from video_compression import OptimizedVideoCompressor
+                    compressor = OptimizedVideoCompressor()
+                    compressed_path = f"video_processing_outputs/compressed/{video_id}_compressed.mp4"
+                    os.makedirs(os.path.dirname(compressed_path), exist_ok=True)
+                    compression_result = compressor.compress_video(video_path, compressed_path)
+                    if compression_result.get('success'):
+                        original_size = os.path.getsize(video_path) / (1024 * 1024)  # MB
+                        compressed_size = os.path.getsize(compressed_path) / (1024 * 1024)  # MB
+                        compression_ratio = (1 - compressed_size / original_size) * 100 if original_size > 0 else 0
+                        compression_info = {
+                            "original_size_mb": round(original_size, 2),
+                            "compressed_size_mb": round(compressed_size, 2),
+                            "compression_ratio": round(compression_ratio, 1),
+                            "compressed_path": compressed_path
+                        }
+                        self.video_repo.update_metadata(video_id, {"minio_compressed_path": compressed_path})
+                        logger.info(f"✅ Video compressed: {compression_ratio:.1f}% reduction")
+                except Exception as e:
+                    logger.warning(f"⚠️ Video compression failed: {e}")
+            # Step 7: Update final status
+            processing_time = time.time() - start_time
+            final_meta_data = {
+                "processing_status": "completed",
+                "keyframe_count": len(keyframes),
+                "detection_count": len(detection_results),
+                "behavior_detection_count": len(behavior_results),
+                "behavior_event_count": len(behavior_events),
+                "event_count": len(events),
+                "processing_time_seconds": round(processing_time, 2),
+                "processed_at": datetime.utcnow().isoformat(),
+                "compressed_video_info": compression_info,
+                "annotated_video_available": bool(annotated_minio_path),
+                "annotated_video_path": annotated_minio_path
+            }
+            self.video_repo.update_processing_status(video_id, "completed")
+            self.video_repo.update_metadata(video_id, final_meta_data)
+            results.update({
+                "status": "completed",
+                "processing_stats": final_meta_data,
+                "keyframes_extracted": len(keyframes),
+                "objects_detected": len(detection_results),
+                "behaviors_detected": len(behavior_results),
+                "behavior_events": len(behavior_events),
+                "events_created": len(events),
+                "processing_time": processing_time
+            })
+            logger.info(f"🎉 Complete pipeline processing finished for {video_id} in {processing_time:.1f}s")
+            return results
+        except Exception as e:
+            logger.error(f"❌ Processing failed for {video_id}: {e}")
+            # Update status to failed
+            try:
+                self.video_repo.update_processing_status(video_id, "failed")
+                self.video_repo.update_metadata(video_id, {
+                    "error_message": str(e),
+                    "failed_at": datetime.utcnow().isoformat()
+                })
+            except:
+                pass
+            results.update({
+                "status": "failed",
+                "error": str(e)
+            })
+            raise e
+    def _aggregate_detection_events(self, keyframes, video_id):
+        """Aggregate object detections into schema-compliant events"""
+        events = []
+        # Group keyframes with detections by detection type
+        detection_groups = {}
+        for keyframe in keyframes:
+            # Handle KeyframeResult objects
+            detections = getattr(keyframe, 'object_detections', [])
+            frame_data = keyframe.frame_data if hasattr(keyframe, 'frame_data') else keyframe
+            for detection in detections:
+                class_name = detection.get('class_name', 'unknown')
+                if class_name not in detection_groups:
+                    detection_groups[class_name] = []
+                detection_groups[class_name].append({
+                    'keyframe': keyframe,
+                    'detection': detection,
+                    'timestamp': frame_data.timestamp if hasattr(frame_data, 'timestamp') else 0
+                })
+        # Create events for each detection type
+        for class_name, detections in detection_groups.items():
+            if not detections:
+                continue
+            # Sort by timestamp
+            detections.sort(key=lambda x: x['timestamp'])
+            # Group nearby detections into events (within 3 seconds)
+            current_event = None
+            for det_info in detections:
+                timestamp = det_info['timestamp']
+                confidence = det_info['detection'].get('confidence', 0)
+                bbox = det_info['detection'].get('bbox', [0, 0, 0, 0])
+                # Check if this detection belongs to current event
+                if current_event and timestamp - current_event['end_timestamp'] <= 3.0:
+                    # Extend current event
+                    current_event['end_timestamp'] = timestamp
+                    current_event['confidence_score'] = max(current_event['confidence_score'], confidence)
+                    current_event['bounding_boxes'].append({
+                        "x": int(bbox[0]),
+                        "y": int(bbox[1]),
+                        "width": int(bbox[2] - bbox[0]),
+                        "height": int(bbox[3] - bbox[1]),
+                        "confidence": float(confidence),
+                        "class_name": class_name
+                    })
+                else:
+                    # Start new event
+                    if current_event:
+                        events.append(current_event)
+                    threat_level = self._calculate_threat_level(class_name, confidence)
+                    importance_score = 0.9 if class_name == 'fire' else 0.7 if class_name in ['knife', 'gun'] else 0.5
+                    current_event = {
+                        'event_type': f'object_detection_{class_name}',
+                        'start_timestamp': timestamp,
+                        'end_timestamp': timestamp,
+                        'confidence_score': confidence,
+                        'importance_score': importance_score,
+                        'threat_level': threat_level,
+                        'bounding_boxes': [{
+                            "x": int(bbox[0]),
+                            "y": int(bbox[1]),
+                            "width": int(bbox[2] - bbox[0]),
+                            "height": int(bbox[3] - bbox[1]),
+                            "confidence": float(confidence),
+                            "class_name": class_name
+                        }],
+                        'detected_object_type': class_name
+                    }
+            # Add final event
+            if current_event:
+                events.append(current_event)
+        return events
+    def _deduplicate_events(self, events):
+        """Remove duplicate or very similar events and mark them as false positives"""
+        if len(events) <= 1:
+            return events
+        # Sort events by start timestamp
+        events.sort(key=lambda x: x.get('start_timestamp', 0))
+        deduplicated = []
+        for event in events:
+            # Check if this event is too similar to recent events
+            is_duplicate = False
+            for recent_event in deduplicated[-3:]:  # Check last 3 events
+                # Same type and overlapping time window
+                if (event.get('event_type') == recent_event.get('event_type') and
+                    abs(event.get('start_timestamp', 0) - recent_event.get('end_timestamp', 0)) <= 5.0):
+                    # Check if same object types detected
+                    event_objects = {event.get('detected_object_type')}
+                    recent_objects = {recent_event.get('detected_object_type')}
+                    if event_objects & recent_objects:  # Common objects
+                        is_duplicate = True
+                        # Merge into the existing event (extend time window, keep highest confidence)
+                        recent_event['end_timestamp'] = max(
+                            recent_event.get('end_timestamp', 0),
+                            event.get('end_timestamp', 0)
+                        )
+                        recent_event['confidence_score'] = max(
+                            recent_event.get('confidence_score', 0),
+                            event.get('confidence_score', 0)
+                        )
+                        recent_event['bounding_boxes'].extend(event.get('bounding_boxes', []))
+                        break
+            if not is_duplicate:
+                deduplicated.append(event)
+        logger.info(f"🔄 Deduplication: {len(events)} → {len(deduplicated)} events")
+        return deduplicated

detectifai_events.py ADDED Viewed

	@@ -0,0 +1,577 @@

+"""
+DetectifAI Security Event System
+This module defines the specific security event types and processing logic
+according to DetectifAI's scope: assault/fighting, weapons, fire, jumping over wall,
+road accidents, and suspicious person re-occurrence.
+"""
+import os
+import time
+import logging
+from typing import Dict, List, Tuple, Optional, Any
+from dataclasses import dataclass, asdict
+from enum import Enum
+import json
+logger = logging.getLogger(__name__)
+class DetectifAIEventType(Enum):
+    """DetectifAI-specific security event types"""
+    FIRE_DETECTION = "fire_detection"
+    WEAPON_DETECTION = "weapon_detection"  # knife, gun
+    PHYSICAL_ASSAULT = "physical_assault"  # fighting, violence
+    WALL_JUMPING = "wall_jumping"         # perimeter breach
+    ROAD_ACCIDENT = "road_accident"       # vehicle collision
+    SUSPICIOUS_PERSON_REOCCURRENCE = "suspicious_person_reoccurrence"
+    GENERAL_MOTION = "general_motion"     # fallback for unclassified motion
+class ThreatLevel(Enum):
+    """Security threat levels for DetectifAI events"""
+    CRITICAL = "critical"  # Immediate response required (fire, weapons)
+    HIGH = "high"         # Urgent attention needed (assault, suspicious person)
+    MEDIUM = "medium"     # Monitor closely (wall jumping, accidents)
+    LOW = "low"          # General awareness (motion)
+@dataclass
+class DetectifAIEvent:
+    """Enhanced event structure specific to DetectifAI security requirements
+    Holds one classified security event: its type and threat level, time span,
+    confidence, supporting keyframes and response/investigation metadata.
+    Fields without a default are required, in declaration order, by the
+    generated constructor.
+    """
+    event_id: str
+    event_type: DetectifAIEventType
+    threat_level: ThreatLevel
+    start_timestamp: float
+    end_timestamp: float
+    duration: float  # presumably end_timestamp - start_timestamp; not enforced here
+    confidence: float
+    # Location and detection details
+    keyframes: List[str]
+    detection_details: Dict[str, Any]  # Specific to event type
+    # Security-specific fields
+    requires_immediate_response: bool
+    investigation_priority: int  # 1-10 scale
+    # Person tracking (for applicable events)
+    persons_detected: Optional[List[Dict]] = None  # defaults to None, not an empty list
+    is_person_reoccurrence: bool = False
+    # Context and description
+    description: str = ""
+    security_notes: str = ""
+    # Metadata
+    processing_timestamp: Optional[float] = None  # defaults to None when not supplied
+    detection_model_used: str = ""
+@dataclass
+class DetectifAICanonicalEvent:
+    """Canonical representation of aggregated DetectifAI security events
+    Summarises a group of aggregated events: the overall time span, the ids and
+    keyframes of the events it covers, confidence statistics, and
+    investigation-oriented fields. Fields without a default are required, in
+    declaration order, by the generated constructor.
+    """
+    canonical_id: str
+    event_type: DetectifAIEventType
+    threat_level: ThreatLevel
+    # Temporal information
+    start_time: float
+    end_time: float
+    total_duration: float
+    # Aggregation details
+    aggregated_events_count: int
+    aggregated_event_ids: List[str]
+    representative_frame: str
+    all_keyframes: List[str]
+    # Security assessment
+    max_confidence: float
+    average_confidence: float
+    investigation_priority: int
+    requires_immediate_response: bool
+    # Detection summary
+    total_detections: int
+    detection_summary: Dict[str, Any]
+    # Person tracking summary
+    unique_persons_count: int = 0
+    suspicious_persons: Optional[List[Dict]] = None  # defaults to None, not an empty list
+    person_reoccurrences: int = 0
+    # Investigation details
+    description: str = ""
+    security_assessment: str = ""
+    recommended_actions: Optional[List[str]] = None  # defaults to None, not an empty list
+class DetectifAIEventProcessor:
+    """Process and classify events according to DetectifAI security requirements"""
+    def __init__(self, config):
+        self.config = config
+        # DetectifAI-specific thresholds
+        self.threat_thresholds = {
+            DetectifAIEventType.FIRE_DETECTION: {
+                ThreatLevel.CRITICAL: 0.7,
+                ThreatLevel.HIGH: 0.5,
+                ThreatLevel.MEDIUM: 0.3,
+                ThreatLevel.LOW: 0.1
+            },
+            DetectifAIEventType.WEAPON_DETECTION: {
+                ThreatLevel.CRITICAL: 0.8,
+                ThreatLevel.HIGH: 0.6,
+                ThreatLevel.MEDIUM: 0.4,
+                ThreatLevel.LOW: 0.2
+            },
+            DetectifAIEventType.PHYSICAL_ASSAULT: {
+                ThreatLevel.CRITICAL: 0.9,
+                ThreatLevel.HIGH: 0.7,
+                ThreatLevel.MEDIUM: 0.5,
+                ThreatLevel.LOW: 0.3
+            },
+            DetectifAIEventType.WALL_JUMPING: {
+                ThreatLevel.HIGH: 0.8,
+                ThreatLevel.MEDIUM: 0.6,
+                ThreatLevel.LOW: 0.4
+            },
+            DetectifAIEventType.ROAD_ACCIDENT: {
+                ThreatLevel.HIGH: 0.8,
+                ThreatLevel.MEDIUM: 0.6,
+                ThreatLevel.LOW: 0.4
+            },
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: {
+                ThreatLevel.HIGH: 0.9,
+                ThreatLevel.MEDIUM: 0.7,
+                ThreatLevel.LOW: 0.5
+            }
+        }
+        # Processing statistics
+        self.processing_stats = {
+            'motion_events_processed': 0,
+            'object_events_processed': 0,
+            'detectifai_events_created': 0,
+            'facial_recognition_events': 0,
+            'placeholder_events_created': 0
+        }
+        logger.info("DetectifAI Event Processor initialized")
+    def process_security_events(self, keyframes: List, motion_events: List, object_events: List = None) -> List[DetectifAIEvent]:
+        """Main method to process all security events and convert to DetectifAI format"""
+        logger.info("🔍 Processing security events for DetectifAI system")
+        detectifai_events = []
+        # Convert object detection events
+        if object_events:
+            object_detectifai_events = self.convert_object_detection_to_detectifai_events(object_events)
+            detectifai_events.extend(object_detectifai_events)
+            self.processing_stats['object_events_processed'] = len(object_events)
+        # Create placeholder events from motion
+        placeholder_events = self.create_placeholder_events(keyframes, motion_events)
+        detectifai_events.extend(placeholder_events)
+        self.processing_stats['motion_events_processed'] = len(motion_events)
+        self.processing_stats['placeholder_events_created'] = len(placeholder_events)
+        # Update final count
+        self.processing_stats['detectifai_events_created'] = len(detectifai_events)
+        logger.info(f"✅ DetectifAI processing complete: {len(detectifai_events)} security events created")
+        return detectifai_events
+    def get_processing_stats(self) -> Dict[str, Any]:
+        """Get processing statistics"""
+        return self.processing_stats.copy()
+    def convert_object_detection_to_detectifai_events(self, object_events: List[Dict]) -> List[DetectifAIEvent]:
+        """Convert object detection events to DetectifAI security events"""
+        detectifai_events = []
+        for obj_event in object_events:
+            # Determine DetectifAI event type
+            object_class = obj_event.get('object_class', '').lower()
+            if object_class == 'fire':
+                event_type = DetectifAIEventType.FIRE_DETECTION
+            elif object_class in ['knife', 'gun']:
+                event_type = DetectifAIEventType.WEAPON_DETECTION
+            else:
+                event_type = DetectifAIEventType.GENERAL_MOTION
+            # Assess threat level
+            confidence = obj_event.get('confidence', 0.0)
+            threat_level = self._assess_threat_level(event_type, confidence)
+            # Create DetectifAI event
+            detectifai_event = DetectifAIEvent(
+                event_id=f"detectifai_{obj_event['event_id']}",
+                event_type=event_type,
+                threat_level=threat_level,
+                start_timestamp=obj_event['start_timestamp'],
+                end_timestamp=obj_event['end_timestamp'],
+                duration=obj_event['end_timestamp'] - obj_event['start_timestamp'],
+                confidence=confidence,
+                keyframes=obj_event.get('keyframes', []),
+                detection_details={
+                    'object_class': object_class,
+                    'detection_count': obj_event.get('detection_count', 0),
+                    'max_confidence': obj_event.get('max_confidence', confidence),
+                    'detection_data': obj_event.get('detection_details', [])
+                },
+                requires_immediate_response=threat_level in [ThreatLevel.CRITICAL, ThreatLevel.HIGH],
+                investigation_priority=self._calculate_investigation_priority(event_type, threat_level, confidence),
+                description=self._generate_detectifai_description(event_type, object_class, confidence),
+                processing_timestamp=time.time(),
+                detection_model_used=f"object_detection_{object_class}"
+            )
+            detectifai_events.append(detectifai_event)
+        logger.info(f"Converted {len(object_events)} object events to {len(detectifai_events)} DetectifAI events")
+        return detectifai_events
+    def create_placeholder_events(self, keyframes: List, motion_events: List) -> List[DetectifAIEvent]:
+        """Create placeholder events for unimplemented DetectifAI modules"""
+        placeholder_events = []
+        # Convert high-motion events to potential security events (placeholders)
+        for motion_event in motion_events:
+            if hasattr(motion_event, 'motion_intensity') and motion_event.motion_intensity > 0.015:
+                # High motion could be assault/fighting (placeholder)
+                placeholder_event = DetectifAIEvent(
+                    event_id=f"placeholder_assault_{motion_event.event_id}",
+                    event_type=DetectifAIEventType.PHYSICAL_ASSAULT,
+                    threat_level=ThreatLevel.MEDIUM,  # Conservative for placeholder
+                    start_timestamp=motion_event.start_timestamp,
+                    end_timestamp=motion_event.end_timestamp,
+                    duration=motion_event.end_timestamp - motion_event.start_timestamp,
+                    confidence=0.5,  # Placeholder confidence
+                    keyframes=motion_event.keyframes,
+                    detection_details={
+                        'placeholder': True,
+                        'motion_intensity': motion_event.motion_intensity,
+                        'original_event_type': motion_event.event_type
+                    },
+                    requires_immediate_response=False,
+                    investigation_priority=5,
+                    description=f"Potential physical assault detected (placeholder) - High motion intensity: {motion_event.motion_intensity:.3f}",
+                    security_notes="PLACEHOLDER: Requires fight detection module implementation",
+                    processing_timestamp=time.time(),
+                    detection_model_used="placeholder_fight_detection"
+                )
+                placeholder_events.append(placeholder_event)
+        # Add other placeholder event types based on analysis
+        # Wall jumping, road accidents, etc. can be added here based on scene analysis
+        logger.info(f"Created {len(placeholder_events)} placeholder DetectifAI events")
+        return placeholder_events
+    def _assess_threat_level(self, event_type: DetectifAIEventType, confidence: float) -> ThreatLevel:
+        """Assess threat level based on event type and confidence"""
+        if event_type not in self.threat_thresholds:
+            return ThreatLevel.LOW
+        thresholds = self.threat_thresholds[event_type]
+        for threat_level in [ThreatLevel.CRITICAL, ThreatLevel.HIGH, ThreatLevel.MEDIUM, ThreatLevel.LOW]:
+            if threat_level in thresholds and confidence >= thresholds[threat_level]:
+                return threat_level
+        return ThreatLevel.LOW
+    def _calculate_investigation_priority(self, event_type: DetectifAIEventType,
+                                        threat_level: ThreatLevel, confidence: float) -> int:
+        """Calculate investigation priority (1-10 scale)"""
+        base_priorities = {
+            DetectifAIEventType.FIRE_DETECTION: 9,
+            DetectifAIEventType.WEAPON_DETECTION: 8,
+            DetectifAIEventType.PHYSICAL_ASSAULT: 7,
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: 6,
+            DetectifAIEventType.WALL_JUMPING: 5,
+            DetectifAIEventType.ROAD_ACCIDENT: 4,
+            DetectifAIEventType.GENERAL_MOTION: 2
+        }
+        base_priority = base_priorities.get(event_type, 2)
+        # Adjust based on threat level
+        threat_multipliers = {
+            ThreatLevel.CRITICAL: 1.0,
+            ThreatLevel.HIGH: 0.9,
+            ThreatLevel.MEDIUM: 0.7,
+            ThreatLevel.LOW: 0.5
+        }
+        adjusted_priority = int(base_priority * threat_multipliers[threat_level])
+        # Boost for high confidence
+        if confidence > 0.8:
+            adjusted_priority = min(10, adjusted_priority + 1)
+        return max(1, min(10, adjusted_priority))
+    def _generate_detectifai_description(self, event_type: DetectifAIEventType,
+                                       object_class: str, confidence: float) -> str:
+        """Generate DetectifAI-specific event descriptions"""
+        descriptions = {
+            DetectifAIEventType.FIRE_DETECTION: f"🔥 Fire detected with {confidence:.1%} confidence - Immediate evacuation may be required",
+            DetectifAIEventType.WEAPON_DETECTION: f"⚠️ Weapon ({object_class}) detected with {confidence:.1%} confidence - Security alert triggered",
+            DetectifAIEventType.PHYSICAL_ASSAULT: f"👊 Physical assault detected with {confidence:.1%} confidence - Intervention may be needed",
+            DetectifAIEventType.WALL_JUMPING: f"🧗 Perimeter breach (wall jumping) detected with {confidence:.1%} confidence",
+            DetectifAIEventType.ROAD_ACCIDENT: f"🚗 Road accident detected with {confidence:.1%} confidence - Emergency services may be needed",
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: f"👤 Suspicious person re-occurrence detected with {confidence:.1%} confidence",
+            DetectifAIEventType.GENERAL_MOTION: f"📊 General motion activity detected"
+        }
+        return descriptions.get(event_type, f"Security event detected: {event_type.value}")
+class DetectifAIEventAggregator:
+    """Simplified event aggregation focused on DetectifAI security requirements"""
+    def __init__(self, config):
+        self.config = config
+        self.temporal_window = getattr(config, 'detectifai_temporal_window', 10.0)  # seconds
+    def aggregate_detectifai_events(self, events: List[DetectifAIEvent]) -> List[DetectifAICanonicalEvent]:
+        """Aggregate DetectifAI events into canonical security events"""
+        logger.info(f"Aggregating {len(events)} DetectifAI events")
+        if not events:
+            return []
+        # Group events by type for focused aggregation
+        events_by_type = {}
+        for event in events:
+            if event.event_type not in events_by_type:
+                events_by_type[event.event_type] = []
+            events_by_type[event.event_type].append(event)
+        canonical_events = []
+        canonical_id_counter = 1
+        # Process each event type separately with DetectifAI-specific logic
+        for event_type, type_events in events_by_type.items():
+            type_canonical = self._aggregate_by_detectifai_type(
+                event_type, type_events, canonical_id_counter
+            )
+            canonical_events.extend(type_canonical)
+            canonical_id_counter += len(type_canonical)
+        # Sort by investigation priority
+        canonical_events.sort(key=lambda e: e.investigation_priority, reverse=True)
+        logger.info(f"Created {len(canonical_events)} canonical DetectifAI events")
+        return canonical_events
+    def _aggregate_by_detectifai_type(self, event_type: DetectifAIEventType,
+                                    events: List[DetectifAIEvent],
+                                    start_id: int) -> List[DetectifAICanonicalEvent]:
+        """Aggregate events of specific DetectifAI type"""
+        if not events:
+            return []
+        # Sort events by timestamp
+        events.sort(key=lambda e: e.start_timestamp)
+        # Group events within temporal window
+        clusters = []
+        current_cluster = [events[0]]
+        for i in range(1, len(events)):
+            current_event = events[i]
+            last_in_cluster = current_cluster[-1]
+            # Check if events should be clustered
+            time_gap = current_event.start_timestamp - last_in_cluster.end_timestamp
+            if time_gap <= self.temporal_window:
+                current_cluster.append(current_event)
+            else:
+                clusters.append(current_cluster)
+                current_cluster = [current_event]
+        # Don't forget the last cluster
+        if current_cluster:
+            clusters.append(current_cluster)
+        # Create canonical events from clusters
+        canonical_events = []
+        for i, cluster in enumerate(clusters):
+            canonical_event = self._create_detectifai_canonical_event(
+                event_type, cluster, start_id + i
+            )
+            canonical_events.append(canonical_event)
+        return canonical_events
+    def _create_detectifai_canonical_event(self, event_type: DetectifAIEventType,
+                                         cluster: List[DetectifAIEvent],
+                                         canonical_id: int) -> DetectifAICanonicalEvent:
+        """Create canonical event from DetectifAI event cluster"""
+        # Find highest priority event as representative
+        representative = max(cluster, key=lambda e: e.investigation_priority)
+        # Aggregate temporal information
+        start_time = min(e.start_timestamp for e in cluster)
+        end_time = max(e.end_timestamp for e in cluster)
+        total_duration = end_time - start_time
+        # Aggregate confidence and priority
+        max_confidence = max(e.confidence for e in cluster)
+        avg_confidence = sum(e.confidence for e in cluster) / len(cluster)
+        max_priority = max(e.investigation_priority for e in cluster)
+        # Collect all keyframes
+        all_keyframes = []
+        for event in cluster:
+            all_keyframes.extend(event.keyframes)
+        unique_keyframes = list(set(all_keyframes))
+        # Aggregate detection information
+        total_detections = sum(
+            event.detection_details.get('detection_count', 1) for event in cluster
+        )
+        # Determine if immediate response required
+        requires_immediate_response = any(e.requires_immediate_response for e in cluster)
+        # Get highest threat level
+        threat_levels = [ThreatLevel.LOW, ThreatLevel.MEDIUM, ThreatLevel.HIGH, ThreatLevel.CRITICAL]
+        max_threat_level = max((e.threat_level for e in cluster), key=lambda t: threat_levels.index(t))
+        # Create detection summary
+        detection_summary = {
+            'total_events_aggregated': len(cluster),
+            'detection_methods': list(set(e.detection_model_used for e in cluster)),
+            'confidence_range': {
+                'min': min(e.confidence for e in cluster),
+                'max': max_confidence,
+                'average': avg_confidence
+            },
+            'detection_details': [e.detection_details for e in cluster]
+        }
+        # Generate description and assessment
+        description = self._generate_canonical_description(event_type, cluster, max_confidence)
+        security_assessment = self._generate_security_assessment(event_type, max_threat_level, len(cluster))
+        recommended_actions = self._get_recommended_actions(event_type, max_threat_level)
+        canonical_event = DetectifAICanonicalEvent(
+            canonical_id=f"detectifai_canonical_{canonical_id:04d}",
+            event_type=event_type,
+            threat_level=max_threat_level,
+            start_time=start_time,
+            end_time=end_time,
+            total_duration=total_duration,
+            aggregated_events_count=len(cluster),
+            aggregated_event_ids=[e.event_id for e in cluster],
+            representative_frame=representative.keyframes[0] if representative.keyframes else "",
+            all_keyframes=unique_keyframes,
+            max_confidence=max_confidence,
+            average_confidence=avg_confidence,
+            investigation_priority=max_priority,
+            requires_immediate_response=requires_immediate_response,
+            total_detections=total_detections,
+            detection_summary=detection_summary,
+            description=description,
+            security_assessment=security_assessment,
+            recommended_actions=recommended_actions
+        )
+        return canonical_event
+    def _generate_canonical_description(self, event_type: DetectifAIEventType,
+                                      cluster: List[DetectifAIEvent], confidence: float) -> str:
+        """Generate description for canonical DetectifAI event"""
+        event_count = len(cluster)
+        duration = max(e.end_timestamp for e in cluster) - min(e.start_timestamp for e in cluster)
+        base_descriptions = {
+            DetectifAIEventType.FIRE_DETECTION: f"Fire incident - {event_count} detections over {duration:.1f}s",
+            DetectifAIEventType.WEAPON_DETECTION: f"Weapon threat - {event_count} detections over {duration:.1f}s",
+            DetectifAIEventType.PHYSICAL_ASSAULT: f"Physical assault incident - {event_count} events over {duration:.1f}s",
+            DetectifAIEventType.WALL_JUMPING: f"Perimeter breach - {event_count} wall jumping events over {duration:.1f}s",
+            DetectifAIEventType.ROAD_ACCIDENT: f"Road accident - {event_count} incidents over {duration:.1f}s",
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: f"Suspicious person alert - {event_count} re-occurrences",
+            DetectifAIEventType.GENERAL_MOTION: f"Motion activity - {event_count} events over {duration:.1f}s"
+        }
+        return base_descriptions.get(event_type, f"Security event: {event_type.value}")
+    def _generate_security_assessment(self, event_type: DetectifAIEventType,
+                                    threat_level: ThreatLevel, event_count: int) -> str:
+        """Generate security assessment for canonical event"""
+        assessments = {
+            (DetectifAIEventType.FIRE_DETECTION, ThreatLevel.CRITICAL): "CRITICAL: Immediate evacuation and fire response required",
+            (DetectifAIEventType.WEAPON_DETECTION, ThreatLevel.CRITICAL): "CRITICAL: Armed threat present - immediate security intervention",
+            (DetectifAIEventType.PHYSICAL_ASSAULT, ThreatLevel.HIGH): "HIGH: Violence in progress - security response needed",
+            (DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE, ThreatLevel.HIGH): "HIGH: Known suspicious individual returned - monitor closely"
+        }
+        specific_assessment = assessments.get((event_type, threat_level))
+        if specific_assessment:
+            return specific_assessment
+        # Generic assessment based on threat level
+        generic_assessments = {
+            ThreatLevel.CRITICAL: f"CRITICAL threat level - immediate response required",
+            ThreatLevel.HIGH: f"HIGH priority security event - urgent attention needed",
+            ThreatLevel.MEDIUM: f"MEDIUM priority - monitor and assess situation",
+            ThreatLevel.LOW: f"LOW priority - general awareness sufficient"
+        }
+        return generic_assessments.get(threat_level, "Security event requires assessment")
+    def _get_recommended_actions(self, event_type: DetectifAIEventType,
+                               threat_level: ThreatLevel) -> List[str]:
+        """Get recommended actions for DetectifAI event types"""
+        actions_map = {
+            DetectifAIEventType.FIRE_DETECTION: [
+                "Verify fire location and extent",
+                "Initiate evacuation procedures if confirmed",
+                "Contact fire department",
+                "Monitor spread and safety of personnel"
+            ],
+            DetectifAIEventType.WEAPON_DETECTION: [
+                "Verify weapon type and threat level",
+                "Alert security personnel immediately",
+                "Consider lockdown procedures",
+                "Contact law enforcement if confirmed threat"
+            ],
+            DetectifAIEventType.PHYSICAL_ASSAULT: [
+                "Assess severity of altercation",
+                "Dispatch security to location",
+                "Consider medical assistance",
+                "Document incident for investigation"
+            ],
+            DetectifAIEventType.WALL_JUMPING: [
+                "Verify perimeter breach",
+                "Check intruder location and intent",
+                "Review security footage",
+                "Assess security protocol effectiveness"
+            ],
+            DetectifAIEventType.ROAD_ACCIDENT: [
+                "Assess severity of accident",
+                "Check for injuries",
+                "Contact emergency services if needed",
+                "Manage traffic flow around incident"
+            ],
+            DetectifAIEventType.SUSPICIOUS_PERSON_REOCCURRENCE: [
+                "Review person's previous incidents",
+                "Monitor current activities closely",
+                "Alert security personnel",
+                "Consider preventive measures"
+            ]
+        }
+        base_actions = actions_map.get(event_type, ["Monitor situation", "Assess threat level", "Take appropriate action"])
+        # Add threat-level specific actions
+        if threat_level == ThreatLevel.CRITICAL:
+            base_actions.insert(0, "IMMEDIATE ACTION REQUIRED")
+        elif threat_level == ThreatLevel.HIGH:
+            base_actions.insert(0, "URGENT: Prioritize response")
+        return base_actions

event_aggregation.py ADDED Viewed

	@@ -0,0 +1,819 @@

+"""
+Event Aggregation and Deduplication Module
+This module handles:
+- Event detection and clustering
+- Temporal aggregation of related events
+- Duplicate frame removal using similarity detection
+- Canonical event generation
+"""
+import numpy as np
+import cv2
+import json
+import os
+from typing import List, Dict, Tuple, Set, Any, Optional
+from dataclasses import dataclass, asdict
+import imagehash
+from PIL import Image
+from collections import defaultdict
+import logging
+from datetime import datetime
+logger = logging.getLogger(__name__)
+@dataclass
+class Event:
+    """Represents a detected event.
+    One record describes a single time span (``start_timestamp`` to
+    ``end_timestamp``) together with the keyframes that support it. The
+    object-detection fields keep their defaults unless set by the caller.
+    """
+    event_id: str
+    start_timestamp: float
+    end_timestamp: float
+    event_type: str
+    confidence: float
+    keyframes: List[str]  # Frame paths
+    importance_score: float
+    motion_intensity: float
+    description: str = ""
+    # Object detection specific fields
+    object_class: str = ""           # For object-based events (fire, knife, gun)
+    detection_count: int = 0         # Number of detections in this event
+    max_confidence: float = 0.0      # Highest confidence detection
+    is_object_event: bool = False    # Flag to identify object-based events
+    detection_details: Optional[List] = None   # Raw detection data; None when not supplied
+@dataclass
+class CanonicalEvent:
+    """Canonical representation of aggregated events.
+    Holds one representative frame and the ids of the events merged into it.
+    The object-detection fields keep their defaults unless set by the caller.
+    """
+    canonical_id: str
+    event_type: str
+    representative_frame: str
+    start_time: float
+    end_time: float
+    duration: float
+    confidence: float
+    frame_count: int
+    aggregated_events: List[str]  # Event IDs
+    description: str
+    similarity_cluster: int
+    # Enhanced object detection fields
+    contains_objects: bool = False           # Whether this canonical event has object detections
+    detected_object_classes: Optional[List[str]] = None  # List of detected object classes; None when not supplied
+    object_detection_summary: Optional[Dict] = None      # Summary of object detections; None when not supplied
+    threat_level: str = "low"                # Threat assessment: low, medium, high, critical
+class SimilarityCalculator:
+    """Calculate similarity between frames using multiple methods"""
+    def __init__(self, similarity_threshold: float = 0.85):
+        self.similarity_threshold = similarity_threshold
+    def calculate_histogram_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
+        """Calculate histogram-based similarity"""
+        try:
+            # Convert to HSV for better color comparison
+            hsv1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2HSV)
+            hsv2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2HSV)
+            # Calculate histograms
+            hist1 = cv2.calcHist([hsv1], [0, 1, 2], None, [50, 60, 60], [0, 180, 0, 256, 0, 256])
+            hist2 = cv2.calcHist([hsv2], [0, 1, 2], None, [50, 60, 60], [0, 180, 0, 256, 0, 256])
+            # Calculate correlation
+            correlation = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)
+            return max(0.0, correlation)
+        except Exception as e:
+            logger.error(f"Histogram similarity calculation failed: {e}")
+            return 0.0
+    def calculate_perceptual_hash_similarity(self, frame1_path: str, frame2_path: str) -> float:
+        """Calculate perceptual hash similarity"""
+        try:
+            # Load images with PIL for imagehash
+            img1 = Image.open(frame1_path)
+            img2 = Image.open(frame2_path)
+            # Calculate perceptual hashes
+            hash1 = imagehash.phash(img1)
+            hash2 = imagehash.phash(img2)
+            # Calculate similarity (lower hash difference = higher similarity)
+            hash_diff = hash1 - hash2
+            similarity = 1.0 - (hash_diff / 64.0)  # Normalize to 0-1
+            return max(0.0, similarity)
+        except Exception as e:
+            logger.error(f"Perceptual hash similarity calculation failed: {e}")
+            return 0.0
+    def calculate_structural_similarity(self, frame1: np.ndarray, frame2: np.ndarray) -> float:
+        """Calculate structural similarity using template matching"""
+        try:
+            # Convert to grayscale
+            gray1 = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
+            gray2 = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
+            # Resize to same dimensions if needed
+            if gray1.shape != gray2.shape:
+                h, w = min(gray1.shape[0], gray2.shape[0]), min(gray1.shape[1], gray2.shape[1])
+                gray1 = cv2.resize(gray1, (w, h))
+                gray2 = cv2.resize(gray2, (w, h))
+            # Calculate normalized cross-correlation
+            result = cv2.matchTemplate(gray1, gray2, cv2.TM_CCOEFF_NORMED)
+            similarity = result[0, 0]
+            return max(0.0, similarity)
+        except Exception as e:
+            logger.error(f"Structural similarity calculation failed: {e}")
+            return 0.0
+    def calculate_combined_similarity(self, frame1_path: str, frame2_path: str) -> float:
+        """Calculate combined similarity score using multiple methods"""
+        try:
+            # Load frames
+            frame1 = cv2.imread(frame1_path)
+            frame2 = cv2.imread(frame2_path)
+            if frame1 is None or frame2 is None:
+                return 0.0
+            # Calculate different similarity metrics
+            hist_sim = self.calculate_histogram_similarity(frame1, frame2)
+            hash_sim = self.calculate_perceptual_hash_similarity(frame1_path, frame2_path)
+            struct_sim = self.calculate_structural_similarity(frame1, frame2)
+            # Weighted combination
+            combined_similarity = (
+                hist_sim * 0.4 +      # Histogram similarity
+                hash_sim * 0.4 +      # Perceptual hash similarity
+                struct_sim * 0.2      # Structural similarity
+            )
+            return min(1.0, combined_similarity)
+        except Exception as e:
+            logger.error(f"Combined similarity calculation failed: {e}")
+            return 0.0
+class EventDetector:
+    """Detect events from keyframes"""
+    def __init__(self, config):
+        self.config = config
+        self.event_types = {
+            'high_motion': {'motion_threshold': config.motion_threshold * 2},
+            'burst_activity': {'requires_burst': True},
+            'scene_change': {'change_threshold': config.scene_change_threshold},
+            'quality_peak': {'quality_threshold': config.base_quality_threshold * 1.5}
+        }
+    def detect_events(self, keyframes: List) -> List[Event]:
+        """Detect events from keyframes"""
+        logger.info(f"Detecting events from {len(keyframes)} keyframes")
+        events = []
+        event_id_counter = 1
+        # Temporal clustering for event detection
+        clusters = self._create_temporal_clusters(keyframes)
+        for cluster in clusters:
+            if len(cluster) == 0:
+                continue
+            # Analyze cluster for event types
+            cluster_events = self._analyze_cluster_for_events(cluster, event_id_counter)
+            events.extend(cluster_events)
+            event_id_counter += len(cluster_events)
+        logger.info(f"Detected {len(events)} events")
+        return events
+    def _create_temporal_clusters(self, keyframes: List) -> List[List]:
+        """Create temporal clusters of keyframes"""
+        if not keyframes:
+            return []
+        # Sort keyframes by timestamp
+        sorted_keyframes = sorted(keyframes, key=lambda x: x.frame_data.timestamp)
+        clusters = []
+        current_cluster = [sorted_keyframes[0]]
+        for i in range(1, len(sorted_keyframes)):
+            current_kf = sorted_keyframes[i]
+            last_kf = current_cluster[-1]
+            time_gap = current_kf.frame_data.timestamp - last_kf.frame_data.timestamp
+            # If gap is within clustering window, add to current cluster
+            if time_gap <= self.config.temporal_clustering_window:
+                current_cluster.append(current_kf)
+            else:
+                # Start new cluster
+                if len(current_cluster) > 0:
+                    clusters.append(current_cluster)
+                current_cluster = [current_kf]
+        # Don't forget the last cluster
+        if len(current_cluster) > 0:
+            clusters.append(current_cluster)
+        return clusters
+    def _analyze_cluster_for_events(self, cluster: List, start_event_id: int) -> List[Event]:
+        """Analyze a temporal cluster for different event types"""
+        events = []
+        if not cluster:
+            return events
+        # Calculate cluster metrics
+        motion_scores = [kf.frame_data.motion_score for kf in cluster]
+        quality_scores = [kf.frame_data.quality_score for kf in cluster]
+        burst_frames = [kf for kf in cluster if kf.frame_data.burst_active]
+        start_time = min(kf.frame_data.timestamp for kf in cluster)
+        end_time = max(kf.frame_data.timestamp for kf in cluster)
+        max_motion = max(motion_scores) if motion_scores else 0
+        avg_motion = sum(motion_scores) / len(motion_scores) if motion_scores else 0
+        max_quality = max(quality_scores) if quality_scores else 0
+        # High motion event
+        if max_motion > self.config.motion_threshold * 2:
+            event = Event(
+                event_id=f"event_{start_event_id:04d}",
+                start_timestamp=start_time,
+                end_timestamp=end_time,
+                event_type="high_motion",
+                confidence=min(max_motion * 2, 1.0),
+                keyframes=[kf.frame_data.frame_path for kf in cluster],
+                importance_score=max_motion + (avg_motion * 0.5),
+                motion_intensity=max_motion,
+                description=f"High motion event with peak intensity {max_motion:.3f}"
+            )
+            events.append(event)
+            start_event_id += 1
+        # Burst activity event
+        if len(burst_frames) >= 2:
+            event = Event(
+                event_id=f"event_{start_event_id:04d}",
+                start_timestamp=start_time,
+                end_timestamp=end_time,
+                event_type="burst_activity",
+                confidence=min(len(burst_frames) / len(cluster), 1.0),
+                keyframes=[kf.frame_data.frame_path for kf in burst_frames],
+                importance_score=len(burst_frames) * 0.3 + avg_motion,
+                motion_intensity=max_motion,
+                description=f"Burst activity with {len(burst_frames)} active frames"
+            )
+            events.append(event)
+            start_event_id += 1
+        # Quality peak event
+        if max_quality > self.config.base_quality_threshold * 1.5:
+            high_quality_frames = [kf for kf in cluster if kf.frame_data.quality_score > self.config.base_quality_threshold * 1.3]
+            if high_quality_frames:
+                event = Event(
+                    event_id=f"event_{start_event_id:04d}",
+                    start_timestamp=start_time,
+                    end_timestamp=end_time,
+                    event_type="quality_peak",
+                    confidence=max_quality,
+                    keyframes=[kf.frame_data.frame_path for kf in high_quality_frames],
+                    importance_score=max_quality + (len(high_quality_frames) * 0.1),
+                    motion_intensity=max_motion,
+                    description=f"High quality event with peak score {max_quality:.3f}"
+                )
+                events.append(event)
+        return events
+    def convert_object_events_to_standard_format(self, object_events: List[Dict]) -> List[Event]:
+        """Convert object events from object detection module to standard Event format"""
+        standard_events = []
+        for obj_event in object_events:
+            # Convert object event dict to Event dataclass
+            event = Event(
+                event_id=obj_event['event_id'],
+                start_timestamp=obj_event['start_timestamp'],
+                end_timestamp=obj_event['end_timestamp'],
+                event_type=obj_event['event_type'],
+                confidence=obj_event['confidence'],
+                keyframes=obj_event['keyframes'],
+                importance_score=obj_event['importance_score'],
+                motion_intensity=obj_event.get('motion_intensity', 0.0),
+                description=obj_event['description'],
+                # Object-specific fields
+                object_class=obj_event.get('object_class', ''),
+                detection_count=obj_event.get('detection_count', 0),
+                max_confidence=obj_event.get('max_confidence', obj_event['confidence']),
+                is_object_event=True,
+                detection_details=obj_event.get('detection_details', [])
+            )
+            standard_events.append(event)
+        return standard_events
+    def convert_behavior_events_to_standard_format(self, behavior_events: List) -> List[Event]:
+        """Convert behavior events from behavior analysis module to standard Event format"""
+        standard_events = []
+        for behavior_event in behavior_events:
+            # Handle both dataclass and dict formats
+            if hasattr(behavior_event, 'behavior_type'):
+                # Dataclass format (from BehaviorEvent)
+                event = Event(
+                    event_id=behavior_event.event_id,
+                    start_timestamp=behavior_event.start_timestamp,
+                    end_timestamp=behavior_event.end_timestamp,
+                    event_type=f"behavior_{behavior_event.behavior_type}",
+                    confidence=behavior_event.confidence,
+                    keyframes=behavior_event.keyframes,
+                    importance_score=behavior_event.importance_score,
+                    motion_intensity=0.0,  # Behavior events don't have motion intensity
+                    description=f"{behavior_event.behavior_type.capitalize()} detected (confidence: {behavior_event.confidence:.2f})",
+                    # Use object_class field to store behavior type for consistency
+                    object_class=behavior_event.behavior_type,
+                    detection_count=len(behavior_event.frame_indices),
+                    max_confidence=behavior_event.confidence,
+                    is_object_event=False,  # Behavior events are separate from object events
+                    detection_details=[{
+                        'model_used': behavior_event.model_used,
+                        'frame_indices': behavior_event.frame_indices
+                    }]
+                )
+            else:
+                # Dict format (fallback)
+                event = Event(
+                    event_id=behavior_event.get('event_id', f"behavior_{len(standard_events)}"),
+                    start_timestamp=behavior_event.get('start_timestamp', 0.0),
+                    end_timestamp=behavior_event.get('end_timestamp', 0.0),
+                    event_type=f"behavior_{behavior_event.get('behavior_type', 'unknown')}",
+                    confidence=behavior_event.get('confidence', 0.0),
+                    keyframes=behavior_event.get('keyframes', []),
+                    importance_score=behavior_event.get('importance_score', 0.0),
+                    motion_intensity=0.0,
+                    description=behavior_event.get('description', 'Behavior detected'),
+                    object_class=behavior_event.get('behavior_type', ''),
+                    detection_count=len(behavior_event.get('frame_indices', [])),
+                    max_confidence=behavior_event.get('confidence', 0.0),
+                    is_object_event=False,
+                    detection_details=[{
+                        'model_used': behavior_event.get('model_used', 'unknown'),
+                        'frame_indices': behavior_event.get('frame_indices', [])
+                    }]
+                )
+            standard_events.append(event)
+        return standard_events
+    def assess_threat_level(self, event: Event) -> str:
+        """Assess threat level for events, particularly object-based events"""
+        if not event.is_object_event:
+            # For motion events, use motion intensity and burst activity
+            if event.event_type == "high_motion" and event.motion_intensity > 0.015:
+                return "medium"
+            elif event.event_type == "burst_activity":
+                return "medium"
+            else:
+                return "low"
+        # Object-based threat assessment
+        threat_map = {
+            'fire': {
+                'low': 0.3,      # Confidence thresholds
+                'medium': 0.5,
+                'high': 0.7,
+                'critical': 0.85
+            },
+            'gun': {
+                'low': 0.4,
+                'medium': 0.6,
+                'high': 0.8,
+                'critical': 0.9
+            },
+            'knife': {
+                'low': 0.4,
+                'medium': 0.6,
+                'high': 0.75,
+                'critical': 0.85
+            }
+        }
+        obj_class = event.object_class.lower()
+        confidence = event.max_confidence
+        if obj_class in threat_map:
+            thresholds = threat_map[obj_class]
+            if confidence >= thresholds['critical']:
+                return "critical"
+            elif confidence >= thresholds['high']:
+                return "high"
+            elif confidence >= thresholds['medium']:
+                return "medium"
+            else:
+                return "low"
+        return "medium"  # Default for unknown object types
+class EventDeduplicationEngine:
+    """Remove duplicate events and create canonical representations"""
+    def __init__(self, config):
+        self.config = config
+        self.similarity_calculator = SimilarityCalculator(config.similarity_threshold)
+    def deduplicate_events(self, events: List[Event]) -> Tuple[List[CanonicalEvent], Dict[str, Any]]:
+        """
+        Deduplicate events and create canonical representations
+        Returns:
+            Tuple of (canonical_events, deduplication_stats)
+        """
+        logger.info(f"Deduplicating {len(events)} events")
+        if not events:
+            return [], {}
+        # Group events by type first
+        events_by_type = defaultdict(list)
+        for event in events:
+            events_by_type[event.event_type].append(event)
+        canonical_events = []
+        dedup_stats = {
+            'original_events': len(events),
+            'canonical_events': 0,
+            'duplicates_removed': 0,
+            'similarity_clusters': 0
+        }
+        canonical_id_counter = 1
+        # Process each event type separately
+        for event_type, type_events in events_by_type.items():
+            type_canonical = self._deduplicate_events_by_type(
+                type_events, event_type, canonical_id_counter
+            )
+            canonical_events.extend(type_canonical)
+            canonical_id_counter += len(type_canonical)
+        # Update stats
+        dedup_stats['canonical_events'] = len(canonical_events)
+        dedup_stats['duplicates_removed'] = dedup_stats['original_events'] - dedup_stats['canonical_events']
+        dedup_stats['similarity_clusters'] = len(canonical_events)
+        logger.info(f"Deduplication complete: {len(canonical_events)} canonical events created")
+        return canonical_events, dedup_stats
+    def _deduplicate_events_by_type(self, events: List[Event], event_type: str,
+                                  start_canonical_id: int) -> List[CanonicalEvent]:
+        """Deduplicate events of the same type"""
+        if not events:
+            return []
+        # Create similarity matrix
+        similarity_matrix = self._create_similarity_matrix(events)
+        # Cluster similar events
+        clusters = self._cluster_similar_events(events, similarity_matrix)
+        # Create canonical events from clusters
+        canonical_events = []
+        for i, cluster in enumerate(clusters):
+            canonical_event = self._create_canonical_event(
+                cluster, event_type, start_canonical_id + i, i
+            )
+            canonical_events.append(canonical_event)
+        return canonical_events
+    def _create_similarity_matrix(self, events: List[Event]) -> np.ndarray:
+        """Create similarity matrix between events"""
+        n = len(events)
+        similarity_matrix = np.zeros((n, n))
+        for i in range(n):
+            for j in range(i, n):
+                if i == j:
+                    similarity_matrix[i, j] = 1.0
+                else:
+                    # Calculate similarity between representative frames
+                    sim_score = self._calculate_event_similarity(events[i], events[j])
+                    similarity_matrix[i, j] = sim_score
+                    similarity_matrix[j, i] = sim_score
+        return similarity_matrix
+    def _calculate_event_similarity(self, event1: Event, event2: Event) -> float:
+        """Calculate similarity between two events (enhanced for object events)"""
+        try:
+            # Object events similarity
+            if event1.is_object_event and event2.is_object_event:
+                return self._calculate_object_event_similarity(event1, event2)
+            elif event1.is_object_event != event2.is_object_event:
+                # Different event types (object vs motion) - lower similarity
+                return 0.1
+            # Motion events similarity (original logic)
+            # Time overlap similarity
+            time_overlap = self._calculate_time_overlap(event1, event2)
+            # Frame content similarity (use representative frames)
+            frame1 = event1.keyframes[0] if event1.keyframes else None
+            frame2 = event2.keyframes[0] if event2.keyframes else None
+            content_similarity = 0.0
+            if frame1 and frame2 and os.path.exists(frame1) and os.path.exists(frame2):
+                content_similarity = self.similarity_calculator.calculate_combined_similarity(frame1, frame2)
+            # Motion intensity similarity
+            motion_sim = 1.0 - abs(event1.motion_intensity - event2.motion_intensity)
+            # Combined similarity
+            combined_similarity = (
+                time_overlap * 0.3 +
+                content_similarity * 0.5 +
+                motion_sim * 0.2
+            )
+            return combined_similarity
+        except Exception as e:
+            logger.error(f"Event similarity calculation failed: {e}")
+            return 0.0
+    def _calculate_object_event_similarity(self, event1: Event, event2: Event) -> float:
+        """Calculate similarity between two object events"""
+        try:
+            # Object class similarity (must be same class)
+            if event1.object_class != event2.object_class:
+                return 0.0  # Different object types are not similar
+            # Time proximity
+            time_gap = abs(event1.start_timestamp - event2.start_timestamp)
+            time_similarity = max(0.0, 1.0 - (time_gap / self.config.object_event_temporal_window))
+            # Confidence similarity
+            conf_diff = abs(event1.confidence - event2.confidence)
+            conf_similarity = max(0.0, 1.0 - conf_diff)
+            # Detection count similarity
+            count_diff = abs(event1.detection_count - event2.detection_count)
+            count_similarity = max(0.0, 1.0 - (count_diff / max(event1.detection_count, event2.detection_count, 1)))
+            # Frame content similarity
+            frame1 = event1.keyframes[0] if event1.keyframes else None
+            frame2 = event2.keyframes[0] if event2.keyframes else None
+            content_similarity = 0.0
+            if frame1 and frame2 and os.path.exists(frame1) and os.path.exists(frame2):
+                content_similarity = self.similarity_calculator.calculate_combined_similarity(frame1, frame2)
+            # Combined similarity for object events
+            combined_similarity = (
+                time_similarity * 0.4 +      # Time proximity is important
+                content_similarity * 0.3 +    # Visual similarity
+                conf_similarity * 0.2 +       # Confidence similarity
+                count_similarity * 0.1        # Detection count similarity
+            )
+            return combined_similarity
+        except Exception as e:
+            logger.error(f"Object event similarity calculation failed: {e}")
+            return 0.0
+    def _calculate_time_overlap(self, event1: Event, event2: Event) -> float:
+        """Calculate temporal overlap between events"""
+        start1, end1 = event1.start_timestamp, event1.end_timestamp
+        start2, end2 = event2.start_timestamp, event2.end_timestamp
+        # Calculate overlap
+        overlap_start = max(start1, start2)
+        overlap_end = min(end1, end2)
+        if overlap_start >= overlap_end:
+            return 0.0
+        overlap_duration = overlap_end - overlap_start
+        total_duration = max(end1, end2) - min(start1, start2)
+        return overlap_duration / total_duration if total_duration > 0 else 0.0
+    def _cluster_similar_events(self, events: List[Event], similarity_matrix: np.ndarray) -> List[List[Event]]:
+        """Cluster similar events using similarity threshold"""
+        n = len(events)
+        visited = [False] * n
+        clusters = []
+        for i in range(n):
+            if visited[i]:
+                continue
+            # Start new cluster
+            cluster = [events[i]]
+            visited[i] = True
+            # Find similar events
+            for j in range(i + 1, n):
+                if not visited[j] and similarity_matrix[i, j] >= self.config.similarity_threshold:
+                    cluster.append(events[j])
+                    visited[j] = True
+            clusters.append(cluster)
+        return clusters
+    def _create_canonical_event(self, cluster: List[Event], event_type: str,
+                              canonical_id: int, cluster_id: int) -> CanonicalEvent:
+        """Create canonical event from cluster of similar events"""
+        if not cluster:
+            raise ValueError("Cannot create canonical event from empty cluster")
+        # Find representative event (highest importance score)
+        representative = max(cluster, key=lambda e: e.importance_score)
+        # Aggregate properties
+        start_time = min(e.start_timestamp for e in cluster)
+        end_time = max(e.end_timestamp for e in cluster)
+        duration = end_time - start_time
+        avg_confidence = sum(e.confidence for e in cluster) / len(cluster)
+        # Collect all keyframes
+        all_keyframes = []
+        for event in cluster:
+            all_keyframes.extend(event.keyframes)
+        # Remove duplicate frame paths
+        unique_keyframes = list(set(all_keyframes))
+        # Check if this cluster contains object events
+        object_events = [e for e in cluster if e.is_object_event]
+        contains_objects = len(object_events) > 0
+        # Object detection summary
+        detected_classes = []
+        object_summary = None
+        threat_level = "low"
+        if contains_objects:
+            # Collect detected object classes
+            detected_classes = list(set(e.object_class for e in object_events if e.object_class))
+            # Calculate object detection summary
+            total_detections = sum(e.detection_count for e in object_events)
+            max_confidence = max(e.max_confidence for e in object_events)
+            avg_obj_confidence = sum(e.confidence for e in object_events) / len(object_events)
+            object_summary = {
+                'total_detections': total_detections,
+                'max_confidence': max_confidence,
+                'average_confidence': avg_obj_confidence,
+                'detected_classes': detected_classes,
+                'object_events_count': len(object_events)
+            }
+            # Assess threat level based on object classes and confidence
+            threat_level = self._assess_canonical_threat_level(object_events)
+        # Create enhanced description
+        if contains_objects:
+            objects_str = ", ".join(detected_classes)
+            description = f"{event_type.replace('_', ' ').title()} with {objects_str} detected - {len(cluster)} events aggregated"
+        else:
+            description = f"{event_type.replace('_', ' ').title()} event aggregated from {len(cluster)} similar events"
+        canonical_event = CanonicalEvent(
+            canonical_id=f"canonical_{canonical_id:04d}",
+            event_type=event_type,
+            representative_frame=representative.keyframes[0] if representative.keyframes else "",
+            start_time=start_time,
+            end_time=end_time,
+            duration=duration,
+            confidence=avg_confidence,
+            frame_count=len(unique_keyframes),
+            aggregated_events=[e.event_id for e in cluster],
+            description=description,
+            similarity_cluster=cluster_id,
+            # Enhanced object detection fields
+            contains_objects=contains_objects,
+            detected_object_classes=detected_classes,
+            object_detection_summary=object_summary,
+            threat_level=threat_level
+        )
+        return canonical_event
+    def _assess_canonical_threat_level(self, object_events: List[Event]) -> str:
+        """Assess threat level for canonical event containing object events"""
+        if not object_events:
+            return "low"
+        # Get highest threat level from individual events
+        threat_levels = ["low", "medium", "high", "critical"]
+        max_threat_index = 0
+        for event in object_events:
+            event_threat = self._assess_individual_threat_level(event)
+            threat_index = threat_levels.index(event_threat) if event_threat in threat_levels else 0
+            max_threat_index = max(max_threat_index, threat_index)
+        # Additional factors for canonical events
+        max_confidence = max(e.max_confidence for e in object_events)
+        total_detections = sum(e.detection_count for e in object_events)
+        unique_classes = len(set(e.object_class for e in object_events))
+        # Escalate threat if multiple factors present
+        if unique_classes > 1:  # Multiple types of objects detected
+            max_threat_index = min(max_threat_index + 1, len(threat_levels) - 1)
+        if total_detections > 10:  # Many detections
+            max_threat_index = min(max_threat_index + 1, len(threat_levels) - 1)
+        if max_confidence > 0.9:  # Very high confidence
+            max_threat_index = min(max_threat_index + 1, len(threat_levels) - 1)
+        return threat_levels[max_threat_index]
+    def _assess_individual_threat_level(self, event: Event) -> str:
+        """Assess threat level for individual event (duplicate of EventDetector method)"""
+        if not event.is_object_event:
+            # For motion events, use motion intensity and burst activity
+            if event.event_type == "high_motion" and event.motion_intensity > 0.015:
+                return "medium"
+            elif event.event_type == "burst_activity":
+                return "medium"
+            else:
+                return "low"
+        # Object-based threat assessment
+        threat_map = {
+            'fire': {
+                'low': 0.3,      # Confidence thresholds
+                'medium': 0.5,
+                'high': 0.7,
+                'critical': 0.85
+            },
+            'gun': {
+                'low': 0.4,
+                'medium': 0.6,
+                'high': 0.8,
+                'critical': 0.9
+            },
+            'knife': {
+                'low': 0.4,
+                'medium': 0.6,
+                'high': 0.75,
+                'critical': 0.85
+            }
+        }
+        obj_class = event.object_class.lower()
+        confidence = event.max_confidence
+        if obj_class in threat_map:
+            thresholds = threat_map[obj_class]
+            if confidence >= thresholds['critical']:
+                return "critical"
+            elif confidence >= thresholds['high']:
+                return "high"
+            elif confidence >= thresholds['medium']:
+                return "medium"
+            else:
+                return "low"
+        return "medium"  # Default for unknown object types
+    def save_canonical_events(self, canonical_events: List[CanonicalEvent],
+                            output_path: str) -> bool:
+        """Save canonical events to JSON file"""
+        try:
+            # Convert to serializable format
+            events_data = {
+                'metadata': {
+                    'total_canonical_events': len(canonical_events),
+                    'generation_timestamp': datetime.now().isoformat(),
+                    'deduplication_threshold': self.config.similarity_threshold
+                },
+                'canonical_events': [asdict(event) for event in canonical_events]
+            }
+            with open(output_path, 'w') as f:
+                json.dump(events_data, f, indent=2)
+            logger.info(f"Canonical events saved to: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save canonical events: {e}")
+            return False

event_clip_generator.py ADDED Viewed

	@@ -0,0 +1,390 @@

+"""
+Event Clip Generator
+Generates video clips from events for viewing, playing, and downloading.
+Extracts clips from the original or compressed video based on event timestamps.
+Supports annotation with face bounding boxes for person search results.
+"""
+import os
+import cv2
+import subprocess
+import logging
+import uuid
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+from datetime import datetime
+logger = logging.getLogger(__name__)
+class EventClipGenerator:
+    """Generate video clips from events"""
+    def __init__(self, output_dir: str = "video_processing_outputs/clips"):
+        self.output_dir = output_dir
+        os.makedirs(self.output_dir, exist_ok=True)
+    def extract_clip(self, video_path: str, start_time: float, end_time: float,
+                   event_id: str, video_id: str = None) -> Optional[str]:
+        """
+        Extract a video clip from a video file
+        Args:
+            video_path: Path to source video
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            event_id: Event identifier
+            video_id: Optional video identifier for organizing clips
+        Returns:
+            Path to extracted clip file, or None if extraction failed
+        """
+        if not os.path.exists(video_path):
+            logger.error(f"Video file not found: {video_path}")
+            return None
+        try:
+            # Create clip filename
+            clip_id = f"{event_id}_{uuid.uuid4().hex[:8]}"
+            clip_filename = f"{clip_id}.mp4"
+            # Create output directory for this video if video_id provided
+            if video_id:
+                clip_dir = os.path.join(self.output_dir, video_id)
+                os.makedirs(clip_dir, exist_ok=True)
+                clip_path = os.path.join(clip_dir, clip_filename)
+            else:
+                clip_path = os.path.join(self.output_dir, clip_filename)
+            # Calculate duration
+            duration = end_time - start_time
+            # Use ffmpeg to extract clip (more reliable than OpenCV)
+            try:
+                # Try ffmpeg first (faster and more reliable)
+                cmd = [
+                    'ffmpeg',
+                    '-i', video_path,
+                    '-ss', str(start_time),
+                    '-t', str(duration),
+                    '-c', 'copy',  # Copy codec (fast, no re-encoding)
+                    '-avoid_negative_ts', 'make_zero',
+                    '-y',  # Overwrite output file
+                    clip_path
+                ]
+                result = subprocess.run(
+                    cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=60  # 60 second timeout
+                )
+                if result.returncode == 0 and os.path.exists(clip_path):
+                    logger.info(f"✅ Extracted clip: {clip_path} ({duration:.2f}s)")
+                    return clip_path
+                else:
+                    logger.warning(f"FFmpeg extraction failed, trying OpenCV fallback: {result.stderr}")
+                    # Fallback to OpenCV
+                    return self._extract_clip_opencv(video_path, start_time, end_time, clip_path)
+            except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError) as e:
+                logger.warning(f"FFmpeg not available or failed: {e}, using OpenCV fallback")
+                # Fallback to OpenCV
+                return self._extract_clip_opencv(video_path, start_time, end_time, clip_path)
+        except Exception as e:
+            logger.error(f"Error extracting clip: {e}")
+            return None
+    def _extract_clip_opencv(self, video_path: str, start_time: float,
+                            end_time: float, output_path: str) -> Optional[str]:
+        """Extract clip using OpenCV (fallback method)"""
+        try:
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return None
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            # Calculate frame numbers
+            start_frame = int(start_time * fps)
+            end_frame = int(end_time * fps)
+            # Set starting position
+            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+            # Create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            frame_count = start_frame
+            while frame_count <= end_frame:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                out.write(frame)
+                frame_count += 1
+            cap.release()
+            out.release()
+            # Convert to browser-compatible format using ffmpeg
+            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+                try:
+                    browser_compatible_path = output_path.replace('.mp4', '_h264.mp4')
+                    cmd = [
+                        'ffmpeg',
+                        '-i', output_path,
+                        '-c:v', 'libx264',  # H.264 codec for browser compatibility
+                        '-preset', 'fast',
+                        '-crf', '23',
+                        '-c:a', 'aac',  # AAC audio codec
+                        '-movflags', '+faststart',  # Enable streaming
+                        '-y',
+                        browser_compatible_path
+                    ]
+                    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+                    if result.returncode == 0 and os.path.exists(browser_compatible_path):
+                        # Remove the original mp4v file and rename
+                        os.remove(output_path)
+                        os.rename(browser_compatible_path, output_path)
+                        logger.info(f"✅ Extracted clip using OpenCV (H.264): {output_path}")
+                        return output_path
+                    else:
+                        logger.warning(f"FFmpeg conversion failed: {result.stderr}")
+                        logger.info(f"✅ Extracted clip using OpenCV (mp4v): {output_path}")
+                        return output_path
+                except Exception as e:
+                    logger.warning(f"FFmpeg not available for conversion: {e}")
+                    logger.info(f"✅ Extracted clip using OpenCV: {output_path}")
+                    return output_path
+            else:
+                logger.error(f"OpenCV extraction failed: output file is empty or missing")
+                return None
+        except Exception as e:
+            logger.error(f"OpenCV clip extraction error: {e}")
+            return None
+    def extract_annotated_clip(self, video_path: str, start_time: float, end_time: float,
+                              face_id: str, face_detections: List[Dict[str, Any]],
+                              video_id: str = None, person_name: str = None) -> Optional[str]:
+        """
+        Extract and annotate a video clip with bounding boxes for a specific person
+        Args:
+            video_path: Path to source video
+            start_time: Start timestamp in seconds
+            end_time: End timestamp in seconds
+            face_id: Face identifier to highlight
+            face_detections: List of face detection records with bounding boxes and timestamps
+            video_id: Optional video identifier
+            person_name: Optional person name to display on annotations
+        Returns:
+            Path to annotated clip file, or None if extraction failed
+        """
+        if not os.path.exists(video_path):
+            logger.error(f"Video file not found: {video_path}")
+            return None
+        try:
+            # Create annotated clip filename
+            clip_id = f"annotated_{face_id}_{uuid.uuid4().hex[:8]}"
+            clip_filename = f"{clip_id}.mp4"
+            # Create output directory
+            if video_id:
+                clip_dir = os.path.join(self.output_dir, video_id, "annotated")
+                os.makedirs(clip_dir, exist_ok=True)
+                clip_path = os.path.join(clip_dir, clip_filename)
+            else:
+                annotated_dir = os.path.join(self.output_dir, "annotated")
+                os.makedirs(annotated_dir, exist_ok=True)
+                clip_path = os.path.join(annotated_dir, clip_filename)
+            # Open video
+            cap = cv2.VideoCapture(video_path)
+            if not cap.isOpened():
+                logger.error(f"Could not open video: {video_path}")
+                return None
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            # Calculate frame numbers
+            start_frame = int(start_time * fps)
+            end_frame = min(int(end_time * fps), total_frames - 1)
+            # Create a map of frame_number -> bounding boxes for quick lookup
+            frame_bbox_map = {}
+            for detection in face_detections:
+                if detection.get('face_id') == face_id:
+                    # Try multiple timestamp fields
+                    timestamp = (
+                        detection.get('timestamp') or
+                        detection.get('detected_at') or
+                        (detection.get('detected_at').timestamp() if isinstance(detection.get('detected_at'), type(datetime.now())) else 0) or
+                        0
+                    )
+                    # If timestamp is a datetime object, convert to seconds
+                    if hasattr(timestamp, 'timestamp'):
+                        timestamp = timestamp.timestamp()
+                    frame_num = int(timestamp * fps) if timestamp > 0 else 0
+                    # Try multiple bbox field names
+                    bbox = (
+                        detection.get('bounding_box') or
+                        detection.get('bounding_boxes') or
+                        None
+                    )
+                    if bbox:
+                        # Handle different bbox formats: [x1, y1, x2, y2] or {"x1": ..., "y1": ..., ...}
+                        try:
+                            if isinstance(bbox, dict):
+                                x1 = int(bbox.get('x1', bbox.get(0, 0)))
+                                y1 = int(bbox.get('y1', bbox.get(1, 0)))
+                                x2 = int(bbox.get('x2', bbox.get(2, 0)))
+                                y2 = int(bbox.get('y2', bbox.get(3, 0)))
+                            elif isinstance(bbox, list) and len(bbox) >= 4:
+                                x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
+                            else:
+                                continue
+                            # Validate bounding box coordinates
+                            if x1 >= 0 and y1 >= 0 and x2 > x1 and y2 > y1:
+                                # Store for multiple nearby frames to handle timestamp inaccuracies
+                                for offset in range(-2, 3):  # ±2 frames tolerance
+                                    frame_bbox_map[frame_num + offset] = (x1, y1, x2, y2)
+                        except (ValueError, TypeError) as e:
+                            logger.warning(f"Invalid bounding box format: {bbox}, error: {e}")
+                            continue
+            # Set starting position
+            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+            # Create video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(clip_path, fourcc, fps, (width, height))
+            frame_count = start_frame
+            frames_annotated = 0
+            while frame_count <= end_frame:
+                ret, frame = cap.read()
+                if not ret:
+                    break
+                # Check if this frame has a bounding box for this face
+                if frame_count in frame_bbox_map:
+                    x1, y1, x2, y2 = frame_bbox_map[frame_count]
+                    # Draw bounding box (green for person detection)
+                    color = (0, 255, 0)  # Green in BGR
+                    thickness = 3
+                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
+                    # Draw label
+                    label = person_name if person_name else "Detected Person"
+                    label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
+                    # Draw label background
+                    cv2.rectangle(frame, (x1, y1 - label_size[1] - 10),
+                                 (x1 + label_size[0] + 10, y1), color, -1)
+                    # Draw label text
+                    cv2.putText(frame, label, (x1 + 5, y1 - 5),
+                               cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+                    frames_annotated += 1
+                out.write(frame)
+                frame_count += 1
+            cap.release()
+            out.release()
+            # Convert to browser-compatible format using ffmpeg
+            if os.path.exists(clip_path) and os.path.getsize(clip_path) > 0:
+                try:
+                    browser_compatible_path = clip_path.replace('.mp4', '_h264.mp4')
+                    cmd = [
+                        'ffmpeg',
+                        '-i', clip_path,
+                        '-c:v', 'libx264',  # H.264 codec for browser compatibility
+                        '-preset', 'fast',
+                        '-crf', '23',
+                        '-c:a', 'aac',  # AAC audio codec
+                        '-movflags', '+faststart',  # Enable streaming
+                        '-y',
+                        browser_compatible_path
+                    ]
+                    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+                    if result.returncode == 0 and os.path.exists(browser_compatible_path):
+                        # Remove the original mp4v file and rename
+                        os.remove(clip_path)
+                        os.rename(browser_compatible_path, clip_path)
+                        logger.info(f"✅ Created annotated clip: {clip_path} ({frames_annotated} frames annotated)")
+                        return clip_path
+                    else:
+                        logger.warning(f"FFmpeg conversion failed, returning OpenCV output: {result.stderr}")
+                        logger.info(f"✅ Created annotated clip (mp4v): {clip_path} ({frames_annotated} frames annotated)")
+                        return clip_path
+                except Exception as e:
+                    logger.warning(f"FFmpeg not available for conversion: {e}")
+                    logger.info(f"✅ Created annotated clip (mp4v): {clip_path} ({frames_annotated} frames annotated)")
+                    return clip_path
+            else:
+                logger.error(f"Annotated clip creation failed: output file is empty or missing")
+                return None
+        except Exception as e:
+            logger.error(f"Error creating annotated clip: {e}")
+            return None
+    def get_clip_info(self, clip_path: str) -> Dict[str, Any]:
+        """Get information about a clip file"""
+        if not os.path.exists(clip_path):
+            return {}
+        try:
+            cap = cv2.VideoCapture(clip_path)
+            if not cap.isOpened():
+                return {}
+            fps = cap.get(cv2.CAP_PROP_FPS)
+            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+            duration = frame_count / fps if fps > 0 else 0
+            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+            file_size = os.path.getsize(clip_path)
+            cap.release()
+            return {
+                'duration': duration,
+                'fps': fps,
+                'frame_count': frame_count,
+                'resolution': f"{width}x{height}",
+                'file_size': file_size,
+                'file_size_mb': round(file_size / (1024 * 1024), 2)
+            }
+        except Exception as e:
+            logger.error(f"Error getting clip info: {e}")
+            return {}

extract_upload_keyframes.py ADDED Viewed

	@@ -0,0 +1,240 @@

+"""
+Extract keyframes from videos and upload to S3-compatible storage (Backblaze B2).
+For each video that has captions but no keyframes in storage:
+1. Get the frame_ids from video_captions
+2. Get the video source (local file or S3)
+3. Extract those exact frames using OpenCV
+4. Upload to S3 at {video_id}/frame_XXXXXX.jpg
+"""
+import os
+import sys
+import io
+import tempfile
+import cv2
+from pymongo import MongoClient
+from minio import Minio
+from dotenv import load_dotenv
+load_dotenv()
+MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0")
+client = MongoClient(MONGO_URI)
+db = client.detectifai
+minio_client = Minio(
+    os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
+    access_key=os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001'),
+    secret_key=os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA'),
+    secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
+    region=os.getenv('MINIO_REGION', 'eu-central-003') or None
+)
+KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
+VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')
+BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")
+def get_video_source(video_id):
+    """Return path to video file. Download from MinIO if not local."""
+    # Check local uploads first
+    local_path = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
+    if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
+        print(f"  Using local file: {local_path}")
+        return local_path
+    # Check MinIO
+    rec = db.video_file.find_one({"video_id": video_id}, {"minio_object_key": 1, "minio_bucket": 1})
+    if rec and rec.get("minio_object_key"):
+        bucket = rec.get("minio_bucket", VIDEO_BUCKET)
+        obj_key = rec["minio_object_key"]
+        # Verify the object actually exists before downloading
+        try:
+            minio_client.stat_object(bucket, obj_key)
+        except Exception:
+            print(f"  MinIO object not found: {bucket}/{obj_key}")
+            return None
+        print(f"  Downloading from MinIO: {bucket}/{obj_key}")
+        tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
+        minio_client.fget_object(bucket, obj_key, tmp_path)
+        print(f"  Downloaded to: {tmp_path}")
+        return tmp_path
+    return None
+import numpy as np
+def upload_placeholder_keyframes(video_id, frame_ids):
+    """Generate and upload placeholder keyframe images for videos whose source is gone."""
+    uploaded = 0
+    for frame_id in frame_ids:
+        # Get the caption text for this frame to display on placeholder
+        caption_doc = db.video_captions.find_one(
+            {"video_id": video_id, "frame_id": frame_id},
+            {"caption": 1, "_id": 0}
+        )
+        caption_text = caption_doc.get("caption", "No caption") if caption_doc else "No caption"
+        # Create a 640x360 dark gradient placeholder image
+        img = np.zeros((360, 640, 3), dtype=np.uint8)
+        # Dark blue gradient
+        for y in range(360):
+            val = int(30 + (y / 360) * 40)
+            img[y, :] = [val, int(val * 0.8), int(val * 0.5)]
+        # Add text
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        # Video ID
+        cv2.putText(img, video_id, (20, 40), font, 0.5, (150, 150, 150), 1)
+        # Frame ID
+        cv2.putText(img, frame_id, (20, 70), font, 0.5, (150, 150, 150), 1)
+        # Camera icon placeholder
+        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
+        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)
+        # Caption (wrap if long)
+        words = caption_text[:80].split()
+        line = ""
+        y_pos = 250
+        for w in words:
+            test = line + " " + w if line else w
+            if len(test) > 50:
+                cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
+                y_pos += 22
+                line = w
+            else:
+                line = test
+        if line:
+            cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
+        # Encode as JPEG
+        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
+        if not success:
+            continue
+        minio_path = f"{video_id}/{frame_id}.jpg"
+        data = io.BytesIO(buffer.tobytes())
+        minio_client.put_object(
+            KEYFRAME_BUCKET, minio_path, data,
+            length=len(buffer.tobytes()),
+            content_type='image/jpeg'
+        )
+        uploaded += 1
+    return uploaded
+def extract_and_upload_keyframes(video_id, frame_ids):
+    """Extract specific frames from video and upload to MinIO."""
+    video_path = get_video_source(video_id)
+    if not video_path:
+        print(f"  No video source found — generating placeholder keyframes")
+        return upload_placeholder_keyframes(video_id, frame_ids)
+    # Parse frame numbers from frame_ids like "frame_000060"
+    frame_numbers = {}
+    for fid in frame_ids:
+        try:
+            num = int(fid.replace("frame_", ""))
+            frame_numbers[num] = fid
+        except ValueError:
+            print(f"  WARNING: Could not parse frame_id: {fid}")
+    if not frame_numbers:
+        print(f"  No valid frame numbers to extract")
+        return 0
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        print(f"  ERROR: Could not open video: {video_path}")
+        return 0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    print(f"  Video: {total_frames} frames, {fps:.1f} fps")
+    uploaded = 0
+    max_frame = max(frame_numbers.keys())
+    for frame_num in sorted(frame_numbers.keys()):
+        if frame_num >= total_frames:
+            # Use last available frame
+            frame_num_actual = total_frames - 1
+            print(f"  Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
+        else:
+            frame_num_actual = frame_num
+        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
+        ret, frame = cap.read()
+        if not ret:
+            print(f"  ERROR: Could not read frame {frame_num_actual}")
+            continue
+        # Encode as JPEG
+        success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+        if not success:
+            print(f"  ERROR: Could not encode frame {frame_num}")
+            continue
+        frame_id = frame_numbers[frame_num]
+        minio_path = f"{video_id}/{frame_id}.jpg"
+        # Upload to MinIO
+        data = io.BytesIO(buffer.tobytes())
+        minio_client.put_object(
+            KEYFRAME_BUCKET,
+            minio_path,
+            data,
+            length=len(buffer.tobytes()),
+            content_type='image/jpeg'
+        )
+        uploaded += 1
+    cap.release()
+    # Clean up temp file if downloaded from MinIO
+    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
+    if os.path.exists(tmp_path) and video_path == tmp_path:
+        os.remove(tmp_path)
+    return uploaded
+def main():
+    # Get all video_ids with captions
+    caption_vids = db.video_captions.distinct("video_id")
+    for video_id in caption_vids:
+        if video_id.startswith("test_"):
+            continue
+        # Check if keyframes already exist in MinIO
+        existing = list(minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{video_id}/", recursive=True))
+        if len(existing) > 0:
+            print(f"SKIP {video_id}: already has {len(existing)} keyframes in MinIO")
+            continue
+        # Get frame_ids from captions
+        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
+        if not frame_ids:
+            print(f"SKIP {video_id}: no frame_ids in captions")
+            continue
+        print(f"\nPROCESSING {video_id}: {len(frame_ids)} frames to extract")
+        uploaded = extract_and_upload_keyframes(video_id, frame_ids)
+        print(f"  Uploaded {uploaded}/{len(frame_ids)} keyframes to MinIO")
+    print("\n=== DONE ===")
+    # Final check
+    for video_id in caption_vids:
+        if video_id.startswith("test_"):
+            continue
+        objs = list(minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{video_id}/", recursive=True))
+        print(f"  {video_id}: {len(objs)} keyframes in MinIO")
+if __name__ == "__main__":
+    main()

facial_recognition.py ADDED Viewed

	@@ -0,0 +1,926 @@

+"""
+Facial Recognition Module for DetectifAI
+This module handles facial recognition for suspicious activity frames:
+- Face detection using MTCNN (primary) or OpenCV Haar cascades (fallback)
+- Face embeddings using FaceNet (primary) or histogram-based (fallback)
+- FAISS vector similarity search (primary) or cosine similarity (fallback)
+- MongoDB metadata storage with local JSON fallback
+- Integration with suspicious activity detection pipeline
+Workflow (matches activity diagram):
+1. Receive frame from suspicious event (object detection)
+2. Run face detection
+3. If faces detected: crop faces, generate embeddings, store in FAISS/index
+4. Upload face crops to storage, save metadata to MongoDB/JSON
+5. Search for similar embeddings, link with previous incidents
+6. Assign new person ID if no match found
+Author: DetectifAI Team
+"""
+import os
+import cv2
+import numpy as np
+import logging
+import json
+import uuid
+import time
+import warnings
+from typing import List, Tuple, Optional, Dict, Any
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+# Advanced imports (with fallbacks)
+try:
+    import torch
+    from facenet_pytorch import MTCNN, InceptionResnetV1
+    import faiss
+    from pymongo import MongoClient
+    from dotenv import load_dotenv
+    import joblib
+    ADVANCED_AVAILABLE = True
+    load_dotenv()
+except ImportError:
+    ADVANCED_AVAILABLE = False
+warnings.filterwarnings('ignore')
+logger = logging.getLogger(__name__)
+# ========================================
+# Configuration
+# ========================================
+# MongoDB Configuration
+# The URI is read from the environment only when the optional advanced stack
+# imported successfully; otherwise it is None.
+MONGO_URI = os.getenv("MONGO_URI", "mongodb://localhost:27017/") if ADVANCED_AVAILABLE else None
+MONGO_DB_NAME = "detectifai"
+# FAISS Configuration
+# Index file and the JSON file mapping FAISS row numbers to face ids (relative paths).
+FAISS_INDEX_PATH = "model/faiss_face_index.bin"
+FAISS_ID_MAP_PATH = "model/faiss_id_map.json"
+EMBEDDING_DIM = 512  # InceptionResnetV1 produces 512-dim embeddings
+# Trained Models Configuration
+# Classifier and label encoder loaded by PersonClassifier when both files exist.
+TRAINED_MODEL_DIR = "model/trained_models"
+CLASSIFIER_PATH = os.path.join(TRAINED_MODEL_DIR, "classifier_svm.pkl")
+ENCODER_PATH = os.path.join(TRAINED_MODEL_DIR, "label_encoder.pkl")
+# Simple fallback configuration
+# JSON file backing SimpleFaceIndex.
+SIMPLE_INDEX_PATH = "model/simple_face_index.json"
+# Face storage
+FACES_DIR = "model/faces"
+# ========================================
+# Data Models
+# ========================================
+@dataclass
+class FaceDetectionResult:
+    """Result of face detection in a frame.
+    The three face_* lists are presumably parallel (one entry per detected
+    face) - confirm against the code that builds this object.
+    """
+    # Path of the frame image that was analysed
+    frame_path: str
+    timestamp: float
+    # Number of faces found in the frame
+    faces_detected: int
+    face_embeddings: List[np.ndarray]
+    # NOTE(review): presumably (x1, y1, x2, y2) pixel coordinates - confirm
+    face_bounding_boxes: List[Tuple[int, int, int, int]]
+    face_confidence_scores: List[float]
+    processing_time: float
+    # Optional results; None until populated
+    detected_face_ids: Optional[List[str]] = None
+    matched_persons: Optional[List[str]] = None
+@dataclass
+class SuspiciousPerson:
+    """Information about a suspicious person tracked across detections."""
+    # Identifier assigned to this person
+    person_id: str
+    first_detected: float  # timestamp
+    last_seen: float       # timestamp
+    # Embedding for this person; may be None
+    face_embedding: Optional[np.ndarray]
+    associated_events: List[str]  # event IDs where this person appeared
+    threat_level: str
+    notes: str
+    # Number of times this person has been detected
+    detection_count: int
+    face_id: str = ""  # Primary face_id
+# ========================================
+# Advanced Implementation (FAISS + FaceNet)
+# ========================================
+class AdvancedFaceDetector:
+    """Advanced face detector using MTCNN"""
+    def __init__(self, device='cpu', min_face_size=60):  # Increased from 40 to 60 for stricter filtering
+        self.device = torch.device(device)
+        self.mtcnn = MTCNN(
+            image_size=160,
+            margin=20,
+            min_face_size=min_face_size,  # Larger minimum to reject small circular objects
+            thresholds=[0.8, 0.9, 0.9],  # Very strict thresholds (was [0.7, 0.8, 0.8]) to eliminate false positives
+            factor=0.709,
+            keep_all=True,
+            device=self.device
+        )
+        logger.info(f"[AdvancedFaceDetector] Initialized MTCNN on {device} with min_face_size={min_face_size}, strict thresholds=[0.8, 0.9, 0.9]")
+    def detect_faces(self, frame: np.ndarray) -> Tuple[List[np.ndarray], List[np.ndarray], List[float]]:
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        boxes, probs = self.mtcnn.detect(rgb_frame, landmarks=False)
+        if boxes is None:
+            return [], [], []
+        faces = self.mtcnn.extract(rgb_frame, boxes, save_path=None)
+        if faces is None:
+            return [], [], []
+        valid_faces, valid_boxes, valid_probs = [], [], []
+        for face, prob, box in zip(faces, probs, boxes):
+            # Very strict probability threshold (increased from 0.85 to 0.90)
+            if face is not None and prob > 0.90:
+                # Additional validation to filter false positives (e.g., tires, wheels)
+                if self._is_valid_face(face, box):
+                    valid_faces.append(face)
+                    valid_boxes.append(box)
+                    valid_probs.append(prob)
+                else:
+                    logger.debug(f"Rejected detection (prob={prob:.3f}) - failed quality validation")
+        return valid_faces, valid_boxes, valid_probs
+    def _is_valid_face(self, face_tensor: torch.Tensor, box: np.ndarray) -> bool:
+        """Validate detected face to filter out false positives like tires, wheels, circular objects"""
+        try:
+            # 1. Check bounding box aspect ratio (faces should be ~1:1.2, not perfectly circular like tires)
+            x1, y1, x2, y2 = box
+            width = x2 - x1
+            height = y2 - y1
+            if width <= 0 or height <= 0:
+                return False
+            aspect_ratio = width / height
+            # Reject if too circular (like tires) or too elongated - tightened range
+            if aspect_ratio < 0.7 or aspect_ratio > 1.5:
+                logger.debug(f"Rejected: aspect_ratio={aspect_ratio:.2f} (tires ~1.0, faces 0.75-1.35)")
+                return False
+            # 2. Check minimum face size (reject small detections) - increased to 60px
+            if width < 60 or height < 60:
+                logger.debug(f"Rejected: too small ({width}x{height}) - minimum is 60x60")
+                return False
+            # 3. Check face tensor for quality (reject blurry or low-contrast images like tire treads)
+            face_np = face_tensor.permute(1, 2, 0).cpu().numpy()
+            # Check variance (faces should have good contrast, tires are uniform) - increased threshold
+            variance = np.var(face_np)
+            if variance < 0.02:  # Increased from 0.01 to 0.02 for stricter filtering
+                logger.debug(f"Rejected: low variance={variance:.4f} (uniform object, likely tire)")
+                return False
+            # 4. Check edge density (faces have more complex edges than smooth tire surfaces)
+            gray = cv2.cvtColor((face_np * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
+            edges = cv2.Canny(gray, 50, 150)
+            edge_density = np.sum(edges > 0) / (edges.shape[0] * edges.shape[1])
+            # Tires have uniform circular edges, faces have complex features - tightened range
+            if edge_density < 0.08 or edge_density > 0.35:  # Narrowed from (0.05, 0.4) to (0.08, 0.35)
+                logger.debug(f"Rejected: edge_density={edge_density:.3f} (abnormal edge pattern)")
+                return False
+            return True
+        except Exception as e:
+            logger.warning(f"Face validation error: {e}")
+            return False  # Reject on error to be safe
+class AdvancedFaceEmbedder:
+    """Advanced face embedder using FaceNet"""
+    def __init__(self, device='cpu', weights='vggface2'):
+        self.device = torch.device(device)
+        self.model = InceptionResnetV1(pretrained=weights).eval().to(self.device)
+        logger.info(f"[AdvancedFaceEmbedder] Loaded InceptionResnetV1 on {device}")
+    def generate_embedding(self, face_tensor: torch.Tensor) -> np.ndarray:
+        with torch.no_grad():
+            face_tensor = face_tensor.to(self.device).unsqueeze(0)
+            embedding = self.model(face_tensor).cpu().numpy().flatten()
+        return embedding
+class PersonClassifier:
+    """Person identification using trained SVM classifier"""
+    def __init__(self, classifier_path: str = CLASSIFIER_PATH, encoder_path: str = ENCODER_PATH,
+                 confidence_threshold: float = 0.5):
+        self.confidence_threshold = confidence_threshold
+        self.enabled = False
+        if ADVANCED_AVAILABLE and os.path.exists(classifier_path) and os.path.exists(encoder_path):
+            try:
+                self.classifier = joblib.load(classifier_path)
+                self.label_encoder = joblib.load(encoder_path)
+                self.enabled = True
+                logger.info(f"[PersonClassifier] ✅ Model loaded, {len(self.label_encoder.classes_)} identities recognized.")
+            except Exception as e:
+                logger.warning(f"[PersonClassifier] ⚠️ Failed to load model: {e}")
+        else:
+            logger.info("[PersonClassifier] Trained models not available, using generic face tracking")
+    def identify_person(self, embedding: np.ndarray) -> Tuple[Optional[str], float]:
+        """Identify person from face embedding using SVM classifier"""
+        if not self.enabled:
+            return None, 0.0
+        try:
+            probs = self.classifier.predict_proba(embedding.reshape(1, -1))[0]
+            best_idx = np.argmax(probs)
+            conf = probs[best_idx]
+            if conf >= self.confidence_threshold:
+                return self.label_encoder.classes_[best_idx], float(conf)
+            return None, float(conf)
+        except Exception as e:
+            logger.error(f"[PersonClassifier] Error: {e}")
+            return None, 0.0
+class FAISSFaceIndex:
+    """FAISS index manager for fast similarity search"""
+    def __init__(self, embedding_dim: int = 512, index_path: str = FAISS_INDEX_PATH,
+                 id_map_path: str = FAISS_ID_MAP_PATH):
+        self.embedding_dim = embedding_dim
+        self.index_path = index_path
+        self.id_map_path = id_map_path
+        self.index = None
+        self.id_map = {}
+        self.reverse_map = {}
+        os.makedirs(os.path.dirname(index_path), exist_ok=True)
+        self._load_or_create_index()
+    def _load_or_create_index(self):
+        if os.path.exists(self.index_path) and os.path.exists(self.id_map_path):
+            try:
+                self.index = faiss.read_index(self.index_path)
+                with open(self.id_map_path, 'r') as f:
+                    data = json.load(f)
+                    self.id_map = {int(k): v for k, v in data.items()}
+                    self.reverse_map = {v: int(k) for k, v in self.id_map.items()}
+                logger.info(f"[FAISS] Loaded index with {self.index.ntotal} embeddings")
+            except Exception as e:
+                logger.warning(f"[FAISS] Error loading index: {e}")
+                self._create_new_index()
+        else:
+            self._create_new_index()
+    def _create_new_index(self):
+        self.index = faiss.IndexFlatIP(self.embedding_dim)
+        self.id_map = {}
+        self.reverse_map = {}
+        logger.info(f"[FAISS] Created new index (dim={self.embedding_dim})")
+    def add_embedding(self, face_id: str, embedding: np.ndarray) -> int:
+        if face_id in self.reverse_map:
+            return self.reverse_map[face_id]
+        embedding = embedding.astype('float32').reshape(1, -1)
+        embedding = embedding / np.linalg.norm(embedding)
+        idx = self.index.ntotal
+        self.index.add(embedding)
+        self.id_map[idx] = face_id
+        self.reverse_map[face_id] = idx
+        return idx
+    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Tuple[str, float]]:
+        if self.index.ntotal == 0:
+            return []
+        query_embedding = query_embedding.astype('float32').reshape(1, -1)
+        query_embedding = query_embedding / np.linalg.norm(query_embedding)
+        similarities, indices = self.index.search(query_embedding, min(k, self.index.ntotal))
+        results = []
+        for sim, idx in zip(similarities[0], indices[0]):
+            if idx in self.id_map and sim >= threshold:
+                results.append((self.id_map[idx], float(sim)))
+        return results
+    def save(self):
+        os.makedirs(os.path.dirname(self.index_path), exist_ok=True)
+        faiss.write_index(self.index, self.index_path)
+        with open(self.id_map_path, 'w') as f:
+            json.dump(self.id_map, f)
+class MongoDBFaceStorage:
+    """MongoDB storage for face metadata"""
+    def __init__(self, mongo_uri: str, db_name: str = MONGO_DB_NAME):
+        try:
+            self.client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
+            self.db = self.client[db_name]
+            self.faces_collection = self.db['detected_faces']
+            self.client.server_info()  # Test connection
+            self.enabled = True
+            logger.info("[MongoDB] Connected successfully")
+        except Exception as e:
+            logger.warning(f"[MongoDB] Connection failed: {e}")
+            self.enabled = False
+    def save_face(self, data: Dict) -> str:
+        if not self.enabled:
+            return ""
+        data['detected_at'] = datetime.utcnow()
+        if 'face_embedding' in data:
+            del data['face_embedding']  # Don't store embeddings in MongoDB
+        data['face_embedding'] = []
+        try:
+            result = self.faces_collection.insert_one(data)
+            return str(result.inserted_id)
+        except Exception as e:
+            logger.error(f"[MongoDB] Error saving face: {e}")
+            return ""
+    def close(self):
+        if hasattr(self, 'client'):
+            self.client.close()
+# ========================================
+# Simple Implementation (OpenCV + Histograms)
+# ========================================
+class SimpleFaceDetector:
+    """Simple face detector using OpenCV Haar cascades"""
+    def __init__(self, device='cpu'):
+        self.device = device
+        cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
+        self.face_cascade = cv2.CascadeClassifier(cascade_path)
+        logger.info(f"[SimpleFaceDetector] Initialized with OpenCV Haar cascades")
+    def detect_faces(self, frame: np.ndarray) -> Tuple[List[np.ndarray], List[np.ndarray], List[float]]:
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = self.face_cascade.detectMultiScale(gray, 1.1, 4, minSize=(30, 30))
+        face_crops = []
+        boxes = []
+        confidences = []
+        for (x, y, w, h) in faces:
+            face_crop = frame[y:y+h, x:x+w]
+            face_crops.append(face_crop)
+            boxes.append([x, y, x+w, y+h])
+            confidences.append(0.8)
+        return face_crops, boxes, confidences
+class SimpleFaceEmbedder:
+    """Simple face embedder using histograms"""
+    def __init__(self, device='cpu'):
+        self.device = device
+        logger.info(f"[SimpleFaceEmbedder] Using histogram-based embeddings")
+    def generate_embedding(self, face_crop: np.ndarray) -> np.ndarray:
+        if isinstance(face_crop, np.ndarray) and len(face_crop.shape) == 3:
+            face_resized = cv2.resize(face_crop, (64, 64))
+            hsv = cv2.cvtColor(face_resized, cv2.COLOR_BGR2HSV)
+            hist_h = cv2.calcHist([hsv], [0], None, [16], [0, 180])
+            hist_s = cv2.calcHist([hsv], [1], None, [16], [0, 256])
+            hist_v = cv2.calcHist([hsv], [2], None, [16], [0, 256])
+            embedding = np.concatenate([hist_h.flatten(), hist_s.flatten(), hist_v.flatten()])
+            return embedding / np.linalg.norm(embedding)
+        else:
+            return np.random.rand(48) / np.linalg.norm(np.random.rand(48))
+class SimpleFaceIndex:
+    """Simple face index using cosine similarity"""
+    def __init__(self, index_path: str = SIMPLE_INDEX_PATH):
+        self.index_path = index_path
+        self.faces_db = {}
+        os.makedirs(os.path.dirname(index_path), exist_ok=True)
+        self._load_index()
+    def _load_index(self):
+        if os.path.exists(self.index_path):
+            try:
+                with open(self.index_path, 'r') as f:
+                    data = json.load(f)
+                    self.faces_db = {face_id: np.array(embedding)
+                                   for face_id, embedding in data.items()}
+                logger.info(f"[SimpleFaceIndex] Loaded {len(self.faces_db)} faces")
+            except Exception as e:
+                logger.warning(f"[SimpleFaceIndex] Error loading: {e}")
+                self.faces_db = {}
+        else:
+            self.faces_db = {}
+    def add_embedding(self, face_id: str, embedding: np.ndarray) -> int:
+        if face_id in self.faces_db:
+            return len(self.faces_db)
+        self.faces_db[face_id] = embedding
+        return len(self.faces_db)
+    def search(self, query_embedding: np.ndarray, k: int = 5, threshold: float = 0.6) -> List[Tuple[str, float]]:
+        if not self.faces_db:
+            return []
+        similarities = []
+        for face_id, stored_embedding in self.faces_db.items():
+            similarity = np.dot(query_embedding, stored_embedding) / (
+                np.linalg.norm(query_embedding) * np.linalg.norm(stored_embedding))
+            if similarity >= threshold:
+                similarities.append((face_id, float(similarity)))
+        similarities.sort(key=lambda x: x[1], reverse=True)
+        return similarities[:k]
+    def save(self):
+        try:
+            data = {face_id: embedding.tolist()
+                   for face_id, embedding in self.faces_db.items()}
+            with open(self.index_path, 'w') as f:
+                json.dump(data, f)
+            logger.debug(f"[SimpleFaceIndex] Saved {len(self.faces_db)} faces")
+        except Exception as e:
+            logger.error(f"[SimpleFaceIndex] Error saving: {e}")
+# ========================================
+# Main Facial Recognition Class
+# ========================================
+class FacialRecognitionIntegrated:
+    """
+    Unified facial recognition system for DetectifAI.
+    Automatically uses advanced implementation (MTCNN + FaceNet + FAISS + MongoDB)
+    if available, otherwise falls back to simple implementation (OpenCV + Histograms + JSON).
+    Applies facial recognition ONLY to suspicious frames detected by object detection.
+    """
+    def __init__(self, config):
+        self.config = config
+        self.enabled = getattr(config, 'enable_facial_recognition', False)
+        self.confidence_threshold = getattr(config, 'face_recognition_confidence', 0.7)
+        self.similarity_threshold = 0.6
+        self.device = 'cuda' if torch.cuda.is_available() and getattr(config, 'use_gpu_acceleration', False) else 'cpu'
+        # Create faces directory
+        self.faces_dir = Path(FACES_DIR)
+        self.faces_dir.mkdir(exist_ok=True, parents=True)
+        # Determine implementation mode
+        self.advanced_mode = ADVANCED_AVAILABLE and self.enabled
+        # Initialize components only if enabled
+        if self.enabled:
+            self._initialize_components()
+        # Detection statistics
+        self.detection_stats = {
+            'implementation_mode': 'advanced' if self.advanced_mode else 'simple',
+            'frames_processed': 0,
+            'faces_detected': 0,
+            'suspicious_persons_tracked': 0,
+            'reoccurrences_detected': 0,
+            'new_faces_added': 0,
+            'face_matches_found': 0
+        }
+        # Suspicious persons database
+        self.suspicious_persons_db = {}
+        if not self.enabled:
+            logger.info("[FacialRecognition] Disabled - skipping initialization")
+        else:
+            mode = "Advanced (MTCNN + FaceNet + FAISS)" if self.advanced_mode else "Simple (OpenCV + Histograms)"
+            logger.info(f"[FacialRecognition] ✅ Initialized in {mode} mode")
+    def _initialize_components(self):
+        """Initialize facial recognition components based on available dependencies"""
+        try:
+            if self.advanced_mode:
+                # Advanced implementation
+                self.detector = AdvancedFaceDetector(self.device)
+                self.embedder = AdvancedFaceEmbedder(self.device)
+                self.face_index = FAISSFaceIndex()
+                self.person_classifier = PersonClassifier()  # Add trained SVM classifier
+                # MongoDB storage (optional)
+                if MONGO_URI:
+                    self.mongodb_storage = MongoDBFaceStorage(MONGO_URI)
+                else:
+                    self.mongodb_storage = None
+                    logger.info("[FacialRecognition] MongoDB not configured, using local storage only")
+            else:
+                # Simple implementation
+                self.detector = SimpleFaceDetector()
+                self.embedder = SimpleFaceEmbedder()
+                self.face_index = SimpleFaceIndex()
+                self.person_classifier = None  # No classifier in simple mode
+                self.mongodb_storage = None
+        except Exception as e:
+            logger.error(f"[FacialRecognition] ❌ Initialization failed: {e}")
+            self.enabled = False
+            raise
+    def _generate_face_id(self, frame_number: int, face_index: int, person_name: Optional[str] = None, event_id: str = "unknown") -> str:
+        """Generate unique face ID"""
+        prefix = f"{person_name.replace(' ', '_')}" if person_name else "unknown"
+        unique_id = str(uuid.uuid4())[:8]
+        return f"face_{prefix}_event_{event_id}_{frame_number:06d}_{face_index:02d}_{unique_id}"
+    def _save_face_image(self, face_data, face_id: str) -> str:
+        """Save face image to disk"""
+        try:
+            path = self.faces_dir / f"{face_id}.jpg"
+            if self.advanced_mode and isinstance(face_data, torch.Tensor):
+                # Convert tensor to numpy array (MTCNN returns normalized tensors in range [0, 1])
+                face_np = face_data.permute(1, 2, 0).cpu().numpy()
+                # Convert from [0,1] float to [0,255] uint8
+                face_np = (face_np * 128 + 127.5).clip(0, 255).astype(np.uint8)
+                # MTCNN outputs RGB, convert to BGR for OpenCV
+                face_bgr = cv2.cvtColor(face_np, cv2.COLOR_RGB2BGR)
+                # Resize to reasonable display size (e.g., 160x160)
+                face_bgr = cv2.resize(face_bgr, (160, 160))
+                cv2.imwrite(str(path), face_bgr)
+                logger.debug(f"Saved advanced face image to {path}")
+            elif isinstance(face_data, np.ndarray):
+                # Direct numpy array (from simple mode or already processed)
+                # Ensure it's in proper format
+                if face_data.dtype != np.uint8:
+                    face_data = (face_data * 255).astype(np.uint8) if face_data.max() <= 1.0 else face_data.astype(np.uint8)
+                # Resize if too large
+                if face_data.shape[0] > 300 or face_data.shape[1] > 300:
+                    face_data = cv2.resize(face_data, (160, 160))
+                cv2.imwrite(str(path), face_data)
+                logger.debug(f"Saved simple face image to {path}")
+            else:
+                logger.error(f"Unknown face_data type: {type(face_data)}")
+                return ""
+            return str(path)
+        except Exception as e:
+            logger.error(f"[FacialRecognition] Error saving face image: {e}")
+            import traceback
+            traceback.print_exc()
+            return ""
+    def detect_faces_in_frame(self, frame_path: str, timestamp: float) -> FaceDetectionResult:
+        """
+        Detect faces in a single frame (for suspicious frames only).
+        Every detected face is embedded and looked up in the face index. A hit
+        reuses the matched face id; a miss registers the face under a new id,
+        saves its image and, when MongoDB storage is enabled, its metadata.
+        Args:
+            frame_path: Path to the frame image
+            timestamp: Timestamp of the frame in video
+        Returns:
+            FaceDetectionResult with detected faces and metadata. An empty
+            result (0 faces) is returned when the system is disabled, the frame
+            cannot be read, or any error occurs while processing.
+        """
+        # Disabled system: report an empty result without touching the image.
+        if not self.enabled:
+            return FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=0,
+                face_embeddings=[],
+                face_bounding_boxes=[],
+                face_confidence_scores=[],
+                processing_time=0.0
+            )
+        start_time = time.time()
+        try:
+            # Load frame
+            frame = cv2.imread(frame_path)
+            if frame is None:
+                # cv2.imread returned None, i.e. the image could not be loaded.
+                logger.error(f"Could not load frame: {frame_path}")
+                return FaceDetectionResult(
+                    frame_path=frame_path,
+                    timestamp=timestamp,
+                    faces_detected=0,
+                    face_embeddings=[],
+                    face_bounding_boxes=[],
+                    face_confidence_scores=[],
+                    processing_time=0.0
+                )
+            # Detect faces
+            faces, boxes, probs = self.detector.detect_faces(frame)
+            # Generate embeddings and process faces
+            face_embeddings = []
+            detected_face_ids = []
+            matched_persons = []
+            for i, (face, box, prob) in enumerate(zip(faces, boxes, probs)):
+                # Generate embedding
+                embedding = self.embedder.generate_embedding(face)
+                face_embeddings.append(embedding)
+                # Try person identification using trained classifier
+                # (person_classifier is None in simple mode, so this is skipped there)
+                person_name, person_confidence = None, 0.0
+                if self.person_classifier and self.person_classifier.enabled:
+                    person_name, person_confidence = self.person_classifier.identify_person(embedding)
+                # Search the face index for the single best match above the
+                # similarity threshold
+                matches = self.face_index.search(embedding, k=1, threshold=self.similarity_threshold)
+                if matches:
+                    # Found matching face
+                    matched_face_id, similarity = matches[0]
+                    detected_face_ids.append(matched_face_id)
+                    if person_name:
+                        matched_persons.append(f"{person_name} (confidence: {person_confidence:.2f})")
+                        logger.info(f"👤 Known person identified: {person_name} (confidence: {person_confidence:.2f}, face similarity: {similarity:.3f})")
+                    else:
+                        matched_persons.append(f"person_{matched_face_id}")
+                        logger.info(f"👤 Face match found: {matched_face_id} (similarity: {similarity:.3f})")
+                    self.detection_stats['face_matches_found'] += 1
+                else:
+                    # New face - save to index
+                    frame_number = int(timestamp * 30)  # Estimate frame number (hard-coded 30 frames per second)
+                    new_face_id = self._generate_face_id(frame_number, i, person_name, event_id=f"obj_detection_{int(timestamp)}")
+                    # Add to the face index
+                    self.face_index.add_embedding(new_face_id, embedding)
+                    # Save face image ("" is returned if saving failed)
+                    face_path = self._save_face_image(face, new_face_id)
+                    # Save metadata to MongoDB if available
+                    if self.mongodb_storage and self.mongodb_storage.enabled:
+                        face_metadata = {
+                            'face_id': new_face_id,
+                            'frame_path': frame_path,
+                            'timestamp': timestamp,
+                            'confidence': float(prob),
+                            'person_name': person_name,
+                            'person_confidence': float(person_confidence) if person_name else None,
+                            'bounding_box': [int(x) for x in box],
+                            'face_image_path': face_path
+                        }
+                        self.mongodb_storage.save_face(face_metadata)
+                    detected_face_ids.append(new_face_id)
+                    if person_name:
+                        matched_persons.append(f"{person_name} (NEW, confidence: {person_confidence:.2f})")
+                        logger.info(f"👤 NEW known person detected: {person_name} (confidence: {person_confidence:.2f})")
+                    else:
+                        matched_persons.append(f"new_unknown_person_{new_face_id}")
+                        logger.info(f"👤 NEW unknown face detected: {new_face_id}")
+                    self.detection_stats['new_faces_added'] += 1
+            # Save face index (done for every processed frame, even when no face was added)
+            self.face_index.save()
+            processing_time = time.time() - start_time
+            self.detection_stats['frames_processed'] += 1
+            self.detection_stats['faces_detected'] += len(faces)
+            # Convert boxes to expected format: tuples of four plain ints
+            face_bounding_boxes = [(int(box[0]), int(box[1]), int(box[2]), int(box[3])) for box in boxes]
+            result = FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=len(faces),
+                face_embeddings=face_embeddings,
+                face_bounding_boxes=face_bounding_boxes,
+                face_confidence_scores=probs,
+                processing_time=processing_time,
+                detected_face_ids=detected_face_ids,
+                matched_persons=matched_persons
+            )
+            if faces:
+                logger.info(f"👤 Processed {len(faces)} faces in suspicious frame at {timestamp:.2f}s")
+            return result
+        except Exception as e:
+            # Any failure yields an empty result that carries the elapsed time.
+            # Index entries added before the failure are not saved on this path.
+            logger.error(f"[FacialRecognition] Error processing frame {frame_path}: {e}")
+            return FaceDetectionResult(
+                frame_path=frame_path,
+                timestamp=timestamp,
+                faces_detected=0,
+                face_embeddings=[],
+                face_bounding_boxes=[],
+                face_confidence_scores=[],
+                processing_time=time.time() - start_time
+            )
+    def track_suspicious_persons(self, face_results: List[FaceDetectionResult],
+                               detectifai_events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Track suspicious persons and detect re-occurrences."""
+        if not self.enabled or not face_results:
+            logger.info("👤 Facial recognition disabled or no face results - skipping person tracking")
+            return []
+        logger.info(f"👤 Tracking suspicious persons across {len(face_results)} face detection results")
+        reoccurrence_events = []
+        person_timeline = {}  # face_id -> list of timestamps
+        # Build person timeline from face results
+        for face_result in face_results:
+            if face_result.detected_face_ids:
+                for face_id in face_result.detected_face_ids:
+                    if face_id not in person_timeline:
+                        person_timeline[face_id] = []
+                    person_timeline[face_id].append(face_result.timestamp)
+        # Look for re-occurrences (same person appearing multiple times)
+        for face_id, timestamps in person_timeline.items():
+            if len(timestamps) > 1:
+                # Create re-occurrence event
+                timestamps.sort()
+                reoccurrence_event = {
+                    'event_id': f"reoccurrence_{face_id}_{int(timestamps[-1])}",
+                    'start_timestamp': timestamps[0],
+                    'end_timestamp': timestamps[-1],
+                    'event_type': 'suspicious_person_reoccurrence',
+                    'confidence': 0.85,
+                    'max_confidence': 0.85,
+                    'keyframes': [r.frame_path for r in face_results if face_id in (r.detected_face_ids or [])],
+                    'importance_score': 4.0,
+                    'description': f"Suspicious person {face_id} appeared {len(timestamps)} times",
+                    'detection_details': {
+                        'person_id': face_id,
+                        'appearances': len(timestamps),
+                        'time_span': timestamps[-1] - timestamps[0],
+                        'timestamps': timestamps
+                    }
+                }
+                reoccurrence_events.append(reoccurrence_event)
+                self.detection_stats['reoccurrences_detected'] += 1
+        # Save face index
+        if self.face_index:
+            self.face_index.save()
+        # Update statistics
+        self.detection_stats['suspicious_persons_tracked'] = len(person_timeline)
+        logger.info(f"👤 Person tracking complete: {len(person_timeline)} unique persons, {len(reoccurrence_events)} re-occurrences")
+        return reoccurrence_events
+    def search_person_by_image(self, image_path: str, k: int = 10, threshold: float = 0.6) -> List[Dict[str, Any]]:
+        """
+        Search for a person by uploading their image.
+        Args:
+            image_path: Path to the uploaded image
+            k: Number of top matches to return
+            threshold: Similarity threshold for matches
+        Returns:
+            List of matched persons with their occurrences
+        """
+        if not self.enabled:
+            logger.warning("[FacialRecognition] System not enabled")
+            return []
+        try:
+            # Load the uploaded image
+            frame = cv2.imread(image_path)
+            if frame is None:
+                logger.error(f"Could not load image: {image_path}")
+                return []
+            # Detect faces in the uploaded image
+            faces, boxes, probs = self.detector.detect_faces(frame)
+            if not faces:
+                logger.info("No faces detected in uploaded image")
+                return []
+            # Use the first detected face for search
+            query_face = faces[0]
+            query_embedding = self.embedder.generate_embedding(query_face)
+            # Search for similar faces in the database
+            matches = self.face_index.search(query_embedding, k=k, threshold=threshold)
+            if not matches:
+                logger.info("No similar faces found in database")
+                return []
+            # Group matches by person/event and gather occurrence information
+            search_results = []
+            for face_id, similarity in matches:
+                # Parse face_id to extract information
+                # face_id format: face_{person}_{event}_{frame}_{face_index}_{unique_id}
+                parts = face_id.split('_')
+                if len(parts) >= 6:
+                    person_part = parts[1] if parts[1] != 'unknown' else 'Unknown Person'
+                    event_part = '_'.join(parts[2:4])  # event_obj_detection or similar
+                    # Check if we have face image saved
+                    face_image_path = str(self.faces_dir / f"{face_id}.jpg")
+                    has_face_image = os.path.exists(face_image_path)
+                    # Try to get person identification from trained classifier
+                    person_name, person_confidence = None, 0.0
+                    if self.person_classifier and self.person_classifier.enabled:
+                        person_name, person_confidence = self.person_classifier.identify_person(query_embedding)
+                    result = {
+                        'face_id': face_id,
+                        'person_name': person_name if person_name else person_part.replace('_', ' ').title(),
+                        'person_confidence': person_confidence,
+                        'similarity_score': similarity,
+                        'event_context': event_part,
+                        'face_image_path': face_image_path if has_face_image else None,
+                        'timestamp': self._extract_timestamp_from_face_id(face_id),
+                        'detection_context': 'Suspicious Activity Detection'
+                    }
+                    search_results.append(result)
+                else:
+                    # Fallback for differently formatted face_ids
+                    person_name, person_confidence = None, 0.0
+                    if self.person_classifier and self.person_classifier.enabled:
+                        person_name, person_confidence = self.person_classifier.identify_person(query_embedding)
+                    result = {
+                        'face_id': face_id,
+                        'person_name': person_name if person_name else 'Unknown Person',
+                        'person_confidence': person_confidence,
+                        'similarity_score': similarity,
+                        'event_context': 'security_event',
+                        'face_image_path': str(self.faces_dir / f"{face_id}.jpg") if os.path.exists(self.faces_dir / f"{face_id}.jpg") else None,
+                        'timestamp': 0.0,
+                        'detection_context': 'Security Event'
+                    }
+                    search_results.append(result)
+            # Sort by similarity score (highest first)
+            search_results.sort(key=lambda x: x['similarity_score'], reverse=True)
+            logger.info(f"👤 Image search complete: Found {len(search_results)} matches with similarity >= {threshold}")
+            return search_results
+        except Exception as e:
+            logger.error(f"[FacialRecognition] Error in image search: {e}")
+            return []
+    def _extract_timestamp_from_face_id(self, face_id: str) -> float:
+        """Extract timestamp from face_id format"""
+        try:
+            parts = face_id.split('_')
+            if len(parts) >= 6:
+                # Try to extract from event part (e.g., event_obj_detection_123)
+                for part in parts:
+                    if part.isdigit():
+                        return float(part)
+            return 0.0
+        except:
+            return 0.0
+    def get_detection_stats(self) -> Dict[str, Any]:
+        """Get facial recognition detection statistics"""
+        stats = self.detection_stats.copy()
+        if hasattr(self, 'face_index'):
+            if self.advanced_mode:
+                stats['total_faces_in_database'] = self.face_index.index.ntotal if self.face_index.index else 0
+            else:
+                stats['total_faces_in_database'] = len(self.face_index.faces_db) if self.face_index else 0
+        return stats
+    def cleanup(self):
+        """Cleanup resources"""
+        if hasattr(self, 'face_index'):
+            self.face_index.save()
+        if hasattr(self, 'mongodb_storage') and self.mongodb_storage:
+            self.mongodb_storage.close()
+        logger.info("[FacialRecognition] Cleanup completed")
+# For backward compatibility: the FacialRecognitionPlaceholder name stays
+# bound to the integrated implementation, so both names refer to one class.
+FacialRecognitionPlaceholder = FacialRecognitionIntegrated

highlight_reel.py ADDED Viewed

	@@ -0,0 +1,542 @@

+"""
+Highlight Reel Generation Module
+This module creates video summaries and highlight reels using various strategies:
+- Event-aware summarization
+- Ultra-comprehensive coverage
+- Quality-focused highlights
+- Motion-based highlights
+"""
+import cv2
+import os
+import numpy as np
+from typing import List, Dict, Any, Tuple, Optional
+import json
+import logging
+from datetime import datetime
+logger = logging.getLogger(__name__)
+class HighlightReelGenerator:
+    """Generate highlight reels from processed video segments"""
+    def __init__(self, config):
+        self.config = config
+        self.highlights_dir = os.path.join(config.output_base_dir, "highlights")
+        os.makedirs(self.highlights_dir, exist_ok=True)
+    def create_event_aware_highlight_reel(self, segments: List, canonical_events: List = None) -> str:
+        """
+        Create highlight reel focusing on detected events
+        Args:
+            segments: List of video segments
+            canonical_events: List of canonical events (optional)
+        Returns:
+            Path to generated highlight reel
+        """
+        logger.info("Creating event-aware highlight reel")
+        output_path = os.path.join(self.highlights_dir, "event_aware_highlights.mp4")
+        # Detect event segments
+        event_segments = self._detect_event_segments(segments)
+        # Select keyframes with event priority
+        selected_keyframes = self._select_event_aware_keyframes(
+            segments, event_segments, canonical_events
+        )
+        # Create video
+        success = self._create_highlight_video(
+            selected_keyframes,
+            output_path,
+            "Event-Aware Highlights"
+        )
+        if success:
+            logger.info(f"Event-aware highlight reel created: {output_path}")
+            return output_path
+        else:
+            logger.error("Failed to create event-aware highlight reel")
+            return ""
+    def create_ultra_comprehensive_highlight_reel(self, segments: List) -> str:
+        """
+        Create comprehensive highlight reel capturing maximum important moments
+        Args:
+            segments: List of video segments
+        Returns:
+            Path to generated highlight reel
+        """
+        logger.info("Creating ultra-comprehensive highlight reel")
+        output_path = os.path.join(self.highlights_dir, "ultra_comprehensive_highlights.mp4")
+        # Use ultra-sensitive selection
+        selected_keyframes = self._select_ultra_comprehensive_keyframes(segments)
+        # Create video
+        success = self._create_highlight_video(
+            selected_keyframes,
+            output_path,
+            "Ultra-Comprehensive Highlights"
+        )
+        if success:
+            logger.info(f"Ultra-comprehensive highlight reel created: {output_path}")
+            return output_path
+        else:
+            logger.error("Failed to create ultra-comprehensive highlight reel")
+            return ""
+    def create_quality_focused_highlight_reel(self, segments: List) -> str:
+        """
+        Create highlight reel focusing on highest quality frames
+        Args:
+            segments: List of video segments
+        Returns:
+            Path to generated highlight reel
+        """
+        logger.info("Creating quality-focused highlight reel")
+        output_path = os.path.join(self.highlights_dir, "quality_focused_highlights.mp4")
+        # Select highest quality keyframes
+        selected_keyframes = self._select_quality_focused_keyframes(segments)
+        # Create video
+        success = self._create_highlight_video(
+            selected_keyframes,
+            output_path,
+            "Quality-Focused Highlights"
+        )
+        if success:
+            logger.info(f"Quality-focused highlight reel created: {output_path}")
+            return output_path
+        else:
+            logger.error("Failed to create quality-focused highlight reel")
+            return ""
+    def _detect_event_segments(self, segments: List) -> List[int]:
+        """Detect which segments contain significant events"""
+        event_segments = []
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            if not keyframes:
+                continue
+            # Calculate segment activity metrics
+            motion_scores = [kf['frame_data']['motion_score'] for kf in keyframes]
+            burst_count = sum(1 for kf in keyframes if kf['frame_data']['burst_active'])
+            max_motion = max(motion_scores) if motion_scores else 0
+            avg_motion = sum(motion_scores) / len(motion_scores) if motion_scores else 0
+            # Event detection criteria
+            is_event_segment = (
+                max_motion > self.config.motion_threshold or
+                avg_motion > self.config.motion_threshold * 0.5 or
+                burst_count >= 1
+            )
+            if is_event_segment:
+                segment_id = segment.get('segment_id', len(event_segments))
+                event_segments.append(segment_id)
+        return event_segments
+    def _select_event_aware_keyframes(self, segments: List, event_segments: List[int],
+                                    canonical_events: List = None) -> List[Dict]:
+        """Select keyframes with event awareness"""
+        selected_keyframes = []
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            if not keyframes:
+                continue
+            segment_id = segment.get('segment_id', 0)
+            if segment_id in event_segments:
+                # Event segment: select multiple keyframes
+                scored_keyframes = []
+                for kf in keyframes:
+                    frame_data = kf['frame_data']
+                    base_score = kf['keyframe_score']
+                    motion_score = frame_data['motion_score']
+                    is_burst = frame_data['burst_active']
+                    # Event-aware scoring
+                    event_score = base_score
+                    if motion_score > self.config.motion_threshold:
+                        event_score += motion_score * 0.5
+                    if is_burst:
+                        event_score *= self.config.burst_weight
+                    scored_keyframes.append({
+                        'keyframe_data': kf,
+                        'event_score': event_score,
+                        'timestamp': frame_data['timestamp'],
+                        'is_event': True,
+                        'segment_id': segment_id
+                    })
+                # Select top keyframes from event segment
+                scored_keyframes.sort(key=lambda x: x['event_score'], reverse=True)
+                num_select = min(3, max(2, len([kf for kf in keyframes if kf['frame_data']['burst_active']])))
+                selected_keyframes.extend(scored_keyframes[:num_select])
+            else:
+                # Regular segment: select best keyframe
+                best_kf = max(keyframes, key=lambda x: x['keyframe_score'])
+                if best_kf['keyframe_score'] >= self.config.base_quality_threshold:
+                    selected_keyframes.append({
+                        'keyframe_data': best_kf,
+                        'event_score': best_kf['keyframe_score'],
+                        'timestamp': best_kf['frame_data']['timestamp'],
+                        'is_event': False,
+                        'segment_id': segment_id
+                    })
+        # Sort by timestamp and limit
+        selected_keyframes.sort(key=lambda x: x['timestamp'])
+        if len(selected_keyframes) > self.config.max_summary_frames:
+            # Prioritize by event score
+            selected_keyframes.sort(key=lambda x: x['event_score'], reverse=True)
+            selected_keyframes = selected_keyframes[:self.config.max_summary_frames]
+            selected_keyframes.sort(key=lambda x: x['timestamp'])
+        return selected_keyframes
+    def _select_ultra_comprehensive_keyframes(self, segments: List) -> List[Dict]:
+        """Select keyframes with ultra-comprehensive coverage"""
+        all_important_frames = []
+        # Ultra-low thresholds for comprehensive coverage
+        ultra_motion_threshold = self.config.motion_threshold * 0.5
+        ultra_quality_threshold = self.config.base_quality_threshold * 0.8
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            segment_id = segment.get('segment_id', 0)
+            for kf in keyframes:
+                frame_data = kf['frame_data']
+                base_score = kf['keyframe_score']
+                motion_score = frame_data['motion_score']
+                is_burst = frame_data['burst_active']
+                timestamp = frame_data['timestamp']
+                # Ultra-comprehensive scoring
+                importance = base_score
+                # Any motion is important
+                if motion_score > ultra_motion_threshold:
+                    importance += motion_score * 1.0
+                elif motion_score > 0:
+                    importance += motion_score * 0.5
+                # Burst frames are critical
+                if is_burst:
+                    importance *= 3.0
+                # Quality bonus
+                if base_score > self.config.base_quality_threshold * 1.1:
+                    importance += 0.1
+                # Include frame if it meets any importance criteria
+                include_frame = (
+                    importance > 0.20 or
+                    motion_score > ultra_motion_threshold or
+                    is_burst or
+                    base_score > ultra_quality_threshold
+                )
+                if include_frame:
+                    all_important_frames.append({
+                        'keyframe_data': kf,
+                        'importance_score': importance,
+                        'motion_score': motion_score,
+                        'is_burst': is_burst,
+                        'timestamp': timestamp,
+                        'segment_id': segment_id
+                    })
+        # Sort by importance and ensure temporal diversity
+        all_important_frames.sort(key=lambda x: x['importance_score'], reverse=True)
+        selected_frames = []
+        covered_timeframes = set()
+        for frame in all_important_frames:
+            timestamp = frame['timestamp']
+            timeframe = int(timestamp // 5) * 5  # 5-second bins
+            if timeframe not in covered_timeframes or len(selected_frames) < self.config.max_summary_frames:
+                selected_frames.append({
+                    'keyframe_data': frame['keyframe_data'],
+                    'event_score': frame['importance_score'],
+                    'timestamp': timestamp,
+                    'is_event': frame['is_burst'] or frame['motion_score'] > self.config.motion_threshold,
+                    'segment_id': frame['segment_id']
+                })
+                covered_timeframes.add(timeframe)
+                if len(selected_frames) >= self.config.max_summary_frames:
+                    break
+        # Sort by timestamp
+        selected_frames.sort(key=lambda x: x['timestamp'])
+        return selected_frames
+    def _select_quality_focused_keyframes(self, segments: List) -> List[Dict]:
+        """Select keyframes focusing on quality"""
+        all_quality_frames = []
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            segment_id = segment.get('segment_id', 0)
+            for kf in keyframes:
+                frame_data = kf['frame_data']
+                quality_score = frame_data['quality_score']
+                # Only include high-quality frames
+                if quality_score >= self.config.base_quality_threshold * 1.2:
+                    all_quality_frames.append({
+                        'keyframe_data': kf,
+                        'event_score': quality_score,
+                        'timestamp': frame_data['timestamp'],
+                        'is_event': False,
+                        'segment_id': segment_id
+                    })
+        # Sort by quality score and limit
+        all_quality_frames.sort(key=lambda x: x['event_score'], reverse=True)
+        # Ensure temporal diversity
+        selected_frames = []
+        last_timestamp = -float('inf')
+        min_gap = 3.0  # Minimum 3 seconds between frames
+        for frame in all_quality_frames:
+            if frame['timestamp'] - last_timestamp >= min_gap:
+                selected_frames.append(frame)
+                last_timestamp = frame['timestamp']
+                if len(selected_frames) >= self.config.max_summary_frames:
+                    break
+        # Sort by timestamp
+        selected_frames.sort(key=lambda x: x['timestamp'])
+        return selected_frames
+    def _create_highlight_video(self, selected_keyframes: List[Dict], output_path: str,
+                              title: str = "Highlight Reel") -> bool:
+        """Create highlight video from selected keyframes"""
+        if not selected_keyframes:
+            logger.error("No keyframes selected for highlight reel")
+            return False
+        try:
+            # Read first frame to get dimensions
+            first_frame_path = selected_keyframes[0]['keyframe_data']['frame_data']['frame_path']
+            first_image = cv2.imread(first_frame_path)
+            if first_image is None:
+                logger.error(f"Cannot read first frame: {first_frame_path}")
+                return False
+            height, width = first_image.shape[:2]
+            # Set up video writer
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            fps = self.config.summary_fps
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+            if not out.isOpened():
+                logger.error("Cannot create video writer")
+                return False
+            # Add frames to video
+            frames_added = 0
+            logger.info(f"Creating {title} with {len(selected_keyframes)} frames")
+            for kf in selected_keyframes:
+                frame_path = kf['keyframe_data']['frame_data']['frame_path']
+                if os.path.exists(frame_path):
+                    frame = cv2.imread(frame_path)
+                    if frame is not None:
+                        # Resize frame if needed
+                        if frame.shape[:2] != (height, width):
+                            frame = cv2.resize(frame, (width, height))
+                        out.write(frame)
+                        frames_added += 1
+                        # Log frame info
+                        timestamp = kf['timestamp']
+                        mins = int(timestamp // 60)
+                        secs = timestamp % 60
+                        event_type = "EVENT" if kf['is_event'] else "QUALITY"
+                        logger.debug(f"Added frame: {mins:02d}:{secs:04.1f} - {event_type}")
+                    else:
+                        logger.warning(f"Cannot read frame: {frame_path}")
+                else:
+                    logger.warning(f"Frame not found: {frame_path}")
+            out.release()
+            # Verify output
+            if frames_added > 0 and os.path.exists(output_path):
+                file_size = os.path.getsize(output_path) / (1024*1024)
+                duration = frames_added / fps
+                logger.info(f"✅ {title} created successfully!")
+                logger.info(f"📁 Path: {output_path}")
+                logger.info(f"📊 {frames_added} frames, {duration:.1f}s duration, {file_size:.1f} MB")
+                return True
+            else:
+                logger.error("Failed to create video file")
+                return False
+        except Exception as e:
+            logger.error(f"Error creating highlight video: {e}")
+            return False
+    def create_custom_highlight_reel(self, segments: List, selection_criteria: Dict[str, Any]) -> str:
+        """
+        Create custom highlight reel based on specific criteria
+        Args:
+            segments: List of video segments
+            selection_criteria: Custom criteria for frame selection
+        Returns:
+            Path to generated highlight reel
+        """
+        logger.info(f"Creating custom highlight reel with criteria: {selection_criteria}")
+        output_path = os.path.join(self.highlights_dir, "custom_highlights.mp4")
+        # Apply custom selection
+        selected_keyframes = self._apply_custom_selection(segments, selection_criteria)
+        # Create video
+        success = self._create_highlight_video(
+            selected_keyframes,
+            output_path,
+            "Custom Highlights"
+        )
+        if success:
+            logger.info(f"Custom highlight reel created: {output_path}")
+            return output_path
+        else:
+            logger.error("Failed to create custom highlight reel")
+            return ""
+    def _apply_custom_selection(self, segments: List, criteria: Dict[str, Any]) -> List[Dict]:
+        """Apply custom selection criteria"""
+        selected_keyframes = []
+        # Extract criteria
+        min_motion = criteria.get('min_motion_score', 0.0)
+        min_quality = criteria.get('min_quality_score', self.config.base_quality_threshold)
+        require_burst = criteria.get('require_burst', False)
+        max_frames = criteria.get('max_frames', self.config.max_summary_frames)
+        time_range = criteria.get('time_range', None)  # (start, end) tuple
+        for segment in segments:
+            keyframes = segment.get('keyframes', [])
+            for kf in keyframes:
+                frame_data = kf['frame_data']
+                timestamp = frame_data['timestamp']
+                motion_score = frame_data['motion_score']
+                quality_score = frame_data['quality_score']
+                is_burst = frame_data['burst_active']
+                # Apply criteria
+                meets_criteria = True
+                if motion_score < min_motion:
+                    meets_criteria = False
+                if quality_score < min_quality:
+                    meets_criteria = False
+                if require_burst and not is_burst:
+                    meets_criteria = False
+                if time_range:
+                    start_time, end_time = time_range
+                    if not (start_time <= timestamp <= end_time):
+                        meets_criteria = False
+                if meets_criteria:
+                    selected_keyframes.append({
+                        'keyframe_data': kf,
+                        'event_score': kf['keyframe_score'],
+                        'timestamp': timestamp,
+                        'is_event': is_burst or motion_score > self.config.motion_threshold,
+                        'segment_id': segment.get('segment_id', 0)
+                    })
+        # Sort and limit
+        selected_keyframes.sort(key=lambda x: x['event_score'], reverse=True)
+        selected_keyframes = selected_keyframes[:max_frames]
+        selected_keyframes.sort(key=lambda x: x['timestamp'])
+        return selected_keyframes
+    def generate_highlight_reel_metadata(self, selected_keyframes: List[Dict],
+                                       output_path: str) -> bool:
+        """Generate metadata file for highlight reel"""
+        try:
+            metadata = {
+                'generation_info': {
+                    'timestamp': datetime.now().isoformat(),
+                    'total_frames': len(selected_keyframes),
+                    'selection_config': {
+                        'max_summary_frames': self.config.max_summary_frames,
+                        'summary_fps': self.config.summary_fps,
+                        'motion_threshold': self.config.motion_threshold,
+                        'quality_threshold': self.config.base_quality_threshold
+                    }
+                },
+                'frame_details': []
+            }
+            for i, kf in enumerate(selected_keyframes):
+                frame_detail = {
+                    'sequence_number': i + 1,
+                    'timestamp': kf['timestamp'],
+                    'is_event_frame': kf['is_event'],
+                    'segment_id': kf['segment_id'],
+                    'event_score': kf['event_score'],
+                    'frame_path': kf['keyframe_data']['frame_data']['frame_path']
+                }
+                metadata['frame_details'].append(frame_detail)
+            with open(output_path, 'w') as f:
+                json.dump(metadata, f, indent=2)
+            logger.info(f"Highlight reel metadata saved: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save highlight reel metadata: {e}")
+            return False

json_reports.py ADDED Viewed

	@@ -0,0 +1,575 @@

+"""
+JSON Reports Generation Module
+This module handles:
+- Processing results JSON reports
+- Canonical events JSON
+- Segment analysis reports
+- Performance statistics
+- HTML gallery generation
+"""
+import json
+import os
+import cv2
+import base64
+from typing import Dict, List, Any, Optional
+from datetime import datetime
+import logging
+logger = logging.getLogger(__name__)
+class ReportGenerator:
+    """Generate comprehensive JSON reports and HTML galleries"""
+    def __init__(self, config):
+        self.config = config
+        self.reports_dir = os.path.join(config.output_base_dir, "reports")
+        os.makedirs(self.reports_dir, exist_ok=True)
+    def generate_processing_results_report(self,
+                                         keyframes: List,
+                                         events: List,
+                                         canonical_events: List,
+                                         segments: List,
+                                         processing_stats: Dict[str, Any]) -> str:
+        """Generate comprehensive processing results report"""
+        logger.info("Generating processing results report")
+        report = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'report_version': '1.0',
+                'processing_config': self._get_config_summary()
+            },
+            'summary': {
+                'total_keyframes_extracted': len(keyframes),
+                'total_events_detected': len(events),
+                'canonical_events_created': len(canonical_events),
+                'video_segments_created': len(segments),
+                'processing_duration': processing_stats.get('total_processing_time', 0)
+            },
+            'keyframe_analysis': self._analyze_keyframes(keyframes),
+            'event_analysis': self._analyze_events(events),
+            'canonical_event_analysis': self._analyze_canonical_events(canonical_events),
+            'segment_analysis': self._analyze_segments(segments),
+            'performance_statistics': processing_stats,
+            'quality_metrics': self._calculate_quality_metrics(keyframes, events)
+        }
+        # Save report
+        output_path = os.path.join(self.reports_dir, "processing_results.json")
+        try:
+            with open(output_path, 'w') as f:
+                json.dump(report, f, indent=2)
+            logger.info(f"Processing results report saved: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to save processing results report: {e}")
+            return ""
+    def generate_canonical_events_report(self, canonical_events: List) -> str:
+        """Generate canonical events JSON report"""
+        logger.info("Generating canonical events report")
+        report = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'total_canonical_events': len(canonical_events),
+                'deduplication_threshold': self.config.similarity_threshold
+            },
+            'canonical_events': []
+        }
+        for event in canonical_events:
+            event_data = {
+                'canonical_id': event.canonical_id,
+                'event_type': event.event_type,
+                'representative_frame': event.representative_frame,
+                'time_range': {
+                    'start_time': event.start_time,
+                    'end_time': event.end_time,
+                    'duration': event.duration
+                },
+                'confidence': event.confidence,
+                'frame_count': event.frame_count,
+                'aggregated_events': event.aggregated_events,
+                'description': event.description,
+                'similarity_cluster': event.similarity_cluster
+            }
+            report['canonical_events'].append(event_data)
+        # Save report
+        output_path = os.path.join(self.reports_dir, "canonical_events.json")
+        try:
+            with open(output_path, 'w') as f:
+                json.dump(report, f, indent=2)
+            logger.info(f"Canonical events report saved: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to save canonical events report: {e}")
+            return ""
+    def generate_segments_report(self, segments: List) -> str:
+        """Generate video segments analysis report"""
+        logger.info("Generating video segments report")
+        report = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'total_segments': len(segments),
+                'segment_duration': self.config.segment_duration,
+                'keyframes_per_segment': self.config.keyframes_per_segment
+            },
+            'summary_statistics': self._get_segments_summary(segments),
+            'segments': []
+        }
+        for segment in segments:
+            segment_data = {
+                'segment_id': segment.segment_id,
+                'time_range': {
+                    'start_timestamp': segment.start_timestamp,
+                    'end_timestamp': segment.end_timestamp,
+                    'duration': segment.duration
+                },
+                'frame_range': {
+                    'start_frame': segment.start_frame,
+                    'end_frame': segment.end_frame
+                },
+                'segment_classification': {
+                    'segment_type': segment.segment_type,
+                    'activity_level': segment.activity_level
+                },
+                'statistics': {
+                    'motion_statistics': segment.motion_statistics,
+                    'quality_statistics': segment.quality_statistics,
+                    'keyframe_count': len(segment.keyframes)
+                },
+                'keyframes': segment.keyframes
+            }
+            report['segments'].append(segment_data)
+        # Save report
+        output_path = os.path.join(self.reports_dir, "video_segments.json")
+        try:
+            with open(output_path, 'w') as f:
+                json.dump(report, f, indent=2)
+            logger.info(f"Video segments report saved: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to save video segments report: {e}")
+            return ""
+    def generate_html_gallery(self, keyframes: List, canonical_events: List = None,
+                            segments: List = None, title: str = "Video Processing Gallery") -> str:
+        """Generate interactive HTML gallery of keyframes and events"""
+        logger.info("Generating HTML gallery")
+        html_content = self._create_html_gallery(keyframes, canonical_events, segments, title)
+        # Save HTML gallery
+        output_path = os.path.join(self.reports_dir, "canonical_gallery.html")
+        try:
+            with open(output_path, 'w', encoding='utf-8') as f:
+                f.write(html_content)
+            logger.info(f"HTML gallery saved: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to save HTML gallery: {e}")
+            return ""
+    def _get_config_summary(self) -> Dict[str, Any]:
+        """Get summary of configuration settings"""
+        return {
+            'base_quality_threshold': self.config.base_quality_threshold,
+            'motion_threshold': self.config.motion_threshold,
+            'event_importance_threshold': self.config.event_importance_threshold,
+            'similarity_threshold': self.config.similarity_threshold,
+            'segment_duration': self.config.segment_duration,
+            'max_summary_frames': self.config.max_summary_frames,
+            'output_resolution': self.config.output_resolution,
+            'enable_clahe': self.config.enable_clahe,
+            'enable_denoising': self.config.enable_denoising
+        }
+    def _analyze_keyframes(self, keyframes: List) -> Dict[str, Any]:
+        """Analyze keyframe extraction results"""
+        if not keyframes:
+            return {}
+        # Extract metrics
+        quality_scores = [kf.frame_data.quality_score for kf in keyframes]
+        motion_scores = [kf.frame_data.motion_score for kf in keyframes]
+        selection_reasons = [kf.selection_reason for kf in keyframes]
+        burst_frames = [kf for kf in keyframes if kf.frame_data.burst_active]
+        enhanced_frames = [kf for kf in keyframes if kf.frame_data.enhancement_applied]
+        # Count selection reasons
+        reason_counts = {}
+        for reason in selection_reasons:
+            reason_counts[reason] = reason_counts.get(reason, 0) + 1
+        # Calculate statistics
+        analysis = {
+            'total_keyframes': len(keyframes),
+            'quality_statistics': {
+                'min': float(min(quality_scores)),
+                'max': float(max(quality_scores)),
+                'mean': float(sum(quality_scores) / len(quality_scores)),
+                'std': float(np.std(quality_scores))
+            },
+            'motion_statistics': {
+                'min': float(min(motion_scores)),
+                'max': float(max(motion_scores)),
+                'mean': float(sum(motion_scores) / len(motion_scores)),
+                'std': float(np.std(motion_scores))
+            },
+            'selection_reason_distribution': reason_counts,
+            'burst_frames_count': len(burst_frames),
+            'enhanced_frames_count': len(enhanced_frames),
+            'enhancement_rate': len(enhanced_frames) / len(keyframes) * 100
+        }
+        return analysis
+    def _analyze_events(self, events: List) -> Dict[str, Any]:
+        """Analyze detected events"""
+        if not events:
+            return {}
+        # Event type distribution
+        event_types = [event.event_type for event in events]
+        type_counts = {}
+        for event_type in event_types:
+            type_counts[event_type] = type_counts.get(event_type, 0) + 1
+        # Confidence statistics
+        confidences = [event.confidence for event in events]
+        importance_scores = [event.importance_score for event in events]
+        durations = [event.end_timestamp - event.start_timestamp for event in events]
+        analysis = {
+            'total_events': len(events),
+            'event_type_distribution': type_counts,
+            'confidence_statistics': {
+                'min': float(min(confidences)),
+                'max': float(max(confidences)),
+                'mean': float(sum(confidences) / len(confidences))
+            },
+            'importance_statistics': {
+                'min': float(min(importance_scores)),
+                'max': float(max(importance_scores)),
+                'mean': float(sum(importance_scores) / len(importance_scores))
+            },
+            'duration_statistics': {
+                'min': float(min(durations)),
+                'max': float(max(durations)),
+                'mean': float(sum(durations) / len(durations))
+            }
+        }
+        return analysis
+    def _analyze_canonical_events(self, canonical_events: List) -> Dict[str, Any]:
+        """Analyze canonical events"""
+        if not canonical_events:
+            return {}
+        # Type distribution
+        event_types = [event.event_type for event in canonical_events]
+        type_counts = {}
+        for event_type in event_types:
+            type_counts[event_type] = type_counts.get(event_type, 0) + 1
+        # Statistics
+        durations = [event.duration for event in canonical_events]
+        frame_counts = [event.frame_count for event in canonical_events]
+        confidences = [event.confidence for event in canonical_events]
+        analysis = {
+            'total_canonical_events': len(canonical_events),
+            'event_type_distribution': type_counts,
+            'duration_statistics': {
+                'min': float(min(durations)),
+                'max': float(max(durations)),
+                'mean': float(sum(durations) / len(durations))
+            },
+            'frame_count_statistics': {
+                'min': int(min(frame_counts)),
+                'max': int(max(frame_counts)),
+                'mean': float(sum(frame_counts) / len(frame_counts))
+            },
+            'confidence_statistics': {
+                'min': float(min(confidences)),
+                'max': float(max(confidences)),
+                'mean': float(sum(confidences) / len(confidences))
+            }
+        }
+        return analysis
+    def _analyze_segments(self, segments: List) -> Dict[str, Any]:
+        """Analyze video segments"""
+        if not segments:
+            return {}
+        # Type and activity distribution
+        segment_types = [seg.segment_type for seg in segments]
+        activity_levels = [seg.activity_level for seg in segments]
+        type_counts = {}
+        for seg_type in segment_types:
+            type_counts[seg_type] = type_counts.get(seg_type, 0) + 1
+        activity_counts = {}
+        for activity in activity_levels:
+            activity_counts[activity] = activity_counts.get(activity, 0) + 1
+        analysis = {
+            'total_segments': len(segments),
+            'segment_type_distribution': type_counts,
+            'activity_level_distribution': activity_counts,
+            'average_segment_duration': float(sum(seg.duration for seg in segments) / len(segments)),
+            'total_keyframes': sum(len(seg.keyframes) for seg in segments)
+        }
+        return analysis
+    def _calculate_quality_metrics(self, keyframes: List, events: List) -> Dict[str, Any]:
+        """Calculate overall quality metrics"""
+        if not keyframes:
+            return {}
+        # Coverage metrics
+        total_frames_extracted = len(keyframes)
+        burst_frames = len([kf for kf in keyframes if kf.frame_data.burst_active])
+        high_quality_frames = len([kf for kf in keyframes if kf.frame_data.quality_score > self.config.base_quality_threshold * 1.2])
+        high_motion_frames = len([kf for kf in keyframes if kf.frame_data.motion_score > self.config.motion_threshold])
+        # Event coverage
+        event_coverage = len(events) / total_frames_extracted if total_frames_extracted > 0 else 0
+        metrics = {
+            'frame_extraction_efficiency': {
+                'total_frames_extracted': total_frames_extracted,
+                'burst_frame_rate': burst_frames / total_frames_extracted * 100,
+                'high_quality_frame_rate': high_quality_frames / total_frames_extracted * 100,
+                'high_motion_frame_rate': high_motion_frames / total_frames_extracted * 100
+            },
+            'event_detection_efficiency': {
+                'events_per_keyframe': event_coverage,
+                'total_events_detected': len(events)
+            },
+            'processing_quality_score': self._calculate_overall_quality_score(keyframes, events)
+        }
+        return metrics
+    def _calculate_overall_quality_score(self, keyframes: List, events: List) -> float:
+        """Calculate overall processing quality score (0-100)"""
+        if not keyframes:
+            return 0.0
+        # Component scores
+        avg_quality = sum(kf.frame_data.quality_score for kf in keyframes) / len(keyframes)
+        avg_motion = sum(kf.frame_data.motion_score for kf in keyframes) / len(keyframes)
+        burst_rate = len([kf for kf in keyframes if kf.frame_data.burst_active]) / len(keyframes)
+        event_rate = len(events) / len(keyframes) if len(keyframes) > 0 else 0
+        # Weighted combination
+        quality_score = (
+            avg_quality * 40 +       # 40% weight on frame quality
+            avg_motion * 30 +        # 30% weight on motion detection
+            burst_rate * 20 +        # 20% weight on burst detection
+            event_rate * 10          # 10% weight on event detection
+        ) * 100
+        return min(100.0, quality_score)
+    def _get_segments_summary(self, segments: List) -> Dict[str, Any]:
+        """Get summary statistics for segments"""
+        if not segments:
+            return {}
+        # Activity level distribution
+        activity_levels = [seg.activity_level for seg in segments]
+        activity_counts = {}
+        for level in activity_levels:
+            activity_counts[level] = activity_counts.get(level, 0) + 1
+        # Segment type distribution
+        segment_types = [seg.segment_type for seg in segments]
+        type_counts = {}
+        for seg_type in segment_types:
+            type_counts[seg_type] = type_counts.get(seg_type, 0) + 1
+        return {
+            'total_segments': len(segments),
+            'activity_level_distribution': activity_counts,
+            'segment_type_distribution': type_counts
+        }
+    def _create_html_gallery(self, keyframes: List, canonical_events: List = None,
+                           segments: List = None, title: str = "Video Processing Gallery") -> str:
+        """Create HTML gallery content"""
+        html_template = f"""
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{title}</title>
+    <style>
+        body {{ font-family: Arial, sans-serif; margin: 20px; background-color: #f5f5f5; }}
+        .header {{ text-align: center; margin-bottom: 30px; }}
+        .stats {{ display: flex; justify-content: space-around; margin-bottom: 30px; }}
+        .stat-card {{ background: white; padding: 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .gallery {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 20px; }}
+        .frame-card {{ background: white; border-radius: 8px; overflow: hidden; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .frame-image {{ width: 100%; height: 200px; object-fit: cover; }}
+        .frame-info {{ padding: 15px; }}
+        .frame-info h3 {{ margin: 0 0 10px 0; color: #333; }}
+        .frame-info p {{ margin: 5px 0; color: #666; font-size: 14px; }}
+        .event-badge {{ display: inline-block; padding: 3px 8px; border-radius: 12px; font-size: 12px; color: white; margin-right: 5px; }}
+        .burst-activity {{ background-color: #e74c3c; }}
+        .high-motion {{ background-color: #f39c12; }}
+        .high-quality {{ background-color: #27ae60; }}
+        .context-frame {{ background-color: #3498db; }}
+        .timestamp {{ font-weight: bold; color: #2c3e50; }}
+        .score {{ color: #8e44ad; font-weight: bold; }}
+    </style>
+</head>
+<body>
+    <div class="header">
+        <h1>{title}</h1>
+        <p>Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
+    </div>
+    <div class="stats">
+        <div class="stat-card">
+            <h3>Keyframes</h3>
+            <p>{len(keyframes)} extracted</p>
+        </div>
+        <div class="stat-card">
+            <h3>Events</h3>
+            <p>{len(canonical_events) if canonical_events else 0} canonical</p>
+        </div>
+        <div class="stat-card">
+            <h3>Segments</h3>
+            <p>{len(segments) if segments else 0} temporal</p>
+        </div>
+    </div>
+    <div class="gallery">
+"""
+        # Add keyframes to gallery
+        for i, kf in enumerate(keyframes[:50]):  # Limit to first 50 for performance
+            try:
+                frame_path = kf.frame_data.frame_path
+                # Convert image to base64 for embedding
+                image_data = ""
+                if os.path.exists(frame_path):
+                    try:
+                        with open(frame_path, 'rb') as img_file:
+                            image_data = base64.b64encode(img_file.read()).decode('utf-8')
+                    except Exception as e:
+                        logger.warning(f"Could not encode image {frame_path}: {e}")
+                # Format timestamp
+                timestamp = kf.frame_data.timestamp
+                mins = int(timestamp // 60)
+                secs = timestamp % 60
+                time_str = f"{mins:02d}:{secs:04.1f}"
+                # Determine badge class
+                badge_class = "context-frame"
+                if kf.frame_data.burst_active:
+                    badge_class = "burst-activity"
+                elif kf.frame_data.motion_score > self.config.motion_threshold:
+                    badge_class = "high-motion"
+                elif kf.frame_data.quality_score > self.config.base_quality_threshold * 1.2:
+                    badge_class = "high-quality"
+                html_template += f"""
+        <div class="frame-card">
+            {"<img class='frame-image' src='data:image/jpeg;base64," + image_data + "' alt='Keyframe " + str(i+1) + "'>" if image_data else "<div class='frame-image' style='background-color: #ddd; display: flex; align-items: center; justify-content: center;'>Image not available</div>"}
+            <div class="frame-info">
+                <h3>Frame {i+1}</h3>
+                <p><span class="timestamp">Time: {time_str}</span></p>
+                <p>Quality: <span class="score">{kf.frame_data.quality_score:.3f}</span></p>
+                <p>Motion: <span class="score">{kf.frame_data.motion_score:.4f}</span></p>
+                <p>Keyframe Score: <span class="score">{kf.keyframe_score:.3f}</span></p>
+                <p><span class="event-badge {badge_class}">{kf.selection_reason}</span></p>
+                {"<p>✨ Enhanced</p>" if kf.frame_data.enhancement_applied else ""}
+            </div>
+        </div>
+"""
+            except Exception as e:
+                logger.warning(f"Error processing keyframe {i}: {e}")
+        html_template += """
+    </div>
+</body>
+</html>
+"""
+        return html_template
+    def generate_captioning_report(self, captioning_results: Dict[str, Any], statistics: Dict[str, Any]) -> str:
+        """Generate video captioning results report"""
+        logger.info("Generating video captioning report")
+        report = {
+            'metadata': {
+                'generation_timestamp': datetime.now().isoformat(),
+                'report_version': '1.0'
+            },
+            'summary': {
+                'captioning_enabled': captioning_results.get('enabled', False),
+                'total_captions_generated': captioning_results.get('total_captions', 0),
+                'processing_time': captioning_results.get('processing_time', 0),
+                'errors_count': len(captioning_results.get('errors', []))
+            },
+            'statistics': statistics,
+            'captions': captioning_results.get('captions', []),
+            'errors': captioning_results.get('errors', [])
+        }
+        # Save report
+        output_path = os.path.join(self.reports_dir, "video_captioning.json")
+        try:
+            with open(output_path, 'w') as f:
+                json.dump(report, f, indent=2)
+            logger.info(f"Video captioning report saved: {output_path}")
+            return output_path
+        except Exception as e:
+            logger.error(f"Failed to save video captioning report: {e}")
+            return ""
+# Import numpy for statistics
+import numpy as np

live_stream_processor.py ADDED Viewed

	@@ -0,0 +1,866 @@

+"""
+Live Stream Processor for DetectifAI
+Processes live webcam/CCTV footage through the same pipeline as uploaded videos:
+- Object detection (fire, weapons)
+- Behavior analysis (fighting, accidents, climbing)
+- Facial recognition on suspicious frames
+- Real-time event detection
+- Storage in MongoDB and MinIO
+"""
+import cv2
+import numpy as np
+import io
+import os
+import time
+import threading
+import logging
+import uuid
+from datetime import datetime
+from typing import Optional, Dict, Any, List, Tuple
+from pathlib import Path
+from config import VideoProcessingConfig, get_security_focused_config
+from object_detection import ObjectDetector
+from behavior_analysis_integrator import BehaviorAnalysisIntegrator
+from database.config import DatabaseManager
+from database.repositories import VideoRepository, EventRepository
+from database.keyframe_repository import KeyframeRepository
+# Real-time alert engine
+try:
+    from real_time_alerts import get_alert_engine, RealTimeAlertEngine
+    ALERTS_AVAILABLE = True
+except ImportError:
+    ALERTS_AVAILABLE = False
+    logging.warning("Real-time alerts module not available")
+logger = logging.getLogger(__name__)
+class LiveStreamProcessor:
+    """Process live video streams with DetectifAI pipeline"""
+    def __init__(self, config: VideoProcessingConfig = None, camera_id: str = "webcam_01"):
+        """
+        Initialize live stream processor
+        Sets up database repositories, the optional analysis components
+        (object detection, behavior analysis, facial recognition), the
+        real-time alert engine and the running statistics. A component that
+        fails to initialize is logged and left as None rather than aborting
+        construction.
+        Args:
+            config: VideoProcessingConfig object; when falsy, the result of
+                get_security_focused_config() is used
+            camera_id: Unique identifier for the camera/stream
+        """
+        self.config = config or get_security_focused_config()
+        self.camera_id = camera_id
+        self.is_processing = False
+        self.cap = None
+        self.camera_index = 0  # Default camera index
+        self.frame_count = 0
+        self.last_keyframe_time = 0
+        self.keyframe_interval = 1.0  # Extract keyframe every 1 second
+        # Initialize database connections
+        self.db_manager = DatabaseManager()
+        self.video_repo = VideoRepository(self.db_manager)
+        self.event_repo = EventRepository(self.db_manager)
+        self.keyframe_repo = KeyframeRepository(self.db_manager)
+        # Initialize processing components
+        self.object_detector = None
+        if self.config.enable_object_detection:
+            try:
+                self.object_detector = ObjectDetector(self.config)
+                logger.info("✅ Object detection enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Object detection initialization failed: {e}")
+                # NOTE: this flips the flag on the config object itself, which
+                # may be shared with other users of the same config instance
+                self.config.enable_object_detection = False
+        self.behavior_analyzer = None
+        # getattr: the flag is treated as optional on the config object
+        if getattr(self.config, 'enable_behavior_analysis', False):
+            try:
+                self.behavior_analyzer = BehaviorAnalysisIntegrator(self.config)
+                logger.info("✅ Behavior analysis enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Behavior analysis initialization failed: {e}")
+                self.config.enable_behavior_analysis = False
+        # Initialize facial recognition if enabled
+        self.face_recognizer = None
+        if getattr(self.config, 'enable_facial_recognition', False):
+            try:
+                # Imported lazily so a missing facial_recognition module only
+                # disables this feature (the ImportError is caught below)
+                from facial_recognition import FacialRecognitionIntegrated
+                self.face_recognizer = FacialRecognitionIntegrated(self.config)
+                logger.info("✅ Facial recognition enabled for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Facial recognition initialization failed: {e}")
+        # Frame buffer for behavior analysis (needs 16 frames)
+        self.frame_buffer = []
+        self.buffer_size = 16
+        # Motion detection
+        self.prev_frame_gray = None
+        # Per-pixel absolute-difference level above which a pixel counts as changed
+        self.motion_threshold = 25
+        # Real-time alert engine
+        self.alert_engine = None
+        if ALERTS_AVAILABLE:
+            try:
+                self.alert_engine = get_alert_engine()
+                self.alert_engine.load_flagged_persons()
+                logger.info("✅ Real-time alert engine connected for live stream")
+            except Exception as e:
+                logger.warning(f"⚠️ Alert engine initialization failed: {e}")
+        # Statistics
+        self.stats = {
+            'frames_processed': 0,
+            'keyframes_extracted': 0,
+            'objects_detected': 0,
+            'behaviors_detected': 0,
+            'events_created': 0,
+            'alerts_generated': 0,
+            'start_time': None
+        }
+        logger.info(f"✅ Live stream processor initialized for camera: {camera_id}")
+    def preprocess_frame(self, frame: np.ndarray) -> Optional[np.ndarray]:
+        """
+        Preprocess frame: resize, enhance, check quality
+        Args:
+            frame: Input frame from camera
+        Returns:
+            Preprocessed frame or None if frame is too blurry
+        """
+        if frame is None:
+            return None
+        # Resize to standard size for processing
+        target_size = (640, 640)
+        processed = cv2.resize(frame, target_size)
+        # Check for blur using Laplacian variance
+        gray = cv2.cvtColor(processed, cv2.COLOR_BGR2GRAY)
+        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
+        # Skip blurry frames
+        if laplacian_var < 100:
+            return None
+        return processed
+    def detect_motion(self, frame_gray: np.ndarray) -> Tuple[bool, float]:
+        """
+        Detect motion in frame
+        Args:
+            frame_gray: Grayscale frame
+        Returns:
+            (motion_detected, motion_score)
+        """
+        if self.prev_frame_gray is None:
+            self.prev_frame_gray = frame_gray
+            return False, 0.0
+        diff = cv2.absdiff(self.prev_frame_gray, frame_gray)
+        self.prev_frame_gray = frame_gray
+        motion_score = np.sum(diff > self.motion_threshold)
+        motion_detected = motion_score > 5000
+        return motion_detected, float(motion_score)
+    def process_frame(self, frame: np.ndarray, timestamp: float) -> Dict[str, Any]:
+        """
+        Process a single frame through the pipeline
+        Args:
+            frame: Input frame
+            timestamp: Frame timestamp in seconds
+        Returns:
+            Processing results dictionary
+        """
+        results = {
+            'timestamp': timestamp,
+            'frame_count': self.frame_count,
+            'objects_detected': [],
+            'behaviors_detected': [],
+            'motion_detected': False,
+            'motion_score': 0.0,
+            'events': []
+        }
+        # Preprocess frame
+        processed_frame = self.preprocess_frame(frame)
+        if processed_frame is None:
+            return results
+        # Detect motion
+        gray = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2GRAY)
+        motion_detected, motion_score = self.detect_motion(gray)
+        results['motion_detected'] = motion_detected
+        results['motion_score'] = motion_score
+        # Add to frame buffer for behavior analysis
+        self.frame_buffer.append(processed_frame.copy())
+        if len(self.frame_buffer) > self.buffer_size:
+            self.frame_buffer.pop(0)
+        # Object detection (run on every frame with motion, or periodically)
+        # For real-time display, we want detections to show immediately
+        should_run_detection = motion_detected or (self.frame_count % 30 == 0)  # Every 30 frames or on motion
+        if self.object_detector and should_run_detection:
+            try:
+                # Create a temporary keyframe-like object
+                from core.video_processing import KeyframeResult, FrameData
+                frame_data = FrameData(
+                    frame_path=None,  # Live frame, no file path
+                    timestamp=timestamp,
+                    frame_index=self.frame_count
+                )
+                keyframe = KeyframeResult(
+                    frame_data=frame_data,
+                    quality_score=0.8,
+                    is_keyframe=True
+                )
+                # Store frame temporarily for detection
+                import tempfile
+                temp_dir = tempfile.gettempdir()
+                temp_frame_path = os.path.join(temp_dir, f"live_frame_{self.camera_id}_{self.frame_count}.jpg")
+                cv2.imwrite(temp_frame_path, processed_frame)
+                keyframe.frame_data.frame_path = temp_frame_path
+                # Run object detection
+                detection_result = self.object_detector.detect_objects_in_keyframes([keyframe])
+                if detection_result and len(detection_result) > 0:
+                    detections = detection_result[0]
+                    if hasattr(detections, 'total_detections') and detections.total_detections > 0:
+                        results['objects_detected'] = [
+                            {
+                                'class': det.class_name,
+                                'confidence': float(det.confidence),
+                                'bbox': det.bbox
+                            }
+                            for det in detections.detections
+                        ]
+                        self.stats['objects_detected'] += len(results['objects_detected'])
+                        # Log detections in real-time
+                        obj_classes = [obj['class'] for obj in results['objects_detected']]
+                        logger.info(f"🎯 REAL-TIME DETECTION: {len(results['objects_detected'])} object(s) detected: {', '.join(obj_classes)} (frame {self.frame_count})")
+                        # Generate real-time alerts for each detection
+                        if self.alert_engine:
+                            for det in results['objects_detected']:
+                                alert = self.alert_engine.process_detection(
+                                    camera_id=self.camera_id,
+                                    detection_class=det['class'],
+                                    confidence=det['confidence'],
+                                    bounding_boxes=[det],
+                                    frame=processed_frame,
+                                    timestamp=timestamp,
+                                    video_id=f"live_{self.camera_id}",
+                                )
+                                if alert:
+                                    self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+                # Clean up temp file
+                try:
+                    os.remove(temp_frame_path)
+                except:
+                    pass
+            except Exception as e:
+                logger.warning(f"Error in object detection: {e}")
+        # Behavior analysis (on frame buffer) - use frame buffer method for live streams
+        if self.behavior_analyzer and len(self.frame_buffer) >= 16 and motion_detected:
+            try:
+                # Use frame buffer method for live streams (no video file needed)
+                behavior_results = self.behavior_analyzer.detect_behavior_in_segment_from_buffer(
+                    frame_buffer=self.frame_buffer,
+                    start_time=timestamp - (len(self.frame_buffer) / 30.0),  # Approximate start time
+                    end_time=timestamp,
+                    frame_indices=list(range(max(0, self.frame_count - len(self.frame_buffer) + 1), self.frame_count + 1))
+                )
+                if behavior_results:
+                    results['behaviors_detected'] = [
+                        {
+                            'behavior_type': r.behavior_detected,  # Use behavior_type for consistency
+                            'behavior': r.behavior_detected,  # Keep both for compatibility
+                            'confidence': float(r.confidence),
+                            'model': r.model_used
+                        }
+                        for r in behavior_results
+                    ]
+                    self.stats['behaviors_detected'] += len(results['behaviors_detected'])
+                    # Log behaviors in real-time
+                    behavior_types = [b['behavior_type'] for b in results['behaviors_detected']]
+                    logger.info(f"🎭 REAL-TIME BEHAVIOR: {len(results['behaviors_detected'])} behavior(s) detected: {', '.join(behavior_types)} (frame {self.frame_count})")
+                    # Generate real-time alerts for each behavior
+                    if self.alert_engine:
+                        for beh in results['behaviors_detected']:
+                            alert = self.alert_engine.process_detection(
+                                camera_id=self.camera_id,
+                                detection_class=beh['behavior_type'],
+                                confidence=beh['confidence'],
+                                frame=processed_frame,
+                                timestamp=timestamp,
+                                video_id=f"live_{self.camera_id}",
+                            )
+                            if alert:
+                                self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+            except Exception as e:
+                logger.warning(f"Error in behavior analysis: {e}")
+        # Facial recognition on suspicious frames
+        if self.face_recognizer and (results['objects_detected'] or results['behaviors_detected']):
+            try:
+                # Process frame for facial recognition
+                face_results = self.face_recognizer.detect_faces_in_frame(
+                    processed_frame,
+                    frame_number=self.frame_count,
+                    timestamp=timestamp,
+                    event_id=f"live_{self.camera_id}_{int(timestamp)}"
+                )
+                if face_results:
+                    results['faces_detected'] = len(face_results)
+                    # Check for suspicious person re-appearance
+                    if self.alert_engine:
+                        for face in face_results:
+                            face_id = face.get('face_id') if isinstance(face, dict) else getattr(face, 'face_id', None)
+                            match_score = face.get('confidence', 0.0) if isinstance(face, dict) else getattr(face, 'confidence_score', 0.0)
+                            if face_id and match_score:
+                                alert = self.alert_engine.process_suspicious_person(
+                                    camera_id=self.camera_id,
+                                    face_id=str(face_id),
+                                    face_match_score=float(match_score),
+                                    frame=processed_frame,
+                                    timestamp=timestamp,
+                                )
+                                if alert:
+                                    self.stats['alerts_generated'] = self.stats.get('alerts_generated', 0) + 1
+            except Exception as e:
+                logger.warning(f"Error in facial recognition: {e}")
+        return results
+    def save_keyframe(self, frame: np.ndarray, results: Dict[str, Any], timestamp: float) -> Optional[str]:
+        """
+        Save keyframe to MinIO and MongoDB (matches uploaded video pipeline)
+        Args:
+            frame: Frame to save
+            results: Processing results
+            timestamp: Frame timestamp
+        Returns:
+            MinIO object path or None
+        """
+        try:
+            # Encode frame as JPEG (same as uploaded video pipeline)
+            is_success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+            if not is_success:
+                logger.warning(f"⚠️ Failed to encode frame {self.frame_count} as JPEG")
+                return None
+            frame_bytes = buffer.tobytes()
+            frame_size = len(frame_bytes)
+            # Generate object name (consistent with uploaded video pipeline)
+            timestamp_str = datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")
+            object_name = f"live/{self.camera_id}/{timestamp_str}.jpg"
+            # Upload to MinIO (same method as uploaded video pipeline)
+            minio_client = self.keyframe_repo.minio  # Use minio client from keyframe repository
+            bucket = self.keyframe_repo.bucket  # Use bucket from keyframe repository
+            logger.info(f"📤 Uploading keyframe to MinIO: {bucket}/{object_name} ({frame_size} bytes)")
+            # Use BytesIO for in-memory upload (same as uploaded video pipeline)
+            from io import BytesIO
+            frame_buffer = BytesIO(frame_bytes)
+            # Add metadata like uploaded video pipeline
+            metadata = {
+                "frame_index": str(self.frame_count),
+                "timestamp": str(timestamp),
+                "camera_id": self.camera_id,
+                "motion_detected": str(results.get('motion_detected', False)),
+                "motion_score": str(results.get('motion_score', 0.0))
+            }
+            minio_client.put_object(
+                bucket,
+                object_name,
+                frame_buffer,
+                length=frame_size,
+                content_type="image/jpeg",
+                metadata=metadata
+            )
+            logger.info(f"✅ Uploaded keyframe to MinIO: {bucket}/{object_name}")
+            # Save to MongoDB (same as uploaded video pipeline)
+            keyframe_doc = {
+                "camera_id": self.camera_id,
+                "video_id": f"live_{self.camera_id}",  # Use consistent video_id format
+                "timestamp": timestamp,
+                "timestamp_ms": int(timestamp * 1000),
+                "frame_index": self.frame_count,
+                "frame_number": self.frame_count,  # Also include frame_number for consistency
+                "minio_path": object_name,
+                "minio_bucket": bucket,
+                "objects_detected": results.get('objects_detected', []),
+                "behaviors_detected": results.get('behaviors_detected', []),
+                "motion_detected": results.get('motion_detected', False),
+                "motion_score": results.get('motion_score', 0.0),
+                "created_at": datetime.utcnow()
+            }
+            # Use create_keyframe method (same as uploaded video pipeline)
+            keyframe_id = self.keyframe_repo.create_keyframe(keyframe_doc)
+            if keyframe_id:
+                logger.info(f"✅ Saved keyframe metadata to MongoDB: {object_name} (ID: {keyframe_id})")
+            else:
+                logger.warning(f"⚠️ Failed to save keyframe metadata to MongoDB: {object_name}")
+            self.stats['keyframes_extracted'] += 1
+            # Return full path for URL generation
+            return f"{bucket}/{object_name}"
+        except Exception as e:
+            logger.error(f"❌ Error saving keyframe: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    def create_event(self, results: Dict[str, Any], start_time: float, end_time: float) -> Optional[str]:
+        """
+        Create event from processing results (matches uploaded video pipeline)
+        Args:
+            results: Processing results
+            start_time: Event start time
+            end_time: Event end time
+        Returns:
+            Event ID or None
+        """
+        try:
+            # Determine event type based on detections (same logic as uploaded video pipeline)
+            event_type = "motion"
+            if results.get('objects_detected'):
+                # Get the primary object class for event type
+                primary_object = results['objects_detected'][0].get('class', 'object')
+                event_type = f"object_detection_{primary_object}"
+            elif results.get('behaviors_detected'):
+                primary_behavior = results['behaviors_detected'][0].get('behavior_type', 'behavior')
+                event_type = f"behavior_detection_{primary_behavior}"
+            # Calculate confidence from detections (same as uploaded video pipeline)
+            confidences = []
+            if results.get('objects_detected'):
+                confidences.extend([float(r.get('confidence', 0.0)) for r in results['objects_detected']])
+            if results.get('behaviors_detected'):
+                confidences.extend([float(r.get('confidence', 0.0)) for r in results['behaviors_detected']])
+            max_confidence = max(confidences) if confidences else 0.0
+            # Build bounding boxes structure (same format as uploaded video pipeline)
+            bounding_boxes = {}
+            if results.get('objects_detected'):
+                bounding_boxes["detections"] = [
+                    {
+                        "class": det.get('class', 'unknown'),
+                        "confidence": float(det.get('confidence', 0.0)),
+                        "bbox": [float(x) for x in det.get('bbox', [0, 0, 0, 0])],
+                        "timestamp": float(start_time),
+                        "model": det.get('detection_model', 'fire' if det.get('class') == 'fire' else 'weapon')
+                    }
+                    for det in results['objects_detected']
+                ]
+            # Create event document (matches uploaded video pipeline schema)
+            event_doc = {
+                "event_id": f"live_{self.camera_id}_{int(start_time)}_{uuid.uuid4().hex[:8]}",
+                "camera_id": self.camera_id,
+                "video_id": f"live_{self.camera_id}",  # Use camera_id as video_id for live streams
+                "event_type": event_type,
+                "start_timestamp": start_time,
+                "end_timestamp": end_time,
+                "start_timestamp_ms": int(start_time * 1000),
+                "end_timestamp_ms": int(end_time * 1000),
+                "confidence": max_confidence,
+                "confidence_score": max_confidence,  # Also include confidence_score for schema compliance
+                "description": f"Live stream event: {event_type} detected",
+                "bounding_boxes": bounding_boxes,
+                "metadata": {
+                    "camera_id": self.camera_id,
+                    "objects_detected": results.get('objects_detected', []),
+                    "behaviors_detected": results.get('behaviors_detected', []),
+                    "motion_score": results.get('motion_score', 0.0),
+                    "source": "live_stream"
+                }
+            }
+            logger.info(f"📝 Creating event: {event_type} (confidence: {max_confidence:.2f})")
+            event_id = self.event_repo.create_event(event_doc)
+            if event_id:
+                logger.info(f"✅ Created event in MongoDB: {event_doc['event_id']} (MongoDB ID: {event_id})")
+                self.stats['events_created'] += 1
+            else:
+                logger.warning(f"⚠️ Failed to create event in MongoDB: {event_doc['event_id']}")
+            return event_id
+        except Exception as e:
+            logger.error(f"❌ Error creating event: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+            return None
+    def generate_frames(self, camera_index: int = 0):
+        """
+        Generator function for video frames with processing
+        Args:
+            camera_index: Camera device index (0 for default webcam)
+        Yields:
+            Processed frame bytes for streaming
+        """
+        # Release any existing camera connection
+        if self.cap is not None:
+            try:
+                self.cap.release()
+            except:
+                pass
+        # Try to open camera with retries
+        max_retries = 3
+        self.cap = None
+        for attempt in range(max_retries):
+            try:
+                logger.info(f"Attempting to open camera {camera_index} (attempt {attempt + 1}/{max_retries})")
+                self.cap = cv2.VideoCapture(camera_index)
+                # Give camera time to initialize
+                time.sleep(0.5)
+                if self.cap.isOpened():
+                    # Test if we can actually read a frame
+                    ret, test_frame = self.cap.read()
+                    if ret and test_frame is not None:
+                        logger.info(f"✅ Successfully opened camera {camera_index}")
+                        break
+                    else:
+                        logger.warning(f"Camera {camera_index} opened but cannot read frames")
+                        self.cap.release()
+                        self.cap = None
+                else:
+                    logger.warning(f"Camera {camera_index} failed to open")
+                    if self.cap:
+                        self.cap.release()
+                        self.cap = None
+            except Exception as e:
+                logger.error(f"Error opening camera {camera_index}: {e}")
+                if self.cap:
+                    try:
+                        self.cap.release()
+                    except:
+                        pass
+                    self.cap = None
+        if self.cap is None or not self.cap.isOpened():
+            error_msg = f"❌ Could not open camera {camera_index} after {max_retries} attempts"
+            logger.error(error_msg)
+            # Yield an error frame
+            error_frame = self._create_error_frame(error_msg)
+            ret, buffer = cv2.imencode('.jpg', error_frame)
+            if ret:
+                yield (b'--frame\r\n'
+                       b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
+            return
+        # Set camera properties
+        try:
+            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+            self.cap.set(cv2.CAP_PROP_FPS, 30)
+            # Set buffer size to reduce latency
+            self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+        except Exception as e:
+            logger.warning(f"Could not set camera properties: {e}")
+        self.is_processing = True
+        self.stats['start_time'] = time.time()
+        self.frame_count = 0
+        self.last_keyframe_time = time.time()
+        logger.info(f"🎥 Started live stream processing for camera {camera_index}")
+        logger.info(f"📊 Camera properties: {self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)}x{self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)} @ {self.cap.get(cv2.CAP_PROP_FPS)} FPS")
+        logger.info(f"🔄 Entering frame generation loop...")
+        current_event_start = None
+        event_results = None
+        try:
+            consecutive_failures = 0
+            max_failures = 10
+            while self.is_processing:
+                ret, frame = self.cap.read()
+                if not ret or frame is None:
+                    consecutive_failures += 1
+                    if consecutive_failures >= max_failures:
+                        logger.error(f"❌ Failed to read {max_failures} consecutive frames from camera")
+                        break
+                    logger.warning(f"⚠️ Failed to read frame from camera (failure {consecutive_failures}/{max_failures})")
+                    time.sleep(0.1)  # Brief pause before retry
+                    continue
+                consecutive_failures = 0  # Reset on success
+                self.frame_count += 1
+                self.stats['frames_processed'] += 1
+                if self.frame_count == 1:
+                    logger.info(f"✅ Successfully read first frame! Frame shape: {frame.shape}")
+                current_time = time.time()
+                timestamp = current_time - self.stats['start_time']
+                # Process frame
+                results = self.process_frame(frame, timestamp)
+                # Extract keyframe periodically or on significant events
+                should_extract_keyframe = (
+                    (current_time - self.last_keyframe_time >= self.keyframe_interval) or
+                    results.get('objects_detected') or
+                    results.get('behaviors_detected')
+                )
+                if should_extract_keyframe:
+                    self.save_keyframe(frame, results, timestamp)
+                    self.last_keyframe_time = current_time
+                # Track events
+                if results.get('objects_detected') or results.get('behaviors_detected'):
+                    if current_event_start is None:
+                        current_event_start = timestamp
+                        event_results = results
+                    else:
+                        # Update event results
+                        event_results['objects_detected'].extend(results.get('objects_detected', []))
+                        event_results['behaviors_detected'].extend(results.get('behaviors_detected', []))
+                else:
+                    # End event if it exists
+                    if current_event_start is not None:
+                        self.create_event(event_results, current_event_start, timestamp)
+                        current_event_start = None
+                        event_results = None
+                # Draw annotations on frame
+                annotated_frame = self.annotate_frame(frame, results)
+                # Encode frame for streaming
+                ret, buffer = cv2.imencode('.jpg', annotated_frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
+                if ret:
+                    frame_bytes = buffer.tobytes()
+                    if self.frame_count % 30 == 0:  # Log every 30 frames
+                        logger.debug(f"📹 Yielding frame {self.frame_count} ({len(frame_bytes)} bytes)")
+                    yield (b'--frame\r\n'
+                           b'Content-Type: image/jpeg\r\n\r\n' + frame_bytes + b'\r\n')
+                else:
+                    logger.warning(f"⚠️ Failed to encode frame {self.frame_count}")
+                # Small delay to control frame rate
+                time.sleep(0.033)  # ~30 FPS
+        except Exception as e:
+            logger.error(f"Error in frame generation: {e}")
+            import traceback
+            logger.error(traceback.format_exc())
+        finally:
+            self.stop()
+    def _create_error_frame(self, error_message: str) -> np.ndarray:
+        """Create an error frame to display when camera fails"""
+        frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        frame.fill(20)  # Dark background
+        # Add error text
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        text = "Camera Error"
+        text_size = cv2.getTextSize(text, font, 1, 2)[0]
+        text_x = (640 - text_size[0]) // 2
+        text_y = 200
+        cv2.putText(frame, text, (text_x, text_y), font, 1, (0, 0, 255), 2)
+        # Add error message (split if too long)
+        msg_lines = error_message.split(' ')
+        line = ""
+        y_offset = 250
+        for word in msg_lines:
+            test_line = line + word + " "
+            test_size = cv2.getTextSize(test_line, font, 0.6, 1)[0]
+            if test_size[0] > 600:
+                cv2.putText(frame, line, (20, y_offset), font, 0.6, (255, 255, 255), 1)
+                line = word + " "
+                y_offset += 30
+            else:
+                line = test_line
+        if line:
+            cv2.putText(frame, line, (20, y_offset), font, 0.6, (255, 255, 255), 1)
+        return frame
+    def annotate_frame(self, frame: np.ndarray, results: Dict[str, Any]) -> np.ndarray:
+        """
+        Draw annotations on frame (detections, behaviors, etc.) - matches uploaded video pipeline
+        Args:
+            frame: Input frame
+            results: Processing results
+        Returns:
+            Annotated frame
+        """
+        annotated = frame.copy()
+        # Draw object detections with color coding (same as uploaded video pipeline)
+        for obj in results.get('objects_detected', []):
+            bbox = obj.get('bbox', [0, 0, 100, 100])
+            class_name = obj.get('class', 'object')
+            confidence = float(obj.get('confidence', 0.0))
+            x1, y1, x2, y2 = map(int, bbox)
+            # Color coding based on object class (same as uploaded video pipeline)
+            color_map = {
+                'fire': (255, 255, 0),    # Cyan/Blue (BGR)
+                'knife': (0, 255, 255),   # Yellow (BGR)
+                'gun': (0, 255, 0),       # Green (BGR)
+                'smoke': (128, 128, 128)  # Gray (BGR)
+            }
+            color = color_map.get(class_name.lower(), (0, 0, 255))  # Default red
+            # Draw bounding box with thicker line for visibility
+            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 3)
+            # Draw label with background (same style as uploaded video pipeline)
+            label = f"{class_name}: {confidence:.2f}"
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.6
+            thickness = 2
+            label_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
+            # Draw label background
+            cv2.rectangle(annotated,
+                         (x1, y1 - label_size[1] - 10),
+                         (x1 + label_size[0], y1),
+                         color, -1)
+            # Draw label text
+            cv2.putText(annotated, label, (x1, y1 - 5),
+                       font, font_scale, (255, 255, 255), thickness)
+        # Draw behavior detections (same style as uploaded video pipeline)
+        behavior_y_offset = 30
+        for behavior in results.get('behaviors_detected', []):
+            behavior_type = behavior.get('behavior_type', behavior.get('behavior', 'unknown'))
+            confidence = float(behavior.get('confidence', 0.0))
+            label = f"{behavior_type.upper()}: {confidence:.2f}"
+            # Color coding for behaviors
+            behavior_colors = {
+                'fighting': (0, 0, 255),      # Red
+                'road_accident': (0, 165, 255),  # Orange
+                'wallclimb': (255, 0, 255)   # Magenta
+            }
+            behavior_color = behavior_colors.get(behavior_type.lower(), (0, 255, 0))  # Default green
+            # Draw behavior label with background
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.7
+            thickness = 2
+            label_size, baseline = cv2.getTextSize(label, font, font_scale, thickness)
+            # Background for behavior label
+            cv2.rectangle(annotated,
+                         (10, behavior_y_offset - label_size[1] - 5),
+                         (10 + label_size[0], behavior_y_offset + 5),
+                         behavior_color, -1)
+            cv2.putText(annotated, label, (10, behavior_y_offset),
+                       font, font_scale, (255, 255, 255), thickness)
+            behavior_y_offset += 35
+        # Draw motion indicator (if motion detected)
+        if results.get('motion_detected'):
+            motion_label = f"MOTION: {results.get('motion_score', 0.0):.0f}"
+            cv2.putText(annotated, motion_label, (10, behavior_y_offset),
+                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
+            behavior_y_offset += 30
+        # Draw face detection indicator
+        if results.get('faces_detected', 0) > 0:
+            face_label = f"FACES: {results['faces_detected']}"
+            cv2.putText(annotated, face_label, (10, behavior_y_offset),
+                       cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 192, 203), 2)
+            behavior_y_offset += 30
+        # Draw stats at bottom (same as uploaded video pipeline)
+        stats_text = f"Frame: {self.frame_count} | Objects: {len(results.get('objects_detected', []))} | Events: {self.stats['events_created']}"
+        cv2.putText(annotated, stats_text, (10, annotated.shape[0] - 10),
+                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        return annotated
+    def stop(self):
+        """Stop processing and release resources"""
+        self.is_processing = False
+        if self.cap:
+            self.cap.release()
+        logger.info("🛑 Live stream processing stopped")
+    def get_stats(self) -> Dict[str, Any]:
+        """Get processing statistics"""
+        runtime = time.time() - self.stats['start_time'] if self.stats['start_time'] else 0
+        return {
+            **self.stats,
+            'runtime_seconds': runtime,
+            'fps': self.stats['frames_processed'] / runtime if runtime > 0 else 0,
+            'is_processing': self.is_processing
+        }
+# Global processor instances (one per camera)
+# Module-level registry keyed by camera_id; entries are added by
+# get_live_processor() and removed by stop_live_processor().
+_live_processors = {}
+def get_live_processor(camera_id: str = "webcam_01", config: VideoProcessingConfig = None) -> LiveStreamProcessor:
+    """Get or create a live stream processor for a camera"""
+    if camera_id not in _live_processors:
+        _live_processors[camera_id] = LiveStreamProcessor(config, camera_id)
+    return _live_processors[camera_id]
+def stop_live_processor(camera_id: str):
+    """Stop and remove a live stream processor"""
+    if camera_id in _live_processors:
+        _live_processors[camera_id].stop()
+        del _live_processors[camera_id]