# Spaces: blacksinisterx / DetectifAI-Backend (Running)
# File size: 8,885 Bytes
# fd50325

"""
Extract keyframes from videos and upload them to S3-compatible storage (Backblaze B2).

For each video that has captions but no keyframes in storage:
1. Get the frame_ids from video_captions.
2. Get the video source (local file or S3).
3. Extract those exact frames using OpenCV.
4. Upload each frame to S3 at {video_id}/frame_XXXXXX.jpg.

If no video source can be found, placeholder images are uploaded instead.
"""
import os
import sys
import io
import tempfile
import cv2
from pymongo import MongoClient
from minio import Minio
from dotenv import load_dotenv

load_dotenv()


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name}; "
            "set it in the environment or in a .env file."
        )
    return value


# SECURITY: secrets are read only from the environment (or the .env file loaded
# above). They must never be embedded as fallback defaults in source control.
# Any credentials that were previously committed here should be rotated.
MONGO_URI = _require_env("MONGO_URI")
client = MongoClient(MONGO_URI)
db = client.detectifai

# Non-secret settings keep their defaults; only the key pair is mandatory.
minio_client = Minio(
    os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
    access_key=_require_env('MINIO_ACCESS_KEY'),
    secret_key=_require_env('MINIO_SECRET_KEY'),
    secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
    # An explicitly empty MINIO_REGION is passed to the client as None.
    region=os.getenv('MINIO_REGION', 'eu-central-003') or None
)
KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')

# Root directory that is searched for locally stored uploads.
BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")

def get_video_source(video_id):
    """Locate the source video for *video_id* and return a local file path.

    Lookup order:
      1. ``<BASE_DIR>/uploads/<video_id>/video.mp4`` if it exists and is
         non-empty.
      2. The object named by the ``video_file`` document for this video
         (``minio_object_key`` / ``minio_bucket``), downloaded to
         ``<system temp dir>/<video_id>.mp4``.

    Args:
        video_id: Identifier of the video; used as a path component and as
            the ``video_id`` query value.

    Returns:
        Path to the video file, or ``None`` when there is no local copy and
        no downloadable object.
    """
    # Check local uploads first
    local_path = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
    if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
        print(f"  Using local file: {local_path}")
        return local_path

    # Fall back to object storage
    rec = db.video_file.find_one({"video_id": video_id}, {"minio_object_key": 1, "minio_bucket": 1})
    if not rec or not rec.get("minio_object_key"):
        return None

    # `or` rather than a .get() default: a field that is present but stored
    # as null/empty must also fall back to the default video bucket.
    bucket = rec.get("minio_bucket") or VIDEO_BUCKET
    obj_key = rec["minio_object_key"]

    # Verify the object is reachable before downloading. Any failure is
    # treated as "no source", but the cause is printed so that connectivity
    # or permission problems are not mistaken for a missing object.
    try:
        minio_client.stat_object(bucket, obj_key)
    except Exception as exc:
        print(f"  MinIO object not found: {bucket}/{obj_key} ({exc})")
        return None

    print(f"  Downloading from MinIO: {bucket}/{obj_key}")
    # The temp file name is derived from video_id; the extraction step
    # recomputes the same path to delete the download after use.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    minio_client.fget_object(bucket, obj_key, tmp_path)
    print(f"  Downloaded to: {tmp_path}")
    return tmp_path


import numpy as np


def upload_placeholder_keyframes(video_id, frame_ids):
    """Generate and upload placeholder keyframe images for a video whose source is gone.

    Each placeholder is a 640x360 JPEG with a dark gradient background, the
    video id, the frame id, a boxed "VIDEO" label and up to 80 characters of
    the frame's caption (word-wrapped at roughly 50 characters per line).
    Images are stored in ``KEYFRAME_BUCKET`` under ``{video_id}/{frame_id}.jpg``.

    Args:
        video_id: Identifier of the video the frames belong to.
        frame_ids: Iterable of frame identifiers to create placeholders for.

    Returns:
        Number of placeholder images uploaded.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    # The gradient background is identical for every frame, so build it once
    # and copy it per image instead of refilling 360 rows for each frame.
    background = np.zeros((360, 640, 3), dtype=np.uint8)
    for y in range(360):
        val = int(30 + (y / 360) * 40)
        background[y, :] = [val, int(val * 0.8), int(val * 0.5)]

    uploaded = 0
    for frame_id in frame_ids:
        # Get the caption text for this frame to display on the placeholder
        caption_doc = db.video_captions.find_one(
            {"video_id": video_id, "frame_id": frame_id},
            {"caption": 1, "_id": 0}
        )
        caption = caption_doc.get("caption") if caption_doc else None
        # A caption stored as null would otherwise crash on slicing below.
        caption_text = "No caption" if caption is None else str(caption)

        img = background.copy()

        # Video ID and frame ID (str() because putText only accepts strings)
        cv2.putText(img, str(video_id), (20, 40), font, 0.5, (150, 150, 150), 1)
        cv2.putText(img, str(frame_id), (20, 70), font, 0.5, (150, 150, 150), 1)
        # Camera icon placeholder
        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)

        # Caption, greedily wrapped into lines of at most ~50 characters
        line = ""
        y_pos = 250
        for w in caption_text[:80].split():
            test = line + " " + w if line else w
            if len(test) > 50:
                # Only flush a non-empty line: a single over-long first word
                # must not emit a blank line and push the text down.
                if line:
                    cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
                    y_pos += 22
                line = w
            else:
                line = test
        if line:
            cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)

        # Encode as JPEG
        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not success:
            print(f"  ERROR: Could not encode placeholder for {frame_id}")
            continue

        # Serialise once and reuse the bytes for both the stream and its length.
        payload = buffer.tobytes()
        minio_path = f"{video_id}/{frame_id}.jpg"
        minio_client.put_object(
            KEYFRAME_BUCKET, minio_path, io.BytesIO(payload),
            length=len(payload),
            content_type='image/jpeg'
        )
        uploaded += 1

    return uploaded


def extract_and_upload_keyframes(video_id, frame_ids):
    """Extract specific frames from a video and upload them as JPEGs.

    The video is obtained through ``get_video_source``. When no source is
    available, placeholder images are uploaded instead. Frame ids are expected
    to look like ``"frame_000060"``; ids that cannot be parsed are skipped with
    a warning. A requested frame beyond the reported frame count is replaced
    by the last frame. Each image is stored in ``KEYFRAME_BUCKET`` under
    ``{video_id}/{frame_id}.jpg``.

    The capture handle is always released, and a video that was downloaded to
    the temp directory is always deleted, even on early return or when an
    upload raises.

    Args:
        video_id: Identifier of the video to read frames from.
        frame_ids: Iterable of frame identifiers to extract.

    Returns:
        Number of keyframes (or placeholders) uploaded.
    """
    video_path = get_video_source(video_id)
    if not video_path:
        print("  No video source found — generating placeholder keyframes")
        return upload_placeholder_keyframes(video_id, frame_ids)

    # get_video_source() downloads remote videos to exactly this path, so a
    # match means the file is a temporary copy that this function must delete.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    cap = None
    try:
        # Map frame number -> original frame_id, e.g. 60 -> "frame_000060"
        frame_numbers = {}
        for fid in frame_ids:
            try:
                frame_numbers[int(fid.replace("frame_", ""))] = fid
            except (ValueError, AttributeError):
                # AttributeError: a non-string frame_id has no .replace()
                print(f"  WARNING: Could not parse frame_id: {fid}")

        if not frame_numbers:
            print("  No valid frame numbers to extract")
            return 0

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"  ERROR: Could not open video: {video_path}")
            return 0

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"  Video: {total_frames} frames, {fps:.1f} fps")

        uploaded = 0
        for frame_num in sorted(frame_numbers):
            # Clamp only when the reported frame count is usable; with a count
            # of 0 or less, clamping would seek to frame -1 for every request.
            if total_frames > 0 and frame_num >= total_frames:
                frame_num_actual = total_frames - 1
                print(f"  Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
            else:
                frame_num_actual = frame_num

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
            ret, frame = cap.read()
            if not ret:
                print(f"  ERROR: Could not read frame {frame_num_actual}")
                continue

            # Encode as JPEG
            success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
            if not success:
                print(f"  ERROR: Could not encode frame {frame_num}")
                continue

            frame_id = frame_numbers[frame_num]
            minio_path = f"{video_id}/{frame_id}.jpg"

            # Serialise once and reuse the bytes for both the stream and its length.
            payload = buffer.tobytes()
            minio_client.put_object(
                KEYFRAME_BUCKET,
                minio_path,
                io.BytesIO(payload),
                length=len(payload),
                content_type='image/jpeg'
            )
            uploaded += 1

        return uploaded
    finally:
        # Release before deleting: an open handle can block removal of the file.
        if cap is not None:
            cap.release()
        if video_path == tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)


def main():
    """Backfill keyframes for every captioned video that has none in storage.

    Videos whose id starts with ``test_`` are ignored. A video that already
    has at least one object under its ``{video_id}/`` prefix in the keyframe
    bucket is skipped, as is a video with no caption frame ids. After the
    pass, the number of stored keyframes per video is printed.
    """

    def stored_keyframes(vid):
        # All objects currently stored under this video's prefix.
        return list(minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{vid}/", recursive=True))

    # Every video_id that has at least one caption document
    caption_vids = db.video_captions.distinct("video_id")

    for video_id in caption_vids:
        if video_id.startswith("test_"):
            continue

        # Anything already under the prefix means this video was handled before
        existing = stored_keyframes(video_id)
        if existing:
            print(f"SKIP {video_id}: already has {len(existing)} keyframes in MinIO")
            continue

        # The caption documents define which frames are needed
        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
        if not frame_ids:
            print(f"SKIP {video_id}: no frame_ids in captions")
            continue

        wanted = len(frame_ids)
        print(f"\nPROCESSING {video_id}: {wanted} frames to extract")
        uploaded = extract_and_upload_keyframes(video_id, frame_ids)
        print(f"  Uploaded {uploaded}/{wanted} keyframes to MinIO")

    print("\n=== DONE ===")

    # Final check: report what is in storage for each non-test video
    for video_id in caption_vids:
        if not video_id.startswith("test_"):
            objs = stored_keyframes(video_id)
            print(f"  {video_id}: {len(objs)} keyframes in MinIO")


# Run the backfill only when executed as a script, not when imported.
if __name__ == "__main__":
    main()