DetectifAI-Backend / extract_upload_keyframes.py
blacksinisterx's picture
fix: keyframe images, video clips, evidence images, live stream webcam+URL, remove demo mode
fd50325 verified
"""
Extract keyframes from videos and upload to S3-compatible storage (Backblaze B2).
For each video that has captions but no keyframes in storage:
1. Get the frame_ids from video_captions
2. Get the video source (local file or S3)
3. Extract those exact frames using OpenCV
4. Upload to S3 at {video_id}/frame_XXXXXX.jpg
If no video source can be found, placeholder images are generated and uploaded instead.
"""
import os
import sys
import io
import tempfile
import cv2
from pymongo import MongoClient
from minio import Minio
from dotenv import load_dotenv
# Module-level configuration. Everything below runs at import time: it reads
# the environment and constructs the MongoDB and object-storage clients that
# the functions in this file use as globals.
# Called before the os.getenv() lookups below so values from a .env file
# (python-dotenv) can take part in them.
load_dotenv()
# NOTE(review): SECURITY - the fallback URI embeds a database username and
# password in source. Treat that credential as leaked: rotate it and require
# MONGO_URI from the environment instead of shipping a default.
MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0")
client = MongoClient(MONGO_URI)
# Handle to the `detectifai` database (collections used in this file:
# video_captions, video_file).
db = client.detectifai
# S3-compatible object-storage client; the defaults point at a Backblaze B2
# endpoint.
# NOTE(review): SECURITY - the access-key and secret-key fallbacks are
# hard-coded. Rotate them and drop the defaults so the script fails fast when
# the environment is not configured.
# `secure` is True only when MINIO_SECURE equals "true" (case-insensitive).
# `region`: the trailing `or None` turns an empty MINIO_REGION into None.
minio_client = Minio(
os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
access_key=os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001'),
secret_key=os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA'),
secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
region=os.getenv('MINIO_REGION', 'eu-central-003') or None
)
# Bucket that receives the extracted / placeholder keyframe JPEGs.
KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
# Fallback bucket for source videos when a video_file record names none.
VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')
# Root directory searched for local uploads (<BASE_DIR>/uploads/<video_id>/video.mp4).
# NOTE(review): the default is a developer-machine Windows path; set BASE_DIR
# explicitly on any other host.
BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")
def get_video_source(video_id):
    """Return a filesystem path to the video for ``video_id``, or ``None``.

    Lookup order:

    1. ``<BASE_DIR>/uploads/<video_id>/video.mp4`` if it exists and is
       non-empty.
    2. The object referenced by the ``video_file`` MongoDB record for this
       video, downloaded to ``<system temp dir>/<video_id>.mp4``.

    Args:
        video_id: Identifier of the video. Used as a directory name for the
            local lookup and as the file stem of the temporary download.

    Returns:
        Path to the video file, or ``None`` when there is no usable local
        file, no storage record with an object key, or the stored object
        cannot be stat'ed. A path inside the temp directory is a fresh
        download; deleting it is left to the caller.

    Errors raised by the download itself (``fget_object``) are not caught.
    """
    # Prefer a local upload: no network round-trip and nothing to clean up.
    local_path = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
    if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
        print(f" Using local file: {local_path}")
        return local_path

    # Fall back to object storage, located through the video_file record.
    rec = db.video_file.find_one(
        {"video_id": video_id},
        {"minio_object_key": 1, "minio_bucket": 1},
    )
    if not rec or not rec.get("minio_object_key"):
        return None

    # Use `or` rather than a .get() default so a record that stores
    # minio_bucket as None / "" still falls back to the configured bucket.
    bucket = rec.get("minio_bucket") or VIDEO_BUCKET
    obj_key = rec["minio_object_key"]

    # Verify the object is reachable before starting a download.
    try:
        minio_client.stat_object(bucket, obj_key)
    except Exception as exc:
        # Deliberately best-effort: the caller treats None as "no source".
        # The reason is included because this branch is also hit by
        # failures other than a missing object.
        print(
            f" MinIO object not found: {bucket}/{obj_key}"
            f" ({type(exc).__name__}: {exc})"
        )
        return None

    print(f" Downloading from MinIO: {bucket}/{obj_key}")
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    minio_client.fget_object(bucket, obj_key, tmp_path)
    print(f" Downloaded to: {tmp_path}")
    return tmp_path
import numpy as np
def _placeholder_background(height=360, width=640):
    """Return a ``height`` x ``width`` 3-channel uint8 image with a dark vertical gradient."""
    img = np.zeros((height, width, 3), dtype=np.uint8)
    for y in range(height):
        # Brightness ramps from 30 at the top row to just under 70 at the bottom.
        val = int(30 + (y / height) * 40)
        img[y, :] = [val, int(val * 0.8), int(val * 0.5)]
    return img


def _wrap_caption(text, max_chars=80, line_width=50):
    """Split the first ``max_chars`` characters of ``text`` into lines of at most ``line_width`` characters.

    Wrapping is word-based; a single word longer than ``line_width`` is kept
    whole on its own line.
    """
    lines = []
    current = ""
    for word in text[:max_chars].split():
        candidate = f"{current} {word}" if current else word
        # `and current` avoids emitting an empty line when the very first
        # word is already longer than the line width.
        if len(candidate) > line_width and current:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    return lines


def upload_placeholder_keyframes(video_id, frame_ids):
    """Generate and upload placeholder keyframe images for a video whose source is gone.

    For every frame id a 640x360 JPEG is rendered showing the video id, the
    frame id, a "VIDEO" box and the frame's caption (looked up in
    ``video_captions``), then uploaded to ``KEYFRAME_BUCKET`` as
    ``<video_id>/<frame_id>.jpg``.

    Args:
        video_id: Identifier of the video the frames belong to.
        frame_ids: Iterable of frame identifiers to create placeholders for.

    Returns:
        Number of placeholder images uploaded. Frames whose JPEG encoding
        fails are skipped and not counted.

    Errors raised by the database lookup or the upload are not caught.
    """
    uploaded = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    # The gradient is identical for every frame, so build it once and copy it.
    background = _placeholder_background()

    for frame_id in frame_ids:
        # Caption text for this frame, shown on the placeholder.
        caption_doc = db.video_captions.find_one(
            {"video_id": video_id, "frame_id": frame_id},
            {"caption": 1, "_id": 0},
        )
        caption_text = caption_doc.get("caption") if caption_doc else None
        if caption_text is None:
            # Covers both a missing document/field and a caption stored as None.
            caption_text = "No caption"

        img = background.copy()

        # Identification labels (str() keeps putText happy for non-string ids).
        cv2.putText(img, str(video_id), (20, 40), font, 0.5, (150, 150, 150), 1)
        cv2.putText(img, str(frame_id), (20, 70), font, 0.5, (150, 150, 150), 1)

        # Camera icon placeholder.
        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)

        # Caption, wrapped onto successive lines 22 px apart.
        y_pos = 250
        for text_line in _wrap_caption(str(caption_text)):
            cv2.putText(img, text_line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
            y_pos += 22

        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not success:
            continue

        # Serialise once; the same bytes provide both the stream and its length.
        payload = buffer.tobytes()
        minio_client.put_object(
            KEYFRAME_BUCKET,
            f"{video_id}/{frame_id}.jpg",
            io.BytesIO(payload),
            length=len(payload),
            content_type='image/jpeg',
        )
        uploaded += 1

    return uploaded
def extract_and_upload_keyframes(video_id, frame_ids):
    """Extract specific frames from a video and upload them as JPEG keyframes.

    The video is located with ``get_video_source``. When no source exists,
    placeholder images are uploaded instead via
    ``upload_placeholder_keyframes``. Otherwise each requested frame is read
    with OpenCV, encoded as JPEG (quality 85) and stored in
    ``KEYFRAME_BUCKET`` as ``<video_id>/<frame_id>.jpg``.

    Args:
        video_id: Identifier of the video to process.
        frame_ids: Iterable of frame identifiers of the form
            ``"frame_000060"``; the numeric part is the frame index.
            Identifiers that cannot be parsed are reported and skipped.

    Returns:
        Number of keyframes (or placeholders) uploaded. ``0`` when no frame
        id is parsable or the video cannot be opened.

    The capture handle is released and a temporary download is deleted on
    every exit path, including when an upload raises.
    """
    video_path = get_video_source(video_id)
    if not video_path:
        print(" No video source found — generating placeholder keyframes")
        return upload_placeholder_keyframes(video_id, frame_ids)

    # Location get_video_source downloads to; used to tell a temporary
    # download apart from a local upload that must not be deleted.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    cap = None
    try:
        # Map numeric frame index -> original frame_id ("frame_000060" -> 60).
        frame_numbers = {}
        for fid in frame_ids:
            try:
                frame_numbers[int(fid.replace("frame_", ""))] = fid
            except (AttributeError, ValueError):
                print(f" WARNING: Could not parse frame_id: {fid}")

        if not frame_numbers:
            print(" No valid frame numbers to extract")
            return 0

        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f" ERROR: Could not open video: {video_path}")
            return 0

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f" Video: {total_frames} frames, {fps:.1f} fps")

        uploaded = 0
        for frame_num in sorted(frame_numbers):
            # Clamp only when the frame count is known; a reported count of
            # 0 would otherwise turn every request into a seek to frame -1.
            if total_frames > 0 and frame_num >= total_frames:
                frame_num_actual = total_frames - 1
                print(f" Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
            else:
                frame_num_actual = frame_num

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
            ret, frame = cap.read()
            if not ret:
                print(f" ERROR: Could not read frame {frame_num_actual}")
                continue

            success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
            if not success:
                print(f" ERROR: Could not encode frame {frame_num}")
                continue

            # The object is stored under the requested frame_id even when
            # the pixels came from the clamped last frame.
            frame_id = frame_numbers[frame_num]
            payload = buffer.tobytes()
            minio_client.put_object(
                KEYFRAME_BUCKET,
                f"{video_id}/{frame_id}.jpg",
                io.BytesIO(payload),
                length=len(payload),
                content_type='image/jpeg',
            )
            uploaded += 1

        return uploaded
    finally:
        # Release the capture before deleting so the file is no longer held
        # open by OpenCV when it is removed.
        if cap is not None:
            cap.release()
        # Clean up the temp file only if this run downloaded it from MinIO.
        if video_path == tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
def main():
    """Backfill keyframes for every captioned video that has none in storage.

    Videos whose id starts with ``test_`` are ignored. A video is skipped
    when the keyframe bucket already holds at least one object under its
    prefix, or when its captions carry no frame ids. After processing, the
    number of stored keyframes per video is printed as a final check.
    """

    def stored_keyframes(vid):
        # Number of objects currently stored under "<vid>/" in the keyframe bucket.
        listing = minio_client.list_objects(
            KEYFRAME_BUCKET, prefix=f"{vid}/", recursive=True
        )
        return len(list(listing))

    def real_videos():
        # Lazy filter so ids are examined in the same order, one at a time.
        return (vid for vid in caption_vids if not vid.startswith("test_"))

    # All video ids that have at least one caption document.
    caption_vids = db.video_captions.distinct("video_id")

    for video_id in real_videos():
        already_there = stored_keyframes(video_id)
        if already_there > 0:
            print(f"SKIP {video_id}: already has {already_there} keyframes in MinIO")
            continue

        # Frame ids recorded in this video's captions.
        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
        if not frame_ids:
            print(f"SKIP {video_id}: no frame_ids in captions")
            continue

        wanted = len(frame_ids)
        print(f"\nPROCESSING {video_id}: {wanted} frames to extract")
        done = extract_and_upload_keyframes(video_id, frame_ids)
        print(f" Uploaded {done}/{wanted} keyframes to MinIO")

    print("\n=== DONE ===")

    # Final check: report what is in storage for each video now.
    for video_id in real_videos():
        print(f" {video_id}: {stored_keyframes(video_id)} keyframes in MinIO")


if __name__ == "__main__":
    main()