Spaces:

blacksinisterx
/

DetectifAI-Backend

Running

App Files Files Community

DetectifAI-Backend / extract_upload_keyframes.py

blacksinisterx

fix: keyframe images, video clips, evidence images, live stream webcam+URL, remove demo mode

fd50325 verified 2 months ago

raw

history blame contribute delete

8.89 kB

	"""
	Extract keyframes from videos and upload to S3-compatible storage (Backblaze B2).

	For each video that has captions but no keyframes in storage:
	1. Get the frame_ids from video_captions
	2. Get the video source (local file or S3)
	3. Extract those exact frames using OpenCV
	4. Upload to S3 at {video_id}/frame_XXXXXX.jpg
	"""
	import os
	import sys
	import io
	import tempfile
	import cv2
	from pymongo import MongoClient
	from minio import Minio
	from dotenv import load_dotenv

	# Module-level configuration. Everything below runs at import time: environment
	# variables are read and the MongoDB / object-storage clients are constructed.
	# load_dotenv() comes from python-dotenv; presumably it loads a local .env file
	# into the environment before the os.getenv() calls below — confirm against its docs.
	load_dotenv()

	# NOTE(review): SECURITY — the fallback value embeds a database username and
	# password in source control. Rotate this credential and supply MONGO_URI via the
	# environment only; the default is kept here solely to preserve current behaviour.
	MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0")
	client = MongoClient(MONGO_URI)
	# Handle to the "detectifai" database; the functions in this file read the
	# video_captions and video_file collections from it.
	db = client.detectifai

	# S3-compatible object storage client (the default endpoint is Backblaze B2).
	# NOTE(review): SECURITY — the access/secret key fallbacks below are hard-coded
	# credentials. Rotate them and require the MINIO_* variables to be set instead.
	minio_client = Minio(
	os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
	access_key=os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001'),
	secret_key=os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA'),
	# TLS is on unless MINIO_SECURE is set to something other than "true" (case-insensitive).
	secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
	# An empty MINIO_REGION value is turned into None by the trailing "or None".
	region=os.getenv('MINIO_REGION', 'eu-central-003') or None
	)
	# Bucket that receives the extracted / placeholder keyframe JPEGs.
	KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
	# Bucket used for source videos when a video_file record names no bucket of its own.
	VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')

	# Root under which local copies are looked up as uploads/<video_id>/video.mp4.
	# NOTE(review): the default is a developer-specific Windows path; set BASE_DIR elsewhere.
	BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")

	def get_video_source(video_id):
	    """Locate a readable copy of the video and return its filesystem path.

	    A non-empty local file at BASE_DIR/uploads/<video_id>/video.mp4 is preferred.
	    Otherwise the object named in the video's ``video_file`` record is downloaded
	    to the system temp directory as ``<video_id>.mp4``.

	    Args:
	        video_id: Identifier used for the local folder name, the database lookup
	            and the temp file name.

	    Returns:
	        The path of the local or downloaded file, or None when there is no local
	        copy and no retrievable object in storage.
	    """
	    # Local uploads take priority over object storage.
	    candidate = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
	    has_local_copy = os.path.isfile(candidate) and os.path.getsize(candidate) > 0
	    if has_local_copy:
	        print(f" Using local file: {candidate}")
	        return candidate

	    # Fall back to the storage location recorded in the database.
	    record = db.video_file.find_one(
	        {"video_id": video_id},
	        {"minio_object_key": 1, "minio_bucket": 1},
	    )
	    if not record or not record.get("minio_object_key"):
	        return None

	    object_key = record["minio_object_key"]
	    source_bucket = record.get("minio_bucket", VIDEO_BUCKET)

	    # Probe the object first so a stale database record yields None instead of
	    # a failed download. Any error from the probe is treated as "not found".
	    try:
	        minio_client.stat_object(source_bucket, object_key)
	    except Exception:
	        print(f" MinIO object not found: {source_bucket}/{object_key}")
	        return None

	    print(f" Downloading from MinIO: {source_bucket}/{object_key}")
	    download_target = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
	    minio_client.fget_object(source_bucket, object_key, download_target)
	    print(f" Downloaded to: {download_target}")
	    return download_target


	import numpy as np


	def _placeholder_background(height=360, width=640):
	    """Return a height x width image filled with a dark vertical gradient."""
	    img = np.zeros((height, width, 3), dtype=np.uint8)
	    for y in range(height):
	        val = int(30 + (y / height) * 40)
	        # First channel is strongest; OpenCV treats it as blue, giving a dark blue tint.
	        img[y, :] = [val, int(val * 0.8), int(val * 0.5)]
	    return img


	def _wrap_caption(text, max_chars=80, line_width=50):
	    """Truncate text to max_chars and greedily pack its words into lines.

	    Lines hold at most line_width characters, except that a single word longer
	    than line_width is kept whole on its own line. No empty lines are produced.
	    """
	    lines = []
	    current = ""
	    for word in text[:max_chars].split():
	        candidate = f"{current} {word}" if current else word
	        if current and len(candidate) > line_width:
	            lines.append(current)
	            current = word
	        else:
	            current = candidate
	    if current:
	        lines.append(current)
	    return lines


	def upload_placeholder_keyframes(video_id, frame_ids):
	    """Generate and upload placeholder keyframe images for videos whose source is gone.

	    For each frame id a 640x360 JPEG is rendered showing the video id, the frame
	    id, a "VIDEO" box and the frame's caption (first 80 characters, wrapped at 50),
	    then stored in KEYFRAME_BUCKET as ``<video_id>/<frame_id>.jpg``.

	    Args:
	        video_id: Identifier of the video; used as the object-key prefix.
	        frame_ids: Iterable of frame identifiers to create placeholders for.

	    Returns:
	        Number of placeholder images uploaded.
	    """
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    # The gradient is identical for every frame: build it once, copy per frame.
	    background = _placeholder_background()
	    uploaded = 0

	    for frame_id in frame_ids:
	        # Get the caption text for this frame to display on the placeholder.
	        caption_doc = db.video_captions.find_one(
	            {"video_id": video_id, "frame_id": frame_id},
	            {"caption": 1, "_id": 0}
	        )
	        # A missing document, a missing key, or a stored None/empty caption all
	        # fall back to the default text instead of crashing on the slice below.
	        caption_text = str((caption_doc or {}).get("caption") or "No caption")

	        img = background.copy()

	        # Video ID and frame ID (putText requires str, so coerce defensively).
	        cv2.putText(img, str(video_id), (20, 40), font, 0.5, (150, 150, 150), 1)
	        cv2.putText(img, str(frame_id), (20, 70), font, 0.5, (150, 150, 150), 1)
	        # Camera icon placeholder
	        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
	        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)
	        # Caption, one wrapped line every 22 pixels starting at y=250.
	        y_pos = 250
	        for text_line in _wrap_caption(caption_text):
	            cv2.putText(img, text_line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
	            y_pos += 22

	        # Encode as JPEG
	        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
	        if not success:
	            print(f" ERROR: Could not encode placeholder for {frame_id}")
	            continue

	        # Serialise once; the same bytes feed both the stream and its length.
	        payload = buffer.tobytes()
	        minio_client.put_object(
	            KEYFRAME_BUCKET,
	            f"{video_id}/{frame_id}.jpg",
	            io.BytesIO(payload),
	            length=len(payload),
	            content_type='image/jpeg'
	        )
	        uploaded += 1

	    return uploaded


	def extract_and_upload_keyframes(video_id, frame_ids):
	    """Extract specific frames from a video and upload them to MinIO.

	    Frame ids are expected to look like ``frame_000060``; the numeric part is
	    the zero-based frame index to read. Each frame is JPEG-encoded and stored in
	    KEYFRAME_BUCKET as ``<video_id>/<frame_id>.jpg``. When no video source can be
	    found, placeholder images are uploaded instead.

	    Args:
	        video_id: Identifier of the video to read frames from.
	        frame_ids: Iterable of frame identifiers to extract.

	    Returns:
	        Number of keyframes (or placeholders) uploaded.
	    """
	    video_path = get_video_source(video_id)
	    if not video_path:
	        print(" No video source found — generating placeholder keyframes")
	        return upload_placeholder_keyframes(video_id, frame_ids)

	    # Path get_video_source() uses for downloads; only a file at exactly this
	    # path is ours to delete afterwards.
	    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
	    cap = None
	    try:
	        # Parse frame numbers from frame_ids like "frame_000060"
	        frame_numbers = {}
	        for fid in frame_ids:
	            try:
	                frame_numbers[int(fid.replace("frame_", ""))] = fid
	            except (ValueError, AttributeError):
	                # AttributeError covers non-string ids (no .replace method).
	                print(f" WARNING: Could not parse frame_id: {fid}")

	        if not frame_numbers:
	            print(" No valid frame numbers to extract")
	            return 0

	        cap = cv2.VideoCapture(video_path)
	        if not cap.isOpened():
	            print(f" ERROR: Could not open video: {video_path}")
	            return 0

	        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        print(f" Video: {total_frames} frames, {fps:.1f} fps")

	        uploaded = 0
	        for frame_num in sorted(frame_numbers):
	            # Clamp only when the container reports a usable frame count; with a
	            # count of 0 or less, clamping would seek to frame -1 for every request.
	            if total_frames > 0 and frame_num >= total_frames:
	                frame_num_actual = total_frames - 1
	                print(f" Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
	            else:
	                frame_num_actual = frame_num

	            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
	            ret, frame = cap.read()
	            if not ret:
	                print(f" ERROR: Could not read frame {frame_num_actual}")
	                continue

	            # Encode as JPEG
	            success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
	            if not success:
	                print(f" ERROR: Could not encode frame {frame_num}")
	                continue

	            # The object is named after the requested frame id even when the
	            # pixels came from the clamped last frame.
	            frame_id = frame_numbers[frame_num]
	            payload = buffer.tobytes()
	            minio_client.put_object(
	                KEYFRAME_BUCKET,
	                f"{video_id}/{frame_id}.jpg",
	                io.BytesIO(payload),
	                length=len(payload),
	                content_type='image/jpeg'
	            )
	            uploaded += 1

	        return uploaded
	    finally:
	        # Runs on every exit path (early return, success, upload error) so the
	        # capture handle and the downloaded temp file are never leaked.
	        if cap is not None:
	            cap.release()
	        if video_path == tmp_path and os.path.exists(tmp_path):
	            try:
	                os.remove(tmp_path)
	            except OSError as exc:
	                # Best-effort cleanup: do not mask the real result or error.
	                print(f" WARNING: Could not remove temp file {tmp_path}: {exc}")


	def main():
	    """Backfill keyframes for every captioned video that has none in storage.

	    Videos whose id starts with ``test_`` are ignored. A video is skipped when
	    any object already exists under its ``<video_id>/`` prefix in KEYFRAME_BUCKET
	    or when its captions carry no frame ids. A per-video object count is printed
	    at the end.
	    """
	    def is_real_video(vid):
	        return not vid.startswith("test_")

	    def stored_keyframes(vid):
	        # Materialised because the count is reported, not just existence.
	        listing = minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{vid}/", recursive=True)
	        return list(listing)

	    # Every video_id that has at least one caption document.
	    caption_vids = db.video_captions.distinct("video_id")

	    for video_id in filter(is_real_video, caption_vids):
	        already_stored = stored_keyframes(video_id)
	        if already_stored:
	            print(f"SKIP {video_id}: already has {len(already_stored)} keyframes in MinIO")
	            continue

	        # The frames to extract are exactly the ones that were captioned.
	        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
	        if not frame_ids:
	            print(f"SKIP {video_id}: no frame_ids in captions")
	            continue

	        print(f"\nPROCESSING {video_id}: {len(frame_ids)} frames to extract")
	        uploaded = extract_and_upload_keyframes(video_id, frame_ids)
	        print(f" Uploaded {uploaded}/{len(frame_ids)} keyframes to MinIO")

	    print("\n=== DONE ===")
	    # Final check: report what is in storage for each processed video id.
	    for video_id in filter(is_real_video, caption_vids):
	        print(f" {video_id}: {len(stored_keyframes(video_id))} keyframes in MinIO")


	# Run the backfill only when this file is executed as a script.
	if __name__ == "__main__":
	    main()