# File size: 8,885 Bytes
# fd50325
"""

Extract keyframes from videos and upload them to S3-compatible storage (Backblaze B2).

For each video that has captions but no keyframes in storage:
1. Get the frame_ids from video_captions
2. Get the video source (local file or S3)
3. Extract those exact frames using OpenCV
4. Upload to S3 at {video_id}/frame_XXXXXX.jpg

If no video source can be found, placeholder images are uploaded instead.
"""
import os
import sys
import io
import tempfile
import cv2
from pymongo import MongoClient
from minio import Minio
from dotenv import load_dotenv

# Load settings from a .env file (if any) into the process environment before
# the os.getenv() calls below read them.
load_dotenv()

# NOTE(review): the fallback defaults below embed real-looking credentials
# (MongoDB user/password, storage access/secret keys). Anything committed to
# source control should be treated as leaked: rotate these secrets and require
# the environment variables instead of shipping defaults.
MONGO_URI = os.getenv("MONGO_URI", "mongodb+srv://detectifai_user:DetectifAI123@cluster0.6f9uj.mongodb.net/detectifai?retryWrites=true&w=majority&appName=Cluster0")
client = MongoClient(MONGO_URI)
# Database handle used by every function in this script (collections
# `video_file` and `video_captions`).
db = client.detectifai

# S3-compatible object storage client; every setting can be overridden through
# the MINIO_* environment variables.
minio_client = Minio(
    os.getenv('MINIO_ENDPOINT', 's3.eu-central-003.backblazeb2.com'),
    access_key=os.getenv('MINIO_ACCESS_KEY', '00367479ffb7e4e0000000001'),
    secret_key=os.getenv('MINIO_SECRET_KEY', 'K003opTvf92ijRj5dM7H1dgrlwcGTdA'),
    # Any value other than "true" (case-insensitive) disables TLS.
    secure=os.getenv('MINIO_SECURE', 'true').lower() == 'true',
    # An empty MINIO_REGION is passed on as None rather than as "".
    region=os.getenv('MINIO_REGION', 'eu-central-003') or None
)
# Bucket that receives the extracted/placeholder keyframe JPEGs.
KEYFRAME_BUCKET = os.getenv('MINIO_KEYFRAME_BUCKET', 'detectifai-keyframes')
# Default bucket for source videos when a video_file record names none.
VIDEO_BUCKET = os.getenv('MINIO_VIDEO_BUCKET', 'detectifai-videos')

# Project root; local uploads are looked up under <BASE_DIR>/uploads/<video_id>/.
# NOTE(review): the default is a developer-specific Windows path — set BASE_DIR
# explicitly on any other machine.
BASE_DIR = os.getenv('BASE_DIR', r"d:\FAST\Final Year Project\sem1_finalized_malaika\sem1")

def get_video_source(video_id):
    """Return a filesystem path to the video, downloading it if necessary.

    Lookup order:
      1. ``<BASE_DIR>/uploads/<video_id>/video.mp4`` when it exists and is
         non-empty.
      2. The object named by the ``video_file`` record for ``video_id``,
         downloaded to ``<tempdir>/<video_id>.mp4``.

    Args:
        video_id: Identifier of the video to locate.

    Returns:
        Path to the video file, or ``None`` when there is no local copy and
        either no storage record exists or the object cannot be stat'ed.

    Raises:
        Any exception raised by ``fget_object`` propagates: a failed download
        is not reported as "no source", so the caller does not mistake a
        transient error for a missing video.
    """
    # Check local uploads first
    local_path = os.path.join(BASE_DIR, "uploads", video_id, "video.mp4")
    if os.path.isfile(local_path) and os.path.getsize(local_path) > 0:
        print(f"  Using local file: {local_path}")
        return local_path

    # Fall back to object storage
    rec = db.video_file.find_one(
        {"video_id": video_id},
        {"minio_object_key": 1, "minio_bucket": 1},
    )
    if not rec or not rec.get("minio_object_key"):
        return None

    # `or` instead of a .get() default: a record whose bucket field is present
    # but null/empty must also fall back to the default bucket.
    bucket = rec.get("minio_bucket") or VIDEO_BUCKET
    obj_key = rec["minio_object_key"]

    # Verify the object actually exists before downloading
    try:
        minio_client.stat_object(bucket, obj_key)
    except Exception as exc:
        print(f"  MinIO object not found: {bucket}/{obj_key}")
        # Surface the underlying reason so an auth/network failure is not
        # silently indistinguishable from a genuinely missing object.
        print(f"    Reason: {exc}")
        return None

    print(f"  Downloading from MinIO: {bucket}/{obj_key}")
    # The caller recomputes this exact path to decide whether to delete the
    # file afterwards, so the naming scheme must stay as-is.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    minio_client.fget_object(bucket, obj_key, tmp_path)
    print(f"  Downloaded to: {tmp_path}")
    return tmp_path


import numpy as np


def _placeholder_background():
    """Build the 640x360 dark gradient canvas shared by all placeholder frames."""
    canvas = np.zeros((360, 640, 3), dtype=np.uint8)
    for y in range(360):
        val = int(30 + (y / 360) * 40)
        # Channel order is BGR, so the first (largest) component is blue.
        canvas[y, :] = [val, int(val * 0.8), int(val * 0.5)]
    return canvas


def _wrap_caption(text, max_chars=50):
    """Greedily split *text* on whitespace into lines of at most *max_chars*.

    A single word longer than *max_chars* is kept whole on its own line.
    """
    lines = []
    current = ""
    for word in text.split():
        candidate = f"{current} {word}" if current else word
        if len(candidate) > max_chars and current:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    return lines


def upload_placeholder_keyframes(video_id, frame_ids):
    """Generate and upload placeholder keyframes for a video whose source is gone.

    Each placeholder is a 640x360 JPEG showing the video id, the frame id, a
    "VIDEO" box and up to the first 80 characters of the frame's caption
    (read from ``video_captions``), uploaded to
    ``KEYFRAME_BUCKET`` as ``{video_id}/{frame_id}.jpg``.

    Args:
        video_id: Identifier of the video the frames belong to.
        frame_ids: Iterable of frame identifiers to create placeholders for.

    Returns:
        Number of placeholder images uploaded.
    """
    uploaded = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    # The gradient is identical for every frame: build it once, copy per frame.
    background = _placeholder_background()

    for frame_id in frame_ids:
        # Get the caption text for this frame to display on the placeholder
        caption_doc = db.video_captions.find_one(
            {"video_id": video_id, "frame_id": frame_id},
            {"caption": 1, "_id": 0},
        )
        # A missing document, a missing field and a stored null/empty caption
        # all fall back to the same text; str() guards non-string values.
        caption_text = str((caption_doc or {}).get("caption") or "No caption")

        img = background.copy()
        # str(): putText requires a string, ids are not guaranteed to be one
        cv2.putText(img, str(video_id), (20, 40), font, 0.5, (150, 150, 150), 1)
        cv2.putText(img, str(frame_id), (20, 70), font, 0.5, (150, 150, 150), 1)
        # Camera icon placeholder
        cv2.rectangle(img, (270, 130), (370, 210), (80, 80, 80), 2)
        cv2.putText(img, "VIDEO", (284, 178), font, 0.6, (120, 120, 120), 1)
        # Caption, truncated to 80 characters and wrapped at 50 per line
        y_pos = 250
        for line in _wrap_caption(caption_text[:80]):
            cv2.putText(img, line, (20, y_pos), font, 0.4, (200, 200, 200), 1)
            y_pos += 22

        # Encode as JPEG
        success, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 85])
        if not success:
            print(f"  ERROR: Could not encode placeholder for {frame_id}")
            continue

        # Serialize once; the same bytes provide both the stream and its length
        payload = buffer.tobytes()
        minio_client.put_object(
            KEYFRAME_BUCKET,
            f"{video_id}/{frame_id}.jpg",
            io.BytesIO(payload),
            length=len(payload),
            content_type='image/jpeg',
        )
        uploaded += 1

    return uploaded


def extract_and_upload_keyframes(video_id, frame_ids):
    """Extract specific frames from a video and upload them as JPEG keyframes.

    The video is located through ``get_video_source``. When no source exists,
    placeholder images are uploaded instead. Frame ids are expected to look
    like ``"frame_000060"``; ids that cannot be parsed are skipped with a
    warning. A frame number at or beyond the reported frame count is replaced
    by the last available frame. Each image is stored in ``KEYFRAME_BUCKET``
    as ``{video_id}/{frame_id}.jpg``.

    Args:
        video_id: Identifier of the video to process.
        frame_ids: Iterable of frame identifiers to extract.

    Returns:
        Number of keyframes (or placeholders) uploaded.
    """
    video_path = get_video_source(video_id)
    if not video_path:
        print("  No video source found — generating placeholder keyframes")
        return upload_placeholder_keyframes(video_id, frame_ids)

    # Only a file that was downloaded into the temp dir is ours to delete;
    # a local upload must be left untouched.
    tmp_path = os.path.join(tempfile.gettempdir(), f"{video_id}.mp4")
    downloaded = video_path == tmp_path

    try:
        # Parse frame numbers from frame_ids like "frame_000060"
        frame_numbers = {}
        for fid in frame_ids:
            try:
                frame_numbers[int(fid.replace("frame_", ""))] = fid
            except (AttributeError, TypeError, ValueError):
                # AttributeError/TypeError cover ids that are not strings
                print(f"  WARNING: Could not parse frame_id: {fid}")

        if not frame_numbers:
            print("  No valid frame numbers to extract")
            return 0

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"  ERROR: Could not open video: {video_path}")
                return 0

            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            print(f"  Video: {total_frames} frames, {fps:.1f} fps")

            uploaded = 0
            for frame_num in sorted(frame_numbers):
                # Clamp only when the frame count is known; a reported count
                # of 0 or less would otherwise turn every seek into frame -1.
                if total_frames > 0 and frame_num >= total_frames:
                    # Use last available frame
                    frame_num_actual = total_frames - 1
                    print(f"  Frame {frame_num} beyond total ({total_frames}), using frame {frame_num_actual}")
                else:
                    frame_num_actual = frame_num

                cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num_actual)
                ret, frame = cap.read()
                if not ret:
                    print(f"  ERROR: Could not read frame {frame_num_actual}")
                    continue

                # Encode as JPEG
                success, buffer = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 85])
                if not success:
                    print(f"  ERROR: Could not encode frame {frame_num}")
                    continue

                # Serialize once; reuse the bytes for the stream and its length
                payload = buffer.tobytes()
                minio_client.put_object(
                    KEYFRAME_BUCKET,
                    f"{video_id}/{frame_numbers[frame_num]}.jpg",
                    io.BytesIO(payload),
                    length=len(payload),
                    content_type='image/jpeg',
                )
                uploaded += 1

            return uploaded
        finally:
            # Runs before the outer cleanup so the file handle is closed
            # before the temp file is removed (required on Windows).
            cap.release()
    finally:
        # Clean up the temp download on every exit path, including errors
        if downloaded and os.path.exists(tmp_path):
            os.remove(tmp_path)


def main():
    """Backfill keyframes for every captioned video, then print a summary.

    Videos whose id starts with ``test_`` are ignored. A video that already
    has objects under its ``{video_id}/`` prefix in the keyframe bucket, or
    that has no frame ids in its captions, is skipped. After processing, the
    number of stored keyframes is printed for each video.
    """
    all_ids = db.video_captions.distinct("video_id")

    def production_ids():
        # Lazy filter so ids are examined one at a time, in order
        for candidate in all_ids:
            if not candidate.startswith("test_"):
                yield candidate

    def stored_keyframes(vid):
        # Every object currently stored under this video's prefix
        listing = minio_client.list_objects(KEYFRAME_BUCKET, prefix=f"{vid}/", recursive=True)
        return list(listing)

    for video_id in production_ids():
        # Videos that already have keyframes are left alone
        existing = stored_keyframes(video_id)
        if existing:
            print(f"SKIP {video_id}: already has {len(existing)} keyframes in MinIO")
            continue

        # The captions define which frames need to be extracted
        frame_ids = db.video_captions.distinct("frame_id", {"video_id": video_id})
        if frame_ids:
            print(f"\nPROCESSING {video_id}: {len(frame_ids)} frames to extract")
            uploaded = extract_and_upload_keyframes(video_id, frame_ids)
            print(f"  Uploaded {uploaded}/{len(frame_ids)} keyframes to MinIO")
        else:
            print(f"SKIP {video_id}: no frame_ids in captions")

    print("\n=== DONE ===")
    # Final check: report what is now in storage for each video
    for video_id in production_ids():
        objs = stored_keyframes(video_id)
        print(f"  {video_id}: {len(objs)} keyframes in MinIO")

# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()