Factor Studios committed on
Commit 14be113 · verified · 1 Parent(s): 028934e

Update vision_analyzer.py

Files changed (1)
  1. vision_analyzer.py +748 -692
vision_analyzer.py CHANGED
@@ -1,692 +1,748 @@
- import os
- import json
- import requests
- import subprocess
- import shutil
- import time
- import re
- import threading
- from typing import Dict, List, Set, Optional
- from huggingface_hub import HfApi, list_repo_files
- from fastapi import FastAPI, File, UploadFile, Form
- from fastapi.responses import JSONResponse
- from pathlib import Path
- import smtplib
- from email.message import EmailMessage
- import tempfile
- import rarfile
- import zipfile
- import cv2
- import numpy as np
- from PIL import Image
- import torch
- from transformers import AutoProcessor, AutoModelForCausalLM
-
- # Initialize FastAPI
- app = FastAPI()
-
- # ==== CONFIGURATION ====
- HF_TOKEN = os.getenv("HF_TOKEN", "")
- SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")
-
- # Path Configuration
- DOWNLOAD_FOLDER = "downloads"
- EXTRACT_FOLDER = "extracted"
- FRAMES_OUTPUT_FOLDER = "extracted_frames"
- ANALYSIS_OUTPUT_FOLDER = "analysis_results"
-
- os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
- os.makedirs(EXTRACT_FOLDER, exist_ok=True)
- os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
- os.makedirs(ANALYSIS_OUTPUT_FOLDER, exist_ok=True)
-
- # State Files
- DOWNLOAD_STATE_FILE = "download_progress.json"
- PROCESS_STATE_FILE = "process_progress.json"
- FAILED_FILES_LOG = "failed_files.log"
-
- # Processing Parameters
- CHUNK_SIZE = 1
- PROCESSING_DELAY = 2
- MAX_RETRIES = 3
- MIN_FREE_SPACE_GB = 2  # Minimum free space in GB before processing
-
- # Frame Extraction Parameters
- DEFAULT_FPS = 3  # Default frames per second for extraction
-
- # Initialize HF API
- hf_api = HfApi(token=HF_TOKEN)
-
- # Global State
- processing_status = {
-     "is_running": False,
-     "current_file": None,
-     "total_files": 0,
-     "processed_files": 0,
-     "failed_files": 0,
-     "extracted_courses": 0,
-     "extracted_videos": 0,
-     "extracted_frames_count": 0,
-     "analyzed_frames_count": 0,
-     "last_update": None,
-     "logs": []
- }
-
- import torch
- import subprocess
- import sys
-
-
-
- device = "cpu"  # Explicitly ensure CPU usage
-
- try:
-     # Load the model, forcing the 'eager' (CPU-compatible) attention implementation
-     vision_language_model_large = AutoModelForCausalLM.from_pretrained(
-         "microsoft/Florence-2-Base",
-         trust_remote_code=True
-     ).to(device).eval()
-     vision_language_processor_large = AutoProcessor.from_pretrained(
-         "microsoft/Florence-2-Base",
-         trust_remote_code=True
-     )
-     print("Florence-2 large model and processor loaded successfully on CPU using eager attention.")
- except Exception as e:
-     print(f"Error loading Florence-2 model on CPU: {e}")
-     print("Please ensure you have enough RAM and a compatible PyTorch version.")
-     vision_language_model_large = None
-     vision_language_processor_large = None
-
-
- def log_message(message: str):
-     """Log messages with timestamp"""
-     timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
-     log_entry = f"[{timestamp}] {message}"
-     print(log_entry)
-     processing_status["logs"].append(log_entry)
-     processing_status["last_update"] = timestamp
-     if len(processing_status["logs"]) > 100:
-         processing_status["logs"] = processing_status["logs"][-100:]
-
- def log_failed_file(filename: str, error: str):
-     """Log failed files to persistent file"""
-     with open(FAILED_FILES_LOG, "a") as f:
-         f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {filename}: {error}\n")
-
- def get_disk_usage(path: str) -> Dict[str, float]:
-     """Get disk usage statistics in GB"""
-     statvfs = os.statvfs(path)
-     total = statvfs.f_frsize * statvfs.f_blocks / (1024**3)
-     free = statvfs.f_frsize * statvfs.f_bavail / (1024**3)
-     used = total - free
-     return {"total": total, "free": free, "used": used}
-
- def check_disk_space(path: str = ".") -> bool:
-     """Check if there's enough disk space"""
-     disk_info = get_disk_usage(path)
-     if disk_info["free"] < MIN_FREE_SPACE_GB:
-         log_message(f'⚠️ Low disk space: {disk_info["free"]:.2f}GB free, {disk_info["used"]:.2f}GB used')
-         return False
-     return True
-
- def cleanup_temp_files():
-     """Clean up temporary files to free space"""
-     log_message("🧹 Cleaning up temporary files...")
-
-     # Clean old downloads (keep only current processing file)
-     current_file = processing_status.get("current_file")
-     for file in os.listdir(DOWNLOAD_FOLDER):
-         if file != current_file and file.endswith((".rar", ".zip")):
-             try:
-                 os.remove(os.path.join(DOWNLOAD_FOLDER, file))
-                 log_message(f"🗑️ Removed old download: {file}")
-             except:
-                 pass
-
- def load_json_state(file_path: str, default_value):
-     """Load state from JSON file"""
-     if os.path.exists(file_path):
-         try:
-             with open(file_path, "r") as f:
-                 return json.load(f)
-         except json.JSONDecodeError:
-             log_message(f"⚠️ Corrupted state file: {file_path}")
-     return default_value
-
- def save_json_state(file_path: str, data):
-     """Save state to JSON file"""
-     with open(file_path, "w") as f:
-         json.dump(data, f, indent=2)
-
- def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
-     """Download file with retry logic and disk space checking"""
-     if not check_disk_space():
-         cleanup_temp_files()
-         if not check_disk_space():
-             log_message("❌ Insufficient disk space even after cleanup")
-             return False
-
-     headers = {"Authorization": f"Bearer {HF_TOKEN}"}
-     for attempt in range(max_retries):
-         try:
-             with requests.get(url, headers=headers, stream=True) as r:
-                 r.raise_for_status()
-
-                 # Check content length if available
-                 content_length = r.headers.get("content-length")
-                 if content_length:
-                     size_gb = int(content_length) / (1024**3)
-                     disk_info = get_disk_usage(".")
-                     if size_gb > disk_info["free"] - 0.5:  # Leave 0.5GB buffer
-                         log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
-                         return False
-
-                 with open(dest_path, "wb") as f:
-                     for chunk in r.iter_content(chunk_size=8192):
-                         f.write(chunk)
-                 return True
-         except Exception as e:
-             if attempt < max_retries - 1:
-                 time.sleep(2 ** attempt)
-                 continue
-             log_message(f"❌ Download failed after {max_retries} attempts: {e}")
-             return False
-     return False
-
- def is_multipart_rar(filename: str) -> bool:
-     """Check if this is a multi-part RAR file"""
-     return ".part" in filename.lower() and filename.lower().endswith(".rar")
-
- def get_rar_part_base(filename: str) -> str:
-     """Get the base name for multi-part RAR files"""
-     if ".part" in filename.lower():
-         return filename.split(".part")[0]
-     return filename.replace(".rar", "")
-
- def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
-     """Extract RAR with retry and recovery, handling multi-part archives"""
-     filename = os.path.basename(rar_path)
-
-     # For multi-part RARs, we need the first part
-     if is_multipart_rar(filename):
-         base_name = get_rar_part_base(filename)
-         first_part = f"{base_name}.part01.rar"
-         first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
-
-         if not os.path.exists(first_part_path):
-             log_message(f"⚠️ Multi-part RAR detected but first part not found: {first_part}")
-             return False
-
-         rar_path = first_part_path
-         log_message(f"📦 Processing multi-part RAR starting with: {first_part}")
-
-     for attempt in range(max_retries):
-         try:
-             # Test RAR first
-             test_cmd = ["unrar", "t", rar_path]
-             test_result = subprocess.run(test_cmd, capture_output=True, text=True)
-             if test_result.returncode != 0:
-                 log_message(f"⚠️ RAR test failed: {test_result.stderr}")
-                 if attempt == max_retries - 1:
-                     return False
-                 continue
-
-             # Extract RAR
-             cmd = ["unrar", "x", "-o+", rar_path, output_dir]
-             if attempt > 0:  # Try recovery on subsequent attempts
-                 cmd.insert(2, "-kb")
-
-             result = subprocess.run(cmd, capture_output=True, text=True)
-             if result.returncode == 0:
-                 log_message(f"✅ Successfully extracted: {os.path.basename(rar_path)}")
-                 return True
-             else:
-                 error_msg = result.stderr or result.stdout
-                 log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")
-
-                 if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
-                     log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
-                 elif result.returncode == 10:
-                     log_message(f"⚠️ No files to extract (exit code 10)")
-                     return False
-                 elif result.returncode == 1:
-                     log_message(f"⚠️ Non-fatal error (exit code 1)")
-
-         except Exception as e:
-             log_message(f"❌ Extraction exception: {str(e)}")
-             if attempt == max_retries - 1:
-                 return False
-         time.sleep(1)
-
-     return False
-
- def ensure_dir(path):
-     os.makedirs(path, exist_ok=True)
-
- def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
-     """Extract frames from video at the specified frames per second (fps)."""
-     log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
-     ensure_dir(output_dir)
-     cap = cv2.VideoCapture(str(video_path))
-     if not cap.isOpened():
-         log_message(f"[ERROR] Failed to open video file: {video_path}")
-         return 0
-     video_fps = cap.get(cv2.CAP_PROP_FPS)
-     if not video_fps or video_fps <= 0:
-         video_fps = 30  # fallback if FPS is not available
-         log_message(f"[WARN] Using fallback FPS: {video_fps}")
-     frame_interval = int(round(video_fps / fps))
-     frame_idx = 0
-     saved_idx = 1
-     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-     log_message(f"[DEBUG] Total frames in video: {total_frames}")
-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-         if frame_idx % frame_interval == 0:
-             frame_name = f"{saved_idx:04d}.png"
-             cv2.imwrite(str(Path(output_dir) / frame_name), frame)
-             saved_idx += 1
-         frame_idx += 1
-     cap.release()
-     log_message(f"Extracted {saved_idx-1} frames from {video_path} to {output_dir}")
-     return saved_idx - 1
-
- def analyze_frame_with_florence2(image_path: str, prompt: str = "<CAPTION>") -> Dict:
-     """Analyze a single frame using Florence-2 vision model."""
-     if not vision_language_model_large or not vision_language_processor_large:
-         return {
-             "image": os.path.basename(image_path),
-             "description": "[ERROR] Vision model not loaded."
-         }
-
-     image = Image.open(image_path).convert("RGB")
-     inputs = vision_language_processor_large(images=image, text=prompt, return_tensors="pt").to(device)
-
-     with torch.no_grad():
-         generated_ids = vision_language_model_large.generate(
-             input_ids=inputs["input_ids"],
-             pixel_values=inputs["pixel_values"],
-             max_new_tokens=512,
-             do_sample=False,
-             num_beams=3
-         )
-
-     generated_text = vision_language_processor_large.batch_decode(generated_ids, skip_special_tokens=False)[0]
-     description = vision_language_processor_large.post_process_generation(
-         generated_text,
-         task="<CAPTION>",
-         image_size=(image.width, image.height)
-     )["<CAPTION>"]
-
-     return {
-         "image": os.path.basename(image_path),
-         "description": description
-     }
-
- def summarize_activities(frame_analyses: List[Dict]) -> Dict:
-     """Summarize activities from frame analyses."""
-     return {
-         # "steps": [
-         #     {
-         #         "action": "Open Blender software",
-         #         "description": "User launches Blender 3D modeling application on their computer"
-         #     },
-         #     {
-         #         "action": "Create 3D object",
-         #         "description": "User works with a default cube object in the 3D viewport"
-         #     },
-         #     {
-         #         "action": "Manipulate 3D model",
-         #         "description": "User rotates and transforms the cube using mouse interactions"
-         #     },
-         #     {
-         #         "action": "Navigate interface",
-         #         "description": "User explores different tools and panels in the Blender interface"
-         #     }
-         # ],
-         # "high_level_goal": "Learning basic 3D modeling operations in Blender software",
-         # "creative_actions": "3D object manipulation, interface navigation, basic modeling workflow",
-         # "objects": ["computer", "monitor", "mouse", "keyboard", "Blender software", "3D cube", "desktop interface"],
-         # "final_goal": "Introduction to Blender 3D modeling fundamentals and basic object manipulation"
-     }
-
- def analyze_frames(frames_dir: str, output_json_path: str, prompt: Optional[str] = None) -> int:
-     """Analyze all frames in directory using Florence-2 model."""
-     log_message(f"[INFO] Analyzing frames in {frames_dir}...")
-     frames_dir = Path(frames_dir).resolve()
-     output_json_path = Path(output_json_path).resolve()
-     ensure_dir(frames_dir)
-     ensure_dir(output_json_path.parent)
-
-     frame_analyses = []
-     analyzed_count = 0
-
-     for frame_file in sorted(frames_dir.glob("*.png")):
-         analysis = analyze_frame_with_florence2(str(frame_file), prompt)
-         frame_analyses.append(analysis)
-         analyzed_count += 1
-
-     # Generate summary
-     summary = summarize_activities(frame_analyses)
-
-     # Save results
-     results = {
-         "frame_analyses": frame_analyses,
-         "summary": summary
-     }
-
-     try:
-         with open(output_json_path, "w") as f:
-             json.dump(results, f, indent=2)
-         log_message(f"[SUCCESS] Analysis results saved to {output_json_path}")
-     except Exception as e:
-         log_message(f"[ERROR] Failed to write output JSON: {e}")
-
-     return analyzed_count
-
-
- def process_rar_file(rar_path: str) -> bool:
-     """Process a single RAR file - extract, then process videos for frames and vision analysis"""
-     filename = os.path.basename(rar_path)
-     processing_status["current_file"] = filename
-
-     # Handle multi-part RAR naming
-     if is_multipart_rar(filename):
-         course_name = get_rar_part_base(filename)
-     else:
-         course_name = filename.replace(".rar", "")
-
-     extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
-
-     try:
-         log_message(f"🔄 Processing: {filename}")
-
-         # Clean up any existing directory
-         if os.path.exists(extract_dir):
-             shutil.rmtree(extract_dir, ignore_errors=True)
-
-         # Extract RAR
-         os.makedirs(extract_dir, exist_ok=True)
-         if not extract_with_retry(rar_path, extract_dir):
-             raise Exception("RAR extraction failed")
-
-         # Count extracted files
-         file_count = 0
-         video_files_found = []
-         for root, dirs, files in os.walk(extract_dir):
-             for file in files:
-                 file_count += 1
-                 if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
-                     video_files_found.append(os.path.join(root, file))
-
-         processing_status["extracted_courses"] += 1
-         log_message(f"✅ Successfully extracted '{course_name}' ({file_count} files, {len(video_files_found)} videos)")
-
-         # Process video files for frame extraction and vision analysis
-         for video_path in video_files_found:
-             video_filename = Path(video_path).name
-             # Create a unique output directory for frames for each video
-             frames_output_dir = os.path.join(FRAMES_OUTPUT_FOLDER, f"{course_name}_{video_filename.replace('.', '_')}_frames")
-             ensure_dir(frames_output_dir)
-
-             extracted_frames_count = extract_frames(video_path, frames_output_dir, fps=DEFAULT_FPS)
-             processing_status["extracted_frames_count"] += extracted_frames_count
-             if extracted_frames_count > 0:
-                 processing_status["extracted_videos"] += 1
-                 log_message(f"[INFO] Extracted {extracted_frames_count} frames from {video_filename}")
-
-                 # Perform vision analysis on the extracted frames
-                 analysis_output_json = os.path.join(ANALYSIS_OUTPUT_FOLDER, f"{course_name}_{video_filename.replace('.', '_')}_analysis.json")
-                 analyzed_frames = analyze_frames(frames_output_dir, analysis_output_json)
-                 processing_status["analyzed_frames_count"] += analyzed_frames
-                 log_message(f"[INFO] Analyzed {analyzed_frames} frames from {video_filename}")
-             else:
-                 log_message(f"[WARN] No frames extracted from {video_filename}")
-
-         return True
-
-     except Exception as e:
-         error_msg = str(e)
-         log_message(f"❌ Processing failed: {error_msg}")
-         log_failed_file(filename, error_msg)
-         return False
-
-     finally:
-         processing_status["current_file"] = None
-
- def main_processing_loop(start_index: int = 0):
-     """Main processing workflow - extraction, frame extraction, and vision analysis"""
-     processing_status["is_running"] = True
-
-     try:
-         # Load state
-         processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
-         download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 0})
-
-         # Use start_index if provided, otherwise use the saved state
-         next_index = start_index if start_index > 0 else download_state["next_download_index"]
-
-         log_message(f"📊 Starting from index {next_index}")
-         log_message(f"📊 Previously processed: {len(processed_rars)} files")
-
-         # Get file list
-         try:
-             files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
-             rar_files = sorted([f for f in files if f.endswith(".rar")])
-
-             processing_status["total_files"] = len(rar_files)
-             log_message(f"📁 Found {len(rar_files)} RAR files in repository")
-
-             if next_index >= len(rar_files):
-                 log_message("✅ All files have been processed!")
-                 return
-
-         except Exception as e:
-             log_message(f"❌ Failed to get file list: {str(e)}")
-             return
-
-         # Process only one file per run
-         if next_index < len(rar_files):
-             rar_file = rar_files[next_index]
-             filename = os.path.basename(rar_file)
-
-             if filename in processed_rars:
-                 log_message(f"⏭️ Skipping already processed: {filename}")
-                 processing_status["processed_files"] += 1
-                 # Move to next file
-                 next_index += 1
-                 save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
-                 log_message(f"📊 Moving to next file. Progress: {next_index}/{len(rar_files)}")
-                 return
-
-             log_message(f"📥 Downloading: {filename}")
-             dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
-
-             # Download file
-             download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
-             if download_with_retry(download_url, dest_path):
-                 # Process file
-                 if process_rar_file(dest_path):
-                     processed_rars.append(filename)
-                     save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
-                     log_message(f"✅ Successfully processed: {filename}")
-                     processing_status["processed_files"] += 1
-                 else:
-                     log_message(f"❌ Failed to process: {filename}")
-                     processing_status["failed_files"] += 1
-
-                 # Clean up downloaded file
-                 try:
-                     os.remove(dest_path)
-                     log_message(f"🗑️ Cleaned up download: {filename}")
-                 except:
-                     pass
-             else:
-                 log_message(f"❌ Failed to download: {filename}")
-                 processing_status["failed_files"] += 1
-
-             # Update download state for next run
-             next_index += 1
-             save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
-
-             # Status update
-             log_message(f"📊 Progress: {next_index}/{len(rar_files)} files processed")
-             log_message(f'📊 Extracted: {processing_status["extracted_courses"]} courses')
-             log_message(f'📊 Videos Processed: {processing_status["extracted_videos"]}')
-             log_message(f'📊 Frames Extracted: {processing_status["extracted_frames_count"]}')
-             log_message(f'📊 Frames Analyzed: {processing_status["analyzed_frames_count"]}')
-             log_message(f'📊 Failed: {processing_status["failed_files"]} files')
-
-             if next_index < len(rar_files):
-                 log_message(f"🔄 Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
-             else:
-                 log_message("🎉 All files have been processed!")
-         else:
-             log_message("✅ All files have been processed!")
-
-         log_message("🎉 Processing complete!")
-         log_message(f'📊 Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["analyzed_frames_count"]} frames analyzed')
-
-     except KeyboardInterrupt:
-         log_message("⏹️ Processing interrupted by user")
-     except Exception as e:
-         log_message(f"❌ Fatal error: {str(e)}")
-     finally:
-         processing_status["is_running"] = False
-         cleanup_temp_files()
-
- # FastAPI Endpoints
- @app.post("/analyze-video")
- async def analyze_video_endpoint(
-     file: UploadFile = File(...),
-     fps: int = Form(DEFAULT_FPS),
-     prompt: Optional[str] = Form(None)
- ):
-     """Analyze a single video file and return frame-by-frame analysis."""
-     if not file.filename.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
-         return JSONResponse(status_code=400, content={
-             "error": "File type not allowed",
-             "allowed_types": [".mp4", ".avi", ".mov", ".mkv"]
-         })
-
-     with tempfile.TemporaryDirectory() as temp_dir:
-         temp_dir_path = Path(temp_dir)
-         file_path = temp_dir_path / file.filename
-
-         with open(file_path, "wb") as buffer:
-             shutil.copyfileobj(file.file, buffer)
-
-         frames_dir = temp_dir_path / "frames"
-         frame_count = extract_frames(file_path, frames_dir, fps)
-
-         frame_analyses = []
-         for frame_file in sorted(frames_dir.glob("*.png")):
-             analysis = analyze_frame_with_florence2(str(frame_file), prompt)
-             frame_analyses.append(analysis)
-
-         summary = summarize_activities(frame_analyses)
-
-         return JSONResponse(content={
-             "video_filename": file.filename,
-             "frame_count": frame_count,
-             "fps": fps,
-             "frame_analyses": frame_analyses,
-             "summary": summary
-         })
-
- @app.post("/analyze-archive")
- async def analyze_archive_endpoint(
-     file: UploadFile = File(...),
-     fps: int = Form(DEFAULT_FPS),
-     prompt: Optional[str] = Form(None)
- ):
-     """Analyze videos from RAR/ZIP archive and return frame-by-frame analysis."""
-     if not file.filename.lower().endswith((".rar", ".zip")):
-         return JSONResponse(status_code=400, content={
-             "error": "File type not allowed",
-             "allowed_types": [".rar", ".zip"]
-         })
-
-     with tempfile.TemporaryDirectory() as temp_dir:
-         temp_dir_path = Path(temp_dir)
-         file_path = temp_dir_path / file.filename
-
-         with open(file_path, "wb") as buffer:
-             shutil.copyfileobj(file.file, buffer)
-
-         extract_dir = temp_dir_path / "extracted"
-         video_files = []
-
-         if file.filename.lower().endswith(".rar"):
-             with rarfile.RarFile(file_path) as rf:
-                 rf.extractall(extract_dir)
-         else:
-             with zipfile.ZipFile(file_path) as zf:
-                 zf.extractall(extract_dir)
-
-         # Find video files in extracted content
-         for root, dirs, files in os.walk(extract_dir):
-             for file in files:
-                 if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
-                     video_files.append(Path(root) / file)
-
-         if not video_files:
-             return JSONResponse(status_code=400, content={
-                 "error": "No video files found in archive"
-             })
-
-         results = []
-         for video_path in video_files:
-             video_name = video_path.name
-             frames_dir = temp_dir_path / f"frames_{video_name}"
-             frame_count = extract_frames(video_path, frames_dir, fps)
-
-             frame_analyses = []
-             for frame_file in sorted(frames_dir.glob("*.png")):
-                 analysis = analyze_frame_with_florence2(str(frame_file), prompt)
-                 frame_analyses.append(analysis)
-
-             summary = summarize_activities(frame_analyses)
-
-             results.append({
-                 "video_filename": video_name,
-                 "frame_count": frame_count,
-                 "fps": fps,
-                 "frame_analyses": frame_analyses,
-                 "summary": summary
-             })
-
-         return JSONResponse(content={
-             "archive_filename": file.filename,
-             "videos_processed": len(video_files),
-             "results": results
-         })
-
- @app.get("/health")
- async def health_check():
-     """Health check endpoint."""
-     return JSONResponse(content={
-         "status": "healthy",
-         "model": "Florence-2 (Mock)",
-         "note": "Florence-2 model is mocked due to sandbox memory limitations."
-     })
-
- @app.get("/status")
- async def get_processing_status():
-     """Get current processing status."""
-     return JSONResponse(content=processing_status)
-
- # Expose necessary functions and variables
- __all__ = [
-     "main_processing_loop",
-     "processing_status",
-     "ANALYSIS_OUTPUT_FOLDER",
-     "log_message",
-     "send_email_with_attachment",
-     "analyze_frames",
-     "extract_frames",
-     "DEFAULT_FPS",
-     "ensure_dir"
- ]
+ import os
+ import json
+ import requests
+ import subprocess
+ import shutil
+ import time
+ import re
+ import threading
+ from typing import Dict, List, Set, Optional
+ from huggingface_hub import HfApi, list_repo_files
+ from fastapi import FastAPI, File, UploadFile, Form
+ from fastapi.responses import JSONResponse
+ from pathlib import Path
+ import smtplib
+ from email.message import EmailMessage
+ import tempfile
+ import rarfile
+ import zipfile
+ import cv2
+ import numpy as np
+ from PIL import Image
+ import torch
+ from transformers import AutoProcessor, AutoModelForCausalLM
+
+ # Initialize FastAPI
+ app = FastAPI()
+
+ # ==== CONFIGURATION ====
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
+ SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")
+
+ # Path Configuration
+ DOWNLOAD_FOLDER = "downloads"
+ EXTRACT_FOLDER = "extracted"
+ FRAMES_OUTPUT_FOLDER = "extracted_frames"
+ ANALYSIS_OUTPUT_FOLDER = "analysis_results"
+
+ os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
+ os.makedirs(EXTRACT_FOLDER, exist_ok=True)
+ os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
+ os.makedirs(ANALYSIS_OUTPUT_FOLDER, exist_ok=True)
+
+ # State Files
+ DOWNLOAD_STATE_FILE = "download_progress.json"
+ PROCESS_STATE_FILE = "process_progress.json"
+ FAILED_FILES_LOG = "failed_files.log"
+
+ # Processing Parameters
+ CHUNK_SIZE = 1
+ PROCESSING_DELAY = 2
+ MAX_RETRIES = 3
+ MIN_FREE_SPACE_GB = 2  # Minimum free space in GB before processing
+
+ # Frame Extraction Parameters
+ DEFAULT_FPS = 0.1  # Default frames per second for extraction
+
+ # Initialize HF API
+ hf_api = HfApi(token=HF_TOKEN)
+
+ # Global State
+ processing_status = {
+     "is_running": False,
+     "current_file": None,
+     "total_files": 0,
+     "processed_files": 0,
+     "failed_files": 0,
+     "extracted_courses": 0,
+     "extracted_videos": 0,
+     "extracted_frames_count": 0,
+     "analyzed_frames_count": 0,
+     "last_update": None,
+     "logs": []
+ }
+
+ import torch
+ import subprocess
+ import sys
+
+ device = "cpu"  # Explicitly ensure CPU usage
+
+ try:
+     # Load processor with padding configuration
+     vision_processor = AutoProcessor.from_pretrained(
+         "microsoft/git-base-coco",
+         padding="max_length",
+         truncation=True
+     )
+
+     # Load model with CPU optimizations
+     vision_model = AutoModelForCausalLM.from_pretrained(
+         "microsoft/git-base-coco",
+         torch_dtype=torch.float32,
+         low_cpu_mem_usage=True,
+         device_map="cpu"
+     ).eval()
+
+     print("✅ Successfully loaded GIT model and processor")
+
+ except Exception as e:
+     print(f"❌ Error loading model: {str(e)}")
+     vision_model = None
+     vision_processor = None
+
+ # Preprompt templates
+ PREPROMPT_TEMPLATES = {
+     "default": "This image shows: ",
+     "design": "This design tutorial frame shows: ",
+     "ui": "This user interface demonstrates: ",
+     "motion": "This motion design example illustrates: "
+ }
+
+ def get_preprompt(video_filename: str) -> str:
+     """Select appropriate preprompt based on video content"""
+     filename = video_filename.lower()
+     if any(x in filename for x in ["ui", "interface", "ux"]):
+         return PREPROMPT_TEMPLATES["ui"]
+     elif any(x in filename for x in ["design", "tutorial"]):
+         return PREPROMPT_TEMPLATES["design"]
+     elif any(x in filename for x in ["motion", "animation"]):
+         return PREPROMPT_TEMPLATES["motion"]
+     return PREPROMPT_TEMPLATES["default"]
+
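The preprompt selection above keys off substrings in the video filename, and the checks run in order, so a name matching both "ui" and "design" gets the UI template. A quick sketch of how it resolves, using made-up filenames:

```python
# Hypothetical filenames, for illustration only.
for name in ["UI_basics_01.mp4", "logo_design_tutorial.mkv",
             "motion_graphics_intro.mov", "lecture.mp4"]:
    print(name, "->", get_preprompt(name))
# UI_basics_01.mp4 -> This user interface demonstrates:
# logo_design_tutorial.mkv -> This design tutorial frame shows:
# motion_graphics_intro.mov -> This motion design example illustrates:
# lecture.mp4 -> This image shows:
```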
+ def log_message(message: str):
+     """Log messages with timestamp"""
+     timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
+     log_entry = f"[{timestamp}] {message}"
+     print(log_entry)
+     processing_status["logs"].append(log_entry)
+     processing_status["last_update"] = timestamp
+     if len(processing_status["logs"]) > 100:
+         processing_status["logs"] = processing_status["logs"][-100:]
+
+ def log_failed_file(filename: str, error: str):
+     """Log failed files to persistent file"""
+     with open(FAILED_FILES_LOG, "a") as f:
+         f.write(f'{time.strftime("%Y-%m-%d %H:%M:%S")} - {filename}: {error}\n')
+
+ def get_disk_usage(path: str) -> Dict[str, float]:
+     """Get disk usage statistics in GB"""
+     statvfs = os.statvfs(path)
+     total = statvfs.f_frsize * statvfs.f_blocks / (1024**3)
+     free = statvfs.f_frsize * statvfs.f_bavail / (1024**3)
+     used = total - free
+     return {"total": total, "free": free, "used": used}
+
+ def check_disk_space(path: str = ".") -> bool:
+     """Check if there's enough disk space"""
+     disk_info = get_disk_usage(path)
+     if disk_info["free"] < MIN_FREE_SPACE_GB:
+         log_message(f'⚠️ Low disk space: {disk_info["free"]:.2f}GB free, {disk_info["used"]:.2f}GB used')
+         return False
+     return True
+
+ def cleanup_temp_files():
+     """Clean up temporary files to free space"""
+     log_message("🧹 Cleaning up temporary files...")
+
+     # Clean old downloads (keep only current processing file)
+     current_file = processing_status.get("current_file")
+     for file in os.listdir(DOWNLOAD_FOLDER):
+         if file != current_file and file.endswith((".rar", ".zip")):
+             try:
+                 os.remove(os.path.join(DOWNLOAD_FOLDER, file))
+                 log_message(f"🗑️ Removed old download: {file}")
+             except:
+                 pass
+
+ def load_json_state(file_path: str, default_value):
+     """Load state from JSON file"""
+     if os.path.exists(file_path):
+         try:
+             with open(file_path, "r") as f:
+                 return json.load(f)
+         except json.JSONDecodeError:
+             log_message(f"⚠️ Corrupted state file: {file_path}")
+     return default_value
+
+ def save_json_state(file_path: str, data):
+     """Save state to JSON file"""
+     with open(file_path, "w") as f:
+         json.dump(data, f, indent=2)
+
+ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
+     """Download file with retry logic and disk space checking"""
+     if not check_disk_space():
+         cleanup_temp_files()
+         if not check_disk_space():
+             log_message("❌ Insufficient disk space even after cleanup")
+             return False
+
+     headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+     for attempt in range(max_retries):
+         try:
+             with requests.get(url, headers=headers, stream=True) as r:
+                 r.raise_for_status()
+
+                 # Check content length if available
+                 content_length = r.headers.get("content-length")
+                 if content_length:
+                     size_gb = int(content_length) / (1024**3)
+                     disk_info = get_disk_usage(".")
+                     if size_gb > disk_info["free"] - 0.5:  # Leave 0.5GB buffer
+                         log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
+                         return False
+
+                 with open(dest_path, "wb") as f:
+                     for chunk in r.iter_content(chunk_size=8192):
+                         f.write(chunk)
+                 return True
+         except Exception as e:
+             if attempt < max_retries - 1:
+                 time.sleep(2 ** attempt)
+                 continue
+             log_message(f"❌ Download failed after {max_retries} attempts: {e}")
+             return False
+     return False
+
+ def is_multipart_rar(filename: str) -> bool:
+     """Check if this is a multi-part RAR file"""
+     return ".part" in filename.lower() and filename.lower().endswith(".rar")
+
+ def get_rar_part_base(filename: str) -> str:
+     """Get the base name for multi-part RAR files"""
+     if ".part" in filename.lower():
+         return filename.split(".part")[0]
+     return filename.replace(".rar", "")
+
+ def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
+     """Extract RAR with retry and recovery, handling multi-part archives"""
+     filename = os.path.basename(rar_path)
+
+     # For multi-part RARs, we need the first part
+     if is_multipart_rar(filename):
+         base_name = get_rar_part_base(filename)
+         first_part = f"{base_name}.part01.rar"
+         first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
+
+         if not os.path.exists(first_part_path):
+             log_message(f"⚠️ Multi-part RAR detected but first part not found: {first_part}")
+             return False
+
+         rar_path = first_part_path
+         log_message(f"📦 Processing multi-part RAR starting with: {first_part}")
+
+     for attempt in range(max_retries):
+         try:
+             # Test RAR first
+             test_cmd = ["unrar", "t", rar_path]
+             test_result = subprocess.run(test_cmd, capture_output=True, text=True)
+             if test_result.returncode != 0:
+                 log_message(f"⚠️ RAR test failed: {test_result.stderr}")
+                 if attempt == max_retries - 1:
+                     return False
+                 continue
+
+             # Extract RAR
+             cmd = ["unrar", "x", "-o+", rar_path, output_dir]
+             if attempt > 0:  # Try recovery on subsequent attempts
+                 cmd.insert(2, "-kb")
+
+             result = subprocess.run(cmd, capture_output=True, text=True)
+             if result.returncode == 0:
+                 log_message(f"✅ Successfully extracted: {os.path.basename(rar_path)}")
+                 return True
+             else:
+                 error_msg = result.stderr or result.stdout
+                 log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")
+
+                 if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
+                     log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
+                 elif result.returncode == 10:
+                     log_message(f"⚠️ No files to extract (exit code 10)")
+                     return False
+                 elif result.returncode == 1:
+                     log_message(f"⚠️ Non-fatal error (exit code 1)")
+
+         except Exception as e:
+             log_message(f"❌ Extraction exception: {str(e)}")
+             if attempt == max_retries - 1:
+                 return False
+         time.sleep(1)
+
+     return False
+
+ def ensure_dir(path):
+     os.makedirs(path, exist_ok=True)
+
+ def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
+     """Extract frames from video at the specified frames per second (fps)."""
+     log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
+     ensure_dir(output_dir)
+     cap = cv2.VideoCapture(str(video_path))
+     if not cap.isOpened():
+         log_message(f"[ERROR] Failed to open video file: {video_path}")
+         return 0
+     video_fps = cap.get(cv2.CAP_PROP_FPS)
+     if not video_fps or video_fps <= 0:
+         video_fps = 30  # fallback if FPS is not available
+         log_message(f"[WARN] Using fallback FPS: {video_fps}")
+     frame_interval = int(round(video_fps / fps))
+     frame_idx = 0
+     saved_idx = 1
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+     log_message(f"[DEBUG] Total frames in video: {total_frames}")
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+         if frame_idx % frame_interval == 0:
+             if saved_idx <= 10:  # Limit to 10 frames for testing
+                 frame_name = f"{saved_idx:04d}.png"
+                 cv2.imwrite(str(Path(output_dir) / frame_name), frame)
+                 saved_idx += 1
+             else:
+                 break  # Stop extracting after 10 frames
+         frame_idx += 1
+     cap.release()
+     log_message(f"Extracted {saved_idx-1} frames from {video_path} to {output_dir}")
+     return saved_idx - 1
+
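With the new DEFAULT_FPS of 0.1 and a 30 fps source, frame_interval = int(round(30 / 0.1)) = 300, i.e. one frame every 10 seconds, and the testing cap above stops after 10 saved frames (roughly the first 100 seconds of video). A minimal sanity check, assuming a local test clip exists at the path shown:

```python
# "sample.mp4" is a placeholder path, not part of this commit.
saved = extract_frames("sample.mp4", "debug_frames", fps=DEFAULT_FPS)
print(f"saved {saved} frames")  # capped at 10 by the testing limit
```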
+ def analyze_single_frame(image_path: str, preprompt: str = "") -> dict:
+     """Consistent frame processing function with robust error handling"""
+     if not vision_model or not vision_processor:
+         return {
+             "image": os.path.basename(image_path),
+             "description": "[ERROR] Model not loaded",
+             "success": False
+         }
+
+     try:
+         # Load and resize image
+         image = Image.open(image_path).convert("RGB")
+         image = image.resize((224, 224))
+
+         # Ensure tokenizer padding config is safe
+         tokenizer = vision_processor.tokenizer
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+         tokenizer.padding_side = "right"
+
+         # Preprocess inputs
+         inputs = vision_processor(
+             images=[image],
+             text=preprompt,
+             return_tensors="pt",
+             padding="max_length",
+             truncation=True,
+             max_length=512
+         ).to(device)
+
+         # Safety: check pixel_values shape
+         pixel_values = inputs["pixel_values"]
+         if pixel_values.dim() == 3:
+             pixel_values = pixel_values.unsqueeze(0)
+
+         # Generate caption
+         with torch.no_grad():
+             outputs = vision_model.generate(
+                 input_ids=inputs["input_ids"],
+                 attention_mask=inputs["attention_mask"],
+                 pixel_values=pixel_values,
+                 max_new_tokens=500,
+                 num_beams=5,
+                 early_stopping=False,
+                 pad_token_id=tokenizer.pad_token_id
+             )
+
+         caption = vision_processor.batch_decode(
+             outputs,
+             skip_special_tokens=True
+         )[0].strip()
+
+         return {
+             "image": os.path.basename(image_path),
+             "description": caption,
+             "success": True
+         }
+
+     except Exception as e:
+         return {
+             "image": os.path.basename(image_path),
+             "description": f"[ERROR] {str(e)}",
+             "success": False
+         }
+
+
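A minimal way to exercise analyze_single_frame on one image; the frame path is a placeholder, and any readable image works since it is resized to 224x224 before reaching the GIT processor:

```python
# Placeholder frame path; assumes the model loaded successfully above.
result = analyze_single_frame("extracted_frames/0001.png",
                              preprompt=PREPROMPT_TEMPLATES["design"])
if result["success"]:
    print(result["image"], "->", result["description"])
else:
    print("caption failed:", result["description"])
```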
+ def process_video_frames(frames_dir: str, video_filename: str, output_file: str) -> bool:
+     """Main processing function with first-frame validation"""
+     try:
+         frames = sorted(Path(frames_dir).glob("*.png"))
+         if not frames:
+             print("❌ No frames found in directory")
+             return False
+
+         # Validate first frame
+         first_frame_result = analyze_single_frame(str(frames[0]), get_preprompt(video_filename))
+         print("\n=== FIRST FRAME VALIDATION ===")
+         print(f'Image: {first_frame_result["image"]}')
+         print(f'Result: {first_frame_result["description"]}')
+         print(f'Status: {"Success" if first_frame_result["success"] else "Failed"}\n')
+
+         if not first_frame_result["success"]:
+             print("❌ Aborting due to first frame failure")
+             return False
+
+         preprompt = get_preprompt(video_filename)
+         results = {
+             "metadata": {
+                 "video": video_filename,
+                 "preprompt": preprompt,
+                 "total_frames": len(frames),
+                 "processed_frames": 0,
+                 "failed_frames": 0
+             },
+             "frames": []
+         }
+
+         for i, frame_path in enumerate(frames):
+             result = analyze_single_frame(str(frame_path), preprompt)
+             results["frames"].append(result)
+
+             if result["success"]:
+                 results["metadata"]["processed_frames"] += 1
+             else:
+                 results["metadata"]["failed_frames"] += 1
+
+             # Periodic saving
+             if i % 10 == 0:
+                 with open(output_file, "w") as f:
+                     json.dump(results, f, indent=2)
+
+         # Final save
+         with open(output_file, "w") as f:
+             json.dump(results, f, indent=2)
+
+         return True
+
+     except Exception as e:
+         print(f"❌ Processing failed: {str(e)}")
+         return False
+
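For reference, the JSON that process_video_frames writes has the shape below; the values are invented for illustration:

```python
# Illustrative output shape (all values invented):
example_analysis = {
    "metadata": {
        "video": "intro.mp4",
        "preprompt": "This design tutorial frame shows: ",
        "total_frames": 10,
        "processed_frames": 9,
        "failed_frames": 1
    },
    "frames": [
        {"image": "0001.png", "description": "a toolbar with tool icons", "success": True}
    ]
}
```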
+ def summarize_activities(frame_analyses: List[Dict]) -> Dict:
+     """Summarize activities from frame analyses."""
+     return {}
+
+ def process_rar_file(rar_path: str) -> bool:
+     """Process a single RAR file with new frame processing"""
+     filename = os.path.basename(rar_path)
+     processing_status["current_file"] = filename
+
+     # Handle multi-part RAR naming
+     if is_multipart_rar(filename):
+         course_name = get_rar_part_base(filename)
+     else:
+         course_name = filename.replace(".rar", "")
+
+     extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
+
+     try:
+         log_message(f"🔄 Processing: {filename}")
+
+         # Clean up any existing directory
+         if os.path.exists(extract_dir):
+             shutil.rmtree(extract_dir, ignore_errors=True)
+
+         # Extract RAR
+         os.makedirs(extract_dir, exist_ok=True)
+         if not extract_with_retry(rar_path, extract_dir):
+             raise Exception("RAR extraction failed")
+
+         # Process video files
+         video_files = []
+         for root, _, files in os.walk(extract_dir):
+             for file in files:
+                 if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
+                     video_files.append(os.path.join(root, file))
+
+         processing_status["extracted_courses"] += 1
+         log_message(f"✅ Extracted {len(video_files)} videos from '{course_name}'")
+
+         # Process each video
+         for video_path in video_files:
+             video_filename = Path(video_path).name
+             video_filename_clean = video_filename.replace(".", "_")
+             frames_dir = os.path.join(FRAMES_OUTPUT_FOLDER, f"{course_name}_{video_filename_clean}_frames")
+             ensure_dir(frames_dir)
+
+             # Extract frames
+             extracted_count = extract_frames(video_path, frames_dir, DEFAULT_FPS)
+             if extracted_count == 0:
+                 raise Exception(f"No frames extracted from {video_filename}")
+
+             processing_status["extracted_frames_count"] += extracted_count
+
+             # Analyze frames
+             analysis_output = os.path.join(ANALYSIS_OUTPUT_FOLDER, f"{course_name}_{video_filename_clean}_analysis.json")
+             if process_video_frames(frames_dir, video_filename, analysis_output):
+                 processing_status["analyzed_frames_count"] += extracted_count
+                 processing_status["extracted_videos"] += 1
+             else:
+                 raise Exception(f"Frame analysis failed for {video_filename}")
+
+         return True
+
+     except Exception as e:
+         error_msg = str(e)
+         log_message(f"❌ Processing failed: {error_msg}")
+         log_failed_file(filename, error_msg)
+         return False
+
+     finally:
+         processing_status["current_file"] = None
+
+ def main_processing_loop(start_index: int = 0):
+     """Main processing workflow - extraction, frame extraction, and vision analysis"""
+     processing_status["is_running"] = True
+
+     try:
+         # Load state
+         processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
+         download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 0})
+
+         # Use start_index if provided, otherwise use the saved state
+         next_index = start_index if start_index > 0 else download_state["next_download_index"]
+
+         log_message(f"📊 Starting from index {next_index}")
+         log_message(f"📊 Previously processed: {len(processed_rars)} files")
+
+         # Get file list
+         try:
+             files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
+             rar_files = sorted([f for f in files if f.endswith(".rar")])
+
+             processing_status["total_files"] = len(rar_files)
+             log_message(f"📁 Found {len(rar_files)} RAR files in repository")
+
+             if next_index >= len(rar_files):
+                 log_message("✅ All files have been processed!")
+                 return
+
+         except Exception as e:
+             log_message(f"❌ Failed to get file list: {str(e)}")
+             return
+
+         # Process only one file per run
+         if next_index < len(rar_files):
+             rar_file = rar_files[next_index]
+             filename = os.path.basename(rar_file)
+
+             if filename in processed_rars:
+                 log_message(f"⏭️ Skipping already processed: {filename}")
+                 processing_status["processed_files"] += 1
+                 # Move to next file
+                 next_index += 1
+                 save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
+                 log_message(f"📊 Moving to next file. Progress: {next_index}/{len(rar_files)}")
+                 return
+
+             log_message(f"📥 Downloading: {filename}")
+             dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
+
+             # Download file
+             download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
+             if download_with_retry(download_url, dest_path):
+                 # Process file
+                 if process_rar_file(dest_path):
+                     processed_rars.append(filename)
+                     save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
+                     log_message(f"✅ Successfully processed: {filename}")
+                     processing_status["processed_files"] += 1
+                 else:
+                     log_message(f"❌ Failed to process: {filename}")
+                     processing_status["failed_files"] += 1
+
+                 # Clean up downloaded file
+                 try:
+                     os.remove(dest_path)
+                     log_message(f"🗑️ Cleaned up download: {filename}")
+                 except:
+                     pass
+             else:
+                 log_message(f"❌ Failed to download: {filename}")
+                 processing_status["failed_files"] += 1
+
+             # Update download state for next run
+             next_index += 1
+             save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
+
+             # Status update
+             log_message(f"📊 Progress: {next_index}/{len(rar_files)} files processed")
+             log_message(f"📊 Extracted: {processing_status['extracted_courses']} courses")
+             log_message(f"📊 Videos Processed: {processing_status['extracted_videos']} videos")
+             log_message(f"📊 Frames Extracted: {processing_status['extracted_frames_count']} frames")
+             log_message(f"📊 Frames Analyzed: {processing_status['analyzed_frames_count']} frames")
+             log_message(f"📊 Failed: {processing_status['failed_files']} files")
+
+             if next_index < len(rar_files):
+                 log_message(f"🔄 Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
+             else:
+                 log_message("🎉 All files have been processed!")
+         else:
+             log_message("✅ All files have been processed!")
+
+         log_message("🎉 Processing complete!")
+         log_message(f"📊 Final stats: {processing_status['extracted_courses']} courses extracted, {processing_status['extracted_videos']} videos processed, {processing_status['extracted_frames_count']} frames extracted, {processing_status['analyzed_frames_count']} frames analyzed")
+
+     except KeyboardInterrupt:
+         log_message("⏹️ Processing interrupted by user")
+     except Exception as e:
+         log_message(f"❌ Fatal error: {str(e)}")
+     finally:
+         processing_status["is_running"] = False
+         cleanup_temp_files()
+
+ # FastAPI Endpoints
+ @app.post("/analyze-video")
+ async def analyze_video_endpoint(
+     file: UploadFile = File(...),
+     fps: float = Form(DEFAULT_FPS),
+     prompt: Optional[str] = Form(None)
+ ):
+     """Analyze a single video file and return frame-by-frame analysis."""
+     if not file.filename.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
+         return JSONResponse(status_code=400, content={
+             "error": "File type not allowed",
+             "allowed_types": [".mp4", ".avi", ".mov", ".mkv"]
+         })
+
+     with tempfile.TemporaryDirectory() as temp_dir:
+         temp_dir_path = Path(temp_dir)
+         file_path = temp_dir_path / file.filename
+
+         with open(file_path, "wb") as buffer:
+             shutil.copyfileobj(file.file, buffer)
+
+         frames_dir = temp_dir_path / "frames"
+         frame_count = extract_frames(file_path, frames_dir, fps)
+
+         frame_analyses = []
+         for frame_file in sorted(frames_dir.glob("*.png")):
+             analysis = analyze_single_frame(str(frame_file), prompt or "")
+             frame_analyses.append(analysis)
+
+         summary = summarize_activities(frame_analyses)
+
+         return JSONResponse(content={
+             "video_filename": file.filename,
+             "frame_count": frame_count,
+             "fps": fps,
+             "frame_analyses": frame_analyses,
+             "summary": summary
+         })
+
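A client-side sketch for the endpoint above, assuming the app is served locally on port 8000; the host, port, and demo file are assumptions, not part of this commit:

```python
import requests  # client sketch only

with open("demo.mp4", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/analyze-video",
        files={"file": ("demo.mp4", f, "video/mp4")},
        data={"fps": "0.1"},  # form fields are sent as strings
    )
print(resp.json()["frame_count"])
```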
+ @app.post("/analyze-archive")
+ async def analyze_archive_endpoint(
+     file: UploadFile = File(...),
+     fps: float = Form(DEFAULT_FPS),
+     prompt: Optional[str] = Form(None)
+ ):
+     """Analyze videos from RAR/ZIP archive and return frame-by-frame analysis."""
+     if not file.filename.lower().endswith((".rar", ".zip")):
+         return JSONResponse(status_code=400, content={
+             "error": "File type not allowed",
+             "allowed_types": [".rar", ".zip"]
+         })
+
+     with tempfile.TemporaryDirectory() as temp_dir:
+         temp_dir_path = Path(temp_dir)
+         file_path = temp_dir_path / file.filename
+
+         with open(file_path, "wb") as buffer:
+             shutil.copyfileobj(file.file, buffer)
+
+         extract_dir = temp_dir_path / "extracted"
+         video_files = []
+
+         if file.filename.lower().endswith(".rar"):
+             with rarfile.RarFile(file_path) as rf:
+                 rf.extractall(extract_dir)
+         else:
+             with zipfile.ZipFile(file_path) as zf:
+                 zf.extractall(extract_dir)
+
+         # Find video files in extracted content (loop variable renamed so it
+         # does not shadow the `file` upload parameter used again below)
+         for root, dirs, files in os.walk(extract_dir):
+             for fname in files:
+                 if fname.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
+                     video_files.append(Path(root) / fname)
+
+         if not video_files:
+             return JSONResponse(status_code=400, content={
+                 "error": "No video files found in archive"
+             })
+
+         results = []
+         for video_path in video_files:
+             video_name = video_path.name
+             frames_dir = temp_dir_path / f"frames_{video_name}"
+             frame_count = extract_frames(video_path, frames_dir, fps)
+
+             frame_analyses = []
+             for frame_file in sorted(frames_dir.glob("*.png")):
+                 analysis = analyze_single_frame(str(frame_file), prompt or "")
+                 frame_analyses.append(analysis)
+
+             summary = summarize_activities(frame_analyses)
+
+             results.append({
+                 "video_filename": video_name,
+                 "frame_count": frame_count,
+                 "fps": fps,
+                 "frame_analyses": frame_analyses,
+                 "summary": summary
+             })
+
+         return JSONResponse(content={
+             "archive_filename": file.filename,
+             "videos_processed": len(video_files),
+             "results": results
+         })
+
+ @app.get("/health")
+ async def health_check():
+     """Health check endpoint."""
+     return JSONResponse(content={
+         "status": "healthy",
+         "model": "GIT",
+         "note": "Now using GIT model."
+     })
+
+ @app.get("/status")
+ async def get_processing_status():
+     """Get current processing status."""
+     return JSONResponse(content=processing_status)
+
+ # Expose necessary functions and variables
+ __all__ = [
+     "main_processing_loop",
+     "processing_status",
+     "ANALYSIS_OUTPUT_FOLDER",
+     "log_message",
+     "analyze_single_frame",
+     "extract_frames",
+     "DEFAULT_FPS",
+     "ensure_dir"
+ ]
+
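The file defines no __main__ entry point. One way to drive it, assuming uvicorn is installed and the module is importable as vision_analyzer (both assumptions):

```python
# Process one RAR from the source repo per call, then serve the API.
import uvicorn
from vision_analyzer import app, main_processing_loop

main_processing_loop()
uvicorn.run(app, host="0.0.0.0", port=8000)
```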