Factor Studios commited on
Commit
f9d329b
Β·
verified Β·
1 Parent(s): bec355c

Update vision_analyzer.py

Browse files
Files changed (1) hide show
  1. vision_analyzer.py +704 -223
vision_analyzer.py CHANGED
@@ -1,267 +1,748 @@
1
  import os
2
- import time
3
  import json
 
4
  import subprocess
 
 
 
5
  import threading
6
- from datetime import datetime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Global status dictionary
 
 
 
 
 
 
9
  processing_status = {
10
  "is_running": False,
11
- "current_step": "Idle",
12
- "progress": 0, # Percentage
13
- "total_videos_processed": 0,
14
- "current_video": "N/A",
15
- "logs": [],
16
- "last_update": datetime.now().isoformat()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
 
19
- # Configuration
20
- VIDEO_INPUT_FOLDER = "./input_videos"
21
- RAR_INPUT_FOLDER = "./input_rars"
22
- FRAME_OUTPUT_FOLDER = "./output_frames"
23
- ANALYSIS_OUTPUT_FOLDER = "./output_analysis"
 
 
 
 
 
24
 
25
- def log_message(message):
26
- """Add a log message with timestamp"""
27
- timestamp = datetime.now().strftime("%H:%M:%S")
28
  log_entry = f"[{timestamp}] {message}"
 
29
  processing_status["logs"].append(log_entry)
30
-
31
- # Keep only the last 100 logs
32
  if len(processing_status["logs"]) > 100:
33
  processing_status["logs"] = processing_status["logs"][-100:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- print(log_entry)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- def update_status(step, progress, current_video="N/A"):
38
- """Update the global processing status"""
39
- processing_status["current_step"] = step
40
- processing_status["progress"] = progress
41
- processing_status["current_video"] = current_video
42
- processing_status["last_update"] = datetime.now().isoformat()
43
- log_message(f"Status: {step} - {progress}% for {current_video}")
44
-
45
- def extract_rar_files(rar_folder, video_output_folder):
46
- """Extracts RAR files to the specified video output folder."""
47
- os.makedirs(video_output_folder, exist_ok=True)
48
- rar_files = [f for f in os.listdir(rar_folder) if f.endswith('.rar')]
49
- total_rars = len(rar_files)
50
 
51
- if total_rars == 0:
52
- log_message("No RAR files found to extract.")
53
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- for i, rar_file in enumerate(rar_files):
56
- if not processing_status["is_running"]:
57
- log_message("RAR extraction interrupted.")
58
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- rar_path = os.path.join(rar_folder, rar_file)
61
- log_message(f"Extracting {rar_file}...")
62
- update_status("Extracting RAR files", int((i / total_rars) * 100), rar_file)
63
 
64
- try:
65
- # Using 'bsdtar' which is commonly available and handles RAR
66
- subprocess.run(['bsdtar', '-xf', rar_path, '-C', video_output_folder], check=True)
67
- log_message(f"Successfully extracted {rar_file}")
68
- except subprocess.CalledProcessError as e:
69
- log_message(f"Error extracting {rar_file}: {e}")
70
- except FileNotFoundError:
71
- log_message("Error: 'bsdtar' command not found. Please install it (e.g., sudo apt-get install bsdtar).")
72
- processing_status["is_running"] = False # Stop processing if essential tool is missing
73
- return
74
-
75
- def extract_frames(video_path, output_folder):
76
- """Extracts frames from a video using ffmpeg."""
77
- video_name = Path(video_path).stem
78
- frame_output_path = os.path.join(output_folder, video_name)
79
- os.makedirs(frame_output_path, exist_ok=True)
80
 
81
- log_message(f"Extracting frames from {video_path} to {frame_output_path}")
82
- command = [
83
- 'ffmpeg',
84
- '-i', video_path,
85
- '-vf', 'fps=1',
86
- f'{frame_output_path}/frame_%04d.png'
87
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  try:
90
- subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
91
- log_message(f"Finished extracting frames for {video_name}")
92
- return frame_output_path
93
- except subprocess.CalledProcessError as e:
94
- log_message(f"Error extracting frames from {video_name}: {e.stderr.decode()}")
95
- return None
96
- except FileNotFoundError:
97
- log_message("Error: 'ffmpeg' command not found. Please install ffmpeg.")
98
- processing_status["is_running"] = False
99
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- def analyze_frame(frame_path):
102
- """Simulates frame analysis and returns dummy data."""
103
- # In a real scenario, this would involve ML models (e.g., YOLO, CLIP, custom models)
104
- # For demonstration, we return a dummy analysis.
105
 
106
- # Simulate some processing time
107
- time.sleep(0.1)
 
 
 
108
 
109
- frame_id = Path(frame_path).stem
110
- dummy_analysis = {
111
- "frame_id": frame_id,
112
- "timestamp": datetime.now().isoformat(),
113
- "objects_detected": [
114
- {"label": "person", "confidence": 0.95, "bbox": [10, 20, 30, 40]},
115
- {"label": "car", "confidence": 0.80, "bbox": [50, 60, 70, 80]}
116
- ],
117
- "description": f"A {frame_id} showing various objects."
118
- }
119
- return dummy_analysis
120
-
121
- def perform_vision_analysis(frame_folder):
122
- """Performs vision analysis on all frames in a folder."""
123
- if not frame_folder or not os.path.exists(frame_folder):
124
- log_message(f"Frame folder {frame_folder} not found for analysis.")
125
- return None
126
-
127
- frames = sorted([f for f in os.listdir(frame_folder) if f.endswith('.png')])
128
- total_frames = len(frames)
129
- video_name = Path(frame_folder).name
130
 
131
- if total_frames == 0:
132
- log_message(f"No frames found in {frame_folder} for analysis.")
133
- return None
134
-
135
- analysis_results = []
136
- for i, frame_file in enumerate(frames):
137
- if not processing_status["is_running"]:
138
- log_message("Vision analysis interrupted.")
139
- break
140
 
141
- frame_path = os.path.join(frame_folder, frame_file)
142
- update_status("Performing Vision Analysis", int((i / total_frames) * 100), video_name)
 
143
 
144
- analysis = analyze_frame(frame_path)
145
- if analysis:
146
- analysis_results.append(analysis)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # Dummy summary generation
149
- summary = {
150
- "total_frames_analyzed": len(analysis_results),
151
- "avg_objects_per_frame": sum(len(a['objects_detected']) for a in analysis_results) / len(analysis_results) if analysis_results else 0,
152
- "dominant_objects": "person, car",
153
- "high_level_goal": "Identify key activities",
154
- "final_goal": "Generate comprehensive video report",
155
- "steps": ["Frame extraction", "Object detection", "Activity recognition"]
156
- }
157
-
158
- return {"video_name": video_name, "frame_analyses": analysis_results, "summary": summary}
159
-
160
- def save_analysis_results(analysis_data, output_folder):
161
- """Saves the analysis results to a JSON file."""
162
- os.makedirs(output_folder, exist_ok=True)
163
- video_name = analysis_data["video_name"]
164
- output_path = os.path.join(output_folder, f"{video_name}_analysis.json")
165
 
166
- with open(output_path, 'w') as f:
167
- json.dump(analysis_data, f, indent=4)
168
- log_message(f"Analysis results saved to {output_path}")
169
- return output_path
170
 
171
  def main_processing_loop(start_index: int = 0):
172
- """Main function to orchestrate the video processing pipeline."""
173
  processing_status["is_running"] = True
174
- processing_status["total_videos_processed"] = 0
175
- processing_status["logs"] = [] # Clear logs on new run
176
- log_message("Starting main processing loop...")
177
-
178
  try:
179
- # Step 1: Extract RAR files
180
- update_status("Starting RAR Extraction", 0)
181
- extract_rar_files(RAR_INPUT_FOLDER, VIDEO_INPUT_FOLDER)
182
- if not processing_status["is_running"]:
183
- return
184
- log_message("RAR extraction complete.")
185
-
186
- # Step 2: Process videos
187
- video_files = [f for f in os.listdir(VIDEO_INPUT_FOLDER) if f.endswith(('.mp4', '.avi', '.mov', '.mkv'))]
188
- total_videos = len(video_files)
189
 
190
- if total_videos == 0:
191
- log_message("No video files found to process.")
192
- update_status("Finished", 100)
193
- processing_status["is_running"] = False
 
 
 
 
 
 
 
 
 
 
194
  return
195
-
196
- for i, video_file in enumerate(video_files):
197
- if not processing_status["is_running"]:
198
- log_message("Video processing interrupted.")
199
- break
200
 
201
- if i < start_index:
202
- log_message(f"Skipping video {video_file} due to start_index.")
203
- continue
204
-
205
- video_path = os.path.join(VIDEO_INPUT_FOLDER, video_file)
206
- log_message(f"Processing video: {video_file}")
207
- processing_status["current_video"] = video_file
208
-
209
- # Extract frames
210
- update_status("Extracting Frames", 0, video_file)
211
- frame_folder = extract_frames(video_path, FRAME_OUTPUT_FOLDER)
212
- if not frame_folder:
213
- log_message(f"Skipping analysis for {video_file} due to frame extraction failure.")
214
- continue
215
- if not processing_status["is_running"]:
216
- break
217
-
218
- # Perform vision analysis
219
- update_status("Performing Vision Analysis", 0, video_file)
220
- analysis_data = perform_vision_analysis(frame_folder)
221
- if not analysis_data:
222
- log_message(f"Skipping saving results for {video_file} due to analysis failure.")
223
- continue
224
- if not processing_status["is_running"]:
225
- break
226
-
227
- # Save results
228
- update_status("Saving Analysis Results", 0, video_file)
229
- save_analysis_results(analysis_data, ANALYSIS_OUTPUT_FOLDER)
230
 
231
- processing_status["total_videos_processed"] += 1
232
- log_message(f"Finished processing {video_file}")
233
 
234
- # Update overall progress
235
- overall_progress = int(((i + 1) / total_videos) * 100)
236
- update_status("Processing Videos", overall_progress, video_file)
237
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  except Exception as e:
239
- log_message(f"An error occurred in the main processing loop: {e}")
240
  finally:
241
  processing_status["is_running"] = False
242
- processing_status["current_step"] = "Finished" if processing_status["is_running"] else "Stopped"
243
- processing_status["progress"] = 100 if processing_status["is_running"] else processing_status["progress"]
244
- log_message("Main processing loop finished or stopped.")
245
-
246
- if __name__ == "__main__":
247
- # Example usage: Ensure folders exist and place dummy files
248
- os.makedirs(RAR_INPUT_FOLDER, exist_ok=True)
249
- os.makedirs(VIDEO_INPUT_FOLDER, exist_ok=True)
250
- os.makedirs(FRAME_OUTPUT_FOLDER, exist_ok=True)
251
- os.makedirs(ANALYSIS_OUTPUT_FOLDER, exist_ok=True)
252
-
253
- # Create a dummy RAR file for testing
254
- # with open(os.path.join(RAR_INPUT_FOLDER, "dummy.rar"), "w") as f:
255
- # f.write("This is a dummy rar file.")
256
-
257
- # Create a dummy video file for testing (requires ffmpeg to extract frames)
258
- # You can create a small dummy mp4 using ffmpeg:
259
- # ffmpeg -f lavfi -i color=c=red:s=320x240:d=1 -c:v libx264 -preset superfast -crf 23 dummy_video.mp4
260
- # Or just create an empty file as a placeholder
261
- # with open(os.path.join(VIDEO_INPUT_FOLDER, "dummy_video.mp4"), "w") as f:
262
- # f.write("This is a dummy video file.")
263
-
264
- print("Setup complete. You can now run the FastAPI/Gradio app.")
265
- # To run the processing loop directly (for testing without FastAPI/Gradio):
266
- # main_processing_loop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
 
1
  import os
 
2
  import json
3
+ import requests
4
  import subprocess
5
+ import shutil
6
+ import time
7
+ import re
8
  import threading
9
+ from typing import Dict, List, Set, Optional
10
+ from huggingface_hub import HfApi, list_repo_files
11
+ from fastapi import FastAPI, File, UploadFile, Form
12
+ from fastapi.responses import JSONResponse
13
+ from pathlib import Path
14
+ import smtplib
15
+ from email.message import EmailMessage
16
+ import tempfile
17
+ import rarfile
18
+ import zipfile
19
+ import cv2
20
+ import numpy as np
21
+ from PIL import Image
22
+ import torch
23
+ from transformers import AutoProcessor, AutoModelForCausalLM
24
+
25
+ # Initialize FastAPI
26
+ app = FastAPI()
27
+
28
+ # ==== CONFIGURATION ====
29
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
30
+ SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")
31
+
32
+ # Path Configuration
33
+ DOWNLOAD_FOLDER = "downloads"
34
+ EXTRACT_FOLDER = "extracted"
35
+ FRAMES_OUTPUT_FOLDER = "extracted_frames"
36
+ ANALYSIS_OUTPUT_FOLDER = "analysis_results"
37
+
38
+ os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
39
+ os.makedirs(EXTRACT_FOLDER, exist_ok=True)
40
+ os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
41
+ os.makedirs(ANALYSIS_OUTPUT_FOLDER, exist_ok=True)
42
+
43
+ # State Files
44
+ DOWNLOAD_STATE_FILE = "download_progress.json"
45
+ PROCESS_STATE_FILE = "process_progress.json"
46
+ FAILED_FILES_LOG = "failed_files.log"
47
+
48
+ # Processing Parameters
49
+ CHUNK_SIZE = 1
50
+ PROCESSING_DELAY = 2
51
+ MAX_RETRIES = 3
52
+ MIN_FREE_SPACE_GB = 2 # Minimum free space in GB before processing
53
 
54
+ # Frame Extraction Parameters
55
+ DEFAULT_FPS = 0.1 # Default frames per second for extraction
56
+
57
+ # Initialize HF API
58
+ hf_api = HfApi(token=HF_TOKEN)
59
+
60
+ # Global State
61
  processing_status = {
62
  "is_running": False,
63
+ "current_file": None,
64
+ "total_files": 0,
65
+ "processed_files": 0,
66
+ "failed_files": 0,
67
+ "extracted_courses": 0,
68
+ "extracted_videos": 0,
69
+ "extracted_frames_count": 0,
70
+ "analyzed_frames_count": 0,
71
+ "last_update": None,
72
+ "logs": []
73
+ }
74
+
75
+ import torch
76
+ import subprocess
77
+ import sys
78
+
79
+ device = "cpu" # Explicitly ensure CPU usage
80
+
81
+ try:
82
+ # Load processor with padding configuration
83
+ vision_processor = AutoProcessor.from_pretrained(
84
+ "microsoft/git-base-coco",
85
+ padding="max_length",
86
+ truncation=True
87
+ )
88
+
89
+ # Load model with CPU optimizations
90
+ vision_model = AutoModelForCausalLM.from_pretrained(
91
+ "microsoft/git-base-coco",
92
+ torch_dtype=torch.float32,
93
+ low_cpu_mem_usage=True,
94
+ device_map="cpu"
95
+ ).eval()
96
+
97
+ print("βœ… Successfully loaded GIT model and processor")
98
+
99
+ except Exception as e:
100
+ print(f"❌ Error loading model: {str(e)}")
101
+ vision_model = None
102
+ vision_processor = None
103
+
104
+ # Preprompt templates
105
+ PREPROMPT_TEMPLATES = {
106
+ "default": "This image shows: ",
107
+ "design": "This design tutorial frame shows: ",
108
+ "ui": "This user interface demonstrates: ",
109
+ "motion": "This motion design example illustrates: "
110
  }
111
 
112
+ def get_preprompt(video_filename: str) -> str:
113
+ """Select appropriate preprompt based on video content"""
114
+ filename = video_filename.lower()
115
+ if any(x in filename for x in ["ui", "interface", "ux"]):
116
+ return PREPROMPT_TEMPLATES["ui"]
117
+ elif any(x in filename for x in ["design", "tutorial"]):
118
+ return PREPROMPT_TEMPLATES["design"]
119
+ elif any(x in filename for x in ["motion", "animation"]):
120
+ return PREPROMPT_TEMPLATES["motion"]
121
+ return PREPROMPT_TEMPLATES["default"]
122
 
123
+ def log_message(message: str):
124
+ """Log messages with timestamp"""
125
+ timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
126
  log_entry = f"[{timestamp}] {message}"
127
+ print(log_entry)
128
  processing_status["logs"].append(log_entry)
129
+ processing_status["last_update"] = timestamp
 
130
  if len(processing_status["logs"]) > 100:
131
  processing_status["logs"] = processing_status["logs"][-100:]
132
+
133
+ def log_failed_file(filename: str, error: str):
134
+ """Log failed files to persistent file"""
135
+ with open(FAILED_FILES_LOG, "a") as f:
136
+ f.write(f'{time.strftime("%Y-%m-%d %H:%M:%S")} - {filename}: {error}\n')
137
+
138
+ def get_disk_usage(path: str) -> Dict[str, float]:
139
+ """Get disk usage statistics in GB"""
140
+ statvfs = os.statvfs(path)
141
+ total = statvfs.f_frsize * statvfs.f_blocks / (1024**3)
142
+ free = statvfs.f_frsize * statvfs.f_bavail / (1024**3)
143
+ used = total - free
144
+ return {"total": total, "free": free, "used": used}
145
+
146
+ def check_disk_space(path: str = ".") -> bool:
147
+ """Check if there\'s enough disk space"""
148
+ disk_info = get_disk_usage(path)
149
+ if disk_info["free"] < MIN_FREE_SPACE_GB:
150
+ log_message(f'⚠️ Low disk space: {disk_info["free"]:.2f}GB free, {disk_info["used"]:.2f}GB used')
151
+ return False
152
+ return True
153
+
154
+ def cleanup_temp_files():
155
+ """Clean up temporary files to free space"""
156
+ log_message("🧹 Cleaning up temporary files...")
157
 
158
+ # Clean old downloads (keep only current processing file)
159
+ current_file = processing_status.get("current_file")
160
+ for file in os.listdir(DOWNLOAD_FOLDER):
161
+ if file != current_file and file.endswith((".rar", ".zip")):
162
+ try:
163
+ os.remove(os.path.join(DOWNLOAD_FOLDER, file))
164
+ log_message(f"πŸ—‘οΈ Removed old download: {file}")
165
+ except:
166
+ pass
167
+
168
+ def load_json_state(file_path: str, default_value):
169
+ """Load state from JSON file"""
170
+ if os.path.exists(file_path):
171
+ try:
172
+ with open(file_path, "r") as f:
173
+ return json.load(f)
174
+ except json.JSONDecodeError:
175
+ log_message(f"⚠️ Corrupted state file: {file_path}")
176
+ return default_value
177
 
178
+ def save_json_state(file_path: str, data):
179
+ """Save state to JSON file"""
180
+ with open(file_path, "w") as f:
181
+ json.dump(data, f, indent=2)
182
+
183
+ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
184
+ """Download file with retry logic and disk space checking"""
185
+ if not check_disk_space():
186
+ cleanup_temp_files()
187
+ if not check_disk_space():
188
+ log_message("❌ Insufficient disk space even after cleanup")
189
+ return False
 
190
 
191
+ headers = {"Authorization": f"Bearer {HF_TOKEN}"}
192
+ for attempt in range(max_retries):
193
+ try:
194
+ with requests.get(url, headers=headers, stream=True) as r:
195
+ r.raise_for_status()
196
+
197
+ # Check content length if available
198
+ content_length = r.headers.get("content-length")
199
+ if content_length:
200
+ size_gb = int(content_length) / (1024**3)
201
+ disk_info = get_disk_usage(".")
202
+ if size_gb > disk_info["free"] - 0.5: # Leave 0.5GB buffer
203
+ log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
204
+ return False
205
+
206
+ with open(dest_path, "wb") as f:
207
+ for chunk in r.iter_content(chunk_size=8192):
208
+ f.write(chunk)
209
+ return True
210
+ except Exception as e:
211
+ if attempt < max_retries - 1:
212
+ time.sleep(2 ** attempt)
213
+ continue
214
+ log_message(f"❌ Download failed after {max_retries} attempts: {e}")
215
+ return False
216
+ return False
217
 
218
+ def is_multipart_rar(filename: str) -> bool:
219
+ """Check if this is a multi-part RAR file"""
220
+ return ".part" in filename.lower() and filename.lower().endswith(".rar")
221
+
222
+ def get_rar_part_base(filename: str) -> str:
223
+ """Get the base name for multi-part RAR files"""
224
+ if ".part" in filename.lower():
225
+ return filename.split(".part")[0]
226
+ return filename.replace(".rar", "")
227
+
228
+ def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
229
+ """Extract RAR with retry and recovery, handling multi-part archives"""
230
+ filename = os.path.basename(rar_path)
231
+
232
+ # For multi-part RARs, we need the first part
233
+ if is_multipart_rar(filename):
234
+ base_name = get_rar_part_base(filename)
235
+ first_part = f"{base_name}.part01.rar"
236
+ first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
237
 
238
+ if not os.path.exists(first_part_path):
239
+ log_message(f"⚠️ Multi-part RAR detected but first part not found: {first_part}")
240
+ return False
241
 
242
+ rar_path = first_part_path
243
+ log_message(f"πŸ“¦ Processing multi-part RAR starting with: {first_part}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
+ for attempt in range(max_retries):
246
+ try:
247
+ # Test RAR first
248
+ test_cmd = ["unrar", "t", rar_path]
249
+ test_result = subprocess.run(test_cmd, capture_output=True, text=True)
250
+ if test_result.returncode != 0:
251
+ log_message(f"⚠️ RAR test failed: {test_result.stderr}")
252
+ if attempt == max_retries - 1:
253
+ return False
254
+ continue
255
+
256
+ # Extract RAR
257
+ cmd = ["unrar", "x", "-o+", rar_path, output_dir]
258
+ if attempt > 0: # Try recovery on subsequent attempts
259
+ cmd.insert(2, "-kb")
260
+
261
+ result = subprocess.run(cmd, capture_output=True, text=True)
262
+ if result.returncode == 0:
263
+ log_message(f"βœ… Successfully extracted: {os.path.basename(rar_path)}")
264
+ return True
265
+ else:
266
+ error_msg = result.stderr or result.stdout
267
+ log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")
268
+
269
+ if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
270
+ log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
271
+ elif result.returncode == 10:
272
+ log_message(f"⚠️ No files to extract (exit code 10)")
273
+ return False
274
+ elif result.returncode == 1:
275
+ log_message(f"⚠️ Non-fatal error (exit code 1)")
276
+
277
+ except Exception as e:
278
+ log_message(f"❌ Extraction exception: {str(e)}")
279
+ if attempt == max_retries - 1:
280
+ return False
281
+ time.sleep(1)
282
 
283
+ return False
284
+
285
+ def ensure_dir(path):
286
+ os.makedirs(path, exist_ok=True)
287
+
288
+ def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
289
+ """Extract frames from video at the specified frames per second (fps)."""
290
+ log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
291
+ ensure_dir(output_dir)
292
+ cap = cv2.VideoCapture(str(video_path))
293
+ if not cap.isOpened():
294
+ log_message(f"[ERROR] Failed to open video file: {video_path}")
295
+ return 0
296
+ video_fps = cap.get(cv2.CAP_PROP_FPS)
297
+ if not video_fps or video_fps <= 0:
298
+ video_fps = 30 # fallback if FPS is not available
299
+ log_message(f"[WARN] Using fallback FPS: {video_fps}")
300
+ frame_interval = int(round(video_fps / fps))
301
+ frame_idx = 0
302
+ saved_idx = 1
303
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
304
+ log_message(f"[DEBUG] Total frames in video: {total_frames}")
305
+ while cap.isOpened():
306
+ ret, frame = cap.read()
307
+ if not ret:
308
+ break
309
+ if frame_idx % frame_interval == 0:
310
+ if saved_idx <= 10: # Limit to 10 frames for testing
311
+ frame_name = f"{saved_idx:04d}.png"
312
+ cv2.imwrite(str(Path(output_dir) / frame_name), frame)
313
+ saved_idx += 1
314
+ else:
315
+ break # Stop extracting after 10 frames
316
+ frame_idx += 1
317
+ cap.release()
318
+ log_message(f"Extracted {saved_idx-1} frames from {video_path} to {output_dir}")
319
+ return saved_idx - 1
320
+
321
+ def analyze_single_frame(image_path: str, preprompt: str = "") -> dict:
322
+ """Consistent frame processing function with robust error handling"""
323
+ if not vision_model or not vision_processor:
324
+ return {
325
+ "image": os.path.basename(image_path),
326
+ "description": "[ERROR] Model not loaded",
327
+ "success": False
328
+ }
329
+
330
  try:
331
+ # Load and resize image
332
+ image = Image.open(image_path).convert("RGB")
333
+ image = image.resize((224, 224))
334
+
335
+ # Ensure tokenizer padding config is safe
336
+ tokenizer = vision_processor.tokenizer
337
+ if tokenizer.pad_token is None:
338
+ tokenizer.pad_token = tokenizer.eos_token
339
+ tokenizer.padding_side = "right"
340
+
341
+ # Preprocess inputs
342
+ inputs = vision_processor(
343
+ images=[image],
344
+ text=preprompt,
345
+ return_tensors="pt",
346
+ padding="max_length",
347
+ truncation=True,
348
+ max_length=512
349
+ ).to(device)
350
+
351
+ # Safety: check pixel_values shape
352
+ pixel_values = inputs["pixel_values"]
353
+ if pixel_values.dim() == 3:
354
+ pixel_values = pixel_values.unsqueeze(0)
355
+
356
+ # Generate caption
357
+ with torch.no_grad():
358
+ outputs = vision_model.generate(
359
+ input_ids=inputs["input_ids"],
360
+ attention_mask=inputs["attention_mask"],
361
+ pixel_values=pixel_values,
362
+ max_new_tokens=500,
363
+ num_beams=5,
364
+ early_stopping=False,
365
+ pad_token_id=tokenizer.pad_token_id
366
+ )
367
+
368
+ caption = vision_processor.batch_decode(
369
+ outputs,
370
+ skip_special_tokens=True
371
+ )[0].strip()
372
+
373
+ return {
374
+ "image": os.path.basename(image_path),
375
+ "description": caption,
376
+ "success": True
377
+ }
378
+
379
+ except Exception as e:
380
+ return {
381
+ "image": os.path.basename(image_path),
382
+ "description": f"[ERROR] {str(e)}",
383
+ "success": False
384
+ }
385
+
386
+
387
+ def process_video_frames(frames_dir: str, video_filename: str, output_file: str) -> bool:
388
+ """Main processing function with first-frame validation"""
389
+ try:
390
+ frames = sorted(Path(frames_dir).glob("*.png"))
391
+ if not frames:
392
+ print("❌ No frames found in directory")
393
+ return False
394
+
395
+ # Validate first frame
396
+ first_frame_result = analyze_single_frame(str(frames[0]), get_preprompt(video_filename))
397
+ print("\n=== FIRST FRAME VALIDATION ===")
398
+ print(f'Image: {first_frame_result["image"]}')
399
+ print(f'Result: {first_frame_result["description"]}')
400
+ print(f'Status: {"Success" if first_frame_result["success"] else "Failed"}\n')
401
+
402
+ if not first_frame_result["success"]:
403
+ print("❌ Aborting due to first frame failure")
404
+ return False
405
+
406
+ preprompt = get_preprompt(video_filename)
407
+ results = {
408
+ "metadata": {
409
+ "video": video_filename,
410
+ "preprompt": preprompt,
411
+ "total_frames": len(frames),
412
+ "processed_frames": 0,
413
+ "failed_frames": 0
414
+ },
415
+ "frames": []
416
+ }
417
+
418
+ for i, frame_path in enumerate(frames):
419
+ result = analyze_single_frame(str(frame_path), preprompt)
420
+ results["frames"].append(result)
421
+
422
+ if result["success"]:
423
+ results["metadata"]["processed_frames"] += 1
424
+ else:
425
+ results["metadata"]["failed_frames"] += 1
426
+
427
+ # Periodic saving
428
+ if i % 10 == 0:
429
+ with open(output_file, "w") as f:
430
+ json.dump(results, f, indent=2)
431
+
432
+ # Final save
433
+ with open(output_file, "w") as f:
434
+ json.dump(results, f, indent=2)
435
+
436
+ return True
437
+
438
+ except Exception as e:
439
+ print(f"❌ Processing failed: {str(e)}")
440
+ return False
441
+
442
+ def summarize_activities(frame_analyses: List[Dict]) -> Dict:
443
+ """Summarize activities from frame analyses."""
444
+ return {}
445
 
446
+ def process_rar_file(rar_path: str) -> bool:
447
+ """Process a single RAR file with new frame processing"""
448
+ filename = os.path.basename(rar_path)
449
+ processing_status["current_file"] = filename
450
 
451
+ # Handle multi-part RAR naming
452
+ if is_multipart_rar(filename):
453
+ course_name = get_rar_part_base(filename)
454
+ else:
455
+ course_name = filename.replace(".rar", "")
456
 
457
+ extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
 
459
+ try:
460
+ log_message(f"πŸ”„ Processing: {filename}")
 
 
 
 
 
 
 
461
 
462
+ # Clean up any existing directory
463
+ if os.path.exists(extract_dir):
464
+ shutil.rmtree(extract_dir, ignore_errors=True)
465
 
466
+ # Extract RAR
467
+ os.makedirs(extract_dir, exist_ok=True)
468
+ if not extract_with_retry(rar_path, extract_dir):
469
+ raise Exception("RAR extraction failed")
470
+
471
+ # Process video files
472
+ video_files = []
473
+ for root, _, files in os.walk(extract_dir):
474
+ for file in files:
475
+ if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
476
+ video_files.append(os.path.join(root, file))
477
+
478
+ processing_status["extracted_courses"] += 1
479
+ log_message(f"βœ… Extracted {len(video_files)} videos from \'{course_name}\'")
480
+
481
+ # Process each video
482
+ for video_path in video_files:
483
+ video_filename = Path(video_path).name
484
+ video_filename_clean = video_filename.replace(".", "_")
485
+ frames_dir = os.path.join(FRAMES_OUTPUT_FOLDER, f"{course_name}_{video_filename_clean}_frames")
486
+ ensure_dir(frames_dir)
487
+
488
+ # Extract frames
489
+ extracted_count = extract_frames(video_path, frames_dir, DEFAULT_FPS)
490
+ if extracted_count == 0:
491
+ raise Exception(f"No frames extracted from {video_filename}")
492
+
493
+ processing_status["extracted_frames_count"] += extracted_count
494
 
495
+ # Analyze frames
496
+ video_filename_clean = video_filename.replace(".", "_")
497
+ analysis_output = os.path.join(ANALYSIS_OUTPUT_FOLDER, f"{course_name}_{video_filename_clean}_analysis.json")
498
+ if process_video_frames(frames_dir, video_filename, analysis_output):
499
+ processing_status["analyzed_frames_count"] += extracted_count
500
+ processing_status["extracted_videos"] += 1
501
+ else:
502
+ raise Exception(f"Frame analysis failed for {video_filename}")
503
+
504
+ return True
505
+
506
+ except Exception as e:
507
+ error_msg = str(e)
508
+ log_message(f"❌ Processing failed: {error_msg}")
509
+ log_failed_file(filename, error_msg)
510
+ return False
 
511
 
512
+ finally:
513
+ processing_status["current_file"] = None
 
 
514
 
515
  def main_processing_loop(start_index: int = 0):
516
+ """Main processing workflow - extraction, frame extraction, and vision analysis"""
517
  processing_status["is_running"] = True
518
+
 
 
 
519
  try:
520
+ # Load state
521
+ processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
522
+ download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 0})
523
+
524
+ # Use start_index if provided, otherwise use the saved state
525
+ next_index = start_index if start_index > 0 else download_state["next_download_index"]
526
+
527
+ log_message(f"πŸ“Š Starting from index {next_index}")
528
+ log_message(f"πŸ“Š Previously processed: {len(processed_rars)} files")
 
529
 
530
+ # Get file list
531
+ try:
532
+ files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
533
+ rar_files = sorted([f for f in files if f.endswith(".rar")])
534
+
535
+ processing_status["total_files"] = len(rar_files)
536
+ log_message(f"πŸ“ Found {len(rar_files)} RAR files in repository")
537
+
538
+ if next_index >= len(rar_files):
539
+ log_message("βœ… All files have been processed!")
540
+ return
541
+
542
+ except Exception as e:
543
+ log_message(f"❌ Failed to get file list: {str(e)}")
544
  return
545
+
546
+ # Process only one file per run
547
+ if next_index < len(rar_files):
548
+ rar_file = rar_files[next_index]
549
+ filename = os.path.basename(rar_file)
550
 
551
+ if filename in processed_rars:
552
+ log_message(f"⏭️ Skipping already processed: {filename}")
553
+ processing_status["processed_files"] += 1
554
+ # Move to next file
555
+ next_index += 1
556
+ save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
557
+ log_message(f"πŸ“Š Moving to next file. Progress: {next_index}/{len(rar_files)}")
558
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
560
+ log_message(f"πŸ“₯ Downloading: {filename}")
561
+ dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
562
 
563
+ # Download file
564
+ download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
565
+ if download_with_retry(download_url, dest_path):
566
+ # Process file
567
+ if process_rar_file(dest_path):
568
+ processed_rars.append(filename)
569
+ save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
570
+ log_message(f"βœ… Successfully processed: {filename}")
571
+ processing_status["processed_files"] += 1
572
+ else:
573
+ log_message(f"❌ Failed to process: {filename}")
574
+ processing_status["failed_files"] += 1
575
+
576
+ # Clean up downloaded file
577
+ try:
578
+ os.remove(dest_path)
579
+ log_message(f"πŸ—‘οΈ Cleaned up download: {filename}")
580
+ except:
581
+ pass
582
+ else:
583
+ log_message(f"❌ Failed to download: {filename}")
584
+ processing_status["failed_files"] += 1
585
+
586
+ # Update download state for next run
587
+ next_index += 1
588
+ save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
589
+
590
+ # Status update
591
+ log_message(f"πŸ“Š Progress: {next_index}/{len(rar_files)} files processed")
592
+ log_message(f"πŸ“Š Extracted: {processing_status['extracted_courses']} courses")
593
+ log_message(f"πŸ“Š Videos Processed: {processing_status['extracted_videos']} videos")
594
+ log_message(f"πŸ“Š Frames Extracted: {processing_status['extracted_frames_count']} frames")
595
+ log_message(f"πŸ“Š Frames Analyzed: {processing_status['analyzed_frames_count']} frames")
596
+ log_message(f"πŸ“Š Failed: {processing_status['failed_files']} files")
597
+
598
+ if next_index < len(rar_files):
599
+ log_message(f"πŸ”„ Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
600
+ else:
601
+ log_message("πŸŽ‰ All files have been processed!")
602
+ else:
603
+ log_message("βœ… All files have been processed!")
604
+
605
+ log_message(f"πŸŽ‰ Processing complete!")
606
+ log_message(f"πŸ“Š Final stats: {processing_status['extracted_courses']} courses extracted, {processing_status['extracted_videos']} videos processed, {processing_status['extracted_frames_count']} frames extracted, {processing_status['analyzed_frames_count']} frames analyzed")
607
+
608
+ except KeyboardInterrupt:
609
+ log_message("⏹️ Processing interrupted by user")
610
  except Exception as e:
611
+ log_message(f"❌ Fatal error: {str(e)}")
612
  finally:
613
  processing_status["is_running"] = False
614
+ cleanup_temp_files()
615
+
616
+ # FastAPI Endpoints
617
+ @app.post("/analyze-video")
618
+ async def analyze_video_endpoint(
619
+ file: UploadFile = File(...),
620
+ fps: float = Form(DEFAULT_FPS),
621
+ prompt: Optional[str] = Form(None)
622
+ ):
623
+ """Analyze a single video file and return frame-by-frame analysis."""
624
+ if not file.filename.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
625
+ return JSONResponse(status_code=400, content={
626
+ "error": "File type not allowed",
627
+ "allowed_types": [".mp4", ".avi", ".mov", ".mkv"]
628
+ })
629
+
630
+ with tempfile.TemporaryDirectory() as temp_dir:
631
+ temp_dir_path = Path(temp_dir)
632
+ file_path = temp_dir_path / file.filename
633
+
634
+ with open(file_path, "wb") as buffer:
635
+ shutil.copyfileobj(file.file, buffer)
636
+
637
+ frames_dir = temp_dir_path / "frames"
638
+ frame_count = extract_frames(file_path, frames_dir, fps)
639
+
640
+ frame_analyses = []
641
+ for frame_file in sorted(frames_dir.glob("*.png")):
642
+ analysis = analyze_single_frame(str(frame_file), prompt or "")
643
+ frame_analyses.append(analysis)
644
+
645
+ summary = summarize_activities(frame_analyses)
646
+
647
+ return JSONResponse(content={
648
+ "video_filename": file.filename,
649
+ "frame_count": frame_count,
650
+ "fps": fps,
651
+ "frame_analyses": frame_analyses,
652
+ "summary": summary
653
+ })
654
+
655
+ @app.post("/analyze-archive")
656
+ async def analyze_archive_endpoint(
657
+ file: UploadFile = File(...),
658
+ fps: float = Form(DEFAULT_FPS),
659
+ prompt: Optional[str] = Form(None)
660
+ ):
661
+ """Analyze videos from RAR/ZIP archive and return frame-by-frame analysis."""
662
+ if not file.filename.lower().endswith((".rar", ".zip")):
663
+ return JSONResponse(status_code=400, content={
664
+ "error": "File type not allowed",
665
+ "allowed_types": [".rar", ".zip"]
666
+ })
667
+
668
+ with tempfile.TemporaryDirectory() as temp_dir:
669
+ temp_dir_path = Path(temp_dir)
670
+ file_path = temp_dir_path / file.filename
671
+
672
+ with open(file_path, "wb") as buffer:
673
+ shutil.copyfileobj(file.file, buffer)
674
+
675
+ extract_dir = temp_dir_path / "extracted"
676
+ video_files = []
677
+
678
+ if file.filename.lower().endswith(".rar"):
679
+ with rarfile.RarFile(file_path) as rf:
680
+ rf.extractall(extract_dir)
681
+ else:
682
+ with zipfile.ZipFile(file_path) as zf:
683
+ zf.extractall(extract_dir)
684
+
685
+ # Find video files in extracted content
686
+ for root, dirs, files in os.walk(extract_dir):
687
+ for file in files:
688
+ if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
689
+ video_files.append(Path(root) / file)
690
+
691
+ if not video_files:
692
+ return JSONResponse(status_code=400, content={
693
+ "error": "No video files found in archive"
694
+ })
695
+
696
+ results = []
697
+ for video_path in video_files:
698
+ video_name = video_path.name
699
+ frames_dir = temp_dir_path / f"frames_{video_name}"
700
+ frame_count = extract_frames(video_path, frames_dir, fps)
701
+
702
+ frame_analyses = []
703
+ for frame_file in sorted(frames_dir.glob("*.png")):
704
+ analysis = analyze_single_frame(str(frame_file), prompt or "")
705
+ frame_analyses.append(analysis)
706
+
707
+ summary = summarize_activities(frame_analyses)
708
+
709
+ results.append({
710
+ "video_filename": video_name,
711
+ "frame_count": frame_count,
712
+ "fps": fps,
713
+ "frame_analyses": frame_analyses,
714
+ "summary": summary
715
+ })
716
+
717
+ return JSONResponse(content={
718
+ "archive_filename": file.filename,
719
+ "videos_processed": len(video_files),
720
+ "results": results
721
+ })
722
+
723
+ @app.get("/health")
724
+ async def health_check():
725
+ """Health check endpoint."""
726
+ return JSONResponse(content={
727
+ "status": "healthy",
728
+ "model": "GIT",
729
+ "note": "Now using GIT model."
730
+ })
731
+
732
+ @app.get("/status")
733
+ async def get_processing_status():
734
+ """Get current processing status."""
735
+ return JSONResponse(content=processing_status)
736
+
737
+ # Expose necessary functions and variables
738
+ __all__ = [
739
+ "main_processing_loop",
740
+ "processing_status",
741
+ "ANALYSIS_OUTPUT_FOLDER",
742
+ "log_message",
743
+ "analyze_single_frame",
744
+ "extract_frames",
745
+ "DEFAULT_FPS",
746
+ "ensure_dir"
747
+ ]
748