Update cursor_tracker.py
Browse files- cursor_tracker.py +266 -136
cursor_tracker.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
import json
|
| 3 |
import requests
|
| 4 |
import subprocess
|
|
@@ -14,6 +13,7 @@ import numpy as np
|
|
| 14 |
from pathlib import Path
|
| 15 |
import smtplib
|
| 16 |
from email.message import EmailMessage
|
|
|
|
| 17 |
|
| 18 |
# ==== CONFIGURATION ====
|
| 19 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
|
@@ -22,15 +22,15 @@ SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")
|
|
| 22 |
# Path Configuration
|
| 23 |
DOWNLOAD_FOLDER = "downloads"
|
| 24 |
EXTRACT_FOLDER = "extracted"
|
| 25 |
-
FRAMES_OUTPUT_FOLDER = "extracted_frames"
|
| 26 |
-
CURSOR_TRACKING_OUTPUT_FOLDER = "cursor_tracking_results"
|
| 27 |
CURSOR_TEMPLATES_DIR = "cursors"
|
| 28 |
|
| 29 |
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
|
| 30 |
os.makedirs(EXTRACT_FOLDER, exist_ok=True)
|
| 31 |
os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
|
| 32 |
os.makedirs(CURSOR_TRACKING_OUTPUT_FOLDER, exist_ok=True)
|
| 33 |
-
os.makedirs(CURSOR_TEMPLATES_DIR, exist_ok=True)
|
| 34 |
|
| 35 |
# State Files
|
| 36 |
DOWNLOAD_STATE_FILE = "download_progress.json"
|
|
@@ -44,7 +44,7 @@ MAX_RETRIES = 3
|
|
| 44 |
MIN_FREE_SPACE_GB = 2 # Minimum free space in GB before processing
|
| 45 |
|
| 46 |
# Frame Extraction Parameters
|
| 47 |
-
DEFAULT_FPS = 3
|
| 48 |
|
| 49 |
# Cursor Tracking Parameters
|
| 50 |
CURSOR_THRESHOLD = 0.8
|
|
@@ -67,6 +67,7 @@ processing_status = {
|
|
| 67 |
"logs": []
|
| 68 |
}
|
| 69 |
|
|
|
|
| 70 |
def log_message(message: str):
|
| 71 |
"""Log messages with timestamp"""
|
| 72 |
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -77,11 +78,13 @@ def log_message(message: str):
|
|
| 77 |
if len(processing_status["logs"]) > 100:
|
| 78 |
processing_status["logs"] = processing_status["logs"][-100:]
|
| 79 |
|
|
|
|
| 80 |
def log_failed_file(filename: str, error: str):
|
| 81 |
"""Log failed files to persistent file"""
|
| 82 |
with open(FAILED_FILES_LOG, "a") as f:
|
| 83 |
f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {filename}: {error}\n")
|
| 84 |
|
|
|
|
| 85 |
def get_disk_usage(path: str) -> Dict[str, float]:
|
| 86 |
"""Get disk usage statistics in GB"""
|
| 87 |
statvfs = os.statvfs(path)
|
|
@@ -90,6 +93,7 @@ def get_disk_usage(path: str) -> Dict[str, float]:
|
|
| 90 |
used = total - free
|
| 91 |
return {"total": total, "free": free, "used": used}
|
| 92 |
|
|
|
|
| 93 |
def check_disk_space(path: str = ".") -> bool:
|
| 94 |
"""Check if there's enough disk space"""
|
| 95 |
disk_info = get_disk_usage(path)
|
|
@@ -98,10 +102,11 @@ def check_disk_space(path: str = ".") -> bool:
|
|
| 98 |
return False
|
| 99 |
return True
|
| 100 |
|
|
|
|
| 101 |
def cleanup_temp_files():
|
| 102 |
"""Clean up temporary files to free space"""
|
| 103 |
log_message("π§Ή Cleaning up temporary files...")
|
| 104 |
-
|
| 105 |
# Clean old downloads (keep only current processing file)
|
| 106 |
current_file = processing_status.get("current_file")
|
| 107 |
for file in os.listdir(DOWNLOAD_FOLDER):
|
|
@@ -109,9 +114,10 @@ def cleanup_temp_files():
|
|
| 109 |
try:
|
| 110 |
os.remove(os.path.join(DOWNLOAD_FOLDER, file))
|
| 111 |
log_message(f"ποΈ Removed old download: {file}")
|
| 112 |
-
except:
|
| 113 |
pass
|
| 114 |
|
|
|
|
| 115 |
def load_json_state(file_path: str, default_value):
|
| 116 |
"""Load state from JSON file"""
|
| 117 |
if os.path.exists(file_path):
|
|
@@ -122,11 +128,13 @@ def load_json_state(file_path: str, default_value):
|
|
| 122 |
log_message(f"β οΈ Corrupted state file: {file_path}")
|
| 123 |
return default_value
|
| 124 |
|
|
|
|
| 125 |
def save_json_state(file_path: str, data):
|
| 126 |
"""Save state to JSON file"""
|
| 127 |
with open(file_path, "w") as f:
|
| 128 |
json.dump(data, f, indent=2)
|
| 129 |
|
|
|
|
| 130 |
def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
|
| 131 |
"""Download file with retry logic and disk space checking"""
|
| 132 |
if not check_disk_space():
|
|
@@ -134,13 +142,13 @@ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
|
|
| 134 |
if not check_disk_space():
|
| 135 |
log_message("β Insufficient disk space even after cleanup")
|
| 136 |
return False
|
| 137 |
-
|
| 138 |
-
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
| 139 |
for attempt in range(max_retries):
|
| 140 |
try:
|
| 141 |
with requests.get(url, headers=headers, stream=True) as r:
|
| 142 |
r.raise_for_status()
|
| 143 |
-
|
| 144 |
# Check content length if available
|
| 145 |
content_length = r.headers.get("content-length")
|
| 146 |
if content_length:
|
|
@@ -149,10 +157,11 @@ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
|
|
| 149 |
if size_gb > disk_info["free"] - 0.5: # Leave 0.5GB buffer
|
| 150 |
log_message(f'β File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
|
| 151 |
return False
|
| 152 |
-
|
| 153 |
with open(dest_path, "wb") as f:
|
| 154 |
for chunk in r.iter_content(chunk_size=8192):
|
| 155 |
-
|
|
|
|
| 156 |
return True
|
| 157 |
except Exception as e:
|
| 158 |
if attempt < max_retries - 1:
|
|
@@ -162,33 +171,36 @@ def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
|
|
| 162 |
return False
|
| 163 |
return False
|
| 164 |
|
|
|
|
| 165 |
def is_multipart_rar(filename: str) -> bool:
|
| 166 |
"""Check if this is a multi-part RAR file"""
|
| 167 |
return ".part" in filename.lower() and filename.lower().endswith(".rar")
|
| 168 |
|
|
|
|
| 169 |
def get_rar_part_base(filename: str) -> str:
|
| 170 |
"""Get the base name for multi-part RAR files"""
|
| 171 |
if ".part" in filename.lower():
|
| 172 |
return filename.split(".part")[0]
|
| 173 |
return filename.replace(".rar", "")
|
| 174 |
|
|
|
|
| 175 |
def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
|
| 176 |
"""Extract RAR with retry and recovery, handling multi-part archives"""
|
| 177 |
filename = os.path.basename(rar_path)
|
| 178 |
-
|
| 179 |
# For multi-part RARs, we need the first part
|
| 180 |
if is_multipart_rar(filename):
|
| 181 |
base_name = get_rar_part_base(filename)
|
| 182 |
first_part = f"{base_name}.part01.rar"
|
| 183 |
first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
|
| 184 |
-
|
| 185 |
if not os.path.exists(first_part_path):
|
| 186 |
log_message(f"β οΈ Multi-part RAR detected but first part not found: {first_part}")
|
| 187 |
return False
|
| 188 |
-
|
| 189 |
rar_path = first_part_path
|
| 190 |
log_message(f"π¦ Processing multi-part RAR starting with: {first_part}")
|
| 191 |
-
|
| 192 |
for attempt in range(max_retries):
|
| 193 |
try:
|
| 194 |
# Test RAR first
|
|
@@ -199,12 +211,12 @@ def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) ->
|
|
| 199 |
if attempt == max_retries - 1:
|
| 200 |
return False
|
| 201 |
continue
|
| 202 |
-
|
| 203 |
# Extract RAR
|
| 204 |
cmd = ["unrar", "x", "-o+", rar_path, output_dir]
|
| 205 |
if attempt > 0: # Try recovery on subsequent attempts
|
| 206 |
cmd.insert(2, "-kb")
|
| 207 |
-
|
| 208 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 209 |
if result.returncode == 0:
|
| 210 |
log_message(f"β
Successfully extracted: {os.path.basename(rar_path)}")
|
|
@@ -212,7 +224,7 @@ def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) ->
|
|
| 212 |
else:
|
| 213 |
error_msg = result.stderr or result.stdout
|
| 214 |
log_message(f"β οΈ Extraction attempt {attempt + 1} failed: {error_msg}")
|
| 215 |
-
|
| 216 |
if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
|
| 217 |
log_message(f"β οΈ Data corruption detected, attempt {attempt + 1}")
|
| 218 |
elif result.returncode == 10:
|
|
@@ -220,19 +232,21 @@ def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) ->
|
|
| 220 |
return False
|
| 221 |
elif result.returncode == 1:
|
| 222 |
log_message(f"β οΈ Non-fatal error (exit code 1)")
|
| 223 |
-
|
| 224 |
except Exception as e:
|
| 225 |
log_message(f"β Extraction exception: {str(e)}")
|
| 226 |
if attempt == max_retries - 1:
|
| 227 |
return False
|
| 228 |
time.sleep(1)
|
| 229 |
-
|
| 230 |
return False
|
| 231 |
-
|
|
|
|
| 232 |
# --- Frame Extraction Utilities ---
|
| 233 |
def ensure_dir(path):
|
| 234 |
os.makedirs(path, exist_ok=True)
|
| 235 |
|
|
|
|
| 236 |
def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
|
| 237 |
"""Extract frames from video at the specified frames per second (fps)."""
|
| 238 |
log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
|
|
@@ -242,12 +256,12 @@ def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
|
|
| 242 |
log_message(f"[ERROR] Failed to open video file: {video_path}")
|
| 243 |
return 0
|
| 244 |
video_fps = cap.get(cv2.CAP_PROP_FPS)
|
| 245 |
-
# log_message(f"[DEBUG] Video FPS: {video_fps}")
|
| 246 |
if not video_fps or video_fps <= 0:
|
| 247 |
video_fps = 30 # fallback if FPS is not available
|
| 248 |
log_message(f"[WARN] Using fallback FPS: {video_fps}")
|
| 249 |
frame_interval = int(round(video_fps / fps))
|
| 250 |
-
|
|
|
|
| 251 |
frame_idx = 0
|
| 252 |
saved_idx = 1
|
| 253 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
@@ -255,19 +269,18 @@ def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
|
|
| 255 |
while cap.isOpened():
|
| 256 |
ret, frame = cap.read()
|
| 257 |
if not ret:
|
| 258 |
-
# log_message(f"[DEBUG] No more frames to read at frame_idx {frame_idx}.")
|
| 259 |
break
|
| 260 |
if frame_idx % frame_interval == 0:
|
| 261 |
frame_name = f"{saved_idx:04d}.png"
|
| 262 |
cv2.imwrite(str(Path(output_dir) / frame_name), frame)
|
| 263 |
-
# log_message(f"[DEBUG] Saved frame {frame_idx} as {frame_name}")
|
| 264 |
saved_idx += 1
|
| 265 |
frame_idx += 1
|
| 266 |
cap.release()
|
| 267 |
log_message(f"Extracted {saved_idx-1} frames from {video_path} to {output_dir}")
|
| 268 |
return saved_idx - 1
|
| 269 |
|
| 270 |
-
|
|
|
|
| 271 |
def to_rgb(img):
|
| 272 |
if img is None:
|
| 273 |
return None
|
|
@@ -277,12 +290,14 @@ def to_rgb(img):
|
|
| 277 |
return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
|
| 278 |
return img
|
| 279 |
|
|
|
|
| 280 |
def get_mask_from_alpha(template_img):
|
| 281 |
if template_img is not None and len(template_img.shape) == 3 and template_img.shape[2] == 4:
|
| 282 |
# Use alpha channel as mask (nonzero alpha = 255)
|
| 283 |
return (template_img[:, :, 3] > 0).astype(np.uint8) * 255
|
| 284 |
return None
|
| 285 |
|
|
|
|
| 286 |
def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRESHOLD):
|
| 287 |
"""Detect cursor position in a frame using multiple templates. Returns best match above threshold."""
|
| 288 |
best_pos = None
|
|
@@ -293,12 +308,11 @@ def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRES
|
|
| 293 |
template_rgb = to_rgb(cursor_template)
|
| 294 |
mask = get_mask_from_alpha(cursor_template)
|
| 295 |
if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
|
| 296 |
-
|
| 297 |
continue
|
| 298 |
try:
|
| 299 |
result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
|
| 300 |
-
except Exception
|
| 301 |
-
log_message(f"[WARN] matchTemplate failed for {template_name}: {e}")
|
| 302 |
continue
|
| 303 |
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
| 304 |
if max_val > best_conf:
|
|
@@ -313,91 +327,173 @@ def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRES
|
|
| 313 |
return best_pos, best_conf, best_template_name
|
| 314 |
return None, best_conf, None
|
| 315 |
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
try:
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
except Exception as e:
|
| 332 |
-
log_message(f"[ERROR]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
-
def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=CURSOR_THRESHOLD, start_frame=1, email_results=False):
|
| 335 |
-
"""Detect cursor in each frame using multiple templates, print status, and write positions to a JSON file."""
|
| 336 |
-
log_message(f"[INFO] Tracking cursors in {frames_dir}...")
|
| 337 |
frames_dir = Path(frames_dir).resolve()
|
| 338 |
output_json_path = Path(output_json_path).resolve()
|
| 339 |
cursor_templates_dir = Path(cursor_templates_dir).resolve()
|
| 340 |
-
# log_message(f"[DEBUG] frames_dir: {frames_dir}")
|
| 341 |
-
# log_message(f"[DEBUG] cursor_templates_dir: {cursor_templates_dir}")
|
| 342 |
-
# log_message(f"[DEBUG] output_json_path: {output_json_path}")
|
| 343 |
-
ensure_dir(frames_dir)
|
| 344 |
ensure_dir(output_json_path.parent)
|
| 345 |
-
|
|
|
|
| 346 |
cursor_templates = {}
|
| 347 |
for template_file in cursor_templates_dir.glob("*.png"):
|
| 348 |
-
|
| 349 |
-
if
|
| 350 |
-
cursor_templates[template_file.name] =
|
| 351 |
-
else:
|
| 352 |
-
log_message(f"[WARN] Could not load template: {template_file}")
|
| 353 |
if not cursor_templates:
|
| 354 |
-
log_message(f"[ERROR] No cursor templates found in
|
| 355 |
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
results = []
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
frame_num = int(frame_file.stem)
|
| 360 |
-
if frame_num < start_frame:
|
| 361 |
-
continue
|
| 362 |
-
frame = cv2.imread(str(frame_file), cv2.IMREAD_UNCHANGED)
|
| 363 |
-
if frame is None:
|
| 364 |
-
log_message(f"[WARN] Could not load frame: {frame_file}")
|
| 365 |
-
continue
|
| 366 |
-
pos, conf, template_name = detect_cursor_in_frame_multi(frame, cursor_templates, threshold)
|
| 367 |
-
if pos is not None:
|
| 368 |
-
# log_message(f"{frame_file.name}: Cursor at {pos} (template: {template_name})")
|
| 369 |
-
results.append({
|
| 370 |
-
"frame": frame_file.name,
|
| 371 |
-
"cursor_active": True,
|
| 372 |
-
"x": pos[0],
|
| 373 |
-
"y": pos[1],
|
| 374 |
-
"confidence": conf,
|
| 375 |
-
"template": template_name
|
| 376 |
-
})
|
| 377 |
-
tracked_count += 1
|
| 378 |
-
else:
|
| 379 |
-
# log_message(f"{frame_file.name}: Cursor disabled")
|
| 380 |
-
results.append({
|
| 381 |
-
"frame": frame_file.name,
|
| 382 |
-
"cursor_active": False,
|
| 383 |
-
"x": None,
|
| 384 |
-
"y": None,
|
| 385 |
-
"confidence": conf,
|
| 386 |
-
"template": None
|
| 387 |
-
})
|
| 388 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
with open(output_json_path, "w") as f:
|
| 390 |
json.dump(results, f, indent=2)
|
| 391 |
log_message(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")
|
|
|
|
|
|
|
|
|
|
| 392 |
if email_results:
|
| 393 |
-
log_message("[INFO]
|
| 394 |
to_email = os.environ.get("TO_EMAIL")
|
| 395 |
from_email = os.environ.get("FROM_EMAIL")
|
| 396 |
app_password = os.environ.get("GMAIL_APP_PASSWORD")
|
| 397 |
-
if
|
| 398 |
-
log_message("[ERROR] Email environment variables not set. Please set TO_EMAIL, FROM_EMAIL, and GMAIL_APP_PASSWORD.")
|
| 399 |
-
# return tracked_count # Don't return here, just log error
|
| 400 |
-
else:
|
| 401 |
send_email_with_attachment(
|
| 402 |
subject="Cursor Tracking Results",
|
| 403 |
body="See attached JSON results.",
|
|
@@ -406,36 +502,71 @@ def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=C
|
|
| 406 |
app_password=app_password,
|
| 407 |
attachment_path=output_json_path
|
| 408 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
except Exception as e:
|
| 410 |
-
log_message(f"[ERROR] Failed to
|
| 411 |
-
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
|
| 414 |
def process_rar_file(rar_path: str) -> bool:
|
| 415 |
"""Process a single RAR file - extract, then process videos for frames and cursor tracking"""
|
| 416 |
filename = os.path.basename(rar_path)
|
| 417 |
processing_status["current_file"] = filename
|
| 418 |
-
|
| 419 |
# Handle multi-part RAR naming
|
| 420 |
if is_multipart_rar(filename):
|
| 421 |
course_name = get_rar_part_base(filename)
|
| 422 |
else:
|
| 423 |
course_name = filename.replace(".rar", "")
|
| 424 |
-
|
| 425 |
extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
|
| 426 |
-
|
| 427 |
try:
|
| 428 |
log_message(f"π Processing: {filename}")
|
| 429 |
-
|
| 430 |
# Clean up any existing directory
|
| 431 |
if os.path.exists(extract_dir):
|
| 432 |
shutil.rmtree(extract_dir, ignore_errors=True)
|
| 433 |
-
|
| 434 |
# Extract RAR
|
| 435 |
os.makedirs(extract_dir, exist_ok=True)
|
| 436 |
if not extract_with_retry(rar_path, extract_dir):
|
| 437 |
raise Exception("RAR extraction failed")
|
| 438 |
-
|
| 439 |
# Count extracted files
|
| 440 |
file_count = 0
|
| 441 |
video_files_found = []
|
|
@@ -444,17 +575,18 @@ def process_rar_file(rar_path: str) -> bool:
|
|
| 444 |
file_count += 1
|
| 445 |
if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
|
| 446 |
video_files_found.append(os.path.join(root, file))
|
| 447 |
-
|
| 448 |
processing_status["extracted_courses"] += 1
|
| 449 |
-
log_message(f"β
Successfully extracted
|
| 450 |
|
| 451 |
# Process video files for frame extraction and cursor tracking
|
| 452 |
for video_path in video_files_found:
|
| 453 |
video_filename = Path(video_path).name
|
| 454 |
# Create a unique output directory for frames for each video
|
| 455 |
-
|
|
|
|
| 456 |
ensure_dir(frames_output_dir)
|
| 457 |
-
|
| 458 |
extracted_frames_count = extract_frames(video_path, frames_output_dir, fps=DEFAULT_FPS)
|
| 459 |
processing_status["extracted_frames_count"] += extracted_frames_count
|
| 460 |
if extracted_frames_count > 0:
|
|
@@ -462,60 +594,61 @@ def process_rar_file(rar_path: str) -> bool:
|
|
| 462 |
log_message(f"[INFO] Extracted {extracted_frames_count} frames from {video_filename}")
|
| 463 |
|
| 464 |
# Perform cursor tracking on the extracted frames
|
| 465 |
-
cursor_output_json = os.path.join(CURSOR_TRACKING_OUTPUT_FOLDER, f"{course_name}_{
|
| 466 |
-
tracked_cursors = track_cursor(frames_output_dir, CURSOR_TEMPLATES_DIR, cursor_output_json, threshold=CURSOR_THRESHOLD)
|
| 467 |
processing_status["tracked_cursors_count"] += tracked_cursors
|
| 468 |
log_message(f"[INFO] Tracked {tracked_cursors} cursors in frames from {video_filename}")
|
| 469 |
else:
|
| 470 |
log_message(f"[WARN] No frames extracted from {video_filename}")
|
| 471 |
|
| 472 |
return True
|
| 473 |
-
|
| 474 |
except Exception as e:
|
| 475 |
error_msg = str(e)
|
| 476 |
log_message(f"β Processing failed: {error_msg}")
|
| 477 |
log_failed_file(filename, error_msg)
|
| 478 |
return False
|
| 479 |
-
|
| 480 |
finally:
|
| 481 |
processing_status["current_file"] = None
|
| 482 |
|
|
|
|
| 483 |
def main_processing_loop(start_index: int = 0):
|
| 484 |
"""Main processing workflow - extraction, frame extraction, and cursor tracking"""
|
| 485 |
processing_status["is_running"] = True
|
| 486 |
-
|
| 487 |
try:
|
| 488 |
# Load state
|
| 489 |
processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
|
| 490 |
-
download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index":
|
| 491 |
-
|
| 492 |
# Use start_index if provided, otherwise use the saved state
|
| 493 |
next_index = start_index if start_index > 0 else download_state["next_download_index"]
|
| 494 |
-
|
| 495 |
log_message(f"π Starting from index {next_index}")
|
| 496 |
log_message(f"π Previously processed: {len(processed_rars)} files")
|
| 497 |
-
|
| 498 |
# Get file list
|
| 499 |
try:
|
| 500 |
files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
|
| 501 |
rar_files = sorted([f for f in files if f.endswith(".rar")])
|
| 502 |
-
|
| 503 |
processing_status["total_files"] = len(rar_files)
|
| 504 |
log_message(f"π Found {len(rar_files)} RAR files in repository")
|
| 505 |
-
|
| 506 |
if next_index >= len(rar_files):
|
| 507 |
log_message("β
All files have been processed!")
|
| 508 |
return
|
| 509 |
-
|
| 510 |
except Exception as e:
|
| 511 |
log_message(f"β Failed to get file list: {str(e)}")
|
| 512 |
return
|
| 513 |
-
|
| 514 |
-
# Process
|
| 515 |
if next_index < len(rar_files):
|
| 516 |
rar_file = rar_files[next_index]
|
| 517 |
filename = os.path.basename(rar_file)
|
| 518 |
-
|
| 519 |
if filename in processed_rars:
|
| 520 |
log_message(f"βοΈ Skipping already processed: {filename}")
|
| 521 |
processing_status["processed_files"] += 1
|
|
@@ -524,10 +657,10 @@ def main_processing_loop(start_index: int = 0):
|
|
| 524 |
save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
|
| 525 |
log_message(f"π Moving to next file. Progress: {next_index}/{len(rar_files)}")
|
| 526 |
return
|
| 527 |
-
|
| 528 |
log_message(f"π₯ Downloading: {filename}")
|
| 529 |
dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
|
| 530 |
-
|
| 531 |
# Download file
|
| 532 |
download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
|
| 533 |
if download_with_retry(download_url, dest_path):
|
|
@@ -540,21 +673,21 @@ def main_processing_loop(start_index: int = 0):
|
|
| 540 |
else:
|
| 541 |
log_message(f"β Failed to process: {filename}")
|
| 542 |
processing_status["failed_files"] += 1
|
| 543 |
-
|
| 544 |
# Clean up downloaded file
|
| 545 |
try:
|
| 546 |
os.remove(dest_path)
|
| 547 |
log_message(f"ποΈ Cleaned up download: {filename}")
|
| 548 |
-
except:
|
| 549 |
pass
|
| 550 |
else:
|
| 551 |
log_message(f"β Failed to download: {filename}")
|
| 552 |
processing_status["failed_files"] += 1
|
| 553 |
-
|
| 554 |
# Update download state for next run
|
| 555 |
next_index += 1
|
| 556 |
save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
|
| 557 |
-
|
| 558 |
# Status update
|
| 559 |
log_message(f"π Progress: {next_index}/{len(rar_files)} files processed")
|
| 560 |
log_message(f'π Extracted: {processing_status["extracted_courses"]} courses')
|
|
@@ -562,17 +695,17 @@ def main_processing_loop(start_index: int = 0):
|
|
| 562 |
log_message(f'π Frames Extracted: {processing_status["extracted_frames_count"]}')
|
| 563 |
log_message(f'π Cursors Tracked: {processing_status["tracked_cursors_count"]}')
|
| 564 |
log_message(f'π Failed: {processing_status["failed_files"]} files')
|
| 565 |
-
|
| 566 |
if next_index < len(rar_files):
|
| 567 |
log_message(f"π Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
|
| 568 |
else:
|
| 569 |
log_message("π All files have been processed!")
|
| 570 |
else:
|
| 571 |
log_message("β
All files have been processed!")
|
| 572 |
-
|
| 573 |
log_message("π Processing complete!")
|
| 574 |
log_message(f'π Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["tracked_cursors_count"]} cursors tracked')
|
| 575 |
-
|
| 576 |
except KeyboardInterrupt:
|
| 577 |
log_message("βΉοΈ Processing interrupted by user")
|
| 578 |
except Exception as e:
|
|
@@ -581,6 +714,7 @@ def main_processing_loop(start_index: int = 0):
|
|
| 581 |
processing_status["is_running"] = False
|
| 582 |
cleanup_temp_files()
|
| 583 |
|
|
|
|
| 584 |
# Expose necessary functions and variables for download_api.py
|
| 585 |
__all__ = [
|
| 586 |
"main_processing_loop",
|
|
@@ -595,7 +729,3 @@ __all__ = [
|
|
| 595 |
"CURSOR_THRESHOLD",
|
| 596 |
"ensure_dir"
|
| 597 |
]
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
import requests
|
| 3 |
import subprocess
|
|
|
|
| 13 |
from pathlib import Path
|
| 14 |
import smtplib
|
| 15 |
from email.message import EmailMessage
|
| 16 |
+
import multiprocessing
|
| 17 |
|
| 18 |
# ==== CONFIGURATION ====
|
| 19 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
|
|
|
| 22 |
# Path Configuration
|
| 23 |
DOWNLOAD_FOLDER = "downloads"
|
| 24 |
EXTRACT_FOLDER = "extracted"
|
| 25 |
+
FRAMES_OUTPUT_FOLDER = "extracted_frames" # New folder for extracted frames
|
| 26 |
+
CURSOR_TRACKING_OUTPUT_FOLDER = "cursor_tracking_results" # New folder for cursor tracking results
|
| 27 |
CURSOR_TEMPLATES_DIR = "cursors"
|
| 28 |
|
| 29 |
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
|
| 30 |
os.makedirs(EXTRACT_FOLDER, exist_ok=True)
|
| 31 |
os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
|
| 32 |
os.makedirs(CURSOR_TRACKING_OUTPUT_FOLDER, exist_ok=True)
|
| 33 |
+
os.makedirs(CURSOR_TEMPLATES_DIR, exist_ok=True) # Ensure cursor templates directory exists
|
| 34 |
|
| 35 |
# State Files
|
| 36 |
DOWNLOAD_STATE_FILE = "download_progress.json"
|
|
|
|
| 44 |
MIN_FREE_SPACE_GB = 2 # Minimum free space in GB before processing
|
| 45 |
|
| 46 |
# Frame Extraction Parameters
|
| 47 |
+
DEFAULT_FPS = 3 # Default frames per second for extraction
|
| 48 |
|
| 49 |
# Cursor Tracking Parameters
|
| 50 |
CURSOR_THRESHOLD = 0.8
|
|
|
|
| 67 |
"logs": []
|
| 68 |
}
|
| 69 |
|
| 70 |
+
|
| 71 |
def log_message(message: str):
|
| 72 |
"""Log messages with timestamp"""
|
| 73 |
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
| 78 |
if len(processing_status["logs"]) > 100:
|
| 79 |
processing_status["logs"] = processing_status["logs"][-100:]
|
| 80 |
|
| 81 |
+
|
| 82 |
def log_failed_file(filename: str, error: str):
|
| 83 |
"""Log failed files to persistent file"""
|
| 84 |
with open(FAILED_FILES_LOG, "a") as f:
|
| 85 |
f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {filename}: {error}\n")
|
| 86 |
|
| 87 |
+
|
| 88 |
def get_disk_usage(path: str) -> Dict[str, float]:
|
| 89 |
"""Get disk usage statistics in GB"""
|
| 90 |
statvfs = os.statvfs(path)
|
|
|
|
| 93 |
used = total - free
|
| 94 |
return {"total": total, "free": free, "used": used}
|
| 95 |
|
| 96 |
+
|
| 97 |
def check_disk_space(path: str = ".") -> bool:
|
| 98 |
"""Check if there's enough disk space"""
|
| 99 |
disk_info = get_disk_usage(path)
|
|
|
|
| 102 |
return False
|
| 103 |
return True
|
| 104 |
|
| 105 |
+
|
| 106 |
def cleanup_temp_files():
|
| 107 |
"""Clean up temporary files to free space"""
|
| 108 |
log_message("π§Ή Cleaning up temporary files...")
|
| 109 |
+
|
| 110 |
# Clean old downloads (keep only current processing file)
|
| 111 |
current_file = processing_status.get("current_file")
|
| 112 |
for file in os.listdir(DOWNLOAD_FOLDER):
|
|
|
|
| 114 |
try:
|
| 115 |
os.remove(os.path.join(DOWNLOAD_FOLDER, file))
|
| 116 |
log_message(f"ποΈ Removed old download: {file}")
|
| 117 |
+
except Exception:
|
| 118 |
pass
|
| 119 |
|
| 120 |
+
|
| 121 |
def load_json_state(file_path: str, default_value):
|
| 122 |
"""Load state from JSON file"""
|
| 123 |
if os.path.exists(file_path):
|
|
|
|
| 128 |
log_message(f"β οΈ Corrupted state file: {file_path}")
|
| 129 |
return default_value
|
| 130 |
|
| 131 |
+
|
| 132 |
def save_json_state(file_path: str, data):
|
| 133 |
"""Save state to JSON file"""
|
| 134 |
with open(file_path, "w") as f:
|
| 135 |
json.dump(data, f, indent=2)
|
| 136 |
|
| 137 |
+
|
| 138 |
def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
|
| 139 |
"""Download file with retry logic and disk space checking"""
|
| 140 |
if not check_disk_space():
|
|
|
|
| 142 |
if not check_disk_space():
|
| 143 |
log_message("β Insufficient disk space even after cleanup")
|
| 144 |
return False
|
| 145 |
+
|
| 146 |
+
headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
|
| 147 |
for attempt in range(max_retries):
|
| 148 |
try:
|
| 149 |
with requests.get(url, headers=headers, stream=True) as r:
|
| 150 |
r.raise_for_status()
|
| 151 |
+
|
| 152 |
# Check content length if available
|
| 153 |
content_length = r.headers.get("content-length")
|
| 154 |
if content_length:
|
|
|
|
| 157 |
if size_gb > disk_info["free"] - 0.5: # Leave 0.5GB buffer
|
| 158 |
log_message(f'β File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
|
| 159 |
return False
|
| 160 |
+
|
| 161 |
with open(dest_path, "wb") as f:
|
| 162 |
for chunk in r.iter_content(chunk_size=8192):
|
| 163 |
+
if chunk:
|
| 164 |
+
f.write(chunk)
|
| 165 |
return True
|
| 166 |
except Exception as e:
|
| 167 |
if attempt < max_retries - 1:
|
|
|
|
| 171 |
return False
|
| 172 |
return False
|
| 173 |
|
| 174 |
+
|
| 175 |
def is_multipart_rar(filename: str) -> bool:
|
| 176 |
"""Check if this is a multi-part RAR file"""
|
| 177 |
return ".part" in filename.lower() and filename.lower().endswith(".rar")
|
| 178 |
|
| 179 |
+
|
| 180 |
def get_rar_part_base(filename: str) -> str:
|
| 181 |
"""Get the base name for multi-part RAR files"""
|
| 182 |
if ".part" in filename.lower():
|
| 183 |
return filename.split(".part")[0]
|
| 184 |
return filename.replace(".rar", "")
|
| 185 |
|
| 186 |
+
|
| 187 |
def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
|
| 188 |
"""Extract RAR with retry and recovery, handling multi-part archives"""
|
| 189 |
filename = os.path.basename(rar_path)
|
| 190 |
+
|
| 191 |
# For multi-part RARs, we need the first part
|
| 192 |
if is_multipart_rar(filename):
|
| 193 |
base_name = get_rar_part_base(filename)
|
| 194 |
first_part = f"{base_name}.part01.rar"
|
| 195 |
first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
|
| 196 |
+
|
| 197 |
if not os.path.exists(first_part_path):
|
| 198 |
log_message(f"β οΈ Multi-part RAR detected but first part not found: {first_part}")
|
| 199 |
return False
|
| 200 |
+
|
| 201 |
rar_path = first_part_path
|
| 202 |
log_message(f"π¦ Processing multi-part RAR starting with: {first_part}")
|
| 203 |
+
|
| 204 |
for attempt in range(max_retries):
|
| 205 |
try:
|
| 206 |
# Test RAR first
|
|
|
|
| 211 |
if attempt == max_retries - 1:
|
| 212 |
return False
|
| 213 |
continue
|
| 214 |
+
|
| 215 |
# Extract RAR
|
| 216 |
cmd = ["unrar", "x", "-o+", rar_path, output_dir]
|
| 217 |
if attempt > 0: # Try recovery on subsequent attempts
|
| 218 |
cmd.insert(2, "-kb")
|
| 219 |
+
|
| 220 |
result = subprocess.run(cmd, capture_output=True, text=True)
|
| 221 |
if result.returncode == 0:
|
| 222 |
log_message(f"β
Successfully extracted: {os.path.basename(rar_path)}")
|
|
|
|
| 224 |
else:
|
| 225 |
error_msg = result.stderr or result.stdout
|
| 226 |
log_message(f"β οΈ Extraction attempt {attempt + 1} failed: {error_msg}")
|
| 227 |
+
|
| 228 |
if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
|
| 229 |
log_message(f"β οΈ Data corruption detected, attempt {attempt + 1}")
|
| 230 |
elif result.returncode == 10:
|
|
|
|
| 232 |
return False
|
| 233 |
elif result.returncode == 1:
|
| 234 |
log_message(f"β οΈ Non-fatal error (exit code 1)")
|
| 235 |
+
|
| 236 |
except Exception as e:
|
| 237 |
log_message(f"β Extraction exception: {str(e)}")
|
| 238 |
if attempt == max_retries - 1:
|
| 239 |
return False
|
| 240 |
time.sleep(1)
|
| 241 |
+
|
| 242 |
return False
|
| 243 |
+
|
| 244 |
+
|
| 245 |
# --- Frame Extraction Utilities ---
|
| 246 |
def ensure_dir(path):
|
| 247 |
os.makedirs(path, exist_ok=True)
|
| 248 |
|
| 249 |
+
|
| 250 |
def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
|
| 251 |
"""Extract frames from video at the specified frames per second (fps)."""
|
| 252 |
log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
|
|
|
|
| 256 |
log_message(f"[ERROR] Failed to open video file: {video_path}")
|
| 257 |
return 0
|
| 258 |
video_fps = cap.get(cv2.CAP_PROP_FPS)
|
|
|
|
| 259 |
if not video_fps or video_fps <= 0:
|
| 260 |
video_fps = 30 # fallback if FPS is not available
|
| 261 |
log_message(f"[WARN] Using fallback FPS: {video_fps}")
|
| 262 |
frame_interval = int(round(video_fps / fps))
|
| 263 |
+
if frame_interval <= 0:
|
| 264 |
+
frame_interval = 1
|
| 265 |
frame_idx = 0
|
| 266 |
saved_idx = 1
|
| 267 |
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
|
|
|
| 269 |
while cap.isOpened():
|
| 270 |
ret, frame = cap.read()
|
| 271 |
if not ret:
|
|
|
|
| 272 |
break
|
| 273 |
if frame_idx % frame_interval == 0:
|
| 274 |
frame_name = f"{saved_idx:04d}.png"
|
| 275 |
cv2.imwrite(str(Path(output_dir) / frame_name), frame)
|
|
|
|
| 276 |
saved_idx += 1
|
| 277 |
frame_idx += 1
|
| 278 |
cap.release()
|
| 279 |
log_message(f"Extracted {saved_idx-1} frames from {video_path} to {output_dir}")
|
| 280 |
return saved_idx - 1
|
| 281 |
|
| 282 |
+
|
| 283 |
+
# --- Cursor Tracking Utilities (multiprocessing) ---
|
| 284 |
def to_rgb(img):
|
| 285 |
if img is None:
|
| 286 |
return None
|
|
|
|
| 290 |
return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
|
| 291 |
return img
|
| 292 |
|
| 293 |
+
|
| 294 |
def get_mask_from_alpha(template_img):
|
| 295 |
if template_img is not None and len(template_img.shape) == 3 and template_img.shape[2] == 4:
|
| 296 |
# Use alpha channel as mask (nonzero alpha = 255)
|
| 297 |
return (template_img[:, :, 3] > 0).astype(np.uint8) * 255
|
| 298 |
return None
|
| 299 |
|
| 300 |
+
|
| 301 |
def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRESHOLD):
|
| 302 |
"""Detect cursor position in a frame using multiple templates. Returns best match above threshold."""
|
| 303 |
best_pos = None
|
|
|
|
| 308 |
template_rgb = to_rgb(cursor_template)
|
| 309 |
mask = get_mask_from_alpha(cursor_template)
|
| 310 |
if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
|
| 311 |
+
# Channel mismatch or load error
|
| 312 |
continue
|
| 313 |
try:
|
| 314 |
result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
|
| 315 |
+
except Exception:
|
|
|
|
| 316 |
continue
|
| 317 |
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
| 318 |
if max_val > best_conf:
|
|
|
|
| 327 |
return best_pos, best_conf, best_template_name
|
| 328 |
return None, best_conf, None
|
| 329 |
|
| 330 |
+
|
| 331 |
+
# Multiprocessing worker init and worker function
|
| 332 |
+
# These globals are loaded in each worker process via initializer for efficiency
|
| 333 |
+
_WORKER_CURSOR_TEMPLATES = None
|
| 334 |
+
_WORKER_THRESHOLD = None
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def _init_worker(template_paths, threshold):
|
| 338 |
+
"""Initializer for pool workers: load templates into process-local global variable"""
|
| 339 |
+
global _WORKER_CURSOR_TEMPLATES
|
| 340 |
+
global _WORKER_THRESHOLD
|
| 341 |
+
_WORKER_CURSOR_TEMPLATES = {}
|
| 342 |
+
for tp in template_paths:
|
| 343 |
+
try:
|
| 344 |
+
img = cv2.imread(tp, cv2.IMREAD_UNCHANGED)
|
| 345 |
+
if img is not None:
|
| 346 |
+
_WORKER_CURSOR_TEMPLATES[os.path.basename(tp)] = img
|
| 347 |
+
except Exception:
|
| 348 |
+
pass
|
| 349 |
+
_WORKER_THRESHOLD = threshold
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def track_cursor_worker(frame_file, cursor_templates, threshold, log_queue):
|
| 353 |
+
"""Worker function that tracks cursor and sends logs back."""
|
| 354 |
+
frame = cv2.imread(str(frame_file), cv2.IMREAD_UNCHANGED)
|
| 355 |
+
if frame is None:
|
| 356 |
+
log_queue.put(f"[WARN] Frame unreadable: {frame_file.name}")
|
| 357 |
+
return {
|
| 358 |
+
"frame": frame_file.name,
|
| 359 |
+
"cursor_active": False,
|
| 360 |
+
"x": None,
|
| 361 |
+
"y": None,
|
| 362 |
+
"confidence": -1,
|
| 363 |
+
"template": None
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
pos, conf, template_name = detect_cursor_in_frame_multi(frame, cursor_templates, threshold)
|
| 367 |
+
|
| 368 |
+
if pos is not None:
|
| 369 |
+
log_queue.put(
|
| 370 |
+
f"[FRAME] {frame_file.name} β FOUND cursor at ({pos[0]},{pos[1]}) conf={conf:.3f} template={template_name}"
|
| 371 |
+
)
|
| 372 |
+
return {
|
| 373 |
+
"frame": frame_file.name,
|
| 374 |
+
"cursor_active": True,
|
| 375 |
+
"x": pos[0],
|
| 376 |
+
"y": pos[1],
|
| 377 |
+
"confidence": conf,
|
| 378 |
+
"template": template_name
|
| 379 |
+
}
|
| 380 |
+
else:
|
| 381 |
+
log_queue.put(
|
| 382 |
+
f"[FRAME] {frame_file.name} β NO cursor (max_conf={conf:.3f})"
|
| 383 |
+
)
|
| 384 |
+
return {
|
| 385 |
+
"frame": frame_file.name,
|
| 386 |
+
"cursor_active": False,
|
| 387 |
+
"x": None,
|
| 388 |
+
"y": None,
|
| 389 |
+
"confidence": conf,
|
| 390 |
+
"template": None
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
def upload_to_hf_dataset(local_path, dataset_repo_id="Fred808/data", hf_token=None):
|
| 396 |
+
"""Upload JSON tracking results to Hugging Face dataset repo"""
|
| 397 |
try:
|
| 398 |
+
api = HfApi(token=hf_token or HF_TOKEN)
|
| 399 |
+
filename = os.path.basename(local_path)
|
| 400 |
+
repo_path = f"results/{filename}"
|
| 401 |
+
api.upload_file(
|
| 402 |
+
path_or_fileobj=local_path,
|
| 403 |
+
path_in_repo=repo_path,
|
| 404 |
+
repo_id=dataset_repo_id,
|
| 405 |
+
repo_type="dataset"
|
| 406 |
+
)
|
| 407 |
+
log_message(f"[UPLOAD] β
Uploaded {filename} to {dataset_repo_id}/{repo_path}")
|
| 408 |
except Exception as e:
|
| 409 |
+
log_message(f"[UPLOAD ERROR] {e}")
|
| 410 |
+
|
| 411 |
+
def log_listener(log_queue):
|
| 412 |
+
"""Continuously print log messages from worker processes."""
|
| 413 |
+
while True:
|
| 414 |
+
msg = log_queue.get()
|
| 415 |
+
if msg == "STOP":
|
| 416 |
+
break
|
| 417 |
+
log_message(msg)
|
| 418 |
+
|
| 419 |
+
def track_cursor_parallel(frames_dir, cursor_templates_dir, output_json_path,
|
| 420 |
+
threshold=CURSOR_THRESHOLD, start_frame=1,
|
| 421 |
+
batch_size=100, email_results=False):
|
| 422 |
+
"""Parallelized cursor tracking with real-time logging"""
|
| 423 |
+
log_message(f"[INFO] Tracking cursors in {frames_dir} with real-time logging...")
|
| 424 |
|
|
|
|
|
|
|
|
|
|
| 425 |
frames_dir = Path(frames_dir).resolve()
|
| 426 |
output_json_path = Path(output_json_path).resolve()
|
| 427 |
cursor_templates_dir = Path(cursor_templates_dir).resolve()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
ensure_dir(output_json_path.parent)
|
| 429 |
+
|
| 430 |
+
# Load cursor templates
|
| 431 |
cursor_templates = {}
|
| 432 |
for template_file in cursor_templates_dir.glob("*.png"):
|
| 433 |
+
img = cv2.imread(str(template_file), cv2.IMREAD_UNCHANGED)
|
| 434 |
+
if img is not None:
|
| 435 |
+
cursor_templates[template_file.name] = img
|
|
|
|
|
|
|
| 436 |
if not cursor_templates:
|
| 437 |
+
log_message(f"[ERROR] No cursor templates found in {cursor_templates_dir}")
|
| 438 |
return 0
|
| 439 |
+
|
| 440 |
+
# List frames
|
| 441 |
+
all_frames = sorted(frames_dir.glob("*.png"))
|
| 442 |
+
all_frames = [f for f in all_frames if int(f.stem) >= start_frame]
|
| 443 |
+
total_frames = len(all_frames)
|
| 444 |
+
if not total_frames:
|
| 445 |
+
log_message("[WARN] No frames found to process.")
|
| 446 |
+
return 0
|
| 447 |
+
|
| 448 |
+
log_message(f"[INFO] Total frames to track: {total_frames}")
|
| 449 |
+
|
| 450 |
+
# Multiprocessing setup
|
| 451 |
+
manager = multiprocessing.Manager()
|
| 452 |
+
log_queue = manager.Queue()
|
| 453 |
+
listener = multiprocessing.Process(target=log_listener, args=(log_queue,))
|
| 454 |
+
listener.start()
|
| 455 |
+
|
| 456 |
+
pool = multiprocessing.Pool(multiprocessing.cpu_count())
|
| 457 |
results = []
|
| 458 |
+
processed = 0
|
| 459 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
try:
|
| 461 |
+
# Feed tasks to pool in batches
|
| 462 |
+
for i in range(0, total_frames, batch_size):
|
| 463 |
+
batch = all_frames[i:i + batch_size]
|
| 464 |
+
tasks = [
|
| 465 |
+
pool.apply_async(
|
| 466 |
+
track_cursor_worker,
|
| 467 |
+
(frame_file, cursor_templates, threshold, log_queue)
|
| 468 |
+
)
|
| 469 |
+
for frame_file in batch
|
| 470 |
+
]
|
| 471 |
+
|
| 472 |
+
for t in tasks:
|
| 473 |
+
res = t.get()
|
| 474 |
+
results.append(res)
|
| 475 |
+
processed += 1
|
| 476 |
+
if processed % 50 == 0 or processed == total_frames:
|
| 477 |
+
log_message(f"[PROGRESS] {processed}/{total_frames} frames processed")
|
| 478 |
+
with open(output_json_path, "w") as f:
|
| 479 |
+
json.dump(results, f, indent=2)
|
| 480 |
+
|
| 481 |
+
pool.close()
|
| 482 |
+
pool.join()
|
| 483 |
+
|
| 484 |
+
# Final write
|
| 485 |
with open(output_json_path, "w") as f:
|
| 486 |
json.dump(results, f, indent=2)
|
| 487 |
log_message(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")
|
| 488 |
+
|
| 489 |
+
upload_to_hf_dataset(output_json_path, dataset_repo_id="Fred808/data", hf_token=HF_TOKEN)
|
| 490 |
+
|
| 491 |
if email_results:
|
| 492 |
+
log_message("[INFO] Sending email results (if configured)...")
|
| 493 |
to_email = os.environ.get("TO_EMAIL")
|
| 494 |
from_email = os.environ.get("FROM_EMAIL")
|
| 495 |
app_password = os.environ.get("GMAIL_APP_PASSWORD")
|
| 496 |
+
if to_email and from_email and app_password:
|
|
|
|
|
|
|
|
|
|
| 497 |
send_email_with_attachment(
|
| 498 |
subject="Cursor Tracking Results",
|
| 499 |
body="See attached JSON results.",
|
|
|
|
| 502 |
app_password=app_password,
|
| 503 |
attachment_path=output_json_path
|
| 504 |
)
|
| 505 |
+
|
| 506 |
+
except Exception as e:
|
| 507 |
+
log_message(f"[ERROR] Exception during parallel tracking: {e}")
|
| 508 |
+
pool.terminate()
|
| 509 |
+
|
| 510 |
+
finally:
|
| 511 |
+
log_queue.put("STOP")
|
| 512 |
+
listener.join()
|
| 513 |
+
|
| 514 |
+
active = len([r for r in results if r["cursor_active"]])
|
| 515 |
+
log_message(f"[DONE] {active}/{total_frames} frames contained cursors.")
|
| 516 |
+
return active
|
| 517 |
+
|
| 518 |
+
def send_email_with_attachment(subject, body, to_email, from_email, app_password, attachment_path):
|
| 519 |
+
msg = EmailMessage()
|
| 520 |
+
msg["Subject"] = subject
|
| 521 |
+
msg["From"] = from_email
|
| 522 |
+
msg["To"] = to_email
|
| 523 |
+
msg.set_content(body)
|
| 524 |
+
with open(attachment_path, "rb") as f:
|
| 525 |
+
file_data = f.read()
|
| 526 |
+
file_name = Path(attachment_path).name
|
| 527 |
+
msg.add_attachment(file_data, maintype="application", subtype="octet-stream", filename=file_name)
|
| 528 |
+
try:
|
| 529 |
+
with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
|
| 530 |
+
smtp.login(from_email, app_password)
|
| 531 |
+
smtp.send_message(msg)
|
| 532 |
+
log_message(f"[SUCCESS] Email sent to {to_email}")
|
| 533 |
except Exception as e:
|
| 534 |
+
log_message(f"[ERROR] Failed to send email: {e}")
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=CURSOR_THRESHOLD, start_frame=1, batch_size=100, email_results=False):
|
| 538 |
+
"""
|
| 539 |
+
Backwards-compatible wrapper that calls the parallel implementation.
|
| 540 |
+
Keep this name so other parts of your code that call track_cursor() keep working.
|
| 541 |
+
"""
|
| 542 |
+
return track_cursor_parallel(frames_dir, cursor_templates_dir, output_json_path, threshold, start_frame, batch_size, email_results)
|
| 543 |
+
|
| 544 |
|
| 545 |
def process_rar_file(rar_path: str) -> bool:
|
| 546 |
"""Process a single RAR file - extract, then process videos for frames and cursor tracking"""
|
| 547 |
filename = os.path.basename(rar_path)
|
| 548 |
processing_status["current_file"] = filename
|
| 549 |
+
|
| 550 |
# Handle multi-part RAR naming
|
| 551 |
if is_multipart_rar(filename):
|
| 552 |
course_name = get_rar_part_base(filename)
|
| 553 |
else:
|
| 554 |
course_name = filename.replace(".rar", "")
|
| 555 |
+
|
| 556 |
extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
|
| 557 |
+
|
| 558 |
try:
|
| 559 |
log_message(f"π Processing: {filename}")
|
| 560 |
+
|
| 561 |
# Clean up any existing directory
|
| 562 |
if os.path.exists(extract_dir):
|
| 563 |
shutil.rmtree(extract_dir, ignore_errors=True)
|
| 564 |
+
|
| 565 |
# Extract RAR
|
| 566 |
os.makedirs(extract_dir, exist_ok=True)
|
| 567 |
if not extract_with_retry(rar_path, extract_dir):
|
| 568 |
raise Exception("RAR extraction failed")
|
| 569 |
+
|
| 570 |
# Count extracted files
|
| 571 |
file_count = 0
|
| 572 |
video_files_found = []
|
|
|
|
| 575 |
file_count += 1
|
| 576 |
if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
|
| 577 |
video_files_found.append(os.path.join(root, file))
|
| 578 |
+
|
| 579 |
processing_status["extracted_courses"] += 1
|
| 580 |
+
log_message(f"β
Successfully extracted '{course_name}' ({file_count} files, {len(video_files_found)} videos)")
|
| 581 |
|
| 582 |
# Process video files for frame extraction and cursor tracking
|
| 583 |
for video_path in video_files_found:
|
| 584 |
video_filename = Path(video_path).name
|
| 585 |
# Create a unique output directory for frames for each video
|
| 586 |
+
safe_video_name = video_filename.replace(".", "_")
|
| 587 |
+
frames_output_dir = os.path.join(FRAMES_OUTPUT_FOLDER, f"{course_name}_{safe_video_name}_frames")
|
| 588 |
ensure_dir(frames_output_dir)
|
| 589 |
+
|
| 590 |
extracted_frames_count = extract_frames(video_path, frames_output_dir, fps=DEFAULT_FPS)
|
| 591 |
processing_status["extracted_frames_count"] += extracted_frames_count
|
| 592 |
if extracted_frames_count > 0:
|
|
|
|
| 594 |
log_message(f"[INFO] Extracted {extracted_frames_count} frames from {video_filename}")
|
| 595 |
|
| 596 |
# Perform cursor tracking on the extracted frames
|
| 597 |
+
cursor_output_json = os.path.join(CURSOR_TRACKING_OUTPUT_FOLDER, f"{course_name}_{safe_video_name}_cursor_data.json")
|
| 598 |
+
tracked_cursors = track_cursor(frames_output_dir, CURSOR_TEMPLATES_DIR, cursor_output_json, threshold=CURSOR_THRESHOLD, batch_size=100)
|
| 599 |
processing_status["tracked_cursors_count"] += tracked_cursors
|
| 600 |
log_message(f"[INFO] Tracked {tracked_cursors} cursors in frames from {video_filename}")
|
| 601 |
else:
|
| 602 |
log_message(f"[WARN] No frames extracted from {video_filename}")
|
| 603 |
|
| 604 |
return True
|
| 605 |
+
|
| 606 |
except Exception as e:
|
| 607 |
error_msg = str(e)
|
| 608 |
log_message(f"β Processing failed: {error_msg}")
|
| 609 |
log_failed_file(filename, error_msg)
|
| 610 |
return False
|
| 611 |
+
|
| 612 |
finally:
|
| 613 |
processing_status["current_file"] = None
|
| 614 |
|
| 615 |
+
|
| 616 |
def main_processing_loop(start_index: int = 0):
|
| 617 |
"""Main processing workflow - extraction, frame extraction, and cursor tracking"""
|
| 618 |
processing_status["is_running"] = True
|
| 619 |
+
|
| 620 |
try:
|
| 621 |
# Load state
|
| 622 |
processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
|
| 623 |
+
download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 5})
|
| 624 |
+
|
| 625 |
# Use start_index if provided, otherwise use the saved state
|
| 626 |
next_index = start_index if start_index > 0 else download_state["next_download_index"]
|
| 627 |
+
|
| 628 |
log_message(f"π Starting from index {next_index}")
|
| 629 |
log_message(f"π Previously processed: {len(processed_rars)} files")
|
| 630 |
+
|
| 631 |
# Get file list
|
| 632 |
try:
|
| 633 |
files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
|
| 634 |
rar_files = sorted([f for f in files if f.endswith(".rar")])
|
| 635 |
+
|
| 636 |
processing_status["total_files"] = len(rar_files)
|
| 637 |
log_message(f"π Found {len(rar_files)} RAR files in repository")
|
| 638 |
+
|
| 639 |
if next_index >= len(rar_files):
|
| 640 |
log_message("β
All files have been processed!")
|
| 641 |
return
|
| 642 |
+
|
| 643 |
except Exception as e:
|
| 644 |
log_message(f"β Failed to get file list: {str(e)}")
|
| 645 |
return
|
| 646 |
+
|
| 647 |
+
# Process one file per run
|
| 648 |
if next_index < len(rar_files):
|
| 649 |
rar_file = rar_files[next_index]
|
| 650 |
filename = os.path.basename(rar_file)
|
| 651 |
+
|
| 652 |
if filename in processed_rars:
|
| 653 |
log_message(f"βοΈ Skipping already processed: {filename}")
|
| 654 |
processing_status["processed_files"] += 1
|
|
|
|
| 657 |
save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
|
| 658 |
log_message(f"π Moving to next file. Progress: {next_index}/{len(rar_files)}")
|
| 659 |
return
|
| 660 |
+
|
| 661 |
log_message(f"π₯ Downloading: {filename}")
|
| 662 |
dest_path = os.path.join(DOWNLOAD_FOLDER, filename)
|
| 663 |
+
|
| 664 |
# Download file
|
| 665 |
download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
|
| 666 |
if download_with_retry(download_url, dest_path):
|
|
|
|
| 673 |
else:
|
| 674 |
log_message(f"β Failed to process: {filename}")
|
| 675 |
processing_status["failed_files"] += 1
|
| 676 |
+
|
| 677 |
# Clean up downloaded file
|
| 678 |
try:
|
| 679 |
os.remove(dest_path)
|
| 680 |
log_message(f"ποΈ Cleaned up download: {filename}")
|
| 681 |
+
except Exception:
|
| 682 |
pass
|
| 683 |
else:
|
| 684 |
log_message(f"β Failed to download: {filename}")
|
| 685 |
processing_status["failed_files"] += 1
|
| 686 |
+
|
| 687 |
# Update download state for next run
|
| 688 |
next_index += 1
|
| 689 |
save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
|
| 690 |
+
|
| 691 |
# Status update
|
| 692 |
log_message(f"π Progress: {next_index}/{len(rar_files)} files processed")
|
| 693 |
log_message(f'π Extracted: {processing_status["extracted_courses"]} courses')
|
|
|
|
| 695 |
log_message(f'π Frames Extracted: {processing_status["extracted_frames_count"]}')
|
| 696 |
log_message(f'π Cursors Tracked: {processing_status["tracked_cursors_count"]}')
|
| 697 |
log_message(f'π Failed: {processing_status["failed_files"]} files')
|
| 698 |
+
|
| 699 |
if next_index < len(rar_files):
|
| 700 |
log_message(f"π Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
|
| 701 |
else:
|
| 702 |
log_message("π All files have been processed!")
|
| 703 |
else:
|
| 704 |
log_message("β
All files have been processed!")
|
| 705 |
+
|
| 706 |
log_message("π Processing complete!")
|
| 707 |
log_message(f'π Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["tracked_cursors_count"]} cursors tracked')
|
| 708 |
+
|
| 709 |
except KeyboardInterrupt:
|
| 710 |
log_message("βΉοΈ Processing interrupted by user")
|
| 711 |
except Exception as e:
|
|
|
|
| 714 |
processing_status["is_running"] = False
|
| 715 |
cleanup_temp_files()
|
| 716 |
|
| 717 |
+
|
| 718 |
# Expose necessary functions and variables for download_api.py
|
| 719 |
__all__ = [
|
| 720 |
"main_processing_loop",
|
|
|
|
| 729 |
"CURSOR_THRESHOLD",
|
| 730 |
"ensure_dir"
|
| 731 |
]
|
|
|
|
|
|
|
|
|
|
|
|