Fred808 committed on
Commit
f84e72f
·
verified ·
1 Parent(s): 27762cf

Update cursor_tracker.py

Browse files
Files changed (1) hide show
  1. cursor_tracker.py +519 -86
cursor_tracker.py CHANGED
@@ -1,16 +1,273 @@
1
- """
2
- Cursor detection and action logging utilities.
3
- """
4
-
5
  import os
6
  import json
 
 
 
 
 
 
 
 
 
7
  import cv2
8
  import numpy as np
9
  from pathlib import Path
10
- from email.message import EmailMessage
11
  import smtplib
12
- from typing import List, Tuple
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def to_rgb(img):
15
  if img is None:
16
  return None
@@ -22,10 +279,12 @@ def to_rgb(img):
22
 
23
  def get_mask_from_alpha(template_img):
24
  if template_img is not None and len(template_img.shape) == 3 and template_img.shape[2] == 4:
 
25
  return (template_img[:, :, 3] > 0).astype(np.uint8) * 255
26
  return None
27
 
28
- def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=0.8):
 
29
  best_pos = None
30
  best_conf = -1
31
  best_template_name = None
@@ -34,14 +293,14 @@ def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=0.8):
34
  template_rgb = to_rgb(cursor_template)
35
  mask = get_mask_from_alpha(cursor_template)
36
  if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
37
- print(f"[WARN] Skipping template {template_name} due to channel mismatch or load error.")
38
  continue
39
  try:
40
  result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
41
  except Exception as e:
42
- print(f"[WARN] matchTemplate failed for {template_name}: {e}")
43
  continue
44
- _, max_val, _, max_loc = cv2.minMaxLoc(result)
45
  if max_val > best_conf:
46
  best_conf = max_val
47
  if max_val >= threshold:
@@ -54,111 +313,285 @@ def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=0.8):
54
  return best_pos, best_conf, best_template_name
55
  return None, best_conf, None
56
 
57
- def ensure_dir(path):
58
- os.makedirs(path, exist_ok=True)
59
-
60
  def send_email_with_attachment(subject, body, to_email, from_email, app_password, attachment_path):
61
  msg = EmailMessage()
62
- msg['Subject'] = subject
63
- msg['From'] = from_email
64
- msg['To'] = to_email
65
  msg.set_content(body)
66
- with open(attachment_path, 'rb') as f:
67
  file_data = f.read()
68
  file_name = Path(attachment_path).name
69
- msg.add_attachment(file_data, maintype='application', subtype='octet-stream', filename=file_name)
70
  try:
71
- with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
72
  smtp.login(from_email, app_password)
73
  smtp.send_message(msg)
74
- print(f"[SUCCESS] Email sent to {to_email}")
75
  except Exception as e:
76
- print(f"[ERROR] Failed to send email: {e}")
77
-
78
- def track_cursor_from_images(
79
- images: List[Tuple[str, np.ndarray]],
80
- cursor_templates_dir: str,
81
- output_json_path: str = None,
82
- threshold=0.8,
83
- email_results=False,
84
- return_results=False,
85
- append=False
86
- ):
87
- """Detect cursor in a list of in-memory images and optionally save or return results."""
88
 
 
 
 
 
 
89
  cursor_templates_dir = Path(cursor_templates_dir).resolve()
90
- if output_json_path:
91
- output_json_path = Path(output_json_path).resolve()
92
- ensure_dir(output_json_path.parent)
93
-
 
 
94
  cursor_templates = {}
95
- for template_file in cursor_templates_dir.glob('*.png'):
96
  template_img = cv2.imread(str(template_file), cv2.IMREAD_UNCHANGED)
97
  if template_img is not None:
98
  cursor_templates[template_file.name] = template_img
99
  else:
100
- print(f"[WARN] Could not load template: {template_file}")
101
  if not cursor_templates:
102
- raise FileNotFoundError(f"No cursor templates found in: {cursor_templates_dir}")
103
-
104
  results = []
105
-
106
- for frame_filename, frame in images:
 
 
 
 
107
  if frame is None:
108
- print(f"[WARN] Frame {frame_filename} is empty")
109
  continue
110
-
111
  pos, conf, template_name = detect_cursor_in_frame_multi(frame, cursor_templates, threshold)
112
  if pos is not None:
113
- print(f"{frame_filename}: Cursor at {pos} (template: {template_name})")
114
  results.append({
115
- 'frame': frame_filename,
116
- 'cursor_active': True,
117
- 'x': pos[0],
118
- 'y': pos[1],
119
- 'confidence': conf,
120
- 'template': template_name
121
  })
 
122
  else:
123
- print(f"{frame_filename}: Cursor disabled")
124
  results.append({
125
- 'frame': frame_filename,
126
- 'cursor_active': False,
127
- 'x': None,
128
- 'y': None,
129
- 'confidence': conf,
130
- 'template': None
131
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- if return_results:
134
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
- if output_json_path:
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  try:
138
- if append and output_json_path.exists():
139
- with open(output_json_path, 'r', encoding='utf-8') as f:
140
- old_data = json.load(f)
141
- results = old_data + results
142
- with open(output_json_path, 'w', encoding='utf-8') as f:
143
- json.dump(results, f, indent=2)
144
- print(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")
 
 
 
145
  except Exception as e:
146
- print(f"[ERROR] Failed to write output JSON: {e}")
147
- raise
148
-
149
- if email_results:
150
- print("[INFO] Preparing to email results...")
151
- to_email = os.environ.get("TO_EMAIL")
152
- from_email = os.environ.get("FROM_EMAIL")
153
- app_password = os.environ.get("GMAIL_APP_PASSWORD")
154
- if not (to_email and from_email and app_password):
155
- print("[ERROR] Email environment variables not set.")
156
  return
157
- send_email_with_attachment(
158
- subject="Cursor Tracking Results",
159
- body="See attached JSON results.",
160
- to_email=to_email,
161
- from_email=from_email,
162
- app_password=app_password,
163
- attachment_path=output_json_path
164
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
+ import requests
4
+ import subprocess
5
+ import shutil
6
+ import time
7
+ import re
8
+ import threading
9
+ from typing import Dict, List, Set, Optional
10
+ from huggingface_hub import HfApi, list_repo_files
11
+
12
  import cv2
13
  import numpy as np
14
  from pathlib import Path
 
15
  import smtplib
16
+ from email.message import EmailMessage
17
+
18
# ==== CONFIGURATION ====
# Hugging Face token and source dataset repo id; both can be overridden
# through the HF_TOKEN / SOURCE_REPO environment variables.
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")

# Path Configuration
# All folders are relative to the current working directory.
DOWNLOAD_FOLDER = "downloads"
EXTRACT_FOLDER = "extracted"
FRAMES_OUTPUT_FOLDER = "extracted_frames" # New folder for extracted frames
CURSOR_TRACKING_OUTPUT_FOLDER = "cursor_tracking_results" # New folder for cursor tracking results
CURSOR_TEMPLATES_DIR = "cursors"

# NOTE: these directories are created as a side effect of importing this module.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(EXTRACT_FOLDER, exist_ok=True)
os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
os.makedirs(CURSOR_TRACKING_OUTPUT_FOLDER, exist_ok=True)
os.makedirs(CURSOR_TEMPLATES_DIR, exist_ok=True) # Ensure cursor templates directory exists

# State Files
# JSON progress files read/written by the processing loop, plus an append-only failure log.
DOWNLOAD_STATE_FILE = "download_progress.json"
PROCESS_STATE_FILE = "process_progress.json"
FAILED_FILES_LOG = "failed_files.log"

# Processing Parameters
# NOTE(review): CHUNK_SIZE, PROCESSING_DELAY and MAX_RETRIES are not referenced
# by the code visible in this file section - confirm whether other modules use them.
CHUNK_SIZE = 1
PROCESSING_DELAY = 2
MAX_RETRIES = 3
MIN_FREE_SPACE_GB = 2 # Minimum free space in GB before processing

# Frame Extraction Parameters
DEFAULT_FPS = 3 # Default frames per second for extraction

# Cursor Tracking Parameters
# Minimum template-matching score for a frame to count as "cursor found".
CURSOR_THRESHOLD = 0.8

# Initialize HF API
hf_api = HfApi(token=HF_TOKEN)

# Global State
# Mutable, module-wide progress record. It is updated in place by the
# processing functions and by log_message (which fills "logs" and "last_update").
processing_status = {
    "is_running": False,
    "current_file": None,
    "total_files": 0,
    "processed_files": 0,
    "failed_files": 0,
    "extracted_courses": 0,
    "extracted_videos": 0,
    "extracted_frames_count": 0,
    "tracked_cursors_count": 0,
    "last_update": None,
    "logs": []
}
69
+
70
def log_message(message: str):
    """Print a timestamped message and record it in ``processing_status``.

    The formatted line is appended to ``processing_status["logs"]`` and the
    timestamp is stored in ``processing_status["last_update"]``. Only the most
    recent 100 log lines are kept.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    line = "[{}] {}".format(now, message)
    print(line)

    history = processing_status["logs"]
    history.append(line)
    processing_status["last_update"] = now

    # Cap the in-memory history so a long run does not grow without bound.
    if len(history) > 100:
        processing_status["logs"] = history[-100:]
79
+
80
def log_failed_file(filename: str, error: str):
    """Append one failure record to the persistent failed-files log.

    Each record has the form ``<timestamp> - <filename>: <error>``.

    Args:
        filename: Name of the file that could not be processed.
        error: Human-readable description of the failure.
    """
    # Compute the timestamp outside the f-string: reusing double quotes inside
    # a replacement field is a SyntaxError on Python versions before 3.12.
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    # Explicit UTF-8 so non-ASCII file names or error text cannot fail on
    # platforms whose default encoding is narrower.
    with open(FAILED_FILES_LOG, "a", encoding="utf-8") as f:
        f.write(f"{timestamp} - {filename}: {error}\n")
84
+
85
def get_disk_usage(path: str) -> Dict[str, float]:
    """Return disk usage statistics for the filesystem containing *path*, in GB.

    Uses ``shutil.disk_usage`` instead of ``os.statvfs`` so the function also
    works on platforms without ``statvfs`` (e.g. Windows).

    Args:
        path: Any path on the filesystem to inspect.

    Returns:
        A dict with ``"total"``, ``"free"`` and ``"used"`` sizes in GB
        (1 GB = 1024**3 bytes). ``"used"`` is defined as ``total - free``.
    """
    gib = 1024 ** 3
    usage = shutil.disk_usage(path)
    total = usage.total / gib
    free = usage.free / gib
    # Keep the original definition of "used" (everything that is not free to
    # this process) rather than shutil's own "used" field.
    used = total - free
    return {"total": total, "free": free, "used": used}
92
+
93
def check_disk_space(path: str = ".") -> bool:
    """Report whether the filesystem holding *path* has enough free space.

    Returns:
        ``False`` (after logging a warning) when free space is below
        ``MIN_FREE_SPACE_GB``; ``True`` otherwise.
    """
    usage = get_disk_usage(path)
    enough = not (usage["free"] < MIN_FREE_SPACE_GB)
    if not enough:
        free_gb = usage["free"]
        used_gb = usage["used"]
        log_message(f'⚠️ Low disk space: {free_gb:.2f}GB free, {used_gb:.2f}GB used')
    return enough
100
+
101
def cleanup_temp_files():
    """Delete leftover archive downloads to free disk space (best effort).

    Every ``.rar`` / ``.zip`` file in ``DOWNLOAD_FOLDER`` is removed except the
    one named in ``processing_status["current_file"]``. Removal problems are
    logged and skipped; this function does not raise for them.
    """
    log_message("🧹 Cleaning up temporary files...")

    # Clean old downloads (keep only current processing file)
    current_file = processing_status.get("current_file")
    try:
        candidates = os.listdir(DOWNLOAD_FOLDER)
    except FileNotFoundError:
        # Nothing to clean if the download folder does not exist.
        return

    for file in candidates:
        if file == current_file or not file.endswith((".rar", ".zip")):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_FOLDER, file))
        except OSError as e:
            # Only filesystem errors are tolerated here; a bare ``except``
            # would also swallow KeyboardInterrupt/SystemExit.
            log_message(f"[WARN] Could not remove old download {file}: {e}")
            continue
        log_message(f"πŸ—‘οΈ Removed old download: {file}")
114
+
115
def load_json_state(file_path: str, default_value):
    """Read a JSON state file, falling back to *default_value*.

    The default is returned when the file does not exist or when its content
    is not valid JSON (in which case a warning is logged).
    """
    if not os.path.exists(file_path):
        return default_value

    try:
        with open(file_path, "r") as handle:
            state = json.load(handle)
    except json.JSONDecodeError:
        log_message(f"⚠️ Corrupted state file: {file_path}")
        return default_value
    return state
124
+
125
def save_json_state(file_path: str, data):
    """Persist *data* as indented JSON at *file_path*, replacing it atomically.

    The JSON is first written to a sibling ``<file_path>.tmp`` file and then
    moved over the target with ``os.replace``. If serialization or writing
    fails, the previous state file is left untouched instead of being
    truncated, and the temporary file is removed.

    Args:
        file_path: Destination of the state file.
        data: JSON-serializable object to store.

    Raises:
        TypeError: If *data* is not JSON-serializable.
        OSError: If the file cannot be written or replaced.
    """
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, file_path)
    except BaseException:
        # Do not leave a half-written temp file behind; re-raise the
        # original error for the caller.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
129
+
130
def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
    """Download *url* to *dest_path*, retrying with exponential backoff.

    Before downloading, free disk space is checked (with one cleanup pass if
    it is low). When the server reports a content length, the download is
    refused if it would not fit on disk with a 0.5 GB buffer.

    Args:
        url: Source URL.
        dest_path: Local file to write.
        max_retries: Maximum number of attempts.

    Returns:
        ``True`` if the file was downloaded completely, ``False`` otherwise.
    """
    if not check_disk_space():
        cleanup_temp_files()
        if not check_disk_space():
            log_message("❌ Insufficient disk space even after cleanup")
            return False

    # Only send credentials when a token is configured; an empty token would
    # otherwise produce a malformed "Bearer " header.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    for attempt in range(max_retries):
        started_writing = False
        try:
            # (connect, read) timeouts so a stalled connection fails and is
            # retried instead of hanging forever.
            with requests.get(url, headers=headers, stream=True, timeout=(10, 60)) as r:
                r.raise_for_status()

                # Check content length if available
                content_length = r.headers.get("content-length")
                if content_length:
                    size_gb = int(content_length) / (1024**3)
                    disk_info = get_disk_usage(".")
                    if size_gb > disk_info["free"] - 0.5:  # Leave 0.5GB buffer
                        log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
                        return False

                started_writing = True
                with open(dest_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
            return True
        except Exception as e:
            # Drop a partially written file so it is never mistaken for a
            # complete download.
            if started_writing:
                try:
                    os.remove(dest_path)
                except OSError:
                    pass
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            log_message(f"❌ Download failed after {max_retries} attempts: {e}")
            return False
    return False
164
+
165
def is_multipart_rar(filename: str) -> bool:
    """Return True if *filename* looks like one volume of a multi-part RAR.

    A multi-part volume is recognized by a ``.part<digits>.rar`` suffix,
    matched case-insensitively. Matching the full suffix (rather than any
    ``.part`` substring) avoids false positives such as ``birthday.party.rar``.
    """
    return re.search(r"\.part\d+\.rar$", filename, re.IGNORECASE) is not None
168
+
169
def get_rar_part_base(filename: str) -> str:
    """Return the archive base name shared by all volumes of a RAR set.

    For ``name.part<digits>.rar`` (any letter case) this is ``name``. For a
    plain ``name.rar`` the trailing extension is removed. Any other name is
    returned unchanged.
    """
    volume = re.match(r"^(.*)\.part\d+\.rar$", filename, re.IGNORECASE)
    if volume:
        return volume.group(1)
    # Strip only the trailing extension, not every ".rar" occurrence.
    if filename.lower().endswith(".rar"):
        return filename[:-len(".rar")]
    return filename
174
+
175
def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
    """Extract a RAR archive with ``unrar``, retrying on failure.

    For a multi-part archive the first volume is located next to *rar_path*
    and extraction starts from it. Each attempt first tests the archive
    (``unrar t``) and then extracts it (``unrar x -o+``); attempts after the
    first add ``-kb`` so broken files are kept.

    Args:
        rar_path: Path to the archive (any volume of a multi-part set).
        output_dir: Directory to extract into.
        max_retries: Maximum number of attempts.

    Returns:
        ``True`` if extraction succeeded, ``False`` otherwise.
    """
    filename = os.path.basename(rar_path)

    # For multi-part RARs, we need the first part
    if is_multipart_rar(filename):
        base_name = get_rar_part_base(filename)
        # Reuse the zero-padding width of the given volume so sets numbered
        # part1 / part01 / part001 are all supported (default: two digits).
        part_match = re.search(r"\.part(\d+)\.rar$", filename, re.IGNORECASE)
        width = len(part_match.group(1)) if part_match else 2
        first_part = f"{base_name}.part{1:0{width}d}.rar"
        first_part_path = os.path.join(os.path.dirname(rar_path), first_part)

        if not os.path.exists(first_part_path):
            log_message(f"⚠️ Multi-part RAR detected but first part not found: {first_part}")
            return False

        rar_path = first_part_path
        log_message(f"πŸ“¦ Processing multi-part RAR starting with: {first_part}")

    # max_retries is a count; iterating over the int itself raised TypeError.
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            # Test RAR first
            test_cmd = ["unrar", "t", rar_path]
            test_result = subprocess.run(test_cmd, capture_output=True, text=True)
            if test_result.returncode != 0:
                log_message(f"⚠️ RAR test failed: {test_result.stderr}")
                if is_last_attempt:
                    return False
                continue

            # Extract RAR
            cmd = ["unrar", "x", "-o+", rar_path, output_dir]
            if attempt > 0:  # Try recovery on subsequent attempts
                cmd.insert(2, "-kb")

            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                log_message(f"βœ… Successfully extracted: {os.path.basename(rar_path)}")
                return True

            error_msg = result.stderr or result.stdout
            log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")

            if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
                log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
            elif result.returncode == 10:
                # Retrying cannot help when the archive has nothing to extract.
                log_message(f"⚠️ No files to extract (exit code 10)")
                return False
            elif result.returncode == 1:
                log_message(f"⚠️ Non-fatal error (exit code 1)")

        except Exception as e:
            log_message(f"❌ Extraction exception: {str(e)}")
            if is_last_attempt:
                return False

        # Short pause before the next attempt (skipped after the last one).
        if not is_last_attempt:
            time.sleep(1)

    return False
231
 
232
+ # --- Frame Extraction Utilities ---
233
def ensure_dir(path):
    """Create *path* (and missing parents); do nothing if it is already a directory."""
    try:
        os.makedirs(path)
    except FileExistsError:
        # An existing directory is fine; anything else at that path is an error.
        if not os.path.isdir(path):
            raise
235
+
236
def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
    """Extract frames from a video at roughly *fps* frames per second.

    Frames are written to *output_dir* as ``0001.png``, ``0002.png``, ...
    Every N-th decoded frame is kept, where N is the video's frame rate
    divided by *fps* (rounded, never less than 1).

    Args:
        video_path: Path of the video to read.
        output_dir: Directory that receives the PNG frames (created if needed).
        fps: Target number of frames to keep per second of video.

    Returns:
        The number of frames actually written. ``0`` if *fps* is not positive
        or the video cannot be opened.
    """
    log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
    if not fps or fps <= 0:
        # Guard against division by zero / negative sampling intervals.
        log_message(f"[ERROR] Invalid extraction rate: {fps} fps")
        return 0

    ensure_dir(output_dir)
    frames_root = Path(output_dir)
    saved_count = 0
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            log_message(f"[ERROR] Failed to open video file: {video_path}")
            return 0

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if not video_fps or video_fps <= 0:
            video_fps = 30  # fallback if FPS is not available
            log_message(f"[WARN] Using fallback FPS: {video_fps}")

        # When the requested rate exceeds the video's rate the rounded ratio
        # would be 0; clamp to 1 so every frame is kept instead of crashing
        # on the modulo below.
        frame_interval = max(1, int(round(video_fps / fps)))

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        log_message(f"[DEBUG] Total frames in video: {total_frames}")

        frame_idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % frame_interval == 0:
                frame_name = f"{saved_count + 1:04d}.png"
                # Count only frames that were really written so the returned
                # total matches the files on disk and numbering stays contiguous.
                if cv2.imwrite(str(frames_root / frame_name), frame):
                    saved_count += 1
                else:
                    log_message(f"[WARN] Failed to write frame {frame_name}")
            frame_idx += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    log_message(f"Extracted {saved_count} frames from {video_path} to {output_dir}")
    return saved_count
269
+
270
+ # --- Cursor Tracking Utilities ---
271
  def to_rgb(img):
272
  if img is None:
273
  return None
 
279
 
280
def get_mask_from_alpha(template_img):
    """Build a binary mask from the alpha channel of a 4-channel image.

    Returns a ``uint8`` array that is 255 where alpha is non-zero and 0
    elsewhere, or ``None`` when *template_img* is ``None`` or is not a
    3-dimensional array with exactly four channels.
    """
    if template_img is None:
        return None

    shape = template_img.shape
    if len(shape) != 3 or shape[2] != 4:
        return None

    # Use alpha channel as mask (nonzero alpha = 255)
    alpha = template_img[:, :, 3]
    opaque = alpha > 0
    return opaque.astype(np.uint8) * 255
285
 
286
+ def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRESHOLD):
287
+ """Detect cursor position in a frame using multiple templates. Returns best match above threshold."""
288
  best_pos = None
289
  best_conf = -1
290
  best_template_name = None
 
293
  template_rgb = to_rgb(cursor_template)
294
  mask = get_mask_from_alpha(cursor_template)
295
  if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
296
+ log_message(f"[WARN] Skipping template {template_name} due to channel mismatch or load error.")
297
  continue
298
  try:
299
  result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
300
  except Exception as e:
301
+ log_message(f"[WARN] matchTemplate failed for {template_name}: {e}")
302
  continue
303
+ min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
304
  if max_val > best_conf:
305
  best_conf = max_val
306
  if max_val >= threshold:
 
313
  return best_pos, best_conf, best_template_name
314
  return None, best_conf, None
315
 
 
 
 
316
def send_email_with_attachment(subject, body, to_email, from_email, app_password, attachment_path):
    """Send an email with one file attached through Gmail's SMTP-over-SSL endpoint.

    The attachment is read fully into memory and sent as
    ``application/octet-stream``. SMTP login/send errors are logged rather
    than raised; errors while reading the attachment propagate to the caller.
    """
    attachment = Path(attachment_path)

    msg = EmailMessage()
    for header, value in (("Subject", subject), ("From", from_email), ("To", to_email)):
        msg[header] = value
    msg.set_content(body)

    payload = attachment.read_bytes()
    msg.add_attachment(
        payload,
        maintype="application",
        subtype="octet-stream",
        filename=attachment.name,
    )

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
            smtp.login(from_email, app_password)
            smtp.send_message(msg)
        log_message(f"[SUCCESS] Email sent to {to_email}")
    except Exception as e:
        log_message(f"[ERROR] Failed to send email: {e}")
 
 
 
 
 
 
 
 
 
 
 
333
 
334
def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=CURSOR_THRESHOLD, start_frame=1, email_results=False):
    """Detect the cursor in each numbered frame and write the results as JSON.

    Every ``*.png`` in *frames_dir* whose file stem is an integer greater than
    or equal to *start_frame* is matched against all PNG templates found in
    *cursor_templates_dir*. One record per readable frame is written to
    *output_json_path*.

    Args:
        frames_dir: Directory containing numbered frame images.
        cursor_templates_dir: Directory containing cursor template PNGs.
        output_json_path: Destination JSON file (parent directory is created).
        threshold: Minimum match confidence passed to the detector.
        start_frame: Lowest frame number to process.
        email_results: If true, email the JSON file using the ``TO_EMAIL``,
            ``FROM_EMAIL`` and ``GMAIL_APP_PASSWORD`` environment variables.

    Returns:
        The number of frames in which a cursor was detected. ``0`` if no
        templates could be loaded.
    """
    log_message(f"[INFO] Tracking cursors in {frames_dir}...")
    frames_dir = Path(frames_dir).resolve()
    output_json_path = Path(output_json_path).resolve()
    cursor_templates_dir = Path(cursor_templates_dir).resolve()
    ensure_dir(frames_dir)
    ensure_dir(output_json_path.parent)

    # Load all PNG templates from the cursor_templates_dir (sorted so the
    # template order is the same on every run).
    cursor_templates = {}
    for template_file in sorted(cursor_templates_dir.glob("*.png")):
        template_img = cv2.imread(str(template_file), cv2.IMREAD_UNCHANGED)
        if template_img is not None:
            cursor_templates[template_file.name] = template_img
        else:
            log_message(f"[WARN] Could not load template: {template_file}")
    if not cursor_templates:
        log_message(f"[ERROR] No cursor templates found in: {cursor_templates_dir}")
        return 0

    # Select frames by their numeric name. Non-numeric PNGs are skipped
    # instead of crashing int(), and ordering is numeric so that frame 10000
    # does not sort before frame 1001.
    numbered_frames = []
    for frame_file in frames_dir.glob("*.png"):
        try:
            frame_num = int(frame_file.stem)
        except ValueError:
            log_message(f"[WARN] Skipping frame with non-numeric name: {frame_file.name}")
            continue
        if frame_num >= start_frame:
            numbered_frames.append((frame_num, frame_file))
    numbered_frames.sort(key=lambda item: (item[0], item[1].name))

    results = []
    tracked_count = 0
    for _, frame_file in numbered_frames:
        frame = cv2.imread(str(frame_file), cv2.IMREAD_UNCHANGED)
        if frame is None:
            log_message(f"[WARN] Could not load frame: {frame_file}")
            continue
        pos, conf, template_name = detect_cursor_in_frame_multi(frame, cursor_templates, threshold)
        found = pos is not None
        results.append({
            "frame": frame_file.name,
            "cursor_active": found,
            "x": pos[0] if found else None,
            "y": pos[1] if found else None,
            "confidence": conf,
            "template": template_name if found else None
        })
        if found:
            tracked_count += 1

    try:
        with open(output_json_path, "w") as f:
            json.dump(results, f, indent=2)
    except Exception as e:
        # Best effort: report the failure but still return the count.
        log_message(f"[ERROR] Failed to write output JSON: {e}")
        return tracked_count
    log_message(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")

    if email_results:
        log_message("[INFO] Preparing to email results...")
        to_email = os.environ.get("TO_EMAIL")
        from_email = os.environ.get("FROM_EMAIL")
        app_password = os.environ.get("GMAIL_APP_PASSWORD")
        if not (to_email and from_email and app_password):
            log_message("[ERROR] Email environment variables not set. Please set TO_EMAIL, FROM_EMAIL, and GMAIL_APP_PASSWORD.")
        else:
            # Kept separate from the JSON write so an email problem is not
            # misreported as a failure to write the results file.
            try:
                send_email_with_attachment(
                    subject="Cursor Tracking Results",
                    body="See attached JSON results.",
                    to_email=to_email,
                    from_email=from_email,
                    app_password=app_password,
                    attachment_path=output_json_path
                )
            except Exception as e:
                log_message(f"[ERROR] Failed to email results: {e}")
    return tracked_count
413
+
414
def process_rar_file(rar_path: str) -> bool:
    """Extract one RAR archive and run frame extraction and cursor tracking on its videos.

    While running, ``processing_status["current_file"]`` holds the archive's
    file name; it is reset to ``None`` on exit. Counters in
    ``processing_status`` are updated as courses, videos, frames and cursor
    detections are produced.

    Args:
        rar_path: Path to the downloaded archive.

    Returns:
        ``True`` if extraction and all per-video processing finished without
        raising; ``False`` if any step raised (the error is logged and also
        recorded through ``log_failed_file``).
    """
    filename = os.path.basename(rar_path)
    processing_status["current_file"] = filename

    # Handle multi-part RAR naming
    if is_multipart_rar(filename):
        course_name = get_rar_part_base(filename)
    else:
        course_name = filename.replace(".rar", "")

    extract_dir = os.path.join(EXTRACT_FOLDER, course_name)

    try:
        log_message(f"πŸ”„ Processing: {filename}")

        # Clean up any existing directory
        if os.path.exists(extract_dir):
            shutil.rmtree(extract_dir, ignore_errors=True)

        # Extract RAR
        os.makedirs(extract_dir, exist_ok=True)
        if not extract_with_retry(rar_path, extract_dir):
            # Raised so the shared except-branch below logs and records the failure.
            raise Exception("RAR extraction failed")

        # Count extracted files
        file_count = 0
        video_files_found = []
        for root, dirs, files in os.walk(extract_dir):
            for file in files:
                file_count += 1
                if file.lower().endswith((".mp4", ".avi", ".mov", ".mkv")):
                    video_files_found.append(os.path.join(root, file))

        processing_status["extracted_courses"] += 1
        log_message(f"βœ… Successfully extracted \'{course_name}\' ({file_count} files, {len(video_files_found)} videos)")

        # Process video files for frame extraction and cursor tracking
        for video_path in video_files_found:
            video_filename = Path(video_path).name
            # Create a unique output directory for frames for each video
            # NOTE(review): the name is built from the course and the video's
            # base name only, so two videos with the same file name in
            # different sub-folders would share a directory - confirm.
            frames_output_dir = os.path.join(FRAMES_OUTPUT_FOLDER, f"{course_name}_{video_filename.replace('.', '_')}_frames")
            ensure_dir(frames_output_dir)

            extracted_frames_count = extract_frames(video_path, frames_output_dir, fps=DEFAULT_FPS)
            processing_status["extracted_frames_count"] += extracted_frames_count
            if extracted_frames_count > 0:
                processing_status["extracted_videos"] += 1
                log_message(f"[INFO] Extracted {extracted_frames_count} frames from {video_filename}")

                # Perform cursor tracking on the extracted frames
                cursor_output_json = os.path.join(CURSOR_TRACKING_OUTPUT_FOLDER, f"{course_name}_{video_filename.replace('.', '_')}_cursor_data.json")
                tracked_cursors = track_cursor(frames_output_dir, CURSOR_TEMPLATES_DIR, cursor_output_json, threshold=CURSOR_THRESHOLD)
                processing_status["tracked_cursors_count"] += tracked_cursors
                log_message(f"[INFO] Tracked {tracked_cursors} cursors in frames from {video_filename}")
            else:
                log_message(f"[WARN] No frames extracted from {video_filename}")

        return True

    except Exception as e:
        error_msg = str(e)
        log_message(f"❌ Processing failed: {error_msg}")
        log_failed_file(filename, error_msg)
        return False

    finally:
        # Always clear the "current file" marker, on success and on failure.
        processing_status["current_file"] = None
482
 
483
def main_processing_loop():
    """Run one step of the workflow: download, extract and analyse the next archive.

    Progress is persisted in two JSON state files: the index of the next
    archive to download and the list of archive names already processed.
    Each call handles at most one ``.rar`` file from the source repository
    (sorted by name) and then advances the index, whether or not that file
    succeeded.

    ``processing_status["is_running"]`` is true for the duration of the call.
    Errors are logged rather than raised, and temporary downloads are cleaned
    up on exit.
    """
    processing_status["is_running"] = True

    try:
        # Load state
        processed_rars = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})["processed_rars"]
        download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 0})
        next_index = download_state["next_download_index"]

        log_message(f"πŸ“Š Starting from index {next_index}")
        log_message(f"πŸ“Š Previously processed: {len(processed_rars)} files")

        # Get file list
        try:
            files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
            rar_files = sorted([f for f in files if f.endswith(".rar")])

            processing_status["total_files"] = len(rar_files)
            log_message(f"πŸ“ Found {len(rar_files)} RAR files in repository")

            if next_index >= len(rar_files):
                log_message("βœ… All files have been processed!")
                return

        except Exception as e:
            log_message(f"❌ Failed to get file list: {str(e)}")
            return

        # Process only one file per run
        if next_index < len(rar_files):
            rar_file = rar_files[next_index]
            filename = os.path.basename(rar_file)

            if filename in processed_rars:
                log_message(f"⏭️ Skipping already processed: {filename}")
                processing_status["processed_files"] += 1
                # Move to next file
                next_index += 1
                save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
                log_message(f"πŸ“Š Moving to next file. Progress: {next_index}/{len(rar_files)}")
                return

            log_message(f"πŸ“₯ Downloading: {filename}")
            dest_path = os.path.join(DOWNLOAD_FOLDER, filename)

            # Download file
            download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
            if download_with_retry(download_url, dest_path):
                # Process file
                if process_rar_file(dest_path):
                    processed_rars.append(filename)
                    save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
                    log_message(f"βœ… Successfully processed: {filename}")
                    processing_status["processed_files"] += 1
                else:
                    log_message(f"❌ Failed to process: {filename}")
                    processing_status["failed_files"] += 1

                # Clean up downloaded file
                try:
                    os.remove(dest_path)
                except OSError as e:
                    # Catch only filesystem errors: a bare ``except`` here
                    # would also swallow Ctrl-C and bypass the
                    # KeyboardInterrupt handler below.
                    log_message(f"[WARN] Could not remove download {filename}: {e}")
                else:
                    log_message(f"πŸ—‘οΈ Cleaned up download: {filename}")
            else:
                log_message(f"❌ Failed to download: {filename}")
                processing_status["failed_files"] += 1

            # Update download state for next run
            # The index advances even when the download or processing failed,
            # so a failing file is not retried on the next run.
            next_index += 1
            save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})

            # Status update
            log_message(f"πŸ“Š Progress: {next_index}/{len(rar_files)} files processed")
            log_message(f'πŸ“Š Extracted: {processing_status["extracted_courses"]} courses')
            log_message(f'πŸ“Š Videos Processed: {processing_status["extracted_videos"]}')
            log_message(f'πŸ“Š Frames Extracted: {processing_status["extracted_frames_count"]}')
            log_message(f'πŸ“Š Cursors Tracked: {processing_status["tracked_cursors_count"]}')
            log_message(f'πŸ“Š Failed: {processing_status["failed_files"]} files')

            if next_index < len(rar_files):
                log_message(f"πŸ”„ Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
            else:
                log_message("πŸŽ‰ All files have been processed!")
        else:
            log_message("βœ… All files have been processed!")

        log_message("πŸŽ‰ Processing complete!")
        log_message(f'πŸ“Š Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["tracked_cursors_count"]} cursors tracked')

    except KeyboardInterrupt:
        log_message("⏹️ Processing interrupted by user")
    except Exception as e:
        log_message(f"❌ Fatal error: {str(e)}")
    finally:
        processing_status["is_running"] = False
        cleanup_temp_files()
581
+
582
# Expose necessary functions and variables for download_api.py
# Names listed here form the module's public API (what ``from <module> import *`` exports).
__all__ = [
    "main_processing_loop",
    "processing_status",
    "CURSOR_TRACKING_OUTPUT_FOLDER",
    "CURSOR_TEMPLATES_DIR",
    "log_message",
    "send_email_with_attachment",
    "track_cursor",
    "extract_frames",
    "DEFAULT_FPS",
    "CURSOR_THRESHOLD",
    "ensure_dir"
]
596
+
597
+