# Spaces: favoredone / TRA (Space status shown on the page: Runtime error)
# TRA
# File size: 33,488 Bytes
# 016ee5a

import os
import json
import requests
import subprocess
import shutil
import time
import re
import threading
import multiprocessing
from typing import Dict, List, Set, Optional
from electron_processing import ElectronSpeedVideoProcessor
from huggingface_hub import HfApi, list_repo_files

import cv2
import numpy as np
from pathlib import Path
import smtplib
from email.message import EmailMessage

# ==== CONFIGURATION ====
# Hugging Face token and source dataset repo; both can be overridden through
# environment variables (the token defaults to an empty string).
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")

# Path Configuration (relative paths, resolved against the working directory)
DOWNLOAD_FOLDER = "downloads"
EXTRACT_FOLDER = "extracted"
FRAMES_OUTPUT_FOLDER = "extracted_frames" # Frames extracted from videos
CURSOR_TRACKING_OUTPUT_FOLDER = "cursor_tracking_results" # Cursor tracking results
CURSOR_TEMPLATES_DIR = "cursors"

# NOTE: these directories are created as an import-time side effect.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(EXTRACT_FOLDER, exist_ok=True)
os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
os.makedirs(CURSOR_TRACKING_OUTPUT_FOLDER, exist_ok=True)
os.makedirs(CURSOR_TEMPLATES_DIR, exist_ok=True) # Ensure cursor templates directory exists

# State Files (JSON progress files plus an append-only failure log)
DOWNLOAD_STATE_FILE = "download_progress.json"
PROCESS_STATE_FILE = "process_progress.json"
FAILED_FILES_LOG = "failed_files.log"

# Processing Parameters
# NOTE(review): CHUNK_SIZE, PROCESSING_DELAY and MAX_RETRIES are not referenced
# by the code visible in this file; confirm whether other modules use them.
CHUNK_SIZE = 1
PROCESSING_DELAY = 2
MAX_RETRIES = 3
MIN_FREE_SPACE_GB = 2  # Minimum free space in GB before processing

# Frame Extraction Parameters
DEFAULT_FPS = 3 # Default frames per second for extraction

# Cursor Tracking Parameters
# Minimum template-match score for a match to count as a detected cursor.
CURSOR_THRESHOLD = 0.8

# Initialize HF API
hf_api = HfApi(token=HF_TOKEN)

# Global State
# Mutable status dictionary updated in place by the functions below.
processing_status = {
    "is_running": False,
    "current_file": None,
    "total_files": 0,
    "processed_files": 0,
    "failed_files": 0,
    "extracted_courses": 0,
    "extracted_videos": 0,
    "extracted_frames_count": 0,
    "tracked_cursors_count": 0,
    "last_update": None,
    "logs": []  # trimmed to the newest 100 entries by log_message
}

def log_message(message: str):
    """Print a timestamped message and mirror it into ``processing_status``.

    The formatted entry is appended to ``processing_status["logs"]``,
    ``last_update`` is set to the entry's timestamp, and the log list is cut
    back to its newest 100 entries once it grows beyond that.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{now}] {message}"
    print(entry)

    history = processing_status["logs"]
    history.append(entry)
    processing_status["last_update"] = now

    # Bound the in-memory log so the status dictionary stays small.
    if len(history) > 100:
        processing_status["logs"] = history[-100:]

def log_failed_file(filename: str, error: str):
    """Append one ``<timestamp> - <filename>: <error>`` line to ``FAILED_FILES_LOG``."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    line = f"{stamp} - {filename}: {error}\n"
    with open(FAILED_FILES_LOG, "a") as failure_log:
        failure_log.write(line)

def get_disk_usage(path: str) -> Dict[str, float]:
    """Return disk usage of the filesystem containing *path*, in GiB.

    The result has the keys ``"total"``, ``"free"`` and ``"used"``; the raw
    byte counts come from ``shutil.disk_usage`` and are divided by 1024**3.
    """
    bytes_per_gib = 1024**3
    usage = shutil.disk_usage(path)
    return {
        "total": usage.total / bytes_per_gib,
        "free": usage.free / bytes_per_gib,
        "used": usage.used / bytes_per_gib,
    }

def check_disk_space(path: str = ".") -> bool:
    """Report whether *path* has at least ``MIN_FREE_SPACE_GB`` of free space.

    Returns True when enough space is available. Otherwise a low-space warning
    is logged and False is returned.
    """
    usage = get_disk_usage(path)
    enough_space = not (usage["free"] < MIN_FREE_SPACE_GB)
    if not enough_space:
        log_message(f'⚠️ Low disk space: {usage["free"]:.2f}GB free, {usage["used"]:.2f}GB used')
    return enough_space

def cleanup_temp_files():
    """Delete leftover ``*.tmp`` partial downloads from ``DOWNLOAD_FOLDER``.

    Only files whose name ends in ``.tmp`` are touched. Cleanup is best effort:
    a file that cannot be removed is reported through ``log_message`` and
    skipped, and an unreadable or missing download folder ends the cleanup
    with a log entry instead of raising (this function is called from a
    ``finally`` block, where an exception would mask the real error).
    """
    log_message("🧹 Cleaning up only partial download files...")

    try:
        entries = os.listdir(DOWNLOAD_FOLDER)
    except OSError as exc:
        log_message(f"⚠️ Could not list {DOWNLOAD_FOLDER}: {exc}")
        return

    # Only clean .tmp files from failed downloads
    for name in entries:
        if not name.endswith(".tmp"):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_FOLDER, name))
        except OSError as exc:
            # Previously swallowed silently; keep going but leave a trace.
            log_message(f"⚠️ Could not remove partial download {name}: {exc}")
        else:
            log_message(f"🗑️ Removed partial download: {name}")

def load_json_state(file_path: str, default_value):
    """Return the JSON content of *file_path*, or *default_value* as a fallback.

    The fallback is used when the file does not exist or when its content is
    not valid JSON (the latter is also logged as a corrupted state file).
    """
    if not os.path.exists(file_path):
        return default_value

    try:
        with open(file_path, "r") as state_file:
            state = json.load(state_file)
    except json.JSONDecodeError:
        log_message(f"⚠️ Corrupted state file: {file_path}")
        return default_value
    return state

def save_json_state(file_path: str, data):
    """Atomically write *data* as indented JSON to *file_path*.

    The JSON is first written to ``<file_path>.tmp`` and then moved over the
    target with ``os.replace``. An interrupted or failed write (for example a
    non-serialisable value) therefore leaves the previous state file intact
    instead of truncating it.

    Raises:
        TypeError / ValueError: if *data* cannot be serialised to JSON.
        OSError: if the file cannot be written or replaced.
    """
    temp_path = f"{file_path}.tmp"
    try:
        with open(temp_path, "w") as f:
            json.dump(data, f, indent=2)
        os.replace(temp_path, file_path)
    except BaseException:
        # Do not leave a half-written temp file behind; the old state survives.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise

def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
    """Download *url* to *dest_path* with retries and disk-space checks.

    The payload is streamed to ``<dest_path>.tmp`` and only moved to
    *dest_path* after the stream completed, so *dest_path* never holds a
    partial file.

    Retry policy:
      * up to *max_retries* attempts with a linearly growing pause between
        them for connection errors, timeouts and other request errors;
      * if (and only if) an attempt fails with a DNS resolution error, the
        whole attempt series is restarted after a 30 second pause, at most
        three times in total.

    Args:
        url: Address of the file to download.
        dest_path: Final location of the downloaded file.
        max_retries: Attempts per series.

    Returns:
        True when the file was downloaded and moved into place, False when
        disk space is insufficient, the file is too large, or all attempts
        failed.
    """
    if not check_disk_space():
        log_message("❌ Insufficient disk space for download")
        return False

    # Only send an Authorization header when a token is configured; an empty
    # "Bearer " value is not a usable credential.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    temp_path = dest_path + ".tmp"
    dns_rounds = 3

    for dns_attempt in range(dns_rounds):
        dns_failure = False

        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                # Test connection first
                test_response = requests.head(url, headers=headers, timeout=10)
                test_response.raise_for_status()

                # If connection test successful, proceed with download
                with requests.get(url, headers=headers, stream=True, timeout=30) as r:
                    r.raise_for_status()

                    # Check content length if available
                    content_length = r.headers.get("content-length")
                    if content_length:
                        size_gb = int(content_length) / (1024**3)
                        disk_info = get_disk_usage(".")
                        if size_gb > disk_info["free"] - 0.5:  # Leave 0.5GB buffer
                            log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
                            return False

                    with open(temp_path, "wb") as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            if chunk:  # Filter out keep-alive chunks
                                f.write(chunk)

                # Only rename if download completed successfully
                os.replace(temp_path, dest_path)
                return True

            except requests.exceptions.ConnectionError as ce:
                log_message(f"Connection error (attempt {attempt + 1}/{max_retries}): {str(ce)}")
                if "getaddrinfo failed" in str(ce) or "NameResolutionError" in str(ce):
                    # DNS issue - leave the attempt series and retry after a longer pause
                    dns_failure = True
                    break
                if not is_last_attempt:
                    time.sleep(5 * (attempt + 1))

            except requests.exceptions.Timeout as te:
                log_message(f"Timeout error (attempt {attempt + 1}/{max_retries}): {str(te)}")
                if not is_last_attempt:
                    time.sleep(5 * (attempt + 1))

            except requests.exceptions.RequestException as e:
                log_message(f"Download error (attempt {attempt + 1}/{max_retries}): {str(e)}")
                if is_last_attempt:
                    return False
                time.sleep(5 * (attempt + 1))

            except Exception as e:
                log_message(f"Unexpected error (attempt {attempt + 1}/{max_retries}): {str(e)}")
                if is_last_attempt:
                    return False
                time.sleep(5 * (attempt + 1))

        if not dns_failure:
            # The attempts were exhausted for a non-DNS reason; repeating the
            # whole series (and blaming DNS in the log) would be wrong.
            break

        if dns_attempt < dns_rounds - 1:  # If not last DNS attempt
            log_message(f"DNS resolution failed, waiting 30 seconds before retry {dns_attempt + 2}/3...")
            time.sleep(30)  # Longer wait for DNS issues

    log_message("❌ All download attempts failed")
    return False

def is_multipart_rar(filename: str) -> bool:
    """Return True when *filename* ends in ``.part<digits>.rar`` (any letter case).

    Matching the full suffix avoids treating single archives whose name merely
    contains ".part" (for example ``my.partial.rar``) as multi-part volumes.
    """
    return re.search(r"\.part\d+\.rar$", filename, re.IGNORECASE) is not None

def get_rar_part_base(filename: str) -> str:
    """Return the archive base name without its ``.partNN.rar`` / ``.rar`` suffix.

    For names containing ``.part`` (in any letter case) everything before the
    first occurrence is returned with its original case. Otherwise every
    ``.rar`` substring is removed from the name.
    """
    # Search on the lowercased name so ".PART01"/".Part01" are cut as well;
    # the original split was case-sensitive and returned such names unchanged.
    marker_index = filename.lower().find(".part")
    if marker_index != -1:
        return filename[:marker_index]
    return filename.replace(".rar", "")

def _first_volume_candidates(filename: str) -> List[str]:
    """Return likely file names of volume 1 for a ``.partN.rar`` name, best guess first."""
    match = re.search(r"\.part(\d+)\.rar$", filename, re.IGNORECASE)
    if match is None:
        # Unusual naming: fall back to the historical two-digit guess.
        return [f"{get_rar_part_base(filename)}.part01.rar"]

    prefix = filename[:match.start(1)]
    suffix = filename[match.end(1):]
    candidates: List[str] = []
    # Prefer the zero padding used by the given volume, then common widths.
    for width in (len(match.group(1)), 2, 1, 3):
        name = prefix + "1".zfill(width) + suffix
        if name not in candidates:
            candidates.append(name)
    return candidates


def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
    """Test and extract a RAR archive with the ``unrar`` command line tool.

    For multi-part archives the first volume is looked up next to *rar_path*
    (``.part1.rar``, ``.part01.rar`` and ``.part001.rar`` style numbering are
    recognised) and extraction starts from it.

    Each attempt first runs ``unrar t`` and then ``unrar x -o+``; from the
    second attempt on ``-kb`` (keep broken files) is added.

    Args:
        rar_path: Path to the archive (any volume of a multi-part set).
        output_dir: Directory the archive is extracted into.
        max_retries: Maximum number of test+extract attempts.

    Returns:
        True when extraction finished with exit code 0, otherwise False.
    """
    filename = os.path.basename(rar_path)

    # For multi-part RARs, we need the first part
    if is_multipart_rar(filename):
        folder = os.path.dirname(rar_path)
        candidates = _first_volume_candidates(filename)
        first_part = next(
            (name for name in candidates if os.path.exists(os.path.join(folder, name))),
            None,
        )
        if first_part is None:
            log_message(f"⚠️ Multi-part RAR detected but first part not found: {candidates[0]}")
            return False

        rar_path = os.path.join(folder, first_part)
        log_message(f"📦 Processing multi-part RAR starting with: {first_part}")

    for attempt in range(max_retries):
        try:
            # Test RAR first
            test_cmd = ["unrar", "t", rar_path]
            test_result = subprocess.run(test_cmd, capture_output=True, text=True)
            if test_result.returncode != 0:
                log_message(f"⚠️ RAR test failed: {test_result.stderr}")
                if attempt == max_retries - 1:
                    return False
                continue

            # Extract RAR
            cmd = ["unrar", "x", "-o+", rar_path, output_dir]
            if attempt > 0:  # Keep broken files on subsequent attempts
                cmd.insert(2, "-kb")

            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                log_message(f"✅ Successfully extracted: {os.path.basename(rar_path)}")
                return True

            error_msg = result.stderr or result.stdout
            log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")

            if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
                log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
            elif result.returncode == 10:
                log_message("⚠️ No files to extract (exit code 10)")
                return False
            elif result.returncode == 1:
                log_message("⚠️ Non-fatal error (exit code 1)")

        except FileNotFoundError as e:
            # The unrar executable itself is missing; retrying cannot help.
            log_message(f"❌ Extraction exception: {str(e)}")
            return False
        except Exception as e:
            log_message(f"❌ Extraction exception: {str(e)}")
            if attempt == max_retries - 1:
                return False
            time.sleep(1)

    return False
        
# --- Frame Extraction Utilities ---
def ensure_dir(path):
    """Create directory *path* including missing parents; an existing directory is not an error."""
    os.makedirs(path, exist_ok=True)

class ElectronUnit:
    """Base unit that hands out a time-derived "operations" budget.

    The physics-flavoured attributes are plain constants. Only
    ``switching_frequency`` and ``last_cycle_time`` take part in the budget
    computation in this class; ``electron_drift_velocity`` is read elsewhere
    in this file for log output.
    """

    def __init__(self, unit_id: int):
        self.unit_id = unit_id
        # Nominal parameters (values kept exactly as configured by the author)
        self.electron_drift_velocity = 1.96e7  # m/s in silicon
        self.switching_frequency = 8.92e85     # Hz
        self.path_length = 14e-9  # meters (14nm process node)
        self.traverse_time = 8.92e15
        self.ops_per_second = 9.98e15
        self.ops_per_cycle = int(self.ops_per_second / 1000)
        # Wall-clock timestamp of the last budget request (time.time()).
        self.last_cycle_time = time.time()

    def get_operations_this_cycle(self) -> int:
        """Return the operation budget accumulated since the previous call.

        The budget is ``switching_frequency * elapsed_seconds`` truncated to
        an int, so for any measurable elapsed time it is far larger than any
        realistic workload and callers clamp it with ``min(...)``. The former
        upper bound ``78.92e555`` evaluated to ``inf`` and never limited
        anything, so it is omitted.

        At least 1 is always returned: with a coarse or backwards-stepping
        clock the elapsed time can be zero or negative, and a budget <= 0
        would make callers skip work or spin without progress.
        """
        current_time = time.time()
        time_delta = current_time - self.last_cycle_time
        self.last_cycle_time = current_time
        operations = int(self.switching_frequency * time_delta)
        return max(1, operations)

def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
    """Save frames of *video_path* as numbered PNG files in *output_dir*.

    Roughly *fps* frames per second of video are kept: every
    ``round(video_fps / fps)``-th frame (at least every frame) is written as
    ``0001.png``, ``0002.png``, ... The video is read until the decoder
    reports the end, independent of the frame count the container reports.

    Args:
        video_path: Path of the video file.
        output_dir: Directory for the PNG files (created if missing).
        fps: Target number of saved frames per second of video.

    Returns:
        Number of frames written; 0 when the video cannot be opened.
    """
    log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
    ensure_dir(output_dir)

    # Only used for the drift-velocity figure in the summary log below.
    electron_unit = ElectronUnit(0)

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        log_message(f"[ERROR] Failed to open video file: {video_path}")
        return 0

    frame_idx = 0
    saved_count = 0
    start_time = time.time()
    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if not (video_fps and video_fps > 0):
            video_fps = 30
            log_message(f"[WARN] Using fallback FPS: {video_fps}")

        if not (fps and fps > 0):
            log_message(f"[WARN] Invalid target fps {fps}; using default {DEFAULT_FPS}")
            fps = DEFAULT_FPS

        # Never below 1: a target fps above the video fps would otherwise
        # round to 0 and make the modulo below divide by zero.
        frame_interval = max(1, int(round(video_fps / fps)))

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        log_message(f"[DEBUG] Total frames in video: {total_frames}")

        output_root = Path(output_dir)
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % frame_interval == 0:
                frame_name = f"{saved_count + 1:04d}.png"
                if cv2.imwrite(str(output_root / frame_name), frame):
                    saved_count += 1
                else:
                    log_message(f"[WARN] Could not write frame: {frame_name}")
            frame_idx += 1
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()

    # Log processing stats
    elapsed = time.time() - start_time
    frames_per_second = frame_idx / elapsed if elapsed > 0 else 0
    log_message("Electron-speed frame extraction complete:")
    log_message(f"Extracted {saved_count} frames from {video_path}")
    log_message(f"Processing speed: {frames_per_second:.2f} frames/s")
    log_message(f"Electron drift utilized: {electron_unit.electron_drift_velocity:.2e} m/s")

    return saved_count

# --- Cursor Tracking Utilities ---
def to_rgb(img):
    """Return *img* as a 3-channel image, or None when *img* is None.

    2-D (grayscale) input is expanded with ``COLOR_GRAY2BGR`` and 4-channel
    input is reduced with ``COLOR_BGRA2BGR``; any other input is returned
    unchanged. Despite the function name, the conversion codes used are the
    BGR ones.
    """
    if img is None:
        return None

    is_grayscale = len(img.shape) == 2
    if is_grayscale:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    has_alpha = img.shape[2] == 4
    return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR) if has_alpha else img

def get_mask_from_alpha(template_img):
    """Build a uint8 mask (255 where alpha > 0, else 0) from a 4-channel image.

    Returns None when *template_img* is None or is not a 3-D array with
    exactly four channels.
    """
    if template_img is None:
        return None

    shape = template_img.shape
    if len(shape) != 3 or shape[2] != 4:
        return None

    # Use alpha channel as mask (nonzero alpha = 255)
    alpha = template_img[:, :, 3]
    return (alpha > 0).astype(np.uint8) * 255

def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRESHOLD):
    """Find the best cursor-template match in *frame*.

    Every template in *cursor_templates* (mapping name -> image) is matched
    with ``cv2.TM_CCOEFF_NORMED``; templates with an alpha channel are matched
    with the alpha-derived mask.

    Returns:
        ``(position, confidence, template_name)`` where *position* is the
        ``(x, y)`` centre of the best match when its score reaches
        *threshold*. Otherwise ``(None, best_score, None)`` is returned, with
        ``best_score`` being -1 if no template could be evaluated.
    """
    best_pos = None
    best_conf = -1
    best_template_name = None
    frame_rgb = to_rgb(frame)

    for template_name, cursor_template in cursor_templates.items():
        template_rgb = to_rgb(cursor_template)
        mask = get_mask_from_alpha(cursor_template)
        if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
            log_message(f"[WARN] Skipping template {template_name} due to channel mismatch or load error.")
            continue
        try:
            result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
        except Exception as e:
            log_message(f"[WARN] matchTemplate failed for {template_name}: {e}")
            continue

        # Normalised (masked) matching can produce NaN/inf scores; neutralise
        # them so they can neither win as a bogus detection nor confuse
        # minMaxLoc.
        result[~np.isfinite(result)] = -1.0

        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val > best_conf:
            best_conf = max_val
            if max_val >= threshold:
                cursor_w, cursor_h = template_rgb.shape[1], template_rgb.shape[0]
                # Report the centre of the matched window, not its top-left corner.
                best_pos = (max_loc[0] + cursor_w // 2, max_loc[1] + cursor_h // 2)
                best_template_name = template_name

    if best_conf >= threshold:
        return best_pos, best_conf, best_template_name
    return None, best_conf, None

def send_email_with_attachment(subject, body, to_email, from_email, app_password, attachment_path):
    """Send a plain-text e-mail with one file attached via Gmail's SMTP-over-SSL endpoint.

    The attachment is read fully into memory and attached as
    ``application/octet-stream`` under its base file name. Reading the
    attachment happens outside the error handler and may raise; failures
    while connecting, logging in or sending are caught and logged.
    """
    attachment = Path(attachment_path)

    message = EmailMessage()
    message["Subject"] = subject
    message["From"] = from_email
    message["To"] = to_email
    message.set_content(body)
    message.add_attachment(
        attachment.read_bytes(),
        maintype="application",
        subtype="octet-stream",
        filename=attachment.name,
    )

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
            server.login(from_email, app_password)
            server.send_message(message)
        log_message(f"[SUCCESS] Email sent to {to_email}")
    except Exception as exc:
        log_message(f"[ERROR] Failed to send email: {exc}")

class ElectronCursorTracker(ElectronUnit):
    """Cursor detector based on OpenCV template matching.

    ``tracked_count`` and ``processed_frames`` are initialised to 0 and are
    not updated by the methods of this class.
    """

    def __init__(self, unit_id: int):
        super().__init__(unit_id)
        self.tracked_count = 0
        self.processed_frames = 0

    def to_rgb(self, img):
        """Return *img* as a 3-channel image (BGR conversion codes), or None for None."""
        if img is None:
            return None
        if len(img.shape) == 2:
            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        if img.shape[2] == 4:
            return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
        return img

    def get_mask_from_alpha(self, template_img):
        """Return a 0/255 uint8 mask from the alpha channel of a 4-channel image, else None."""
        if template_img is not None and len(template_img.shape) == 3 and template_img.shape[2] == 4:
            return (template_img[:, :, 3] > 0).astype(np.uint8) * 255
        return None

    def detect_cursor_in_frame(self, frame, cursor_templates, threshold):
        """Match every template against *frame* and return the best hit.

        Returns:
            ``(position, confidence, template_name)``. *position* and
            *template_name* are None unless the best score reaches
            *threshold*; *confidence* is the best score seen (-1 when no
            template could be evaluated).
        """
        # Advance the unit's cycle clock as before. The returned budget is
        # deliberately NOT used to limit the number of templates: it is
        # derived from elapsed wall-clock time, so a coarse clock could
        # otherwise silently skip templates and miss the cursor.
        self.get_operations_this_cycle()

        best_pos = None
        best_conf = -1
        best_template_name = None

        frame_rgb = self.to_rgb(frame)
        if frame_rgb is None:
            return best_pos, best_conf, best_template_name

        for template_name, cursor_template in cursor_templates.items():
            template_rgb = self.to_rgb(cursor_template)
            if template_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
                continue
            mask = self.get_mask_from_alpha(cursor_template)

            try:
                result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
                # Normalised (masked) matching can yield NaN/inf scores;
                # neutralise them so they cannot win as a detection.
                result[~np.isfinite(result)] = -1.0
                _, max_val, _, max_loc = cv2.minMaxLoc(result)
            except Exception as e:
                log_message(f"[WARN] Template matching failed for {template_name}: {e}")
                continue

            if max_val > best_conf:
                best_conf = max_val
                if max_val >= threshold:
                    cursor_w, cursor_h = template_rgb.shape[1], template_rgb.shape[0]
                    # Centre of the matched window.
                    best_pos = (max_loc[0] + cursor_w // 2, max_loc[1] + cursor_h // 2)
                    best_template_name = template_name

        return best_pos, best_conf, best_template_name

def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=CURSOR_THRESHOLD, start_frame=1, email_results=False):
    """Detect the cursor in every numbered PNG frame and write the results as JSON.

    Frames are the ``*.png`` files in *frames_dir* whose stem is an integer;
    they are visited once each in numeric order. Frames numbered below
    *start_frame*, frames with a non-numeric name and frames that cannot be
    loaded are skipped. One result record per evaluated frame is written to
    *output_json_path*.

    Args:
        frames_dir: Directory with the extracted frames.
        cursor_templates_dir: Directory with the ``*.png`` cursor templates.
        output_json_path: Destination of the JSON result list.
        threshold: Minimum match score for a detection.
        start_frame: First frame number to evaluate.
        email_results: When True, mail the JSON file using the ``TO_EMAIL``,
            ``FROM_EMAIL`` and ``GMAIL_APP_PASSWORD`` environment variables.

    Returns:
        Number of frames in which a cursor was detected (0 when no template
        could be loaded).
    """
    log_message(f"[INFO] Tracking cursors in {frames_dir}...")
    frames_dir = Path(frames_dir).resolve()
    output_json_path = Path(output_json_path).resolve()
    cursor_templates_dir = Path(cursor_templates_dir).resolve()
    ensure_dir(frames_dir)
    ensure_dir(output_json_path.parent)

    tracker = ElectronCursorTracker(0)

    # Load cursor templates (sorted so ties are resolved deterministically)
    cursor_templates = {}
    for template_file in sorted(cursor_templates_dir.glob("*.png")):
        template_img = cv2.imread(str(template_file), cv2.IMREAD_UNCHANGED)
        if template_img is not None:
            cursor_templates[template_file.name] = template_img
        else:
            log_message(f"[WARN] Could not load template: {template_file}")

    if not cursor_templates:
        log_message(f"[ERROR] No cursor templates found in: {cursor_templates_dir}")
        return 0

    # Collect frames with their number; sort numerically so that e.g.
    # 10000.png comes after 9999.png (a plain name sort would not).
    numbered_frames = []
    for frame_file in frames_dir.glob("*.png"):
        try:
            numbered_frames.append((int(frame_file.stem), frame_file))
        except ValueError:
            log_message(f"[WARN] Skipping frame with non-numeric name: {frame_file.name}")
    numbered_frames.sort(key=lambda item: (item[0], item[1].name))
    frame_count = len(numbered_frames)

    results = []
    tracked_count = 0
    processed_count = 0
    start_time = time.time()

    # Single pass: every frame is visited exactly once, so skipped or
    # unreadable frames can neither be processed twice nor stall the loop.
    for frame_num, frame_file in numbered_frames:
        if frame_num < start_frame:
            continue

        frame = cv2.imread(str(frame_file), cv2.IMREAD_UNCHANGED)
        if frame is None:
            log_message(f"[WARN] Could not load frame: {frame_file}")
            continue

        pos, conf, template_name = tracker.detect_cursor_in_frame(frame, cursor_templates, threshold)
        found = pos is not None
        results.append({
            "frame": frame_file.name,
            "cursor_active": found,
            "x": pos[0] if found else None,
            "y": pos[1] if found else None,
            "confidence": conf,
            "template": template_name if found else None
        })
        if found:
            tracked_count += 1

        processed_count += 1

        # Log progress periodically
        if processed_count % 100 == 0:
            elapsed = time.time() - start_time
            fps = processed_count / elapsed if elapsed > 0 else 0
            log_message(f"Processed {processed_count}/{frame_count} frames at {fps:.2f} fps")

    try:
        with open(output_json_path, "w") as f:
            json.dump(results, f, indent=2)
    except Exception as e:
        # Best effort: report the problem but still return the count.
        log_message(f"[ERROR] Failed to write output JSON: {e}")
        return tracked_count
    log_message(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")

    if email_results:
        log_message("[INFO] Preparing to email results...")
        to_email = os.environ.get("TO_EMAIL")
        from_email = os.environ.get("FROM_EMAIL")
        app_password = os.environ.get("GMAIL_APP_PASSWORD")
        if not (to_email and from_email and app_password):
            log_message("[ERROR] Email environment variables not set. Please set TO_EMAIL, FROM_EMAIL, and GMAIL_APP_PASSWORD.")
        else:
            try:
                send_email_with_attachment(
                    subject="Cursor Tracking Results",
                    body="See attached JSON results.",
                    to_email=to_email,
                    from_email=from_email,
                    app_password=app_password,
                    attachment_path=output_json_path
                )
            except Exception as e:
                # Kept separate so mail problems are not reported as JSON write errors.
                log_message(f"[ERROR] Failed to send email: {e}")

    return tracked_count

def process_rar_file(rar_path: str) -> bool:
    """Extract one RAR archive and run the video processor on its videos.

    The archive is extracted into ``EXTRACT_FOLDER/<course name>`` (an
    existing directory of that name is removed first). All ``.mp4``, ``.avi``,
    ``.mov`` and ``.mkv`` files found below it are handed to an
    ``ElectronSpeedVideoProcessor``, and the counters in ``processing_status``
    are updated from the processor's totals.

    Returns:
        True on success (also when the archive contains no videos), False
        when any step raised; failures are logged and recorded in the
        failed-files log. ``processing_status["current_file"]`` is reset to
        None in every case.
    """
    filename = os.path.basename(rar_path)
    processing_status["current_file"] = filename

    # Multi-part volumes share one course name derived from their base name.
    course_name = get_rar_part_base(filename) if is_multipart_rar(filename) else filename.replace(".rar", "")
    extract_dir = os.path.join(EXTRACT_FOLDER, course_name)

    try:
        log_message(f"🔄 Processing: {filename}")

        # Start from a clean extraction directory.
        if os.path.exists(extract_dir):
            shutil.rmtree(extract_dir, ignore_errors=True)
        os.makedirs(extract_dir, exist_ok=True)

        if not extract_with_retry(rar_path, extract_dir):
            raise Exception("RAR extraction failed")

        # Count every extracted file and collect the videos among them.
        video_suffixes = (".mp4", ".avi", ".mov", ".mkv")
        file_count = 0
        video_files_found = []
        for root, _subdirs, names in os.walk(extract_dir):
            file_count += len(names)
            video_files_found.extend(
                os.path.join(root, name)
                for name in names
                if name.lower().endswith(video_suffixes)
            )
        video_count = len(video_files_found)

        processing_status["extracted_courses"] += 1
        log_message(f"✅ Successfully extracted '{course_name}' ({file_count} files, {video_count} videos)")

        if not video_files_found:
            log_message(f"[WARN] No video files found in {course_name}")
            return True

        log_message(f"[INFO] Processing {video_count} videos with electron-speed processing")

        processor = ElectronSpeedVideoProcessor(num_cores=multiprocessing.cpu_count())
        frames_output_base = os.path.join(FRAMES_OUTPUT_FOLDER, course_name)
        processor.process_videos(video_files_found, frames_output_base, CURSOR_TEMPLATES_DIR)

        # Fold the processor's totals into the global status.
        processing_status["extracted_videos"] += video_count
        processing_status["extracted_frames_count"] += processor.total_frames
        processing_status["tracked_cursors_count"] += processor.total_cursors

        elapsed = time.time() - processor.start_time
        frames_per_second = processor.total_frames / elapsed if elapsed > 0 else 0

        log_message("[INFO] Electron-speed processing complete:")
        log_message(f"[INFO] Processed {video_count} videos")
        log_message(f"[INFO] Extracted {processor.total_frames} frames")
        log_message(f"[INFO] Tracked {processor.total_cursors} cursors")
        log_message(f"[INFO] Processing speed: {frames_per_second:.2f} frames/s")
        log_message(f"[INFO] Electron drift utilized: {processor.cores[0].units[0].electron_drift_velocity:.2e} m/s")
        return True

    except Exception as exc:
        error_msg = str(exc)
        log_message(f"❌ Processing failed: {error_msg}")
        log_failed_file(filename, error_msg)
        return False

    finally:
        processing_status["current_file"] = None

from electron_processing import ElectronSpeedVideoProcessor

def main_processing_loop(start_index: int = 0):
    """Download and process ONE RAR file from the source repo, then persist progress.

    The file to handle is chosen from the sorted list of ``.rar`` files in
    ``SOURCE_REPO_ID``: index *start_index* when it is > 0, otherwise the
    ``next_download_index`` stored in ``DOWNLOAD_STATE_FILE``. After the file
    has been handled (successfully or not) the stored index is advanced by
    one, so the next run continues with the following file.

    ``processing_status`` is updated throughout; ``is_running`` is cleared and
    partial downloads are removed when the function ends, whatever the
    outcome.

    Args:
        start_index: Explicit index to process; values <= 0 mean "use the
            saved state".
    """
    processing_status["is_running"] = True

    try:
        # Load state; tolerate state files that lack the expected keys.
        process_state = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})
        download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 25})
        processed_rars = process_state.get("processed_rars", []) if isinstance(process_state, dict) else []
        # NOTE(review): 25 is the start index used when no state exists -
        # confirm this default is intended.
        saved_index = download_state.get("next_download_index", 25) if isinstance(download_state, dict) else 25

        # Use start_index if provided, otherwise use the saved state
        next_index = start_index if start_index > 0 else saved_index

        log_message(f"📊 Starting from index {next_index}")
        log_message(f"📊 Previously processed: {len(processed_rars)} files")

        # Get file list
        try:
            files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
            rar_files = sorted([f for f in files if f.endswith(".rar")])
        except Exception as e:
            log_message(f"❌ Failed to get file list: {str(e)}")
            return

        processing_status["total_files"] = len(rar_files)
        log_message(f"📁 Found {len(rar_files)} RAR files in repository")

        if next_index >= len(rar_files):
            log_message("✅ All files have been processed!")
            return

        # Process only one file per run
        rar_file = rar_files[next_index]
        filename = os.path.basename(rar_file)

        if filename in processed_rars:
            log_message(f"⏭️ Skipping already processed: {filename}")
            processing_status["processed_files"] += 1
            # Move to next file
            next_index += 1
            save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
            log_message(f"📊 Moving to next file. Progress: {next_index}/{len(rar_files)}")
            return

        log_message(f"📥 Downloading: {filename}")
        dest_path = os.path.join(DOWNLOAD_FOLDER, filename)

        # Download file
        download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{rar_file}"
        if download_with_retry(download_url, dest_path):
            # Process file (process_rar_file creates its own video processor)
            if process_rar_file(dest_path):
                processed_rars.append(filename)
                save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
                log_message(f"✅ Successfully processed: {filename}")
                processing_status["processed_files"] += 1
            else:
                log_message(f"❌ Failed to process: {filename}")
                processing_status["failed_files"] += 1

            # Keep downloaded file
            log_message(f"💾 Keeping downloaded file: {filename}")
        else:
            log_message(f"❌ Failed to download: {filename}")
            processing_status["failed_files"] += 1

        # Update download state for next run (failed files are not retried;
        # they are recorded in the failure counters/logs instead)
        next_index += 1
        save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})

        # Status update
        log_message(f"📊 Progress: {next_index}/{len(rar_files)} files processed")
        log_message(f'📊 Extracted: {processing_status["extracted_courses"]} courses')
        log_message(f'📊 Videos Processed: {processing_status["extracted_videos"]}')
        log_message(f'📊 Frames Extracted: {processing_status["extracted_frames_count"]}')
        log_message(f'📊 Cursors Tracked: {processing_status["tracked_cursors_count"]}')
        log_message(f'📊 Failed: {processing_status["failed_files"]} files')

        if next_index < len(rar_files):
            log_message(f"🔄 Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
        else:
            log_message("🎉 All files have been processed!")

        log_message("🎉 Processing complete!")
        log_message(f'📊 Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["tracked_cursors_count"]} cursors tracked')

    except KeyboardInterrupt:
        log_message("⏹️ Processing interrupted by user")
    except Exception as e:
        log_message(f"❌ Fatal error: {str(e)}")
    finally:
        processing_status["is_running"] = False
        cleanup_temp_files()

# Public names of this module (per the original note, consumed by
# download_api.py). This list controls what ``from <module> import *``
# exports; names not listed here remain importable explicitly.
__all__ = [
    "main_processing_loop",
    "processing_status",
    "CURSOR_TRACKING_OUTPUT_FOLDER",
    "CURSOR_TEMPLATES_DIR",
    "log_message",
    "send_email_with_attachment",
    "track_cursor",
    "extract_frames",
    "DEFAULT_FPS",
    "CURSOR_THRESHOLD",
    "ensure_dir"
]