Samfredoly commited on
Commit
445d058
·
verified ·
1 Parent(s): 5a560c5

Rename download_api.py to app.py

Browse files
Files changed (2) hide show
  1. app.py +534 -0
  2. download_api.py +0 -407
app.py ADDED
@@ -0,0 +1,534 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ import subprocess
5
+ import shutil
6
+ import time
7
+ import sys
8
+ from typing import Dict, List, Optional, Any
9
+ from huggingface_hub import HfApi, hf_hub_url
10
+
11
+ # Fix Unicode encoding for Windows
12
+ if sys.platform == 'win32':
13
+ import io
14
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
15
+
16
# ==== CONFIGURATION ====
# NOTE(review): token is blank — hf_api below runs unauthenticated unless this
# is filled in; prefer reading it from an environment variable over hard-coding.
HF_TOKEN = ""
SOURCE_REPO_ID = "Fred808/BG3" # Fetch audio files from here
TARGET_REPO_ID = "Samfredoly/BG_Tran" # Upload transcriptions here
REFERENCE_REPO_ID = "Samfredoly/BG_Vid" # Reference repo to match audio filenames

# Path Configuration
DOWNLOAD_FOLDER = "downloads_audio"       # temporary local audio downloads
TRANSCRIPTIONS_FOLDER = "transcriptions"  # local JSON output before upload
LOCAL_STATE_FOLDER = ".state_audio"       # local copy of the shared Hub state file

# Created eagerly at import time so later code can assume they exist.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)
os.makedirs(TRANSCRIPTIONS_FOLDER, exist_ok=True)
os.makedirs(LOCAL_STATE_FOLDER, exist_ok=True)

# State Files
FAILED_FILES_LOG = "failed_audio_files.log"    # append-only failure log (log_failed_file)
HF_STATE_FILE = "processing_audio_state.json"  # shared state pushed to TARGET_REPO_ID

# Processing Parameters
PROCESSING_DELAY = 2   # seconds between loop iterations / download retries
MAX_RETRIES = 3        # NOTE(review): unused — download_with_retry has its own default of 3
MIN_FREE_SPACE_GB = 1  # threshold used by check_disk_space
WHISPER_MODEL = "small" # Whisper model size

# Initialize HF API
hf_api = HfApi(token=HF_TOKEN)

# Global State: in-process status dict, mutated by log_message and the main loop.
processing_status = {
    "is_running": False,   # guards against starting the loop twice
    "current_file": None,  # file currently protected from cleanup_temp_files
    "total_files": 0,
    "processed_files": 0,
    "failed_files": 0,
    "transcribed_files": 0,
    "last_update": None,
    "logs": []             # rolling window of the most recent 100 log lines
}
55
+
56
def log_message(message: str, level: str = "INFO"):
    """Print a timestamped log line and record it in the shared status dict."""
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{now}] {level}: {message}"
    print(entry)
    logs = processing_status["logs"]
    logs.append(entry)
    processing_status["last_update"] = now
    # Keep only the most recent 100 entries to bound memory use.
    if len(logs) > 100:
        processing_status["logs"] = logs[-100:]
65
+
66
def log_failed_file(filename: str, error: str):
    """Append a failure record (timestamp, filename, error) to the persistent log.

    Bug fix: the original wrote a literal "(unknown)" placeholder instead of
    the actual filename (the parameter was unused), making the failure log
    useless for triage and retries.
    """
    with open(FAILED_FILES_LOG, "a", encoding="utf-8") as f:
        f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} - {filename}: {error}\n")
70
+
71
def get_disk_usage(path: str) -> Dict[str, float]:
    """Return disk usage for *path* in GiB as {"total", "free", "used"}.

    Uses shutil.disk_usage, which works on both POSIX and Windows.
    The previous implementation used os.statvfs, which does not exist on
    Windows even though this script explicitly supports win32 (see the
    stdout re-wrapping at the top of the file).

    "free" is space available to the current user (statvfs f_bavail
    semantics, same as before); "used" is total - free, preserving the
    original definition.
    """
    usage = shutil.disk_usage(path)
    gib = 1024 ** 3
    total = usage.total / gib
    free = usage.free / gib
    return {"total": total, "free": free, "used": total - free}
78
+
79
def check_disk_space(path: str = ".") -> bool:
    """Return True when at least MIN_FREE_SPACE_GB of space is free at *path*.

    Logs a warning with the current free/used figures when space is low.
    """
    usage = get_disk_usage(path)
    if usage["free"] >= MIN_FREE_SPACE_GB:
        return True
    log_message(f'⚠️ Low disk space: {usage["free"]:.2f}GB free, {usage["used"]:.2f}GB used')
    return False
86
+
87
def cleanup_temp_files():
    """Delete stale audio downloads, sparing the file currently in flight.

    Only .wav/.mp3 files inside DOWNLOAD_FOLDER are touched. Removal errors
    are still ignored (deliberate best-effort cleanup), but the bare
    ``except:`` is narrowed to OSError so programming errors are no longer
    silently swallowed.
    """
    log_message("🧹 Cleaning up temporary files...", "INFO")

    current_file = processing_status.get("current_file")
    for name in os.listdir(DOWNLOAD_FOLDER):
        if name == current_file or not name.endswith((".wav", ".mp3")):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_FOLDER, name))
            log_message(f"🗑️ Removed old download: {name}", "INFO")
        except OSError:
            # File vanished or is locked — safe to skip; it will be retried.
            pass
99
+
100
def load_json_state(file_path: str, default_value: Dict[str, Any]) -> Dict[str, Any]:
    """Load processing state from a JSON file, migrating older layouts.

    Guarantees the returned dict contains a "file_states" dict and a
    "next_download_index" key. Returns *default_value* when the file is
    missing, unreadable, or corrupt.

    Fix: the original caught only json.JSONDecodeError, so an OSError on
    open (file vanished, permissions) crashed the whole processing loop.
    """
    if not os.path.exists(file_path):
        return default_value

    try:
        with open(file_path, "r") as f:
            data = json.load(f)
    except (json.JSONDecodeError, OSError):
        log_message(f"⚠️ Corrupted state file: {file_path}", "WARNING")
        return default_value

    # Migration: older state files predate these two keys.
    if "file_states" not in data or not isinstance(data["file_states"], dict):
        log_message("ℹ️ Initializing 'file_states' dictionary.", "INFO")
        data["file_states"] = {}
    if "next_download_index" not in data:
        data["next_download_index"] = 0

    return data
118
+
119
def save_json_state(file_path: str, data: Dict[str, Any]):
    """Serialize *data* to *file_path* as pretty-printed JSON."""
    with open(file_path, "w") as handle:
        json.dump(data, handle, indent=2)
123
+
124
def download_hf_state(repo_id: str, filename: str) -> Dict[str, Any]:
    """Downloads the state file from Hugging Face or returns a default state.

    Lists the repo first so a missing state file is handled without raising;
    any Hub/network error also falls back to the default (index 0, no file
    states), i.e. the loop degrades to a fresh start rather than crashing.
    """
    local_path = os.path.join(LOCAL_STATE_FOLDER, filename)
    default_state = {"next_download_index": 0, "file_states": {}}

    try:
        files = hf_api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        if filename not in files:
            # NOTE(review): "(unknown)" below looks like a scrubbed placeholder
            # for the filename — confirm against the original source.
            log_message(f"ℹ️ State file (unknown) not found in {repo_id}. Starting from default state.", "INFO")
            return default_state

        # Imported locally; top of file only imports HfApi/hf_hub_url.
        from huggingface_hub import hf_hub_download
        hf_hub_download(
            repo_id=repo_id,
            filename=filename,
            repo_type="dataset",
            local_dir=LOCAL_STATE_FOLDER,
            # NOTE(review): local_dir_use_symlinks is deprecated in newer
            # huggingface_hub releases — verify the installed version accepts it.
            local_dir_use_symlinks=False
        )

        log_message(f"✅ Successfully downloaded state file from {repo_id}.", "INFO")
        # load_json_state also migrates older layouts and survives corruption.
        return load_json_state(local_path, default_state)

    except Exception as e:
        log_message(f"⚠️ Failed to download state file from Hugging Face: {str(e)}. Starting from default state.", "WARNING")
        return default_state
150
+
151
def upload_hf_state(repo_id: str, filename: str, state: Dict[str, Any]) -> bool:
    """Uploads the state file to Hugging Face.

    Writes *state* to the local state folder first, then pushes that file to
    the dataset repo. Returns True on success, False on any failure (the
    caller decides whether a failed upload aborts or rolls back).
    """
    local_path = os.path.join(LOCAL_STATE_FOLDER, filename)

    try:
        # Persist locally first so the upload reads a complete, valid file.
        save_json_state(local_path, state)

        hf_api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=filename,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Update audio processing state: next_index={state['next_download_index']}"
        )
        log_message(f"✅ Successfully uploaded state file to {repo_id}", "INFO")
        return True
    except Exception as e:
        log_message(f"❌ Failed to upload state file to Hugging Face: {str(e)}", "ERROR")
        return False
170
+
171
def lock_file_for_processing(wav_filename: str, state: Dict[str, Any]) -> bool:
    """Mark *wav_filename* as 'processing' in *state* and push the lock to the Hub.

    When the upload fails, the local entry is rolled back so *state* is left
    exactly as it was, and False is returned.
    """
    log_message(f"🔒 Attempting to lock file: {wav_filename} (Marking as 'processing')", "INFO")

    state["file_states"][wav_filename] = "processing"

    if upload_hf_state(TARGET_REPO_ID, HF_STATE_FILE, state):
        log_message(f"✅ Successfully locked file: {wav_filename}", "INFO")
        return True

    log_message(f"❌ Failed to upload lock for file: {wav_filename}. Aborting processing.", "ERROR")
    # Roll back the optimistic local mutation.
    state["file_states"].pop(wav_filename, None)
    return False
185
+
186
def unlock_file_as_processed(wav_filename: str, state: Dict[str, Any], next_index: int) -> bool:
    """Mark *wav_filename* as 'processed', advance the index, and push the state."""
    log_message(f"🔓 Attempting to unlock file: {wav_filename} (Marking as 'processed')", "INFO")

    state["file_states"][wav_filename] = "processed"
    state["next_download_index"] = next_index

    if not upload_hf_state(TARGET_REPO_ID, HF_STATE_FILE, state):
        log_message(f"❌ Failed to upload final state for file: {wav_filename}.", "ERROR")
        return False

    log_message(f"✅ Successfully unlocked and marked as processed: {wav_filename}", "INFO")
    return True
199
+
200
def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
    """Stream *url* to *dest_path*, retrying transient HTTP failures.

    Checks free disk space first (running a cleanup pass when low), ensures
    the destination directory exists, then downloads in 8 KiB chunks with up
    to *max_retries* attempts. Returns True on success, False otherwise.

    Fixes over the original:
    - requests.get now carries a (connect, read) timeout so a stalled
      connection can no longer hang the worker forever;
    - a partially written file is removed when the download ultimately
      fails, so a later run cannot mistake it for a complete download;
    - os.makedirs is skipped when dest_path has no directory component
      (os.makedirs("") raises FileNotFoundError).
    """
    if not check_disk_space():
        cleanup_temp_files()
        if not check_disk_space():
            log_message("❌ Insufficient disk space even after cleanup", "ERROR")
            return False

    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        try:
            os.makedirs(dest_dir, exist_ok=True)
        except Exception as e:
            log_message(f"❌ Failed to create directory for download path {dest_dir}: {str(e)}", "ERROR")
            return False

    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    for attempt in range(max_retries):
        try:
            with requests.get(url, headers=headers, stream=True, timeout=(10, 60)) as r:
                r.raise_for_status()
                with open(dest_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)

            log_message(f"✅ Download successful: {dest_path}", "INFO")
            return True

        except requests.exceptions.RequestException as e:
            log_message(f"❌ Download attempt {attempt + 1} failed for {url}: {str(e)}", "WARNING")
            time.sleep(PROCESSING_DELAY)
        except Exception as e:
            # Non-HTTP failure (e.g. disk full mid-write): retrying won't help.
            log_message(f"❌ An unexpected error occurred during download: {str(e)}", "ERROR")
            break

    # All attempts failed — remove any truncated partial file (best effort).
    try:
        if os.path.exists(dest_path):
            os.remove(dest_path)
    except OSError:
        pass

    log_message(f"❌ Failed to download {url} after {max_retries} attempts.", "ERROR")
    return False
237
+
238
def fetch_reference_files(repo_id: str) -> Dict[str, str]:
    """Map base filename (no directory, no extension) -> full repo path for
    every file in *repo_id*.

    All file types are eligible (zip, rar, wav, mp3, ...). Returns an empty
    dict when the listing fails, which the caller treats as fatal.

    NOTE(review): if two repo files share a base name, the later listing
    entry silently wins — confirm that's acceptable for this dataset.
    """
    log_message(f"📋 Fetching file list from {repo_id}...", "INFO")

    try:
        files_list = hf_api.list_repo_files(repo_id=repo_id, repo_type="dataset")

        # Dict comprehension replaces the original's redundant full-list copy
        # followed by a manual loop.
        filename_map = {
            os.path.splitext(os.path.basename(file_path))[0]: file_path
            for file_path in files_list
        }

        log_message(f"✅ Found {len(filename_map)} files in reference repo", "INFO")
        return filename_map

    except Exception as e:
        log_message(f"❌ Failed to fetch reference files: {str(e)}", "ERROR")
        return {}
260
+
261
def find_matching_filename(transcribed_filename: str, reference_map: Dict[str, str]) -> Optional[str]:
    """Resolve an audio filename to its full path in the reference repo.

    Tries an exact base-name match first, then falls back to the first
    case-insensitive substring match (in either direction). Returns the
    matched repo path, or None when nothing matches.
    """
    base_name = os.path.splitext(transcribed_filename)[0]

    # Exact base-name hit wins outright.
    exact = reference_map.get(base_name)
    if exact is not None:
        print(f"\n✅ MATCH FOUND:")
        print(f" Audio: {transcribed_filename}")
        print(f" File: {exact}")
        log_message(f"✅ Found exact match: {transcribed_filename} -> {exact}", "INFO")
        return exact

    # Fuzzy pass: substring containment either way, case-insensitive.
    # Returns on the first hit instead of collecting every candidate.
    lowered = base_name.lower()
    for ref_base, ref_full_path in reference_map.items():
        ref_lower = ref_base.lower()
        if lowered in ref_lower or ref_lower in lowered:
            print(f"\n✅ PARTIAL MATCH FOUND:")
            print(f" Audio: {transcribed_filename}")
            print(f" File: {ref_full_path}")
            log_message(f"✅ Found partial match: {transcribed_filename} -> {ref_full_path}", "INFO")
            return ref_full_path

    print(f"\n❌ NO MATCH FOUND:")
    print(f" Audio: {transcribed_filename}")
    log_message(f"⚠️ No matching filename found for: {transcribed_filename}", "WARNING")
    return None
293
+
294
def transcribe_audio(wav_path: str) -> Optional[Dict[str, Any]]:
    """Transcribe an audio file with Whisper and return its result dict.

    Performance fix: the Whisper model is now loaded once and cached on the
    function object, instead of being re-loaded from disk for every single
    audio file (model loading dominated per-file cost in the original).
    The cache assumes WHISPER_MODEL does not change at runtime.

    Returns None when Whisper is not installed or transcription fails.
    """
    log_message(f"🎤 Transcribing audio file: {wav_path}", "INFO")

    try:
        import whisper

        model = getattr(transcribe_audio, "_model", None)
        if model is None:
            log_message(f"Loading Whisper {WHISPER_MODEL} model...", "INFO")
            model = whisper.load_model(WHISPER_MODEL)
            transcribe_audio._model = model

        result = model.transcribe(wav_path)

        log_message(f"✅ Successfully transcribed: {wav_path}", "INFO")
        return result

    except ImportError:
        log_message("❌ Whisper not installed. Install with: pip install openai-whisper", "ERROR")
        return None
    except Exception as e:
        log_message(f"❌ Failed to transcribe {wav_path}: {str(e)}", "ERROR")
        return None
317
+
318
def process_audio_file(wav_path: str, reference_map: Dict[str, str], matched_filename: str) -> bool:
    """
    Main processing logic for a single audio file:
    1. Transcribe using Whisper
    2. Save transcription as JSON (named after the matched reference file)
    3. Upload to HF dataset
    4. Clean up local files

    Returns True only when transcription, save, and upload all succeed.
    *reference_map* is unused here but kept for interface compatibility with
    the caller in main_processing_loop.
    """
    wav_filename = os.path.basename(wav_path)

    # 1. Transcribe audio
    transcription = transcribe_audio(wav_path)
    if transcription is None:
        log_failed_file(wav_filename, "Transcription failed")
        return False

    # 2. Save transcription as JSON. matched_filename may carry a repo
    # subdirectory, so the dirname is created before writing.
    json_filename = os.path.splitext(matched_filename)[0] + "_transcription.json"
    json_output_path = os.path.join(TRANSCRIPTIONS_FOLDER, json_filename)

    try:
        os.makedirs(os.path.dirname(json_output_path), exist_ok=True)

        with open(json_output_path, "w", encoding="utf-8") as f:
            json.dump(transcription, f, indent=2, ensure_ascii=False)

        log_message(f"✅ Saved transcription: {json_output_path}", "INFO")

    except Exception as e:
        log_message(f"❌ Failed to save transcription JSON: {str(e)}", "ERROR")
        log_failed_file(wav_filename, f"Failed to save JSON: {str(e)}")
        return False

    # 3. Upload to HF dataset
    try:
        path_in_repo = f"transcriptions/{json_filename}"

        hf_api.upload_file(
            path_or_fileobj=json_output_path,
            path_in_repo=path_in_repo,
            repo_id=TARGET_REPO_ID,
            repo_type="dataset",
            commit_message=f"Add transcription for: {matched_filename}"
        )
        log_message(f"✅ Successfully uploaded transcription: {json_filename}", "INFO")
        processing_status["transcribed_files"] += 1

    except Exception as e:
        log_message(f"❌ Failed to upload transcription to HF: {str(e)}", "ERROR")
        log_failed_file(wav_filename, f"Failed to upload: {str(e)}")
        return False

    # 4. Clean up the local JSON copy. Best-effort, but narrowed from the
    # original bare except so real bugs are not swallowed.
    try:
        os.remove(json_output_path)
        log_message(f"🗑️ Cleaned up local transcription file: {json_output_path}", "INFO")
    except OSError:
        pass

    return True
379
+
380
def get_next_file_to_process(repo_id: str, state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Finds the next audio file to process from the source repo.

    Audio files (.wav/.mp3) are sorted in descending name order and scanned
    from state["next_download_index"] onward; any file that is unseen or
    previously 'failed' is eligible. Files marked 'processing' (held by
    another worker) or 'processed' are skipped.

    Returns: { 'filename': str, 'url': str, 'index': int } or None.

    Fix: log messages now include the actual filename — the original emitted
    a literal "(unknown)" placeholder in three places.
    """
    log_message(f"🔍 Searching for next audio file to process in {repo_id}", "INFO")

    try:
        files_list = hf_api.list_repo_files(repo_id=repo_id, repo_type="dataset")

        # Filter for audio files and sort in reverse order (descending).
        audio_files = sorted((f for f in files_list if f.endswith(('.wav', '.mp3'))), reverse=True)

        if not audio_files:
            log_message("ℹ️ No audio files found in the source repository.", "INFO")
            return None

        processing_status["total_files"] = len(audio_files)

        start_index = state.get("next_download_index", 0)

        for index in range(start_index, len(audio_files)):
            filename = audio_files[index]
            file_state = state["file_states"].get(filename)

            if file_state is None or file_state == "failed":
                url = hf_hub_url(repo_id=repo_id, filename=filename, repo_type="dataset", subfolder=None)

                log_message(f"✅ Found next audio file: {filename} at index {index}", "INFO")
                return {
                    'filename': filename,
                    'url': url,
                    'index': index
                }

            elif file_state == "processing":
                log_message(f"⚠️ File {filename} is currently marked as 'processing'. Skipping for now.", "WARNING")

            elif file_state == "processed":
                log_message(f"ℹ️ File {filename} already processed. Skipping.", "INFO")

        log_message("ℹ️ All files up to the current index have been processed or skipped.", "INFO")

        # Wrap around once the index has run off the end of the listing.
        if start_index >= len(audio_files):
            log_message("ℹ️ Reached end of file list. Resetting index to 0 for next loop.", "INFO")
            state["next_download_index"] = 0
            upload_hf_state(TARGET_REPO_ID, HF_STATE_FILE, state)

        return None

    except Exception as e:
        log_message(f"❌ Failed to list files from Hugging Face: {str(e)}", "ERROR")
        return None
433
+
434
def main_processing_loop():
    """The main loop that orchestrates the download, transcription, and upload cycle.

    Runs until interrupted: fetch shared state, pick the next audio file,
    lock it, download, transcribe, upload the JSON result, then publish the
    final per-file state ('processed' or 'failed') and advance the index.

    Bug fix: in the original, a failed lock executed ``continue`` inside the
    inner try, which ran the ``finally`` block — reading ``local_wav_path``
    before assignment (NameError) and wrongly marking the never-processed
    file as 'failed' while advancing the index. The lock attempt now happens
    BEFORE the try/finally, and ``local_wav_path`` is always bound first.
    """
    if processing_status["is_running"]:
        log_message("⚠️ Processing loop is already running.", "WARNING")
        return

    processing_status["is_running"] = True

    try:
        log_message("🚀 Starting audio transcription processing loop...", "INFO")

        # Fetch reference files from BG_Vid repo once at the start.
        reference_map = fetch_reference_files(REFERENCE_REPO_ID)
        if not reference_map:
            log_message("❌ No reference files found. Cannot proceed.", "ERROR")
            return

        while processing_status["is_running"]:

            current_state = download_hf_state(TARGET_REPO_ID, HF_STATE_FILE)
            next_file_info = get_next_file_to_process(SOURCE_REPO_ID, current_state)

            if next_file_info is None:
                log_message("💤 No new audio files to process. Sleeping for a while...", "INFO")
                time.sleep(PROCESSING_DELAY * 5)
                continue

            target_file = next_file_info['filename']
            audio_url = next_file_info['url']
            target_index = next_file_info['index']

            processing_status["current_file"] = target_file
            success = False

            # Lock outside the try/finally: a lock failure must NOT trigger
            # the finally's "mark failed and advance index" path.
            if not lock_file_for_processing(target_file, current_state):
                log_message(f"❌ Failed to lock file {target_file}. Skipping.", "ERROR")
                time.sleep(PROCESSING_DELAY)
                continue

            local_wav_path = os.path.join(DOWNLOAD_FOLDER, os.path.basename(target_file))

            try:
                log_message(f"⬇️ Downloading audio file: {target_file}", "INFO")

                if download_with_retry(audio_url, local_wav_path):
                    base_filename = os.path.basename(target_file)
                    matched_filename = find_matching_filename(base_filename, reference_map)

                    if matched_filename:
                        if process_audio_file(local_wav_path, reference_map, matched_filename):
                            success = True
                            log_message(f"✅ Finished processing: {target_file}", "INFO")
                        else:
                            log_message(f"❌ Processing failed for: {target_file}", "ERROR")
                    else:
                        log_message(f"❌ No matching filename found for: {base_filename}", "ERROR")
                        log_failed_file(target_file, "No matching reference filename")
                else:
                    log_message(f"❌ Download failed for: {target_file}", "ERROR")

            except Exception as e:
                log_message(f"🔥 An unhandled error occurred while processing {target_file}: {str(e)}", "ERROR")
                log_failed_file(target_file, str(e))

            finally:
                next_index_to_save = target_index + 1
                # Re-fetch: another worker may have pushed state while we
                # were transcribing.
                current_state = download_hf_state(TARGET_REPO_ID, HF_STATE_FILE)

                if success:
                    unlock_file_as_processed(target_file, current_state, next_index_to_save)
                    processing_status["processed_files"] += 1
                else:
                    log_message(f"⚠️ Processing failed for {target_file}. Marking as 'failed' and advancing index.", "WARNING")
                    current_state["file_states"][target_file] = "failed"
                    current_state["next_download_index"] = next_index_to_save
                    upload_hf_state(TARGET_REPO_ID, HF_STATE_FILE, current_state)
                    processing_status["failed_files"] += 1

                if os.path.exists(local_wav_path):
                    os.remove(local_wav_path)
                    log_message(f"🗑️ Cleaned up local file: {local_wav_path}", "INFO")

            time.sleep(PROCESSING_DELAY)

        log_message("🎉 Processing complete!", "INFO")
        log_message(f"📊 Final stats: {processing_status['transcribed_files']} audio files transcribed, {processing_status['processed_files']} files processed", "INFO")

    except KeyboardInterrupt:
        log_message("⏹️ Processing interrupted by user", "WARNING")
    except Exception as e:
        log_message(f"❌ Fatal error: {str(e)}", "ERROR")
    finally:
        processing_status["is_running"] = False
        cleanup_temp_files()
533
# Script entry point: run the blocking processing loop until interrupted.
if __name__ == "__main__":
    main_processing_loop()
download_api.py DELETED
@@ -1,407 +0,0 @@
1
- import os
2
- import json
3
- import time
4
- import threading
5
- import asyncio
6
- from fastapi import FastAPI, HTTPException, BackgroundTasks
7
- from fastapi.middleware.cors import CORSMiddleware
8
- from fastapi.responses import JSONResponse, FileResponse
9
- from fastapi.staticfiles import StaticFiles
10
- import uvicorn
11
- from typing import Dict
12
- from pathlib import Path
13
- import subprocess
14
- from datetime import datetime
15
-
16
- import torch
17
-
18
- # Import core functionality
19
- from vision_analyzer import (
20
- main_processing_loop,
21
- processing_status,
22
- log_message,
23
- FRAMES_OUTPUT_FOLDER
24
- )
25
-
26
- # FastAPI App Definition
27
- app = FastAPI(title="Video Analysis API",
28
- description="API to access video frame analysis results and extracted images",
29
- version="1.0.0")
30
-
31
- # Add CORS middleware to allow cross-origin requests
32
- app.add_middleware(
33
- CORSMiddleware,
34
- allow_origins=["*"], # Allows all origins
35
- allow_credentials=True,
36
- allow_methods=["*"], # Allows all methods
37
- allow_headers=["*"],
38
- )
39
-
40
- # Global variables for processing and frame tracking
41
- processing_thread = None
42
- frame_locks = {} # Dict to track frame locks: {course: {frame: {"locked_by": id, "locked_at": timestamp}}}
43
- processed_frames = {} # Dict to track processed frames: {course: {frame: {"processed_by": id, "processed_at": timestamp}}}
44
- LOCK_TIMEOUT = 300 # 5 minutes timeout for locks
45
- TRACKING_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "frame_tracking.json")
46
-
47
- def save_tracking_state():
48
- """Save frame tracking state to disk"""
49
- state = {
50
- "frame_locks": frame_locks,
51
- "processed_frames": processed_frames
52
- }
53
- try:
54
- with open(TRACKING_FILE, "w") as f:
55
- json.dump(state, f, indent=2)
56
- except Exception as e:
57
- log_message(f"Error saving tracking state: {e}")
58
-
59
- def load_tracking_state():
60
- """Load frame tracking state from disk"""
61
- global frame_locks, processed_frames
62
- try:
63
- with open(TRACKING_FILE, "r") as f:
64
- state = json.load(f)
65
- frame_locks = state.get("frame_locks", {})
66
- processed_frames = state.get("processed_frames", {})
67
- except FileNotFoundError:
68
- log_message("No previous tracking state found")
69
- except Exception as e:
70
- log_message(f"Error loading tracking state: {e}")
71
-
72
- def check_frame_lock(course: str, frame: str) -> bool:
73
- """Check if frame is locked and lock hasn't expired"""
74
- if course in frame_locks and frame in frame_locks[course]:
75
- lock = frame_locks[course][frame]
76
- if time.time() - lock["locked_at"] < LOCK_TIMEOUT:
77
- return True
78
- # Lock expired, remove it
79
- del frame_locks[course][frame]
80
- save_tracking_state()
81
- return False
82
-
83
- def lock_frame(course: str, frame: str, requester_id: str) -> bool:
84
- """Attempt to lock a frame for processing"""
85
- if check_frame_lock(course, frame):
86
- return False
87
-
88
- if course not in frame_locks:
89
- frame_locks[course] = {}
90
-
91
- frame_locks[course][frame] = {
92
- "locked_by": requester_id,
93
- "locked_at": time.time()
94
- }
95
- save_tracking_state()
96
- return True
97
-
98
- def mark_frame_processed(course: str, frame: str, requester_id: str):
99
- """Mark a frame as successfully processed"""
100
- if course not in processed_frames:
101
- processed_frames[course] = {}
102
-
103
- processed_frames[course][frame] = {
104
- "processed_by": requester_id,
105
- "processed_at": time.time()
106
- }
107
-
108
- # Remove the lock if it exists
109
- if course in frame_locks and frame in frame_locks[course]:
110
- del frame_locks[course][frame]
111
-
112
- save_tracking_state()
113
-
114
- def log_message(message):
115
- """Add a log message with timestamp"""
116
- timestamp = datetime.now().strftime("%H:%M:%S")
117
- log_entry = f"[{timestamp}] {message}"
118
- processing_status["logs"].append(log_entry)
119
-
120
- # Keep only the last 100 logs
121
- if len(processing_status["logs"]) > 100:
122
- processing_status["logs"] = processing_status["logs"][-100:]
123
-
124
- print(log_entry)
125
-
126
- @app.on_event("startup")
127
- async def startup_event():
128
- """Initialize frame tracking and start processing loop"""
129
- # Load frame tracking state
130
- load_tracking_state()
131
- log_message("✓ Loaded frame tracking state")
132
-
133
- # Start processing thread
134
- global processing_thread
135
- if not (processing_thread and processing_thread.is_alive()):
136
- log_message("🚀 Starting RAR extraction, frame extraction, and vision analysis pipeline in background...")
137
- processing_thread = threading.Thread(target=main_processing_loop)
138
- processing_thread.daemon = True
139
- processing_thread.start()
140
-
141
- @app.get("/")
142
- async def root():
143
- """Root endpoint that returns basic info"""
144
- return {
145
- "message": "Video Analysis API",
146
- "status": "running",
147
- "endpoints": {
148
- "/status": "Get processing status",
149
- "/courses": "List all available course folders",
150
- "/images/{course_folder}": "List images in a course folder",
151
- "/images/{course_folder}/{frame_filename}": "Get specific frame image",
152
- "/start-processing": "Start processing pipeline",
153
- "/stop-processing": "Stop processing pipeline"
154
- }
155
- }
156
-
157
- @app.get("/status")
158
- async def get_status():
159
- """Get current processing status"""
160
- return {
161
- "processing_status": processing_status,
162
- "frames_folder": FRAMES_OUTPUT_FOLDER,
163
- "frames_folder_exists": os.path.exists(FRAMES_OUTPUT_FOLDER)
164
- }
165
-
166
- # ===== NEW IMAGE SERVING ENDPOINTS =====
167
-
168
- @app.get("/middleware/next/course")
169
- async def get_next_course(requester_id: str):
170
- """Get next available course for processing"""
171
- if not os.path.exists(FRAMES_OUTPUT_FOLDER):
172
- raise HTTPException(status_code=404, detail="No courses available")
173
-
174
- # Load latest state
175
- load_tracking_state()
176
-
177
- # Find a course with unprocessed frames
178
- for folder in os.listdir(FRAMES_OUTPUT_FOLDER):
179
- folder_path = os.path.join(FRAMES_OUTPUT_FOLDER, folder)
180
- if not os.path.isdir(folder_path):
181
- continue
182
-
183
- # Check if course has any unprocessed frames
184
- image_files = [f for f in os.listdir(folder_path)
185
- if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
186
-
187
- for image in image_files:
188
- if (folder not in processed_frames or
189
- image not in processed_frames[folder]):
190
- return {"course": folder}
191
-
192
- raise HTTPException(status_code=404, detail="No courses with unprocessed frames")
193
-
194
- @app.get("/middleware/next/image/{course_folder}")
195
- async def get_next_image(course_folder: str, requester_id: str):
196
- """Get next available image from a course"""
197
- folder_path = os.path.join(FRAMES_OUTPUT_FOLDER, course_folder)
198
-
199
- if not os.path.exists(folder_path):
200
- raise HTTPException(status_code=404, detail=f"Course not found: {course_folder}")
201
-
202
- # Load latest state
203
- load_tracking_state()
204
-
205
- # Find first unprocessed and unlocked frame
206
- for file in sorted(os.listdir(folder_path)):
207
- if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
208
- continue
209
-
210
- # Skip if processed
211
- if (course_folder in processed_frames and
212
- file in processed_frames[course_folder]):
213
- continue
214
-
215
- # Skip if locked by another requester
216
- if check_frame_lock(course_folder, file):
217
- continue
218
-
219
- # Try to lock the frame
220
- if lock_frame(course_folder, file, requester_id):
221
- file_path = os.path.join(folder_path, file)
222
- file_stats = os.stat(file_path)
223
- return {
224
- "file_id": f"frame:{course_folder}/{file}",
225
- "frame": file,
226
- "video": os.path.splitext(file)[0],
227
- "size_bytes": file_stats.st_size,
228
- "modified_time": time.ctime(file_stats.st_mtime),
229
- "url": f"/images/{course_folder}/{file}"
230
- }
231
-
232
- raise HTTPException(status_code=404, detail="No available frames in course")
233
-
234
- @app.post("/middleware/release/frame/{course_folder}/{video}/{frame}")
235
- async def release_frame(course_folder: str, video: str, frame: str, requester_id: str):
236
- """Release a frame lock"""
237
- if course_folder in frame_locks and frame in frame_locks[course_folder]:
238
- lock = frame_locks[course_folder][frame]
239
- if lock["locked_by"] == requester_id:
240
- del frame_locks[course_folder][frame]
241
- save_tracking_state()
242
- return {"status": "released"}
243
- return {"status": "not_found"}
244
-
245
- @app.post("/middleware/release/course/{course_folder}")
246
- async def release_course(course_folder: str, requester_id: str):
247
- """Release all frame locks for a course"""
248
- if course_folder in frame_locks:
249
- # Only release frames locked by this requester
250
- frames_to_release = [
251
- frame for frame, lock in frame_locks[course_folder].items()
252
- if lock["locked_by"] == requester_id
253
- ]
254
- for frame in frames_to_release:
255
- del frame_locks[course_folder][frame]
256
- save_tracking_state()
257
- return {"status": "released"}
258
-
259
- @app.get("/images/{course_folder}/{frame_filename}")
260
- async def get_frame_image(course_folder: str, frame_filename: str, requester_id: str = None):
261
- """
262
- Serve extracted frame images from course folders with locking
263
-
264
- Args:
265
- course_folder: The course folder name (e.g., "course1_video1_mp4_frames")
266
- frame_filename: The frame file name (e.g., "0001.png")
267
- requester_id: Optional requester ID for frame locking
268
- """
269
- # Load latest state
270
- load_tracking_state()
271
-
272
- # Construct the full path to the image
273
- image_path = os.path.join(FRAMES_OUTPUT_FOLDER, course_folder, frame_filename)
274
-
275
- # Check if file exists
276
- if not os.path.exists(image_path):
277
- raise HTTPException(status_code=404, detail=f"Image not found: {course_folder}/{frame_filename}")
278
-
279
- # Verify it's an image file
280
- if not frame_filename.lower().endswith(('.png', '.jpg', '.jpeg')):
281
- raise HTTPException(status_code=400, detail="File must be an image (PNG, JPG, JPEG)")
282
-
283
- # If requester_id provided, verify frame lock
284
- if requester_id:
285
- if check_frame_lock(course_folder, frame_filename):
286
- lock = frame_locks[course_folder][frame_filename]
287
- if lock["locked_by"] != requester_id:
288
- raise HTTPException(status_code=423, detail="Frame is locked by another requester")
289
-
290
- # Return the image file
291
- return FileResponse(image_path)
292
-
293
- @app.get("/images/{course_folder}")
294
- async def list_course_images(course_folder: str):
295
- """
296
- List all available images in a specific course folder
297
-
298
- Args:
299
- course_folder: The course folder name
300
- """
301
- folder_path = os.path.join(FRAMES_OUTPUT_FOLDER, course_folder)
302
-
303
- if not os.path.exists(folder_path):
304
- raise HTTPException(status_code=404, detail=f"Course folder not found: {course_folder}")
305
-
306
- # Get all image files
307
- image_files = []
308
- for file in os.listdir(folder_path):
309
- if file.lower().endswith(('.png', '.jpg', '.jpeg')):
310
- file_path = os.path.join(folder_path, file)
311
- file_stats = os.stat(file_path)
312
- image_files.append({
313
- "filename": file,
314
- "size_bytes": file_stats.st_size,
315
- "modified_time": time.ctime(file_stats.st_mtime),
316
- "url": f"/images/{course_folder}/{file}"
317
- })
318
-
319
- return {
320
- "course_folder": course_folder,
321
- "total_images": len(image_files),
322
- "images": image_files
323
- }
324
-
325
- @app.get("/courses")
326
- async def list_all_courses():
327
- """
328
- List all available course folders with their image counts
329
- """
330
- if not os.path.exists(FRAMES_OUTPUT_FOLDER):
331
- return {"courses": [], "message": "Frames output folder does not exist yet"}
332
-
333
- courses = []
334
- for folder in os.listdir(FRAMES_OUTPUT_FOLDER):
335
- folder_path = os.path.join(FRAMES_OUTPUT_FOLDER, folder)
336
- if os.path.isdir(folder_path):
337
- # Count image files
338
- image_count = len([f for f in os.listdir(folder_path)
339
- if f.lower().endswith(('.png', '.jpg', '.jpeg'))])
340
- courses.append({
341
- "course_folder": folder,
342
- "image_count": image_count,
343
- "images_url": f"/images/{folder}",
344
- "sample_image_url": f"/images/{folder}/0001.png" if image_count > 0 else None
345
- })
346
-
347
- return {
348
- "total_courses": len(courses),
349
- "courses": courses
350
- }
351
-
352
-
353
# Signal handlers to prevent accidental shutdown
def handle_shutdown(signum, frame):
    """Swallow SIGINT/SIGTERM so the server keeps running."""
    # The handler only logs and never raises, so the process survives the signal.
    print(f"\n⚠️ Received signal {signum}. Server will continue running.")
    print("Use Ctrl+Break or kill -9 to force stop.")

# Route both interrupt and terminate signals through the no-op handler.
import signal
for _sig in (signal.SIGINT, signal.SIGTERM):
    signal.signal(_sig, handle_shutdown)
363
-
364
# Server lifecycle events
@app.on_event("shutdown")
async def shutdown_event():
    """Persist tracking state when a shutdown is attempted, then stall forever.

    HACK: the infinite sleep loop below deliberately never returns, which
    blocks the server's graceful-shutdown sequence so the process stays
    alive until force-killed (Ctrl+Break / kill -9). Together with the
    SIGINT/SIGTERM handlers registered elsewhere in this file, this makes
    the server effectively unstoppable by normal means — intentional here,
    but note it also removes every clean shutdown path.
    """
    # Flush in-memory tracking/lock state to disk first, so nothing is lost
    # even when the process is later force-killed.
    save_tracking_state()
    print("💾 Saved tracking state")
    print("⚠️ Server shutdown prevented - use Ctrl+Break or kill -9 to force stop")
    # Prevent shutdown by not returning: await forever so this shutdown
    # handler never completes.
    while True:
        await asyncio.sleep(1)
374
-
375
- if __name__ == "__main__":
376
- # Start the FastAPI server
377
- print("🚀 Starting Video Analysis FastAPI Server (Persistent Mode)...")
378
- print("API Documentation will be available at: http://localhost:8000/docs")
379
- print("API Root endpoint: http://localhost:8000/")
380
- print("⚠️ Server will continue running even after processing completes")
381
- print("Use Ctrl+Break or kill -9 to force stop")
382
-
383
- # Ensure the analysis output folder exists
384
- os.makedirs(FRAMES_OUTPUT_FOLDER, exist_ok=True)
385
-
386
- # Start processing in thread instead of blocking
387
- processing_thread = threading.Thread(target=main_processing_loop)
388
- processing_thread.daemon = False # Make non-daemon so it doesn't exit
389
- processing_thread.start()
390
-
391
- # Configure uvicorn for persistent running
392
- config = uvicorn.Config(
393
- app=app,
394
- host="0.0.0.0",
395
- port=8000,
396
- log_level="info",
397
- reload=False,
398
- workers=1,
399
- loop="asyncio",
400
- timeout_keep_alive=600, # Keep connections alive longer
401
- access_log=True
402
- )
403
-
404
- # Run server with persistent config
405
- server = uvicorn.Server(config)
406
- server.run()
407
-