Spaces:

Fred808
/

FLOWCAP2

Paused

App Files Files Community

Fred808 committed on Oct 30, 2025

Commit

a303362

verified ·

1 Parent(s): ef25d3a

Update app.py

Browse files

Files changed (1) hide show

app.py +434 -803

app.py CHANGED Viewed

@@ -4,79 +4,76 @@ import time
 import asyncio
 import aiohttp
 import zipfile
-from typing import Dict, List, Set, Optional
 from urllib.parse import quote
 from datetime import datetime
 from pathlib import Path
-import io
-from fastapi import FastAPI, BackgroundTasks, HTTPException, status
-from fastapi.responses import HTMLResponse, FileResponse
-from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel, Field
-from huggingface_hub import HfApi, hf_hub_download
 import uvicorn
 # --- Configuration ---
 FLOW_ID = os.getenv("FLOW_ID", "flow_default")
 FLOW_PORT = int(os.getenv("FLOW_PORT", 8001))
 MANAGER_URL = os.getenv("MANAGER_URL", "https://fred808-fcord.hf.space")
 MANAGER_COMPLETE_TASK_URL = f"{MANAGER_URL}/task/complete"
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 HF_DATASET_ID = os.getenv("HF_DATASET_ID", "Fred808/BG3")
 HF_OUTPUT_DATASET_ID = os.getenv("HF_OUTPUT_DATASET_ID", "fred808/helium")
 CAPTION_SERVERS = [
-    "https://favoredone-favoredone-tv88mp.hf.space/analyze",
-"https://favoredone-favoredone-7p1dcf.hf.space/analyze",
-"https://favoredone-favoredone-k7b4mf.hf.space/analyze",
-"https://favoredone-favoredone-mzlxc7.hf.space/analyze",
-"https://favoredone-favoredone-aomfwa.hf.space/analyze",
-"https://favoredone-favoredone-7g6v04.hf.space/analyze",
-"https://favoredone-favoredone-dk1skh.hf.space/analyze",
-"https://favoredone-favoredone-z4yo0y.hf.space/analyze",
-"https://favoredone-favoredone-f6czeq.hf.space/analyze",
-"https://favoredone-favoredone-5fo8ga.hf.space/analyze",
-"https://favoredone-favoredone-zde8x6.hf.space/analyze",
-"https://favoredone-favoredone-r0biih.hf.space/analyze",
-"https://favoredone-favoredone-ljdzkf.hf.space/analyze",
-"https://favoredone-favoredone-irrpe5.hf.space/analyze",
-"https://favoredone-favoredone-bh9rwz.hf.space/analyze",
-"https://favoredone-favoredone-u8c4dt.hf.space/analyze",
-"https://favoredone-favoredone-futwyd.hf.space/analyze",
-"https://favoredone-favoredone-hg2sot.hf.space/analyze",
-"https://favoredone-favoredone-pvweug.hf.space/analyze",
-"https://favoredone-favoredone-z6azk2.hf.space/analyze",
-"https://favoredone-favoredone-4zid9w.hf.space/analyze",
-"https://favoredone-favoredone-be7a1r.hf.space/analyze",
-"https://favoredone-favoredone-ayazxa.hf.space/analyze",
-"https://favoredone-favoredone-6ckj4m.hf.space/analyze",
-"https://favoredone-favoredone-whn0xu.hf.space/analyze",
-"https://favoredone-favoredone-t49exm.hf.space/analyze",
-"https://favoredone-favoredone-cgrh0a.hf.space/analyze",
-"https://favoredone-favoredone-r1kb5g.hf.space/analyze"
 ]
 MODEL_TYPE = "Florence-2-large"
 TEMP_DIR = Path(f"temp_images_{FLOW_ID}")
 TEMP_DIR.mkdir(exist_ok=True)
-# --- Global State for Statistics ---
-processing_stats = {
-    "current_course": None,
-    "total_images": 0,
-    "captions_completed": 0,
-    "captions_failed": 0,
-    "files_processed": 0,
-    "start_time": None,
-    "status": "idle",
-    "error_message": None,
-    "server_stats": {}
-}
 # --- Models ---
 class ProcessCourseRequest(BaseModel):
     course_name: Optional[str] = None
@@ -93,11 +90,126 @@ class CaptionServer:
     def fps(self):
         return self.total_processed / self.total_time if self.total_time > 0 else 0
-# Global state for caption servers
 servers = [CaptionServer(url) for url in CAPTION_SERVERS]
 server_index = 0
-# --- Core Processing Functions ---
 async def get_available_server(timeout: float = 300.0) -> CaptionServer:
     """Round-robin selection of an available caption server."""
@@ -142,13 +254,14 @@ async def send_image_for_captioning(image_path: Path, course_name: str, progress
                         caption = result.get("caption")
                         if caption:
                             progress_tracker['completed'] += 1
                             if progress_tracker['completed'] % 50 == 0:
                                 print(f"[{FLOW_ID}] PROGRESS: {progress_tracker['completed']}/{progress_tracker['total']} captions completed.")
-                            if progress_tracker['completed'] % 50 != 0:
-                                print(f"[{FLOW_ID}] Success: {image_path.name} captioned by {server.url}")
                             return {
                                 "course": course_name,
                                 "image_path": image_path.name,
@@ -177,62 +290,45 @@ async def send_image_for_captioning(image_path: Path, course_name: str, progress
                 server.total_time += (end_time - start_time)
     print(f"[{FLOW_ID}] FAILED after {MAX_RETRIES} attempts for {image_path.name}.")
-    processing_stats['captions_failed'] += 1
     return None
-async def download_and_extract_zip(course_name: str, processed_files: Set[str]) -> Optional[tuple[Path, str, str]]:
-    """Downloads the zip file for the course and extracts its contents."""
-    print(f"[{FLOW_ID}] Looking for files starting with '{course_name}' in frames/ directory...")
     try:
-        api = HfApi(token=HF_TOKEN)
-        repo_files = api.list_repo_files(
-            repo_id=HF_DATASET_ID,
-            repo_type="dataset"
-        )
-        matching_files = [
-            f for f in repo_files
-            if f.startswith(f"frames/{course_name}") and f.endswith('.zip')
-        ]
-        if not matching_files:
-            print(f"[{FLOW_ID}] No zip files found starting with '{course_name}' in frames/ directory.")
-            return None, None
-        unprocessed_files = [f for f in matching_files if f not in processed_files]
-        if not unprocessed_files:
-            print(f"[{FLOW_ID}] No new zip files found for '{course_name}'.")
-            return None, None, None
-        repo_file_full_path = unprocessed_files[0]
-        zip_full_name = Path(repo_file_full_path).name
-        print(f"[{FLOW_ID}] Found new matching file: {repo_file_full_path}. Full name: {zip_full_name}")
         zip_path = hf_hub_download(
             repo_id=HF_DATASET_ID,
-            filename=repo_file_full_path,
             repo_type="dataset",
             token=HF_TOKEN,
         )
         print(f"[{FLOW_ID}] Downloaded to {zip_path}. Extracting...")
-        extract_dir = TEMP_DIR / course_name
-        extract_dir.mkdir(exist_ok=True)
         with zipfile.ZipFile(zip_path, 'r') as zip_ref:
             zip_ref.extractall(extract_dir)
         print(f"[{FLOW_ID}] Extraction complete to {extract_dir}.")
-        return extract_dir, zip_full_name, repo_file_full_path
     except Exception as e:
-        print(f"[{FLOW_ID}] Error downloading or extracting zip for {course_name}: {e}")
-        return None, None, None
 async def upload_captions_to_hf(zip_full_name: str, captions: List[Dict]) -> bool:
     """Uploads the final captions JSON file to the output dataset."""
@@ -243,7 +339,7 @@ async def upload_captions_to_hf(zip_full_name: str, captions: List[Dict]) -> boo
         json_content = json.dumps(captions, indent=2, ensure_ascii=False).encode('utf-8')
-        api = HfApi(token=HF_TOKEN)
         api.upload_file(
             path_or_fileobj=io.BytesIO(json_content),
             path_in_repo=caption_filename,
@@ -259,782 +355,317 @@ async def upload_captions_to_hf(zip_full_name: str, captions: List[Dict]) -> boo
         print(f"[{FLOW_ID}] Error uploading captions for {zip_full_name}: {e}")
         return False
-async def process_course_task(course_name: str):
-    """Main task to process a single course, looping until all files are processed."""
-    global processing_stats
-    processing_stats['current_course'] = course_name
-    processing_stats['status'] = 'processing'
-    processing_stats['start_time'] = datetime.now().isoformat()
-    processing_stats['total_images'] = 0
-    processing_stats['captions_completed'] = 0
-    processing_stats['captions_failed'] = 0
-    processing_stats['files_processed'] = 0
-    processing_stats['error_message'] = None
-    print(f"[{FLOW_ID}] Starting continuous processing for course: {course_name}")
-    processed_files = set()
-    all_processed_files_log = []
-    global_success = True
-    while True:
         extract_dir = None
         zip_full_name = None
-        repo_file_full_path = None
         try:
-            download_result = await download_and_extract_zip(course_name, processed_files)
-            if download_result is None or download_result[0] is None:
-                if download_result is not None and download_result[0] is None and download_result[1] is None:
-                    print(f"[{FLOW_ID}] No new files found for {course_name}. Exiting loop.")
-                    break
-                else:
-                    raise Exception("Failed to download or extract zip file.")
-            extract_dir, zip_full_name, repo_file_full_path = download_result
-            processed_files.add(repo_file_full_path)
-            all_processed_files_log.append(repo_file_full_path)
-            processing_stats['files_processed'] = len(all_processed_files_log)
             image_paths = [p for p in extract_dir.glob("**/*") if p.is_file() and p.suffix.lower() in ['.jpg', '.jpeg', '.png']]
-            processing_stats['total_images'] += len(image_paths)
             print(f"[{FLOW_ID}] Found {len(image_paths)} images to process in {zip_full_name}.")
-            current_file_success = False
             if not image_paths:
                 print(f"[{FLOW_ID}] No images found in {zip_full_name}. Marking as complete.")
-                current_file_success = True
             else:
                 progress_tracker = {
                     'total': len(image_paths),
                     'completed': 0
                 }
-                print(f"[{FLOW_ID}] Starting captioning for {progress_tracker['total']} images in {zip_full_name}...")
                 semaphore = asyncio.Semaphore(len(servers))
                 async def limited_send_image_for_captioning(image_path, course_name, progress_tracker):
                     async with semaphore:
                         return await send_image_for_captioning(image_path, course_name, progress_tracker)
-                caption_tasks = []
-                for image_path in image_paths:
-                    caption_tasks.append(limited_send_image_for_captioning(image_path, course_name, progress_tracker))
                 results = await asyncio.gather(*caption_tasks)
                 all_captions = [r for r in results if r is not None]
-                processing_stats['captions_completed'] = sum(1 for r in results if r is not None)
                 if len(all_captions) == len(image_paths):
                     print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
-                    current_file_success = True
                 else:
                     print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.")
-                    current_file_success = False
                 if all_captions and zip_full_name:
-                    print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
                     if await upload_captions_to_hf(zip_full_name, all_captions):
                         print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
-                        if not current_file_success:
-                            global_success = False
                     else:
-                        print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
-                        current_file_success = False
-                        global_success = False
                 else:
-                    print(f"[{FLOW_ID}] No captions generated or zip_full_name is missing. Skipping upload for {zip_full_name}.")
-                    current_file_success = False
-                    global_success = False
         except Exception as e:
             error_message = str(e)
-            print(f"[{FLOW_ID}] Critical error in process_course_task for {course_name}: {error_message}")
-            processing_stats['error_message'] = error_message
-            global_success = False
         finally:
             if extract_dir and extract_dir.exists():
                 print(f"[{FLOW_ID}] Cleaned up temporary directory {extract_dir}.")
-                import shutil
                 shutil.rmtree(extract_dir, ignore_errors=True)
-            if download_result is None and extract_dir is None:
-                break
-    print(f"[{FLOW_ID}] All processing loops complete for {course_name}.")
-    print(f"[{FLOW_ID}] Total files processed: {len(all_processed_files_log)}")
-    print(f"[{FLOW_ID}] List of processed files: {all_processed_files_log}")
-    processing_stats['status'] = 'completed' if global_success else 'failed'
-    return global_success
-async def report_completion(course_name: str, success: bool, error_message: Optional[str] = None):
-    """Reports the task result back to the Manager Server."""
-    print(f"[{FLOW_ID}] Reporting completion for {course_name} (Success: {success})...")
-    payload = {
-        "flow_id": FLOW_ID,
-        "course_name": course_name,
-        "success": success,
-        "error_message": error_message
-    }
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.post(MANAGER_COMPLETE_TASK_URL, json=payload) as resp:
-                if resp.status != 200:
-                    print(f"[{FLOW_ID}] ERROR: Manager reported non-200 status: {resp.status} - {await resp.text()}")
-                else:
-                    print(f"[{FLOW_ID}] Successfully reported completion to Manager.")
-    except aiohttp.ClientError as e:
-        print(f"[{FLOW_ID}] CRITICAL ERROR: Could not connect to Manager at {MANAGER_COMPLETE_TASK_URL}. Task completion not reported. Error: {e}")
-    except Exception as e:
-        print(f"[{FLOW_ID}] Unexpected error during reporting: {e}")
 # --- FastAPI App and Endpoints ---
 app = FastAPI(
     title=f"Flow Server {FLOW_ID} API",
     description="Fetches, extracts, and captions images for a given course.",
-    version="1.0.0"
 )
-HTML_UI = """
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Course Processing Dashboard</title>
-    <style>
-        * {
-            margin: 0;
-            padding: 0;
-            box-sizing: border-box;
-        }
-        body {
-            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-            min-height: 100vh;
-            padding: 20px;
-        }
-        .container {
-            max-width: 1200px;
-            margin: 0 auto;
-        }
-        .header {
-            text-align: center;
-            color: white;
-            margin-bottom: 40px;
-        }
-        .header h1 {
-            font-size: 2.5em;
-            margin-bottom: 10px;
-            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
-        }
-        .header p {
-            font-size: 1.1em;
-            opacity: 0.9;
-        }
-        .main-card {
-            background: white;
-            border-radius: 15px;
-            padding: 40px;
-            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
-            margin-bottom: 30px;
-        }
-        .input-section {
-            display: flex;
-            gap: 15px;
-            margin-bottom: 30px;
-            flex-wrap: wrap;
-        }
-        .input-group {
-            flex: 1;
-            min-width: 250px;
-        }
-        .input-group label {
-            display: block;
-            margin-bottom: 8px;
-            font-weight: 600;
-            color: #333;
-            font-size: 0.95em;
-        }
-        .input-group input {
-            width: 100%;
-            padding: 12px 15px;
-            border: 2px solid #e0e0e0;
-            border-radius: 8px;
-            font-size: 1em;
-            transition: border-color 0.3s;
-        }
-        .input-group input:focus {
-            outline: none;
-            border-color: #667eea;
-            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
-        }
-        .button-group {
-            display: flex;
-            gap: 10px;
-            flex-wrap: wrap;
-        }
-        button {
-            padding: 12px 30px;
-            border: none;
-            border-radius: 8px;
-            font-size: 1em;
-            font-weight: 600;
-            cursor: pointer;
-            transition: all 0.3s;
-        }
-        .btn-primary {
-            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-            color: white;
-            flex: 1;
-            min-width: 150px;
-        }
-        .btn-primary:hover {
-            transform: translateY(-2px);
-            box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3);
-        }
-        .btn-primary:active {
-            transform: translateY(0);
-        }
-        .btn-primary:disabled {
-            opacity: 0.6;
-            cursor: not-allowed;
-            transform: none;
-        }
-        .btn-secondary {
-            background: #f0f0f0;
-            color: #333;
-            flex: 1;
-            min-width: 150px;
-        }
-        .btn-secondary:hover {
-            background: #e0e0e0;
-        }
-        .stats-grid {
-            display: grid;
-            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
-            gap: 20px;
-            margin: 30px 0;
-        }
-        .stat-card {
-            background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
-            padding: 20px;
-            border-radius: 12px;
-            border-left: 5px solid #667eea;
-        }
-        .stat-card.success {
-            border-left-color: #4caf50;
-            background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%);
-        }
-        .stat-card.warning {
-            border-left-color: #ff9800;
-            background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%);
-        }
-        .stat-card.error {
-            border-left-color: #f44336;
-            background: linear-gradient(135deg, #ffebee 0%, #ffcdd2 100%);
-        }
-        .stat-label {
-            font-size: 0.9em;
-            color: #666;
-            margin-bottom: 8px;
-            font-weight: 500;
-            text-transform: uppercase;
-            letter-spacing: 0.5px;
-        }
-        .stat-value {
-            font-size: 2em;
-            font-weight: 700;
-            color: #333;
-        }
-        .progress-section {
-            margin: 30px 0;
-        }
-        .progress-label {
-            display: flex;
-            justify-content: space-between;
-            margin-bottom: 10px;
-            font-weight: 600;
-            color: #333;
-        }
-        .progress-bar {
-            width: 100%;
-            height: 25px;
-            background: #e0e0e0;
-            border-radius: 12px;
-            overflow: hidden;
-            box-shadow: inset 0 2px 4px rgba(0,0,0,0.1);
-        }
-        .progress-fill {
-            height: 100%;
-            background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
-            width: 0%;
-            transition: width 0.3s ease;
-            display: flex;
-            align-items: center;
-            justify-content: center;
-            color: white;
-            font-size: 0.85em;
-            font-weight: 600;
-        }
-        .status-badge {
-            display: inline-block;
-            padding: 6px 12px;
-            border-radius: 20px;
-            font-size: 0.9em;
-            font-weight: 600;
-            margin-top: 10px;
-        }
-        .status-badge.idle {
-            background: #e0e0e0;
-            color: #666;
-        }
-        .status-badge.processing {
-            background: #fff3e0;
-            color: #ff9800;
-            animation: pulse 1.5s infinite;
-        }
-        .status-badge.completed {
-            background: #e8f5e9;
-            color: #4caf50;
-        }
-        .status-badge.failed {
-            background: #ffebee;
-            color: #f44336;
-        }
-        @keyframes pulse {
-            0%, 100% { opacity: 1; }
-            50% { opacity: 0.7; }
-        }
-        .error-message {
-            background: #ffebee;
-            color: #c62828;
-            padding: 15px;
-            border-radius: 8px;
-            margin-top: 15px;
-            border-left: 4px solid #f44336;
-            display: none;
-        }
-        .error-message.show {
-            display: block;
-        }
-        .server-stats {
-            margin-top: 30px;
-            padding-top: 30px;
-            border-top: 2px solid #e0e0e0;
-        }
-        .server-stats h3 {
-            color: #333;
-            margin-bottom: 15px;
-            font-size: 1.2em;
-        }
-        .server-list {
-            display: grid;
-            grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
-            gap: 15px;
-        }
-        .server-item {
-            background: #f5f5f5;
-            padding: 12px;
-            border-radius: 8px;
-            font-size: 0.9em;
-            border: 1px solid #e0e0e0;
-        }
-        .server-item.busy {
-            background: #fff3e0;
-            border-color: #ff9800;
-        }
-        .server-name {
-            font-weight: 600;
-            color: #333;
-            margin-bottom: 5px;
-            word-break: break-all;
-        }
-        .server-info {
-            color: #666;
-            font-size: 0.85em;
-        }
-        .loading-spinner {
-            display: inline-block;
-            width: 20px;
-            height: 20px;
-            border: 3px solid #f3f3f3;
-            border-top: 3px solid #667eea;
-            border-radius: 50%;
-            animation: spin 1s linear infinite;
-            margin-right: 10px;
-            vertical-align: middle;
-        }
-        @keyframes spin {
-            0% { transform: rotate(0deg); }
-            100% { transform: rotate(360deg); }
-        }
-        .timestamp {
-            color: #999;
-            font-size: 0.9em;
-            margin-top: 20px;
-            text-align: right;
-        }
-        @media (max-width: 768px) {
-            .header h1 {
-                font-size: 1.8em;
-            }
-            .main-card {
-                padding: 20px;
-            }
-            .input-section {
-                flex-direction: column;
-            }
-            .button-group {
-                flex-direction: column;
-            }
-            button {
-                width: 100%;
-            }
-        }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <div class="header">
-            <h1>📚 Course Processing Dashboard</h1>
-            <p>Submit course index names for image captioning and processing</p>
-        </div>
-        <div class="main-card">
-            <div class="input-section">
-                <div class="input-group">
-                    <label for="courseInput">Course Index Name</label>
-                    <input
-                        type="text"
-                        id="courseInput"
-                        placeholder="e.g., COURSE_001, BIO101, etc."
-                        autocomplete="off"
-                    >
-                </div>
-                <div class="button-group">
-                    <button class="btn-primary" onclick="submitCourse()">
-                        <span id="submitText">Start Processing</span>
-                    </button>
-                    <button class="btn-secondary" onclick="refreshStats()">Refresh Stats</button>
-                </div>
-            </div>
-            <div class="error-message" id="errorMessage"></div>
-            <div id="statsContainer" style="display: none;">
-                <div style="margin-bottom: 20px;">
-                    <h2 style="color: #333; margin-bottom: 10px;">Current Course: <span id="currentCourse" style="color: #667eea;"></span></h2>
-                    <span class="status-badge idle" id="statusBadge">Idle</span>
-                </div>
-                <div class="progress-section">
-                    <div class="progress-label">
-                        <span>Overall Progress</span>
-                        <span id="progressText">0%</span>
-                    </div>
-                    <div class="progress-bar">
-                        <div class="progress-fill" id="progressFill" style="width: 0%;">
-                            <span id="progressPercent">0%</span>
-                        </div>
-                    </div>
-                </div>
-                <div class="stats-grid">
-                    <div class="stat-card">
-                        <div class="stat-label">Total Images Found</div>
-                        <div class="stat-value" id="totalImages">0</div>
-                    </div>
-                    <div class="stat-card success">
-                        <div class="stat-label">Captions Completed</div>
-                        <div class="stat-value" id="captionsCompleted">0</div>
-                    </div>
-                    <div class="stat-card warning">
-                        <div class="stat-label">Captions Failed</div>
-                        <div class="stat-value" id="captionsFailed">0</div>
-                    </div>
-                    <div class="stat-card">
-                        <div class="stat-label">Files Processed</div>
-                        <div class="stat-value" id="filesProcessed">0</div>
-                    </div>
-                    <div class="stat-card">
-                        <div class="stat-label">Success Rate</div>
-                        <div class="stat-value" id="successRate">0%</div>
-                    </div>
-                    <div class="stat-card">
-                        <div class="stat-label">Processing Time</div>
-                        <div class="stat-value" id="processingTime">0s</div>
-                    </div>
-                </div>
-                <div class="server-stats">
-                    <h3>Caption Server Status</h3>
-                    <div class="server-list" id="serverList">
-                        <!-- Server items will be populated here -->
-                    </div>
-                </div>
-            </div>
-            <div class="timestamp" id="timestamp"></div>
-        </div>
-    </div>
-    <script>
-        let processingInterval = null;
-        async function submitCourse() {
-            const courseInput = document.getElementById('courseInput');
-            const courseName = courseInput.value.trim();
-            if (!courseName) {
-                showError('Please enter a course index name');
-                return;
-            }
-            const submitBtn = event.target;
-            submitBtn.disabled = true;
-            document.getElementById('submitText').innerHTML = '<span class="loading-spinner"></span>Submitting...';
-            try {
-                const response = await fetch('/process_course', {
-                    method: 'POST',
-                    headers: {
-                        'Content-Type': 'application/json',
-                    },
-                    body: JSON.stringify({ course_name: courseName })
-                });
-                if (!response.ok) {
-                    throw new Error('Failed to submit course');
-                }
-                const data = await response.json();
-                clearError();
-                document.getElementById('statsContainer').style.display = 'block';
-                courseInput.value = '';
-                // Start polling for stats
-                if (processingInterval) clearInterval(processingInterval);
-                processingInterval = setInterval(refreshStats, 2000);
-                refreshStats();
-            } catch (error) {
-                showError('Error submitting course: ' + error.message);
-            } finally {
-                submitBtn.disabled = false;
-                document.getElementById('submitText').textContent = 'Start Processing';
-            }
-        }
-        async function refreshStats() {
-            try {
-                const response = await fetch('/stats');
-                if (!response.ok) throw new Error('Failed to fetch stats');
-                const stats = await response.json();
-                updateStatsDisplay(stats);
-            } catch (error) {
-                console.error('Error fetching stats:', error);
-            }
-        }
-        function updateStatsDisplay(stats) {
-            document.getElementById('currentCourse').textContent = stats.current_course || 'None';
-            document.getElementById('totalImages').textContent = stats.total_images;
-            document.getElementById('captionsCompleted').textContent = stats.captions_completed;
-            document.getElementById('captionsFailed').textContent = stats.captions_failed;
-            document.getElementById('filesProcessed').textContent = stats.files_processed;
-            // Calculate success rate
-            const total = stats.captions_completed + stats.captions_failed;
-            const successRate = total > 0 ? Math.round((stats.captions_completed / total) * 100) : 0;
-            document.getElementById('successRate').textContent = successRate + '%';
-            // Update progress bar
-            const progress = stats.total_images > 0
-                ? Math.round((stats.captions_completed / stats.total_images) * 100)
-                : 0;
-            document.getElementById('progressFill').style.width = progress + '%';
-            document.getElementById('progressPercent').textContent = progress + '%';
-            document.getElementById('progressText').textContent = progress + '%';
-            // Update status badge
-            const statusBadge = document.getElementById('statusBadge');
-            statusBadge.className = 'status-badge ' + stats.status;
-            statusBadge.textContent = stats.status.charAt(0).toUpperCase() + stats.status.slice(1);
-            // Update processing time
-            if (stats.start_time) {
-                const startTime = new Date(stats.start_time);
-                const elapsed = Math.floor((Date.now() - startTime.getTime()) / 1000);
-                document.getElementById('processingTime').textContent = formatTime(elapsed);
-            }
-            // Update timestamp
-            document.getElementById('timestamp').textContent = 'Last updated: ' + new Date().toLocaleTimeString();
-            // Update server stats
-            updateServerStats(stats.server_stats);
-            // Show error if present
-            if (stats.error_message) {
-                showError('Error: ' + stats.error_message);
-            }
-        }
-        function updateServerStats(serverStats) {
-            const serverList = document.getElementById('serverList');
-            serverList.innerHTML = '';
-            for (const [url, stats] of Object.entries(serverStats)) {
-                const serverItem = document.createElement('div');
-                serverItem.className = 'server-item' + (stats.busy ? ' busy' : '');
-                serverItem.innerHTML = `
-                    <div class="server-name">${url.split('/').slice(-3, -1).join('-')}</div>
-                    <div class="server-info">Processed: ${stats.total_processed}</div>
-                    <div class="server-info">FPS: ${stats.fps.toFixed(2)}</div>
-                    <div class="server-info">Status: ${stats.busy ? '🔴 Busy' : '🟢 Available'}</div>
-                `;
-                serverList.appendChild(serverItem);
-            }
-        }
-        function formatTime(seconds) {
-            if (seconds < 60) return seconds + 's';
-            const minutes = Math.floor(seconds / 60);
-            const secs = seconds % 60;
-            return minutes + 'm ' + secs + 's';
-        }
-        function showError(message) {
-            const errorDiv = document.getElementById('errorMessage');
-            errorDiv.textContent = message;
-            errorDiv.classList.add('show');
-        }
-        function clearError() {
-            const errorDiv = document.getElementById('errorMessage');
-            errorDiv.classList.remove('show');
-        }
-        // Allow Enter key to submit
-        document.getElementById('courseInput').addEventListener('keypress', function(e) {
-            if (e.key === 'Enter') submitCourse();
-        });
-        // Initial stats load
-        refreshStats();
-    </script>
-</body>
-</html>
-"""
 @app.on_event("startup")
 async def startup_event():
     print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}. Manager URL: {MANAGER_URL}")
 @app.get("/", response_class=HTMLResponse)
-async def root():
-    return HTML_UI
-@app.get("/stats")
-async def get_stats():
-    """Returns current processing statistics."""
-    server_stats = {}
-    for server in servers:
-        server_stats[server.url] = {
-            "busy": server.busy,
-            "total_processed": server.total_processed,
-            "total_time": server.total_time,
-            "fps": server.fps
-        }
-    processing_stats['server_stats'] = server_stats
-    return processing_stats
-@app.post("/process_course")
-async def process_course(request: ProcessCourseRequest, background_tasks: BackgroundTasks):
-    """
-    Receives a course name from the UI and starts processing in the background.
-    """
-    course_name = request.course_name
-    if not course_name:
-        print(f"[{FLOW_ID}] Received empty course name. Stopping processing loop.")
-        return {"status": "stopped", "message": "No more courses to process."}
-    print(f"[{FLOW_ID}] Received course: {course_name}. Starting background task.")
-    background_tasks.add_task(process_course_task, course_name)
-    return {"status": "processing", "course_name": course_name, "message": "Processing started in background."}
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=FLOW_PORT)

 import asyncio
 import aiohttp
 import zipfile
+import io
+import shutil
+from typing import Dict, List, Set, Optional, Any
 from urllib.parse import quote
 from datetime import datetime
 from pathlib import Path
+from fastapi import FastAPI, BackgroundTasks, HTTPException, status, Request
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
 from pydantic import BaseModel, Field
+from huggingface_hub import HfApi, hf_hub_download, HfFileSystem
 import uvicorn
 # --- Configuration ---
 FLOW_ID = os.getenv("FLOW_ID", "flow_default")
 FLOW_PORT = int(os.getenv("FLOW_PORT", 8001))
 MANAGER_URL = os.getenv("MANAGER_URL", "https://fred808-fcord.hf.space")
 MANAGER_COMPLETE_TASK_URL = f"{MANAGER_URL}/task/complete"
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 HF_DATASET_ID = os.getenv("HF_DATASET_ID", "Fred808/BG3")
 HF_OUTPUT_DATASET_ID = os.getenv("HF_OUTPUT_DATASET_ID", "fred808/helium")
+STATE_FILE_NAME = f"{FLOW_ID}_state.json"
+# Using the full list from the user's original code for actual deployment
 CAPTION_SERVERS = [
+    "https://fred808-pil-4-1.hf.space/analyze",
+    "https://fred808-pil-4-2.hf.space/analyze",
+    "https://fred808-pil-4-3.hf.space/analyze",
+    "https://fred1012-fred1012-gw0j2h.hf.space/analyze",
+    "https://fred1012-fred1012-wqs6c2.hf.space/analyze",
+    "https://fred1012-fred1012-oncray.hf.space/analyze",
+    "https://fred1012-fred1012-4goge7.hf.space/analyze",
+    "https://fred1012-fred1012-z0eh7m.hf.space/analyze",
+    "https://fred1012-fred1012-u95rte.hf.space/analyze",
+    "https://fred1012-fred1012-igje22.hf.space/analyze",
+    "https://fred1012-fred1012-ibkuf8.hf.space/analyze",
+    "https://fred1012-fred1012-nwqthy.hf.space/analyze",
+    "https://fred1012-fred1012-4ldqj4.hf.space/analyze",
+    "https://fred1012-fred1012-pivlzg.hf.space/analyze",
+    "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
+    "https://fred1012-fred1012-u7lh57.hf.space/analyze",
+    "https://fred1012-fred1012-q8djv1.hf.space/analyze",
+    "https://fredalone-fredalone-ozugrp.hf.space/analyze",
+    "https://fredalone-fredalone-9brxj2.hf.space/analyze",
+    "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
+    "https://fredalone-fredalone-vbli2y.hf.space/analyze",
+    "https://fredalone-fredalone-uggger.hf.space/analyze",
+    "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
+    "https://fredalone-fredalone-d1f26d.hf.space/analyze",
+    "https://fredalone-fredalone-461jp2.hf.space/analyze",
+    "https://fredalone-fredalone-3enfg4.hf.space/analyze",
+    "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
+    "https://fredalone-fredalone-ivtjua.hf.space/analyze",
+    "https://fredalone-fredalone-6bezt2.hf.space/analyze",
+    "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
+    "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
+    "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
+    "https://fredalone-fredalone-wclyog.hf.space/analyze",
+    "https://fredalone-fredalone-t27vig.hf.space/analyze",
+    "https://fredalone-fredalone-gahbxh.hf.space/analyze",
+    "https://fredalone-fredalone-kw2po4.hf.space/analyze",
+    "https://fredalone-fredalone-8h285h.hf.space/analyze"
 ]
 MODEL_TYPE = "Florence-2-large"
+# Temporary storage for images
 TEMP_DIR = Path(f"temp_images_{FLOW_ID}")
 TEMP_DIR.mkdir(exist_ok=True)
 # --- Models ---
 class ProcessCourseRequest(BaseModel):
     course_name: Optional[str] = None
     def fps(self):
         return self.total_processed / self.total_time if self.total_time > 0 else 0
+class ServerState(BaseModel):
+    """Resumable queue state for this flow server; persisted as JSON by save_state_to_hf()."""
+    # The list of all zip files in the dataset (frames/ directory)
+    all_zip_files: List[str] = Field(default_factory=list)
+    # The set of zip files that have been successfully processed and uploaded
+    processed_files: Set[str] = Field(default_factory=set)
+    # The index in all_zip_files from which the next download should start
+    current_index: int = 0
+    # Total number of files to process
+    total_files: int = 0
+    # Status of the current operation
+    status: str = "Idle"
+    # Name of the file currently being processed
+    current_file: Optional[str] = None
+    # Progress within the current file
+    current_file_progress: str = "0/0"
+    # Timestamp of the last update. Built by a default_factory so that every new
+    # instance gets the time of its own creation; a plain `datetime.now()` default
+    # would be evaluated only once, when the class body is executed at import time.
+    last_update: str = Field(default_factory=lambda: datetime.now().isoformat())
+    # Flag to control the processing loop
+    is_running: bool = False
+# Global state for caption servers and the overall server state
 servers = [CaptionServer(url) for url in CAPTION_SERVERS]
 server_index = 0
+state = ServerState()
+# Lock for thread-safe access to the global state
+state_lock = asyncio.Lock()
+# --- Persistence Functions ---
+def get_hf_api():
+    """Build an HfApi client that authenticates with HF_TOKEN."""
+    client = HfApi(token=HF_TOKEN)
+    return client
+def get_hf_fs():
+    """Build an HfFileSystem handle that authenticates with HF_TOKEN."""
+    filesystem = HfFileSystem(token=HF_TOKEN)
+    return filesystem
+async def load_state_from_hf():
+    """Load the persisted state from the Hugging Face output dataset into the global `state`.
+    If the state file does not exist, the current in-memory state is kept. If reading or
+    parsing fails, `state` is reset to a fresh ServerState(). Errors are printed, not raised.
+    """
+    global state
+    fs = get_hf_fs()
+    # HfFileSystem treats a path without a repo-type prefix as a *model* repo. The state
+    # file is uploaded with repo_type="dataset", so it must be addressed under "datasets/".
+    state_path = f"datasets/{HF_OUTPUT_DATASET_ID}/{STATE_FILE_NAME}"
+    async with state_lock:
+        try:
+            if fs.exists(state_path):
+                print(f"[{FLOW_ID}] Loading state from {state_path}...")
+                with fs.open(state_path, 'rb') as f:
+                    data = json.load(f)
+                    # Convert list of processed files back to a set
+                    if 'processed_files' in data and isinstance(data['processed_files'], list):
+                        data['processed_files'] = set(data['processed_files'])
+                    state = ServerState.parse_obj(data)
+                    print(f"[{FLOW_ID}] State loaded successfully. Current index: {state.current_index}")
+            else:
+                print(f"[{FLOW_ID}] State file {state_path} not found. Starting with default state.")
+        except Exception as e:
+            # Best effort: a broken or unreadable state file must not prevent startup
+            print(f"[{FLOW_ID}] Error loading state from HF: {e}. Starting with default state.")
+            state = ServerState()
+async def save_state_to_hf():
+    """Save the current state to the Hugging Face output dataset.
+    state_lock is intentionally NOT acquired here. The callers in this module await
+    this coroutine from inside `async with state_lock:` blocks and asyncio.Lock is not
+    reentrant, so taking the lock again would suspend the caller forever.
+    Returns True when the upload succeeded and False when it failed (the error is printed).
+    """
+    global state
+    api = get_hf_api()
+    state_path = STATE_FILE_NAME
+    state.last_update = datetime.now().isoformat()
+    # Snapshot everything the upload needs before the first await, so the worker
+    # thread never reads the live `state` object.
+    # Convert set of processed files to a list for JSON serialization
+    data_to_save = state.dict()
+    data_to_save['processed_files'] = list(state.processed_files)
+    json_content = json.dumps(data_to_save, indent=2, ensure_ascii=False).encode('utf-8')
+    commit_message = f"[{FLOW_ID}] Update server state. Index: {state.current_index}"
+    try:
+        print(f"[{FLOW_ID}] Saving state to {state_path} in {HF_OUTPUT_DATASET_ID}...")
+        # upload_file performs blocking network I/O; keep it off the event loop
+        await asyncio.to_thread(
+            api.upload_file,
+            path_or_fileobj=io.BytesIO(json_content),
+            path_in_repo=state_path,
+            repo_id=HF_OUTPUT_DATASET_ID,
+            repo_type="dataset",
+            commit_message=commit_message,
+        )
+        print(f"[{FLOW_ID}] State saved successfully.")
+        return True
+    except Exception as e:
+        print(f"[{FLOW_ID}] Error saving state to HF: {e}")
+        return False
+async def update_file_list():
+    """Refresh state.all_zip_files and state.total_files from HF_DATASET_ID, then persist the state.
+    A listing failure is recorded in state.status and printed; the state is saved either way.
+    """
+    global state
+    hub = get_hf_api()
+    async with state_lock:
+        try:
+            state.status = "Updating file list..."
+            print(f"[{FLOW_ID}] Fetching file list from {HF_DATASET_ID}...")
+            listing = hub.list_repo_files(repo_id=HF_DATASET_ID, repo_type="dataset")
+            # Keep only the zip archives that live under frames/, in sorted order
+            archives = sorted(
+                entry for entry in listing
+                if entry.startswith("frames/") and entry.endswith('.zip')
+            )
+            state.all_zip_files = archives
+            state.total_files = len(archives)
+            state.status = "File list updated."
+            print(f"[{FLOW_ID}] Found {state.total_files} zip files.")
+        except Exception as e:
+            state.status = f"Error updating file list: {e}"
+            print(f"[{FLOW_ID}] Error updating file list: {e}")
+        # NOTE(review): awaited while state_lock is held; only safe if
+        # save_state_to_hf() does not acquire state_lock itself -- verify.
+        await save_state_to_hf()
+# --- Core Processing Functions (Modified) ---
 async def get_available_server(timeout: float = 300.0) -> CaptionServer:
     """Round-robin selection of an available caption server."""
                         caption = result.get("caption")
                         if caption:
+                            # Update progress counter and global state
                             progress_tracker['completed'] += 1
+                            async with state_lock:
+                                state.current_file_progress = f"{progress_tracker['completed']}/{progress_tracker['total']}"
                             if progress_tracker['completed'] % 50 == 0:
                                 print(f"[{FLOW_ID}] PROGRESS: {progress_tracker['completed']}/{progress_tracker['total']} captions completed.")
                             return {
                                 "course": course_name,
                                 "image_path": image_path.name,
                 server.total_time += (end_time - start_time)
     print(f"[{FLOW_ID}] FAILED after {MAX_RETRIES} attempts for {image_path.name}.")
     return None
+def _extract_zip_to(zip_path: str, extract_dir: Path) -> None:
+    """Extract every member of the archive at zip_path into extract_dir (blocking)."""
+    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+        zip_ref.extractall(extract_dir)
+async def download_and_extract_zip(repo_file_full_path: str) -> Optional[tuple[Path, str]]:
+    """Download the zip at repo_file_full_path from HF_DATASET_ID and extract it under TEMP_DIR.
+    Returns (extraction directory, zip file name) on success, or None on any failure.
+    On failure the error is printed and a partially extracted directory is removed.
+    """
+    zip_full_name = Path(repo_file_full_path).name
+    course_name = zip_full_name.split('_')[0] # Assuming course name is the prefix before the first underscore
+    # Per-archive extraction directory; computed up front so the error path can remove it
+    extract_dir = TEMP_DIR / course_name / zip_full_name.replace('.', '_')
     try:
+        print(f"[{FLOW_ID}] Downloading file: {repo_file_full_path}. Full name: {zip_full_name}")
+        # hf_hub_download and the extraction below are blocking calls; run them in a
+        # worker thread so the event loop (and the HTTP endpoints) stay responsive.
+        zip_path = await asyncio.to_thread(
+            hf_hub_download,
             repo_id=HF_DATASET_ID,
+            filename=repo_file_full_path, # Use the full path in the repo
             repo_type="dataset",
             token=HF_TOKEN,
         )
         print(f"[{FLOW_ID}] Downloaded to {zip_path}. Extracting...")
+        extract_dir.mkdir(parents=True, exist_ok=True)
+        await asyncio.to_thread(_extract_zip_to, zip_path, extract_dir)
         print(f"[{FLOW_ID}] Extraction complete to {extract_dir}.")
+        # Clean up the downloaded zip file
+        os.remove(zip_path)
+        # Return the extraction directory and the full zip file name
+        return extract_dir, zip_full_name
     except Exception as e:
+        print(f"[{FLOW_ID}] Error downloading or extracting zip for {repo_file_full_path}: {e}")
+        # Do not leave a partially extracted tree behind
+        shutil.rmtree(extract_dir, ignore_errors=True)
+        return None
 async def upload_captions_to_hf(zip_full_name: str, captions: List[Dict]) -> bool:
     """Uploads the final captions JSON file to the output dataset."""
         json_content = json.dumps(captions, indent=2, ensure_ascii=False).encode('utf-8')
+        api = get_hf_api()
         api.upload_file(
             path_or_fileobj=io.BytesIO(json_content),
             path_in_repo=caption_filename,
         print(f"[{FLOW_ID}] Error uploading captions for {zip_full_name}: {e}")
         return False
+async def _mark_file_processed(repo_file_full_path: str) -> None:
+    """Record repo_file_full_path as done, advance the index, reset the per-file fields and persist."""
+    # NOTE(review): save_state_to_hf() is awaited while state_lock is held (the same
+    # pattern is used throughout process_next_file_task). asyncio.Lock is not
+    # reentrant, so this is only safe if save_state_to_hf() does not acquire
+    # state_lock itself -- verify.
+    async with state_lock:
+        state.processed_files.add(repo_file_full_path)
+        state.current_index += 1
+        state.current_file = None
+        state.current_file_progress = "0/0"
+        state.status = "Idle"
+        await save_state_to_hf()
+async def process_next_file_task():
+    """Process zip files one after another, starting at state.current_index.
+    The loop runs while state.is_running is true. Each iteration takes
+    state.all_zip_files[state.current_index], skips it if it is already in
+    state.processed_files, otherwise downloads, extracts, captions and uploads it.
+    The index only advances when a file is skipped, contains no images, or its
+    captions were uploaded; on any failure the index is left unchanged and the
+    same file is retried after a 60 second pause. The loop stops itself when the
+    index reaches state.total_files.
+    """
+    global state
+    if not state.is_running:
+        print(f"[{FLOW_ID}] Processing loop is not running. Exiting task.")
+        return
+    while state.is_running:
+        repo_file_full_path = None
+        current_index = -1
+        async with state_lock:
+            current_index = state.current_index
+            if current_index >= state.total_files:
+                state.status = "Finished processing all files."
+                state.is_running = False
+                print(f"[{FLOW_ID}] Reached end of file list. Stopping processing.")
+                await save_state_to_hf()
+                break
+            repo_file_full_path = state.all_zip_files[current_index]
+            if repo_file_full_path in state.processed_files:
+                state.current_index += 1
+                state.status = f"Skipping processed file: {Path(repo_file_full_path).name}"
+                state.current_file = Path(repo_file_full_path).name
+                print(f"[{FLOW_ID}] Skipping already processed file: {repo_file_full_path}")
+                await save_state_to_hf()
+                continue
+            # Mark the file as in-progress in the state
+            state.status = f"Processing file {current_index + 1}/{state.total_files}"
+            state.current_file = Path(repo_file_full_path).name
+            state.current_file_progress = "0/0"
+            await save_state_to_hf()
+        # --- Start Processing ---
         extract_dir = None
         zip_full_name = None
+        global_success = False
         try:
+            download_result = await download_and_extract_zip(repo_file_full_path)
+            if download_result is None:
+                raise Exception("Failed to download or extract zip file.")
+            extract_dir, zip_full_name = download_result
+            course_name = zip_full_name.split('_')[0]
+            # Find images
             image_paths = [p for p in extract_dir.glob("**/*") if p.is_file() and p.suffix.lower() in ['.jpg', '.jpeg', '.png']]
             print(f"[{FLOW_ID}] Found {len(image_paths)} images to process in {zip_full_name}.")
             if not image_paths:
                 print(f"[{FLOW_ID}] No images found in {zip_full_name}. Marking as complete.")
+                global_success = True
+                # An archive without images has nothing to caption: record it as done and
+                # advance, otherwise the loop would pick the same index again forever.
+                await _mark_file_processed(repo_file_full_path)
             else:
+                # Initialize progress tracker
                 progress_tracker = {
                     'total': len(image_paths),
                     'completed': 0
                 }
+                async with state_lock:
+                    state.current_file_progress = f"0/{len(image_paths)}"
+                    await save_state_to_hf()
+                # Create and run captioning tasks
+                # At most one in-flight request per configured caption server
                 semaphore = asyncio.Semaphore(len(servers))
                 async def limited_send_image_for_captioning(image_path, course_name, progress_tracker):
                     async with semaphore:
                         return await send_image_for_captioning(image_path, course_name, progress_tracker)
+                caption_tasks = [limited_send_image_for_captioning(p, course_name, progress_tracker) for p in image_paths]
                 results = await asyncio.gather(*caption_tasks)
                 all_captions = [r for r in results if r is not None]
+                # Final progress report
                 if len(all_captions) == len(image_paths):
                     print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
+                    global_success = True
                 else:
                     print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.")
+                    global_success = False
+                # Upload results
                 if all_captions and zip_full_name:
                     if await upload_captions_to_hf(zip_full_name, all_captions):
                         print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
+                        # If upload is successful, we mark the file as processed, regardless of partial success
+                        # The uploaded JSON will reflect the actual number of captions
+                        if global_success:
+                            print(f"[{FLOW_ID}] Fully processed and uploaded: {zip_full_name}")
+                        else:
+                            print(f"[{FLOW_ID}] Partially processed but uploaded: {zip_full_name}. Needs manual review.")
+                        # Mark as processed only if upload succeeded
+                        await _mark_file_processed(repo_file_full_path)
                     else:
+                        print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}. Will retry this file later.")
+                        # Do NOT increment index or mark as processed, so it will be retried
+                        async with state_lock:
+                            state.status = f"Error uploading captions for {zip_full_name}. Retrying later."
+                            await save_state_to_hf()
+                        # Wait before retrying to avoid immediate re-attempt on a transient error
+                        await asyncio.sleep(60)
                 else:
+                    print(f"[{FLOW_ID}] No captions generated or zip_full_name is missing. Skipping upload for {zip_full_name}. Will retry later.")
+                    # Do NOT increment index or mark as processed
+                    async with state_lock:
+                        state.status = f"No captions generated for {zip_full_name}. Retrying later."
+                        await save_state_to_hf()
+                    await asyncio.sleep(60)
         except Exception as e:
             error_message = str(e)
+            print(f"[{FLOW_ID}] Critical error in process_next_file_task for {repo_file_full_path}: {error_message}")
+            async with state_lock:
+                state.status = f"CRITICAL ERROR for {Path(repo_file_full_path).name}. Retrying later. Error: {error_message[:50]}..."
+                await save_state_to_hf()
+            # Wait before retrying
+            await asyncio.sleep(60)
         finally:
+            # Cleanup temporary files
             if extract_dir and extract_dir.exists():
                 print(f"[{FLOW_ID}] Cleaned up temporary directory {extract_dir}.")
                 shutil.rmtree(extract_dir, ignore_errors=True)
+            # If the loop is still running, wait a short time before checking for the next file
+            if state.is_running:
+                await asyncio.sleep(5)
 # --- FastAPI App and Endpoints ---
 app = FastAPI(
     title=f"Flow Server {FLOW_ID} API",
     description="Fetches, extracts, and captions images for a given course.",
+    version="2.0.0"
 )
+# Setup Jinja2 templates for the UI
+templates = Jinja2Templates(directory="templates")
 @app.on_event("startup")
 async def startup_event():
+    """Restore the persisted state, refresh the file list and launch the processing loop."""
     print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}. Manager URL: {MANAGER_URL}")
+    # 1. Load state from persistence (HF)
+    await load_state_from_hf()
+    # 2. Update the list of all files from the dataset
+    await update_file_list()
+    # 3. Start the continuous processing task if the index is valid
+    if state.current_index < state.total_files:
+        state.is_running = True
+        # A BackgroundTasks instance only runs its tasks when it is attached to a
+        # response; one created here would never be executed. Schedule the loop on
+        # the event loop directly and keep a reference on app.state so the task
+        # object is not garbage-collected while it is running.
+        app.state.processing_task = asyncio.create_task(process_next_file_task())
+    else:
+        state.is_running = False
+        print(f"[{FLOW_ID}] Index {state.current_index} is out of bounds. Starting in Idle mode.")
 @app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    """Render index.html with a snapshot of the queue state and the caption-server statistics."""
+    async with state_lock:
+        done = len(state.processed_files)
+        # Walk the servers once: collect the per-server rows and the overall totals
+        per_server = []
+        frames_total = 0
+        seconds_total = 0
+        for srv in servers:
+            per_server.append({
+                "url": srv.url,
+                "busy": srv.busy,
+                "processed": srv.total_processed,
+                "fps": f"{srv.fps:.2f}"
+            })
+            frames_total += srv.total_processed
+            seconds_total += srv.total_time
+        # Guard against division by zero before any image has been processed
+        if seconds_total > 0:
+            fps_all = frames_total / seconds_total
+        else:
+            fps_all = 0
+        context = dict(
+            request=request,
+            flow_id=FLOW_ID,
+            status=state.status,
+            is_running=state.is_running,
+            total_files=state.total_files,
+            processed_count=done,
+            remaining_count=state.total_files - done,
+            current_index=state.current_index,
+            current_file=state.current_file or "N/A",
+            current_file_progress=state.current_file_progress,
+            last_update=state.last_update,
+            overall_fps=f"{fps_all:.2f}",
+            server_stats=per_server,
+        )
+    return templates.TemplateResponse("index.html", context)
+@app.post("/set_index")
+async def set_index(request: Request, background_tasks: BackgroundTasks):
+    """Set state.current_index from the form field "start_index".
+    An index inside the list (re)starts processing from that position; an index equal
+    to the list length stops processing; anything else is rejected with HTTP 400.
+    """
+    global state
+    payload = await request.form()
+    raw_index = payload.get("start_index")
+    try:
+        new_index = int(raw_index)
+    except (TypeError, ValueError):
+        raise HTTPException(status_code=400, detail="Invalid index value.")
+    async with state_lock:
+        # Reject anything outside 0..total_files first
+        if new_index < 0 or new_index > state.total_files:
+            raise HTTPException(status_code=400, detail=f"Index {new_index} is out of bounds (0 to {state.total_files}).")
+        # Pointing exactly at the end of the list means "nothing left to do"
+        if new_index == state.total_files:
+            state.current_index = new_index
+            state.is_running = False
+            state.status = "Finished processing all files."
+            await save_state_to_hf()
+            return {"status": "success", "message": "Index set to end of list. Processing stopped."}
+        state.current_index = new_index
+        state.status = f"Index set to {new_index}. Restarting processing."
+        # If the loop is not running, start it
+        if not state.is_running:
+            state.is_running = True
+            background_tasks.add_task(process_next_file_task)
+        await save_state_to_hf()
+        print(f"[{FLOW_ID}] Index manually set to {new_index}.")
+        return {"status": "success", "message": f"Start index set to {new_index}. Processing will resume from this point."}
+@app.post("/control_processing")
+async def control_processing(request: Request, background_tasks: BackgroundTasks):
+    """Start or stop the processing loop according to the form field "action".
+    Accepted actions are "start" and "stop"; any other value is rejected with HTTP 400.
+    """
+    global state
+    payload = await request.form()
+    action = payload.get("action")
+    async with state_lock:
+        if action == "stop":
+            if not state.is_running:
+                return {"status": "info", "message": "Processing is already stopped."}
+            state.is_running = False
+            state.status = "Processing stopped by user."
+            await save_state_to_hf()
+            return {"status": "success", "message": "Processing loop stopped."}
+        if action != "start":
+            raise HTTPException(status_code=400, detail="Invalid action.")
+        # action == "start" from here on
+        if state.current_index >= state.total_files:
+            return {"status": "error", "message": "Cannot start. All files have been processed."}
+        if state.is_running:
+            return {"status": "info", "message": "Processing is already running."}
+        state.is_running = True
+        state.status = "Processing started."
+        background_tasks.add_task(process_next_file_task)
+        await save_state_to_hf()
+        return {"status": "success", "message": "Processing loop started."}
+@app.get("/status")
+async def get_status():
+    """Return the queue state and the caption-server statistics as a JSON-serializable dict."""
+    async with state_lock:
+        done = len(state.processed_files)
+        # Walk the servers once: collect the per-server rows and the overall totals
+        per_server = []
+        frames_total = 0
+        seconds_total = 0
+        for srv in servers:
+            per_server.append({
+                "url": srv.url,
+                "busy": srv.busy,
+                "processed": srv.total_processed,
+                "fps": f"{srv.fps:.2f}"
+            })
+            frames_total += srv.total_processed
+            seconds_total += srv.total_time
+        # Guard against division by zero before any image has been processed
+        if seconds_total > 0:
+            fps_all = frames_total / seconds_total
+        else:
+            fps_all = 0
+        return dict(
+            flow_id=FLOW_ID,
+            status=state.status,
+            is_running=state.is_running,
+            total_files=state.total_files,
+            processed_count=done,
+            remaining_count=state.total_files - done,
+            current_index=state.current_index,
+            current_file=state.current_file,
+            current_file_progress=state.current_file_progress,
+            last_update=state.last_update,
+            overall_fps=f"{fps_all:.2f}",
+            server_stats=per_server,
+        )
+# The original /process_course endpoint is now obsolete as the server manages its own queue
+# @app.post("/process_course")
+# async def process_course(request: ProcessCourseRequest, background_tasks: BackgroundTasks):
+#     return {"status": "obsolete", "message": "The server now manages its own processing queue based on the index."}
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=FLOW_PORT)