Fred808 committed on
Commit
f5f8b6b
·
verified ·
1 Parent(s): 5b60a10

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +475 -360
app.py CHANGED
@@ -1,360 +1,475 @@
1
- import os
2
- import json
3
- import asyncio
4
- import aiohttp
5
- from typing import Dict, List, Set, Optional
6
- from pathlib import Path
7
- from datetime import datetime
8
-
9
- from fastapi import FastAPI, BackgroundTasks, HTTPException, status
10
- from pydantic import BaseModel
11
- from huggingface_hub import HfApi, HfFileSystem
12
- import uvicorn
13
-
14
# --- Configuration ---
# Manager Server will run on port 8000
MANAGER_PORT = 8000

# Hugging Face Configuration
# FIX: the token was hard-coded as an empty string; read it from the
# environment instead so deployments can inject a real token without
# committing secrets (matches how the flow servers are configured).
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_DATASET_ID = "Fred808/BG3"  # Dataset where the zip files are located
HF_DATASET_REPO_TYPE = "dataset"
FRAMES_FOLDER = "frames"
STATE_FILE_PATH = "flow_processing_state.json"

# Flow Server Configuration (hardcoded as per user request)
# NOTE: These URLs must be accessible to the Manager Server.
# For local testing, you might use localhost with different ports (e.g., 8001 and 8002)
FLOW_SERVERS = {
    "flow1": "http://localhost:8001",
    "flow2": "https://fred808-flowcap2.hf.space",
}
32
-
33
# --- State Management Models ---
class TaskStatus(BaseModel):
    """Lifecycle record for a single zip-file processing task."""

    status: str  # One of: UNPROCESSED, IN_PROGRESS, COMPLETED, FAILED
    assigned_to: Optional[str] = None  # flow1 or flow2
    assigned_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    error_message: Optional[str] = None


class ProcessingState(BaseModel):
    """Complete persistent manager state: every task plus current flow assignments."""

    # Keyed by the zip file name, which doubles as the course name.
    tasks: Dict[str, TaskStatus] = {}

    # Which course (if any) each flow server is currently working on.
    flow_assignments: Dict[str, Optional[str]] = {
        "flow1": None,
        "flow2": None,
    }


class CompleteTaskRequest(BaseModel):
    """Payload a flow server POSTs when it finishes (or fails) a task."""

    flow_id: str
    course_name: str
    success: bool
    error_message: Optional[str] = None
56
-
57
- # --- Global State and Initialization ---
58
# --- Global State and Initialization ---
app = FastAPI(
    title="BG3 Processing Manager",
    description="Coordinates flow servers for BG3 dataset processing.",
    version="1.0.0",
)

# Shared Hugging Face clients and the single in-memory state object.
api = HfApi(token=HF_TOKEN)
fs = HfFileSystem(token=HF_TOKEN)
state = ProcessingState()
is_coordinating = False  # Guards against launching the coordinator twice.
68
-
69
- # --- Persistence Functions ---
70
-
71
def get_full_path(filename: str) -> str:
    """Return the repo-relative path ``<dataset_id>/<filename>``.

    BUG FIX: the previous body returned a literal ``(unknown)`` placeholder
    (a templating/scrape artifact) instead of interpolating ``filename``.
    """
    return f"{HF_DATASET_ID}/{filename}"
73
-
74
async def load_state_from_hf():
    """Load persisted processing state from the HF dataset, or initialize it.

    Falls back to `initialize_tasks()` when the state file is missing or any
    step fails, so the manager always ends up with a usable in-memory state.
    """
    global state
    # HfFileSystem addresses dataset repos with a "datasets/" prefix; a bare
    # repo id would be resolved as a *model* repo.
    state_fs_path = f"datasets/{HF_DATASET_ID}/{STATE_FILE_PATH}"
    try:
        # Check if the state file exists
        if fs.exists(state_fs_path):
            print(f"Loading state from {STATE_FILE_PATH}...")

            # BUG FIX: HfApi has no `read_file` method; read through the
            # HfFileSystem handle instead.
            with fs.open(state_fs_path, "r", encoding="utf-8") as f:
                content = f.read()

            data = json.loads(content)
            state = ProcessingState(**data)
            print(f"State loaded. Total tasks: {len(state.tasks)}")
        else:
            print(f"State file {STATE_FILE_PATH} not found. Initializing.")
            await initialize_tasks()
            await save_state_to_hf()  # Persist the freshly built state.

    except Exception as e:
        print(f"Error loading state from HF: {e}")
        # Fallback to initialization if loading fails.
        await initialize_tasks()
100
-
101
async def save_state_to_hf():
    """Persist the current in-memory state as JSON to the HF dataset repo."""
    try:
        print(f"Saving state to {STATE_FILE_PATH}...")
        content = state.model_dump_json(indent=2).encode('utf-8')

        # FIX: `upload_file` is a blocking network call; run it in a worker
        # thread so the event loop stays responsive while the commit uploads.
        await asyncio.to_thread(
            api.upload_file,
            path_or_fileobj=content,
            path_in_repo=STATE_FILE_PATH,
            repo_id=HF_DATASET_ID,
            repo_type=HF_DATASET_REPO_TYPE,
            commit_message="Update processing state",
        )
        print("State saved successfully.")
    except Exception as e:
        print(f"Error saving state to HF: {e}")
116
-
117
async def initialize_tasks():
    """Discover zip files in the frames folder and build/refresh the task table.

    Course names are cross-checked against the Fred808/BG1 dataset when that
    list is available; otherwise every zip found in BG3 becomes a task.

    Raises:
        RuntimeError: when listing the BG3 frames folder fails entirely.
    """
    global state
    print(f"Discovering zip files in {FRAMES_FOLDER}/...")

    # 1. Fetch the list of valid course names from Fred808/BG1.
    #    The base names of the files in the BG1 repo root serve as the list.
    print("Fetching valid course names from Fred808/BG1...")
    try:
        # FIX: HfFileSystem needs a "datasets/" prefix for dataset repos; a
        # bare id is interpreted as a model repo.
        bg1_files = fs.ls("datasets/Fred808/BG1", detail=False)
        # Path(f).stem strips the extension ('course_name.zip' -> 'course_name').
        valid_course_names = {Path(f).stem for f in bg1_files if not Path(f).name.startswith('.')}

        if not valid_course_names:
            print("Warning: Fred808/BG1 dataset seems empty or contains no processable files. Using all found zip files.")
            # An empty set below means "accept every zip found in BG3".

    except Exception as e:
        print(f"Error fetching course names from Fred808/BG1: {e}. Falling back to all zip files in BG3.")
        valid_course_names = set()

    # 2. List zip files in the frames folder of the main dataset (BG3).
    try:
        file_list = fs.ls(f"datasets/{HF_DATASET_ID}/{FRAMES_FOLDER}", detail=False)

        # NOTE: the old `not f.endswith(".zip.json")` guard was redundant —
        # a path ending in ".zip" can never also end in ".zip.json".
        zip_files = [Path(f).name for f in file_list if f.endswith(".zip")]

        new_tasks = {}
        for zip_file in zip_files:
            course_name = zip_file.replace(".zip", "")

            # 3. Filter: only process courses present in the BG1 list (when non-empty).
            if valid_course_names and course_name not in valid_course_names:
                print(f"Skipping {course_name}: Not found in Fred808/BG1 list.")
                continue

            # Keep any status we were already tracking for this course.
            new_tasks[course_name] = state.tasks.get(course_name, TaskStatus(status="UNPROCESSED"))

        # Merge with existing state: drop tasks whose zip disappeared, but keep
        # history for anything that was already started or finished.
        existing_tasks_to_keep = {
            k: v for k, v in state.tasks.items()
            if k in new_tasks or v.status in ["IN_PROGRESS", "COMPLETED", "FAILED"]
        }

        # New discoveries win over stale entries.
        state.tasks = {**existing_tasks_to_keep, **new_tasks}

        print(f"Found {len(zip_files)} zip files in {HF_DATASET_ID}/{FRAMES_FOLDER}. Valid course names from BG1: {len(valid_course_names)}. Total tasks tracked: {len(state.tasks)}")

    except Exception as e:
        print(f"Error discovering files from HF: {e}")
        # If discovery fails, we can't proceed.
        raise RuntimeError(f"Failed to discover files: {e}")
180
-
181
- # --- Core Coordination Logic ---
182
-
183
async def assign_next_task(flow_id: str):
    """Find the next UNPROCESSED task, mark it IN_PROGRESS, and push it to a flow server.

    On any delivery failure the task is reverted to UNPROCESSED so another
    attempt (or another flow server) can pick it up later.
    """
    global state

    async def _revert_assignment():
        # Undo the IN_PROGRESS marking made below and persist the rollback.
        # (Extracted: this was copy-pasted three times in the original.)
        if next_course:
            state.tasks[next_course] = TaskStatus(status="UNPROCESSED")
            state.flow_assignments[flow_id] = None
            await save_state_to_hf()

    # 1. Find an UNPROCESSED task (dict insertion order decides priority).
    next_course = None
    for course_name, task_status in state.tasks.items():
        if task_status.status == "UNPROCESSED":
            next_course = course_name
            break

    if next_course is None:
        print(f"No UNPROCESSED tasks left for {flow_id}.")
        course_to_assign = None
    else:
        # 2. Update state to IN_PROGRESS.
        state.tasks[next_course] = TaskStatus(
            status="IN_PROGRESS",
            assigned_to=flow_id,
            assigned_at=datetime.now(),
        )
        state.flow_assignments[flow_id] = next_course
        course_to_assign = next_course

    # 3. Persist the state change.
    await save_state_to_hf()

    # 4. Notify the flow server; a None course_name tells it to stop.
    flow_url = FLOW_SERVERS.get(flow_id)
    if not flow_url:
        print(f"Error: Unknown flow_id {flow_id}")
        return

    try:
        print(f"Assigning '{course_to_assign}' to {flow_id} at {flow_url}/process_course")
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{flow_url}/process_course",
                json={"course_name": course_to_assign}
            ) as response:
                if response.status != 200:
                    print(f"Error sending task to {flow_id}: {response.status} - {await response.text()}")
                    await _revert_assignment()
                else:
                    print(f"Successfully assigned {course_to_assign} to {flow_id}.")

    except aiohttp.ClientConnectorError as e:
        print(f"Connection Error: Could not connect to {flow_id} at {flow_url}. Reverting task status. Error: {e}")
        await _revert_assignment()
    except Exception as e:
        print(f"Unexpected error during assignment to {flow_id}. Error: {e}")
        await _revert_assignment()
250
-
251
-
252
async def coordinate_loop():
    """Bootstrap the coordinator: load state, then hand tasks to idle flow servers.

    After this initial kick-off the system is event-driven — subsequent
    assignments happen from the /task/complete endpoint.
    """
    global is_coordinating
    if is_coordinating:
        print("Coordinator is already running.")
        return

    is_coordinating = True
    print("Starting coordination loop...")

    try:
        # Load persisted state (and discover tasks) before assigning anything.
        await load_state_from_hf()

        # Kick off an assignment for every flow server that is currently idle.
        idle_flows = [fid for fid in FLOW_SERVERS if state.flow_assignments.get(fid) is None]
        for flow_id in idle_flows:
            asyncio.create_task(assign_next_task(flow_id))

    except Exception as e:
        print(f"Coordination loop failed to start: {e}")
278
-
279
- # --- API Endpoints ---
280
-
281
@app.on_event("startup")
async def startup_event():
    """Launch the coordinator when the server boots."""
    # BUG FIX: instantiating `BackgroundTasks()` outside a request context does
    # nothing — FastAPI only runs BackgroundTasks attached to a response.
    # Schedule the coroutine on the running event loop instead.
    asyncio.create_task(coordinate_loop())
285
-
286
@app.get("/")
async def root():
    """Health/summary endpoint: coordinator status plus task counts by state."""
    counts = {"UNPROCESSED": 0, "IN_PROGRESS": 0, "COMPLETED": 0}
    for task in state.tasks.values():
        if task.status in counts:
            counts[task.status] += 1

    return {
        "message": "BG3 Processing Manager API",
        "status": "running",
        "is_coordinating": is_coordinating,
        "flow_assignments": state.flow_assignments,
        "total_tasks": len(state.tasks),
        "unprocessed": counts["UNPROCESSED"],
        "in_progress": counts["IN_PROGRESS"],
        "completed": counts["COMPLETED"],
    }
298
-
299
@app.post("/task/complete")
async def task_complete(request: CompleteTaskRequest):
    """Receive a completion/failure report from a flow server.

    Updates the task record, frees the flow server's slot, persists state,
    and immediately schedules the next assignment for that server.
    """
    global state
    flow_id = request.flow_id
    course_name = request.course_name

    if course_name not in state.tasks:
        raise HTTPException(status_code=404, detail=f"Unknown course: {course_name}")

    task = state.tasks[course_name]

    # Sanity check only — a mismatch is logged but the report is still honored.
    if task.assigned_to != flow_id:
        print(f"Warning: {flow_id} reported completion for a task not assigned to it: {course_name}")

    task.completed_at = datetime.now()
    if request.success:
        print(f"Task COMPLETED: {course_name} by {flow_id}")
        task.status = "COMPLETED"
        task.error_message = None
    else:
        # A more robust system might retry; for now a failure is terminal.
        print(f"Task FAILED: {course_name} by {flow_id}. Error: {request.error_message}")
        task.status = "FAILED"
        task.error_message = request.error_message

    # Free up the flow server slot and persist the change.
    state.flow_assignments[flow_id] = None
    await save_state_to_hf()

    # Hand the now-idle flow server its next task.
    asyncio.create_task(assign_next_task(flow_id))

    outcome = "COMPLETED" if request.success else "FAILED"
    return {"status": "success", "message": f"Task {course_name} marked as {outcome}. Next task assigned."}
339
-
340
@app.post("/start_coordination")
async def start_coordination(background_tasks: BackgroundTasks):
    """Manually kick off the coordination loop (no-op if already running)."""
    if is_coordinating:
        return {"status": "info", "message": "Coordination is already running."}

    background_tasks.add_task(coordinate_loop)
    return {"status": "success", "message": "Coordination loop started."}


@app.get("/state")
async def get_state():
    """Expose the full in-memory processing state for inspection."""
    return state
357
-
358
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the port is reachable from outside the sandbox/container.
    uvicorn.run(app, host="0.0.0.0", port=MANAGER_PORT)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import time
4
+ import asyncio
5
+ import aiohttp
6
+ import zipfile
7
+ from typing import Dict, List, Set, Optional
8
+ from urllib.parse import quote
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ import io
12
+
13
+ from fastapi import FastAPI, BackgroundTasks, HTTPException, status
14
+ from pydantic import BaseModel, Field
15
+ from huggingface_hub import HfApi, hf_hub_download
16
+ import uvicorn
17
+
18
# --- Configuration ---
# Flow server identity and port come from the environment for easy deployment.
FLOW_ID = os.getenv("FLOW_ID", "flow_default")
FLOW_PORT = int(os.getenv("FLOW_PORT", 8001))  # Default to 8001 for flow1

# Manager Server Configuration
MANAGER_URL = os.getenv("MANAGER_URL", "https://fred808-fcord.hf.space")
MANAGER_COMPLETE_TASK_URL = f"{MANAGER_URL}/task/complete"

# Hugging Face Configuration
HF_TOKEN = os.getenv("HF_TOKEN", "")  # User provided token
HF_DATASET_ID = os.getenv("HF_DATASET_ID", "Fred808/BG3")
HF_OUTPUT_DATASET_ID = os.getenv("HF_OUTPUT_DATASET_ID", "fred808/helium")  # Target dataset for captions

# Pool of caption-inference endpoints; work is spread across them round-robin.
# (Full list carried over from the original deployment.)
CAPTION_SERVERS = [
    "https://fred808-pil-4-1.hf.space/analyze",
    "https://fred808-pil-4-2.hf.space/analyze",
    "https://fred808-pil-4-3.hf.space/analyze",
    "https://fred1012-fred1012-gw0j2h.hf.space/analyze",
    "https://fred1012-fred1012-wqs6c2.hf.space/analyze",
    "https://fred1012-fred1012-oncray.hf.space/analyze",
    "https://fred1012-fred1012-4goge7.hf.space/analyze",
    "https://fred1012-fred1012-z0eh7m.hf.space/analyze",
    "https://fred1012-fred1012-u95rte.hf.space/analyze",
    "https://fred1012-fred1012-igje22.hf.space/analyze",
    "https://fred1012-fred1012-ibkuf8.hf.space/analyze",
    "https://fred1012-fred1012-nwqthy.hf.space/analyze",
    "https://fred1012-fred1012-4ldqj4.hf.space/analyze",
    "https://fred1012-fred1012-pivlzg.hf.space/analyze",
    "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
    "https://fred1012-fred1012-u7lh57.hf.space/analyze",
    "https://fred1012-fred1012-q8djv1.hf.space/analyze",
    "https://fredalone-fredalone-ozugrp.hf.space/analyze",
    "https://fredalone-fredalone-9brxj2.hf.space/analyze",
    "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
    "https://fredalone-fredalone-vbli2y.hf.space/analyze",
    "https://fredalone-fredalone-uggger.hf.space/analyze",
    "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
    "https://fredalone-fredalone-d1f26d.hf.space/analyze",
    "https://fredalone-fredalone-461jp2.hf.space/analyze",
    "https://fredalone-fredalone-3enfg4.hf.space/analyze",
    "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
    "https://fredalone-fredalone-ivtjua.hf.space/analyze",
    "https://fredalone-fredalone-6bezt2.hf.space/analyze",
    "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
    "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
    "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
    "https://fredalone-fredalone-wclyog.hf.space/analyze",
    "https://fredalone-fredalone-t27vig.hf.space/analyze",
    "https://fredalone-fredalone-gahbxh.hf.space/analyze",
    "https://fredalone-fredalone-kw2po4.hf.space/analyze",
    "https://fredalone-fredalone-8h285h.hf.space/analyze"
]
MODEL_TYPE = "Florence-2-large"

# Per-flow scratch directory for downloaded/extracted images.
TEMP_DIR = Path(f"temp_images_{FLOW_ID}")
TEMP_DIR.mkdir(exist_ok=True)
77
+
78
# --- Models ---
class ProcessCourseRequest(BaseModel):
    """Body of /process_course; a missing course_name means 'nothing to do'."""

    course_name: Optional[str] = None


class CaptionServer:
    """Tracks availability and throughput stats for one caption endpoint."""

    def __init__(self, url):
        self.url = url
        self.busy = False          # True while a request is in flight.
        self.total_processed = 0   # Attempts finished (success or failure).
        self.total_time = 0        # Cumulative seconds spent on attempts.
        self.model = MODEL_TYPE

    @property
    def fps(self):
        # Average images per second; 0 before any work has been timed.
        return self.total_processed / self.total_time if self.total_time > 0 else 0


# Global pool of caption servers plus the round-robin cursor.
servers = [CaptionServer(url) for url in CAPTION_SERVERS]
server_index = 0
97
+
98
+ # --- Core Processing Functions ---
99
+
100
async def get_available_server(timeout: float = 300.0) -> CaptionServer:
    """Return the next idle caption server (round-robin), waiting up to `timeout` seconds."""
    global server_index
    waited_from = time.time()
    while True:
        # One full round-robin sweep over the pool.
        for _ in range(len(servers)):
            candidate = servers[server_index]
            server_index = (server_index + 1) % len(servers)
            if not candidate.busy:
                return candidate

        # Everyone is busy: back off briefly, then give up once past the deadline.
        await asyncio.sleep(0.5)
        if time.time() - waited_from > timeout:
            raise TimeoutError(f"Timeout ({timeout}s) waiting for an available caption server.")
118
+
119
async def send_image_for_captioning(image_path: Path, course_name: str, progress_tracker: Dict) -> Optional[Dict]:
    """Caption a single image via the server pool; return a caption record or None.

    Retries up to MAX_RETRIES times, each attempt on whichever server the
    round-robin selector hands out. `progress_tracker` must hold integer
    'total' and 'completed' entries and is mutated in place.
    """
    MAX_RETRIES = 3
    # BUG FIX: read the image once up front instead of handing an open file
    # object to FormData — the old code never closed that handle (fd leak) and
    # a consumed handle could not be re-sent on retry.
    image_bytes = image_path.read_bytes()

    for attempt in range(MAX_RETRIES):
        server = None
        start_time = None
        try:
            # 1. Get an available server (waits when all are busy, with a timeout).
            server = await get_available_server()
            server.busy = True
            start_time = time.time()

            # Keep the log quiet: announce only the first attempt.
            if attempt == 0:
                print(f"[{FLOW_ID}] Starting attempt on {image_path.name}...")

            # 2. Prepare the multipart request.
            form_data = aiohttp.FormData()
            form_data.add_field('file',
                                image_bytes,
                                filename=image_path.name,
                                content_type='image/jpeg')
            form_data.add_field('model_choice', MODEL_TYPE)

            # 3. Send the request (10-minute ceiling for slow model servers).
            async with aiohttp.ClientSession() as session:
                async with session.post(server.url, data=form_data, timeout=600) as resp:
                    if resp.status == 200:
                        result = await resp.json()
                        caption = result.get("caption")

                        if caption:
                            progress_tracker['completed'] += 1
                            if progress_tracker['completed'] % 50 == 0:
                                print(f"[{FLOW_ID}] PROGRESS: {progress_tracker['completed']}/{progress_tracker['total']} captions completed.")
                            else:
                                print(f"[{FLOW_ID}] Success: {image_path.name} captioned by {server.url}")

                            return {
                                "course": course_name,
                                "image_path": image_path.name,
                                "caption": caption,
                                "timestamp": datetime.now().isoformat()
                            }
                        print(f"[{FLOW_ID}] Server {server.url} returned success but no caption for {image_path.name}. Retrying...")
                        continue  # Retry with a different server

                    error_text = await resp.text()
                    print(f"[{FLOW_ID}] Error from server {server.url} for {image_path.name}: {resp.status} - {error_text}. Retrying...")
                    continue  # Retry with a different server

        except (aiohttp.ClientError, asyncio.TimeoutError, TimeoutError) as e:
            print(f"[{FLOW_ID}] Connection/Timeout error for {image_path.name} on {server.url if server else 'unknown server'}: {e}. Retrying...")
            continue
        except Exception as e:
            print(f"[{FLOW_ID}] Unexpected error during captioning for {image_path.name}: {e}. Retrying...")
            continue
        finally:
            # Always release the server and record timing stats.
            # BUG FIX: guard on start_time so a failure while *acquiring* the
            # server cannot raise NameError here and mask the real error.
            if server is not None and start_time is not None:
                server.busy = False
                server.total_processed += 1
                server.total_time += (time.time() - start_time)

    print(f"[{FLOW_ID}] FAILED after {MAX_RETRIES} attempts for {image_path.name}.")
    return None
190
+
191
async def download_and_extract_zip(course_name: str, processed_files: Set[str]) -> tuple[Optional[Path], Optional[str], Optional[str]]:
    """Download the next unprocessed zip for `course_name` and extract it.

    Returns (extract_dir, zip_full_name, repo_file_full_path); all three are
    None when there is nothing new to process or when any step fails.
    """
    print(f"[{FLOW_ID}] Looking for files starting with '{course_name}' in frames/ directory...")

    try:
        api = HfApi(token=HF_TOKEN)

        # List every file in the dataset, then keep zips under frames/ whose
        # name starts with the course name.
        repo_files = api.list_repo_files(
            repo_id=HF_DATASET_ID,
            repo_type="dataset"
        )
        matching_files = [
            f for f in repo_files
            if f.startswith(f"frames/{course_name}") and f.endswith('.zip')
        ]

        if not matching_files:
            print(f"[{FLOW_ID}] No zip files found starting with '{course_name}' in frames/ directory.")
            # BUG FIX: this branch previously returned a 2-tuple, breaking the
            # 3-tuple contract every caller unpacks.
            return None, None, None

        # Skip zips this flow has already handled during the current run.
        unprocessed_files = [f for f in matching_files if f not in processed_files]
        if not unprocessed_files:
            print(f"[{FLOW_ID}] No new zip files found for '{course_name}'.")
            return None, None, None

        repo_file_full_path = unprocessed_files[0]  # e.g. frames/COURSE_full_name.zip
        zip_full_name = Path(repo_file_full_path).name
        print(f"[{FLOW_ID}] Found new matching file: {repo_file_full_path}. Full name: {zip_full_name}")

        # hf_hub_download resolves/caches the file locally and returns its path.
        zip_path = hf_hub_download(
            repo_id=HF_DATASET_ID,
            filename=repo_file_full_path,  # Full path within the repo.
            repo_type="dataset",
            token=HF_TOKEN,
        )

        print(f"[{FLOW_ID}] Downloaded to {zip_path}. Extracting...")

        extract_dir = TEMP_DIR / course_name
        extract_dir.mkdir(exist_ok=True)

        # NOTE(review): extractall trusts archive member paths; acceptable for
        # our own datasets, but unsafe for untrusted zips (path traversal).
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        print(f"[{FLOW_ID}] Extraction complete to {extract_dir}.")
        return extract_dir, zip_full_name, repo_file_full_path

    except Exception as e:
        print(f"[{FLOW_ID}] Error downloading or extracting zip for {course_name}: {e}")
        return None, None, None
252
+
253
async def upload_captions_to_hf(zip_full_name: str, captions: List[Dict]) -> bool:
    """Upload the captions for one zip as `<zip name>.json` to the output dataset.

    Returns True on success, False on any failure (which is logged).
    """
    # The output JSON is named after the full zip file name, per requirements.
    caption_filename = Path(zip_full_name).with_suffix('.json').name

    try:
        print(f"[{FLOW_ID}] Uploading {len(captions)} captions for {zip_full_name} as {caption_filename} to {HF_OUTPUT_DATASET_ID}...")

        # Serialize in memory; no temp file needed.
        json_content = json.dumps(captions, indent=2, ensure_ascii=False).encode('utf-8')

        api = HfApi(token=HF_TOKEN)
        # FIX: `upload_file` blocks on network I/O; run it off the event loop
        # so concurrent captioning work is not stalled during the commit.
        await asyncio.to_thread(
            api.upload_file,
            path_or_fileobj=io.BytesIO(json_content),
            path_in_repo=caption_filename,
            repo_id=HF_OUTPUT_DATASET_ID,
            repo_type="dataset",
            commit_message=f"[{FLOW_ID}] Captions for {zip_full_name}",
        )

        print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
        return True

    except Exception as e:
        print(f"[{FLOW_ID}] Error uploading captions for {zip_full_name}: {e}")
        return False
282
+
283
async def process_course_task(course_name: str):
    """Process every zip matching `course_name`: download, caption, upload.

    Loops until no new zip files are found, then reports the overall result
    back to the Manager so this flow server's slot is freed.
    """
    import shutil  # kept local, as in the original cleanup code

    print(f"[{FLOW_ID}] Starting continuous processing for course: {course_name}")

    processed_files = set()
    all_processed_files_log = []
    global_success = True
    # BUG FIX: pre-initialize so the final report below cannot hit a NameError
    # when a failure happens outside the `except` branch (e.g. a failed upload).
    error_message = None

    # Keep checking for new files matching the course_name prefix.
    while True:
        extract_dir = None
        zip_full_name = None
        repo_file_full_path = None
        # BUG FIX: initialize before the try so the `finally` check can never
        # raise NameError if the download call itself blows up.
        download_result = None

        try:
            # Returns (extract_dir, zip_full_name, repo_file_full_path) or Nones.
            download_result = await download_and_extract_zip(course_name, processed_files)

            if download_result is None or download_result[0] is None:
                if download_result is not None and download_result[1] is None:
                    # Clean "nothing left to do" signal — stop looping.
                    print(f"[{FLOW_ID}] No new files found for {course_name}. Exiting loop.")
                    break
                # An error occurred during search/download.
                raise Exception("Failed to download or extract zip file.")

            extract_dir, zip_full_name, repo_file_full_path = download_result

            # Mark immediately so the next iteration skips this file.
            processed_files.add(repo_file_full_path)
            all_processed_files_log.append(repo_file_full_path)

            # Recursive glob so images inside subdirectories are found too.
            image_paths = [p for p in extract_dir.glob("**/*") if p.is_file() and p.suffix.lower() in ['.jpg', '.jpeg', '.png']]
            print(f"[{FLOW_ID}] Found {len(image_paths)} images to process in {zip_full_name}.")

            current_file_success = False

            if not image_paths:
                print(f"[{FLOW_ID}] No images found in {zip_full_name}. Marking as complete.")
                current_file_success = True
            else:
                progress_tracker = {'total': len(image_paths), 'completed': 0}
                print(f"[{FLOW_ID}] Starting captioning for {progress_tracker['total']} images in {zip_full_name}...")

                # Cap concurrency at the size of the caption-server pool.
                semaphore = asyncio.Semaphore(len(servers))

                async def caption_with_limit(path):
                    async with semaphore:
                        return await send_image_for_captioning(path, course_name, progress_tracker)

                # Caption every image concurrently, then drop failures.
                results = await asyncio.gather(*(caption_with_limit(p) for p in image_paths))
                all_captions = [r for r in results if r is not None]

                if len(all_captions) == len(image_paths):
                    print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Successfully completed all {len(all_captions)} captions.")
                    current_file_success = True
                else:
                    print(f"[{FLOW_ID}] FINAL PROGRESS for {zip_full_name}: Completed with partial result: {len(all_captions)}/{len(image_paths)} captions.")

                # Upload whatever we got; partial results still mark the
                # overall task as failed but are not thrown away.
                if all_captions and zip_full_name:
                    print(f"[{FLOW_ID}] Uploading {len(all_captions)} captions for {zip_full_name}...")
                    if await upload_captions_to_hf(zip_full_name, all_captions):
                        print(f"[{FLOW_ID}] Successfully uploaded captions for {zip_full_name}.")
                        if not current_file_success:
                            global_success = False
                    else:
                        print(f"[{FLOW_ID}] Failed to upload captions for {zip_full_name}.")
                        current_file_success = False
                        global_success = False
                else:
                    print(f"[{FLOW_ID}] No captions generated or zip_full_name is missing. Skipping upload for {zip_full_name}.")
                    current_file_success = False
                    global_success = False

        except Exception as e:
            error_message = str(e)
            print(f"[{FLOW_ID}] Critical error in process_course_task for {course_name}: {error_message}")
            global_success = False

        finally:
            # Remove the per-file scratch directory.
            if extract_dir and extract_dir.exists():
                shutil.rmtree(extract_dir, ignore_errors=True)
                print(f"[{FLOW_ID}] Cleaned up temporary directory {extract_dir}.")

            # Unrecoverable failure before extraction: stop the loop entirely.
            if download_result is None and extract_dir is None:
                break

    # --- Final report after the loop is complete ---
    print(f"[{FLOW_ID}] All processing loops complete for {course_name}.")
    print(f"[{FLOW_ID}] Total files processed: {len(all_processed_files_log)}")
    print(f"[{FLOW_ID}] List of processed files: {all_processed_files_log}")

    # BUG FIX: actually report back to the Manager — the call was commented
    # out, which left the Manager's flow slot occupied forever.
    final_error_message = error_message if not global_success else None
    await report_completion(course_name, global_success, final_error_message)

    return global_success
408
+
409
async def report_completion(course_name: str, success: bool, error_message: Optional[str] = None):
    """POST the final task outcome to the Manager's /task/complete endpoint."""
    print(f"[{FLOW_ID}] Reporting completion for {course_name} (Success: {success})...")

    payload = {
        "flow_id": FLOW_ID,
        "course_name": course_name,
        "success": success,
        "error_message": error_message,
    }

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(MANAGER_COMPLETE_TASK_URL, json=payload) as resp:
                if resp.status == 200:
                    print(f"[{FLOW_ID}] Successfully reported completion to Manager.")
                else:
                    print(f"[{FLOW_ID}] ERROR: Manager reported non-200 status: {resp.status} - {await resp.text()}")

    except aiohttp.ClientError as e:
        # The Manager never hears about this task; its slot stays occupied.
        print(f"[{FLOW_ID}] CRITICAL ERROR: Could not connect to Manager at {MANAGER_COMPLETE_TASK_URL}. Task completion not reported. Error: {e}")
    except Exception as e:
        print(f"[{FLOW_ID}] Unexpected error during reporting: {e}")
432
+
433
# --- FastAPI App and Endpoints ---

app = FastAPI(
    title=f"Flow Server {FLOW_ID} API",
    description="Fetches, extracts, and captions images for a given course.",
    version="1.0.0",
)


@app.on_event("startup")
async def startup_event():
    # Announce identity and configuration once the server is up.
    print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}. Manager URL: {MANAGER_URL}")


@app.get("/")
async def root():
    """Health endpoint: flow identity plus caption-server pool utilization."""
    busy_count = sum(1 for s in servers if s.busy)
    return {
        "flow_id": FLOW_ID,
        "status": "ready",
        "manager_url": MANAGER_URL,
        "total_servers": len(servers),
        "busy_servers": busy_count,
    }
454
+
455
@app.post("/process_course")
async def process_course(request: ProcessCourseRequest, background_tasks: BackgroundTasks):
    """Accept a course from the Manager and process it in the background.

    Returns immediately; the heavy lifting runs as a background task. An
    empty/missing course name is the Manager's signal that nothing is left.
    """
    course_name = request.course_name

    if not course_name:
        print(f"[{FLOW_ID}] Received empty course name. Stopping processing loop.")
        return {"status": "stopped", "message": "No more courses to process."}

    print(f"[{FLOW_ID}] Received course: {course_name}. Starting background task.")
    background_tasks.add_task(process_course_task, course_name)

    return {"status": "processing", "course_name": course_name, "message": "Processing started in background."}
472
+
473
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the port is reachable from outside the sandbox/container.
    uvicorn.run(app, host="0.0.0.0", port=FLOW_PORT)