Spaces:

Fred808
/

FLOWCAP1

Paused

App Files Community

Fred808 committed on Oct 29, 2025

Commit

3f9f811

verified ·

1 Parent(s): 838784a

Update app.py

Browse files

Files changed (1) hide show

app.py +350 -855

app.py CHANGED Viewed

@@ -1,855 +1,350 @@
-import os
-import json
-import time
-import asyncio
-import aiohttp
-from typing import Dict, List, Set, Optional
-from urllib.parse import quote, urljoin
-from datetime import datetime
-from pathlib import Path
-from datasets import Dataset, DatasetDict
-import huggingface_hub
-from fastapi import FastAPI, BackgroundTasks, HTTPException, status
-from fastapi.responses import JSONResponse
-from pydantic import BaseModel, Field
-import uvicorn
-import aiohttp
-# Path for storing caption data
-CAPTIONS_DIR = Path("captions_data")
-CAPTIONS_DIR.mkdir(exist_ok=True)
-# Hugging Face configuration
-HF_TOKEN = os.getenv("HF_TOKEN")
-HF_DATASET_ID = os.getenv("HF_DATASET_ID", "fred808/helium")
-if not HF_TOKEN:
-    raise ValueError("HF_TOKEN environment variable is required")
-def get_caption_file_path(course: str) -> Path:
-    """Get the path to the JSON file for storing course captions"""
-    safe_name = quote(course, safe='')
-    return CAPTIONS_DIR / f"{safe_name}_captions.json"
-def save_captions_to_file(course: str, captions: List[Dict]) -> None:
-    """Save captions to a JSON file"""
-    try:
-        file_path = get_caption_file_path(course)
-        with open(file_path, 'w', encoding='utf-8') as f:
-            json.dump(captions, f, indent=2, ensure_ascii=False)
-        print(f"✓ Saved {len(captions)} captions for {course}")
-    except Exception as e:
-        print(f"Error saving captions for {course}: {e}")
-def load_captions_from_file(course: str) -> List[Dict]:
-    """Load existing captions from JSON file"""
-    try:
-        file_path = get_caption_file_path(course)
-        if file_path.exists():
-            with open(file_path, 'r', encoding='utf-8') as f:
-                captions = json.load(f)
-                print(f"✓ Loaded {len(captions)} existing captions for {course}")
-                return captions
-    except Exception as e:
-        print(f"Error loading captions for {course}: {e}")
-    return []
-# Configuration
-SOURCE_SERVER = "https://samelias1-vs2.hf.space"
-CAPTION_SERVERS = [
-    "https://fred808-pil-4-1.hf.space/analyze",
-    "https://fred808-pil-4-2.hf.space/analyze",
-    "https://fred808-pil-4-3.hf.space/analyze",
-    "https://fred1012-fred1012-gw0j2h.hf.space/analyze",
-    "https://fred1012-fred1012-wqs6c2.hf.space/analyze",
-    "https://fred1012-fred1012-oncray.hf.space/analyze",
-    "https://fred1012-fred1012-4goge7.hf.space/analyze",
-    "https://fred1012-fred1012-z0eh7m.hf.space/analyze",
-    "https://fred1012-fred1012-u95rte.hf.space/analyze",
-    "https://fred1012-fred1012-igje22.hf.space/analyze",
-    "https://fred1012-fred1012-ibkuf8.hf.space/analyze",
-    "https://fred1012-fred1012-nwqthy.hf.space/analyze",
-    "https://fred1012-fred1012-4ldqj4.hf.space/analyze",
-    "https://fred1012-fred1012-pivlzg.hf.space/analyze",
-    "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
-    "https://fred1012-fred1012-u7lh57.hf.space/analyze",
-    "https://fred1012-fred1012-q8djv1.hf.space/analyze",
-    "https://fredalone-fredalone-ozugrp.hf.space/analyze",
-    "https://fredalone-fredalone-9brxj2.hf.space/analyze",
-    "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
-    "https://fredalone-fredalone-vbli2y.hf.space/analyze",
-    "https://fredalone-fredalone-uggger.hf.space/analyze",
-    "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
-    "https://fredalone-fredalone-d1f26d.hf.space/analyze",
-    "https://fredalone-fredalone-461jp2.hf.space/analyze",
-    "https://fredalone-fredalone-3enfg4.hf.space/analyze",
-    "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
-    "https://fredalone-fredalone-ivtjua.hf.space/analyze",
-    "https://fredalone-fredalone-6bezt2.hf.space/analyze",
-    "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
-    "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
-    "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
-    "https://fredalone-fredalone-wclyog.hf.space/analyze",
-    "https://fredalone-fredalone-t27vig.hf.space/analyze",
-    "https://fredalone-fredalone-gahbxh.hf.space/analyze",
-    "https://fredalone-fredalone-kw2po4.hf.space/analyze",
-    "https://fredalone-fredalone-8h285h.hf.space/analyze"
-]
-MODEL_TYPE = "Florence-2-large"  # Explicitly request large model
-# FastAPI Models
-class CourseInfo(BaseModel):
-    course_folder: str
-class ImageInfo(BaseModel):
-    filename: str
-class CaptionRequest(BaseModel):
-    image_url: str
-    model_choice: str = MODEL_TYPE
-class CaptionResponse(BaseModel):
-    success: bool
-    caption: Optional[str] = None
-    error: Optional[str] = None
-class ServerStatus(BaseModel):
-    url: str
-    model: str
-    busy: bool
-    total_processed: int
-    total_time: float
-    fps: float
-class ProcessingStatus(BaseModel):
-    course: str
-    total_images: int
-    processed_images: int
-    progress_percent: float
-    status: str
-class StartProcessingRequest(BaseModel):
-    courses: Optional[List[str]] = None  # If None, process all courses
-    continuous: bool = True  # Default to continuous like original
-# FastAPI App
-app = FastAPI(
-    title="Caption Coordinator API",
-    description="Distributed caption processing coordinator",
-    version="1.0.0"
-)
-# Global state
-processed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
-course_captions: Dict[str, List[Dict]] = {}  # {course: [{image, caption, metadata}]}
-failed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
-servers = []
-is_processing = False
-current_processing_task = None
-auto_start_processing = True  # Set to False if you don't want auto-start
-# Map of course -> vs2 callback URL
-pending_vs2_callbacks: Dict[str, str] = {}
-class CaptionServer:
-    def __init__(self, url):
-        self.url = url
-        self.busy = False
-        self.model = "unknown"
-        self.total_processed = 0
-        self.total_time = 0
-    @property
-    def fps(self):
-        return self.total_processed / self.total_time if self.total_time > 0 else 0
-# Initialize servers
-def initialize_servers():
-    global servers
-    servers = [CaptionServer(url) for url in CAPTION_SERVERS]
-# API Routes
-@app.get("/")
-async def root():
-    return {
-        "message": "Caption Coordinator API",
-        "status": "running",
-        "auto_processing": auto_start_processing,
-        "is_processing": is_processing
-    }
-@app.get("/health")
-async def health():
-    return {
-        "status": "healthy",
-        "servers_available": len([s for s in servers if not s.busy]),
-        "total_servers": len(servers),
-        "is_processing": is_processing,
-        "auto_processing": auto_start_processing
-    }
-@app.get("/courses")
-async def get_courses():
-    """Fetch available courses from source server"""
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(f"{SOURCE_SERVER}/courses") as resp:
-                data = await resp.json()
-                if isinstance(data, dict) and 'courses' in data:
-                    return [c['course_folder'] for c in data['courses'] if isinstance(c, dict)]
-                return []
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error fetching courses: {e}")
-@app.post("/vs2/register")
-async def vs2_register(payload: Dict):
-    """Register a VS2 callback and optionally start processing for the given course.
-    Expected payload: {"course": "course_name", "callback_url": "http://vs2-host/flow/done", "start": true}
-    """
-    try:
-        course = payload.get("course")
-        callback = payload.get("callback_url")
-        start = payload.get("start", True)
-        if not callback:
-            raise HTTPException(status_code=400, detail="callback_url is required")
-        # Store callback for later notification
-        if course:
-            pending_vs2_callbacks[course] = callback
-        else:
-            # store under wildcard key if course not provided
-            pending_vs2_callbacks["*"] = callback
-        # If caller asks to start processing this course immediately, and we're not currently processing,
-        # kick off a one-shot processing loop for that course.
-        if start:
-            global is_processing, current_processing_task
-            if not is_processing:
-                is_processing = True
-                current_processing_task = asyncio.create_task(processing_loop([course] if course else None, False))
-        return {"registered": True, "course": course}
-    except HTTPException:
-        raise
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-@app.get("/courses/{course}/images")
-async def get_course_images(course: str):
-    """Fetch images list for a course"""
-    try:
-        course_frames = f"{course}_frames" if not course.endswith("_frames") else course
-        url = f"{SOURCE_SERVER}/images/{quote(course_frames)}"
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url) as resp:
-                data = await resp.json()
-                if isinstance(data, dict) and 'images' in data:
-                    return data['images']
-                return []
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error fetching images: {e}")
-@app.get("/servers/status")
-async def get_servers_status():
-    """Get status of all caption servers"""
-    server_statuses = []
-    for server in servers:
-        server_statuses.append(ServerStatus(
-            url=server.url,
-            model=server.model,
-            busy=server.busy,
-            total_processed=server.total_processed,
-            total_time=server.total_time,
-            fps=server.fps
-        ))
-    return server_statuses
-@app.get("/processing/status")
-async def get_processing_status():
-    """Get current processing status"""
-    status_info = {}
-    for course in processed_images:
-        total = len(processed_images[course])
-        processed = len(course_captions.get(course, []))
-        failed = len(failed_images.get(course, set()))
-        status_info[course] = {
-            "course": course,
-            "total_images": total,
-            "processed_images": processed,
-            "failed_images": failed,
-            "progress_percent": (processed / total * 100) if total > 0 else 0,
-            "status": "completed" if processed + failed >= total else "processing"
-        }
-    return status_info
-@app.post("/processing/start")
-async def start_processing(request: StartProcessingRequest = StartProcessingRequest()):
-    """Start caption processing"""
-    global is_processing, current_processing_task
-    if is_processing:
-        raise HTTPException(status_code=400, detail="Processing is already running")
-    is_processing = True
-    current_processing_task = asyncio.create_task(
-        processing_loop(request.courses, request.continuous)
-    )
-    return {
-        "message": "Processing started",
-        "continuous": request.continuous,
-        "specific_courses": request.courses
-    }
-@app.post("/processing/stop")
-async def stop_processing():
-    """Stop caption processing"""
-    global is_processing, current_processing_task
-    if not is_processing:
-        raise HTTPException(status_code=400, detail="Processing is not running")
-    is_processing = False
-    if current_processing_task:
-        current_processing_task.cancel()
-        try:
-            await current_processing_task
-        except asyncio.CancelledError:
-            pass
-        current_processing_task = None
-    return {"message": "Processing stopped"}
-@app.get("/captions/{course}")
-async def get_captions(course: str):
-    """Get captions for a specific course"""
-    captions = load_captions_from_file(course)
-    return {
-        "course": course,
-        "total_captions": len(captions),
-        "captions": captions
-    }
-@app.delete("/captions/{course}")
-async def delete_captions(course: str):
-    """Delete captions for a specific course"""
-    try:
-        file_path = get_caption_file_path(course)
-        if file_path.exists():
-            file_path.unlink()
-            if course in processed_images:
-                del processed_images[course]
-            if course in course_captions:
-                del course_captions[course]
-            if course in failed_images:
-                del failed_images[course]
-            return {"message": f"Captions for {course} deleted"}
-        else:
-            raise HTTPException(status_code=404, detail=f"No captions found for {course}")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error deleting captions: {e}")
-# Core processing functions
-async def fetch_courses() -> List[str]:
-    """Fetch available courses from source server"""
-    async with aiohttp.ClientSession() as session:
-        async with session.get(f"{SOURCE_SERVER}/courses") as resp:
-            data = await resp.json()
-            if isinstance(data, dict) and 'courses' in data:
-                return [c['course_folder'] for c in data['courses'] if isinstance(c, dict)]
-            return []
-async def fetch_course_images(course: str) -> List[Dict]:
-    """Fetch images list for a course"""
-    course_frames = f"{course}_frames" if not course.endswith("_frames") else course
-    url = f"{SOURCE_SERVER}/images/{quote(course_frames)}"
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url) as resp:
-            data = await resp.json()
-            if isinstance(data, dict) and 'images' in data:
-                return data['images']
-            return []
-async def get_caption(server: str, image_url: str) -> Dict:
-    """Get caption from a specific server"""
-    params = {
-        'image_url': image_url,
-        'model_choice': MODEL_TYPE
-    }
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(server, params=params, timeout=30) as resp:
-                return await resp.json()
-    except Exception as e:
-        print(f"Error from {server}: {e}")
-        return None
-async def get_model_info():
-    """Get model information from caption servers"""
-    model_info = []
-    async with aiohttp.ClientSession() as session:
-        for server in CAPTION_SERVERS:
-            try:
-                health_url = server.rsplit('/analyze', 1)[0] + '/health'
-                async with session.get(health_url) as resp:
-                    info = await resp.json()
-                    model_info.append({
-                        'url': server,
-                        'model': info.get('model_choice', 'unknown')
-                    })
-            except Exception as e:
-                print(f"Couldn't get model info from {server}: {e}")
-    return model_info
-async def wait_for_vs2_ready(course: str, timeout: Optional[int] = None, interval: int = 5):
-    """Poll the SOURCE_SERVER /vs2/state endpoint until VS2 reports 'ready' for the given course.
-    If timeout is None, this will poll indefinitely until VS2 is ready or idle.
-    """
-    url = f"{SOURCE_SERVER}/vs2/state"
-    elapsed = 0
-    async with aiohttp.ClientSession() as session:
-        while True:
-            try:
-                async with session.get(url, timeout=10) as resp:
-                    if resp.status == 200:
-                        data = await resp.json()
-                        # data may be either {'state': ..., 'current_course': ...} or {'states': {...}}
-                        state = data.get('state') or None
-                        current = data.get('current_course') or data.get('current_file')
-                        if state is None and 'states' in data:
-                            # per-course states dict was returned
-                            states = data['states']
-                            state = states.get(course)
-                            current = course
-                        print(f"VS2 state: {state}, current: {current}")
-                        # If VS2 explicitly ready for this course, proceed
-                        if state == 'ready':
-                            return True
-                        # If VS2 idle for this course (or unknown), proceed
-                        if state in (None, 'idle'):
-                            return True
-                    else:
-                        print(f"VS2 state endpoint returned {resp.status}")
-            except Exception as e:
-                print(f"Could not query VS2 state: {e}")
-            # if timeout set and exceeded, raise; otherwise continue indefinitely
-            if timeout is not None:
-                elapsed += interval
-                if elapsed >= timeout:
-                    raise Exception(f"Timeout waiting for VS2 to be ready for course {course}")
-            await asyncio.sleep(interval)
-async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
-    """Process single image through one caption server with better error handling"""
-    if server.busy:
-        return None
-    server.busy = True
-    start_time = time.time()
-    try:
-        # Structure URL correctly: /images/COURSE_NAME_frames/IMAGE.png
-        course_frames = f"{course}_frames" if not course.endswith("_frames") else course
-        image_url = urljoin(SOURCE_SERVER, f"/images/{quote(course_frames)}/{quote(image['filename'])}")
-        result = await get_caption(server.url, image_url)
-        processing_time = time.time() - start_time
-        server.total_time += processing_time
-        if result and result.get('success') and result.get('caption'):
-            server.total_processed += 1
-            metadata = {
-                "image": image['filename'],
-                "caption": result['caption'],
-                "server": server.url,
-                "processing_time": processing_time,
-                "timestamp": datetime.now().isoformat()
-            }
-            print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
-            return metadata
-        else:
-            # Server responded but no caption (might be error or empty response)
-            error_msg = result.get('error', 'Unknown error') if result else 'No response'
-            print(f"Server {server.url} failed for {image['filename']}: {error_msg}")
-            return None
-    except asyncio.TimeoutError:
-        print(f"Server {server.url} timeout for {image['filename']}")
-        return None
-    except Exception as e:
-        print(f"Error processing {image['filename']} on {server.url}: {e}")
-        return None
-    finally:
-        server.busy = False
-async def upload_to_huggingface(course: str, metadata_list: List[Dict]):
-    """Upload course captions to Hugging Face dataset"""
-    try:
-        print(f"📤 Uploading {len(metadata_list)} captions for {course} to Hugging Face...")
-        # Prepare data for Hugging Face dataset
-        dataset_data = {
-            "course": [],
-            "image_filename": [],
-            "caption": [],
-            "processing_server": [],
-            "processing_time": [],
-            "timestamp": []
-        }
-        for metadata in metadata_list:
-            dataset_data["course"].append(course)
-            dataset_data["image_filename"].append(metadata["image"])
-            dataset_data["caption"].append(metadata["caption"])
-            dataset_data["processing_server"].append(metadata["server"])
-            dataset_data["processing_time"].append(metadata["processing_time"])
-            dataset_data["timestamp"].append(metadata["timestamp"])
-        # Create dataset
-        dataset = Dataset.from_dict(dataset_data)
-        # Login to Hugging Face
-        huggingface_hub.login(token=HF_TOKEN)
-        # Push to hub
-        dataset.push_to_hub(
-            HF_DATASET_ID,
-            config_name=course.replace("/", "_").replace(" ", "_"),
-            split="train",  # You can change this to "train", "validation", "test" as needed
-            commit_message=f"Add captions for course {course} - {len(metadata_list)} images"
-        )
-        print(f"✅ Successfully uploaded {len(metadata_list)} captions for {course} to {HF_DATASET_ID}")
-        # Notify VS2 (if VS2 provided a callback for this course)
-        try:
-            await notify_vs2_flow_done(course, success=True)
-        except Exception as e:
-            print(f"Warning: failed to notify VS2 about completion for {course}: {e}")
-        return True
-    except Exception as e:
-        print(f"❌ Error uploading to Hugging Face: {e}")
-        return False
-async def notify_vs2_flow_done(course: str, success: bool):
-    """If VS2 provided a callback URL for this course, POST a completion signal."""
-    callback = pending_vs2_callbacks.get(course)
-    if not callback:
-        # try fallback: look for any callback registered under partial names
-        for key, cb in pending_vs2_callbacks.items():
-            if key in course:
-                callback = cb
-                break
-    if not callback:
-        # nothing to do
-        return
-    payload = {
-        "course": course,
-        "status": "done" if success else "failed",
-        "timestamp": datetime.now().isoformat()
-    }
-    print(f"Notifying VS2 at {callback} about course {course} -> {payload['status']}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.post(callback, json=payload, timeout=30) as resp:
-                if resp.status >= 400:
-                    text = await resp.text()
-                    print(f"VS2 callback returned {resp.status}: {text}")
-    except Exception as e:
-        print(f"Error notifying VS2 callback {callback}: {e}")
-async def process_course(course: str, servers: List[CaptionServer]):
-    """Process all images in a course using available servers with proper retry logic"""
-    # Initialize course tracking
-    if course not in processed_images:
-        processed_images[course] = set()
-    if course not in course_captions:
-        course_captions[course] = load_captions_from_file(course)
-        # Update processed images set from loaded captions
-        for cap in course_captions[course]:
-            processed_images[course].add(cap['image'])
-    if course not in failed_images:
-        failed_images[course] = set()
-    # Get list of images
-    images = await fetch_course_images(course)
-    if not images:
-        print(f"No images found for course {course}")
-        return
-    print(f"\nProcessing {len(images)} images for course {course}")
-    # Track images that need processing with retry count (5 retries)
-    pending_images = {}
-    for img in images:
-        filename = img['filename']
-        if filename not in processed_images[course] and filename not in failed_images[course]:
-            pending_images[filename] = {'image': img, 'retries': 0, 'max_retries': 5}
-    if not pending_images:
-        print(f"All images already processed or failed for course {course}")
-        print(f"- Processed: {len(processed_images[course])}, Failed: {len(failed_images[course])}")
-        # If course is completed, upload to Hugging Face
-        if len(processed_images[course]) + len(failed_images[course]) >= len(images):
-            if course_captions[course]:
-                print(f"📤 Course {course} completed, uploading to Hugging Face...")
-                await upload_to_huggingface(course, course_captions[course])
-        return
-    print(f"Images to process: {len(pending_images)} (already processed: {len(processed_images[course])}, failed: {len(failed_images[course])})")
-    batch_size = len([s for s in servers if not s.busy])
-    processed_in_this_run = 0
-    while pending_images and is_processing:
-        # Create tasks for each available server
-        tasks = []
-        assigned_images = []
-        for server in servers:
-            if not server.busy and pending_images:
-                # Get the next pending image
-                filename, img_data = next(iter(pending_images.items()))
-                img = img_data['image']
-                # Assign this image to the server
-                tasks.append(process_image(server, course, img))
-                assigned_images.append((filename, img, img_data['retries']))
-                # Remove from pending temporarily while it's being processed
-                del pending_images[filename]
-        if not tasks:
-            # If no servers available, wait a bit
-            await asyncio.sleep(0.1)
-            continue
-        # Process images in parallel across servers
-        results = await asyncio.gather(*tasks)
-        # Handle results and retry logic
-        has_new_results = False
-        for (filename, img, current_retries), result in zip(assigned_images, results):
-            if result:
-                # Success - image was processed
-                processed_images[course].add(filename)
-                course_captions[course].append(result)
-                has_new_results = True
-                processed_in_this_run += 1
-                print(f"✓ Successfully processed {filename}")
-            else:
-                # Failure - check if we should retry
-                if current_retries < 5:  # max_retries
-                    # Put back in pending for retry with incremented retry count
-                    pending_images[filename] = {
-                        'image': img,
-                        'retries': current_retries + 1,
-                        'max_retries': 5
-                    }
-                    print(f"↻ Retry {current_retries + 1}/5 for {filename}")
-                else:
-                    # Max retries exceeded, mark as failed
-                    failed_images[course].add(filename)
-                    print(f"✗ Failed to process {filename} after 5 retries")
-        # Save progress after each batch with new results
-        if has_new_results:
-            save_captions_to_file(course, course_captions[course])
-        # Show progress
-        total = len(images)
-        done = len(processed_images[course])
-        failed_count = len(failed_images[course])
-        pending_count = len(pending_images)
-        progress_percent = (done / total * 100) if total > 0 else 0
-        print(f"\rProgress: {done}/{total} ({progress_percent:.1f}%) - {pending_count} pending, {failed_count} failed, {processed_in_this_run} new", end="", flush=True)
-        # Small delay to prevent overwhelming the servers
-        await asyncio.sleep(0.5)
-    # Final status for this course
-    total = len(images)
-    done = len(processed_images[course])
-    failed_count = len(failed_images[course])
-    if done + failed_count >= total:
-        if failed_count > 0:
-            print(f"\n✓ Course {course} completed with {failed_count} failed images")
-        else:
-            print(f"\n✓ Course {course} fully completed")
-        # Upload to Hugging Face when course is completed
-        if course_captions[course]:
-            print(f"📤 Uploading {len(course_captions[course])} captions to Hugging Face...")
-            success = await upload_to_huggingface(course, course_captions[course])
-            if success:
-                print(f"✅ Successfully uploaded {course} to Hugging Face")
-            else:
-                print(f"❌ Failed to upload {course} to Hugging Face")
-    else:
-        print(f"\n→ Course {course} partially completed: {done}/{total} processed, {failed_count} failed")
-async def processing_loop(specific_courses: Optional[List[str]] = None, continuous: bool = True):
-    """Main processing loop with proper error handling"""
-    global is_processing
-    # Get model information and verify Florence-2-large availability
-    model_info = await get_model_info()
-    print("\nCaption Servers:")
-    available_servers = []
-    for info, server in zip(model_info, servers):
-        server.model = info['model']
-        if MODEL_TYPE in info.get('model', ''):
-            available_servers.append(server)
-            print(f"✓ {server.url} confirmed {MODEL_TYPE}")
-        else:
-            print(f"✗ {server.url} using {server.model} - skipping (requires {MODEL_TYPE})")
-    if not available_servers:
-        print(f"\nError: No servers with {MODEL_TYPE} available!")
-        is_processing = False
-        return
-    # Update servers list to only use those with large model
-    processing_servers = available_servers
-    print(f"\nUsing {len(processing_servers)} servers with {MODEL_TYPE}")
-    # Check for existing caption files and report
-    existing_captions = list(CAPTIONS_DIR.glob("*_captions.json"))
-    if existing_captions:
-        print("\nFound existing caption files:")
-        for cap_file in existing_captions:
-            course = cap_file.stem.replace("_captions", "")
-            try:
-                with open(cap_file, 'r', encoding='utf-8') as f:
-                    captions = json.load(f)
-                    print(f"- {course}: {len(captions)} captions")
-            except Exception as e:
-                print(f"- Error reading {cap_file.name}: {e}")
-        print()
-    start_time = time.time()
-    iteration = 0
-    while is_processing:
-        try:
-            iteration += 1
-            print(f"\n{'='*50}")
-            print(f"Processing Iteration {iteration}")
-            print(f"{'='*50}")
-            # Get available courses
-            if specific_courses:
-                courses = specific_courses
-                print(f"Processing specific courses: {courses}")
-            else:
-                courses = await fetch_courses()
-                print(f"Found {len(courses)} courses")
-            if not courses:
-                print("No courses found, waiting...")
-                if not continuous:
-                    break
-                await asyncio.sleep(10)
-                continue
-            # Process each course with all available servers
-            for course in courses:
-                if not is_processing:
-                    break
-                print(f"\n--- Processing course: {course} ---")
-                # Before processing, ensure VS2 has finished extracting frames for this course
-                try:
-                    await wait_for_vs2_ready(course)
-                except Exception as e:
-                    print(f"Warning: error while checking VS2 readiness for {course}: {e}")
-                await process_course(course, processing_servers)
-            # Show server stats
-            print("\nServer Stats:")
-            total_processed = sum(s.total_processed for s in processing_servers)
-            elapsed = time.time() - start_time
-            if elapsed > 0:
-                print(f"Total images processed: {total_processed}")
-                print(f"Overall speed: {total_processed/elapsed:.2f} fps")
-                for s in processing_servers:
-                    print(f"- {s.url}: {s.total_processed} images, {s.fps:.2f} fps")
-            print()
-            if not continuous:
-                print("One-time processing completed")
-                break
-            # Wait before next check
-            print("Waiting for new courses...")
-            await asyncio.sleep(5)
-        except asyncio.CancelledError:
-            print("Processing cancelled")
-            break
-        except Exception as e:
-            print(f"Error in processing loop: {str(e)}")
-            import traceback
-            traceback.print_exc()
-            await asyncio.sleep(10)
-    is_processing = False
-    print("Processing loop stopped")
-# Startup event
-@app.on_event("startup")
-async def startup_event():
-    """Initialize servers and start processing on startup"""
-    initialize_servers()
-    print("Caption Coordinator API started")
-    print(f"Source server: {SOURCE_SERVER}")
-    print(f"Caption servers: {len(CAPTION_SERVERS)}")
-    print(f"Hugging Face dataset: {HF_DATASET_ID}")
-    print(f"HF Token: {'✅ Set' if HF_TOKEN else '❌ Missing'}")
-    # Start processing automatically (like original main())
-    if auto_start_processing:
-        print("Auto-starting processing loop...")
-        global is_processing, current_processing_task
-        is_processing = True
-        current_processing_task = asyncio.create_task(processing_loop())
-@app.post("/vs2/ready")
-async def vs2_ready(course: str, callback_url: str = None):
-    """Called by VS2 when it has finished extracting frames for a course.
-    VS2 should POST course (string) and its callback_url (where Flow will POST when captioning is done).
-    """
-    if not course:
-        raise HTTPException(status_code=400, detail="course is required")
-    if callback_url:
-        pending_vs2_callbacks[course] = callback_url
-        print(f"Registered VS2 callback for {course} -> {callback_url}")
-    # Acknowledge. The processing loop will discover the new course via SOURCE_SERVER /courses.
-    return {"status": "accepted", "course": course, "callback_url": callback_url}
-@app.get("/vs2/callbacks")
-async def list_vs2_callbacks():
-    """List pending VS2 callbacks (debug)"""
-    return pending_vs2_callbacks
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)

+import os
+import json
+import time
+import asyncio
+import aiohttp
+import zipfile
+from typing import Dict, List, Set, Optional
+from urllib.parse import quote
+from datetime import datetime
+from pathlib import Path
+import io
+from fastapi import FastAPI, BackgroundTasks, HTTPException, status
+from pydantic import BaseModel, Field
+from huggingface_hub import HfApi, hf_hub_download
+import uvicorn
+# --- Configuration ---
+# Identity and listening port of this Flow server, both overridable through environment variables
+FLOW_ID = os.getenv("FLOW_ID", "flow_default")  # Prefixes log lines and names the temp directory
+FLOW_PORT = int(os.getenv("FLOW_PORT", 8001)) # Port passed to uvicorn.run; defaults to 8001
+# Manager server: base URL and the endpoint that receives task-completion reports
+MANAGER_URL = os.getenv("MANAGER_URL", "http://localhost:8000")
+MANAGER_COMPLETE_TASK_URL = f"{MANAGER_URL}/task/complete"  # POST target of report_completion()
+# Hugging Face configuration
+HF_TOKEN = os.getenv("HF_TOKEN", "") # Access token; empty string when the variable is unset
+HF_DATASET_ID = os.getenv("HF_DATASET_ID", "Fred808/BG3")  # Source dataset holding frames/<course>.zip
+HF_OUTPUT_DATASET_ID = os.getenv("HF_OUTPUT_DATASET_ID", "fred808/helium") # Target dataset for caption JSON files
+# Caption server endpoints; one CaptionServer instance is created per URL
+CAPTION_SERVERS = [
+    "https://fred808-pil-4-1.hf.space/analyze",
+    "https://fred808-pil-4-2.hf.space/analyze",
+    "https://fred808-pil-4-3.hf.space/analyze",
+    "https://fred1012-fred1012-gw0j2h.hf.space/analyze",
+    "https://fred1012-fred1012-wqs6c2.hf.space/analyze",
+    "https://fred1012-fred1012-oncray.hf.space/analyze",
+    "https://fred1012-fred1012-4goge7.hf.space/analyze",
+    "https://fred1012-fred1012-z0eh7m.hf.space/analyze",
+    "https://fred1012-fred1012-u95rte.hf.space/analyze",
+    "https://fred1012-fred1012-igje22.hf.space/analyze",
+    "https://fred1012-fred1012-ibkuf8.hf.space/analyze",
+    "https://fred1012-fred1012-nwqthy.hf.space/analyze",
+    "https://fred1012-fred1012-4ldqj4.hf.space/analyze",
+    "https://fred1012-fred1012-pivlzg.hf.space/analyze",
+    "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
+    "https://fred1012-fred1012-u7lh57.hf.space/analyze",
+    "https://fred1012-fred1012-q8djv1.hf.space/analyze",
+    "https://fredalone-fredalone-ozugrp.hf.space/analyze",
+    "https://fredalone-fredalone-9brxj2.hf.space/analyze",
+    "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
+    "https://fredalone-fredalone-vbli2y.hf.space/analyze",
+    "https://fredalone-fredalone-uggger.hf.space/analyze",
+    "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
+    "https://fredalone-fredalone-d1f26d.hf.space/analyze",
+    "https://fredalone-fredalone-461jp2.hf.space/analyze",
+    "https://fredalone-fredalone-3enfg4.hf.space/analyze",
+    "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
+    "https://fredalone-fredalone-ivtjua.hf.space/analyze",
+    "https://fredalone-fredalone-6bezt2.hf.space/analyze",
+    "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
+    "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
+    "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
+    "https://fredalone-fredalone-wclyog.hf.space/analyze",
+    "https://fredalone-fredalone-t27vig.hf.space/analyze",
+    "https://fredalone-fredalone-gahbxh.hf.space/analyze",
+    "https://fredalone-fredalone-kw2po4.hf.space/analyze",
+    "https://fredalone-fredalone-8h285h.hf.space/analyze"
+]
+MODEL_TYPE = "Florence-2-large"  # Sent to the caption servers as the model_choice form field
+# Per-flow scratch directory for extracted frames; created at import time
+TEMP_DIR = Path(f"temp_images_{FLOW_ID}")
+TEMP_DIR.mkdir(exist_ok=True)
+# --- Models ---
+class ProcessCourseRequest(BaseModel):
+    course_name: Optional[str] = None
+class CaptionServer:
+    def __init__(self, url):
+        self.url = url
+        self.busy = False
+        self.total_processed = 0
+        self.total_time = 0
+        self.model = MODEL_TYPE
+    @property
+    def fps(self):
+        return self.total_processed / self.total_time if self.total_time > 0 else 0
+# Global state for caption servers
+servers = [CaptionServer(url) for url in CAPTION_SERVERS]
+server_index = 0
+# --- Core Processing Functions ---
+async def get_available_server() -> CaptionServer:
+    """Round-robin selection of an available caption server."""
+    global server_index
+    start_index = server_index
+    while True:
+        server = servers[server_index]
+        server_index = (server_index + 1) % len(servers)
+        if not server.busy:
+            return server
+        # If we've checked all servers and they are all busy, wait and try again
+        if server_index == start_index:
+            await asyncio.sleep(0.5)
+async def send_image_for_captioning(image_path: Path, course_name: str, server: CaptionServer) -> Optional[Dict]:
+    """Sends a single image to a caption server for processing."""
+    server.busy = True
+    start_time = time.time()
+    try:
+        # The caption server expects a file upload
+        files = {'file': (image_path.name, image_path.open('rb'), 'image/jpeg')}
+        # The caption server also expects a model_choice field in the data
+        data = {'model_choice': MODEL_TYPE}
+        async with aiohttp.ClientSession() as session:
+            async with session.post(server.url, data=data, files=files, timeout=600) as resp:
+                if resp.status == 200:
+                    result = await resp.json()
+                    caption = result.get("caption")
+                    if caption:
+                        return {
+                            "course": course_name,
+                            "image_path": image_path.name,
+                            "caption": caption,
+                            "timestamp": datetime.now().isoformat()
+                        }
+                    else:
+                        print(f"Server {server.url} returned success but no caption for {image_path.name}.")
+                        return None
+                else:
+                    error_text = await resp.text()
+                    print(f"Error from server {server.url} for {image_path.name}: {resp.status} - {error_text}")
+                    return None
+    except aiohttp.ClientError as e:
+        print(f"Client error connecting to {server.url}: {e}")
+        return None
+    except asyncio.TimeoutError:
+        print(f"Timeout while waiting for response from {server.url}")
+        return None
+    except Exception as e:
+        print(f"Unexpected error during captioning for {image_path.name}: {e}")
+        return None
+    finally:
+        end_time = time.time()
+        server.busy = False
+        server.total_processed += 1
+        server.total_time += (end_time - start_time)
+async def download_and_extract_zip(course_name: str) -> Optional[Path]:
+    """Downloads the zip file for the course and extracts its contents."""
+    zip_filename = f"{course_name}.zip"
+    repo_file = f"frames/{zip_filename}"
+    print(f"[{FLOW_ID}] Downloading {repo_file} from {HF_DATASET_ID}...")
+    try:
+        # Use hf_hub_download to get the file path
+        zip_path = hf_hub_download(
+            repo_id=HF_DATASET_ID,
+            filename=repo_file,
+            repo_type="dataset",
+            token=HF_TOKEN,
+        )
+        print(f"[{FLOW_ID}] Downloaded to {zip_path}. Extracting...")
+        # Create a temporary directory for extraction
+        extract_dir = TEMP_DIR / course_name
+        extract_dir.mkdir(exist_ok=True)
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(extract_dir)
+        print(f"[{FLOW_ID}] Extraction complete to {extract_dir}.")
+        return extract_dir
+    except Exception as e:
+        print(f"[{FLOW_ID}] Error downloading or extracting zip for {course_name}: {e}")
+        return None
+async def upload_captions_to_hf(course_name: str, captions: List[Dict]) -> bool:
+    """Uploads the final captions JSON file to the output dataset."""
+    caption_filename = f"{course_name}_captions.json"
+    try:
+        print(f"[{FLOW_ID}] Uploading {len(captions)} captions for {course_name} to {HF_OUTPUT_DATASET_ID}...")
+        # Create JSON content in memory
+        json_content = json.dumps(captions, indent=2, ensure_ascii=False).encode('utf-8')
+        api = HfApi(token=HF_TOKEN)
+        api.upload_file(
+            path_or_fileobj=io.BytesIO(json_content),
+            path_in_repo=caption_filename,
+            repo_id=HF_OUTPUT_DATASET_ID,
+            repo_type="dataset",
+            commit_message=f"[{FLOW_ID}] Captions for {course_name}"
+        )
+        print(f"[{FLOW_ID}] Successfully uploaded captions for {course_name}.")
+        return True
+    except Exception as e:
+        print(f"[{FLOW_ID}] Error uploading captions for {course_name}: {e}")
+        return False
+async def process_course_task(course_name: str):
+    """Main task to process a single course."""
+    print(f"[{FLOW_ID}] Starting processing for course: {course_name}")
+    extract_dir = None
+    success = False
+    error_message = None
+    all_captions = []
+    try:
+        extract_dir = await download_and_extract_zip(course_name)
+        if not extract_dir:
+            raise Exception("Failed to download or extract zip file.")
+        image_paths = [p for p in extract_dir.glob("*") if p.suffix.lower() in ['.jpg', '.jpeg', '.png']]
+        print(f"[{FLOW_ID}] Found {len(image_paths)} images to process.")
+        if not image_paths:
+            print(f"[{FLOW_ID}] No images found in {course_name}. Marking as complete.")
+            success = True
+        else:
+            # Create a list of tasks for parallel captioning
+            caption_tasks = []
+            for image_path in image_paths:
+                server = await get_available_server()
+                caption_tasks.append(send_image_for_captioning(image_path, course_name, server))
+            # Run all captioning tasks concurrently
+            results = await asyncio.gather(*caption_tasks)
+            # Filter out failed results
+            all_captions = [r for r in results if r is not None]
+            if len(all_captions) == len(image_paths):
+                print(f"[{FLOW_ID}] Successfully captioned all {len(all_captions)} images.")
+                success = True
+            elif len(all_captions) > 0:
+                print(f"[{FLOW_ID}] Completed with {len(all_captions)}/{len(image_paths)} captions. Proceeding with partial result.")
+                success = True # Consider partial success as success for now
+            else:
+                error_message = "All captioning attempts failed."
+                success = False
+        # Upload captions if successful (even partial success)
+        if success and all_captions:
+            if not await upload_captions_to_hf(course_name, all_captions):
+                error_message = "Failed to upload captions to Hugging Face."
+                success = False
+    except Exception as e:
+        error_message = str(e)
+        success = False
+        print(f"[{FLOW_ID}] Critical error during processing: {e}")
+    finally:
+        # Clean up temporary files
+        if extract_dir and extract_dir.exists():
+            import shutil
+            shutil.rmtree(extract_dir, ignore_errors=True)
+            print(f"[{FLOW_ID}] Cleaned up temporary directory {extract_dir}.")
+        # Report back to the Manager
+        await report_completion(course_name, success, error_message)
+async def report_completion(course_name: str, success: bool, error_message: Optional[str] = None):
+    """Reports the task result back to the Manager Server."""
+    print(f"[{FLOW_ID}] Reporting completion for {course_name} (Success: {success})...")
+    payload = {
+        "flow_id": FLOW_ID,
+        "course_name": course_name,
+        "success": success,
+        "error_message": error_message
+    }
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(MANAGER_COMPLETE_TASK_URL, json=payload) as resp:
+                if resp.status != 200:
+                    print(f"[{FLOW_ID}] ERROR: Manager reported non-200 status: {resp.status} - {await resp.text()}")
+                else:
+                    print(f"[{FLOW_ID}] Successfully reported completion to Manager.")
+    except aiohttp.ClientError as e:
+        print(f"[{FLOW_ID}] CRITICAL ERROR: Could not connect to Manager at {MANAGER_COMPLETE_TASK_URL}. Task completion not reported. Error: {e}")
+    except Exception as e:
+        print(f"[{FLOW_ID}] Unexpected error during reporting: {e}")
+# --- FastAPI App and Endpoints ---
+app = FastAPI(
+    title=f"Flow Server {FLOW_ID} API",
+    description="Fetches, extracts, and captions images for a given course.",
+    version="1.0.0"
+)
+@app.on_event("startup")
+async def startup_event():
+    print(f"Flow Server {FLOW_ID} started on port {FLOW_PORT}. Manager URL: {MANAGER_URL}")
+@app.get("/")
+async def root():
+    return {
+        "flow_id": FLOW_ID,
+        "status": "ready",
+        "manager_url": MANAGER_URL,
+        "total_servers": len(servers),
+        "busy_servers": sum(1 for s in servers if s.busy),
+    }
+@app.post("/process_course")
+async def process_course(request: ProcessCourseRequest, background_tasks: BackgroundTasks):
+    """
+    Receives a course name from the Manager and starts processing in the background.
+    """
+    course_name = request.course_name
+    if not course_name:
+        print(f"[{FLOW_ID}] Received empty course name. Stopping processing loop.")
+        return {"status": "stopped", "message": "No more courses to process."}
+    print(f"[{FLOW_ID}] Received course: {course_name}. Starting background task.")
+    # Start the heavy processing in a background task so the API call returns immediately
+    background_tasks.add_task(process_course_task, course_name)
+    return {"status": "processing", "course_name": course_name, "message": "Processing started in background."}
+if __name__ == "__main__":
+    # Note: When running in the sandbox, we need to use 0.0.0.0 to expose the port.
+    uvicorn.run(app, host="0.0.0.0", port=FLOW_PORT)