Spaces:

Fred808
/

Flow

Paused

App Files Files Community

Fred808 committed on Oct 21, 2025

Commit

a1e320e

verified ·

1 Parent(s): 10dec39

Update app.py

Browse files

Files changed (1) hide show

app.py +262 -65

app.py CHANGED Viewed

@@ -3,11 +3,16 @@ import json
 import time
 import asyncio
 import aiohttp
-from typing import Dict, List, Set
 from urllib.parse import quote, urljoin
 from datetime import datetime
 from pathlib import Path
 # Path for storing caption data
 CAPTIONS_DIR = Path("captions_data")
 CAPTIONS_DIR.mkdir(exist_ok=True)
@@ -58,34 +63,225 @@ CAPTION_SERVERS = [
     "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
     "https://fred1012-fred1012-u7lh57.hf.space/analyze",
     "https://fred1012-fred1012-q8djv1.hf.space/analyze",
-  "https://fredalone-fredalone-ozugrp.hf.space/analyze",
-  "https://fredalone-fredalone-9brxj2.hf.space/analyze",
-  "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
-  "https://fredalone-fredalone-vbli2y.hf.space/analyze",
-  "https://fredalone-fredalone-uggger.hf.space/analyze",
-  "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
-  "https://fredalone-fredalone-d1f26d.hf.space/analyze",
-  "https://fredalone-fredalone-461jp2.hf.space/analyze",
-  "https://fredalone-fredalone-3enfg4.hf.space/analyze",
-  "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
-  "https://fredalone-fredalone-ivtjua.hf.space/analyze",
-  "https://fredalone-fredalone-6bezt2.hf.space/analyze",
-  "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
-  "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
-  "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
-  "https://fredalone-fredalone-wclyog.hf.space/analyze",
-  "https://fredalone-fredalone-t27vig.hf.space/analyze",
-  "https://fredalone-fredalone-gahbxh.hf.space/analyze",
-  "https://fredalone-fredalone-kw2po4.hf.space/analyze",
-  "https://fredalone-fredalone-8h285h.hf.space/analyze"
 ]
 MODEL_TYPE = "Florence-2-large"  # Explicitly request large model
 DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
-# Tracking state
 processed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
 course_captions: Dict[str, List[Dict]] = {}  # {course: [{image, caption, metadata}]}
 async def fetch_courses() -> List[str]:
     """Fetch available courses from source server"""
     async with aiohttp.ClientSession() as session:
@@ -137,18 +333,6 @@ async def get_model_info():
                 print(f"Couldn't get model info from {server}: {e}")
     return model_info
-class CaptionServer:
-    def __init__(self, url):
-        self.url = url
-        self.busy = False
-        self.model = "unknown"
-        self.total_processed = 0
-        self.total_time = 0
-    @property
-    def fps(self):
-        return self.total_processed / self.total_time if self.total_time > 0 else 0
 async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
     """Process single image through one caption server"""
     if server.busy:
@@ -230,7 +414,7 @@ async def process_course(course: str, servers: List[CaptionServer]):
     print(f"\nProcessing {len(images)} images for course {course}")
     remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
-    while remaining_images:
         # Create tasks for each available server
         tasks = []
         for server in servers:
@@ -270,23 +454,9 @@ async def process_course(course: str, servers: List[CaptionServer]):
             course_captions[course].clear()
             break
-async def main():
-    # Initialize caption servers
-    servers = [CaptionServer(url) for url in CAPTION_SERVERS]
-    # Check for existing caption files and report
-    existing_captions = list(CAPTIONS_DIR.glob("*_captions.json"))
-    if existing_captions:
-        print("\nFound existing caption files:")
-        for cap_file in existing_captions:
-            course = cap_file.stem.replace("_captions", "")
-            try:
-                with open(cap_file, 'r', encoding='utf-8') as f:
-                    captions = json.load(f)
-                    print(f"- {course}: {len(captions)} captions")
-            except Exception as e:
-                print(f"- Error reading {cap_file.name}: {e}")
-        print()
     # Get model information and verify Florence-2-large availability
     model_info = await get_model_info()
@@ -302,21 +472,28 @@ async def main():
     if not available_servers:
         print(f"\nError: No servers with {MODEL_TYPE} available!")
         return
     # Update servers list to only use those with large model
-    servers = available_servers
-    print(f"\nUsing {len(servers)} servers with {MODEL_TYPE}")
     print()
     start_time = time.time()
-    while True:
         try:
             # Get available courses
-            courses = await fetch_courses()
             if not courses:
                 print("No courses found, waiting...")
                 await asyncio.sleep(10)
                 continue
@@ -324,30 +501,50 @@ async def main():
             # Process each course with all available servers
             for course in courses:
-                await process_course(course, servers)
                 # Show server stats
                 print("\nServer Stats:")
-                total_processed = sum(s.total_processed for s in servers)
                 elapsed = time.time() - start_time
                 if elapsed > 0:
                     print(f"Total images processed: {total_processed}")
                     print(f"Overall speed: {total_processed/elapsed:.2f} fps")
-                    for s in servers:
                         print(f"- {s.url}: {s.total_processed} images, {s.fps:.2f} fps")
                 print()
             # Wait before next check
             await asyncio.sleep(5)
         except Exception as e:
-            print(f"Error in main loop: {e}")
             await asyncio.sleep(10)
-if __name__ == "__main__":
-    print("Starting caption coordinator...")
     print(f"Source server: {SOURCE_SERVER}")
-    print(f"Caption servers: {CAPTION_SERVERS}")
     print(f"Dataset server: {DATA_COLLECTION_SERVER}")
-    asyncio.run(main())

 import time
 import asyncio
 import aiohttp
+from typing import Dict, List, Set, Optional
 from urllib.parse import quote, urljoin
 from datetime import datetime
 from pathlib import Path
+from fastapi import FastAPI, BackgroundTasks, HTTPException, status
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+import uvicorn
 # Path for storing caption data
 CAPTIONS_DIR = Path("captions_data")
 CAPTIONS_DIR.mkdir(exist_ok=True)
     "https://fred1012-fred1012-ptlc5u.hf.space/analyze",
     "https://fred1012-fred1012-u7lh57.hf.space/analyze",
     "https://fred1012-fred1012-q8djv1.hf.space/analyze",
+    "https://fredalone-fredalone-ozugrp.hf.space/analyze",
+    "https://fredalone-fredalone-9brxj2.hf.space/analyze",
+    "https://fredalone-fredalone-p8vq9a.hf.space/analyze",
+    "https://fredalone-fredalone-vbli2y.hf.space/analyze",
+    "https://fredalone-fredalone-uggger.hf.space/analyze",
+    "https://fredalone-fredalone-nmi7e8.hf.space/analyze",
+    "https://fredalone-fredalone-d1f26d.hf.space/analyze",
+    "https://fredalone-fredalone-461jp2.hf.space/analyze",
+    "https://fredalone-fredalone-3enfg4.hf.space/analyze",
+    "https://fredalone-fredalone-dqdbpv.hf.space/analyze",
+    "https://fredalone-fredalone-ivtjua.hf.space/analyze",
+    "https://fredalone-fredalone-6bezt2.hf.space/analyze",
+    "https://fredalone-fredalone-e0wfnk.hf.space/analyze",
+    "https://fredalone-fredalone-zu2t7j.hf.space/analyze",
+    "https://fredalone-fredalone-dqtv1o.hf.space/analyze",
+    "https://fredalone-fredalone-wclyog.hf.space/analyze",
+    "https://fredalone-fredalone-t27vig.hf.space/analyze",
+    "https://fredalone-fredalone-gahbxh.hf.space/analyze",
+    "https://fredalone-fredalone-kw2po4.hf.space/analyze",
+    "https://fredalone-fredalone-8h285h.hf.space/analyze"
 ]
 MODEL_TYPE = "Florence-2-large"  # Explicitly request large model
 DATA_COLLECTION_SERVER = "https://fred808-flow.hf.space"
+# FastAPI Models
+class CourseInfo(BaseModel):
+    # Schema holding one required string, `course_folder` (the same key name
+    # get_courses() reads from each upstream course entry).
+    # NOTE(review): not referenced by any route in the part of the diff shown here.
+    course_folder: str
+class ImageInfo(BaseModel):
+    # Schema holding one required string, `filename` (the same key name the
+    # course-processing code uses to identify an image).
+    # NOTE(review): not referenced by any route in the part of the diff shown here.
+    filename: str
+class CaptionRequest(BaseModel):
+    # Schema for a single caption request: the image location plus the model to use.
+    # NOTE(review): not referenced by any route in the part of the diff shown here.
+    image_url: str
+    # Defaults to the module-level MODEL_TYPE constant when the caller omits it.
+    model_choice: str = MODEL_TYPE
+class CaptionResponse(BaseModel):
+    # Schema for a caption result: a required success flag plus two optional
+    # strings that both default to None.
+    # NOTE(review): not referenced by any route in the part of the diff shown here.
+    success: bool
+    caption: Optional[str] = None
+    error: Optional[str] = None
+class ServerStatus(BaseModel):
+    # Serialisable snapshot of one CaptionServer, built by GET /servers/status.
+    # Each field is copied from the attribute (or property) of the same name.
+    url: str
+    model: str
+    busy: bool
+    total_processed: int
+    total_time: float
+    fps: float
+class ProcessingStatus(BaseModel):
+    # Per-course progress record, built by GET /processing/status.
+    course: str
+    total_images: int
+    processed_images: int
+    # Percentage in the 0-100 scale (the route multiplies the ratio by 100).
+    progress_percent: float
+    # The route only ever sets this to "processing" or "completed".
+    status: str
+class StartProcessingRequest(BaseModel):
+    # Request body for POST /processing/start.
+    # The processing loop tests `courses` for truthiness, so an empty list is
+    # treated the same as None: the course list is fetched from the source server.
+    courses: Optional[List[str]] = None  # If None, process all courses
+    # False: the loop stops after a single pass. True: it keeps polling.
+    continuous: bool = False
+# FastAPI App
+# Application object; the route decorators further down register handlers on it.
+app = FastAPI(
+    title="Caption Coordinator API",
+    description="Distributed caption processing coordinator",
+    version="1.0.0"
+)
+# Global state
+# Everything below is module-level and shared by the routes and the processing loop.
 processed_images: Dict[str, Set[str]] = {}  # {course: set(image_names)}
 course_captions: Dict[str, List[Dict]] = {}  # {course: [{image, caption, metadata}]}
+# List of CaptionServer objects; empty until initialize_servers() runs at startup.
+servers = []
+# Set by POST /processing/start and cleared by POST /processing/stop or when the
+# loop finishes; the processing loops poll it to decide whether to continue.
+is_processing = False
+# asyncio task created by POST /processing/start, kept so it can be cancelled.
+current_processing_task = None
+class CaptionServer:
+    """Mutable bookkeeping record for one caption endpoint.
+    Holds the endpoint URL, a busy flag, the model name (initially
+    "unknown") and running totals from which throughput is derived.
+    """
+    def __init__(self, url):
+        """Create an idle record for *url* with zeroed counters."""
+        self.url = url
+        self.busy = False
+        self.model = "unknown"
+        self.total_processed = 0
+        self.total_time = 0
+    @property
+    def fps(self):
+        """Return total_processed / total_time, or 0 when total_time is not positive."""
+        if self.total_time > 0:
+            return self.total_processed / self.total_time
+        return 0
+# Initialize servers
+def initialize_servers():
+    """Rebuild the global `servers` list with one CaptionServer per CAPTION_SERVERS URL."""
+    global servers
+    # Rebinding (not mutating) the list: previous CaptionServer objects and their counters are discarded.
+    servers = list(map(CaptionServer, CAPTION_SERVERS))
+# API Routes
+@app.get("/")
+async def root():
+    # Static banner so callers can confirm the API is reachable.
+    banner = dict(message="Caption Coordinator API", status="running")
+    return banner
+@app.get("/health")
+async def health():
+    # Reports how many CaptionServer records are currently not marked busy,
+    # the size of the server list, and the global processing flag.
+    idle_count = sum(1 for s in servers if not s.busy)
+    report = {
+        "status": "healthy",
+        "servers_available": idle_count,
+        "total_servers": len(servers),
+        "is_processing": is_processing
+    }
+    return report
+@app.get("/courses")
+async def get_courses():
+    """Fetch available courses from source server"""
+    # Returns the `course_folder` value of every dict entry under the upstream
+    # "courses" key, or [] when the payload does not have that shape.
+    # Any exception (request, JSON decoding, missing key) becomes an HTTP 500.
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{SOURCE_SERVER}/courses") as resp:
+                payload = await resp.json()
+                if not isinstance(payload, dict) or 'courses' not in payload:
+                    return []
+                return [entry['course_folder'] for entry in payload['courses'] if isinstance(entry, dict)]
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching courses: {e}")
+@app.get("/courses/{course}/images")
+async def get_course_images(course: str):
+    """Fetch images list for a course"""
+    # The upstream endpoint is addressed by "<course>_frames"; the suffix is
+    # appended only when the caller did not already supply it.
+    # Any exception becomes an HTTP 500.
+    try:
+        if course.endswith("_frames"):
+            course_frames = course
+        else:
+            course_frames = f"{course}_frames"
+        url = f"{SOURCE_SERVER}/images/{quote(course_frames)}"
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url) as resp:
+                payload = await resp.json()
+                has_images = isinstance(payload, dict) and 'images' in payload
+                return payload['images'] if has_images else []
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error fetching images: {e}")
+@app.get("/servers/status")
+async def get_servers_status():
+    """Get status of all caption servers"""
+    # One ServerStatus snapshot per entry in the global `servers` list, in list order.
+    return [
+        ServerStatus(
+            url=srv.url,
+            model=srv.model,
+            busy=srv.busy,
+            total_processed=srv.total_processed,
+            total_time=srv.total_time,
+            fps=srv.fps
+        )
+        for srv in servers
+    ]
+@app.get("/processing/status")
+async def get_processing_status():
+    """Get current processing status"""
+    # Builds {course: ProcessingStatus} for every course present in processed_images.
+    # NOTE(review): `total_images` is the size of processed_images[course] (names
+    # already recorded), not a count fetched from the source server, and
+    # `processed_images` is the length of the in-memory caption list. Confirm
+    # these are the intended numerator and denominator.
+    status_info = {}
+    for course, recorded_names in processed_images.items():
+        total = len(recorded_names)
+        processed = len(course_captions.get(course, []))
+        if total > 0:
+            percent = processed / total * 100
+        else:
+            percent = 0
+        state = "processing" if processed < total else "completed"
+        status_info[course] = ProcessingStatus(
+            course=course,
+            total_images=total,
+            processed_images=processed,
+            progress_percent=percent,
+            status=state
+        )
+    return status_info
+@app.post("/processing/start")
+async def start_processing(request: StartProcessingRequest, background_tasks: BackgroundTasks):
+    """Start caption processing"""
+    # Rejects the call with HTTP 400 while a run is already flagged as active.
+    # `background_tasks` is accepted but not used in this body; the loop is
+    # scheduled directly on the event loop as an asyncio task.
+    global is_processing, current_processing_task
+    if is_processing:
+        raise HTTPException(status_code=400, detail="Processing is already running")
+    # Flag is raised before the task is created so the loop sees it as soon as it runs.
+    is_processing = True
+    loop_coro = processing_loop(request.courses, request.continuous)
+    current_processing_task = asyncio.create_task(loop_coro)
+    reply = {"message": "Processing started", "continuous": request.continuous}
+    return reply
+@app.post("/processing/stop")
+async def stop_processing():
+    """Stop caption processing"""
+    # Rejects the call with HTTP 400 when no run is flagged as active.
+    global is_processing, current_processing_task
+    if not is_processing:
+        raise HTTPException(status_code=400, detail="Processing is not running")
+    # Lower the flag the processing loops poll, then cancel the task if one is held.
+    is_processing = False
+    running_task, current_processing_task = current_processing_task, None
+    if running_task:
+        running_task.cancel()
+    return {"message": "Processing stopped"}
+@app.get("/captions/{course}")
+async def get_captions(course: str):
+    """Get captions for a specific course"""
+    # Returns whatever load_captions_from_file() yields for the course, with its length.
+    stored = load_captions_from_file(course)
+    return dict(course=course, total_captions=len(stored), captions=stored)
+@app.delete("/captions/{course}")
+async def delete_captions(course: str):
+    """Delete captions for a specific course"""
+    # Removes the course's caption file and its in-memory tracking entries.
+    # Responds 404 when no caption file exists, 500 on any other failure.
+    try:
+        file_path = get_caption_file_path(course)
+        if not file_path.exists():
+            raise HTTPException(status_code=404, detail=f"No captions found for {course}")
+        file_path.unlink()
+        # Forget the in-memory entries as well; pop() tolerates a course that was never loaded.
+        processed_images.pop(course, None)
+        course_captions.pop(course, None)
+        return {"message": f"Captions for {course} deleted"}
+    except HTTPException:
+        # HTTPException is an Exception subclass: without this clause the
+        # deliberate 404 above is caught below and reported as a 500.
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error deleting captions: {e}") from e
+# Core processing functions (same as original)
 async def fetch_courses() -> List[str]:
     """Fetch available courses from source server"""
     async with aiohttp.ClientSession() as session:
                 print(f"Couldn't get model info from {server}: {e}")
     return model_info
 async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict:
     """Process single image through one caption server"""
     if server.busy:
     print(f"\nProcessing {len(images)} images for course {course}")
     remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
+    while remaining_images and is_processing:
         # Create tasks for each available server
         tasks = []
         for server in servers:
             course_captions[course].clear()
             break
+async def processing_loop(specific_courses: Optional[List[str]] = None, continuous: bool = False):
+    """Main processing loop"""
+    global is_processing
     # Get model information and verify Florence-2-large availability
     model_info = await get_model_info()
     if not available_servers:
         print(f"\nError: No servers with {MODEL_TYPE} available!")
+        is_processing = False
         return
     # Update servers list to only use those with large model
+    processing_servers = available_servers
+    print(f"\nUsing {len(processing_servers)} servers with {MODEL_TYPE}")
     print()
     start_time = time.time()
+    while is_processing:
         try:
             # Get available courses
+            if specific_courses:
+                courses = specific_courses
+            else:
+                courses = await fetch_courses()
             if not courses:
                 print("No courses found, waiting...")
+                if not continuous:
+                    break
                 await asyncio.sleep(10)
                 continue
             # Process each course with all available servers
             for course in courses:
+                if not is_processing:
+                    break
+                await process_course(course, processing_servers)
                 # Show server stats
                 print("\nServer Stats:")
+                total_processed = sum(s.total_processed for s in processing_servers)
                 elapsed = time.time() - start_time
                 if elapsed > 0:
                     print(f"Total images processed: {total_processed}")
                     print(f"Overall speed: {total_processed/elapsed:.2f} fps")
+                    for s in processing_servers:
                         print(f"- {s.url}: {s.total_processed} images, {s.fps:.2f} fps")
                 print()
+            if not continuous:
+                break
             # Wait before next check
             await asyncio.sleep(5)
+        except asyncio.CancelledError:
+            print("Processing cancelled")
+            break
         except Exception as e:
+            print(f"Error in processing loop: {e}")
             await asyncio.sleep(10)
+    is_processing = False
+# Startup event
+# NOTE(review): recent FastAPI releases deprecate on_event in favour of a
+# lifespan handler. Consider migrating when the framework version allows.
+@app.on_event("startup")
+async def startup_event():
+    """Initialize servers on startup"""
+    # Build the CaptionServer list before any request can read `servers`.
+    initialize_servers()
+    # Startup banner: endpoints in use and how many caption servers are configured.
+    print("Caption Coordinator API started")
     print(f"Source server: {SOURCE_SERVER}")
+    print(f"Caption servers: {len(CAPTION_SERVERS)}")
     print(f"Dataset server: {DATA_COLLECTION_SERVER}")
+if __name__ == "__main__":
+    # Runs only when the file is executed directly. "app:app" is the import
+    # string for the `app` object defined in this module (app.py).
+    # NOTE(review): reload=True turns on uvicorn's auto-reload, which is meant
+    # for development. Confirm it and port 8000 are intended for the deployed Space.
+    uvicorn.run(
+        "app:app",
+        host="0.0.0.0",
+        port=8000,
+        reload=True
+    )