Spaces:

Fred808
/

Flow

Paused

App Files Community

Fred808 committed on Oct 21, 2025

Commit

6beb7ba

verified ·

1 Parent(s): a1e320e

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -22

app.py CHANGED Viewed

@@ -120,7 +120,7 @@ class ProcessingStatus(BaseModel):
 class StartProcessingRequest(BaseModel):
     courses: Optional[List[str]] = None  # If None, process all courses
-    continuous: bool = False
 # FastAPI App
 app = FastAPI(
@@ -135,6 +135,7 @@ course_captions: Dict[str, List[Dict]] = {}  # {course: [{image, caption, metada
 servers = []
 is_processing = False
 current_processing_task = None
 class CaptionServer:
     def __init__(self, url):
@@ -156,7 +157,12 @@ def initialize_servers():
 # API Routes
 @app.get("/")
 async def root():
-    return {"message": "Caption Coordinator API", "status": "running"}
 @app.get("/health")
 async def health():
@@ -164,7 +170,8 @@ async def health():
         "status": "healthy",
         "servers_available": len([s for s in servers if not s.busy]),
         "total_servers": len(servers),
-        "is_processing": is_processing
     }
 @app.get("/courses")
@@ -227,7 +234,7 @@ async def get_processing_status():
     return status_info
 @app.post("/processing/start")
-async def start_processing(request: StartProcessingRequest, background_tasks: BackgroundTasks):
     """Start caption processing"""
     global is_processing, current_processing_task
@@ -235,9 +242,15 @@ async def start_processing(request: StartProcessingRequest, background_tasks: Ba
         raise HTTPException(status_code=400, detail="Processing is already running")
     is_processing = True
-    current_processing_task = asyncio.create_task(processing_loop(request.courses, request.continuous))
-    return {"message": "Processing started", "continuous": request.continuous}
 @app.post("/processing/stop")
 async def stop_processing():
@@ -250,6 +263,10 @@ async def stop_processing():
     is_processing = False
     if current_processing_task:
         current_processing_task.cancel()
         current_processing_task = None
     return {"message": "Processing stopped"}
@@ -281,7 +298,7 @@ async def delete_captions(course: str):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error deleting captions: {e}")
-# Core processing functions (same as original)
 async def fetch_courses() -> List[str]:
     """Fetch available courses from source server"""
     async with aiohttp.ClientSession() as session:
@@ -354,7 +371,10 @@ async def process_image(server: CaptionServer, course: str, image: Dict) -> Dict
             server.total_processed += 1
             metadata = {
                 "image": image['filename'],
-                "caption": result['caption']
             }
             print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
             return metadata
@@ -378,7 +398,8 @@ async def submit_to_dataset(course: str, metadata_list: List[Dict]):
         "course": parent_folder,
         "metadata": {
             "course_name": course,
-            "image_count": len(metadata_list)
         },
         "captions": metadata_list
     }
@@ -409,11 +430,16 @@ async def process_course(course: str, servers: List[CaptionServer]):
     # Get list of images
     images = await fetch_course_images(course)
     if not images:
         return
     print(f"\nProcessing {len(images)} images for course {course}")
     remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
     while remaining_images and is_processing:
         # Create tasks for each available server
         tasks = []
@@ -445,17 +471,20 @@ async def process_course(course: str, servers: List[CaptionServer]):
         # Show progress
         total = len(images)
         done = len(processed_images[course])
-        print(f"\rProgress: {done}/{total} images ({done/total*100:.1f}%)", end="")
         if not remaining_images and len(processed_images[course]) == len(images):
             print(f"\nCourse {course} complete, submitting to dataset...")
             await submit_to_dataset(course, course_captions[course])
-            processed_images[course].clear()
-            course_captions[course].clear()
             break
-async def processing_loop(specific_courses: Optional[List[str]] = None, continuous: bool = False):
-    """Main processing loop"""
     global is_processing
     # Get model information and verify Florence-2-large availability
@@ -478,7 +507,20 @@ async def processing_loop(specific_courses: Optional[List[str]] = None, continuo
     # Update servers list to only use those with large model
     processing_servers = available_servers
     print(f"\nUsing {len(processing_servers)} servers with {MODEL_TYPE}")
-    print()
     start_time = time.time()
@@ -517,9 +559,11 @@ async def processing_loop(specific_courses: Optional[List[str]] = None, continuo
                 print()
             if not continuous:
                 break
             # Wait before next check
             await asyncio.sleep(5)
         except asyncio.CancelledError:
@@ -530,21 +574,25 @@ async def processing_loop(specific_courses: Optional[List[str]] = None, continuo
             await asyncio.sleep(10)
     is_processing = False
 # Startup event
 @app.on_event("startup")
 async def startup_event():
-    """Initialize servers on startup"""
     initialize_servers()
     print("Caption Coordinator API started")
     print(f"Source server: {SOURCE_SERVER}")
     print(f"Caption servers: {len(CAPTION_SERVERS)}")
     print(f"Dataset server: {DATA_COLLECTION_SERVER}")
 if __name__ == "__main__":
-    uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=True
-    )

 class StartProcessingRequest(BaseModel):
     courses: Optional[List[str]] = None  # If None, process all courses
+    continuous: bool = True  # Default to continuous like original
 # FastAPI App
 app = FastAPI(
 servers = []
 is_processing = False
 current_processing_task = None
+auto_start_processing = True  # Set to False if you don't want auto-start
 class CaptionServer:
     def __init__(self, url):
 # API Routes
 @app.get("/")
 async def root():
+    return {
+        "message": "Caption Coordinator API",
+        "status": "running",
+        "auto_processing": auto_start_processing,
+        "is_processing": is_processing
+    }
 @app.get("/health")
 async def health():
         "status": "healthy",
         "servers_available": len([s for s in servers if not s.busy]),
         "total_servers": len(servers),
+        "is_processing": is_processing,
+        "auto_processing": auto_start_processing
     }
 @app.get("/courses")
     return status_info
 @app.post("/processing/start")
+async def start_processing(request: StartProcessingRequest = StartProcessingRequest()):
     """Start caption processing"""
     global is_processing, current_processing_task
         raise HTTPException(status_code=400, detail="Processing is already running")
     is_processing = True
+    current_processing_task = asyncio.create_task(
+        processing_loop(request.courses, request.continuous)
+    )
+    return {
+        "message": "Processing started",
+        "continuous": request.continuous,
+        "specific_courses": request.courses
+    }
 @app.post("/processing/stop")
 async def stop_processing():
     is_processing = False
     if current_processing_task:
         current_processing_task.cancel()
+        try:
+            await current_processing_task
+        except asyncio.CancelledError:
+            pass
         current_processing_task = None
     return {"message": "Processing stopped"}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error deleting captions: {e}")
+# Core processing functions
 async def fetch_courses() -> List[str]:
     """Fetch available courses from source server"""
     async with aiohttp.ClientSession() as session:
             server.total_processed += 1
             metadata = {
                 "image": image['filename'],
+                "caption": result['caption'],
+                "server": server.url,
+                "processing_time": processing_time,
+                "timestamp": datetime.now().isoformat()
             }
             print(f"Server {server.url} processed {image['filename']} in {processing_time:.2f}s ({server.fps:.2f} fps)")
             return metadata
         "course": parent_folder,
         "metadata": {
             "course_name": course,
+            "image_count": len(metadata_list),
+            "completed_at": datetime.now().isoformat()
         },
         "captions": metadata_list
     }
     # Get list of images
     images = await fetch_course_images(course)
     if not images:
+        print(f"No images found for course {course}")
         return
     print(f"\nProcessing {len(images)} images for course {course}")
     remaining_images = [img for img in images if img['filename'] not in processed_images[course]]
+    if not remaining_images:
+        print(f"All images already processed for course {course}")
+        return
     while remaining_images and is_processing:
         # Create tasks for each available server
         tasks = []
         # Show progress
         total = len(images)
         done = len(processed_images[course])
+        progress_percent = (done / total * 100) if total > 0 else 0
+        print(f"\rProgress: {done}/{total} images ({progress_percent:.1f}%)", end="")
         if not remaining_images and len(processed_images[course]) == len(images):
             print(f"\nCourse {course} complete, submitting to dataset...")
             await submit_to_dataset(course, course_captions[course])
+            # Don't clear the data, keep it for API queries
             break
+        # Small delay to prevent overwhelming the servers
+        await asyncio.sleep(0.5)
+async def processing_loop(specific_courses: Optional[List[str]] = None, continuous: bool = True):
+    """Main processing loop - same as original main() function"""
     global is_processing
     # Get model information and verify Florence-2-large availability
     # Update servers list to only use those with large model
     processing_servers = available_servers
     print(f"\nUsing {len(processing_servers)} servers with {MODEL_TYPE}")
+    # Check for existing caption files and report
+    existing_captions = list(CAPTIONS_DIR.glob("*_captions.json"))
+    if existing_captions:
+        print("\nFound existing caption files:")
+        for cap_file in existing_captions:
+            course = cap_file.stem.replace("_captions", "")
+            try:
+                with open(cap_file, 'r', encoding='utf-8') as f:
+                    captions = json.load(f)
+                    print(f"- {course}: {len(captions)} captions")
+            except Exception as e:
+                print(f"- Error reading {cap_file.name}: {e}")
+        print()
     start_time = time.time()
                 print()
             if not continuous:
+                print("One-time processing completed")
                 break
             # Wait before next check
+            print("Waiting for new courses...")
             await asyncio.sleep(5)
         except asyncio.CancelledError:
             await asyncio.sleep(10)
     is_processing = False
+    print("Processing loop stopped")
 # Startup event
 @app.on_event("startup")
 async def startup_event():
+    """Initialize servers and start processing on startup"""
     initialize_servers()
     print("Caption Coordinator API started")
     print(f"Source server: {SOURCE_SERVER}")
     print(f"Caption servers: {len(CAPTION_SERVERS)}")
     print(f"Dataset server: {DATA_COLLECTION_SERVER}")
+    # Start processing automatically (like original main())
+    if auto_start_processing:
+        print("Auto-starting processing loop...")
+        global is_processing, current_processing_task
+        is_processing = True
+        current_processing_task = asyncio.create_task(processing_loop())
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)