Spaces:
Sleeping
Sleeping
GitHub Actions committed on
Commit ·
88d625f
1
Parent(s): 175fee8
🚀 Auto-deploy from GitHub
Browse files- app/api/v1/endpoints/health.py +13 -1
- app/main.py +21 -7
app/api/v1/endpoints/health.py
CHANGED
|
@@ -13,6 +13,7 @@ router = APIRouter()
|
|
| 13 |
class HealthResponse(BaseModel):
|
| 14 |
status: str
|
| 15 |
server: str
|
|
|
|
| 16 |
huggingface_space: Optional[str] = None
|
| 17 |
huggingface_space_url: Optional[str] = None
|
| 18 |
|
|
@@ -33,15 +34,26 @@ async def check_huggingface_space():
|
|
| 33 |
@router.get("/health", response_model=HealthResponse)
|
| 34 |
async def health_check():
|
| 35 |
"""
|
| 36 |
-
Health check endpoint that verifies server status and HuggingFace space availability
|
| 37 |
"""
|
| 38 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# Check HuggingFace space if configured
|
| 40 |
hf_status = await check_huggingface_space()
|
| 41 |
|
| 42 |
return HealthResponse(
|
| 43 |
status="healthy",
|
| 44 |
server="running",
|
|
|
|
| 45 |
huggingface_space=hf_status,
|
| 46 |
huggingface_space_url=HF_SPACE_URL
|
| 47 |
)
|
|
|
|
| 13 |
class HealthResponse(BaseModel):
|
| 14 |
status: str
|
| 15 |
server: str
|
| 16 |
+
model_status: str
|
| 17 |
huggingface_space: Optional[str] = None
|
| 18 |
huggingface_space_url: Optional[str] = None
|
| 19 |
|
|
|
|
| 34 |
@router.get("/health", response_model=HealthResponse)
|
| 35 |
async def health_check():
|
| 36 |
"""
|
| 37 |
+
Health check endpoint that verifies server status, model loading status and HuggingFace space availability
|
| 38 |
"""
|
| 39 |
try:
|
| 40 |
+
# Check model loading status
|
| 41 |
+
model_status = "loading"
|
| 42 |
+
try:
|
| 43 |
+
from ....core.model_loader import get_generator
|
| 44 |
+
# Try to get the generator - if it's cached, this will be instant
|
| 45 |
+
generator = get_generator()
|
| 46 |
+
model_status = "ready" if generator else "loading"
|
| 47 |
+
except Exception as e:
|
| 48 |
+
model_status = f"error: {str(e)[:50]}..."
|
| 49 |
+
|
| 50 |
# Check HuggingFace space if configured
|
| 51 |
hf_status = await check_huggingface_space()
|
| 52 |
|
| 53 |
return HealthResponse(
|
| 54 |
status="healthy",
|
| 55 |
server="running",
|
| 56 |
+
model_status=model_status,
|
| 57 |
huggingface_space=hf_status,
|
| 58 |
huggingface_space_url=HF_SPACE_URL
|
| 59 |
)
|
app/main.py
CHANGED
|
@@ -24,15 +24,29 @@ settings.resolved_static_files_mount_dir.mkdir(parents=True, exist_ok=True)
|
|
| 24 |
# Lifecycle management for the model
|
| 25 |
@asynccontextmanager
|
| 26 |
async def lifespan(app: FastAPI):
|
| 27 |
-
# Startup: Preload the model
|
| 28 |
-
logger.info("Anwendung startet...
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
-
logger.error(f"Fehler beim
|
| 34 |
-
|
| 35 |
-
# raise # Uncomment to prevent startup on error
|
| 36 |
yield
|
| 37 |
# Shutdown: Cleanup actions could go here (not currently needed for the model)
|
| 38 |
logger.info("Anwendung wird heruntergefahren.")
|
|
|
|
| 24 |
# Lifecycle management for the model
|
| 25 |
@asynccontextmanager
|
| 26 |
async def lifespan(app: FastAPI):
|
| 27 |
+
# Startup: Preload the model (non-blocking)
|
| 28 |
+
logger.info("Anwendung startet... Starte LLM-Modell-Loading im Hintergrund.")
|
| 29 |
try:
|
| 30 |
+
# Start model loading in background to avoid blocking app startup
|
| 31 |
+
import asyncio
|
| 32 |
+
import threading
|
| 33 |
+
|
| 34 |
+
def load_model_background():
|
| 35 |
+
try:
|
| 36 |
+
logger.info("Hintergrund-Loading des LLM-Modells gestartet...")
|
| 37 |
+
get_generator() # Calls get_generator to load and cache the model
|
| 38 |
+
logger.info("✅ LLM-Modell erfolgreich vorab geladen und Pipeline initialisiert.")
|
| 39 |
+
except Exception as e:
|
| 40 |
+
logger.error(f"❌ Fehler beim Hintergrund-Loading des LLM-Modells: {e}", exc_info=True)
|
| 41 |
+
|
| 42 |
+
# Start model loading in a separate thread
|
| 43 |
+
model_thread = threading.Thread(target=load_model_background, daemon=True)
|
| 44 |
+
model_thread.start()
|
| 45 |
+
logger.info("🚀 Anwendung gestartet - Modell lädt im Hintergrund...")
|
| 46 |
+
|
| 47 |
except Exception as e:
|
| 48 |
+
logger.error(f"Fehler beim Starten des Hintergrund-Loadings: {e}", exc_info=True)
|
| 49 |
+
|
|
|
|
| 50 |
yield
|
| 51 |
# Shutdown: Cleanup actions could go here (not currently needed for the model)
|
| 52 |
logger.info("Anwendung wird heruntergefahren.")
|