Peter Michael Gits Claude committed on
Commit
acdec04
·
1 Parent(s): 5e08a8b

feat: Improve health endpoint with lazy loading explanation v0.2.1

Browse files

- Add detailed health endpoint explaining ZeroGPU lazy loading strategy
- Add /preload endpoint for optional model preloading during testing
- Update status messages to clarify lazy loading is intentional and optimized
- Explain that model_loaded: false is expected behavior for ZeroGPU optimization
- Model loads automatically on first synthesis request to prevent GPU timeouts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +22 -4
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -333,10 +333,10 @@ def get_system_info():
333
  "💾 GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
334
  "⚑ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
335
  "🔧 PyTorch": torch.__version__,
336
- "🤖 Model Status": "✅ Loaded" if model is not None else "⏳ Loading on first use",
337
  "πŸŽ›οΈ Mixed Precision": "βœ… Enabled" if torch.cuda.is_available() else "❌ CPU Mode",
338
  "🔌 MCP Server": "✅ Available" if MCP_AVAILABLE else "❌ Not Available",
339
- "🌐 WebSocket TTS": "✅ Ready" if model is not None else "⏳ Initializing"
340
  }
341
  return "\n".join([f"{k}: {v}" for k, v in info.items()])
342
 
@@ -542,13 +542,31 @@ def create_tts_fastapi_app():
542
 
543
  @app.get("/health")
544
  async def tts_health():
545
- """Health check endpoint"""
546
  return {
547
  "status": "healthy",
548
  "model_loaded": model is not None,
549
- "gpu_available": torch.cuda.is_available()
 
 
550
  }
551
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
  return app
553
 
554
  # MCP Tool Definitions and Handlers
 
333
  "💾 GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
334
  "⚑ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
335
  "🔧 PyTorch": torch.__version__,
336
+ "🤖 Model Status": "✅ Loaded" if model is not None else "💤 Lazy Loading (ZeroGPU optimized)",
337
  "πŸŽ›οΈ Mixed Precision": "βœ… Enabled" if torch.cuda.is_available() else "❌ CPU Mode",
338
  "🔌 MCP Server": "✅ Available" if MCP_AVAILABLE else "❌ Not Available",
339
+ "🌐 WebSocket TTS": "✅ Ready" if model is not None else "💤 Ready (lazy loading)"
340
  }
341
  return "\n".join([f"{k}: {v}" for k, v in info.items()])
342
 
 
542
 
543
  @app.get("/health")
544
  async def tts_health():
545
+ """Health check endpoint with detailed status"""
546
  return {
547
  "status": "healthy",
548
  "model_loaded": model is not None,
549
+ "gpu_available": torch.cuda.is_available(),
550
+ "loading_strategy": "lazy (ZeroGPU optimized)",
551
+ "note": "Model loads on first synthesis request to optimize GPU usage"
552
  }
553
 
554
+ @app.post("/preload")
555
+ async def preload_model():
556
+ """Preload the TTS model (optional for testing)"""
557
+ global model
558
+ if model is not None:
559
+ return {"status": "success", "message": "Model already loaded"}
560
+
561
+ try:
562
+ success = load_model()
563
+ if success:
564
+ return {"status": "success", "message": "Model preloaded successfully"}
565
+ else:
566
+ return {"status": "error", "message": "Failed to preload model"}
567
+ except Exception as e:
568
+ return {"status": "error", "message": f"Preload error: {str(e)}"}
569
+
570
  return app
571
 
572
  # MCP Tool Definitions and Handlers