Spaces:
Sleeping
Sleeping
Peter Michael Gits Claude committed on
Commit ·
acdec04
1
Parent(s): 5e08a8b
feat: Improve health endpoint with lazy loading explanation v0.2.1
Browse files- Add detailed health endpoint explaining ZeroGPU lazy loading strategy
- Add /preload endpoint for optional model preloading during testing
- Update status messages to clarify lazy loading is intentional and optimized
- Explain that model_loaded: false is expected behavior for ZeroGPU optimization
- Model loads automatically on first synthesis request to prevent GPU timeouts
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +22 -4
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -333,10 +333,10 @@ def get_system_info():
|
|
| 333 |
"πΎ GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
|
| 334 |
"β‘ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
|
| 335 |
"π§ PyTorch": torch.__version__,
|
| 336 |
-
"π€ Model Status": "β
Loaded" if model is not None else "
|
| 337 |
"ποΈ Mixed Precision": "β
Enabled" if torch.cuda.is_available() else "β CPU Mode",
|
| 338 |
"π MCP Server": "β
Available" if MCP_AVAILABLE else "β Not Available",
|
| 339 |
-
"π WebSocket TTS": "β
Ready" if model is not None else "
|
| 340 |
}
|
| 341 |
return "\n".join([f"{k}: {v}" for k, v in info.items()])
|
| 342 |
|
|
@@ -542,13 +542,31 @@ def create_tts_fastapi_app():
|
|
| 542 |
|
| 543 |
@app.get("/health")
|
| 544 |
async def tts_health():
|
| 545 |
-
"""Health check endpoint"""
|
| 546 |
return {
|
| 547 |
"status": "healthy",
|
| 548 |
"model_loaded": model is not None,
|
| 549 |
-
"gpu_available": torch.cuda.is_available()
|
|
|
|
|
|
|
| 550 |
}
|
| 551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
return app
|
| 553 |
|
| 554 |
# MCP Tool Definitions and Handlers
|
|
|
|
| 333 |
"πΎ GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
|
| 334 |
"β‘ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
|
| 335 |
"π§ PyTorch": torch.__version__,
|
| 336 |
+
"π€ Model Status": "β
Loaded" if model is not None else "π€ Lazy Loading (ZeroGPU optimized)",
|
| 337 |
"ποΈ Mixed Precision": "β
Enabled" if torch.cuda.is_available() else "β CPU Mode",
|
| 338 |
"π MCP Server": "β
Available" if MCP_AVAILABLE else "β Not Available",
|
| 339 |
+
"π WebSocket TTS": "β
Ready" if model is not None else "π€ Ready (lazy loading)"
|
| 340 |
}
|
| 341 |
return "\n".join([f"{k}: {v}" for k, v in info.items()])
|
| 342 |
|
|
|
|
| 542 |
|
| 543 |
@app.get("/health")
|
| 544 |
async def tts_health():
|
| 545 |
+
"""Health check endpoint with detailed status"""
|
| 546 |
return {
|
| 547 |
"status": "healthy",
|
| 548 |
"model_loaded": model is not None,
|
| 549 |
+
"gpu_available": torch.cuda.is_available(),
|
| 550 |
+
"loading_strategy": "lazy (ZeroGPU optimized)",
|
| 551 |
+
"note": "Model loads on first synthesis request to optimize GPU usage"
|
| 552 |
}
|
| 553 |
|
| 554 |
+
@app.post("/preload")
|
| 555 |
+
async def preload_model():
|
| 556 |
+
"""Preload the TTS model (optional for testing)"""
|
| 557 |
+
global model
|
| 558 |
+
if model is not None:
|
| 559 |
+
return {"status": "success", "message": "Model already loaded"}
|
| 560 |
+
|
| 561 |
+
try:
|
| 562 |
+
success = load_model()
|
| 563 |
+
if success:
|
| 564 |
+
return {"status": "success", "message": "Model preloaded successfully"}
|
| 565 |
+
else:
|
| 566 |
+
return {"status": "error", "message": "Failed to preload model"}
|
| 567 |
+
except Exception as e:
|
| 568 |
+
return {"status": "error", "message": f"Preload error: {str(e)}"}
|
| 569 |
+
|
| 570 |
return app
|
| 571 |
|
| 572 |
# MCP Tool Definitions and Handlers
|