Peter Michael Gits Claude committed on
Commit
acdec04
·
1 Parent(s): 5e08a8b

feat: Improve health endpoint with lazy loading explanation v0.2.1

Browse files

- Add detailed health endpoint explaining ZeroGPU lazy loading strategy
- Add /preload endpoint for optional model preloading during testing
- Update status messages to clarify lazy loading is intentional and optimized
- Explain that model_loaded: false is expected behavior for ZeroGPU optimization
- Model loads automatically on first synthesis request to prevent GPU timeouts

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. __pycache__/app.cpython-313.pyc +0 -0
  2. app.py +22 -4
__pycache__/app.cpython-313.pyc CHANGED
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
 
app.py CHANGED
@@ -333,10 +333,10 @@ def get_system_info():
333
  "💾 GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
334
  "⚑ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
335
  "🔧 PyTorch": torch.__version__,
336
- "🤖 Model Status": "✅ Loaded" if model is not None else "⏳ Loading on first use",
337
  "πŸŽ›οΈ Mixed Precision": "βœ… Enabled" if torch.cuda.is_available() else "❌ CPU Mode",
338
  "🔌 MCP Server": "✅ Available" if MCP_AVAILABLE else "❌ Not Available",
339
- "🌐 WebSocket TTS": "✅ Ready" if model is not None else "⏳ Initializing"
340
  }
341
  return "\n".join([f"{k}: {v}" for k, v in info.items()])
342
 
@@ -542,13 +542,31 @@ def create_tts_fastapi_app():
542
 
543
  @app.get("/health")
544
  async def tts_health():
545
- """Health check endpoint"""
546
  return {
547
  "status": "healthy",
548
  "model_loaded": model is not None,
549
- "gpu_available": torch.cuda.is_available()
 
 
550
  }
551
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
  return app
553
 
554
  # MCP Tool Definitions and Handlers
 
333
  "💾 GPU Memory": f"{torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB" if torch.cuda.is_available() else "N/A",
334
  "⚑ CUDA Version": torch.version.cuda if torch.cuda.is_available() else "N/A",
335
  "🔧 PyTorch": torch.__version__,
336
+ "🤖 Model Status": "✅ Loaded" if model is not None else "💤 Lazy Loading (ZeroGPU optimized)",
337
  "πŸŽ›οΈ Mixed Precision": "βœ… Enabled" if torch.cuda.is_available() else "❌ CPU Mode",
338
  "🔌 MCP Server": "✅ Available" if MCP_AVAILABLE else "❌ Not Available",
339
+ "🌐 WebSocket TTS": "✅ Ready" if model is not None else "💤 Ready (lazy loading)"
340
  }
341
  return "\n".join([f"{k}: {v}" for k, v in info.items()])
342
 
 
542
 
543
  @app.get("/health")
544
  async def tts_health():
545
+ """Health check endpoint with detailed status"""
546
  return {
547
  "status": "healthy",
548
  "model_loaded": model is not None,
549
+ "gpu_available": torch.cuda.is_available(),
550
+ "loading_strategy": "lazy (ZeroGPU optimized)",
551
+ "note": "Model loads on first synthesis request to optimize GPU usage"
552
  }
553
 
554
+ @app.post("/preload")
555
+ async def preload_model():
556
+ """Preload the TTS model (optional for testing)"""
557
+ global model
558
+ if model is not None:
559
+ return {"status": "success", "message": "Model already loaded"}
560
+
561
+ try:
562
+ success = load_model()
563
+ if success:
564
+ return {"status": "success", "message": "Model preloaded successfully"}
565
+ else:
566
+ return {"status": "error", "message": "Failed to preload model"}
567
+ except Exception as e:
568
+ return {"status": "error", "message": f"Preload error: {str(e)}"}
569
+
570
  return app
571
 
572
  # MCP Tool Definitions and Handlers