Spaces:

samarthnaikk
/

ttlm

Sleeping

ttlm / app.py

Samarth Naik

feat: Switch to minimal Coqui TTS VITS model implementation

1059c3e about 2 months ago

4.57 kB

	"""
	Minimal Text-to-Speech API using Coqui TTS VITS model
	FastAPI application for Hugging Face Spaces
	"""

	import logging
	import os
	import tempfile
	from pathlib import Path
	from typing import Optional

	import uvicorn
	from fastapi import BackgroundTasks, FastAPI, Form, HTTPException, Request
	from fastapi.responses import FileResponse
	from pydantic import BaseModel

	# Import TTS
	try:
	from TTS.api import TTS
	except ImportError:
	raise ImportError("TTS library not found. Install with: pip install TTS")

	# Logging: INFO threshold on the root logger, plus a logger named after this module
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)

	# Application instance that every route decorator in this file attaches to
	app = FastAPI(
	    version="1.0.0",
	    title="Text-to-Speech API",
	    description="Minimal TTS API using Coqui TTS VITS model",
	)

	# Shared model handle: None until the startup hook assigns the loaded model
	tts_model = None

	# Request model
	# Schema for a synthesis request: a single string field named ``text``.
	# (Documented with comments rather than a docstring so the generated
	# schema description is left unchanged.)
	class TTSRequest(BaseModel):
	    # The text to be converted to speech
	    text: str


	@app.on_event("startup")
	async def startup_event():
	    """
	    Load the TTS model once at startup.

	    Instantiates the LJSpeech VITS model and stores it in the module-level
	    ``tts_model`` so request handlers can reuse it.

	    Raises:
	        Exception: whatever the model constructor raised; the failure is
	            logged together with its traceback and then re-raised unchanged.
	    """
	    global tts_model

	    # Use the specific VITS model requested
	    model_name = "tts_models/en/ljspeech/vits"

	    logger.info("Loading TTS model...")
	    try:
	        tts_model = TTS(model_name=model_name, progress_bar=False)
	    except Exception as exc:
	        # logger.exception keeps the traceback in the log; the bare raise
	        # propagates the original exception without rewriting it.
	        logger.exception("Failed to load TTS model: %s", exc)
	        raise

	    logger.info("TTS model loaded successfully!")


	@app.get("/")
	async def root():
	    """Health check endpoint: static status payload naming the model and engine"""
	    payload = dict(
	        status="healthy",
	        message="Text-to-Speech API is running",
	        model="tts_models/en/ljspeech/vits",
	        engine="Coqui TTS",
	    )
	    return payload


	@app.get("/tts")
	async def tts_get(text: str):
	    """
	    GET endpoint for TTS, driven by a query parameter
	    Usage: GET /tts?text=Hello%20world

	    Responds with 400 when the text is empty or whitespace only; otherwise
	    hands the text to generate_speech and returns its result.
	    """
	    has_content = bool(text) and bool(text.strip())
	    if not has_content:
	        raise HTTPException(status_code=400, detail="Text parameter is required")

	    audio_response = await generate_speech(text)
	    return audio_response


	@app.post("/tts")
	async def tts_post(request: Request):
	    """
	    POST endpoint for TTS
	    Accepts a JSON body ({"text": "..."}) or form data with a "text" field

	    The payload format is selected from the Content-Type header: bodies
	    sent as application/json are parsed as JSON, anything else is read as
	    form data.

	    Raises:
	        HTTPException: 400 when the JSON is malformed, or when the text is
	            missing, not a string, empty, or whitespace only.
	    """
	    # A JSON body model and a Form field cannot be declared on the same
	    # operation: once a Form field is present FastAPI reads the body as form
	    # data, so a JSON request never reaches the handler with its text.
	    # The raw request is therefore parsed here by content type.
	    content_type = request.headers.get("content-type", "").lower()

	    if content_type.startswith("application/json"):
	        try:
	            payload = await request.json()
	        except ValueError as exc:
	            # Covers JSON syntax errors and undecodable bytes
	            raise HTTPException(status_code=400, detail="Invalid JSON body") from exc

	        if not isinstance(payload, dict):
	            raise HTTPException(status_code=400, detail="Text is required")

	        try:
	            input_text = TTSRequest(**payload).text
	        except ValueError as exc:
	            # pydantic validation errors derive from ValueError
	            raise HTTPException(status_code=400, detail="Text is required") from exc
	    else:
	        form = await request.form()
	        input_text = form.get("text")

	    # A form value may be an uploaded file rather than a string
	    if not isinstance(input_text, str) or not input_text:
	        raise HTTPException(status_code=400, detail="Text is required")

	    if not input_text.strip():
	        raise HTTPException(status_code=400, detail="Text cannot be empty")

	    return await generate_speech(input_text)


	async def generate_speech(text: str):
	    """
	    Generate speech from text using the VITS model

	    Args:
	        text: The text to synthesize.

	    Returns:
	        A FileResponse that serves the generated WAV file as the attachment
	        "speech.wav". The temporary file is removed by a background task
	        attached to the response.

	    Raises:
	        HTTPException: 503 when the model has not been loaded; 500 when
	            synthesis fails or produces an empty or missing file.
	    """
	    if tts_model is None:
	        raise HTTPException(status_code=503, detail="TTS model not loaded")

	    # Bound before the try block so the error path can always reference it
	    output_path = None
	    try:
	        # delete=False: the handle is only used to reserve a unique path;
	        # the model writes to that path after the handle has been closed.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
	            output_path = tmp_file.name

	        logger.info(f"Generating speech for text: '{text[:50]}...'")

	        # Generate speech using VITS model
	        # NOTE(review): this call is synchronous inside an async handler, so
	        # other requests wait while it runs; left as-is because it is not
	        # visible here whether the model tolerates concurrent calls.
	        tts_model.tts_to_file(text=text, file_path=output_path)

	        # Verify the file was created and has content
	        file_size = os.path.getsize(output_path) if os.path.exists(output_path) else 0
	        if file_size == 0:
	            raise RuntimeError("Generated audio file is empty or was not created")

	        logger.info(f"Speech generated successfully, file size: {file_size} bytes")

	        # Delete the temporary file once the response is done; without this
	        # every successful request leaves a WAV file behind on disk.
	        cleanup = BackgroundTasks()
	        cleanup.add_task(_remove_temp_file, output_path)

	        # Return the audio file
	        return FileResponse(
	            path=output_path,
	            media_type="audio/wav",
	            filename="speech.wav",
	            headers={
	                "Content-Disposition": "attachment; filename=speech.wav",
	                "Cache-Control": "no-cache",
	            },
	            background=cleanup,
	        )

	    except Exception as e:
	        logger.exception("Error generating speech: %s", e)
	        # Clean up output file on error
	        _remove_temp_file(output_path)
	        raise HTTPException(
	            status_code=500, detail=f"Failed to generate speech: {str(e)}"
	        ) from e


	def _remove_temp_file(path):
	    """Best-effort removal of a temporary file; a failure is logged, never raised."""
	    if path is None:
	        return
	    try:
	        os.unlink(path)
	    except FileNotFoundError:
	        # Already gone: nothing to clean up
	        pass
	    except OSError as exc:
	        logger.warning("Could not remove temporary file %s: %s", path, exc)


	@app.get("/health")
	async def health_check():
	    """Health check endpoint that also reports whether the model handle is set"""
	    model_ready = tts_model is not None
	    return {
	        "status": "healthy",
	        "model_loaded": model_ready,
	        "model_name": "tts_models/en/ljspeech/vits",
	    }


	if __name__ == "__main__":
	    # Direct execution (local development and HF Spaces): serve the app
	    # on all interfaces, port 7860
	    bind_options = {"host": "0.0.0.0", "port": 7860}
	    uvicorn.run(app, **bind_options)