neu / app.py
Rajhuggingface4253's picture
Update app.py
ff7d020 verified
raw
history blame
2 kB
import os
import tempfile

import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import FileResponse
from neuttsair.neutts import NeuTTSAir
from pydantic import BaseModel
# Create the application object that the route decorators in this file attach to.
app = FastAPI(
    title="NeuTTS-Air API",
    description="A FastAPI service for the NeuTTS-Air model.",
)
# Load the NeuTTS-Air model once, when this module is imported.
# MODEL_PATH is relative to the working directory in the Docker container.
MODEL_PATH = "neutts-air-q4-gguf"

# Start from "no model"; the name is only rebound when construction succeeds,
# so a failed load reports the error on stdout and leaves `tts` as None for
# the request handlers to check.
tts = None
try:
    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
# Request body schema for the speech-generation endpoint.
# (Documented with comments rather than a docstring so the generated schema
# description stays exactly as it was.)
class TTSRequest(BaseModel):
    # Text to be synthesized.
    text: str
    # Path of the reference audio file that is handed to the reference encoder.
    ref_audio_path: str
    # Transcript passed to inference together with the encoded reference audio.
    ref_text: str
@app.get("/")
def read_root():
    """Simple health check endpoint."""
    # Fixed payload; the model state is not consulted here.
    status = {"message": "NeuTTS-Air FastAPI is running."}
    return status
def _remove_quietly(path):
    """Delete the file at *path*; best-effort cleanup that never raises."""
    try:
        os.remove(path)
    except OSError:
        # Already gone or not removable: nothing useful to do during cleanup.
        pass


@app.post("/tts", summary="Generate speech from text")
async def tts_endpoint(request: TTSRequest):
    """
    Generates a WAV audio file from text using a reference audio and transcript.
    """
    # Implementation notes (kept out of the docstring above, which is left
    # unchanged because FastAPI publishes it as the endpoint description):
    #
    #   request  -- TTSRequest with the text to speak, the path of the
    #               reference audio file, and the reference transcript.
    #   returns  -- FileResponse serving the generated WAV as
    #               "generated_speech.wav"; the temporary file behind it is
    #               deleted once the response has been sent.
    #   raises   -- HTTPException 503 when the model failed to load at startup,
    #               HTTPException 500 for any failure during generation.
    if tts is None:
        raise HTTPException(status_code=503, detail="Model is not loaded.")

    filepath = None
    try:
        # SECURITY NOTE(review): ref_audio_path is taken verbatim from the
        # request body, so a client chooses which server-side file is opened.
        # Consider restricting it to a known directory of reference audios.
        ref_codes = tts.encode_reference(request.ref_audio_path)

        # NOTE(review): this call is synchronous inside an async handler, so
        # the event loop is blocked while it runs. Offloading it to a worker
        # thread would need confirmation that the model is thread-safe.
        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)

        # Reserve a unique path, then close the handle before writing so the
        # audio library can reopen the file by name on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            filepath = tmp.name
        sf.write(filepath, wav_audio, tts.codec.sampling_rate)

        # delete=False keeps the file alive until it has been streamed; the
        # background task removes it after the response is sent, so generated
        # files no longer accumulate on disk.
        cleanup = BackgroundTasks()
        cleanup.add_task(_remove_quietly, filepath)
        return FileResponse(
            filepath,
            media_type="audio/wav",
            filename="generated_speech.wav",
            background=cleanup,
        )
    except Exception as e:
        # No response will carry the cleanup task on this path, so remove any
        # temporary file that was already created.
        if filepath is not None:
            _remove_quietly(filepath)
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") from e