Spaces:
Paused
Paused
File size: 1,995 Bytes
ff7d020 58b0f90 8a6294a ff7d020 58b0f90 ff7d020 d512c0d ff7d020 58b0f90 ff7d020 58b0f90 ff7d020 26c5cf5 ff7d020 58b0f90 ff7d020 58b0f90 ff7d020 3b32b80 58b0f90 ff7d020 58b0f90 ff7d020 58b0f90 ff7d020 58b0f90 ff7d020 58b0f90 ff7d020 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import tempfile
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from neuttsair.neutts import NeuTTSAir
# FastAPI application object that the route decorators below attach to.
app = FastAPI(
    title="NeuTTS-Air API",
    description="A FastAPI service for the NeuTTS-Air model.",
)

# Location of the NeuTTS-Air backbone; per the original note, the path is
# relative to the working directory in the Docker container.
MODEL_PATH = "neutts-air-q4-gguf"

# Start from "no model" so that a failed load leaves `tts` as None; the /tts
# handler checks for None and answers 503 instead of crashing.
tts = None
try:
    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
except Exception as e:
    # Best-effort load: report the failure and keep the service importable.
    print(f"Error loading model: {e}")
class TTSRequest(BaseModel):
    """Request body accepted by the ``/tts`` endpoint."""

    # Text to synthesize; passed as the first argument to the model's infer().
    text: str
    # Path to the reference audio file; passed to the model's encode_reference().
    ref_audio_path: str
    # Reference text passed to infer() alongside the encoded reference audio
    # (the endpoint docstring calls it the transcript).
    ref_text: str
@app.get("/")
def read_root():
    """Health check: return a fixed message showing the service is up."""
    status = {"message": "NeuTTS-Air FastAPI is running."}
    return status
def _remove_file_quietly(path):
    """Delete *path*; do nothing if it has already been removed."""
    import os  # function-scope import: `os` is not imported at file level

    try:
        os.remove(path)
    except FileNotFoundError:
        pass


@app.post("/tts", summary="Generate speech from text")
async def tts_endpoint(request: TTSRequest):
    """
    Generates a WAV audio file from text using a reference audio and transcript.

    Args:
        request: Body carrying ``text`` (what to synthesize),
            ``ref_audio_path`` (reference audio file) and ``ref_text``
            (transcript passed to the model with the reference).

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` and download name
        ``generated_speech.wav``. The temporary file behind it is scheduled
        for deletion once the response has been sent.

    Raises:
        HTTPException: 503 when the model failed to load at startup; 500 when
            encoding, inference, or writing the audio raises any exception.
    """
    # Function-scope import keeps this block independent of the file's
    # top-level import list.
    from fastapi import BackgroundTasks

    if tts is None:
        raise HTTPException(status_code=503, detail="Model is not loaded.")

    filepath = None
    try:
        # NOTE(review): ref_audio_path comes straight from the request body
        # and reaches the model unvalidated; consider restricting it to a
        # known directory of reference audios.
        # NOTE(review): these model calls are synchronous inside an async
        # handler, so they presumably block the event loop while running.
        # Offloading to a worker thread would need confirmation that the
        # model is safe to call concurrently.
        ref_codes = tts.encode_reference(request.ref_audio_path)
        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)

        # Reserve a unique .wav path, then close the handle before writing:
        # writing by name while the handle is still open fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            filepath = tmp.name
        sf.write(filepath, wav_audio, tts.codec.sampling_rate)

        # delete=False keeps the file alive for the response, so remove it
        # explicitly afterwards; otherwise every request leaks a temp file.
        cleanup = BackgroundTasks()
        cleanup.add_task(_remove_file_quietly, filepath)
        return FileResponse(
            filepath,
            media_type="audio/wav",
            filename="generated_speech.wav",
            background=cleanup,
        )
    except Exception as e:
        # No response will stream the file, so clean it up right away.
        if filepath is not None:
            _remove_file_quietly(filepath)
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") from e
|