File size: 1,995 Bytes
ff7d020
 
 
58b0f90
 
8a6294a
 
ff7d020
 
58b0f90
ff7d020
 
 
 
 
 
 
 
d512c0d
ff7d020
58b0f90
 
ff7d020
 
58b0f90
ff7d020
 
 
 
26c5cf5
ff7d020
 
58b0f90
ff7d020
58b0f90
ff7d020
 
3b32b80
58b0f90
ff7d020
 
 
 
58b0f90
ff7d020
 
58b0f90
ff7d020
 
 
 
58b0f90
ff7d020
 
58b0f90
ff7d020
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import tempfile

import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import FileResponse
from neuttsair.neutts import NeuTTSAir
from pydantic import BaseModel

# ASGI application object that the route decorators below attach to.
app = FastAPI(
    title="NeuTTS-Air API",
    description="A FastAPI service for the NeuTTS-Air model.",
)

# Location of the NeuTTS-Air backbone, relative to the working directory
# in the Docker container.
MODEL_PATH = "neutts-air-q4-gguf"

# Start from "no model"; a failed load leaves this as None so the app still
# starts and the /tts handler can answer with 503 instead of crashing at import.
tts = None
try:
    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
except Exception as load_error:
    print(f"Error loading model: {load_error}")

# Pydantic model for the request body
class TTSRequest(BaseModel):
    """Request body accepted by the ``POST /tts`` endpoint.

    All three fields are required strings; the handler forwards them to the
    model unchanged.
    """

    # Text to synthesize; passed as the first argument to ``tts.infer``.
    text: str
    # Path of the reference audio, handed to ``tts.encode_reference``.
    # NOTE(review): presumably resolved on the server's filesystem — confirm.
    ref_audio_path: str
    # Transcript accompanying the reference audio; passed to ``tts.infer``.
    ref_text: str

@app.get("/")
def read_root():
    """Health check: return a fixed message showing the service is up."""
    status = {"message": "NeuTTS-Air FastAPI is running."}
    return status

@app.post("/tts", summary="Generate speech from text")
async def tts_endpoint(request: TTSRequest):
    """
    Generates a WAV audio file from text using a reference audio and transcript.

    Args:
        request: Body carrying the text to speak, the path of the reference
            audio, and the reference transcript.

    Returns:
        A ``FileResponse`` streaming the generated WAV as
        ``generated_speech.wav``. The temporary file backing the response is
        deleted once the response has been sent.

    Raises:
        HTTPException: 503 when the model failed to load at startup; 500 when
            encoding, inference, or writing the audio fails.
    """
    if tts is None:
        raise HTTPException(status_code=503, detail="Model is not loaded.")

    # NOTE(review): ``ref_audio_path`` comes straight from the client and is
    # passed to the model unvalidated — consider restricting it to a known
    # directory of reference audios.
    # NOTE(review): the calls below are synchronous inside an async handler,
    # so the event loop is blocked while they run — consider offloading them
    # if the model is safe to call from a worker thread.
    filepath = None
    try:
        # Encode the reference audio, then synthesize the requested text.
        ref_codes = tts.encode_reference(request.ref_audio_path)
        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)

        # Only reserve a unique name here; the handle is closed before
        # soundfile reopens the path, which avoids writing to a file that is
        # still held open by this process.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            filepath = tmp.name
        sf.write(filepath, wav_audio, tts.codec.sampling_rate)

        # delete=False means nothing removes the file automatically, so
        # schedule its removal for after the response body has been sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(os.remove, filepath)
        return FileResponse(
            filepath,
            media_type="audio/wav",
            filename="generated_speech.wav",
            background=cleanup,
        )

    except Exception as e:
        # The background task never runs on failure, so remove the file here.
        if filepath is not None:
            try:
                os.remove(filepath)
            except OSError:
                # Best-effort cleanup; the original error is what matters.
                pass
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") from e