Spaces:
Paused
Paused
import io
import tempfile

import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, Response
from pydantic import BaseModel
from neuttsair.neutts import NeuTTSAir
# Initialize the FastAPI app.
app = FastAPI(
    title="NeuTTS-Air API",
    description="A FastAPI service for the NeuTTS-Air model.",
)

# Location of the NeuTTS-Air backbone.
# The path is relative to the working directory in the Docker container.
MODEL_PATH = "neutts-air-q4-gguf"

# Load the model once, at import time, on the CPU. A load failure is
# deliberately non-fatal: the error is printed and `tts` is left as None so
# the app can still start; code that needs the model must check for None.
try:
    tts = NeuTTSAir(backbone_repo=MODEL_PATH, backbone_device="cpu")
except Exception as e:
    print(f"Error loading model: {e}")
    tts = None
# Pydantic model for the request body of the text-to-speech endpoint.
class TTSRequest(BaseModel):
    # Text to synthesize into speech.
    text: str
    # Path of the reference audio file; it is handed to the model's
    # `encode_reference`, so it must be readable by the server process.
    ref_audio_path: str
    # Transcript that accompanies the reference audio.
    ref_text: str
# NOTE(review): no route decorator is visible on this function in this view;
# presumably it is meant to be registered on `app` as the root GET route.
# Confirm against the deployed source.
def read_root():
    """Simple health check endpoint."""
    return {"message": "NeuTTS-Air FastAPI is running."}
# NOTE(review): no route decorator is visible on this function in this view;
# presumably it is meant to be registered on `app` as a POST route.
# Confirm against the deployed source.
async def tts_endpoint(request: TTSRequest):
    """
    Generate a WAV audio file from text using a reference audio and transcript.

    Args:
        request: The text to synthesize, the path of the reference audio
            file, and the transcript of that reference audio.

    Returns:
        A response with media type ``audio/wav`` whose body is the generated
        speech, sent as an attachment named ``generated_speech.wav``.

    Raises:
        HTTPException: 503 if the model failed to load at startup; 500 if
            reference encoding, inference, or WAV serialization fails.
    """
    if tts is None:
        raise HTTPException(status_code=503, detail="Model is not loaded.")

    try:
        # NOTE(security): `ref_audio_path` comes straight from the request
        # body and is opened on the server's filesystem without validation.
        # Consider restricting it to a known directory of reference audios.
        #
        # NOTE(review): neither model call below is awaited, so they run to
        # completion inside this coroutine before it yields control again.
        ref_codes = tts.encode_reference(request.ref_audio_path)
        wav_audio = tts.infer(request.text, ref_codes, request.ref_text)

        # Encode the WAV in memory rather than in a NamedTemporaryFile with
        # delete=False: that file was never removed, so each request (and
        # each failed write) left a file behind on disk.
        buffer = io.BytesIO()
        # A file-like target carries no extension, so the format is explicit.
        sf.write(buffer, wav_audio, tts.codec.sampling_rate, format="WAV")
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Internal Server Error: {e}"
        ) from e

    return Response(
        content=buffer.getvalue(),
        media_type="audio/wav",
        headers={
            "Content-Disposition": 'attachment; filename="generated_speech.wav"'
        },
    )