# Page-header residue from the Hugging Face Spaces file viewer: "Spaces: Sleeping / Sleeping".
import base64
import io
import os
import tempfile

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import torchaudio as ta
from chatterbox.tts import ChatterboxTTS
# FastAPI application instance for this service.
app = FastAPI()

# Load model once at startup: this runs at import time, on CPU, and the
# resulting module-level ``model`` is reused by every synthesis call instead
# of being reloaded per request.
print("Loading ChatterboxTTS model...")
model = ChatterboxTTS.from_pretrained(device="cpu")
print("Model ready.")
class TTSRequest(BaseModel):
    """Parameters for one text-to-speech synthesis call.

    Only ``text`` is required; every other field has a default.
    """

    # Text to synthesize.
    text: str
    # Optional base64-encoded audio file. When non-empty, ``synthesize``
    # decodes it and hands it to the model as the audio prompt.
    ref_audio: str | None = None
    # The three values below are forwarded unchanged to ``model.generate``
    # under the same keyword names.
    exaggeration: float = 0.5
    cfg_weight: float = 0.5
    temperature: float = 0.8
def health() -> dict[str, str]:
    """Return a constant payload signalling that the service is up."""
    # NOTE(review): no route decorator is visible on this function in the
    # captured source, so nothing here registers it on ``app`` -- confirm
    # against the original file and restore the decorator if it was lost.
    return {"status": "ok"}
def synthesize(req: TTSRequest):
    """Generate speech for ``req.text`` and return it as a WAV response.

    When ``req.ref_audio`` is non-empty it is base64-decoded, written to a
    temporary ``.wav`` file and passed to ``model.generate`` as
    ``audio_prompt_path``. The temporary file is removed afterwards whether
    or not generation succeeds.

    Args:
        req: Synthesis parameters. ``exaggeration``, ``cfg_weight`` and
            ``temperature`` are forwarded unchanged to ``model.generate``.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` holding the
        generated waveform, saved at the sample rate ``model.sr``.

    Raises:
        HTTPException: With status 400 when ``req.ref_audio`` cannot be
            base64-decoded.
    """
    # NOTE(review): no route decorator is visible on this function in the
    # captured source, so nothing here registers it on ``app`` -- confirm
    # against the original file and restore the decorator if it was lost.
    ref_path = None
    try:
        # Write ref audio to a temp file if provided
        if req.ref_audio:
            try:
                audio_bytes = base64.b64decode(req.ref_audio)
            except ValueError as exc:
                # binascii.Error (bad padding) is a ValueError subclass, and
                # non-ASCII input raises ValueError directly; both are the
                # client's fault, so report a 400 rather than crash.
                raise HTTPException(
                    status_code=400,
                    detail="ref_audio is not valid base64",
                ) from exc

            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                # Record the path before writing so the ``finally`` below
                # still removes the file if the write itself fails.
                ref_path = tmp.name
                tmp.write(audio_bytes)

        wav = model.generate(
            req.text,
            audio_prompt_path=ref_path,
            exaggeration=req.exaggeration,
            cfg_weight=req.cfg_weight,
            temperature=req.temperature,
        )
    finally:
        if ref_path is not None:
            try:
                os.unlink(ref_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

    # Write wav to buffer and return as audio/wav
    buf = io.BytesIO()
    ta.save(buf, wav, model.sr, format="wav")
    buf.seek(0)
    return StreamingResponse(buf, media_type="audio/wav")