import io, base64, tempfile, os
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

import torchaudio as ta
from chatterbox.tts import ChatterboxTTS
# FastAPI application exposing the health and synthesis endpoints below.
app = FastAPI()

# Load model once at import/startup so individual requests don't pay the
# (slow) weight-loading cost.
# NOTE(review): device is pinned to CPU — confirm whether GPU is intended
# in the target deployment.
print("Loading ChatterboxTTS model...")
model = ChatterboxTTS.from_pretrained(device="cpu")
print("Model ready.")
class TTSRequest(BaseModel):
    """Request body for POST /synthesize."""

    # Text to synthesize into speech.
    text: str
    # Optional base64-encoded reference audio (voice prompt); decoded to a
    # temporary .wav file before synthesis.
    ref_audio: str | None = None  # base64-encoded audio file
    # Exaggeration level forwarded verbatim to model.generate().
    exaggeration: float = 0.5
    # Classifier-free-guidance weight forwarded verbatim to model.generate().
    cfg_weight: float = 0.5
    # Sampling temperature forwarded verbatim to model.generate().
    temperature: float = 0.8
@app.get("/health")
def health():
    """Liveness probe: always reports the service as up."""
    payload = {"status": "ok"}
    return payload
@app.post("/synthesize")
def synthesize(req: TTSRequest):
    """Synthesize speech for ``req.text`` and stream it back as a WAV file.

    If ``req.ref_audio`` is provided it is decoded from base64 into a
    temporary .wav file and passed to the model as the voice prompt; the
    temp file is always removed, even when generation fails.

    Raises:
        HTTPException: 400 if ``ref_audio`` is not valid base64.
    """
    ref_path = None
    # Write ref audio to a temp file if provided
    if req.ref_audio:
        # validate=True makes b64decode reject malformed input instead of
        # silently dropping non-alphabet characters (which would feed garbage
        # audio to the model); without the explicit except this surfaced as
        # an unhandled binascii.Error -> opaque 500.
        try:
            audio_bytes = base64.b64decode(req.ref_audio, validate=True)
        except ValueError as exc:  # binascii.Error subclasses ValueError
            raise HTTPException(
                status_code=400, detail="ref_audio is not valid base64"
            ) from exc
        # delete=False: the path must outlive this `with` so the model can
        # read it; cleanup happens in the `finally` below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp.write(audio_bytes)
            ref_path = tmp.name
    try:
        wav = model.generate(
            req.text,
            audio_prompt_path=ref_path,
            exaggeration=req.exaggeration,
            cfg_weight=req.cfg_weight,
            temperature=req.temperature,
        )
    finally:
        # Always remove the temp reference file, even if generate() raised.
        if ref_path and os.path.exists(ref_path):
            os.unlink(ref_path)
    # Serialize to an in-memory WAV and stream it to the client.
    buf = io.BytesIO()
    ta.save(buf, wav, model.sr, format="wav")
    buf.seek(0)
    return StreamingResponse(buf, media_type="audio/wav")