chatterbox-api / app.py
truegleai's picture
Upload app.py with huggingface_hub
97f0fe1 verified
import io, base64, tempfile, os
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import torchaudio as ta
from chatterbox.tts import ChatterboxTTS
# FastAPI application object; the route decorators further down this file
# (@app.get / @app.post) register their handlers on it.
app = FastAPI()
# Load model once at startup
# This runs at import time, so the module does not finish importing until the
# model has been loaded. The single `model` instance is created on the CPU
# device and is the one used by the /synthesize handler for every request.
print("Loading ChatterboxTTS model...")
model = ChatterboxTTS.from_pretrained(device="cpu")
print("Model ready.")
# Request body accepted by the POST /synthesize endpoint.
class TTSRequest(BaseModel):
    # Text to synthesize; passed as the first positional argument to
    # model.generate().
    text: str
    # Optional reference audio. When set (and non-empty), /synthesize
    # base64-decodes it, writes the bytes to a temporary ".wav" file and
    # passes that file's path to the model as `audio_prompt_path`.
    ref_audio: str | None = None # base64-encoded audio file
    # The three values below are forwarded unchanged as keyword arguments to
    # model.generate(). Their exact effect and valid ranges are defined by
    # ChatterboxTTS and are not visible in this file.
    exaggeration: float = 0.5
    cfg_weight: float = 0.5
    temperature: float = 0.8
# Health check: answers GET /health with a constant status dict. It does not
# touch the model or any request data.
@app.get("/health")
def health():
    status_payload = {"status": "ok"}
    return status_payload
@app.post("/synthesize")
def synthesize(req: TTSRequest):
    """Synthesize speech for the given text and return it as WAV audio.

    If ``ref_audio`` is supplied, it is base64-decoded, written to a temporary
    ``.wav`` file and handed to the model as the audio prompt. The temporary
    file is removed again whether or not generation succeeds.

    Args:
        req: Parsed request body (text, optional base64 reference audio and
            the generation parameters forwarded to the model).

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` containing the
        generated waveform encoded at the model's sample rate.

    Raises:
        HTTPException: 400 if ``ref_audio`` is not valid base64 or decodes to
            no data.
    """
    # Imported locally so this handler does not depend on a change to the
    # file's top-level import block; fastapi is already a dependency.
    from fastapi import HTTPException

    # Decode the reference audio up front so a malformed payload is reported
    # as a client error (400) before any temp file is created, instead of
    # surfacing as an unhandled exception (500).
    audio_bytes = None
    if req.ref_audio:
        try:
            audio_bytes = base64.b64decode(req.ref_audio)
        except ValueError as err:  # binascii.Error is a ValueError subclass
            raise HTTPException(
                status_code=400,
                detail="ref_audio is not valid base64",
            ) from err
        if not audio_bytes:
            # An empty file can never be a usable audio prompt.
            raise HTTPException(
                status_code=400,
                detail="ref_audio decoded to empty audio data",
            )

    ref_path = None
    try:
        if audio_bytes is not None:
            # delete=False: the file must outlive the `with` block because
            # the model re-opens it by path. It is removed in `finally`.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                # Record the path before writing so a failed write still
                # gets cleaned up below.
                ref_path = tmp.name
                tmp.write(audio_bytes)

        wav = model.generate(
            req.text,
            audio_prompt_path=ref_path,
            exaggeration=req.exaggeration,
            cfg_weight=req.cfg_weight,
            temperature=req.temperature,
        )
    finally:
        if ref_path is not None:
            try:
                os.unlink(ref_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

    # Write wav to buffer and return as audio/wav
    buf = io.BytesIO()
    ta.save(buf, wav, model.sr, format="wav")
    buf.seek(0)
    return StreamingResponse(buf, media_type="audio/wav")