"""FastAPI service that synthesizes speech with the ``facebook/mms-tts-som`` model."""

import logging
import os
import tempfile

import torch
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from scipy.io.wavfile import write as wav_write
from starlette.background import BackgroundTask
from transformers import AutoModelForTextToWaveform, AutoTokenizer

logger = logging.getLogger(__name__)

MODEL_NAME = "facebook/mms-tts-som"

# Playback speed-up applied by declaring a higher sampling rate in the WAV
# header: 1.0 = normal, 1.5 = 50% faster, 2.0 = 2x faster.
# NOTE: the samples themselves are not time-stretched, so pitch rises
# together with speed.
SPEED_FACTOR = 1.09

app = FastAPI()

# Initialize the TTS model globally so it is loaded once at import time and
# shared by every request.
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTextToWaveform.from_pretrained(MODEL_NAME)


class TextRequest(BaseModel):
    """Request body for ``POST /tts``."""

    # Text to synthesize; must contain at least one non-whitespace character.
    text: str


def _remove_file(path: str) -> None:
    """Delete *path*, logging (not raising) if it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        logger.warning("Could not remove temporary file %s", path, exc_info=True)


@app.post("/tts")
async def text_to_speech(request: TextRequest):
    """Synthesize ``request.text`` and return it as a WAV download.

    Args:
        request: Body carrying the text to synthesize.

    Returns:
        A ``FileResponse`` with media type ``audio/wav`` and download name
        ``output.wav``. The backing temporary file is deleted after the
        response has been sent.

    Raises:
        HTTPException: 400 if the text is empty or whitespace-only; 500 if
            synthesis or writing the WAV file fails.
    """
    # Validate before entering the try block: otherwise the broad handler
    # below would catch this 400 and re-raise it as a 500.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    wav_path = None
    try:
        # Generate audio.
        # NOTE: inference runs synchronously inside this coroutine, so the
        # event loop is blocked (and other requests wait) until it finishes.
        inputs = tok(request.text, return_tensors="pt")
        with torch.no_grad():
            audio = model(**inputs).waveform

        sr = model.config.sampling_rate  # usually 16000 Hz
        audio_numpy = audio.squeeze().cpu().numpy()

        # Speed up audio by writing the same samples at a higher sampling rate.
        new_sr = int(sr * SPEED_FACTOR)

        # Reserve a temporary path, then close the handle before writing:
        # reopening a still-open NamedTemporaryFile by name fails on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
            wav_path = tmp_file.name
        wav_write(wav_path, new_sr, audio_numpy)
    except Exception as e:
        # Do not leave a half-written file behind when the request fails.
        if wav_path is not None:
            _remove_file(wav_path)
        logger.exception("Text-to-speech synthesis failed")
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Return the file; the background task deletes it once the response has
    # been sent so temporary files do not accumulate across requests.
    return FileResponse(
        wav_path,
        media_type="audio/wav",
        filename="output.wav",
        background=BackgroundTask(_remove_file, wav_path),
    )


@app.get("/health")
async def health_check():
    """Report that the service is up and which model it serves."""
    return {"status": "healthy", "model": MODEL_NAME}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)