# NOTE(review): removed scraped Hugging Face Spaces page chrome (status lines,
# file size, commit hash, line-number gutter) — it was not valid Python.
import os
import tempfile

import torch
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
from scipy.io.wavfile import write as wav_write
from transformers import AutoTokenizer, AutoModelForTextToWaveform
app = FastAPI()
# Initialize the TTS model globally
tok = AutoTokenizer.from_pretrained("facebook/mms-tts-som")
model = AutoModelForTextToWaveform.from_pretrained("facebook/mms-tts-som")
class TextRequest(BaseModel):
text: str
@app.post("/tts")
async def text_to_speech(request: TextRequest):
try:
if not request.text.strip():
raise HTTPException(status_code=400, detail="Text cannot be empty")
# Generate audio
inputs = tok(request.text, return_tensors="pt")
with torch.no_grad():
audio = model(**inputs).waveform
sr = model.config.sampling_rate # usually 16000 Hz
audio_numpy = audio.squeeze().cpu().numpy()
# Speed up audio by resampling (increase sampling rate)
speed_factor = 1.09 # Adjust this value: 1.0 = normal, 1.5 = 50% faster, 2.0 = 2x faster
new_sr = int(sr * speed_factor)
# Create temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
wav_write(tmp_file.name, new_sr, audio_numpy)
# Return the file
return FileResponse(
tmp_file.name,
media_type='audio/wav',
filename='output.wav'
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health_check():
return {"status": "healthy", "model": "facebook/mms-tts-som"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)