Spaces:
Sleeping
Sleeping
File size: 2,450 Bytes
6a49547 18340e2 40754f7 44ae691 ee0cc85 ed05d40 18340e2 89eee7b 6d29231 757db5b 89eee7b 6d29231 ee0cc85 1d303c5 44ae691 18340e2 44ae691 1d303c5 b159dc0 6a49547 89eee7b f5c8ea8 ed05d40 44ae691 18340e2 ee0cc85 6a49547 44ae691 ee0cc85 6a49547 44ae691 dd10c80 ee0cc85 18340e2 ee0cc85 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | from flask import Flask, request, send_file, jsonify, after_this_request
from TTS.api import TTS
import os
import uuid
import soundfile as sf
import torch
# Flask application object; the HTTP routes in this module register on it.
app = Flask(__name__)

# Identifier of the multilingual XTTS v2 model. The model is instantiated
# once at import time (with gpu=False) and shared by every request.
MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)

# Preset speaker names, keyed by "<language>_<gender>".
SPEAKERS = {
    "en_male": "Baldur Sanjin",
    "en_female": "Gracie Wise",
    "ar_male": "Damian Black",
    "ar_female": "Claribel Dervla",
}

# Echo the configured mapping at start-up.
print("EN/AR Speakers:", SPEAKERS)

# NOTE: only the high-level TTS API is used in this module; the underlying
# synthesizer is never accessed directly.
def _remove_quietly(path):
    """Delete *path*, ignoring the error if it is missing or cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        pass


def _synthesize(text, speaker_name, language, speed, out_path):
    """Write speech for *text* to *out_path*.

    The preset speaker is tried first. If that call raises, the failure is
    logged and synthesis is retried once using a silent reference clip as
    ``speaker_wav``. An exception from the retry propagates to the caller.
    """
    try:
        tts_multi.tts_to_file(
            text=text,
            speaker=speaker_name,
            language=language,
            file_path=out_path,
            speed=speed,
            split_sentences=True,
        )
        return
    except Exception:
        # Keep the original error visible instead of swallowing it silently.
        app.logger.warning(
            "Preset speaker %r failed; retrying with a silent reference clip",
            speaker_name,
            exc_info=True,
        )

    # A per-request file name avoids concurrent requests creating and
    # deleting one shared reference file underneath each other.
    reference_wav = f"/tmp/silent_{uuid.uuid4()}.wav"
    try:
        # 3 seconds of silence at 24 kHz, converted to a NumPy array for soundfile.
        sf.write(reference_wav, torch.zeros(24000 * 3).numpy(), 24000)
        tts_multi.tts_to_file(
            text=text,
            speaker_wav=reference_wav,
            language=language,
            file_path=out_path,
            speed=speed,
        )
    finally:
        # Always remove the reference clip, even when the retry fails.
        _remove_quietly(reference_wav)


@app.route("/tts", methods=["POST"])
def tts_api():
    """Synthesize speech from POSTed form fields and return it as a WAV file.

    Form fields:
        text:  text to speak (required).
        lang:  language code passed to the model; defaults to "en".
        voice: combined with ``lang`` as "<lang>_<voice>" to look up a preset
            in ``SPEAKERS``; defaults to "female". Unknown combinations fall
            back to "Baldur Sanjin".
        speed: speed value passed to the model; defaults to 1.0 and must be a
            positive, finite number.

    Returns:
        The generated audio (``audio/wav``) on success; a JSON error with
        status 400 when ``text`` is missing or ``speed`` is invalid; a JSON
        error with status 500 when synthesis fails.
    """
    text = request.form.get("text", "")
    if not text:
        return jsonify({"error": "Text is required"}), 400

    # Reject malformed speed values up front rather than failing with a 500.
    try:
        speed = float(request.form.get("speed", 1.0))
    except ValueError:
        return jsonify({"error": "Speed must be a number"}), 400
    if not 0 < speed < float("inf"):  # also rejects NaN
        return jsonify({"error": "Speed must be a positive, finite number"}), 400

    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")
    speaker_name = SPEAKERS.get(f"{language}_{gender}", "Baldur Sanjin")
    out_path = f"/tmp/{uuid.uuid4()}.wav"

    try:
        _synthesize(text, speaker_name, language, speed, out_path)
    except Exception:
        # Request boundary: log the traceback, drop any partial output and
        # answer with a structured error.
        app.logger.exception("TTS synthesis failed")
        _remove_quietly(out_path)
        return jsonify({"error": "Speech synthesis failed"}), 500

    @after_this_request
    def cleanup(response):
        # Remove the generated file once the response has been built.
        _remove_quietly(out_path)
        return response

    return send_file(out_path, mimetype="audio/wav")
@app.route("/speakers", methods=["GET"])  # Bonus: list speakers
def list_speakers():
    """Return the first 20 speaker names of the loaded model and the total count.

    Returns:
        JSON of the form ``{"speakers": [...], "total": <int>}``.
    """
    # The attribute may be missing, and presumably may also be None or a
    # non-sliceable view for some models (TODO confirm against the TTS API);
    # normalise to a list so slicing and len() always work.
    speakers = list(getattr(tts_multi, "speakers", None) or [])
    return jsonify({"speakers": speakers[:20], "total": len(speakers)})
if __name__ == "__main__":
    # Script entry point: serve on all interfaces, port 7860, debug mode off.
    app.run(
        debug=False,
        port=7860,
        host="0.0.0.0",
    )
|