"""Flask HTTP wrapper around the Coqui XTTS v2 multilingual TTS model.

Endpoints:
    POST /tts       Synthesize the form field ``text`` and return a WAV file.
    GET  /speakers  List (up to 20 of) the model's preset speaker names.
"""
import math
import os
import uuid

import soundfile as sf
import torch
from flask import Flask, request, send_file, jsonify, after_this_request
from TTS.api import TTS

app = Flask(__name__)

MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
# Loaded once at import time and shared by every request (CPU only).
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)

# Maps "<lang>_<voice>" (as sent by the client) to a preset speaker name.
SPEAKERS = {
    "en_male": "Baldur Sanjin",
    "en_female": "Gracie Wise",
    "ar_male": "Damian Black",
    "ar_female": "Claribel Dervla",
}
# Used when the requested lang/voice combination is not in SPEAKERS.
DEFAULT_SPEAKER = "Baldur Sanjin"

TMP_DIR = "/tmp"
SILENCE_SAMPLE_RATE = 24000
SILENCE_SECONDS = 3

print("EN/AR Speakers:", SPEAKERS)

# Only the high-level TTS API is used here (no low-level synthesizer access).
# The model's preset speaker names are exposed via GET /speakers.


def _parse_speed(raw):
    """Return ``raw`` as a finite float, or ``None`` if it is not one.

    Range limits are left to the model; only values that cannot be a
    meaningful speed at all (non-numeric, NaN, +/-inf) are rejected.
    """
    try:
        speed = float(raw)
    except (TypeError, ValueError):
        return None
    return speed if math.isfinite(speed) else None


def _remove_quietly(path):
    """Best-effort delete of ``path``; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        app.logger.warning("Could not remove %s: %s", path, exc)


def _synthesize(text, language, speaker_name, speed, out_path):
    """Write speech for ``text`` to ``out_path``.

    First tries the preset speaker ``speaker_name``. If that raises, retries
    once in voice-cloning mode using a short silent reference clip. Any
    exception from the retry propagates to the caller.
    """
    try:
        tts_multi.tts_to_file(
            text=text,
            speaker=speaker_name,
            language=language,
            file_path=out_path,
            speed=speed,
            split_sentences=True,
        )
        return
    except Exception:
        # Keep the original failure visible instead of silently discarding it.
        app.logger.exception(
            "Preset-speaker synthesis failed; retrying with a silent reference clip"
        )

    # A per-request file name avoids one request deleting the reference clip
    # while another request is still reading it.
    dummy_wav = os.path.join(TMP_DIR, f"dummy_silent_{uuid.uuid4()}.wav")
    try:
        silence = torch.zeros(SILENCE_SAMPLE_RATE * SILENCE_SECONDS).numpy()
        sf.write(dummy_wav, silence, SILENCE_SAMPLE_RATE)
        # NOTE(review): this passes silence as the cloning reference, so the
        # preset speaker name is not used on this path -- confirm this
        # fallback is still wanted.
        tts_multi.tts_to_file(
            text=text,
            speaker_wav=dummy_wav,
            language=language,
            file_path=out_path,
            speed=speed,
        )
    finally:
        _remove_quietly(dummy_wav)


@app.route("/tts", methods=["POST"])
def tts_api():
    """Synthesize speech from form data and return it as ``audio/wav``.

    Form fields:
        text:  Text to speak (required).
        lang:  Language code, default ``"en"``.
        voice: ``"male"`` or ``"female"``, default ``"female"``.
        speed: Speed multiplier, default ``1.0``.

    Returns:
        The WAV file on success; a JSON ``{"error": ...}`` body with status
        400 for invalid input or 500 if synthesis fails.
    """
    text = request.form.get("text", "")
    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")

    if not text:
        return jsonify({"error": "Text is required"}), 400

    speed = _parse_speed(request.form.get("speed", 1.0))
    if speed is None:
        return jsonify({"error": "speed must be a finite number"}), 400

    speaker_name = SPEAKERS.get(f"{language}_{gender}", DEFAULT_SPEAKER)
    out_path = os.path.join(TMP_DIR, f"{uuid.uuid4()}.wav")

    try:
        _synthesize(text, language, speaker_name, speed, out_path)
    except Exception:
        app.logger.exception("TTS synthesis failed")
        # Do not leave a partially written output file behind.
        _remove_quietly(out_path)
        return jsonify({"error": "Speech synthesis failed"}), 500

    @after_this_request
    def cleanup(response):
        # The output is single-use; drop it once the response is built.
        _remove_quietly(out_path)
        return response

    return send_file(out_path, mimetype="audio/wav")


@app.route("/speakers", methods=["GET"])
def list_speakers():
    """Return the first 20 preset speaker names and the total count."""
    # ``speakers`` may be missing or None; normalise to a list so that
    # slicing and len() are always valid.
    speakers = list(getattr(tts_multi, "speakers", None) or [])
    return jsonify({"speakers": speakers[:20], "total": len(speakers)})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)