Spaces:
Sleeping
Sleeping
| from flask import Flask, request, send_file, jsonify, after_this_request | |
| from TTS.api import TTS | |
| import os | |
| import uuid | |
| import soundfile as sf | |
| import torch | |
# Flask application object for this service.
app = Flask(__name__)

# Identifier of the multilingual XTTS v2 model handed to the TTS loader.
MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"

# The model is loaded once, at import time, with gpu=False.
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)

# Preset speaker names, keyed as "<lang>_<gender>"; tts_api builds the same
# key shape from the "lang" and "voice" form fields.
SPEAKERS = dict(
    en_male="Baldur Sanjin",
    en_female="Gracie Wise",
    ar_male="Damian Black",
    ar_female="Claribel Dervla",
)
print("EN/AR Speakers:", SPEAKERS)

# Only the high-level TTS API is used here; the underlying synthesizer is
# never touched directly.
# NOTE(review): the full preset list is expected on `tts_multi.speakers`
# (originally noted as 58 presets) - confirm against the installed TTS release.
def tts_api():
    """Synthesize speech for the submitted form and return it as a WAV file.

    Form fields read from the request:
        text:  Text to speak (required).
        lang:  Language code passed to the model; defaults to "en".
        voice: Gender half of the ``SPEAKERS`` key; defaults to "female".
        speed: Speed value passed to the model; defaults to 1.0.

    Returns:
        The generated audio via ``send_file`` (``audio/wav``). A JSON error
        with status 400 is returned for missing text or an invalid speed,
        and with status 500 when both synthesis attempts fail.

    NOTE(review): no route decorator for this view is visible in the
    reviewed source; confirm it is registered on ``app``.
    """
    text = request.form.get("text", "")
    if not text:
        return jsonify({"error": "Text is required"}), 400

    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")

    # Reject a malformed speed as a client error instead of letting float()
    # raise out of the view.
    try:
        speed = float(request.form.get("speed", 1.0))
    except (TypeError, ValueError):
        return jsonify({"error": "Speed must be a number"}), 400
    # The chained comparison is False for NaN, infinities, zero and negatives.
    if not 0 < speed < float("inf"):
        return jsonify({"error": "Speed must be a positive number"}), 400

    # Unknown language/voice combinations fall back to the default preset.
    speaker_name = SPEAKERS.get(f"{language}_{gender}", "Baldur Sanjin")
    out_path = f"/tmp/{uuid.uuid4()}.wav"

    # Register the cleanup before synthesis so the temporary file is removed
    # for error responses as well as successful ones.
    # NOTE(review): deleting the file while the response still streams it
    # relies on unlink-while-open behaviour of the host OS - confirm for the
    # deployment target.
    @after_this_request
    def cleanup(response):
        try:
            os.remove(out_path)
        except OSError:
            # Best effort: the file may never have been written.
            pass
        return response

    try:
        tts_multi.tts_to_file(
            text=text,
            speaker=speaker_name,
            language=language,
            file_path=out_path,
            speed=speed,
            split_sentences=True,
        )
    except Exception:
        app.logger.exception(
            "Preset-speaker synthesis failed; retrying with a silent reference clip"
        )
        # A per-request reference file avoids the race that a shared path
        # has when concurrent requests create and delete it.
        ref_path = f"/tmp/{uuid.uuid4()}_silent.wav"
        try:
            # 3 seconds of silence at 24 kHz.
            # NOTE(review): this retry passes no speaker name, so the voice is
            # derived from the silent clip only - confirm this is intended.
            sf.write(ref_path, torch.zeros(24000 * 3).numpy(), 24000)
            tts_multi.tts_to_file(
                text=text,
                speaker_wav=ref_path,
                language=language,
                file_path=out_path,
                speed=speed,
            )
        except Exception:
            app.logger.exception("Fallback synthesis failed")
            return jsonify({"error": "Speech synthesis failed"}), 500
        finally:
            try:
                os.remove(ref_path)
            except OSError:
                pass

    return send_file(out_path, mimetype="audio/wav")
# Bonus: List speakers
def list_speakers():
    """Return the first 20 speaker names of the loaded model and the total count.

    Returns:
        A JSON response of the form ``{"speakers": [...], "total": <int>}``.

    NOTE(review): no route decorator for this view is visible in the
    reviewed source; confirm it is registered on ``app``.
    """
    # getattr's default only covers a missing attribute. Coerce to a list so
    # that a None value or a non-sliceable iterable (for example a dict view)
    # cannot break the slice and len() below.
    speakers = list(getattr(tts_multi, "speakers", None) or [])
    return jsonify({"speakers": speakers[:20], "total": len(speakers)})
if __name__ == "__main__":
    # Serve on every network interface, port 7860, with debug mode off.
    run_options = {"host": "0.0.0.0", "port": 7860, "debug": False}
    app.run(**run_options)