"""Minimal HTTP text-to-speech service built on Flask and Coqui TTS.

Exposes ``POST /tts`` which takes form fields ``text``, ``lang``, ``voice``
and ``speed`` and responds with a 16-bit PCM WAV file.
"""

import math
import os
import tempfile
import uuid

import soundfile as sf
from flask import Flask, after_this_request, jsonify, request, send_file
from TTS.api import TTS

app = Flask(__name__)

# Use a multilingual VITS model compatible with TTS 0.22.x
MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)

# `or []` guards against the model exposing no speaker list (None), which
# would otherwise crash at import time; pick_speaker() copes with an empty list.
SPEAKERS = [s.strip() for s in (tts_multi.speakers or [])]
print("Speakers:", SPEAKERS)


def pick_speaker(gender):
    """Return a speaker name matching the requested gender.

    Args:
        gender: ``"male"`` selects a male speaker; any other value
            (including ``None``) selects a female speaker. Matching is
            case-insensitive and ignores surrounding whitespace.

    Returns:
        The first speaker in ``SPEAKERS`` whose name matches, the first
        speaker overall when none matches, or ``None`` when the model
        exposes no speakers.
    """
    if not SPEAKERS:
        return None

    want_male = (gender or "").strip().lower() == "male"
    for name in SPEAKERS:
        lowered = name.lower()
        is_female = "female" in lowered
        # "male" is a substring of "female", so a plain containment test
        # would hand back a female speaker for a male request.
        is_male = "male" in lowered and not is_female
        if (want_male and is_male) or (not want_male and is_female):
            return name
    return SPEAKERS[0]


@app.route("/tts", methods=["POST"])
def tts_api():
    """Synthesize speech from form data and return it as a WAV file.

    Form fields:
        text: Text to speak (required, must not be blank).
        lang: Language code passed to the model (default ``"en"``).
        voice: ``"male"`` or ``"female"`` (default ``"female"``).
        speed: Positive, finite speed factor (default ``1.0``).

    Returns:
        The WAV audio on success; a JSON ``{"error": ...}`` body with
        status 400 for invalid input or 500 when synthesis fails.
    """
    text = request.form.get("text", "")
    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")

    if not text.strip():
        return jsonify({"error": "Text is required"}), 400

    # Bad user input must yield a 400, not an unhandled ValueError (500).
    try:
        speed = float(request.form.get("speed", 1.0))
    except ValueError:
        return jsonify({"error": "Speed must be a number"}), 400
    if not math.isfinite(speed) or speed <= 0:
        return jsonify({"error": "Speed must be a positive number"}), 400

    speaker = pick_speaker(gender)
    # Use the platform temp directory rather than a hard-coded /tmp.
    out_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.wav")

    # Registered before synthesis so the file is removed on every exit path,
    # including a failure halfway through writing it.
    # NOTE(review): the file is unlinked while the response may still hold it
    # open; that works on POSIX but may fail on Windows - confirm the target OS.
    @after_this_request
    def cleanup(response):
        try:
            os.remove(out_path)
        except FileNotFoundError:
            pass  # synthesis failed before anything was written
        except OSError as e:
            print("Cleanup error:", e)
        return response

    try:
        wav = tts_multi.tts(
            text=text,
            speaker=speaker,
            language=language,
            speed=speed,
        )
        sf.write(
            out_path,
            wav,
            tts_multi.synthesizer.output_sample_rate,
            subtype="PCM_16",
        )
    except Exception:
        # Top-level request boundary: log the traceback and answer in the
        # same JSON shape as the validation errors.
        app.logger.exception("TTS synthesis failed")
        return jsonify({"error": "Speech synthesis failed"}), 500

    return send_file(out_path, mimetype="audio/wav")


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)