File size: 1,582 Bytes
9c690cc
 
 
 
 
 
 
 
 
89eee7b
 
 
 
9c690cc
89eee7b
9c690cc
89eee7b
 
 
 
 
 
9c690cc
 
 
 
 
 
 
 
 
 
 
89eee7b
9c690cc
 
89eee7b
9c690cc
 
 
 
 
 
89eee7b
9c690cc
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import io
import os
import tempfile
import uuid

import soundfile as sf
from flask import Flask, request, send_file, jsonify, after_this_request
from TTS.api import TTS

app = Flask(__name__)

# Use a multilingual VITS model compatible with TTS 0.22.x
MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"

# The model is loaded once at import time (CPU only, gpu=False) and the same
# instance is reused by every request handled by this module.
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)

# `tts_multi.speakers` may be None when the loaded model has no speaker
# manager; fall back to an empty list so the module still imports and
# pick_speaker() can return None. Names are stripped of surrounding whitespace.
SPEAKERS = [s.strip() for s in (tts_multi.speakers or [])]

print("Speakers:", SPEAKERS)

def pick_speaker(gender):
    """Return the name of a speaker matching ``gender``.

    Args:
        gender: ``"male"`` selects a male speaker; any other value selects a
            female speaker.

    Returns:
        The first entry of ``SPEAKERS`` whose name indicates the wanted
        gender, the first entry of ``SPEAKERS`` when none matches, or
        ``None`` when ``SPEAKERS`` is empty.
    """
    if not SPEAKERS:
        return None

    want_male = gender == "male"

    def matches(name):
        lowered = name.lower()
        if want_male:
            # "female" contains the substring "male", so a plain substring
            # test would select female speakers for a male request. Remove
            # "female" before looking for "male".
            return "male" in lowered.replace("female", "")
        return "female" in lowered

    return next((s for s in SPEAKERS if matches(s)), SPEAKERS[0])

@app.route("/tts", methods=["POST"])
def tts_api():
    """Synthesize speech from posted form data and return it as WAV audio.

    Form fields:
        text: Text to speak (required, must not be blank).
        lang: Language code passed to the model; defaults to ``"en"``.
        voice: ``"male"`` or ``"female"`` (default); used to pick a speaker.
        speed: Positive, finite number passed to the model; defaults to 1.0.

    Returns:
        A 16-bit PCM WAV response with mimetype ``audio/wav`` on success, or
        a JSON ``{"error": ...}`` body with status 400 when the input is
        invalid.
    """
    text = request.form.get("text", "")
    if not text.strip():
        return jsonify({"error": "Text is required"}), 400

    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")

    # A malformed speed is a client error, not a server error.
    try:
        speed = float(request.form.get("speed", 1.0))
    except ValueError:
        return jsonify({"error": "Speed must be a number"}), 400
    # The chained comparison is False for NaN, infinity, zero and negatives.
    if not 0 < speed < float("inf"):
        return jsonify({"error": "Speed must be a positive, finite number"}), 400

    wav = tts_multi.tts(
        text=text,
        speaker=pick_speaker(gender),
        language=language,
        speed=speed,
    )

    # Render the WAV into memory instead of a temporary file: nothing is left
    # on disk if writing fails, and no file has to be deleted while the
    # response is still being streamed. `format` must be given explicitly
    # because a file-like object has no extension to infer it from.
    buffer = io.BytesIO()
    sf.write(
        buffer,
        wav,
        tts_multi.synthesizer.output_sample_rate,
        format="WAV",
        subtype="PCM_16",
    )
    buffer.seek(0)

    return send_file(buffer, mimetype="audio/wav")

if __name__ == "__main__":
    # When executed as a script, serve on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")