MaenGit committed on
Commit
9c690cc
·
1 Parent(s): 5085f02

use vits model instead of two models

Browse files
Files changed (2) hide show
  1. server.py +14 -68
  2. server1.py +111 -0
server.py CHANGED
@@ -7,44 +7,19 @@ import soundfile as sf
7
 
8
  app = Flask(__name__)
9
 
10
- # Load XTTS-v2
11
- # MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
- # this is for clonning but I don't want it now
 
13
 
14
- # --- Load models ---
15
- # English/French/Portuguese model
16
- MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
17
- tts_en = TTS(model_name=MODEL_EN, gpu=False)
18
- SPEAKERS_EN = [s.strip() for s in tts_en.speakers]
19
-
20
- # Arabic model
21
- MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
22
- tts_ar = TTS(model_name=MODEL_AR, gpu=False)
23
- SPEAKERS_AR = [s.strip() for s in tts_ar.speakers]
24
-
25
- print("English Speakers:", SPEAKERS_EN)
26
- print("Arabic Speakers:", SPEAKERS_AR)
27
-
28
- # --- Helper to pick model + speaker ---
29
- def get_model_and_speaker(lang, gender):
30
- if lang == "ar":
31
- tts_model = tts_ar
32
- speakers = SPEAKERS_AR
33
- else:
34
- tts_model = tts_en
35
- speakers = SPEAKERS_EN
36
-
37
- # Pick speaker by gender
38
- if speakers:
39
- if gender == "male":
40
- speaker = next((s for s in speakers if "male" in s), speakers[0])
41
- else:
42
- speaker = next((s for s in speakers if "female" in s), speakers[0])
43
- else:
44
- speaker = None
45
-
46
- return tts_model, speaker
47
 
 
 
 
 
 
 
48
 
49
  @app.route("/tts", methods=["POST"])
50
  def tts_api():
@@ -55,43 +30,18 @@ def tts_api():
55
 
56
  if not text:
57
  return jsonify({"error": "Text is required"}), 400
58
-
59
- # Pick a speaker
60
- if SPEAKERS:
61
- if gender == "male":
62
- speaker = SPEAKERS[0]
63
- else:
64
- speaker = SPEAKERS[-1]
65
- else:
66
- speaker = None
67
 
68
- # This is for clonning and I don't want it now
69
- # voice_file = request.files.get("voice")
70
- # if not voice_file:
71
- # return jsonify({"error": "Voice file is required"}), 400
72
- # voice_path = f"/tmp/{uuid.uuid4()}.wav"
73
- # voice_file.save(voice_path)
74
- tts_model, speaker = get_model_and_speaker(language, gender)
75
  out_path = f"/tmp/{uuid.uuid4()}.wav"
76
 
77
-
78
- # tts.tts_to_file(
79
- # text=text,
80
- # # speaker_wav=voice_path, this is for clonning but I don't want it now
81
- # speaker=speaker,
82
- # language=language,
83
- # file_path=out_path
84
- # )
85
-
86
- wav = tts_model.tts(
87
  text=text,
88
  speaker=speaker,
89
  language=language,
90
  speed=speed
91
  )
92
 
93
- sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")
94
- # os.remove(voice_path)
95
 
96
  @after_this_request
97
  def cleanup(response):
@@ -103,9 +53,5 @@ def tts_api():
103
 
104
  return send_file(out_path, mimetype="audio/wav")
105
 
106
-
107
  if __name__ == "__main__":
108
  app.run(host="0.0.0.0", port=7860)
109
-
110
-
111
-
 
7
 
8
  app = Flask(__name__)
9
 
10
# Use a multilingual VITS model compatible with TTS 0.22.x
MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
# TTS.speakers is None when the loaded model is not multi-speaker; fall back
# to an empty list so startup does not crash and pick_speaker() returns None.
SPEAKERS = [s.strip() for s in (tts_multi.speakers or [])]

print("Speakers:", SPEAKERS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
def pick_speaker(gender, speakers=None):
    """Pick a speaker name matching the requested gender.

    Args:
        gender: "male" selects a male voice; any other value selects a
            female voice.
        speakers: optional list of speaker names to choose from. Defaults
            to the module-level SPEAKERS list.

    Returns:
        The first speaker whose name matches the gender, the first speaker
        in the list when none matches, or None when the list is empty.
    """
    if speakers is None:
        speakers = SPEAKERS
    if not speakers:
        return None
    if gender == "male":
        # "male" is a substring of "female", so female names must be
        # excluded explicitly or a male request could get a female voice.
        candidates = (s for s in speakers if "male" in s and "female" not in s)
    else:
        candidates = (s for s in speakers if "female" in s)
    return next(candidates, speakers[0])
23
 
24
  @app.route("/tts", methods=["POST"])
25
  def tts_api():
 
30
 
31
  if not text:
32
  return jsonify({"error": "Text is required"}), 400
 
 
 
 
 
 
 
 
 
33
 
34
+ speaker = pick_speaker(gender)
 
 
 
 
 
 
35
  out_path = f"/tmp/{uuid.uuid4()}.wav"
36
 
37
+ wav = tts_multi.tts(
 
 
 
 
 
 
 
 
 
38
  text=text,
39
  speaker=speaker,
40
  language=language,
41
  speed=speed
42
  )
43
 
44
+ sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
 
45
 
46
  @after_this_request
47
  def cleanup(response):
 
53
 
54
  return send_file(out_path, mimetype="audio/wav")
55
 
 
56
  if __name__ == "__main__":
57
  app.run(host="0.0.0.0", port=7860)
 
 
 
server1.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, send_file, jsonify, after_this_request
2
+ from TTS.api import TTS
3
+ import tempfile
4
+ import os
5
+ import uuid
6
+ import soundfile as sf
7
+
8
+ app = Flask(__name__)
9
+
10
+ # Load XTTS-v2
11
+ # MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
+ # this is for cloning, which I don't want for now
13
+
14
# --- Load models ---
# English/French/Portuguese model
MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
tts_en = TTS(model_name=MODEL_EN, gpu=False)
# TTS.speakers is None when a model is not multi-speaker; use an empty list
# in that case so loading does not fail with a TypeError.
SPEAKERS_EN = [s.strip() for s in (tts_en.speakers or [])]

# Arabic model
MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
tts_ar = TTS(model_name=MODEL_AR, gpu=False)
SPEAKERS_AR = [s.strip() for s in (tts_ar.speakers or [])]

print("English Speakers:", SPEAKERS_EN)
print("Arabic Speakers:", SPEAKERS_AR)
27
+
28
+ # --- Helper to pick model + speaker ---
29
def get_model_and_speaker(lang, gender):
    """Select the TTS model and a speaker name for a language and gender.

    Args:
        lang: language code; "ar" selects the Arabic model, anything else
            selects the English/French/Portuguese model.
        gender: "male" selects a male voice; any other value selects a
            female voice.

    Returns:
        A ``(tts_model, speaker)`` tuple. ``speaker`` is the first name
        matching the gender, the model's first speaker when none matches,
        or None when the model has no speaker list.
    """
    if lang == "ar":
        tts_model, speakers = tts_ar, SPEAKERS_AR
    else:
        tts_model, speakers = tts_en, SPEAKERS_EN

    if not speakers:
        return tts_model, None

    if gender == "male":
        # "male" is a substring of "female", so female names must be
        # excluded explicitly or a male request could get a female voice.
        candidates = (s for s in speakers if "male" in s and "female" not in s)
    else:
        candidates = (s for s in speakers if "female" in s)

    return tts_model, next(candidates, speakers[0])
47
+
48
+
49
@app.route("/tts", methods=["POST"])
def tts_api():
    """Synthesize speech from POSTed form fields and return it as WAV audio.

    Form fields:
        text:  text to speak (required).
        lang:  language code, default "en"; "ar" selects the Arabic model.
        voice: "male" or "female", default "female".
        speed: number forwarded to the model's ``tts()`` call, default 1.0.

    Returns:
        The generated file as ``audio/wav``, or a JSON error with status
        400 when ``text`` is missing or ``speed`` is not a number.
    """
    text = request.form.get("text", "")
    language = request.form.get("lang", "en")
    gender = request.form.get("voice", "female")

    if not text:
        return jsonify({"error": "Text is required"}), 400

    # A non-numeric speed is a client error, not a server crash.
    try:
        speed = float(request.form.get("speed", 1.0))
    except ValueError:
        return jsonify({"error": "Speed must be a number"}), 400

    # Voice cloning (uploading a reference "voice" file) is intentionally
    # disabled for now; the speaker comes from the model's built-in list.
    tts_model, speaker = get_model_and_speaker(language, gender)
    out_path = f"/tmp/{uuid.uuid4()}.wav"

    wav = tts_model.tts(
        text=text,
        speaker=speaker,
        language=language,
        speed=speed,
    )

    sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")

    @after_this_request
    def cleanup(response):
        # Best-effort removal of the temporary WAV once the request is done.
        try:
            os.remove(out_path)
        except OSError as e:
            print("Cleanup error:", e)
        return response

    return send_file(out_path, mimetype="audio/wav")
105
+
106
+
107
+ if __name__ == "__main__":
108
+ app.run(host="0.0.0.0", port=7860)
109
+
110
+
111
+