MaenGit committed on
Commit
89eee7b
·
1 Parent(s): c336072

update to xtts

Browse files
Files changed (2) hide show
  1. server.py +43 -17
  2. server1.py +14 -68
server.py CHANGED
@@ -7,19 +7,23 @@ import soundfile as sf
7
 
8
  app = Flask(__name__)
9
 
10
- # Use a multilingual VITS model compatible with TTS 0.22.x
11
- MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
12
  tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
13
- SPEAKERS = [s.strip() for s in tts_multi.speakers]
14
 
15
- print("Speakers:", SPEAKERS)
 
 
 
 
 
 
 
16
 
17
- def pick_speaker(gender):
18
- if not SPEAKERS:
19
- return None
20
- if gender == "male":
21
- return next((s for s in SPEAKERS if "male" in s), SPEAKERS[0])
22
- return next((s for s in SPEAKERS if "female" in s), SPEAKERS[0])
23
 
24
  @app.route("/tts", methods=["POST"])
25
  def tts_api():
@@ -30,18 +34,36 @@ def tts_api():
30
 
31
  if not text:
32
  return jsonify({"error": "Text is required"}), 400
 
 
 
33
 
34
- speaker = pick_speaker(gender)
 
 
 
 
 
35
  out_path = f"/tmp/{uuid.uuid4()}.wav"
36
 
 
 
 
 
 
 
 
 
 
37
  wav = tts_multi.tts(
38
- text=text,
39
- speaker=speaker,
40
- language=language,
41
- speed=speed
42
- )
43
 
44
- sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
 
45
 
46
  @after_this_request
47
  def cleanup(response):
@@ -53,5 +75,9 @@ def tts_api():
53
 
54
  return send_file(out_path, mimetype="audio/wav")
55
 
 
56
  if __name__ == "__main__":
57
  app.run(host="0.0.0.0", port=7860)
 
 
 
 
7
 
8
  app = Flask(__name__)
9
 
10
+ MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
 
11
  tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
 
12
 
13
+ # Full obscure speakers dict (en/ar compatible)
14
+ SPEAKERS = {
15
+ "en_male": "Baldur Sanjin",
16
+ "en_female": "Gracie Wise",
17
+ "ar_male": "Damian Black",
18
+ "ar_female": "Claribel Dervla"
19
+ }
20
+ print("EN/AR Speakers:", SPEAKERS)
21
 
22
+ def pick_speaker(lang, gender):
23
+ key = f"{lang}_{gender}"
24
+ return SPEAKERS.get(key, "Baldur Sanjin") # Fallback obscure male
25
+
26
+ print("EN/AR Speakers:", SPEAKERS)
 
27
 
28
  @app.route("/tts", methods=["POST"])
29
  def tts_api():
 
34
 
35
  if not text:
36
  return jsonify({"error": "Text is required"}), 400
37
+
38
+ # Pick a speaker
39
+ speaker = pick_speaker(language, gender)
40
 
41
+ # This is for cloning and I don't want it now
42
+ # voice_file = request.files.get("voice")
43
+ # if not voice_file:
44
+ # return jsonify({"error": "Voice file is required"}), 400
45
+ # voice_path = f"/tmp/{uuid.uuid4()}.wav"
46
+ # voice_file.save(voice_path)
47
  out_path = f"/tmp/{uuid.uuid4()}.wav"
48
 
49
+
50
+ # tts.tts_to_file(
51
+ # text=text,
52
+ # # speaker_wav=voice_path, this is for cloning but I don't want it now
53
+ # speaker=speaker,
54
+ # language=language,
55
+ # file_path=out_path
56
+ # )
57
+
58
  wav = tts_multi.tts(
59
+ text=text,
60
+ speaker_idx=speaker, # XTTS uses speaker_idx
61
+ language=language, # "en" or "ar"
62
+ speed=speed
63
+ )
64
 
65
+ sf.write(out_path, wav,24000, subtype="PCM_16")
66
+ # os.remove(voice_path)
67
 
68
  @after_this_request
69
  def cleanup(response):
 
75
 
76
  return send_file(out_path, mimetype="audio/wav")
77
 
78
+
79
  if __name__ == "__main__":
80
  app.run(host="0.0.0.0", port=7860)
81
+
82
+
83
+
server1.py CHANGED
@@ -7,44 +7,19 @@ import soundfile as sf
7
 
8
  app = Flask(__name__)
9
 
10
- # Load XTTS-v2
11
- # MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
12
- # this is for cloning but I don't want it now
 
13
 
14
- # --- Load models ---
15
- # English/French/Portuguese model
16
- MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
17
- tts_en = TTS(model_name=MODEL_EN, gpu=False)
18
- SPEAKERS_EN = [s.strip() for s in tts_en.speakers]
19
-
20
- # Arabic model
21
- MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
22
- tts_ar = TTS(model_name=MODEL_AR, gpu=False)
23
- SPEAKERS_AR = [s.strip() for s in tts_ar.speakers]
24
-
25
- print("English Speakers:", SPEAKERS_EN)
26
- print("Arabic Speakers:", SPEAKERS_AR)
27
-
28
- # --- Helper to pick model + speaker ---
29
- def get_model_and_speaker(lang, gender):
30
- if lang == "ar":
31
- tts_model = tts_ar
32
- speakers = SPEAKERS_AR
33
- else:
34
- tts_model = tts_en
35
- speakers = SPEAKERS_EN
36
-
37
- # Pick speaker by gender
38
- if speakers:
39
- if gender == "male":
40
- speaker = next((s for s in speakers if "male" in s), speakers[0])
41
- else:
42
- speaker = next((s for s in speakers if "female" in s), speakers[0])
43
- else:
44
- speaker = None
45
-
46
- return tts_model, speaker
47
 
 
 
 
 
 
 
48
 
49
  @app.route("/tts", methods=["POST"])
50
  def tts_api():
@@ -55,43 +30,18 @@ def tts_api():
55
 
56
  if not text:
57
  return jsonify({"error": "Text is required"}), 400
58
-
59
- # Pick a speaker
60
- if SPEAKERS:
61
- if gender == "male":
62
- speaker = SPEAKERS[0]
63
- else:
64
- speaker = SPEAKERS[-1]
65
- else:
66
- speaker = None
67
 
68
- # This is for cloning and I don't want it now
69
- # voice_file = request.files.get("voice")
70
- # if not voice_file:
71
- # return jsonify({"error": "Voice file is required"}), 400
72
- # voice_path = f"/tmp/{uuid.uuid4()}.wav"
73
- # voice_file.save(voice_path)
74
- tts_model, speaker = get_model_and_speaker(language, gender)
75
  out_path = f"/tmp/{uuid.uuid4()}.wav"
76
 
77
-
78
- # tts.tts_to_file(
79
- # text=text,
80
- # # speaker_wav=voice_path, this is for cloning but I don't want it now
81
- # speaker=speaker,
82
- # language=language,
83
- # file_path=out_path
84
- # )
85
-
86
- wav = tts_model.tts(
87
  text=text,
88
  speaker=speaker,
89
  language=language,
90
  speed=speed
91
  )
92
 
93
- sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")
94
- # os.remove(voice_path)
95
 
96
  @after_this_request
97
  def cleanup(response):
@@ -103,9 +53,5 @@ def tts_api():
103
 
104
  return send_file(out_path, mimetype="audio/wav")
105
 
106
-
107
  if __name__ == "__main__":
108
  app.run(host="0.0.0.0", port=7860)
109
-
110
-
111
-
 
7
 
8
  app = Flask(__name__)
9
 
10
+ # Use a multilingual VITS model compatible with TTS 0.22.x
11
+ MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
12
+ tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
13
+ SPEAKERS = [s.strip() for s in tts_multi.speakers]
14
 
15
+ print("Speakers:", SPEAKERS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ def pick_speaker(gender):
18
+ if not SPEAKERS:
19
+ return None
20
+ if gender == "male":
21
+ return next((s for s in SPEAKERS if "male" in s), SPEAKERS[0])
22
+ return next((s for s in SPEAKERS if "female" in s), SPEAKERS[0])
23
 
24
  @app.route("/tts", methods=["POST"])
25
  def tts_api():
 
30
 
31
  if not text:
32
  return jsonify({"error": "Text is required"}), 400
 
 
 
 
 
 
 
 
 
33
 
34
+ speaker = pick_speaker(gender)
 
 
 
 
 
 
35
  out_path = f"/tmp/{uuid.uuid4()}.wav"
36
 
37
+ wav = tts_multi.tts(
 
 
 
 
 
 
 
 
 
38
  text=text,
39
  speaker=speaker,
40
  language=language,
41
  speed=speed
42
  )
43
 
44
+ sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
 
45
 
46
  @after_this_request
47
  def cleanup(response):
 
53
 
54
  return send_file(out_path, mimetype="audio/wav")
55
 
 
56
  if __name__ == "__main__":
57
  app.run(host="0.0.0.0", port=7860)