Spaces:

MaenMN
/

tts-server

Sleeping

App Files Files Community

MaenGit committed on Feb 4

Commit

89eee7b

1 Parent(s): c336072

update to xtts

Browse files

Files changed (2) hide show

server.py +43 -17
server1.py +14 -68

server.py CHANGED Viewed

@@ -7,19 +7,23 @@ import soundfile as sf
 app = Flask(__name__)
-# Use a multilingual VITS model compatible with TTS 0.22.x
-MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
 tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
-SPEAKERS = [s.strip() for s in tts_multi.speakers]
-print("Speakers:", SPEAKERS)
-def pick_speaker(gender):
-    if not SPEAKERS:
-        return None
-    if gender == "male":
-        return next((s for s in SPEAKERS if "male" in s), SPEAKERS[0])
-    return next((s for s in SPEAKERS if "female" in s), SPEAKERS[0])
 @app.route("/tts", methods=["POST"])
 def tts_api():
@@ -30,18 +34,36 @@ def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
-    speaker = pick_speaker(gender)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
     wav = tts_multi.tts(
-        text=text,
-        speaker=speaker,
-        language=language,
-        speed=speed
-    )
-    sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
     @after_this_request
     def cleanup(response):
@@ -53,5 +75,9 @@ def tts_api():
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 app = Flask(__name__)
+MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
 tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
+# Full obscure speakers dict (en/ar compatible)
+SPEAKERS = {
+    "en_male": "Baldur Sanjin",
+    "en_female": "Gracie Wise",
+    "ar_male": "Damian Black",
+    "ar_female": "Claribel Dervla"
+}
+print("EN/AR Speakers:", SPEAKERS)
+def pick_speaker(lang, gender):
+    key = f"{lang}_{gender}"
+    return SPEAKERS.get(key, "Baldur Sanjin")  # Fallback obscure male
+print("EN/AR Speakers:", SPEAKERS)
 @app.route("/tts", methods=["POST"])
 def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
+    # Pick a speaker
+    speaker = pick_speaker(language, gender)
+    # This is for cloning and I don't want it now
+    # voice_file = request.files.get("voice")
+    # if not voice_file:
+    #     return jsonify({"error": "Voice file is required"}), 400
+    # voice_path = f"/tmp/{uuid.uuid4()}.wav"
+    # voice_file.save(voice_path)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
+    # tts.tts_to_file(
+    #     text=text,
+    #     # speaker_wav=voice_path, this is for cloning but I don't want it now
+    #     speaker=speaker,
+    #     language=language,
+    #     file_path=out_path
+    # )
     wav = tts_multi.tts(
+    text=text,
+    speaker_idx=speaker,  # XTTS uses speaker_idx
+    language=language,    # "en" or "ar"
+    speed=speed
+)
+    sf.write(out_path, wav,24000, subtype="PCM_16")
+    # os.remove(voice_path)
     @after_this_request
     def cleanup(response):
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

server1.py CHANGED Viewed

@@ -7,44 +7,19 @@ import soundfile as sf
 app = Flask(__name__)
-# Load XTTS-v2
-# MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
-# this is for cloning but I don't want it now
-# --- Load models ---
-# English/French/Portuguese model
-MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
-tts_en = TTS(model_name=MODEL_EN, gpu=False)
-SPEAKERS_EN = [s.strip() for s in tts_en.speakers]
-# Arabic model
-MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
-tts_ar = TTS(model_name=MODEL_AR, gpu=False)
-SPEAKERS_AR = [s.strip() for s in tts_ar.speakers]
-print("English Speakers:", SPEAKERS_EN)
-print("Arabic Speakers:", SPEAKERS_AR)
-# --- Helper to pick model + speaker ---
-def get_model_and_speaker(lang, gender):
-    if lang == "ar":
-        tts_model = tts_ar
-        speakers = SPEAKERS_AR
-    else:
-        tts_model = tts_en
-        speakers = SPEAKERS_EN
-    # Pick speaker by gender
-    if speakers:
-        if gender == "male":
-            speaker = next((s for s in speakers if "male" in s), speakers[0])
-        else:
-            speaker = next((s for s in speakers if "female" in s), speakers[0])
-    else:
-        speaker = None
-    return tts_model, speaker
 @app.route("/tts", methods=["POST"])
 def tts_api():
@@ -55,43 +30,18 @@ def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
-    # Pick a speaker
-    if SPEAKERS:
-        if gender == "male":
-            speaker = SPEAKERS[0]
-        else:
-            speaker = SPEAKERS[-1]
-    else:
-        speaker = None
-    # This is for cloning and I don't want it now
-    # voice_file = request.files.get("voice")
-    # if not voice_file:
-    #     return jsonify({"error": "Voice file is required"}), 400
-    # voice_path = f"/tmp/{uuid.uuid4()}.wav"
-    # voice_file.save(voice_path)
-    tts_model, speaker = get_model_and_speaker(language, gender)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
-    # tts.tts_to_file(
-    #     text=text,
-    #     # speaker_wav=voice_path, this is for cloning but I don't want it now
-    #     speaker=speaker,
-    #     language=language,
-    #     file_path=out_path
-    # )
-    wav = tts_model.tts(
         text=text,
         speaker=speaker,
         language=language,
         speed=speed
     )
-    sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")
-    # os.remove(voice_path)
     @after_this_request
     def cleanup(response):
@@ -103,9 +53,5 @@ def tts_api():
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 app = Flask(__name__)
+# Use a multilingual VITS model compatible with TTS 0.22.x
+MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
+tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
+SPEAKERS = [s.strip() for s in tts_multi.speakers]
+print("Speakers:", SPEAKERS)
+def pick_speaker(gender):
+    if not SPEAKERS:
+        return None
+    if gender == "male":
+        return next((s for s in SPEAKERS if "male" in s), SPEAKERS[0])
+    return next((s for s in SPEAKERS if "female" in s), SPEAKERS[0])
 @app.route("/tts", methods=["POST"])
 def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
+    speaker = pick_speaker(gender)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
+    wav = tts_multi.tts(
         text=text,
         speaker=speaker,
         language=language,
         speed=speed
     )
+    sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
     @after_this_request
     def cleanup(response):
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)