Spaces:

MaenMN
/

tts-server

Sleeping

App Files Files Community

MaenGit committed on Feb 4

Commit

9c690cc

1 Parent(s): 5085f02

use vits model instead of two models

Browse files

Files changed (2) hide show

server.py +14 -68
server1.py +111 -0

server.py CHANGED Viewed

@@ -7,44 +7,19 @@ import soundfile as sf
 app = Flask(__name__)
-# Load XTTS-v2
-# MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
-# this is for clonning but I don't want it now
-# --- Load models ---
-# English/French/Portuguese model
-MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
-tts_en = TTS(model_name=MODEL_EN, gpu=False)
-SPEAKERS_EN = [s.strip() for s in tts_en.speakers]
-# Arabic model
-MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
-tts_ar = TTS(model_name=MODEL_AR, gpu=False)
-SPEAKERS_AR = [s.strip() for s in tts_ar.speakers]
-print("English Speakers:", SPEAKERS_EN)
-print("Arabic Speakers:", SPEAKERS_AR)
-# --- Helper to pick model + speaker ---
-def get_model_and_speaker(lang, gender):
-    if lang == "ar":
-        tts_model = tts_ar
-        speakers = SPEAKERS_AR
-    else:
-        tts_model = tts_en
-        speakers = SPEAKERS_EN
-    # Pick speaker by gender
-    if speakers:
-        if gender == "male":
-            speaker = next((s for s in speakers if "male" in s), speakers[0])
-        else:
-            speaker = next((s for s in speakers if "female" in s), speakers[0])
-    else:
-        speaker = None
-    return tts_model, speaker
 @app.route("/tts", methods=["POST"])
 def tts_api():
@@ -55,43 +30,18 @@ def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
-    # Pick a speaker
-    if SPEAKERS:
-        if gender == "male":
-            speaker = SPEAKERS[0]
-        else:
-            speaker = SPEAKERS[-1]
-    else:
-        speaker = None
-    # This is for clonning and I don't want it now
-    # voice_file = request.files.get("voice")
-    # if not voice_file:
-    #     return jsonify({"error": "Voice file is required"}), 400
-    # voice_path = f"/tmp/{uuid.uuid4()}.wav"
-    # voice_file.save(voice_path)
-    tts_model, speaker = get_model_and_speaker(language, gender)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
-    # tts.tts_to_file(
-    #     text=text,
-    #     # speaker_wav=voice_path, this is for clonning but I don't want it now
-    #     speaker=speaker,
-    #     language=language,
-    #     file_path=out_path
-    # )
-    wav = tts_model.tts(
         text=text,
         speaker=speaker,
         language=language,
         speed=speed
     )
-    sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")
-    # os.remove(voice_path)
     @after_this_request
     def cleanup(response):
@@ -103,9 +53,5 @@ def tts_api():
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

 app = Flask(__name__)
+# Use a multilingual VITS model compatible with TTS 0.22.x
+MODEL_MULTI = "tts_models/multilingual/multi-dataset/vits"
+tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
+SPEAKERS = [s.strip() for s in tts_multi.speakers]
+print("Speakers:", SPEAKERS)
+def pick_speaker(gender):
+    """Return a speaker name from SPEAKERS for the requested gender.
+
+    Args:
+        gender: "male" selects a male-labelled speaker; any other value
+            selects a female-labelled one.
+
+    Returns:
+        The first speaker whose name carries the matching label, the first
+        entry of SPEAKERS when no name matches, or None when SPEAKERS is empty.
+    """
+    if not SPEAKERS:
+        return None
+    if gender == "male":
+        # "male" is a substring of "female", so female-labelled names must be
+        # excluded explicitly or a male request could return a female voice.
+        return next(
+            (s for s in SPEAKERS if "male" in s and "female" not in s),
+            SPEAKERS[0],
+        )
+    return next((s for s in SPEAKERS if "female" in s), SPEAKERS[0])
 @app.route("/tts", methods=["POST"])
 def tts_api():
     if not text:
         return jsonify({"error": "Text is required"}), 400
+    speaker = pick_speaker(gender)
     out_path = f"/tmp/{uuid.uuid4()}.wav"
+    wav = tts_multi.tts(
         text=text,
         speaker=speaker,
         language=language,
         speed=speed
     )
+    sf.write(out_path, wav, tts_multi.synthesizer.output_sample_rate, subtype="PCM_16")
     @after_this_request
     def cleanup(response):
     return send_file(out_path, mimetype="audio/wav")
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860)

server1.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from flask import Flask, request, send_file, jsonify, after_this_request
+from TTS.api import TTS
+import tempfile
+import os
+import uuid
+import soundfile as sf
+app = Flask(__name__)
+# XTTS-v2 (voice cloning) is disabled for now; its model name is kept for reference:
+# MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
+# --- Load models (both are loaded at import time, on CPU: gpu=False) ---
+# English/French/Portuguese model
+MODEL_EN = "tts_models/multilingual/multi-dataset/your_tts"
+tts_en = TTS(model_name=MODEL_EN, gpu=False)
+# NOTE(review): assumes tts_en.speakers is an iterable of strings — confirm it is never None.
+SPEAKERS_EN = [s.strip() for s in tts_en.speakers]
+# Arabic model
+MODEL_AR = "tts_models/multilingual/arabic/arabic_tts"
+tts_ar = TTS(model_name=MODEL_AR, gpu=False)
+# NOTE(review): assumes tts_ar.speakers is an iterable of strings — confirm it is never None.
+SPEAKERS_AR = [s.strip() for s in tts_ar.speakers]
+# Print the whitespace-stripped speaker names of both models at startup.
+print("English Speakers:", SPEAKERS_EN)
+print("Arabic Speakers:", SPEAKERS_AR)
+# --- Helper to pick model + speaker ---
+def get_model_and_speaker(lang, gender):
+    """Select the TTS model for a language and a speaker for a gender.
+
+    Args:
+        lang: Language code; "ar" selects the Arabic model, any other value
+            selects the English/French/Portuguese model.
+        gender: "male" selects a male-labelled speaker; any other value
+            selects a female-labelled one.
+
+    Returns:
+        A (tts_model, speaker) tuple. speaker is the first name carrying the
+        matching label, the model's first speaker when no name matches, or
+        None when the model's speaker list is empty.
+    """
+    if lang == "ar":
+        tts_model, speakers = tts_ar, SPEAKERS_AR
+    else:
+        tts_model, speakers = tts_en, SPEAKERS_EN
+    if not speakers:
+        return tts_model, None
+    if gender == "male":
+        # "male" is a substring of "female", so female-labelled names must be
+        # excluded explicitly or a male request could return a female voice.
+        candidates = (s for s in speakers if "male" in s and "female" not in s)
+    else:
+        candidates = (s for s in speakers if "female" in s)
+    return tts_model, next(candidates, speakers[0])
+@app.route("/tts", methods=["POST"])
+def tts_api():
+    """Synthesize speech from POSTed form fields and return it as a WAV file.
+
+    Form fields:
+        text: Text to synthesize (required).
+        lang: Language code, default "en"; "ar" selects the Arabic model.
+        voice: Requested gender, default "female".
+        speed: Speed value forwarded to the model, default 1.0.
+
+    Returns:
+        The generated audio as an "audio/wav" response, or a JSON error with
+        status 400 when text is missing or speed is not a number.
+    """
+    text = request.form.get("text", "")
+    language = request.form.get("lang", "en")
+    gender = request.form.get("voice", "female")
+    if not text:
+        return jsonify({"error": "Text is required"}), 400
+    try:
+        speed = float(request.form.get("speed", 1.0))
+    except ValueError:
+        # A malformed speed is a client error, not a server failure.
+        return jsonify({"error": "Speed must be a number"}), 400
+    # get_model_and_speaker is the single source of the model/speaker choice;
+    # this module defines no global SPEAKERS list to pick from directly.
+    # Voice cloning from an uploaded reference file is intentionally disabled.
+    tts_model, speaker = get_model_and_speaker(language, gender)
+    out_path = f"/tmp/{uuid.uuid4()}.wav"
+    wav = tts_model.tts(
+        text=text,
+        speaker=speaker,
+        language=language,
+        speed=speed
+    )
+    sf.write(out_path, wav, tts_model.synthesizer.output_sample_rate, subtype="PCM_16")
+    @after_this_request
+    def cleanup(response):
+        # Best-effort removal of the temporary WAV; a failure is only logged
+        # so it never replaces the audio response.
+        try:
+            os.remove(out_path)
+        except OSError as e:
+            print("Cleanup error:", e)
+        return response
+    return send_file(out_path, mimetype="audio/wav")
+# When run as a script, serve the Flask app on all interfaces at port 7860.
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)