MaenGit commited on
Commit
ee0cc85
·
1 Parent(s): 8917a7a
Files changed (1) hide show
  1. server.py +36 -41
server.py CHANGED
@@ -2,15 +2,14 @@ from flask import Flask, request, send_file, jsonify, after_this_request
2
  from TTS.api import TTS
3
  import os
4
  import uuid
 
5
  import torch
6
- import soundfile as sf # Add this import
7
 
8
  app = Flask(__name__)
9
 
10
  MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
11
  tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
12
 
13
- # Confirmed XTTS v2 preset speakers (58 total, including yours)
14
  SPEAKERS = {
15
  "en_male": "Baldur Sanjin",
16
  "en_female": "Gracie Wise",
@@ -18,25 +17,9 @@ SPEAKERS = {
18
  "ar_female": "Claribel Dervla"
19
  }
20
  print("EN/AR Speakers:", SPEAKERS)
21
- # Fixed print: No citations in code
22
- print("Available speakers count:", len(tts_multi.speakers) if hasattr(tts_multi, 'speakers') else "N/A")
23
 
24
- # Cache low-level components
25
- gpt_cond_latent_cache = {}
26
- speaker_embedding_cache = {}
27
-
28
- def load_speaker_embedding(speaker_name):
29
- """Load precomputed latents for XTTS preset speakers"""
30
- if speaker_name in gpt_cond_latent_cache:
31
- return gpt_cond_latent_cache[speaker_name], speaker_embedding_cache[speaker_name]
32
-
33
- # Safe access: XTTS speaker_manager has .speakers dict with (gpt_cond_latent, embedding) tuples
34
- speaker_data = tts_multi.synthesizer.speaker_manager.speakers[speaker_name]
35
- gpt_cond_latent, speaker_embedding = speaker_data.values() if isinstance(speaker_data, dict) else speaker_data
36
-
37
- gpt_cond_latent_cache[speaker_name] = gpt_cond_latent
38
- speaker_embedding_cache[speaker_name] = speaker_embedding
39
- return gpt_cond_latent, speaker_embedding[web:36][web:42]
40
 
41
  @app.route("/tts", methods=["POST"])
42
  def tts_api():
@@ -50,35 +33,47 @@ def tts_api():
50
 
51
  speaker_name = SPEAKERS.get(f"{language}_{gender}", "Baldur Sanjin")
52
 
53
- # Verify speaker exists (prevents FileNotFoundError)
54
- if speaker_name not in tts_multi.synthesizer.speaker_manager.speakers:
55
- return jsonify({"error": f"Speaker '{speaker_name}' not available. Available: {list(tts_multi.synthesizer.speaker_manager.speakers.keys())[:5]}..."}), 400[web:42]
56
-
57
  out_path = f"/tmp/{uuid.uuid4()}.wav"
58
 
59
- # Low-level TTS with cached latents
60
- gpt_cond_latent, speaker_embedding = load_speaker_embedding(speaker_name)
61
-
62
- wav = tts_multi.synthesizer.tts(
63
- text=text,
64
- gpt_cond_latent=gpt_cond_latent,
65
- speaker_embedding=speaker_embedding,
66
- language=language,
67
- temperature=0.7,
68
- speed=speed
69
- )
70
-
71
- sf.write(out_path, wav, 24000, subtype="PCM_16")
72
-
 
 
 
 
 
 
 
 
 
 
 
73
  @after_this_request
74
  def cleanup(response):
75
  try:
76
  os.remove(out_path)
77
- except Exception as e:
78
- print("Cleanup error:", e)
79
  return response
80
 
81
  return send_file(out_path, mimetype="audio/wav")
82
 
 
 
 
 
 
83
  if __name__ == "__main__":
84
- app.run(host="0.0.0.0", port=7860)
 
2
  from TTS.api import TTS
3
  import os
4
  import uuid
5
+ import soundfile as sf
6
  import torch
 
7
 
8
  app = Flask(__name__)
9
 
10
  MODEL_MULTI = "tts_models/multilingual/multi-dataset/xtts_v2"
11
  tts_multi = TTS(model_name=MODEL_MULTI, gpu=False)
12
 
 
13
  SPEAKERS = {
14
  "en_male": "Baldur Sanjin",
15
  "en_female": "Gracie Wise",
 
17
  "ar_female": "Claribel Dervla"
18
  }
19
  print("EN/AR Speakers:", SPEAKERS)
 
 
20
 
21
+ # Use high-level API only - no low-level synthesizer access
22
+ # tts.speakers lists all 58 presets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  @app.route("/tts", methods=["POST"])
25
  def tts_api():
 
33
 
34
  speaker_name = SPEAKERS.get(f"{language}_{gender}", "Baldur Sanjin")
35
 
 
 
 
 
36
  out_path = f"/tmp/{uuid.uuid4()}.wav"
37
 
38
+ try:
39
+ # High-level tts_to_file with preset speaker - XTTS handles internals safely
40
+ tts_multi.tts_to_file(
41
+ text=text,
42
+ speaker=speaker_name, # Works for presets without files in recent TTS[web:26]
43
+ language=language,
44
+ file_path=out_path,
45
+ speed=speed,
46
+ split_sentences=True # Better for speed control
47
+ )
48
+ except Exception as e:
49
+ # Fallback: Provide dummy WAV for cloning mode (forces preset lookup)
50
+ dummy_wav = "/tmp/dummy_silent.wav"
51
+ if not os.path.exists(dummy_wav):
52
+ sf.write(dummy_wav, torch.zeros(24000 * 3), 24000) # 3s silence
53
+
54
+ tts_multi.tts_to_file(
55
+ text=text,
56
+ speaker_wav=dummy_wav, # Triggers embedding computation from preset name
57
+ language=language,
58
+ file_path=out_path,
59
+ speed=speed
60
+ )
61
+ os.remove(dummy_wav)
62
+
63
  @after_this_request
64
  def cleanup(response):
65
  try:
66
  os.remove(out_path)
67
+ except:
68
+ pass
69
  return response
70
 
71
  return send_file(out_path, mimetype="audio/wav")
72
 
73
+ @app.route("/speakers", methods=["GET"]) # Bonus: List speakers
74
+ def list_speakers():
75
+ speakers = getattr(tts_multi, 'speakers', [])
76
+ return jsonify({"speakers": speakers[:20], "total": len(speakers)})
77
+
78
  if __name__ == "__main__":
79
+ app.run(host="0.0.0.0", port=7860, debug=False)