Oviya committed on
Commit
66a2b6d
·
1 Parent(s): 69a1d5d

update tts code

Browse files
Files changed (1) hide show
  1. pron.py +77 -23
pron.py CHANGED
@@ -16,7 +16,9 @@ from flask import Blueprint, request, jsonify, send_file, send_from_directory
16
  from difflib import SequenceMatcher
17
  from werkzeug.utils import secure_filename
18
  from pydub import AudioSegment
19
- from TTS.api import TTS
 
 
20
 
21
  # -------------------------------------------------------------------------
22
  # OPTIONAL MODULES
@@ -56,16 +58,7 @@ DEFAULT_REFERENCE = os.path.join(REF_DIR, "voice1.wav")
56
 
57
  pron_bp = Blueprint("pron", __name__)
58
 
59
- # -------------------------------------------------------------------------
60
- # LOAD TTS MODEL (TEACHER VOICE)
61
- # -------------------------------------------------------------------------
62
- print("Loading XTTS...")
63
- try:
64
- tts_model = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
65
- print("XTTS loaded ✔")
66
- except Exception:
67
- print("XTTS failed to load.")
68
- tts_model = None
69
 
70
  # -------------------------------------------------------------------------
71
  # HELPERS
@@ -209,21 +202,67 @@ def strong_word_match(word, heard, teacher_ph, student_ph):
209
  # TTS (Teacher Voice)
210
  # -------------------------------------------------------------------------
211
  def clone_voice(text, out_path, reference=DEFAULT_REFERENCE):
212
- if tts_model is None:
213
- raise RuntimeError("TTS model unavailable")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- tts_model.tts_to_file(text=text, file_path=out_path, speaker_wav=reference, language="en")
216
  return out_path
217
 
218
 
219
  def clone_voice_bytes(text, reference=DEFAULT_REFERENCE):
220
- tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
221
- clone_voice(text, tmp, reference)
222
- with open(tmp, "rb") as f:
223
- data = f.read()
224
- os.remove(tmp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  return data
226
 
 
227
  # -------------------------------------------------------------------------
228
  # WAVEFORM / SPECTROGRAM HELPERS
229
  # -------------------------------------------------------------------------
@@ -450,19 +489,34 @@ def generate_teacher_audio_stream():
450
  print(app_msg)
451
  return error_response("reference_save_failed", app_msg, 500)
452
 
453
- if tts_model is None:
454
- print("TTS model unavailable when trying to generate teacher audio stream.")
455
- return error_response("tts_unavailable", "TTS model unavailable", 503)
456
-
457
  try:
 
458
  data = clone_voice_bytes(word, reference=ref_path)
459
  bio = io.BytesIO(data)
460
  bio.seek(0)
461
  return send_file(bio, mimetype="audio/wav", as_attachment=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  except Exception as exc:
463
  print("generate_teacher_audio_stream error:", exc)
464
  return error_response("tts_generation_failed", f"TTS generation failed: {exc}", 500)
465
 
 
466
  # -------------------------------------------------------------------------
467
  # ROUTE: PRONUNCIATION CHECK
468
  # -------------------------------------------------------------------------
 
16
  from difflib import SequenceMatcher
17
  from werkzeug.utils import secure_filename
18
  from pydub import AudioSegment
19
+ from pathlib import Path
20
+ from ragg.tts import xtts_speak_to_file
21
+
22
 
23
  # -------------------------------------------------------------------------
24
  # OPTIONAL MODULES
 
58
 
59
  pron_bp = Blueprint("pron", __name__)
60
 
61
+
 
 
 
 
 
 
 
 
 
62
 
63
  # -------------------------------------------------------------------------
64
  # HELPERS
 
202
  # TTS (Teacher Voice)
203
  # -------------------------------------------------------------------------
204
  def clone_voice(text, out_path, reference=DEFAULT_REFERENCE):
205
+ """
206
+ Generate teacher audio for 'text' into out_path using the shared XTTS utility.
207
+ If 'reference' is a file path, use it as the speaker reference.
208
+ Otherwise, fall back to the default reference directory.
209
+ """
210
+ ref_path = Path(str(reference))
211
+
212
+ if ref_path.is_file():
213
+ # Use the given file as the speaker reference
214
+ xtts_speak_to_file(
215
+ text=text,
216
+ out_file=out_path,
217
+ reference_files=[ref_path],
218
+ language="en",
219
+ )
220
+ else:
221
+ # Fall back: use the directory of DEFAULT_REFERENCE as reference_dir
222
+ xtts_speak_to_file(
223
+ text=text,
224
+ out_file=out_path,
225
+ reference_dir=REF_DIR, # static/references
226
+ language="en",
227
+ )
228
 
 
229
  return out_path
230
 
231
 
232
  def clone_voice_bytes(text, reference=DEFAULT_REFERENCE):
233
+ """
234
+ Generate teacher audio for 'text' and return raw bytes (used by stream endpoint).
235
+ """
236
+ tmp_path = Path(tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name)
237
+
238
+ try:
239
+ ref_path = Path(str(reference))
240
+ if ref_path.is_file():
241
+ xtts_speak_to_file(
242
+ text=text,
243
+ out_file=tmp_path,
244
+ reference_files=[ref_path],
245
+ language="en",
246
+ )
247
+ else:
248
+ xtts_speak_to_file(
249
+ text=text,
250
+ out_file=tmp_path,
251
+ reference_dir=REF_DIR,
252
+ language="en",
253
+ )
254
+
255
+ with open(tmp_path, "rb") as f:
256
+ data = f.read()
257
+ finally:
258
+ try:
259
+ tmp_path.unlink()
260
+ except Exception:
261
+ pass
262
+
263
  return data
264
 
265
+
266
  # -------------------------------------------------------------------------
267
  # WAVEFORM / SPECTROGRAM HELPERS
268
  # -------------------------------------------------------------------------
 
489
  print(app_msg)
490
  return error_response("reference_save_failed", app_msg, 500)
491
 
 
 
 
 
492
  try:
493
+ # this will internally call xtts_speak_to_file via clone_voice_bytes
494
  data = clone_voice_bytes(word, reference=ref_path)
495
  bio = io.BytesIO(data)
496
  bio.seek(0)
497
  return send_file(bio, mimetype="audio/wav", as_attachment=False)
498
+
499
+ except FileNotFoundError as e:
500
+ # no reference audio available
501
+ msg = f"Reference audio not found: {e}"
502
+ print("generate_teacher_audio_stream FileNotFoundError:", e)
503
+ return error_response("reference_not_found", msg, 500)
504
+
505
+ except RuntimeError as e:
506
+ # XTTS model problem (e.g. cannot load on Hugging Face)
507
+ msg = (
508
+ "Teacher voice model is not available on this server. "
509
+ "You can still practise pronunciation, but teacher audio cannot be generated."
510
+ )
511
+ print("generate_teacher_audio_stream RuntimeError (XTTS):", e)
512
+ # 200 so frontend can show message without treating as fatal server error
513
+ return structured_feedback_error("tts_unavailable", msg, status=200)
514
+
515
  except Exception as exc:
516
  print("generate_teacher_audio_stream error:", exc)
517
  return error_response("tts_generation_failed", f"TTS generation failed: {exc}", 500)
518
 
519
+
520
  # -------------------------------------------------------------------------
521
  # ROUTE: PRONUNCIATION CHECK
522
  # -------------------------------------------------------------------------