amasha03 committed on
Commit
7ee5ee5
·
verified ·
1 Parent(s): d7143c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -34
app.py CHANGED
@@ -3,12 +3,27 @@ from TTS.api import TTS
3
  from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
  import os
6
- import json
7
- import gc
8
  from romanizer import sinhala_to_roman
9
 
10
- def load_my_model(repo_id):
11
- print(f"Downloading {repo_id}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
13
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
14
 
@@ -16,57 +31,59 @@ def load_my_model(repo_id):
16
  config = VitsConfig()
17
  config.load_json(config_path)
18
 
19
- if "english" in repo_id.lower():
20
- print(f"--- SUCCESS: FORCING 137 CHARACTERS FOR ENGLISH ---")
 
21
  if hasattr(config, "model_args"):
22
  config.model_args.num_chars = 137
23
 
24
- # Initialize TTS shell
25
  tts = TTS(gpu=False)
26
 
27
- # Use positional arguments (no keywords) to avoid 'unexpected argument' errors
28
  tts.load_tts_model_by_path(model_path, config)
29
 
30
- gc.collect()
31
  return tts
32
 
33
- # --- Loading Models ---
34
- print("Initializing Models...")
35
- try:
36
- eng_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-english")
37
- sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
38
- tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
39
- except Exception as e:
40
- print(f"CRITICAL ERROR DURING LOADING: {e}")
41
-
42
  def generate_voice(text, language):
 
43
  try:
44
- if language == "English":
45
- engine = eng_tts
46
- processed_text = text
47
- elif language == "Sinhala":
48
- engine = sin_tts
49
- # Your separate romanizer file logic
 
 
 
 
 
 
50
  processed_text = sinhala_to_roman(text)
51
- else:
52
- engine = tam_tts
53
- processed_text = text
54
 
55
- output_path = "output.wav"
56
- engine.tts_to_file(text=processed_text, file_path=output_path)
 
 
57
  return output_path
 
58
  except Exception as e:
59
- return f"Error: {e}"
 
60
 
61
  # Gradio Interface
62
  demo = gr.Interface(
63
  fn=generate_voice,
64
  inputs=[
65
- gr.Textbox(label="Input Text"),
66
- gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language")
67
  ],
68
- outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
69
- title="Multilingual VITS TTS"
 
70
  )
71
 
72
  if __name__ == "__main__":
 
3
  from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
  import os
6
+ import gc
 
7
  from romanizer import sinhala_to_roman
8
 
9
+ # Dictionary to hold our loaded models to avoid reloading every time
10
+ models = {
11
+ "English": None,
12
+ "Sinhala": None,
13
+ "Tamil": None
14
+ }
15
+
16
+ def load_my_model(language):
17
+ # Mapping languages to their Hugging Face repos
18
+ repos = {
19
+ "English": "E-motionAssistant/text-to-speech-VITS-english",
20
+ "Sinhala": "E-motionAssistant/text-to-speech-VITS-sinhala",
21
+ "Tamil": "E-motionAssistant/text-to-speech-VITS-tamil"
22
+ }
23
+
24
+ repo_id = repos[language]
25
+
26
+ print(f"--- Loading {language} Model ---")
27
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
28
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
29
 
 
31
  config = VitsConfig()
32
  config.load_json(config_path)
33
 
34
+ # The English Character Fix (131 -> 137)
35
+ if language == "English":
36
+ print("Applying 137 character fix for English...")
37
  if hasattr(config, "model_args"):
38
  config.model_args.num_chars = 137
39
 
40
+ # Initialize the TTS engine
41
  tts = TTS(gpu=False)
42
 
43
+ # Load using positional arguments for maximum compatibility
44
  tts.load_tts_model_by_path(model_path, config)
45
 
46
+ gc.collect() # Clean up RAM
47
  return tts
48
 
 
 
 
 
 
 
 
 
 
49
  def generate_voice(text, language):
50
+ global models
51
  try:
52
+ # Check if the model is already loaded, if not, load it now
53
+ if models[language] is None:
54
+ # Optional: Clear other models from RAM to stay under 16GB limit
55
+ # models = {k: None for k in models}
56
+ # gc.collect()
57
+ models[language] = load_my_model(language)
58
+
59
+ engine = models[language]
60
+ processed_text = text
61
+
62
+ # Apply Romanization logic for Sinhala
63
+ if language == "Sinhala":
64
  processed_text = sinhala_to_roman(text)
65
+ print(f"Sinhala Romanized: {processed_text}")
 
 
66
 
67
+ # Generate the audio file
68
+ output_path = f"output_{language.lower()}.wav"
69
+ engine.tts_to_file(text=str(processed_text), file_path=output_path)
70
+
71
  return output_path
72
+
73
  except Exception as e:
74
+ print(f"Error during {language} generation: {e}")
75
+ return None
76
 
77
  # Gradio Interface
78
  demo = gr.Interface(
79
  fn=generate_voice,
80
  inputs=[
81
+ gr.Textbox(label="Input Text", placeholder="Type your message here..."),
82
+ gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language", value="English")
83
  ],
84
+ outputs=gr.Audio(label="Generated Speech", type="filepath"),
85
+ title="Multilingual VITS TTS System",
86
+ description="A lightweight Text-to-Speech system for English, Sinhala, and Tamil."
87
  )
88
 
89
  if __name__ == "__main__":