amasha03 committed on
Commit
0d91b65
·
verified ·
1 Parent(s): b75b6a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -30
app.py CHANGED
@@ -1,59 +1,59 @@
1
  import gradio as gr
2
  from TTS.api import TTS
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
  import json
6
- from TTS.tts.configs.vits_config import VitsConfig
7
-
8
- # --- IMPORTING YOUR SEPARATE ROMANIZER ---
9
- from romanizer import sinhala_to_roman
10
 
11
  def load_my_model(repo_id):
12
  print(f"Downloading {repo_id}...")
13
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
14
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
15
 
16
- # Load the config into a Python object
17
  config = VitsConfig()
18
  config.load_json(config_path)
19
 
20
- # FORCE the character count to 137 if it's the English model
21
  if "english" in repo_id.lower():
22
- print(f"--- FORCING 137 CHARACTERS FOR ENGLISH ---")
23
- config.model_args.num_chars = 137
 
 
 
 
24
 
25
- # Initialize the TTS using the manual config object instead of the file path
26
- return TTS(items_per_group=1).load_tts_model_by_path(
27
- config,
28
- model_path=model_path,
29
- gpu=False
30
  )
31
 
32
- # Initialize TTS
33
- return TTS(model_path=model_path, config_path=config_path, gpu=False)
34
 
35
- # Load Models
36
- print("Initializing English...")
37
- eng_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-english")
38
-
39
- print("Initializing Sinhala...")
40
- sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
41
-
42
- print("Initializing Tamil...")
43
- tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
44
 
45
  def generate_voice(text, language):
46
  try:
47
  if language == "English":
48
  engine = eng_tts
 
49
  elif language == "Sinhala":
50
- # Use your separate function
51
- processed_text = sinhala_to_roman(text)
52
- print(f"Input: {text} -> Romanized: {processed_text}")
53
  engine = sin_tts
 
 
54
  else:
55
- processed_text = text
56
  engine = tam_tts
 
57
 
58
  output_path = "output.wav"
59
  engine.tts_to_file(text=processed_text, file_path=output_path)
@@ -67,10 +67,10 @@ demo = gr.Interface(
67
  fn=generate_voice,
68
  inputs=[
69
  gr.Textbox(label="Input Text"),
70
- gr.Dropdown(["Sinhala", "Tamil"], label="Select Language")
71
  ],
72
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
73
  title="Multilingual VITS TTS"
74
  )
75
 
76
- demo.launch()
 
1
  import gradio as gr
2
  from TTS.api import TTS
3
+ from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
  import os
6
  import json
7
+ import gc
8
+ from romanizer import sinhala_to_roman
 
 
9
 
10
  def load_my_model(repo_id):
11
  print(f"Downloading {repo_id}...")
12
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
13
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
14
 
15
+ # Load and fix the config
16
  config = VitsConfig()
17
  config.load_json(config_path)
18
 
 
19
  if "english" in repo_id.lower():
20
+ print(f"--- SUCCESS: FORCING 137 CHARACTERS FOR ENGLISH ---")
21
+ if hasattr(config, "model_args"):
22
+ config.model_args.num_chars = 137
23
+
24
+ # Initialize TTS without the 'items_per_group' argument
25
+ tts = TTS(gpu=False)
26
 
27
+ # Load the model using the config object we just fixed
28
+ tts.load_tts_model_by_path(
29
+ checkpoint_path=model_path,
30
+ config_path=config
 
31
  )
32
 
33
+ gc.collect()
34
+ return tts
35
 
36
+ # Load All Models
37
+ print("Initializing Models...")
38
+ try:
39
+ eng_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-english")
40
+ sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
41
+ tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
42
+ except Exception as e:
43
+ print(f"CRITICAL ERROR DURING LOADING: {e}")
 
44
 
45
  def generate_voice(text, language):
46
  try:
47
  if language == "English":
48
  engine = eng_tts
49
+ processed_text = text
50
  elif language == "Sinhala":
 
 
 
51
  engine = sin_tts
52
+ # Using your romanizer logic
53
+ processed_text = sinhala_to_roman(text)
54
  else:
 
55
  engine = tam_tts
56
+ processed_text = text
57
 
58
  output_path = "output.wav"
59
  engine.tts_to_file(text=processed_text, file_path=output_path)
 
67
  fn=generate_voice,
68
  inputs=[
69
  gr.Textbox(label="Input Text"),
70
+ gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language")
71
  ],
72
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
73
  title="Multilingual VITS TTS"
74
  )
75
 
76
+ demo.launch()