amasha03 committed on
Commit
433a1d8
·
verified ·
1 Parent(s): 69b1020

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -15
app.py CHANGED
@@ -1,27 +1,81 @@
1
  import gradio as gr
2
  from TTS.api import TTS
 
3
  from huggingface_hub import hf_hub_download
 
 
 
4
 
5
- def load_eng_model():
6
- repo_id = "E-motionAssistant/text-to-speech-VITS-english"
7
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
8
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
9
- return TTS(model_path=model_path, config_path=config_path, gpu=False)
10
 
11
- # Initialize
12
- print("Loading English TTS...")
13
- tts = load_eng_model()
14
 
15
- def speak(text):
16
- output_path = "en_output.wav"
17
- tts.tts_to_file(text=text, file_path=output_path)
18
- return output_path
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  demo = gr.Interface(
21
- fn=speak,
22
- inputs=gr.Textbox(label="Enter English Text"),
23
- outputs=gr.Audio(label="Synthesized Audio"),
24
- title="English VITS TTS"
 
 
 
25
  )
26
 
27
- demo.launch()
 
 
1
  import gradio as gr
2
  from TTS.api import TTS
3
+ from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
+ import os
6
+ import gc
7
+ from romanizer import sinhala_to_roman
8
 
9
def load_model_safely(repo_id):
    """Download a VITS checkpoint from the Hugging Face Hub and load it.

    Fetches ``best_model.pth`` and ``config.json`` from ``repo_id``, applies
    the character-count override for the English repository, and loads the
    checkpoint into a CPU-only ``TTS`` instance.

    Args:
        repo_id: Hub repository id holding the checkpoint and its config.

    Returns:
        The ``TTS`` object with the model loaded.

    Raises:
        Whatever ``hf_hub_download`` or the TTS loader raises; nothing is
        caught here so the caller can decide how to report a failure.
    """
    import tempfile

    print(f"--- Downloading: {repo_id} ---")
    model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

    # Fix the character mismatch BEFORE the model is built.
    if "english" in repo_id.lower():
        print("Applying 137 character fix for English...")
        config = VitsConfig()
        config.load_json(config_path)
        # This fixes the 'RuntimeError: size mismatch'
        config.model_args.num_chars = 137

        # load_tts_model_by_path takes a config *file path*, not a config
        # object, so the patched config is written to a private temp
        # directory (the Hub cache copy is left untouched) and that path is
        # handed to the loader instead.
        patched_dir = tempfile.mkdtemp(prefix="vits_config_")
        config_path = os.path.join(patched_dir, "config.json")
        config.save_json(config_path)

    # Create the TTS shell without a model, then load ours explicitly.
    tts = TTS(gpu=False)
    tts.load_tts_model_by_path(model_path, config_path)

    gc.collect()
    return tts
34
 
35
# --- Initialization ---
# Pre-bind the engine names so a failed startup leaves them as None rather
# than undefined; otherwise every later request dies with a NameError that
# hides the real cause.
sin_tts = tam_tts = eng_tts = None

print("Starting Startup Sequence...")
try:
    print("Loading Sinhala...")
    sin_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-sinhala")

    print("Loading Tamil...")
    tam_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-tamil")

    print("Loading English...")
    eng_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-english")

    print("--- ALL MODELS READY ---")
except Exception as e:
    # Startup stays best-effort (the UI still comes up), but the full
    # traceback is printed so the exact reason for the failure is visible.
    import traceback

    print(f"CRITICAL ERROR: {e}")
    traceback.print_exc()
51
+
52
def generate_voice(text, language):
    """Synthesize ``text`` in the selected language and return a WAV path.

    Args:
        text: Text to speak. Sinhala input is romanized with
            ``sinhala_to_roman`` before synthesis.
        language: One of ``"English"``, ``"Sinhala"`` or ``"Tamil"``.

    Returns:
        Path to the generated WAV file, or ``None`` if the input is empty,
        the language is not recognised, the model is not loaded, or
        synthesis fails (the reason is printed).
    """
    import tempfile

    # Reject empty input up front; str(None) would otherwise be spoken as
    # the literal word "None".
    if text is None or not str(text).strip():
        print("Generation Error: empty input text")
        return None

    output_path = None
    try:
        if language == "Sinhala":
            engine = sin_tts
            text = sinhala_to_roman(text)
        elif language == "English":
            engine = eng_tts
        elif language == "Tamil":
            engine = tam_tts
        else:
            # Previously any other value (including an empty dropdown
            # selection) silently fell through to the Tamil model.
            print(f"Generation Error: unsupported language {language!r}")
            return None

        if engine is None:
            print(f"Generation Error: {language} model is not loaded")
            return None

        # One file per request: a shared "output.wav" would be overwritten
        # by a concurrent request before it is served.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_path = handle.name

        engine.tts_to_file(text=str(text), file_path=output_path)
        return output_path
    except Exception as e:
        print(f"Generation Error: {e}")
        # Do not leave a stray empty file behind after a failed synthesis.
        if output_path is not None and os.path.exists(output_path):
            os.remove(output_path)
        return None
68
+
69
# Gradio Interface: a text box and a language selector feed generate_voice,
# and the WAV path it returns is played back by the audio component.
_text_box = gr.Textbox(label="Input Text")
_language_menu = gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language")
_audio_player = gr.Audio(label="Synthesized Speech", type="filepath")

demo = gr.Interface(
    fn=generate_voice,
    inputs=[_text_box, _language_menu],
    outputs=_audio_player,
    title="Multilingual VITS TTS",
)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()