amasha03 committed on
Commit
16efb96
·
verified ·
1 Parent(s): d25e80a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -68
app.py CHANGED
@@ -1,96 +1,52 @@
1
  import gradio as gr
2
  from TTS.api import TTS
3
- from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
  import os
6
- import gc
7
 
8
  # --- IMPORTING YOUR SEPARATE ROMANIZER ---
9
- try:
10
- from romanizer import sinhala_to_roman
11
- except ImportError:
12
- print("Warning: romanizer.py not found. Sinhala might not work correctly.")
13
- def sinhala_to_roman(text): return text
14
 
15
- def load_model_safely(repo_id):
16
- print(f"--- Downloading: {repo_id} ---")
17
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
18
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
19
-
20
- # Load the config
21
- config = VitsConfig()
22
- config.load_json(config_path)
23
-
24
- # THE ENGLISH "MAGIC NUMBER" FIX
25
- # This prevents the 'num_chars' mismatch error for the English model
26
- if "english" in repo_id.lower():
27
- print("Applying 137 character fix for English...")
28
- if hasattr(config, "model_args"):
29
- config.model_args.num_chars = 137
30
-
31
  # Initialize TTS
32
- tts = TTS(gpu=False)
33
-
34
- # Load model positionally to avoid keyword argument errors across versions
35
- tts.load_tts_model_by_path(model_path, config)
36
-
37
- # Clean up RAM after loading
38
- gc.collect()
39
- return tts
40
 
41
- # --- Global Model Initialization ---
42
- print("Initializing all models (this may take a moment)...")
43
- try:
44
- sin_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-sinhala")
45
- tam_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-tamil")
46
- eng_tts = load_model_safely("E-motionAssistant/text-to-speech-VITS-english")
47
- print("SUCCESS: All 3 models loaded and ready.")
48
- except Exception as e:
49
- print(f"CRITICAL STARTUP ERROR: {e}")
50
 
51
  def generate_voice(text, language):
52
- print(f"User Request -> Language: {language} | Text: {text}")
53
  try:
54
- # Select the correct engine and process text
55
  if language == "Sinhala":
56
- engine = sin_tts
57
  processed_text = sinhala_to_roman(text)
58
- print(f"Romanized Sinhala: {processed_text}")
59
- elif language == "English":
60
- engine = eng_tts
61
- processed_text = text
62
- else: # Tamil
63
- engine = tam_tts
64
  processed_text = text
 
65
 
66
- if engine is None:
67
- return None
68
-
69
- # Define output filename
70
- output_path = f"output_{language.lower()}.wav"
71
-
72
- # Generate Audio
73
- engine.tts_to_file(text=str(processed_text), file_path=output_path)
74
-
75
- if os.path.exists(output_path):
76
- return output_path
77
- return None
78
-
79
  except Exception as e:
80
- print(f"GENERATION ERROR: {e}")
81
  return None
82
 
83
- # --- Gradio Interface ---
84
  demo = gr.Interface(
85
  fn=generate_voice,
86
  inputs=[
87
- gr.Textbox(label="Input Text", placeholder="Type your text here..."),
88
- gr.Dropdown(["English", "Sinhala", "Tamil"], label="Select Language", value="English")
89
  ],
90
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
91
- title="Multilingual VITS TTS System",
92
- description="A high-quality Text-to-Speech system for English, Sinhala, and Tamil. Note: Sinhala uses an automated romanizer."
93
  )
94
 
95
- if __name__ == "__main__":
96
- demo.launch()
 
1
  import gradio as gr
2
  from TTS.api import TTS
 
3
  from huggingface_hub import hf_hub_download
4
  import os
 
5
 
6
  # --- IMPORTING YOUR SEPARATE ROMANIZER ---
7
+ from romanizer import sinhala_to_roman
 
 
 
 
8
 
9
+ def load_my_model(repo_id):
10
+ print(f"Downloading {repo_id}...")
11
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
12
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Initialize TTS
14
+ return TTS(model_path=model_path, config_path=config_path, gpu=False)
15
+
16
+ # Load Models
17
+ print("Initializing Sinhala...")
18
+ sin_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-sinhala")
 
 
 
19
 
20
+ print("Initializing Tamil...")
21
+ tam_tts = load_my_model("E-motionAssistant/text-to-speech-VITS-tamil")
 
 
 
 
 
 
 
22
 
23
  def generate_voice(text, language):
 
24
  try:
 
25
  if language == "Sinhala":
26
+ # Use your separate function
27
  processed_text = sinhala_to_roman(text)
28
+ print(f"Input: {text} -> Romanized: {processed_text}")
29
+ engine = sin_tts
30
+ else:
 
 
 
31
  processed_text = text
32
+ engine = tam_tts
33
 
34
+ output_path = "output.wav"
35
+ engine.tts_to_file(text=processed_text, file_path=output_path)
36
+ return output_path
 
 
 
 
 
 
 
 
 
 
37
  except Exception as e:
38
+ print(f"Error: {e}")
39
  return None
40
 
41
+ # Gradio Interface
42
  demo = gr.Interface(
43
  fn=generate_voice,
44
  inputs=[
45
+ gr.Textbox(label="Input Text"),
46
+ gr.Dropdown(["Sinhala", "Tamil"], label="Select Language")
47
  ],
48
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
49
+ title="Multilingual VITS TTS"
 
50
  )
51
 
52
+ demo.launch()