amasha03 committed on
Commit
f6174a1
·
verified ·
1 Parent(s): c66ca6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from TTS.utils.synthesizer import Synthesizer
3
- from TTS.tts.configs.vits_config import VitsConfig
4
  from huggingface_hub import hf_hub_download
5
  import os
6
  import json
@@ -10,22 +9,27 @@ def load_eng_model():
10
  repo_id = "E-motionAssistant/text-to-speech-VITS-english"
11
  print(f"--- Downloading English Model ---")
12
 
 
13
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
14
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
15
 
16
- # 1. Load and Fix the Config Object
17
- config = VitsConfig()
18
- config.load_json(config_path)
19
 
20
- print("Force-setting num_chars to 137...")
21
- config.model_args.num_chars = 137
22
 
23
- # 2. Use Synthesizer directly instead of the TTS wrapper
24
- # This bypasses the logic that causes the 131 vs 137 mismatch
25
- print("Building Synthesizer...")
 
 
 
 
26
  syn = Synthesizer(
27
  tts_checkpoint=model_path,
28
- tts_config_path=config, # We pass the FIXED object directly
29
  use_cuda=False
30
  )
31
 
@@ -35,7 +39,6 @@ def load_eng_model():
35
  # --- Global Initialization ---
36
  print("Starting English TTS Startup...")
37
  try:
38
- # eng_tts is now a Synthesizer object
39
  eng_tts = load_eng_model()
40
  print("--- SUCCESS: ENGLISH MODEL LOADED ---")
41
  except Exception as e:
@@ -43,18 +46,14 @@ except Exception as e:
43
  eng_tts = None
44
 
45
  def generate_voice(text):
46
- if eng_tts is None:
47
  return None
48
  try:
49
- output_path = os.path.join(os.getcwd(), "en_output.wav")
50
-
51
- # Synthesizer uses 'tts' instead of 'tts_to_file'
52
- # It returns a list of audio samples
53
  wav = eng_tts.tts(text=str(text))
54
-
55
- # We save the wav samples to a file
56
  eng_tts.save_wav(wav, output_path)
57
-
58
  return output_path
59
  except Exception as e:
60
  print(f"Generation Error: {e}")
@@ -63,9 +62,9 @@ def generate_voice(text):
63
  # Gradio Interface
64
  demo = gr.Interface(
65
  fn=generate_voice,
66
- inputs=gr.Textbox(label="Input English Text"),
67
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
68
- title="English VITS TTS (Direct Synth)"
69
  )
70
 
71
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from TTS.utils.synthesizer import Synthesizer
 
3
  from huggingface_hub import hf_hub_download
4
  import os
5
  import json
 
9
  repo_id = "E-motionAssistant/text-to-speech-VITS-english"
10
  print(f"--- Downloading English Model ---")
11
 
12
+ # 1. Download original files
13
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
14
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
15
 
16
+ # 2. Read the config file and manually change the number to 137
17
+ with open(config_path, 'r') as f:
18
+ config_dict = json.load(f)
19
 
20
+ print("Fixing character count: 131 -> 137")
21
+ config_dict["model_args"]["num_chars"] = 137
22
 
23
+ # 3. Save this as a NEW file (a physical path the library will accept)
24
+ fixed_config_path = os.path.join(os.getcwd(), "fixed_config.json")
25
+ with open(fixed_config_path, 'w') as f:
26
+ json.dump(config_dict, f)
27
+
28
+ # 4. Initialize the Synthesizer using the PATH to our new file
29
+ print("Building Synthesizer with fixed_config.json path...")
30
  syn = Synthesizer(
31
  tts_checkpoint=model_path,
32
+ tts_config_path=fixed_config_path, # This is now a STRING path
33
  use_cuda=False
34
  )
35
 
 
39
  # --- Global Initialization ---
40
  print("Starting English TTS Startup...")
41
  try:
 
42
  eng_tts = load_eng_model()
43
  print("--- SUCCESS: ENGLISH MODEL LOADED ---")
44
  except Exception as e:
 
46
  eng_tts = None
47
 
48
  def generate_voice(text):
49
+ if not eng_tts:
50
  return None
51
  try:
52
+ output_path = os.path.join(os.getcwd(), "output.wav")
53
+ # Generate the audio samples
 
 
54
  wav = eng_tts.tts(text=str(text))
55
+ # Save samples to a .wav file
 
56
  eng_tts.save_wav(wav, output_path)
 
57
  return output_path
58
  except Exception as e:
59
  print(f"Generation Error: {e}")
 
62
  # Gradio Interface
63
  demo = gr.Interface(
64
  fn=generate_voice,
65
+ inputs=gr.Textbox(label="Input English Text", placeholder="Hello world!"),
66
  outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
67
+ title="English VITS TTS"
68
  )
69
 
70
  if __name__ == "__main__":