amasha03 committed on
Commit
253b843
·
verified ·
1 Parent(s): f6174a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -22
app.py CHANGED
@@ -1,46 +1,52 @@
1
  import gradio as gr
2
  from TTS.utils.synthesizer import Synthesizer
 
 
3
  from huggingface_hub import hf_hub_download
 
4
  import os
5
  import json
6
- import gc
7
 
8
  def load_eng_model():
9
  repo_id = "E-motionAssistant/text-to-speech-VITS-english"
10
  print(f"--- Downloading English Model ---")
11
 
12
- # 1. Download original files
13
  model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
14
  config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
15
 
16
- # 2. Read the config file and manually change the number to 137
17
- with open(config_path, 'r') as f:
18
- config_dict = json.load(f)
 
19
 
20
- print("Fixing character count: 131 -> 137")
21
- config_dict["model_args"]["num_chars"] = 137
 
22
 
23
- # 3. Save this as a NEW file (a physical path the library will accept)
24
- fixed_config_path = os.path.join(os.getcwd(), "fixed_config.json")
25
- with open(fixed_config_path, 'w') as f:
26
- json.dump(config_dict, f)
 
 
 
27
 
28
- # 4. Initialize the Synthesizer using the PATH to our new file
29
- print("Building Synthesizer with fixed_config.json path...")
30
  syn = Synthesizer(
31
  tts_checkpoint=model_path,
32
- tts_config_path=fixed_config_path, # This is now a STRING path
33
  use_cuda=False
34
  )
 
 
35
 
36
- gc.collect()
37
  return syn
38
 
39
- # --- Global Initialization ---
40
  print("Starting English TTS Startup...")
41
  try:
42
  eng_tts = load_eng_model()
43
- print("--- SUCCESS: ENGLISH MODEL LOADED ---")
44
  except Exception as e:
45
  print(f"CRITICAL ERROR: {e}")
46
  eng_tts = None
@@ -50,9 +56,7 @@ def generate_voice(text):
50
  return None
51
  try:
52
  output_path = os.path.join(os.getcwd(), "output.wav")
53
- # Generate the audio samples
54
  wav = eng_tts.tts(text=str(text))
55
- # Save samples to a .wav file
56
  eng_tts.save_wav(wav, output_path)
57
  return output_path
58
  except Exception as e:
@@ -62,9 +66,9 @@ def generate_voice(text):
62
  # Gradio Interface
63
  demo = gr.Interface(
64
  fn=generate_voice,
65
- inputs=gr.Textbox(label="Input English Text", placeholder="Hello world!"),
66
- outputs=gr.Audio(label="Synthesized Speech", type="filepath"),
67
- title="English VITS TTS"
68
  )
69
 
70
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  from TTS.utils.synthesizer import Synthesizer
3
+ from TTS.tts.models.vits import Vits
4
+ from TTS.tts.configs.vits_config import VitsConfig
5
  from huggingface_hub import hf_hub_download
6
+ import torch
7
  import os
8
  import json
 
9
 
def _load_compatible_weights(model, state_dict):
    """Copy into ``model`` only the tensors whose name and shape both match.

    ``load_state_dict(strict=False)`` tolerates missing or unexpected keys,
    but it still raises when a tensor with a matching name has a different
    shape. Filtering first is what actually avoids that error.

    Args:
        model: A ``torch.nn.Module`` that receives the weights.
        state_dict: Mapping of parameter name to tensor taken from a checkpoint.

    Returns:
        The list of checkpoint keys that were skipped because the model has no
        parameter of that name or the shapes differ.
    """
    expected = model.state_dict()
    compatible = {}
    skipped = []
    for key, tensor in state_dict.items():
        if key in expected and expected[key].shape == tensor.shape:
            compatible[key] = tensor
        else:
            skipped.append(key)
    # strict=False: parameters we filtered out are now "missing" on purpose.
    model.load_state_dict(compatible, strict=False)
    return skipped


def load_eng_model(num_chars=137):
    """Download the English VITS checkpoint and return a ready Synthesizer.

    The config's ``model_args.num_chars`` is overridden before the model is
    built, the checkpoint is loaded on CPU, and every shape-compatible tensor
    is copied into the model. The resulting model then replaces the one held
    by the Synthesizer.

    Args:
        num_chars: Value written to ``config.model_args.num_chars`` before the
            architecture is built. Defaults to 137, the value this app used.

    Returns:
        A ``Synthesizer`` whose ``tts_model`` is the manually built model.
    """
    repo_id = "E-motionAssistant/text-to-speech-VITS-english"
    print("--- Downloading English Model ---")

    model_path = hf_hub_download(repo_id=repo_id, filename="best_model.pth")
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

    # 1. Load the config and override the character count.
    config = VitsConfig()
    config.load_json(config_path)
    config.model_args.num_chars = num_chars

    # 2. Build the model architecture from the patched config.
    print("Building model architecture...")
    model = Vits.init_from_config(config)

    # 3. Load the checkpoint weights, keeping only tensors that fit.
    print("Loading weights (using non-strict mode to bypass mismatch)...")
    checkpoint = torch.load(model_path, map_location="cpu")
    skipped = _load_compatible_weights(model, checkpoint["model"])
    if skipped:
        # Surface what was dropped: skipped tensors keep their random init.
        print(f"Skipped {len(skipped)} incompatible tensor(s): {skipped}")
    model.eval()

    # 4. Wrap it in a Synthesizer.
    # NOTE(review): the Synthesizer is handed the checkpoint and the
    # *unpatched* config, so it presumably builds and loads its own model
    # before we replace it below -- confirm that this does not hit the same
    # size mismatch with the installed TTS version.
    syn = Synthesizer(
        tts_checkpoint=model_path,
        tts_config_path=config_path,
        use_cuda=False,
    )
    # Overwrite the internal model with the one built above.
    syn.tts_model = model

    return syn
44
 
# --- Initialization ---
# The model is loaded once at import time. Any failure is reported on stdout
# and leaves ``eng_tts`` as None so the rest of the module can still load.
print("Starting English TTS Startup...")
try:
    eng_tts = load_eng_model()
    print("--- SUCCESS: SYSTEM READY ---")
except Exception as startup_error:
    print(f"CRITICAL ERROR: {startup_error}")
    eng_tts = None
 
56
  return None
57
  try:
58
  output_path = os.path.join(os.getcwd(), "output.wav")
 
59
  wav = eng_tts.tts(text=str(text))
 
60
  eng_tts.save_wav(wav, output_path)
61
  return output_path
62
  except Exception as e:
 
# Gradio Interface
# Components are created first, then wired into the Interface: one text box
# in, one audio player out (the callback returns a path to a file).
_text_input = gr.Textbox(label="Input English Text")
_audio_output = gr.Audio(label="Audio Output", type="filepath")
demo = gr.Interface(
    fn=generate_voice,
    inputs=_text_input,
    outputs=_audio_output,
    title="English VITS TTS (Forced Load)",
)
73
 
74
  if __name__ == "__main__":