Mynameisju committed on
Commit
f72ab49
·
verified ·
1 Parent(s): 4194526

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -20
app.py CHANGED
@@ -1,11 +1,11 @@
1
  import torch
2
  torch.manual_seed(160923)
3
-
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
  from InferenceInterfaces.ControllableInterface import ControllableInterface
7
  from Utility.utils import float2pcm, load_json_from_path
8
-
9
  import matplotlib.pyplot as plt
10
  import librosa
11
  import librosa.display
@@ -13,7 +13,7 @@ import numpy as np
13
  import io
14
  from PIL import Image
15
  import threading
16
-
17
  def generate_spectrogram_image(wav, sr):
18
  fig, ax = plt.subplots(figsize=(4, 1.5))
19
  D = librosa.amplitude_to_db(librosa.stft(wav, n_fft=512), ref=np.max)
@@ -24,9 +24,9 @@ def generate_spectrogram_image(wav, sr):
24
  plt.close(fig)
25
  buf.seek(0)
26
  return Image.open(buf)
27
-
28
  class TTSWebUI:
29
-
30
  def __init__(self,
31
  gpu_id="cpu",
32
  title="Controllable Text-to-Speech for over 7000 Languages",
@@ -35,11 +35,11 @@ class TTSWebUI:
35
  vocoder_model_path=None,
36
  embedding_gan_path=None,
37
  available_artificial_voices=10):
38
-
39
  path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_fullname.json")
40
  iso_to_name = load_json_from_path(path_to_iso_list)
41
  self.text_selection = [f"{iso_to_name[iso]} ({iso})" for iso in iso_to_name]
42
-
43
  self.controllable_ui = ControllableInterface(
44
  gpu_id=gpu_id,
45
  available_artificial_voices=available_artificial_voices,
@@ -47,11 +47,11 @@ class TTSWebUI:
47
  vocoder_model_path=vocoder_model_path,
48
  embedding_gan_path=embedding_gan_path
49
  )
50
-
51
  self.title = title
52
  self.article = article
53
  self.available_artificial_voices = available_artificial_voices
54
-
55
  def read(self,
56
  prompt,
57
  language,
@@ -60,12 +60,13 @@ class TTSWebUI:
60
  voice_seed,
61
  emb1,
62
  reference_audio):
63
-
64
  lang_code = language[-4:-1]
65
  result = [None]
66
-
67
  def run_tts():
68
  try:
 
69
  result[0] = self.controllable_ui.read(
70
  prompt,
71
  reference_audio,
@@ -80,23 +81,32 @@ class TTSWebUI:
80
  -24.0
81
  )
82
  except Exception as e:
 
83
  result[0] = e
84
-
85
  thread = threading.Thread(target=run_tts)
86
  thread.start()
87
- thread.join(timeout=20)
88
-
89
  if thread.is_alive():
 
90
  return None, generate_spectrogram_image(np.zeros(16000), 16000)
91
-
92
  if isinstance(result[0], Exception):
 
93
  raise result[0]
94
-
 
 
 
 
95
  sr, wav, _ = result[0]
96
-
 
 
97
  fig = generate_spectrogram_image(wav, sr)
98
  return (sr, float2pcm(wav)), fig
99
-
100
  def launch(self):
101
  gr.Interface(
102
  fn=self.read,
@@ -118,7 +128,8 @@ class TTSWebUI:
118
  description=self.article,
119
  theme=gr.themes.Ocean(primary_hue="amber", secondary_hue="orange")
120
  ).launch()
121
-
122
  if __name__ == '__main__':
123
  app = TTSWebUI(gpu_id="cpu")
124
- app.launch()
 
 
1
  import torch
2
  torch.manual_seed(160923)
3
+
4
  import gradio as gr
5
  from huggingface_hub import hf_hub_download
6
  from InferenceInterfaces.ControllableInterface import ControllableInterface
7
  from Utility.utils import float2pcm, load_json_from_path
8
+
9
  import matplotlib.pyplot as plt
10
  import librosa
11
  import librosa.display
 
13
  import io
14
  from PIL import Image
15
  import threading
16
+
17
  def generate_spectrogram_image(wav, sr):
18
  fig, ax = plt.subplots(figsize=(4, 1.5))
19
  D = librosa.amplitude_to_db(librosa.stft(wav, n_fft=512), ref=np.max)
 
24
  plt.close(fig)
25
  buf.seek(0)
26
  return Image.open(buf)
27
+
28
  class TTSWebUI:
29
+
30
  def __init__(self,
31
  gpu_id="cpu",
32
  title="Controllable Text-to-Speech for over 7000 Languages",
 
35
  vocoder_model_path=None,
36
  embedding_gan_path=None,
37
  available_artificial_voices=10):
38
+
39
  path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_fullname.json")
40
  iso_to_name = load_json_from_path(path_to_iso_list)
41
  self.text_selection = [f"{iso_to_name[iso]} ({iso})" for iso in iso_to_name]
42
+
43
  self.controllable_ui = ControllableInterface(
44
  gpu_id=gpu_id,
45
  available_artificial_voices=available_artificial_voices,
 
47
  vocoder_model_path=vocoder_model_path,
48
  embedding_gan_path=embedding_gan_path
49
  )
50
+
51
  self.title = title
52
  self.article = article
53
  self.available_artificial_voices = available_artificial_voices
54
+
55
  def read(self,
56
  prompt,
57
  language,
 
60
  voice_seed,
61
  emb1,
62
  reference_audio):
63
+
64
  lang_code = language[-4:-1]
65
  result = [None]
66
+
67
  def run_tts():
68
  try:
69
+ print("[INFO] Running TTS with prompt:", prompt)
70
  result[0] = self.controllable_ui.read(
71
  prompt,
72
  reference_audio,
 
81
  -24.0
82
  )
83
  except Exception as e:
84
+ print("[ERROR] Exception during TTS:", e)
85
  result[0] = e
86
+
87
  thread = threading.Thread(target=run_tts)
88
  thread.start()
89
+ thread.join() # ❗ Timeout removed so synthesis is not cut off early on slow CPUs
90
+
91
  if thread.is_alive():
92
+ print("[WARNING] TTS thread still alive after join → Timeout logic (shouldn't happen now)")
93
  return None, generate_spectrogram_image(np.zeros(16000), 16000)
94
+
95
  if isinstance(result[0], Exception):
96
+ print("[ERROR] TTS returned exception object:", result[0])
97
  raise result[0]
98
+
99
+ if result[0] is None:
100
+ print("[ERROR] TTS returned None — possible silent failure")
101
+ return None, generate_spectrogram_image(np.zeros(16000), 16000)
102
+
103
  sr, wav, _ = result[0]
104
+
105
+ print("[INFO] TTS success — sample rate:", sr, " | waveform shape:", wav.shape)
106
+
107
  fig = generate_spectrogram_image(wav, sr)
108
  return (sr, float2pcm(wav)), fig
109
+
110
  def launch(self):
111
  gr.Interface(
112
  fn=self.read,
 
128
  description=self.article,
129
  theme=gr.themes.Ocean(primary_hue="amber", secondary_hue="orange")
130
  ).launch()
131
+
132
  if __name__ == '__main__':
133
  app = TTSWebUI(gpu_id="cpu")
134
+ app.launch()
135
+