MassivelyMultilingualTTS

Build error

App Files Files Community

Mynameisju committed on Jul 2, 2025

Commit

f72ab49

verified ·

1 Parent(s): 4194526

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -20

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import torch
 torch.manual_seed(160923)
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from InferenceInterfaces.ControllableInterface import ControllableInterface
 from Utility.utils import float2pcm, load_json_from_path
 import matplotlib.pyplot as plt
 import librosa
 import librosa.display
@@ -13,7 +13,7 @@ import numpy as np
 import io
 from PIL import Image
 import threading
 def generate_spectrogram_image(wav, sr):
     fig, ax = plt.subplots(figsize=(4, 1.5))
     D = librosa.amplitude_to_db(librosa.stft(wav, n_fft=512), ref=np.max)
@@ -24,9 +24,9 @@ def generate_spectrogram_image(wav, sr):
     plt.close(fig)
     buf.seek(0)
     return Image.open(buf)
 class TTSWebUI:
     def __init__(self,
                  gpu_id="cpu",
                  title="Controllable Text-to-Speech for over 7000 Languages",
@@ -35,11 +35,11 @@ class TTSWebUI:
                  vocoder_model_path=None,
                  embedding_gan_path=None,
                  available_artificial_voices=10):
         path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_fullname.json")
         iso_to_name = load_json_from_path(path_to_iso_list)
         self.text_selection = [f"{iso_to_name[iso]} ({iso})" for iso in iso_to_name]
         self.controllable_ui = ControllableInterface(
             gpu_id=gpu_id,
             available_artificial_voices=available_artificial_voices,
@@ -47,11 +47,11 @@ class TTSWebUI:
             vocoder_model_path=vocoder_model_path,
             embedding_gan_path=embedding_gan_path
         )
         self.title = title
         self.article = article
         self.available_artificial_voices = available_artificial_voices
     def read(self,
              prompt,
              language,
@@ -60,12 +60,13 @@ class TTSWebUI:
              voice_seed,
              emb1,
              reference_audio):
         lang_code = language[-4:-1]
         result = [None]
         def run_tts():
             try:
                 result[0] = self.controllable_ui.read(
                     prompt,
                     reference_audio,
@@ -80,23 +81,32 @@ class TTSWebUI:
                     -24.0
                 )
             except Exception as e:
                 result[0] = e
         thread = threading.Thread(target=run_tts)
         thread.start()
-        thread.join(timeout=20)
         if thread.is_alive():
             return None, generate_spectrogram_image(np.zeros(16000), 16000)
         if isinstance(result[0], Exception):
             raise result[0]
         sr, wav, _ = result[0]
         fig = generate_spectrogram_image(wav, sr)
         return (sr, float2pcm(wav)), fig
     def launch(self):
         gr.Interface(
             fn=self.read,
@@ -118,7 +128,8 @@ class TTSWebUI:
             description=self.article,
             theme=gr.themes.Ocean(primary_hue="amber", secondary_hue="orange")
         ).launch()
 if __name__ == '__main__':
     app = TTSWebUI(gpu_id="cpu")
-    app.launch()

 import torch
 torch.manual_seed(160923)
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from InferenceInterfaces.ControllableInterface import ControllableInterface
 from Utility.utils import float2pcm, load_json_from_path
 import matplotlib.pyplot as plt
 import librosa
 import librosa.display
 import io
 from PIL import Image
 import threading
 def generate_spectrogram_image(wav, sr):
     fig, ax = plt.subplots(figsize=(4, 1.5))
     D = librosa.amplitude_to_db(librosa.stft(wav, n_fft=512), ref=np.max)
     plt.close(fig)
     buf.seek(0)
     return Image.open(buf)
 class TTSWebUI:
     def __init__(self,
                  gpu_id="cpu",
                  title="Controllable Text-to-Speech for over 7000 Languages",
                  vocoder_model_path=None,
                  embedding_gan_path=None,
                  available_artificial_voices=10):
         path_to_iso_list = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="iso_to_fullname.json")
         iso_to_name = load_json_from_path(path_to_iso_list)
         self.text_selection = [f"{iso_to_name[iso]} ({iso})" for iso in iso_to_name]
         self.controllable_ui = ControllableInterface(
             gpu_id=gpu_id,
             available_artificial_voices=available_artificial_voices,
             vocoder_model_path=vocoder_model_path,
             embedding_gan_path=embedding_gan_path
         )
         self.title = title
         self.article = article
         self.available_artificial_voices = available_artificial_voices
     def read(self,
              prompt,
              language,
              voice_seed,
              emb1,
              reference_audio):
         lang_code = language[-4:-1]
         result = [None]
         def run_tts():
             try:
+                print("[INFO] Running TTS with prompt:", prompt)
                 result[0] = self.controllable_ui.read(
                     prompt,
                     reference_audio,
                     -24.0
                 )
             except Exception as e:
+                print("[ERROR] Exception during TTS:", e)
                 result[0] = e
         thread = threading.Thread(target=run_tts)
         thread.start()
+        thread.join()  # ❗ Timeout removed so synthesis is not cut off early on slow CPUs
         if thread.is_alive():
+            print("[WARNING] TTS thread still alive after join → Timeout logic (shouldn't happen now)")
             return None, generate_spectrogram_image(np.zeros(16000), 16000)
         if isinstance(result[0], Exception):
+            print("[ERROR] TTS returned exception object:", result[0])
             raise result[0]
+        if result[0] is None:
+            print("[ERROR] TTS returned None — possible silent failure")
+            return None, generate_spectrogram_image(np.zeros(16000), 16000)
         sr, wav, _ = result[0]
+        print("[INFO] TTS success — sample rate:", sr, " | waveform shape:", wav.shape)
         fig = generate_spectrogram_image(wav, sr)
         return (sr, float2pcm(wav)), fig
     def launch(self):
         gr.Interface(
             fn=self.read,
             description=self.article,
             theme=gr.themes.Ocean(primary_hue="amber", secondary_hue="orange")
         ).launch()
 if __name__ == '__main__':
     app = TTSWebUI(gpu_id="cpu")
+    app.launch()