Spaces: tonyshark/styletts2

Runtime error

App Files Files Community

tonyshark committed on Sep 17, 2025

Commit

e79f3e0

verified ·

1 Parent(s): 715f74a

Upload 2 files

Browse files

Files changed (1) hide show

app.py +10 -27

app.py CHANGED Viewed

@@ -8,10 +8,10 @@ import librosa
 from transformers import pipeline
 # ---------------------------
-# Load HF TTS model (hexgrad/styletts2)
 # ---------------------------
 SR_OUT = 24000
-tts_pipe = pipeline("text-to-speech", model="hexgrad/styletts2")
 # ---------------------------
 # Audio helpers
@@ -47,7 +47,7 @@ def plot_waveforms(clean, processed, sr=SR_OUT):
     t_clean = np.arange(len(clean)) / sr
     t_proc = np.arange(len(processed)) / sr
     axes[0].plot(t_clean, clean, color="blue")
-    axes[0].set_title("Waveform sạch (hexgrad/styletts2)")
     axes[1].plot(t_proc, processed, color="red")
     axes[1].set_title("Waveform sau khi áp môi trường/noise")
     axes[1].set_xlabel("Thời gian (s)")
@@ -60,28 +60,11 @@ def plot_waveforms(clean, processed, sr=SR_OUT):
 TAG_LIST = {
     "laugh": "😆 Cười thoải mái",
     "whisper": "🤫 Thì thầm",
-    "naughty": "😏 Tinh nghịch",
     "giggle": "😂 Cười rúc rích",
-    "tease": "😉 Trêu chọc",
-    "smirk": "😼 Đắc ý",
     "surprise": "😲 Ngạc nhiên",
-    "shock": "😱 Hoảng hốt",
-    "romantic": "❤️ Lãng mạn",
-    "shy": "🫣 Bẽn lẽn",
-    "excited": "🤩 Phấn khích",
-    "curious": "🧐 Tò mò",
-    "discover": "✨ Phát hiện",
-    "blush": "🌸 Ngượng ngùng",
-    "angry": "😡 Giận dữ",
     "sad": "😢 Buồn",
     "happy": "😊 Vui vẻ",
-    "fear": "😨 Sợ hãi",
-    "confident": "😎 Tự tin",
-    "serious": "😐 Nghiêm túc",
-    "tired": "🥱 Mệt mỏi",
-    "cry": "😭 Khóc",
-    "love": "😍 Yêu thương",
-    "disgust": "🤢 Ghê tởm",
 }
 TAG_PATTERN = r"(<\/?(?:" + "|".join(TAG_LIST.keys()) + ")>)"
@@ -96,8 +79,8 @@ def synthesize(text, env, snr_db=10):
         if not tok or tok.isspace():
             continue
         if tok.startswith("<") and tok.endswith(">"):
-            # Model hexgrad/styletts2 chưa hỗ trợ style embedding,
-            # nên tags chỉ chia đoạn text.
             continue
         else:
             result = tts_pipe(text=tok)
@@ -133,20 +116,20 @@ def synthesize(text, env, snr_db=10):
     return (SR_OUT, processed), fig, (SR_OUT, clean_audio)
 # ---------------------------
-# Examples
 # ---------------------------
 EXAMPLES = [
     "Xin chào <whisper> tôi nói nhỏ </whisper> rồi <laugh> bật cười </laugh>.",
     "Tôi cảm thấy <happy> vui </happy> nhưng cũng <sad> buồn </sad>.",
-    "Khi <surprise> bất ngờ </surprise> tôi <shock> hoảng hốt </shock>.",
 ]
 # ---------------------------
 # Gradio UI
 # ---------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎙️ hexgrad/styletts2 + Tags + Environment + Waveform Preview")
-    gr.Markdown("Dùng model `hexgrad/styletts2` (giọng LibriTTS, mặc định). Tags chia đoạn text.")
     with gr.Accordion("📑 Danh sách Tags + Emoji", open=False):
         md = "| Tag | Ý nghĩa |\n|-----|----------|\n"

 from transformers import pipeline
 # ---------------------------
+# Load HF TTS model (ak36/styletts2)
 # ---------------------------
 SR_OUT = 24000
+tts_pipe = pipeline("text-to-speech", model="ak36/styletts2")
 # ---------------------------
 # Audio helpers
     t_clean = np.arange(len(clean)) / sr
     t_proc = np.arange(len(processed)) / sr
     axes[0].plot(t_clean, clean, color="blue")
+    axes[0].set_title("Waveform sạch (ak36/styletts2)")
     axes[1].plot(t_proc, processed, color="red")
     axes[1].set_title("Waveform sau khi áp môi trường/noise")
     axes[1].set_xlabel("Thời gian (s)")
 TAG_LIST = {
     "laugh": "😆 Cười thoải mái",
     "whisper": "🤫 Thì thầm",
     "giggle": "😂 Cười rúc rích",
     "surprise": "😲 Ngạc nhiên",
     "sad": "😢 Buồn",
     "happy": "😊 Vui vẻ",
+    "angry": "😡 Giận dữ",
 }
 TAG_PATTERN = r"(<\/?(?:" + "|".join(TAG_LIST.keys()) + ")>)"
         if not tok or tok.isspace():
             continue
         if tok.startswith("<") and tok.endswith(">"):
+            # Model ak36/styletts2 chưa hỗ trợ style embedding riêng,
+            # nên tags chỉ chia text thành đoạn.
             continue
         else:
             result = tts_pipe(text=tok)
     return (SR_OUT, processed), fig, (SR_OUT, clean_audio)
 # ---------------------------
+# Example texts
 # ---------------------------
 EXAMPLES = [
     "Xin chào <whisper> tôi nói nhỏ </whisper> rồi <laugh> bật cười </laugh>.",
     "Tôi cảm thấy <happy> vui </happy> nhưng cũng <sad> buồn </sad>.",
+    "Khi <surprise> bất ngờ </surprise> tôi <angry> giận dữ </angry>.",
 ]
 # ---------------------------
 # Gradio UI
 # ---------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎙️ ak36/styletts2 + Tags + Environment + Waveform Preview")
+    gr.Markdown("Dùng model `ak36/styletts2` (giọng LibriTTS mặc định). Tags chia text thành đoạn.")
     with gr.Accordion("📑 Danh sách Tags + Emoji", open=False):
         md = "| Tag | Ý nghĩa |\n|-----|----------|\n"