tts

Sleeping

App Files Files Community

geopromini committed on Dec 28, 2025

Commit

792e819

verified ·

1 Parent(s): 1fd11d5

Upload app.py

Browse files

Files changed (1) hide show

app.py +78 -67

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import gradio as gr
 import soundfile as sf
 import tempfile
 import torch
 from vieneu_tts import VieNeuTTS
 import time
@@ -28,7 +29,7 @@ except Exception as e:
             return np.random.uniform(-0.1, 0.1, 24000*2)
     tts = MockTTS()
-# --- 2. DATA ---
 VOICE_SAMPLES = {
     "Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
     "Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
@@ -54,13 +55,22 @@ def load_reference_info(voice_choice):
     return None, ""
 @spaces.GPU(duration=120)
-def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
     try:
         if not text or text.strip() == "":
             return None, "⚠️ Vui lòng nhập nội dung!"
-        if len(text) > 250:
-            return None, f"❌ Giới hạn 250 ký tự (Hiện tại: {len(text)})."
         if mode_tab == "custom_mode":
             if custom_audio is None or not custom_text:
                 return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
@@ -71,126 +81,133 @@ def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
             with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
                 ref_text_raw = f.read()
         start_time = time.time()
         ref_codes = tts.encode_reference(ref_audio_path)
-        wav = tts.infer(text, ref_codes, ref_text_raw)
         process_time = time.time() - start_time
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             sf.write(tmp_file.name, wav, 24000)
             output_path = tmp_file.name
-        return output_path, f"⚡ Xử lý: {process_time:.2f}s"
     except Exception as e:
         return None, f"❌ Lỗi: {str(e)}"
-# --- 4. THEME & CSS SETUP ---
 theme = gr.themes.Default(
     primary_hue="indigo",
-    secondary_hue="cyan",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
 ).set(
-    body_background_fill="#070b14",
     block_background_fill="#0f172a",
     block_border_width="1px",
-    block_label_text_color="#94a3b8",
-    button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #a855f7 100%)",
-    button_primary_background_fill_hover="linear-gradient(135deg, #4f46e5 0%, #9333ea 100%)",
-    button_primary_text_color="white",
     input_background_fill="#1e293b",
     input_border_color="#334155",
 )
 css = """
-.gradio-container { background-color: #070b14 !important; border: none !important; }
-.main-wrap { max-width: 1100px !important; margin: auto !important; padding: 40px 20px !important; }
-.header-area { text-align: center; margin-bottom: 40px; }
-.header-area h1 {
-    background: linear-gradient(90deg, #60a5fa, #c084fc);
-    -webkit-background-clip: text; -webkit-text-fill-color: transparent;
-    font-size: 3rem !important; font-weight: 900 !important; letter-spacing: -1px; margin: 0;
-}
-.header-area p { color: #64748b; font-size: 1.1rem; margin-top: 10px; }
 .st-card {
-    border-radius: 20px !important;
-    border: 1px solid rgba(255,255,255,0.08) !important;
-    box-shadow: 0 20px 50px rgba(0,0,0,0.3) !important;
     padding: 15px;
 }
 .result-card {
-    margin-top: 25px;
-    background: rgba(15, 23, 42, 0.6) !important;
-    border: 1px dashed rgba(99, 102, 241, 0.3) !important;
 }
-.footer { text-align: center; margin-top: 50px; color: #334155; font-size: 0.8rem; letter-spacing: 1px; }
-audio { filter: invert(90%) hue-rotate(180deg) brightness(1.5); width: 100%; }
 """
 # --- 5. UI CONSTRUCTION ---
-with gr.Blocks(title="VieNeu-TTS Studio") as demo:
     with gr.Column(elem_classes="main-wrap"):
-        gr.HTML("""
-            <div class="header-area">
-                <h1>VieNeu Studio</h1>
-                <p>Nền tảng chuyển đổi giọng nói AI chuyên nghiệp</p>
-            </div>
-        """)
         with gr.Row(equal_height=True):
-            # Cột trái
             with gr.Column(scale=1):
                 with gr.Group(elem_classes="st-card"):
                     text_input = gr.Textbox(
-                        label="VĂN BẢN ĐẦU VÀO",
-                        placeholder="Hãy nhập nội dung bạn muốn AI chuyển thành giọng nói...",
-                        lines=11,
                         show_label=True,
                     )
-                    char_count = gr.HTML("<div style='text-align: right; color: #475569; font-size: 0.85rem; padding: 5px;'>0 / 250</div>")
-            # Cột phải
             with gr.Column(scale=1):
                 with gr.Tabs() as tabs:
-                    with gr.TabItem("👤 Giọng Mẫu", id="preset_mode"):
                         voice_select = gr.Dropdown(
                             choices=list(VOICE_SAMPLES.keys()),
                             value="Tuyên (nam miền Bắc)",
-                            label="Chọn nghệ sĩ đọc",
                         )
                         with gr.Accordion("Nghe thử giọng mẫu", open=False):
                             ref_audio_preview = gr.Audio(interactive=False, show_label=False)
                             ref_text_preview = gr.Markdown("...")
-                    with gr.TabItem("🎙️ Tự Clone", id="custom_mode"):
-                        gr.Markdown("<p style='color: #94a3b8; font-size: 0.8rem;'>Tải lên audio giọng nói để hệ thống mô phỏng.</p>")
-                        custom_audio = gr.Audio(label="Audio mẫu (.wav/mp3)", type="filepath")
-                        custom_text = gr.Textbox(label="Nội dung audio mẫu", placeholder="Nhập chính xác lời thoại của audio...")
                 current_mode = gr.State(value="preset_mode")
                 gr.Markdown("<br>")
-                btn_generate = gr.Button("TỔNG HỢP NGAY", variant="primary", size="lg")
-                # Khu vực kết quả đã sửa lỗi 'style'
                 with gr.Group(elem_classes="st-card result-card"):
-                    audio_output = gr.Audio(label="KẾT QUẢ AUDIO", interactive=False, autoplay=True)
-                    status_output = gr.Markdown("<p style='text-align: center; color: #6366f1; margin-top:10px;'>✨ Sẵn sàng thực hiện</p>")
-        gr.HTML("<div class='footer'>POWERED BY VIENEU-TTS ENGINE • 2025</div>")
     # --- LOGIC ---
     def update_count(text):
         l = len(text)
-        color = "#475569" if l <= 250 else "#f43f5e"
-        return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight: 600; padding: 5px;'>{l} / 250</div>"
     text_input.change(update_count, text_input, char_count)
     def update_ref_preview(voice):
         audio, text = load_reference_info(voice)
-        return audio, f"> \"{text}\""
     voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
@@ -200,15 +217,9 @@ with gr.Blocks(title="VieNeu-TTS Studio") as demo:
     btn_generate.click(
         fn=synthesize_speech,
-        inputs=[text_input, voice_select, custom_audio, custom_text, current_mode],
         outputs=[audio_output, status_output]
     )
 if __name__ == "__main__":
-    # Đưa theme và css vào launch() để tránh cảnh báo trên Gradio 6.0+
-    demo.queue().launch(
-        theme=theme,
-        css=css,
-        server_name="0.0.0.0",
-        server_port=7860
-    )

 import soundfile as sf
 import tempfile
 import torch
+import librosa # Thêm thư viện xử lý âm thanh
 from vieneu_tts import VieNeuTTS
 import time
             return np.random.uniform(-0.1, 0.1, 24000*2)
     tts = MockTTS()
+# --- 2. DATA (Giữ nguyên danh sách giọng mẫu) ---
 VOICE_SAMPLES = {
     "Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
     "Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
     return None, ""
 @spaces.GPU(duration=120)
+def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab, pause_level, speed_value):
     try:
         if not text or text.strip() == "":
             return None, "⚠️ Vui lòng nhập nội dung!"
+        # 3.1. Xử lý độ ngắt nghỉ (Pause level)
+        processed_text = text
+        if pause_level == "Trung bình":
+            processed_text = processed_text.replace(",", ", , ").replace(".", ". . ")
+        elif pause_level == "Dài":
+            processed_text = processed_text.replace(",", ", , , ").replace(".", ". . . . ")
+        if len(processed_text) > 400:
+             processed_text = processed_text[:400]
+        # 3.2. Lấy dữ liệu Reference
         if mode_tab == "custom_mode":
             if custom_audio is None or not custom_text:
                 return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
             with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
                 ref_text_raw = f.read()
+        # 3.3. Thực hiện Inference
         start_time = time.time()
         ref_codes = tts.encode_reference(ref_audio_path)
+        wav = tts.infer(processed_text, ref_codes, ref_text_raw)
+        # 3.4. Điều chỉnh Tốc độ (Speed) bằng librosa
+        if speed_value != 1.0:
+            # Time stretch giữ nguyên pitch
+            wav = librosa.effects.time_stretch(wav, rate=float(speed_value))
         process_time = time.time() - start_time
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             sf.write(tmp_file.name, wav, 24000)
             output_path = tmp_file.name
+        return output_path, f"⚡ Xử lý: {process_time:.2f}s | Tốc độ: {speed_value}x"
     except Exception as e:
         return None, f"❌ Lỗi: {str(e)}"
+# --- 4. THEME & CSS ---
 theme = gr.themes.Default(
     primary_hue="indigo",
+    secondary_hue="blue",
     neutral_hue="slate",
     font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
 ).set(
+    body_background_fill="#020617",
     block_background_fill="#0f172a",
     block_border_width="1px",
     input_background_fill="#1e293b",
     input_border_color="#334155",
+    button_primary_background_fill="linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%)",
 )
 css = """
+.main-wrap { max-width: 1200px !important; margin: auto !important; padding: 20px !important; }
 .st-card {
+    border-radius: 16px !important;
+    border: 1px solid rgba(255,255,255,0.1) !important;
+    box-shadow: 0 4px 20px rgba(0,0,0,0.5) !important;
     padding: 15px;
 }
 .result-card {
+    background: linear-gradient(180deg, rgba(15, 23, 42, 0.8) 0%, rgba(30, 41, 59, 0.8) 100%) !important;
+    border: 1px solid rgba(99, 102, 241, 0.2) !important;
+    margin-top: 15px;
 }
+audio { filter: invert(90%) hue-rotate(180deg) brightness(1.5); width: 100%; border-radius: 8px; }
+.footer { text-align: center; margin-top: 40px; color: #475569; font-size: 0.8rem; font-weight: 500; }
 """
 # --- 5. UI CONSTRUCTION ---
+with gr.Blocks(title="AI Voice Studio") as demo:
     with gr.Column(elem_classes="main-wrap"):
         with gr.Row(equal_height=True):
+            # CỘT TRÁI
             with gr.Column(scale=1):
                 with gr.Group(elem_classes="st-card"):
                     text_input = gr.Textbox(
+                        label="VĂN BẢN CẦN CHUYỂN ĐỔI",
+                        placeholder="Nhập nội dung vào đây...",
+                        lines=20, # Tăng thêm để cân bằng với các nút mới
                         show_label=True,
                     )
+                    char_count = gr.HTML("<div style='text-align: right; color: #6366f1; font-size: 0.85rem; font-weight: bold; padding: 5px;'>0 / 250</div>")
+            # CỘT PHẢI
             with gr.Column(scale=1):
                 with gr.Tabs() as tabs:
+                    with gr.TabItem("👤 Nghệ sĩ đọc", id="preset_mode"):
                         voice_select = gr.Dropdown(
                             choices=list(VOICE_SAMPLES.keys()),
                             value="Tuyên (nam miền Bắc)",
+                            label="Lựa chọn giọng đọc mẫu",
                         )
                         with gr.Accordion("Nghe thử giọng mẫu", open=False):
                             ref_audio_preview = gr.Audio(interactive=False, show_label=False)
                             ref_text_preview = gr.Markdown("...")
+                    with gr.TabItem("🎙️ Nhân bản (Clone)", id="custom_mode"):
+                        custom_audio = gr.Audio(label="Audio gốc", type="filepath")
+                        custom_text = gr.Textbox(
+                            label="NỘI DUNG AUDIO MẪU",
+                            placeholder="Nhập lời thoại của audio mẫu...",
+                            lines=4,
+                            show_label=True
+                        )
+                # --- KHU VỰC ĐIỀU CHỈNH ÂM THANH ---
+                with gr.Row():
+                    pause_level = gr.Radio(
+                        choices=["Mặc định", "Trung bình", "Dài"],
+                        value="Mặc định",
+                        label="Độ ngắt nghỉ",
+                        scale=1
+                    )
+                    speed_select = gr.Dropdown(
+                        choices=[0.8, 0.9, 1.0, 1.1, 1.2, 1.5],
+                        value=1.0,
+                        label="Tốc độ đọc",
+                        scale=1
+                    )
                 current_mode = gr.State(value="preset_mode")
                 gr.Markdown("<br>")
+                btn_generate = gr.Button("BẮT ĐẦU TỔNG HỢP", variant="primary", size="lg")
                 with gr.Group(elem_classes="st-card result-card"):
+                    audio_output = gr.Audio(label="AUDIO KẾT QUẢ", interactive=False, autoplay=True)
+                    status_output = gr.Markdown("<p style='text-align: center; color: #818cf8; font-weight: 500;'>✨ Sẵn sàng thực hiện</p>")
+        gr.HTML("<div class='footer'>ENGINE BY VIENEU-TTS • PROFESSIONAL AI SOLUTIONS 2025</div>")
     # --- LOGIC ---
     def update_count(text):
         l = len(text)
+        color = "#6366f1" if l <= 250 else "#f43f5e"
+        return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight: bold; padding: 5px;'>{l} / 250</div>"
     text_input.change(update_count, text_input, char_count)
     def update_ref_preview(voice):
         audio, text = load_reference_info(voice)
+        return audio, f"**Nội dung mẫu:** *\"{text}\"*"
     voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     btn_generate.click(
         fn=synthesize_speech,
+        inputs=[text_input, voice_select, custom_audio, custom_text, current_mode, pause_level, speed_select],
         outputs=[audio_output, status_output]
     )
 if __name__ == "__main__":
+    demo.queue().launch(theme=theme, css=css, server_name="0.0.0.0", server_port=7860)