TTS-Demo

Sleeping

App Files Files Community

CVNSS committed 18 days ago

Commit

d0acdd5

verified ·

1 Parent(s): 5a15d93

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -264

app.py CHANGED Viewed

@@ -2,11 +2,10 @@
 # -*- coding: utf-8 -*-
 """
-💎 CVNSS4.0 Vietnamese TTS Studio - Azure Horizon Edition
-- Architecture: Singleton Pattern & Lazy Loading
-- Design System: Soft UI (Light Theme) & Ceramic Typography
-- Author: Long Ngo | Refactored by 100-Year AI Expert
-- Advisor: Trần Tư Bình
 """
 import os
@@ -14,33 +13,34 @@ import sys
 import json
 import time
 import re
-import hashlib
-import tempfile
 import logging
-import gc
 from pathlib import Path
-from typing import Optional, Tuple, List
 import torch
 import numpy as np
 import soundfile as sf
 import gradio as gr
-# --- 1. LOGGING & PATH SETUP ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
 logger = logging.getLogger("CVNSS_Studio")
-# Auto-detect root path
-try:
-    ROOT_DIR = Path(__file__).resolve().parent
-except NameError:
-    ROOT_DIR = Path.cwd()
 if str(ROOT_DIR) not in sys.path:
     sys.path.insert(0, str(ROOT_DIR))
-# --- 2. CORE MODULE LOADER ---
 try:
     from src.vietnamese.text_processor import process_vietnamese_text
     from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
     from src.models.synthesizer import SynthesizerTrn
@@ -48,27 +48,27 @@ try:
     from src.text import cleaned_text_to_sequence
     from src.nn import commons
     CORE_LOADED = True
-except ImportError as e:
-    logger.error(f"❌ Core modules missing: {e}")
     CORE_LOADED = False
     VIPHONEME_AVAILABLE = False
     symbols = []
 # =========================================================
-# 3. ELEGANT LIGHT THEME CSS (The "Azure Horizon" Style)
 # =========================================================
 ELEGANT_CSS = r"""
 @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&family=Roboto+Mono:wght@400;500&display=swap');
 :root {
     --primary-blue: #3b82f6;
-    --soft-blue: #eff6ff;
     --text-dark: #1e293b;
     --text-gray: #64748b;
     --surface-white: #ffffff;
     --bg-gradient: linear-gradient(135deg, #f8fafc 0%, #e0f2fe 100%);
-    --shadow-soft: 0 10px 30px -10px rgba(59, 130, 246, 0.15);
-    --radius-xl: 24px;
 }
 body, .gradio-container {
@@ -77,100 +77,43 @@ body, .gradio-container {
     color: var(--text-dark) !important;
 }
-/* --- CARDS & PANELS --- */
 .elegant-card {
     background: var(--surface-white);
     border-radius: var(--radius-xl);
     border: 1px solid rgba(255, 255, 255, 0.8);
-    box-shadow: var(--shadow-soft);
-    padding: 30px;
-    transition: transform 0.2s ease, box-shadow 0.2s ease;
-}
-.elegant-card:hover {
-    transform: translateY(-2px);
-    box-shadow: 0 20px 40px -10px rgba(59, 130, 246, 0.2);
 }
-/* --- TYPOGRAPHY --- */
 .header-title {
-    font-weight: 800;
-    font-size: 2.2rem;
-    background: linear-gradient(to right, #0f172a, #3b82f6);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
     letter-spacing: -0.03em;
-    margin-bottom: 0.5rem;
-}
-.header-subtitle {
-    color: var(--text-gray);
-    font-size: 0.95rem;
-    font-weight: 500;
-}
-/* --- INPUTS --- */
-textarea, input {
-    background: #f1f5f9 !important;
-    border: 2px solid transparent !important;
-    border-radius: 12px !important;
-    color: var(--text-dark) !important;
-    font-size: 1rem !important;
-    transition: all 0.3s ease !important;
-}
-textarea:focus, input:focus {
-    background: #ffffff !important;
-    border-color: var(--primary-blue) !important;
-    box-shadow: 0 0 0 4px rgba(59, 130, 246, 0.1) !important;
 }
-/* --- BUTTONS --- */
 button.primary-btn {
     background: var(--primary-blue) !important;
     color: white !important;
-    font-weight: 600 !important;
     border-radius: 12px !important;
     border: none !important;
-    padding: 12px 24px !important;
-    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.3) !important;
-    transition: all 0.2s ease !important;
-}
-button.primary-btn:hover {
-    background: #2563eb !important;
-    box-shadow: 0 6px 16px rgba(59, 130, 246, 0.4) !important;
-    transform: translateY(-1px);
-}
-button.primary-btn:active { transform: translateY(0); }
-/* --- STATUS BADGES --- */
-.badge {
-    display: inline-flex;
-    align-items: center;
-    padding: 6px 16px;
-    border-radius: 99px;
-    font-size: 0.85rem;
-    font-weight: 600;
-    font-family: 'Roboto Mono', monospace;
 }
-.badge-success { background: #dcfce7; color: #15803d; border: 1px solid #bbf7d0; }
-.badge-error { background: #fee2e2; color: #b91c1c; border: 1px solid #fecaca; }
-.badge-info { background: #e0f2fe; color: #0369a1; border: 1px solid #bae6fd; }
-/* --- TABS --- */
-.tabs { border-bottom: none !important; }
-.tab-nav button { font-weight: 600 !important; color: var(--text-gray) !important; }
-.tab-nav button.selected { color: var(--primary-blue) !important; border-bottom: 3px solid var(--primary-blue) !important; }
 """
 # =========================================================
-# 4. INTELLIGENT UTILITIES
 # =========================================================
 def split_text_smart(text: str, max_chars: int = 300) -> List[str]:
-    """Cắt câu thông minh, giữ nguyên ngữ điệu tiếng Việt."""
     if not text: return []
     text = re.sub(r'\s+', ' ', text).strip()
-    # Tách câu dựa trên dấu chấm câu nhưng giữ lại dấu
     raw = re.split(r'([.?!;:])', text)
     sentences = []
     current = ""
@@ -182,8 +125,7 @@ def split_text_smart(text: str, max_chars: int = 300) -> List[str]:
         else:
             current += part
     if current: sentences.append(current.strip())
-    # Ghép lại thành chunk
     chunks = []
     chunk = ""
     for sent in sentences:
@@ -196,49 +138,89 @@ def split_text_smart(text: str, max_chars: int = 300) -> List[str]:
     return chunks if chunks else [text]
 # =========================================================
-# 5. ENGINE CORE (Singleton)
 # =========================================================
 class TTSManager:
-    """Quản lý mô hình VITS với tối ưu hóa bộ nhớ."""
-    def __init__(self, model_path):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model_dir = Path(model_path)
-        self.config_path = self.model_dir / "config.json"
-        self._load_model()
-    def _load_model(self):
-        # Find latest checkpoint
-        ckpts = sorted(list(self.model_dir.glob("G_*.pth")),
-                      key=lambda x: int(re.search(r'G_(\d+)', x.name).group(1)) if re.search(r'G_(\d+)', x.name) else 0,
-                      reverse=True)
-        if not ckpts: raise FileNotFoundError("Không tìm thấy checkpoint G_*.pth")
-        with open(self.config_path, "r", encoding="utf-8") as f:
-            self.hps = json.load(f)
-        self.spk2id = self.hps["data"]["spk2id"]
-        self.speakers = sorted(list(self.spk2id.keys()))
-        self.net_g = SynthesizerTrn(
-            len(symbols),
-            self.hps["data"]["filter_length"] // 2 + 1,
-            self.hps["train"]["segment_size"] // self.hps["data"]["hop_length"],
-            n_speakers=self.hps["data"]["n_speakers"],
-            **self.hps["model"]
-        ).to(self.device)
-        ckpt = torch.load(ckpts[0], map_location=self.device)
-        self.net_g.load_state_dict(ckpt['model'])
-        self.net_g.eval()
-        logger.info(f"✅ Model loaded on {self.device}")
-    def infer(self, text, spk, speed, noise_scale, noise_width, sdp_ratio):
-        if not text: return None, 0
-        # Clean memory
         if self.device.type == 'cuda': torch.cuda.empty_cache()
-        # Processing
         text_norm = process_vietnamese_text(text)
         phones, tones, _ = text_to_phonemes(text_norm, use_viphoneme=VIPHONEME_AVAILABLE)
         phone_ids, tone_ids, lang_ids = cleaned_text_to_sequence(phones, tones, "VI")
@@ -251,173 +233,127 @@ class TTSManager:
             sid = torch.LongTensor([self.spk2id.get(spk, 0)]).to(self.device)
             outputs = self.net_g.infer(x, x_len, sid, tone, lang,
-                                     noise_scale=noise_scale, noise_scale_w=noise_width,
-                                     length_scale=speed, sdp_ratio=sdp_ratio)
-            # Safe Detach
             audio = outputs[0][0, 0].data.cpu().float().numpy()
-            del x, x_len, tone, lang, sid, outputs
-        return audio, self.hps["data"]["sampling_rate"]
 # =========================================================
-# 6. MODEL FETCHING
-# =========================================================
-def setup_engine():
-    cache_dir = Path.home() / ".cache" / "cvnss_vits"
-    model_dir = cache_dir / "vits-vietnamese"
-    if not (model_dir / "config.json").exists():
-        print("⬇️ Đang tải mô hình CVNSS4.0 từ Server...")
-        from huggingface_hub import snapshot_download
-        snapshot_download(repo_id="valtecAI-team/valtec-tts-pretrained", local_dir=str(model_dir))
-    return TTSManager(model_dir)
-# =========================================================
-# 7. UI CONSTRUCTION (Clean & Bright)
 # =========================================================
 def build_interface():
-    engine = None
-    if CORE_LOADED:
-        try:
-            engine = setup_engine()
-        except Exception as e:
-            logger.error(str(e))
-    def run_inference(text, spk, speed, ns, nsw, sdp, is_long=False, chunk_size=300, pause=250, progress=gr.Progress()):
-        if not engine: return None, "<span class='badge badge-error'>❌ Engine chưa sẵn sàng</span>"
         start_time = time.time()
         try:
             if not is_long:
-                audio, sr = engine.infer(text, spk, speed, ns, nsw, sdp)
-                full_audio = audio
             else:
                 chunks = split_text_smart(text, chunk_size)
                 segments = []
-                sr = 22050
-                silence = np.zeros(int(sr * pause / 1000))
                 for i, chunk in enumerate(chunks):
-                    progress((i)/len(chunks), desc=f"Đang đọc: {chunk[:15]}...")
-                    a, r = engine.infer(chunk, spk, speed, ns, nsw, sdp)
-                    if a is not None:
-                        sr = r
-                        segments.append(a)
-                        segments.append(silence)
-                full_audio = np.concatenate(segments) if segments else None
-            if full_audio is None: return None, "<span class='badge badge-error'>❌ Không có âm thanh</span>"
             proc_time = time.time() - start_time
             dur = len(full_audio) / sr
-            # Output HTML for Light Theme
-            html = f"""
-            <div style="display: flex; gap: 10px; margin-top: 10px;">
-                <span class="badge badge-success">✅ Hoàn tất</span>
-                <span class="badge badge-info">⏱️ Xử lý: {proc_time:.2f}s</span>
-                <span class="badge badge-info">🔊 Độ dài: {dur:.1f}s</span>
-            </div>
-            """
-            # Save to temp file needed for Gradio
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
                 sf.write(fp.name, full_audio, sr)
-                return fp.name, html
-        except Exception as ex:
-            return None, f"<span class='badge badge-error'>❌ Lỗi: {str(ex)}</span>"
-    # --- UI LAYOUT ---
-    speakers = engine.speakers if engine else ["Đang tải..."]
-    with gr.Blocks(theme=gr.themes.Soft(), css=ELEGANT_CSS, title="CVNSS4.0 Studio") as app:
-        # HEADER
         with gr.Row():
-            with gr.Column(scale=1):
-                gr.HTML("""
-                <div style="padding: 20px 0;">
                     <div class="header-title">CVNSS4.0 Studio</div>
-                    <div class="header-subtitle">
-                        Tác giả: <b>Long Ngo</b> • Cố vấn: <b>Thầy Trần Tư Bình</b><br>
-                        Công nghệ lõi: VITS 4.0 • Grandmaster Edition
                     </div>
                 </div>
                 """)
         with gr.Tabs():
-            # TAB 1: QUICK MODE
             with gr.Tab("⚡ Chế độ Nhanh"):
                 with gr.Row():
-                    # Input Panel
-                    with gr.Column(scale=5, elem_classes="elegant-card"):
-                        txt_input = gr.Textbox(
-                            label="Nhập văn bản (Tiếng Việt)",
-                            placeholder="Xin chào, hôm nay là một ngày tuyệt vời...",
-                            lines=4,
-                            show_label=True
-                        )
                         with gr.Row():
-                            spk_drop = gr.Dropdown(speakers, value=speakers[0], label="Giọng đọc", scale=2)
-                            spd_slider = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Tốc độ", scale=3)
-                        btn_fast = gr.Button("🔊 Đọc Ngay", elem_classes="primary-btn")
-                    # Output Panel
-                    with gr.Column(scale=4, elem_classes="elegant-card"):
-                        out_audio = gr.Audio(label="Kết quả", type="filepath", interactive=False)
-                        out_status = gr.HTML()
-                # Event Binding
-                btn_fast.click(
-                    fn=lambda t, s, sp: run_inference(t, s, sp, 0.667, 0.8, 0.2, False),
-                    inputs=[txt_input, spk_drop, spd_slider],
-                    outputs=[out_audio, out_status]
-                )
-            # TAB 2: PRO MODE
-            with gr.Tab("💎 Chế độ Chuyên sâu"):
                 with gr.Row():
-                    with gr.Column(scale=5, elem_classes="elegant-card"):
-                        txt_pro = gr.Textbox(label="Văn bản dài", placeholder="Dán nội dung bài báo hoặc truyện vào đây...", lines=8)
-                        with gr.Accordion("🛠️ Cấu hình Nâng cao", open=False):
-                            with gr.Row():
-                                ns = gr.Slider(0.1, 1.5, 0.667, label="Độ biến thiên (Noise Scale)")
-                                nsw = gr.Slider(0.1, 1.5, 0.8, label="Độ rộng âm (Noise Width)")
-                            with gr.Row():
-                                sdp = gr.Slider(0, 1, 0.2, label="Ngẫu nhiên (SDP)")
-                                chunk = gr.Slider(100, 1000, 300, step=50, label="Ngắt đoạn (Ký tự)")
-                                pause = gr.Slider(0, 1000, 250, label="Nghỉ câu (ms)")
-                        btn_pro = gr.Button("🚀 Xử lý Văn bản Dài", elem_classes="primary-btn")
-                    with gr.Column(scale=4, elem_classes="elegant-card"):
-                        out_audio_pro = gr.Audio(label="Audio Tổng hợp", type="filepath")
-                        out_status_pro = gr.HTML()
-                btn_pro.click(
-                    fn=lambda t, s, sp, n, nw, sd, c, p: run_inference(t, s, sp, n, nw, sd, True, c, p),
-                    inputs=[txt_pro, spk_drop, spd_slider, ns, nsw, sdp, chunk, pause],
-                    outputs=[out_audio_pro, out_status_pro]
-                )
     return app
-# =========================================================
-# 8. LAUNCHER (Fix lỗi Space Init)
-# =========================================================
 if __name__ == "__main__":
-    try:
-        if not (ROOT_DIR / "src").exists():
-            print("⚠️ CẢNH BÁO: Chưa tìm thấy thư mục 'src'. Vui lòng upload đầy đủ mã nguồn!")
-        ui = build_interface()
-        # Launch với settings tối ưu cho Hugging Face Spaces
-        ui.queue(max_size=10).launch(server_name="0.0.0.0", show_error=True)
-    except Exception as e:
-        print(f"❌ Lỗi khởi động: {e}")

 # -*- coding: utf-8 -*-
 """
+💎 CVNSS4.0 Vietnamese TTS Studio - Công nghệ giọng nói
+- Compatibility: Valtec Source Structure
+- Author: Long Ngo, 2026 | Phiên bản 1.0.1
+- Advisor: Thầy Trần Tư Bình
 """
 import os
 import json
 import time
 import re
 import logging
+import tempfile
+import shutil
 from pathlib import Path
+from typing import Optional, List
 import torch
 import numpy as np
 import soundfile as sf
 import gradio as gr
+from huggingface_hub import hf_hub_download
+# --- 1. ROBUST LOGGING & PATH SETUP ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
 logger = logging.getLogger("CVNSS_Studio")
+# Định vị thư mục gốc chính xác
+ROOT_DIR = Path(__file__).resolve().parent
 if str(ROOT_DIR) not in sys.path:
     sys.path.insert(0, str(ROOT_DIR))
+# --- 2. IMPORT HANDLER (CRITICAL FIX) ---
+# Chúng ta sẽ thử import, nếu thiếu src sẽ báo lỗi rõ ràng
 try:
+    # Kiểm tra xem folder src có tồn tại không
+    if not (ROOT_DIR / "src").exists():
+        raise ImportError("Thư mục 'src' không tồn tại. Vui lòng upload folder src từ repo gốc!")
     from src.vietnamese.text_processor import process_vietnamese_text
     from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
     from src.models.synthesizer import SynthesizerTrn
     from src.text import cleaned_text_to_sequence
     from src.nn import commons
     CORE_LOADED = True
+    IMPORT_ERROR_MSG = ""
+except Exception as e:
+    logger.error(f"❌ Core load failed: {e}")
     CORE_LOADED = False
+    IMPORT_ERROR_MSG = str(e)
     VIPHONEME_AVAILABLE = False
     symbols = []
 # =========================================================
+# 3. ELEGANT CSS (AZURE HORIZON)
 # =========================================================
 ELEGANT_CSS = r"""
 @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&family=Roboto+Mono:wght@400;500&display=swap');
 :root {
     --primary-blue: #3b82f6;
     --text-dark: #1e293b;
     --text-gray: #64748b;
     --surface-white: #ffffff;
     --bg-gradient: linear-gradient(135deg, #f8fafc 0%, #e0f2fe 100%);
+    --radius-xl: 20px;
 }
 body, .gradio-container {
     color: var(--text-dark) !important;
 }
 .elegant-card {
     background: var(--surface-white);
     border-radius: var(--radius-xl);
     border: 1px solid rgba(255, 255, 255, 0.8);
+    box-shadow: 0 10px 30px -10px rgba(59, 130, 246, 0.15);
+    padding: 24px;
 }
 .header-title {
+    font-weight: 800;
+    font-size: 2rem;
+    color: #0f172a;
     letter-spacing: -0.03em;
 }
 button.primary-btn {
     background: var(--primary-blue) !important;
     color: white !important;
     border-radius: 12px !important;
     border: none !important;
+    font-weight: 600 !important;
+    transition: 0.2s !important;
 }
+button.primary-btn:hover { transform: translateY(-2px); box-shadow: 0 10px 20px -5px rgba(59, 130, 246, 0.4); }
+.badge { display: inline-flex; align-items: center; padding: 4px 12px; border-radius: 99px; font-size: 0.8rem; font-weight: 600; margin-right: 5px;}
+.badge-success { background: #dcfce7; color: #15803d; }
+.badge-error { background: #fee2e2; color: #b91c1c; }
+.badge-warn { background: #fef9c3; color: #854d0e; }
 """
 # =========================================================
+# 4. UTILITIES & LOGIC
 # =========================================================
 def split_text_smart(text: str, max_chars: int = 300) -> List[str]:
     if not text: return []
     text = re.sub(r'\s+', ' ', text).strip()
     raw = re.split(r'([.?!;:])', text)
     sentences = []
     current = ""
         else:
             current += part
     if current: sentences.append(current.strip())
     chunks = []
     chunk = ""
     for sent in sentences:
     return chunks if chunks else [text]
 # =========================================================
+# 5. ENGINE CORE (Auto-Downloading)
 # =========================================================
 class TTSManager:
+    def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.net_g = None
+        self.hps = None
+        self.ready = False
+        self.status_msg = "Khởi tạo..."
+        # Tự động load ngay khi init
+        self._initialize_model()
+    def _download_file_if_missing(self, repo_id, filename, local_dir):
+        target_path = local_dir / filename
+        if not target_path.exists():
+            logger.info(f"⬇️ Đang tải {filename}...")
+            try:
+                # Tải về file tạm rồi move vào đúng chỗ để tránh lỗi cache
+                file_path = hf_hub_download(repo_id=repo_id, filename=filename, local_dir=local_dir)
+                return Path(file_path)
+            except Exception as e:
+                logger.error(f"Không tải được {filename}: {e}")
+                return None
+        return target_path
+    def _initialize_model(self):
+        try:
+            # 1. Định nghĩa thư mục chứa model cục bộ (để kiểm soát chắc chắn)
+            model_dir = ROOT_DIR / "model_cache"
+            model_dir.mkdir(exist_ok=True)
+            repo_id = "valtecAI-team/valtec-vietnamese-tts" # Repo gốc bạn cung cấp
+            # 2. Tải Config
+            cfg_path = self._download_file_if_missing(repo_id, "config.json", model_dir)
+            # 3. Tải Model (G_100000.pth hoặc file G mới nhất)
+            # Ở đây ta hardcode file G_100000.pth vì repo valtec thường dùng tên này hoặc tương tự
+            # Bạn có thể đổi tên file nếu repo update
+            ckpt_path = self._download_file_if_missing(repo_id, "G_100000.pth", model_dir)
+            if not cfg_path or not ckpt_path:
+                self.status_msg = "❌ Không tải được file model. Kiểm tra kết nối mạng."
+                return
+            # 4. Load Config
+            with open(cfg_path, "r", encoding="utf-8") as f:
+                self.hps = json.load(f)
+            self.spk2id = self.hps["data"]["spk2id"]
+            self.speakers = sorted(list(self.spk2id.keys()))
+            # 5. Load Network
+            if CORE_LOADED:
+                self.net_g = SynthesizerTrn(
+                    len(symbols),
+                    self.hps["data"]["filter_length"] // 2 + 1,
+                    self.hps["train"]["segment_size"] // self.hps["data"]["hop_length"],
+                    n_speakers=self.hps["data"]["n_speakers"],
+                    **self.hps["model"]
+                ).to(self.device)
+                ckpt = torch.load(ckpt_path, map_location=self.device)
+                self.net_g.load_state_dict(ckpt['model'])
+                self.net_g.eval()
+                self.ready = True
+                self.status_msg = f"✅ Sẵn sàng ({self.device})"
+                logger.info("Engine Ready!")
+            else:
+                self.status_msg = "❌ Lỗi Import Core (src folder missing)"
+        except Exception as e:
+            self.ready = False
+            self.status_msg = f"❌ Lỗi Init: {str(e)}"
+            logger.error(self.status_msg)
+    def infer(self, text, spk, speed, ns, nsw, sdp):
+        if not self.ready:
+            raise RuntimeError(f"Engine chưa sẵn sàng: {self.status_msg}")
         if self.device.type == 'cuda': torch.cuda.empty_cache()
         text_norm = process_vietnamese_text(text)
         phones, tones, _ = text_to_phonemes(text_norm, use_viphoneme=VIPHONEME_AVAILABLE)
         phone_ids, tone_ids, lang_ids = cleaned_text_to_sequence(phones, tones, "VI")
             sid = torch.LongTensor([self.spk2id.get(spk, 0)]).to(self.device)
             outputs = self.net_g.infer(x, x_len, sid, tone, lang,
+                                     noise_scale=ns, noise_scale_w=nsw,
+                                     length_scale=speed, sdp_ratio=sdp)
             audio = outputs[0][0, 0].data.cpu().float().numpy()
+            return audio, self.hps["data"]["sampling_rate"]
 # =========================================================
+# 6. UI CONSTRUCTION
 # =========================================================
 def build_interface():
+    # Khởi tạo Manager
+    manager = TTSManager()
+    def run_inference(text, spk, speed, ns, nsw, sdp, is_long, chunk_size, pause, progress=gr.Progress()):
+        if not manager.ready:
+            return None, f"<span class='badge badge-error'>{manager.status_msg}</span><br><small>{IMPORT_ERROR_MSG}</small>"
+        if not text: return None, "⚠️ Chưa nhập nội dung"
         start_time = time.time()
         try:
+            full_audio = None
+            sr = 0
             if not is_long:
+                full_audio, sr = manager.infer(text, spk, speed, ns, nsw, sdp)
             else:
                 chunks = split_text_smart(text, chunk_size)
                 segments = []
+                # Dummy sr, will be updated
+                sr = 22050
                 for i, chunk in enumerate(chunks):
+                    progress((i)/len(chunks), desc=f"Đoạn {i+1}/{len(chunks)}")
+                    a, r = manager.infer(chunk, spk, speed, ns, nsw, sdp)
+                    sr = r
+                    segments.append(a)
+                    if pause > 0:
+                        segments.append(np.zeros(int(sr * pause / 1000)))
+                if segments:
+                    full_audio = np.concatenate(segments)
+            if full_audio is None: return None, "❌ Lỗi tạo âm thanh"
+            # Export
             proc_time = time.time() - start_time
             dur = len(full_audio) / sr
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
                 sf.write(fp.name, full_audio, sr)
+                return fp.name, f"<span class='badge badge-success'>Hoàn thành: {dur:.1f}s</span>"
+        except Exception as e:
+            return None, f"<span class='badge badge-error'>Lỗi: {str(e)}</span>"
+    # --- LAYOUT ---
+    speaker_list = manager.speakers if manager.ready else ["Chưa tải model"]
+    with gr.Blocks(theme=gr.themes.Soft(), css=ELEGANT_CSS, title="CVNSS4.0 Auto-Fix") as app:
         with gr.Row():
+            with gr.Column():
+                gr.HTML(f"""
+                <div style="margin-bottom: 20px;">
                     <div class="header-title">CVNSS4.0 Studio</div>
+                    <div style="color: #64748b; font-size: 0.9rem;">
+                        Long Ngo • Trần Tư Bình • Valtec TTS Core<br>
+                        Trạng thái Engine: <b>{manager.status_msg}</b>
                     </div>
                 </div>
                 """)
+                if not CORE_LOADED:
+                    gr.HTML(f"""
+                    <div style="background: #fee2e2; color: #b91c1c; padding: 10px; border-radius: 8px; margin-bottom: 10px;">
+                        <b>⚠️ CẢNH BÁO QUAN TRỌNG:</b><br>
+                        Không tìm thấy thư mục <code>src</code>. Engine không thể chạy.<br>
+                        Vui lòng đảm bảo bạn đã upload thư mục <code>src</code> từ repo Valtec lên tab Files của Space.
+                        <br><i>Chi tiết lỗi: {IMPORT_ERROR_MSG}</i>
+                    </div>
+                    """)
         with gr.Tabs():
+            # Tab Nhanh
             with gr.Tab("⚡ Chế độ Nhanh"):
                 with gr.Row():
+                    with gr.Column(scale=3, elem_classes="elegant-card"):
+                        txt_input = gr.Textbox(label="Văn bản", placeholder="Nhập gì đó đi...", lines=3)
                         with gr.Row():
+                            spk_drp = gr.Dropdown(speaker_list, value=speaker_list[0] if speaker_list else None, label="Giọng")
+                            spd_sld = gr.Slider(0.5, 2.0, 1.0, label="Tốc độ")
+                        btn_run = gr.Button("🔊 Đọc Ngay", elem_classes="primary-btn")
+                    with gr.Column(scale=2, elem_classes="elegant-card"):
+                        out_aud = gr.Audio(label="Kết quả", type="filepath")
+                        out_html = gr.HTML()
+                btn_run.click(lambda t, s, sp: run_inference(t, s, sp, 0.667, 0.8, 0.2, False, 0, 0),
+                              [txt_input, spk_drp, spd_sld], [out_aud, out_html])
+            # Tab Chuyên sâu
+            with gr.Tab("💎 Chế độ Dài"):
                 with gr.Row():
+                    with gr.Column(scale=3, elem_classes="elegant-card"):
+                        txt_long = gr.Textbox(label="Văn bản dài", lines=6)
+                        with gr.Accordion("Cấu hình", open=False):
+                            ns = gr.Slider(0.1, 1.5, 0.667, label="Noise Scale")
+                            nsw = gr.Slider(0.1, 1.5, 0.8, label="Noise Width")
+                            sdp = gr.Slider(0, 1, 0.2, label="SDP")
+                            chunk = gr.Slider(100, 1000, 300, label="Ngắt câu (ký tự)")
+                            pause = gr.Slider(0, 1000, 250, label="Nghỉ (ms)")
+                        btn_long = gr.Button("🚀 Xử lý", elem_classes="primary-btn")
+                    with gr.Column(scale=2, elem_classes="elegant-card"):
+                        out_long = gr.Audio(label="Audio", type="filepath")
+                        out_html_long = gr.HTML()
+                btn_long.click(lambda t, s, sp, n, nw, sd, c, p: run_inference(t, s, sp, n, nw, sd, True, c, p),
+                               [txt_long, spk_drp, spd_sld, ns, nsw, sdp, chunk, pause],
+                               [out_long, out_html_long])
     return app
 if __name__ == "__main__":
+    ui = build_interface()
+    ui.queue().launch(server_name="0.0.0.0", show_error=True)