TTS-Demo

Sleeping

App Files Files Community

CVNSS committed 19 days ago

Commit

0bc2dfc

verified ·

1 Parent(s): 320f794

Update app.py

Browse files

Files changed (1) hide show

app.py +711 -256

app.py CHANGED Viewed

@@ -1,340 +1,795 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-💎 CVNSS4.0 Vietnamese TTS Studio - Final Repair Edition
-- Fix: 'No module named imp' (Python 3.12+ Compatibility Patch)
-- Fix: Auto-download 'src' folder if missing
-- Fix: Auto-download Model checkpoints
-- Design: Azure Horizon (Ceramic White & Soft Blue)
-- Author: Refactored by 100-Year AI Expert
 """
 import os
 import sys
 import json
 import time
 import re
-import logging
 import tempfile
-import importlib
-import types
 from pathlib import Path
-from typing import Optional, List
-# --- 0. CRITICAL PATCH: FIX 'No module named imp' ---
-# Mã nguồn cũ dùng 'imp' (đã bị xóa ở Python 3.12). Ta tạo module giả để đánh lừa nó.
-try:
-    import imp
-except ImportError:
-    import types
-    # Tạo module giả
-    imp = types.ModuleType("imp")
-    # Map các hàm quan trọng từ importlib sang imp
-    imp.new_module = types.ModuleType
-    imp.reload = importlib.reload
-    sys.modules["imp"] = imp
-    print("🔧 Đã vá lỗi 'imp' module cho Python 3.12+")
 import torch
 import numpy as np
 import soundfile as sf
 import gradio as gr
-from huggingface_hub import hf_hub_download, snapshot_download
-# --- 1. SETUP LOGGING & PATH ---
-logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
-logger = logging.getLogger("CVNSS_Studio")
-ROOT_DIR = Path(__file__).resolve().parent
-if str(ROOT_DIR) not in sys.path:
-    sys.path.insert(0, str(ROOT_DIR))
-# --- 2. AUTO-HEALING: TẢI SOURCE CODE NẾU THIẾU ---
-def ensure_source_code():
-    src_path = ROOT_DIR / "src"
-    if not src_path.exists():
-        logger.warning("⚠️ Không thấy thư mục 'src'. Đang tự động tải từ kho gốc Valtec...")
-        try:
-            # Tải folder src từ repo gốc về thư mục hiện tại
-            snapshot_download(
-                repo_id="valtecAI-team/valtec-vietnamese-tts",
-                repo_type="space", # Repo gốc là Space
-                allow_patterns=["src/*", "src/**/*"],
-                local_dir=str(ROOT_DIR),
-                token=None # Public repo không cần token
-            )
-            logger.info("✅ Đã tải xong mã nguồn 'src'.")
-        except Exception as e:
-            logger.error(f"❌ Không thể tải mã nguồn: {e}")
-            raise RuntimeError("Không thể tải 'src'. Vui lòng kiểm tra kết nối mạng.")
-# Chạy hàm kiểm tra ngay lập tức
-try:
-    ensure_source_code()
-except Exception:
-    pass # Sẽ xử lý lỗi ở phần import bên dưới
-# --- 3. IMPORT CORE MODULES ---
 try:
     from src.vietnamese.text_processor import process_vietnamese_text
     from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
     from src.models.synthesizer import SynthesizerTrn
     from src.text.symbols import symbols
-    from src.text import cleaned_text_to_sequence
-    from src.nn import commons
-    CORE_LOADED = True
-    IMPORT_ERROR_MSG = ""
-except ImportError as e:
-    logger.error(f"❌ Lỗi Import Core: {e}")
-    CORE_LOADED = False
-    IMPORT_ERROR_MSG = str(e)
     VIPHONEME_AVAILABLE = False
     symbols = []
 # =========================================================
-# 4. ELEGANT CSS (AZURE HORIZON)
 # =========================================================
-ELEGANT_CSS = r"""
-@import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;600;800&family=Roboto+Mono:wght@400;500&display=swap');
 :root {
-    --primary-blue: #3b82f6;
-    --text-dark: #1e293b;
-    --surface-white: #ffffff;
-    --bg-gradient: linear-gradient(135deg, #f8fafc 0%, #e0f2fe 100%);
 }
-body, .gradio-container {
-    background: var(--bg-gradient) !important;
-    font-family: 'Manrope', sans-serif !important;
-    color: var(--text-dark) !important;
 }
-.elegant-card {
-    background: var(--surface-white);
-    border-radius: 20px;
-    border: 1px solid rgba(255, 255, 255, 0.8);
-    box-shadow: 0 10px 30px -10px rgba(59, 130, 246, 0.15);
-    padding: 24px;
 }
-.header-title {
-    font-weight: 800; font-size: 2rem; color: #0f172a;
-    letter-spacing: -0.03em;
 }
-button.primary-btn {
-    background: var(--primary-blue) !important;
-    color: white !important;
-    border-radius: 12px !important;
     border: none !important;
-    font-weight: 600 !important;
-    padding: 10px 20px;
-    transition: 0.2s !important;
 }
-button.primary-btn:hover { transform: translateY(-2px); box-shadow: 0 10px 20px -5px rgba(59, 130, 246, 0.4); }
-.badge { display: inline-flex; align-items: center; padding: 4px 12px; border-radius: 99px; font-size: 0.8rem; font-weight: 600; margin-right: 5px;}
-.badge-success { background: #dcfce7; color: #15803d; }
-.badge-error { background: #fee2e2; color: #b91c1c; }
 """
 # =========================================================
-# 5. UTILITIES
 # =========================================================
-def split_text_smart(text: str, max_chars: int = 300) -> List[str]:
     if not text: return []
     text = re.sub(r'\s+', ' ', text).strip()
-    raw = re.split(r'([.?!;:])', text)
-    sentences = []
-    current = ""
-    for part in raw:
-        if part in ".?!;:":
-            current += part
-            sentences.append(current.strip())
-            current = ""
-        else:
-            current += part
-    if current: sentences.append(current.strip())
     chunks = []
-    chunk = ""
-    for sent in sentences:
-        if len(chunk) + len(sent) < max_chars:
-            chunk += " " + sent if chunk else sent
         else:
-            if chunk: chunks.append(chunk)
-            chunk = sent
-    if chunk: chunks.append(chunk)
-    return chunks if chunks else [text]
 # =========================================================
-# 6. ENGINE CORE
 # =========================================================
 class TTSManager:
     def __init__(self):
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.net_g = None
-        self.hps = None
-        self.ready = False
-        self.status_msg = "Đang khởi tạo..."
-        self._initialize_model()
-    def _initialize_model(self):
         try:
-            # Tự động tải model về thư mục riêng
-            model_dir = ROOT_DIR / "model_cache"
-            model_dir.mkdir(exist_ok=True)
-            # Repo chứa model
-            repo_id = "valtecAI-team/valtec-vietnamese-tts"
-            logger.info("⬇️ Đang tải Config & Model...")
-            cfg_path = hf_hub_download(repo_id=repo_id, filename="config.json", local_dir=model_dir)
-            # Tìm file G_*.pth mới nhất hoặc mặc định
-            try:
-                ckpt_path = hf_hub_download(repo_id=repo_id, filename="G_100000.pth", local_dir=model_dir)
-            except:
-                # Fallback nếu tên file khác
-                ckpt_path = hf_hub_download(repo_id=repo_id, filename="G_0.pth", local_dir=model_dir)
-            with open(cfg_path, "r", encoding="utf-8") as f:
-                self.hps = json.load(f)
-            self.spk2id = self.hps["data"]["spk2id"]
-            self.speakers = sorted(list(self.spk2id.keys()))
-            if CORE_LOADED:
-                self.net_g = SynthesizerTrn(
-                    len(symbols),
-                    self.hps["data"]["filter_length"] // 2 + 1,
-                    self.hps["train"]["segment_size"] // self.hps["data"]["hop_length"],
-                    n_speakers=self.hps["data"]["n_speakers"],
-                    **self.hps["model"]
-                ).to(self.device)
-                ckpt = torch.load(ckpt_path, map_location=self.device)
-                self.net_g.load_state_dict(ckpt['model'])
-                self.net_g.eval()
-                self.ready = True
-                self.status_msg = f"✅ Sẵn sàng ({self.device})"
-                logger.info("🚀 Engine đã khởi động thành công!")
-            else:
-                self.status_msg = "❌ Lỗi: Không load được mã nguồn (src)"
         except Exception as e:
-            self.ready = False
-            self.status_msg = f"❌ Lỗi Model: {str(e)}"
-            logger.error(self.status_msg)
-    def infer(self, text, spk, speed, ns, nsw, sdp):
-        if not self.ready: raise RuntimeError(self.status_msg)
-        if self.device.type == 'cuda': torch.cuda.empty_cache()
-        text_norm = process_vietnamese_text(text)
-        phones, tones, _ = text_to_phonemes(text_norm, use_viphoneme=VIPHONEME_AVAILABLE)
         phone_ids, tone_ids, lang_ids = cleaned_text_to_sequence(phones, tones, "VI")
         with torch.no_grad():
-            x = torch.LongTensor(commons.intersperse(phone_ids, 0)).unsqueeze(0).to(self.device)
-            x_len = torch.LongTensor([x.size(1)]).to(self.device)
-            tone = torch.LongTensor(commons.intersperse(tone_ids, 0)).unsqueeze(0).to(self.device)
-            lang = torch.LongTensor(commons.intersperse(lang_ids, 0)).unsqueeze(0).to(self.device)
-            sid = torch.LongTensor([self.spk2id.get(spk, 0)]).to(self.device)
-            outputs = self.net_g.infer(x, x_len, sid, tone, lang, noise_scale=ns, noise_scale_w=nsw, length_scale=speed, sdp_ratio=sdp)
-            return outputs[0][0, 0].data.cpu().float().numpy(), self.hps["data"]["sampling_rate"]
 # =========================================================
-# 7. UI CONSTRUCTION
 # =========================================================
-def build_interface():
-    manager = TTSManager()
-    def run_inference(text, spk, speed, ns, nsw, sdp, is_long, chunk_size, pause, progress=gr.Progress()):
-        if not manager.ready:
-            # Thử load lại nếu trước đó thất bại
-            if not CORE_LOADED:
-                 return None, f"<span class='badge badge-error'>Lỗi mã nguồn: {IMPORT_ERROR_MSG}</span>"
-            return None, f"<span class='badge badge-error'>{manager.status_msg}</span>"
-        if not text: return None, "⚠️ Chưa nhập nội dung"
-        try:
-            full_audio = None
-            sr = 0
-            if not is_long:
-                full_audio, sr = manager.infer(text, spk, speed, ns, nsw, sdp)
-            else:
-                chunks = split_text_smart(text, chunk_size)
-                segments = []
-                sr = 22050
-                for i, chunk in enumerate(chunks):
-                    progress((i)/len(chunks), desc=f"Đoạn {i+1}/{len(chunks)}")
-                    a, r = manager.infer(chunk, spk, speed, ns, nsw, sdp)
-                    sr = r
-                    segments.append(a)
-                    if pause > 0: segments.append(np.zeros(int(sr * pause / 1000)))
-                if segments: full_audio = np.concatenate(segments)
-            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-                sf.write(fp.name, full_audio, sr)
-                dur = len(full_audio)/sr
-                return fp.name, f"<span class='badge badge-success'>Hoàn thành: {dur:.1f}s</span>"
-        except Exception as e:
-            return None, f"<span class='badge badge-error'>Lỗi: {str(e)}</span>"
-    speaker_list = manager.speakers if manager.ready else ["Đang tải..."]
-    with gr.Blocks(theme=gr.themes.Soft(), css=ELEGANT_CSS, title="CVNSS4.0 Studio") as app:
-        gr.HTML(f"""
-        <div style="margin-bottom: 20px;">
-            <div class="header-title">CVNSS4.0 Studio</div>
-            <div style="color: #64748b;">Long Ngo • Trần Tư Bình • Valtec TTS Core</div>
-            <div style="margin-top:5px">Trạng thái: <b>{manager.status_msg}</b></div>
         </div>
         """)
-        if not CORE_LOADED:
-             gr.HTML(f"""<div style="background:#fee2e2; color:#b91c1c; padding:10px; border-radius:8px;">
-                <b>LỖI NGHIÊM TRỌNG:</b> Không tải được mã nguồn 'src'.<br>
-                Hệ thống đã cố gắng tự tải nhưng thất bại. Chi tiết: {IMPORT_ERROR_MSG}
-             </div>""")
         with gr.Tabs():
             with gr.Tab("⚡ Chế độ Nhanh"):
                 with gr.Row():
-                    with gr.Column(scale=3, elem_classes="elegant-card"):
-                        txt_in = gr.Textbox(label="Văn bản", placeholder="Nhập văn bản tiếng Việt...", lines=3)
-                        with gr.Row():
-                            spk_drp = gr.Dropdown(speaker_list, value=speaker_list[0] if speaker_list else None, label="Giọng đọc")
-                            spd_sld = gr.Slider(0.5, 2.0, 1.0, label="Tốc độ")
-                        btn_run = gr.Button("🔊 Đọc Ngay", elem_classes="primary-btn")
-                    with gr.Column(scale=2, elem_classes="elegant-card"):
-                        out_aud = gr.Audio(label="Kết quả", type="filepath")
-                        out_html = gr.HTML()
-                btn_run.click(lambda t,s,sp: run_inference(t,s,sp,0.667,0.8,0.2,False,0,0), [txt_in,spk_drp,spd_sld], [out_aud,out_html])
-            with gr.Tab("💎 Chế độ Dài"):
-                with gr.Row():
-                    with gr.Column(scale=3, elem_classes="elegant-card"):
-                        txt_long = gr.Textbox(label="Văn bản dài", lines=6)
-                        with gr.Accordion("Cấu hình", open=False):
-                            ns = gr.Slider(0.1, 1.5, 0.667, label="Noise Scale")
-                            nsw = gr.Slider(0.1, 1.5, 0.8, label="Noise Width")
-                            sdp = gr.Slider(0, 1, 0.2, label="SDP")
-                            chunk = gr.Slider(100, 1000, 300, label="Ngắt câu")
-                            pause = gr.Slider(0, 1000, 250, label="Nghỉ (ms)")
-                        btn_long = gr.Button("🚀 Xử lý", elem_classes="primary-btn")
-                    with gr.Column(scale=2, elem_classes="elegant-card"):
-                        out_long = gr.Audio(label="Audio", type="filepath")
-                        out_html_long = gr.HTML()
-                btn_long.click(lambda t,s,sp,n,nw,sd,c,p: run_inference(t,s,sp,n,nw,sd,True,c,p), [txt_long,spk_drp,spd_sld,ns,nsw,sdp,chunk,pause], [out_long,out_html_long])
-    return app
-if __name__ == "__main__":
-    ui = build_interface()
-    ui.queue().launch(server_name="0.0.0.0", show_error=True)

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
+CVNSS4.0 Vietnamese TTS Studio
+- Architecture: Modular CSS & Component Separation
+- UX: High Contrast Input Fields
+- Core: Optimized Logic Flow
 """
 import os
 import sys
 import json
 import time
+import glob
 import re
+import hashlib
 import tempfile
 from pathlib import Path
 import torch
 import numpy as np
 import soundfile as sf
 import gradio as gr
+# Put this script's own directory first on sys.path so that the sibling
+# `src` package resolves regardless of the current working directory.
+sys.path.insert(0, str(Path(__file__).parent))
+# Import the core TTS modules from the local `src` package.
 try:
     from src.vietnamese.text_processor import process_vietnamese_text
     from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
     from src.models.synthesizer import SynthesizerTrn
     from src.text.symbols import symbols
+except ImportError:
+    # Fallback so the module itself can still be imported when `src` is missing.
+    # NOTE(review): only VIPHONEME_AVAILABLE and symbols get placeholder values;
+    # process_vietnamese_text, text_to_phonemes and SynthesizerTrn stay
+    # undefined, so code that uses them will raise NameError later.
+    print("⚠️ Core modules not found. Ensure 'src' directory exists.")
     VIPHONEME_AVAILABLE = False
     symbols = []
 # =========================================================
+# 1) SYSTEM CONFIGURATION & CSS (The Expert Layer)
 # =========================================================
+# Expert CSS: Definitive Z-Index Management & Neon Theme
+NEON_CSS = r"""
 :root {
+    --bg-dark: #0f172a;
+    --bg-panel: rgba(30, 41, 59, 0.7);
+    --line: rgba(148, 163, 184, 0.1);
+    --text-primary: #e2e8f0;
+    --neon-cyan: #06b6d4;
+    --neon-accent: #38bdf8;
+    --radius-lg: 16px;
+    --radius-sm: 8px;
+    /* UX Color Palette for Inputs */
+    --input-bg: #f1f5f9;       /* Light Blue-Grey for readability */
+    --input-text: #0f4c81;     /* Classic Blue (Dark Blue) for high contrast */
+    --input-placeholder: #64748b;
+}
+body, .gradio-container, .app {
+    background: radial-gradient(circle at 50% 0%, #1e293b 0%, #0f172a 100%) !important;
+    color: var(--text-primary) !important;
+    font-family: 'Inter', 'Segoe UI', sans-serif;
 }
+/* --- ISOLATION FULL: CVNSS4.0 Vietnamese TTS Studio --- */
+.panelNeon {
+    border: 1px solid rgba(255,255,255,0.08);
+    border-radius: var(--radius-lg);
+    background: var(--bg-panel);
+    backdrop-filter: blur(12px);
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+    padding: 20px;
+    position: relative;
+    isolation: isolate;
+    z-index: 1;
+    margin-bottom: 20px;
 }
+/* UX IMPROVEMENT: High Contrast Input Styling */
+.panelNeon textarea, .panelNeon input[type="text"] {
+    background: var(--input-bg) !important;
+    color: var(--input-text) !important; /* DARK BLUE TEXT requested */
+    border: 2px solid transparent !important;
+    border-radius: var(--radius-sm) !important;
+    font-weight: 500 !important;
+    font-size: 1rem !important;
+    line-height: 1.5 !important;
+    padding: 12px !important;
+    transition: all 0.2s ease;
+    z-index: 10 !important;
+    position: relative !important;
 }
+.panelNeon textarea::placeholder {
+    color: var(--input-placeholder) !important;
 }
+.panelNeon textarea:focus, .panelNeon input:focus {
+    background: #ffffff !important;
+    border-color: var(--neon-cyan) !important;
+    box-shadow: 0 0 0 4px rgba(6, 182, 212, 0.15) !important;
+    color: #000000 !important; /* Even darker on focus */
+}
+/* Label Styling */
+.panelNeon label span {
+    color: var(--neon-accent) !important;
+    font-weight: 600;
+    font-size: 0.85rem;
+    text-transform: uppercase;
+    letter-spacing: 0.05em;
+    margin-bottom: 8px;
+    display: block;
+}
+/* Dropdown & Slider fixes */
+.panelNeon .wrap, .panelNeon .range-compact {
+    z-index: 10 !important;
+}
+/* Button Upgrades */
+button.primary, .gr-button-primary {
+    background: linear-gradient(135deg, #06b6d4 0%, #3b82f6 100%) !important;
     border: none !important;
+    color: white !important;
+    font-weight: 700 !important;
+    transition: transform 0.1s ease, box-shadow 0.2s ease;
+}
+button.primary:hover, .gr-button-primary:hover {
+    box-shadow: 0 10px 15px -3px rgba(6, 182, 212, 0.3) !important;
+    transform: translateY(-1px);
+}
+button.primary:active {
+    transform: translateY(0px);
+}
+/* Status Panel */
+.statusCard {
+    background: rgba(15, 23, 42, 0.6);
+    border-radius: var(--radius-sm);
+    padding: 16px;
+    border: 1px solid rgba(255,255,255,0.05);
+}
+.pill {
+    display: inline-flex;
+    align-items: center;
+    padding: 4px 12px;
+    border-radius: 99px;
+    background: rgba(56, 189, 248, 0.1);
+    color: #38bdf8;
+    border: 1px solid rgba(56, 189, 248, 0.2);
+    font-size: 0.8rem;
+    font-weight: 600;
+    margin-right: 6px;
+    margin-bottom: 6px;
 }
+.alert { padding: 12px; border-radius: 8px; margin-top: 12px; font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 8px;}
+.alertOk { background: rgba(34, 197, 94, 0.1); color: #4ade80; border: 1px solid rgba(34, 197, 94, 0.2); }
+.alertWarn { background: rgba(234, 179, 8, 0.1); color: #facc15; border: 1px solid rgba(234, 179, 8, 0.2); }
 """
 # =========================================================
+# 2) UTILITIES & HELPERS
 # =========================================================
+def check_viphoneme():
+    """Report whether the viphoneme phonemizer can be used.
+
+    Returns False right away when VIPHONEME_AVAILABLE is falsy. Otherwise it
+    runs text_to_phonemes on the probe string "Test" and returns True only if
+    that call succeeds and yields three values. Each outcome is printed.
+    """
+    if VIPHONEME_AVAILABLE:
+        try:
+            # The three-way unpack is part of the probe: a result of any other
+            # shape counts as a failure.
+            _probe, _, _ = text_to_phonemes("Test", use_viphoneme=True)
+            print("✅ Viphoneme active.")
+            return True
+        except Exception as e:
+            print(f"❌ Viphoneme error: {e}")
+            return False
+    print("⚠️ Viphoneme not available.")
+    return False
+def md5_key(*parts: str) -> str:
+    """Return the hex MD5 digest of *parts* joined with "|" (UTF-8 encoded)."""
+    joined = "|".join(parts)
+    digest = hashlib.md5(joined.encode("utf-8"))
+    return digest.hexdigest()
+def split_sentences_vi(text: str, max_chars: int):
+    """Split *text* into chunks of whole sentences, each at most *max_chars* long.
+
+    Whitespace runs are collapsed to single spaces first. Sentences end at any
+    of ``. ? ! ; :`` and keep their delimiter. Sentences are packed greedily
+    into a chunk until the next one would push it past *max_chars*. A single
+    sentence longer than *max_chars* is emitted as its own oversized chunk.
+    Returns an empty list for empty or whitespace-only input.
+    """
     if not text: return []
     text = re.sub(r'\s+', ' ', text).strip()
+    pieces = re.split(r'([.?!;:])', text)
+    # Even indexes hold sentence bodies, odd indexes hold the delimiter that
+    # followed each body; glue every body back to its delimiter.
+    sentences = [body + mark for body, mark in zip(pieces[0::2], pieces[1::2])]
+    # An odd-length split leaves a final fragment with no closing delimiter.
+    if len(pieces) % 2 == 1 and pieces[-1]:
+        sentences.append(pieces[-1])
     chunks = []
+    buffer = ""
+    for sentence in sentences:
+        if len(buffer) + len(sentence) > max_chars:
+            if buffer:
+                chunks.append(buffer.strip())
+            buffer = sentence
+        else:
+            buffer += sentence
+    if buffer:
+        chunks.append(buffer.strip())
+    return chunks
 # =========================================================
+# 3) CORE ENGINE WRAPPER
 # =========================================================
 class TTSManager:
+    """Owns the loaded TTS model and an on-disk cache of synthesized clips.
+    Construction resolves the model directory with download_model(), picks a
+    generator checkpoint with find_latest_checkpoint(), and builds a
+    VietnameseTTS instance. Clips are written as WAV files under
+    <system temp dir>/neon_tts_cache and reused for identical requests.
+    """
     def __init__(self):
+        """Load the model; raises FileNotFoundError if no checkpoint is found."""
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"🔧 Initializing TTS on {self.device}...")
+        self.model_dir = self._get_model_dir()
+        self.ckpt_path = find_latest_checkpoint(self.model_dir, "G")
+        self.cfg_path = os.path.join(self.model_dir, "config.json")
+        if not self.ckpt_path:
+            raise FileNotFoundError(f"No checkpoint found in {self.model_dir}")
+        self.tts = VietnameseTTS(self.ckpt_path, self.cfg_path, self.device)
+        self.temp_dir = Path(tempfile.gettempdir()) / "neon_tts_cache"
+        self.temp_dir.mkdir(parents=True, exist_ok=True)
+    def _get_model_dir(self):
+        """Return the local model directory (delegates to download_model())."""
+        return download_model()
+    def synthesize(self, text, speaker, speed, noise_scale, noise_scale_w, sdp_ratio):
+        """Synthesize *text* to a WAV file, reusing a cached file when possible.
+        Returns a ``(path, message)`` tuple. ``path`` is the WAV file path as a
+        string, or None for empty input or on failure. Exceptions are not
+        propagated; they are reported through ``message``.
+        """
         try:
+            if not text or not text.strip():
+                return None, "⚠️ Empty input"
+            # The key must cover the full text and every parameter that
+            # shapes the audio. Hashing only a text prefix plus its length, or
+            # leaving out the noise/SDP settings, lets a different request
+            # collide with an old file and get the wrong audio back.
+            key = md5_key(
+                speaker,
+                f"{speed:.2f}",
+                str(noise_scale),
+                str(noise_scale_w),
+                str(sdp_ratio),
+                text,
+            )
+            out_path = self.temp_dir / f"{key}.wav"
+            if out_path.exists():
+                return str(out_path), "✅ Cached (From history)"
+            audio, sr = self.tts.synthesize(
+                text=text, speaker=speaker, length_scale=speed,
+                noise_scale=noise_scale, noise_scale_w=noise_scale_w, sdp_ratio=sdp_ratio
+            )
+            sf.write(str(out_path), audio, sr)
+            return str(out_path), "✅ Generated successfully"
         except Exception as e:
+            # UI boundary: return a short message instead of raising.
+            return None, f"❌ Error: {str(e)}"
+# =========================================================
+# 4) MODEL LOGIC (PRESERVED & FIXED)
+# =========================================================
+def find_latest_checkpoint(model_dir, prefix="G"):
+    """Return the path of the highest-step ``<prefix>*.pth`` file in *model_dir*.
+
+    The step number is read from the file name and may be separated from the
+    prefix by an underscore, so both ``G_100000.pth`` and ``G100000.pth`` are
+    recognised. Files that match the glob but carry no step number rank below
+    every numbered checkpoint. Returns None when nothing matches.
+    """
+    # Match on the file name only, so digits in a directory name cannot be
+    # mistaken for a step number.
+    step_re = re.compile(rf"^{re.escape(prefix)}_?(\d+)\.pth$")
+
+    def step_of(path):
+        found = step_re.match(os.path.basename(path))
+        return int(found.group(1)) if found else -1
+
+    checkpoints = glob.glob(os.path.join(model_dir, f"{prefix}*.pth"))
+    if not checkpoints:
+        return None
+    return max(checkpoints, key=step_of)
+def download_model():
+    """Return the local model directory, downloading the weights when absent.
+
+    The directory is ``valtec_tts/models/vits-vietnamese`` under the user cache
+    root: %LOCALAPPDATA% on Windows, $XDG_CACHE_HOME elsewhere, each with a
+    home-directory fallback. The Hugging Face snapshot is fetched only when
+    ``config.json`` or every ``G_*.pth`` file is missing. Returns the path as
+    a string.
+    """
+    from huggingface_hub import snapshot_download
+    hf_repo = "valtecAI-team/valtec-tts-pretrained"
+    if os.name == "nt":
+        cache_root = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
+    else:
+        cache_root = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
+    target = cache_root / "valtec_tts" / "models" / "vits-vietnamese"
+    has_config = (target / "config.json").exists()
+    already_cached = has_config and any(target.glob("G_*.pth"))
+    if not already_cached:
+        print(f"⬇️ Downloading {hf_repo}...")
+        snapshot_download(repo_id=hf_repo, local_dir=str(target))
+    return str(target)
+class VietnameseTTS:
+    """Wraps a SynthesizerTrn model plus its JSON config for text-to-audio inference."""
+    def __init__(self, ckpt, cfg, device="cpu"):
+        """Read the JSON config at *cfg*, then load the checkpoint *ckpt* onto *device*."""
+        self.device = device
+        with open(cfg, "r", encoding="utf-8") as f: self.config = json.load(f)
+        # Speaker-name -> speaker-id mapping taken from the config's "data" section.
+        self.spk2id = self.config["data"]["spk2id"]
+        self.speakers = list(self.spk2id.keys())
+        self._load(ckpt)
+    def _load(self, ckpt):
+        """Build the synthesizer from the config and load weights from *ckpt*."""
+        self.model = SynthesizerTrn(
+            len(symbols),
+            self.config["data"]["filter_length"] // 2 + 1,
+            self.config["train"]["segment_size"] // self.config["data"]["hop_length"],
+            n_speakers=self.config["data"]["n_speakers"],
+            **self.config["model"]
+        ).to(self.device)
+        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
+        state = torch.load(ckpt, map_location=self.device)["model"]
+        # "module." is removed from every key (presumably a DataParallel prefix -
+        # TODO confirm). strict=False means missing or unexpected keys are ignored
+        # without raising.
+        self.model.load_state_dict({k.replace("module.", ""): v for k,v in state.items()}, strict=False)
+        self.model.eval()
+    def synthesize(self, text, speaker, **kwargs):
+        """Run inference on *text* and return ``(audio, sampling_rate)``.
+        *speaker* is looked up in spk2id; unknown names fall back to id 0.
+        Extra keyword arguments are forwarded unchanged to ``self.model.infer``.
+        ``audio`` is a NumPy array and the rate comes from the config.
+        """
+        from src.text import cleaned_text_to_sequence
+        from src.nn import commons
+        # 1. Text Processing
+        norm_text = process_vietnamese_text(text)
+        phones, tones, _ = text_to_phonemes(norm_text, use_viphoneme=VIPHONEME_AVAILABLE)
         phone_ids, tone_ids, lang_ids = cleaned_text_to_sequence(phones, tones, "VI")
+        # Insert the value 0 between the ids of each sequence.
+        phone_ids = commons.intersperse(phone_ids, 0)
+        tone_ids = commons.intersperse(tone_ids, 0)
+        lang_ids = commons.intersperse(lang_ids, 0)
+        # 2. Prepare Tensors (each gets a leading batch dimension of size 1)
+        x = torch.LongTensor(phone_ids).unsqueeze(0).to(self.device)
+        x_len = torch.LongTensor([len(phone_ids)]).to(self.device)
+        tone = torch.LongTensor(tone_ids).unsqueeze(0).to(self.device)
+        lang = torch.LongTensor(lang_ids).unsqueeze(0).to(self.device)
+        sid = torch.LongTensor([self.spk2id.get(speaker, 0)]).to(self.device)
+        # 3. Inference without gradient tracking
         with torch.no_grad():
+            # All-zero placeholder feature tensors of shape (1, 1024, T) and
+            # (1, 768, T), where T is the interspersed phone-id length.
+            bert = torch.zeros(1024, len(phone_ids)).unsqueeze(0).to(self.device)
+            ja_bert = torch.zeros(768, len(phone_ids)).unsqueeze(0).to(self.device)
+            # Run inference
+            # "Can't call numpy() on Tensor that requires grad" is raised when the
+            # output still carries a grad_fn, so .detach() is applied before
+            # .cpu() to cut the graph.
+            outputs = self.model.infer(
+                x, x_len, sid, tone, lang,
+                bert, ja_bert,
+                **kwargs
+            )
+            audio = outputs[0][0,0].detach().cpu().numpy()
+        return audio, self.config["data"]["sampling_rate"]
 # =========================================================
+# 5) UI CONSTRUCTION (REFACTORED)
 # =========================================================
+def create_ui(manager: TTSManager):
+    def ui_header():
+        return gr.HTML("""
+        <div style="border-bottom: 1px solid rgba(255,255,255,0.08); padding-bottom: 20px; margin-bottom: 25px;">
+            <h1 style="color: #38bdf8; margin:0; font-weight:800; font-size: 2rem; letter-spacing: -0.02em;">
+                🎛️ CVNSS4.0 Vietnamese TTS Studio
+            </h1>
+            <div style="color: #94a3b8; font-size: 1rem; margin-top: 5px; font-weight: 400;">
+                Thiết kế bởi Long Ngo, 2026 • Phiên bản 1.0.1 Demo • Dự án mã nguồn mở, cố vấn Thầy Trần Tư Bình
+            </div>
         </div>
         """)
+    def ui_status_render(text, speaker, speed, chunks, dur, msg):
+        return f"""
+        <div class="statusCard">
+            <div style="margin-bottom:12px; font-weight:700; color:#38bdf8; font-size: 0.9rem; text-transform: uppercase;">
+                📟 Trạng thái hoạt động
+            </div>
+            <div style="display:flex; flex-wrap:wrap; gap:8px;">
+                <span class="pill">🎤 {speaker}</span>
+                <span class="pill">⚡ {speed}x</span>
+                <span class="pill">📄 {len(text)} ký tự</span>
+                <span class="pill">🧩 {chunks} đoạn</span>
+            </div>
+            <div class="alert {'alertOk' if '✅' in msg else 'alertWarn'}">
+                {msg}
+            </div>
+        </div>
+        """
+    with gr.Blocks(theme=gr.themes.Base(), css=NEON_CSS, title="Neon TTS Expert") as app:
+        ui_header()
         with gr.Tabs():
+            # --- TAB BASIC ---
             with gr.Tab("⚡ Chế độ Nhanh"):
                 with gr.Row():
+                    # INPUT COLUMN
+                    with gr.Column(scale=2):
+                        # REFACTOR: Using a specific ID for the container to target with CSS isolation
+                        with gr.Group(elem_classes=["panelNeon"], elem_id="input-panel-basic"):
+                            gr.HTML('<div class="panelTitle">📝 Văn bản đầu vào</div>')
+                            # THE FIX: Pure Textbox with updated styling (Dark Blue text)
+                            txt_basic = gr.Textbox(
+                                label="",
+                                show_label=False,
+                                placeholder="Nhập nội dung tiếng Việt vào... (Ví dụ: Xin chào, bạn đã học qua CVNSS4.0 chưa?)",
+                                lines=6,
+                                elem_id="main-input-basic"
+                            )
+                            with gr.Row():
+                                spk_basic = gr.Dropdown(choices=manager.tts.spea