CVNSS committed on
Commit
740c387
·
verified ·
1 Parent(s): 7c8d39b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -203
app.py CHANGED
@@ -2,227 +2,188 @@
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
- CVNSS4.0 Vietnamese TTS Studio (Fixed & Auto-Healing Version)
6
- - Fix: SyntaxError Dropdown
7
- - Fix: NameError SynthesizerTrn (Auto download src)
8
  """
9
 
10
  import os
11
  import sys
 
 
 
 
 
12
  import json
13
  import time
14
  import glob
15
  import re
16
- import hashlib
17
- import tempfile
18
- import subprocess
19
  import shutil
 
20
  from pathlib import Path
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  import torch
23
  import numpy as np
24
  import soundfile as sf
25
  import gradio as gr
26
 
27
  # =========================================================
28
- # 0) AUTO-HEALING: DOWNLOAD MISSING CORE MODULES
29
  # =========================================================
30
  def setup_environment():
31
  """Tự động tải thư mục src nếu bị thiếu"""
32
  if not os.path.exists("src"):
33
- print("🔄 Phát hiện thiếu thư mục 'src'. Đang tải mã nguồn cốt lõi (Core Modules)...")
34
  try:
35
- # Clone repo chứa src từ HuggingFace Space gốc
36
  subprocess.run(
37
  ["git", "clone", "https://huggingface.co/spaces/valtecAI-team/valtec-vietnamese-tts", "temp_repo"],
38
  check=True
39
  )
40
- # Di chuyển thư mục src ra ngoài
41
  if os.path.exists("temp_repo/src"):
 
42
  shutil.move("temp_repo/src", "./src")
43
  print("✅ Đã cài đặt xong 'src'.")
44
  else:
45
  print("❌ Không tìm thấy 'src' trong repo đã tải.")
46
 
47
- # Dọn dẹp
48
  shutil.rmtree("temp_repo", ignore_errors=True)
49
-
50
  except Exception as e:
51
  print(f"❌ Lỗi khi tải mã nguồn: {e}")
52
- print("⚠️ Vui lòng kiểm tra kết nối mạng hoặc cài đặt git.")
53
 
54
- # Chạy setup trước khi import
55
  setup_environment()
56
 
57
  # Add src to path
58
  sys.path.insert(0, str(Path(__file__).parent))
59
 
60
- # Import core modules (Bây giờ sẽ không bị lỗi nữa)
61
  try:
 
 
62
  from src.vietnamese.text_processor import process_vietnamese_text
63
  from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
64
  from src.models.synthesizer import SynthesizerTrn
65
- from src.text.symbols import symbols
66
  print("✅ Core modules imported successfully.")
67
  except ImportError as e:
68
- print(f"🔥 Critical Import Error: {e}")
69
- print("⚠️ Cấu trúc file vẫn chưa đúng. Hãy đảm bảo thư mục 'src' nằm cùng cấp với app.py")
70
- # Define dummy classes to prevent immediate crash, allow UI to show error
71
  VIPHONEME_AVAILABLE = False
72
  symbols = []
73
  SynthesizerTrn = None
74
 
75
-
76
  # =========================================================
77
- # 1) SYSTEM CONFIGURATION & CSS
78
  # =========================================================
79
  NEON_CSS = r"""
80
- :root {
81
- --bg-dark: #0f172a;
82
- --bg-panel: rgba(30, 41, 59, 0.7);
83
- --line: rgba(148, 163, 184, 0.1);
84
- --text-primary: #e2e8f0;
85
- --neon-cyan: #06b6d4;
86
- --neon-accent: #38bdf8;
87
- --radius-lg: 16px;
88
- --radius-sm: 8px;
89
- --input-bg: #f1f5f9;
90
- --input-text: #0f4c81;
91
- --input-placeholder: #64748b;
92
- }
93
- body, .gradio-container, .app {
94
- background: radial-gradient(circle at 50% 0%, #1e293b 0%, #0f172a 100%) !important;
95
- color: var(--text-primary) !important;
96
- font-family: 'Inter', 'Segoe UI', sans-serif;
97
- }
98
- .panelNeon {
99
- border: 1px solid rgba(255,255,255,0.08);
100
- border-radius: var(--radius-lg);
101
- background: var(--bg-panel);
102
- backdrop-filter: blur(12px);
103
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
104
- padding: 20px;
105
- margin-bottom: 20px;
106
- }
107
- .panelNeon textarea, .panelNeon input[type="text"] {
108
- background: var(--input-bg) !important;
109
- color: var(--input-text) !important;
110
- border: 2px solid transparent !important;
111
- border-radius: var(--radius-sm) !important;
112
- font-weight: 500 !important;
113
- font-size: 1rem !important;
114
- padding: 12px !important;
115
- }
116
- button.primary, .gr-button-primary {
117
- background: linear-gradient(135deg, #06b6d4 0%, #3b82f6 100%) !important;
118
- border: none !important;
119
- color: white !important;
120
- font-weight: 700 !important;
121
- }
122
- .statusCard {
123
- background: rgba(15, 23, 42, 0.6);
124
- border-radius: var(--radius-sm);
125
- padding: 16px;
126
- border: 1px solid rgba(255,255,255,0.05);
127
- }
128
- .pill {
129
- display: inline-flex;
130
- align-items: center;
131
- padding: 4px 12px;
132
- border-radius: 99px;
133
- background: rgba(56, 189, 248, 0.1);
134
- color: #38bdf8;
135
- border: 1px solid rgba(56, 189, 248, 0.2);
136
- font-size: 0.8rem;
137
- font-weight: 600;
138
- margin-right: 6px;
139
- }
140
- .alert { padding: 12px; border-radius: 8px; margin-top: 12px; font-size: 0.9rem; }
141
- .alertOk { background: rgba(34, 197, 94, 0.1); color: #4ade80; border: 1px solid rgba(34, 197, 94, 0.2); }
142
- .alertWarn { background: rgba(234, 179, 8, 0.1); color: #facc15; border: 1px solid rgba(234, 179, 8, 0.2); }
143
  """
144
 
145
  # =========================================================
146
- # 2) UTILITIES & HELPERS
147
  # =========================================================
148
- def check_viphoneme():
149
- if not VIPHONEME_AVAILABLE:
150
- print("⚠️ Viphoneme not available.")
151
- return False
152
- return True
153
 
154
- def md5_key(*parts: str) -> str:
155
- return hashlib.md5("|".join(parts).encode("utf-8")).hexdigest()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
  # =========================================================
158
- # 3) CORE ENGINE WRAPPER
159
  # =========================================================
160
  class TTSManager:
161
  def __init__(self):
162
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
163
  print(f"🔧 Initializing TTS on {self.device}...")
164
 
165
- # Check dependency again
166
  if SynthesizerTrn is None:
167
- raise ImportError("Class SynthesizerTrn chưa được định nghĩa. Kiểm tra lại thư mục src.")
168
 
169
- self.model_dir = self._get_model_dir()
170
  self.ckpt_path = find_latest_checkpoint(self.model_dir, "G")
171
  self.cfg_path = os.path.join(self.model_dir, "config.json")
172
 
173
- if not self.ckpt_path:
174
- raise FileNotFoundError(f"No checkpoint found in {self.model_dir}")
175
 
176
  self.tts = VietnameseTTS(self.ckpt_path, self.cfg_path, self.device)
177
  self.temp_dir = Path(tempfile.gettempdir()) / "neon_tts_cache"
178
  self.temp_dir.mkdir(parents=True, exist_ok=True)
179
 
180
- def _get_model_dir(self):
181
- return download_model()
182
-
183
- def synthesize(self, text, speaker, speed, noise_scale, noise_scale_w, sdp_ratio):
184
  try:
185
- if not text or not text.strip():
186
- return None, "⚠️ Empty input"
187
-
188
- key = md5_key(speaker, f"{speed:.2f}", text[:20], str(len(text)))
189
  out_path = self.temp_dir / f"{key}.wav"
190
 
191
- if out_path.exists():
192
- return str(out_path), "✅ Cached (From history)"
193
 
194
- audio, sr = self.tts.synthesize(
195
- text=text, speaker=speaker, length_scale=speed,
196
- noise_scale=noise_scale, noise_scale_w=noise_scale_w, sdp_ratio=sdp_ratio
197
- )
198
  sf.write(str(out_path), audio, sr)
199
- return str(out_path), "✅ Generated successfully"
200
  except Exception as e:
201
- return None, f"❌ Error: {str(e)}"
202
-
203
- # =========================================================
204
- # 4) MODEL LOGIC
205
- # =========================================================
206
- def find_latest_checkpoint(model_dir, prefix="G"):
207
- pattern = os.path.join(model_dir, f"{prefix}*.pth")
208
- checkpoints = glob.glob(pattern)
209
- if not checkpoints: return None
210
- checkpoints.sort(key=lambda x: int(re.search(rf"{prefix}(\d+)\.pth", x).group(1)) if re.search(rf"{prefix}(\d+)\.pth", x) else 0, reverse=True)
211
- return checkpoints[0]
212
-
213
- def download_model():
214
- from huggingface_hub import snapshot_download
215
- hf_repo = "valtecAI-team/valtec-tts-pretrained"
216
- cache_base = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
217
- if os.name == "nt": cache_base = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
218
-
219
- model_dir = cache_base / "valtec_tts" / "models" / "vits-vietnamese"
220
- if (model_dir / "config.json").exists() and list(model_dir.glob("G_*.pth")):
221
- return str(model_dir)
222
-
223
- print(f"⬇️ Downloading {hf_repo}...")
224
- snapshot_download(repo_id=hf_repo, local_dir=str(model_dir))
225
- return str(model_dir)
226
 
227
  class VietnameseTTS:
228
  def __init__(self, ckpt, cfg, device="cpu"):
@@ -230,9 +191,7 @@ class VietnameseTTS:
230
  with open(cfg, "r", encoding="utf-8") as f: self.config = json.load(f)
231
  self.spk2id = self.config["data"]["spk2id"]
232
  self.speakers = list(self.spk2id.keys())
233
- self._load(ckpt)
234
 
235
- def _load(self, ckpt):
236
  self.model = SynthesizerTrn(
237
  len(symbols),
238
  self.config["data"]["filter_length"] // 2 + 1,
@@ -240,11 +199,12 @@ class VietnameseTTS:
240
  n_speakers=self.config["data"]["n_speakers"],
241
  **self.config["model"]
242
  ).to(self.device)
 
243
  state = torch.load(ckpt, map_location=self.device)["model"]
244
  self.model.load_state_dict({k.replace("module.", ""): v for k,v in state.items()}, strict=False)
245
  self.model.eval()
246
 
247
- def synthesize(self, text, speaker, **kwargs):
248
  from src.text import cleaned_text_to_sequence
249
  from src.nn import commons
250
 
@@ -265,83 +225,45 @@ class VietnameseTTS:
265
  with torch.no_grad():
266
  bert = torch.zeros(1024, len(phone_ids)).unsqueeze(0).to(self.device)
267
  ja_bert = torch.zeros(768, len(phone_ids)).unsqueeze(0).to(self.device)
268
- outputs = self.model.infer(x, x_len, sid, tone, lang, bert, ja_bert, **kwargs)
269
  audio = outputs[0][0,0].detach().cpu().numpy()
270
 
271
  return audio, self.config["data"]["sampling_rate"]
272
 
273
  # =========================================================
274
- # 5) UI CONSTRUCTION
275
  # =========================================================
276
  def create_ui(manager: TTSManager):
277
- def ui_header():
278
- return gr.HTML("""
279
- <div style="border-bottom: 1px solid rgba(255,255,255,0.08); padding-bottom: 20px; margin-bottom: 25px;">
280
- <h1 style="color: #38bdf8; margin:0; font-weight:800; font-size: 2rem;">🎛️ CVNSS4.0 TTS Studio</h1>
281
- <div style="color: #94a3b8; font-size: 1rem;">Fix: Auto-Healing Source • Expert Mode</div>
282
- </div>
283
- """)
284
-
285
- def ui_status_render(text, speaker, speed, dur, msg):
286
- return f"""
287
- <div class="statusCard">
288
- <div style="display:flex; flex-wrap:wrap; gap:8px;">
289
- <span class="pill">🎤 {speaker}</span>
290
- <span class="pill">⚡ {speed}x</span>
291
- <span class="pill">⏱️ {dur:.2f}s</span>
292
- </div>
293
- <div class="alert {'alertOk' if '✅' in msg else 'alertWarn'}">{msg}</div>
294
- </div>
295
- """
296
-
297
- def run_inference(text, speaker, speed):
298
- start_t = time.time()
299
- audio_path, msg = manager.synthesize(text, speaker, speed, 0.667, 0.8, 0.2)
300
- duration = time.time() - start_t
301
- html_status = ui_status_render(text, speaker, speed, duration, msg)
302
- return audio_path, html_status
303
 
304
- with gr.Blocks(css=NEON_CSS, title="Neon TTS Expert") as app:
305
- ui_header()
306
- with gr.Tabs():
307
- with gr.Tab("⚡ Chế độ Nhanh"):
308
- with gr.Row():
309
- with gr.Column(scale=2):
310
- with gr.Group(elem_classes=["panelNeon"]):
311
- gr.HTML('<div class="panelTitle">📝 Văn bản đầu vào</div>')
312
- txt_basic = gr.Textbox(show_label=False, lines=5, placeholder="Nhập văn bản tiếng Việt...", value="Xin chào, hệ thống đã tự động sửa lỗi thiếu file nguồn.")
313
-
314
- with gr.Row():
315
- # === FIX DROPDOWN HERE ===
316
- spk_basic = gr.Dropdown(
317
- choices=manager.tts.speakers,
318
- value=manager.tts.speakers[0] if manager.tts.speakers else None,
319
- label="Người đọc",
320
- interactive=True,
321
- scale=2
322
- )
323
- speed_basic = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Tốc độ", scale=2)
324
-
325
- btn_basic = gr.Button("🔊 Đọc Ngay", variant="primary")
326
-
327
- with gr.Column(scale=1):
328
- with gr.Group(elem_classes=["panelNeon"]):
329
- gr.HTML('<div class="panelTitle">🎧 Kết quả</div>')
330
- out_audio = gr.Audio(label="Audio Output", type="filepath")
331
- out_status = gr.HTML()
332
-
333
- btn_basic.click(run_inference, [txt_basic, spk_basic, speed_basic], [out_audio, out_status])
334
  return app
335
 
336
  # =========================================================
337
- # 6) MAIN EXECUTION
338
  # =========================================================
339
  if __name__ == "__main__":
340
  try:
341
  manager = TTSManager()
342
- check_viphoneme()
343
  app = create_ui(manager)
344
- print("🚀 Launching App...")
345
  app.launch()
346
  except Exception as e:
347
- print(f"🔥 Critical Start Error: {e}")
 
2
  # -*- coding: utf-8 -*-
3
 
4
  """
5
+ CVNSS4.0 Vietnamese TTS Studio
6
+ - Fix: Python 3.12 Compatibility (Missing 'imp' module)
7
+ - Fix: Auto-download 'src'
8
  """
9
 
10
  import os
11
  import sys
12
+ import types
13
+ import importlib
14
+ import importlib.util
15
+ import hashlib
16
+ import tempfile
17
  import json
18
  import time
19
  import glob
20
  import re
 
 
 
21
  import shutil
22
+ import subprocess
23
  from pathlib import Path
24
 
25
+ # =========================================================
26
+ # 0) CRITICAL PATCH: FIX PYTHON 3.12 'imp' ERROR
27
+ # =========================================================
28
+ # Python 3.12 đã xóa module 'imp', nhưng code TTS cũ vẫn gọi nó.
29
+ # Đoạn code này tạo ra một module 'imp' giả để đánh lừa hệ thống.
30
def make_imp_shim():
    """Build a minimal stand-in for the stdlib ``imp`` module.

    Python 3.12 removed ``imp``, but the legacy TTS code still imports it.
    The returned module object exposes only ``new_module``, ``reload``,
    ``find_module`` and ``load_module``.

    Returns:
        types.ModuleType: a module named ``imp``; the caller is responsible
        for registering it in ``sys.modules``.
    """
    import importlib.machinery

    shim = types.ModuleType("imp")
    shim.new_module = types.ModuleType
    shim.reload = importlib.reload

    def find_module(name, path=None):
        """Locate ``name`` and return a fake ``(file, pathname, description)``.

        ``path`` is a list of directories to search, as in the old API.
        It must not be handed to ``importlib.util.find_spec``: that
        function's second argument is a *package* name, so the directories
        would be silently ignored.

        Raises:
            ImportError: if the module cannot be found.
        """
        if path is None:
            spec = importlib.util.find_spec(name)
        else:
            spec = importlib.machinery.PathFinder.find_spec(name, path)
        if spec is None:
            raise ImportError(f"No module named {name}")
        # The open file handle and description are placeholders only.
        return (None, spec.origin, ("", "", 0))

    def load_module(name, file, pathname, description):
        """Load the source file at ``pathname`` as module ``name``.

        ``file`` and ``description`` are accepted only for signature
        compatibility and are ignored.

        Raises:
            ImportError: if no loader can be built for ``pathname``.
        """
        spec = importlib.util.spec_from_file_location(name, pathname)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot load module {name} from {pathname}")
        module = importlib.util.module_from_spec(spec)
        # Register before executing so code that imports the module while it
        # is still initialising gets this object instead of a second copy.
        sys.modules[name] = module
        try:
            spec.loader.exec_module(module)
        except BaseException:
            # Do not leave a half-initialised module behind.
            sys.modules.pop(name, None)
            raise
        return module

    shim.find_module = find_module
    shim.load_module = load_module
    return shim


try:
    import imp
except ImportError:
    print("🔧 Đang vá lỗi tương thích Python 3.12+ (Mocking 'imp' module)...")
    imp = make_imp_shim()
    sys.modules['imp'] = imp
    print("✅ Đã vá xong module 'imp'.")
58
+
59
+ # Các import nặng khác sau khi đã vá lỗi
60
  import torch
61
  import numpy as np
62
  import soundfile as sf
63
  import gradio as gr
64
 
65
  # =========================================================
66
+ # 1) AUTO-HEALING: DOWNLOAD MISSING CORE MODULES
67
  # =========================================================
68
  def setup_environment():
69
  """Tự động tải thư mục src nếu bị thiếu"""
70
  if not os.path.exists("src"):
71
+ print("🔄 Phát hiện thiếu thư mục 'src'. Đang tải mã nguồn cốt lõi...")
72
  try:
73
+ # Clone repo chứa src
74
  subprocess.run(
75
  ["git", "clone", "https://huggingface.co/spaces/valtecAI-team/valtec-vietnamese-tts", "temp_repo"],
76
  check=True
77
  )
78
+ # Di chuyển thư mục src
79
  if os.path.exists("temp_repo/src"):
80
+ if os.path.exists("src"): shutil.rmtree("src") # Xóa cũ nếu lỗi
81
  shutil.move("temp_repo/src", "./src")
82
  print("✅ Đã cài đặt xong 'src'.")
83
  else:
84
  print("❌ Không tìm thấy 'src' trong repo đã tải.")
85
 
 
86
  shutil.rmtree("temp_repo", ignore_errors=True)
 
87
  except Exception as e:
88
  print(f"❌ Lỗi khi tải mã nguồn: {e}")
 
89
 
90
+ # Chạy setup
91
  setup_environment()
92
 
93
  # Add src to path
94
  sys.path.insert(0, str(Path(__file__).parent))
95
 
96
+ # Import core modules
97
  try:
98
+ # Import theo thứ tự để tránh lỗi vòng lặp
99
+ from src.text.symbols import symbols
100
  from src.vietnamese.text_processor import process_vietnamese_text
101
  from src.vietnamese.phonemizer import text_to_phonemes, VIPHONEME_AVAILABLE
102
  from src.models.synthesizer import SynthesizerTrn
 
103
  print("✅ Core modules imported successfully.")
104
  except ImportError as e:
105
+ print(f"🔥 Vẫn còn lỗi Import: {e}")
106
+ # Fallback objects
 
107
  VIPHONEME_AVAILABLE = False
108
  symbols = []
109
  SynthesizerTrn = None
110
 
 
111
  # =========================================================
112
+ # 2) SYSTEM CONFIGURATION & CSS
113
  # =========================================================
114
  NEON_CSS = r"""
115
+ :root { --bg-dark: #0f172a; --text-primary: #e2e8f0; --neon-cyan: #06b6d4; }
116
+ body, .gradio-container, .app { background: radial-gradient(circle at 50% 0%, #1e293b 0%, #0f172a 100%) !important; color: white !important; }
117
+ .panelNeon { background: rgba(30, 41, 59, 0.7); border: 1px solid rgba(255,255,255,0.08); border-radius: 16px; padding: 20px; margin-bottom: 20px; }
118
+ .panelNeon textarea, .panelNeon input, .panelNeon select { background: #f1f5f9 !important; color: #0f4c81 !important; font-weight: 500; }
119
+ button.primary { background: linear-gradient(135deg, #06b6d4 0%, #3b82f6 100%) !important; color: white; font-weight: bold; border: none; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  """
121
 
122
  # =========================================================
123
+ # 3) UTILITIES
124
  # =========================================================
125
def md5_key(*parts):
    """Return the hex MD5 digest of ``parts`` joined with ``|``.

    Each part is converted with ``str()`` first, so numbers and ``None``
    can be passed directly; string parts hash exactly as before. The digest
    is used as a cache file name, not for security.
    """
    joined = "|".join(str(part) for part in parts)
    return hashlib.md5(joined.encode("utf-8")).hexdigest()
 
 
 
 
126
 
127
def find_latest_checkpoint(model_dir, prefix="G"):
    """Return the path of the highest-numbered ``<prefix>*.pth`` checkpoint.

    The step number is read from the file name and may be separated from
    the prefix by an underscore, so both ``G_1000.pth`` and ``G1000.pth``
    are recognised. Files without a parsable step rank below every numbered
    file. Ties are broken by path so the result is deterministic.

    Args:
        model_dir: directory to search (``str`` or path-like).
        prefix: checkpoint file-name prefix.

    Returns:
        The checkpoint path as a string, or ``None`` when nothing matches.
    """
    # Escape the inputs so directories or prefixes containing glob/regex
    # metacharacters are matched literally.
    pattern = os.path.join(glob.escape(os.fspath(model_dir)), f"{glob.escape(prefix)}*.pth")
    checkpoints = glob.glob(pattern)
    if not checkpoints:
        return None

    step_re = re.compile(rf"^{re.escape(prefix)}_?(\d+)\.pth$")

    def step_of(path):
        # Match on the file name only; digits in parent directories must
        # not be mistaken for a training step.
        found = step_re.match(os.path.basename(path))
        return int(found.group(1)) if found else -1

    return max(checkpoints, key=lambda path: (step_of(path), path))
133
+
134
def download_model():
    """Return the local model directory, downloading the model if needed.

    The directory lives under the per-user cache: ``LOCALAPPDATA`` on
    Windows, otherwise ``XDG_CACHE_HOME``, each falling back to its usual
    default when unset or empty. If ``config.json`` and at least one
    ``G_*.pth`` file are already there, nothing is downloaded.

    Returns:
        str: the model directory path. It is returned even when the
        download fails; the caller validates that the files exist.
    """
    from huggingface_hub import snapshot_download

    hf_repo = "valtecAI-team/valtec-tts-pretrained"
    # `or` instead of a .get() default: an empty variable must count as
    # unset, otherwise Path("") silently points at the current directory.
    if os.name == "nt":
        cache_base = Path(os.environ.get("LOCALAPPDATA") or Path.home() / "AppData" / "Local")
    else:
        cache_base = Path(os.environ.get("XDG_CACHE_HOME") or Path.home() / ".cache")
    model_dir = cache_base / "valtec_tts" / "models" / "vits-vietnamese"

    if (model_dir / "config.json").exists() and any(model_dir.glob("G_*.pth")):
        return str(model_dir)

    print(f"⬇️ Downloading Model {hf_repo}...")
    try:
        snapshot_download(repo_id=hf_repo, local_dir=str(model_dir))
    except Exception as e:
        # Deliberate best-effort boundary: report and return the path anyway
        # so the caller's own file checks produce the final error.
        print(f"Lỗi tải model: {e}")
    return str(model_dir)
151
 
152
  # =========================================================
153
+ # 4) MANAGER & MODEL WRAPPER
154
  # =========================================================
155
  class TTSManager:
156
  def __init__(self):
157
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
158
  print(f"🔧 Initializing TTS on {self.device}...")
159
 
 
160
  if SynthesizerTrn is None:
161
+ raise RuntimeError("Lỗi nghiêm trọng: Không thể tải class SynthesizerTrn do lỗi import.")
162
 
163
+ self.model_dir = download_model()
164
  self.ckpt_path = find_latest_checkpoint(self.model_dir, "G")
165
  self.cfg_path = os.path.join(self.model_dir, "config.json")
166
 
167
+ if not self.ckpt_path or not os.path.exists(self.cfg_path):
168
+ raise FileNotFoundError("Không tìm thấy Model Checkpoint hoặc Config.")
169
 
170
  self.tts = VietnameseTTS(self.ckpt_path, self.cfg_path, self.device)
171
  self.temp_dir = Path(tempfile.gettempdir()) / "neon_tts_cache"
172
  self.temp_dir.mkdir(parents=True, exist_ok=True)
173
 
174
+ def synthesize(self, text, speaker, speed):
 
 
 
175
  try:
176
+ if not text: return None, "⚠️ Chưa nhập nội dung"
177
+ key = md5_key(speaker, str(speed), text[:30])
 
 
178
  out_path = self.temp_dir / f"{key}.wav"
179
 
180
+ if out_path.exists(): return str(out_path), "✅ Lấy từ Cache"
 
181
 
182
+ audio, sr = self.tts.synthesize(text, speaker, speed)
 
 
 
183
  sf.write(str(out_path), audio, sr)
184
+ return str(out_path), "✅ Tạo thành công"
185
  except Exception as e:
186
+ return None, f"❌ Lỗi: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  class VietnameseTTS:
189
  def __init__(self, ckpt, cfg, device="cpu"):
 
191
  with open(cfg, "r", encoding="utf-8") as f: self.config = json.load(f)
192
  self.spk2id = self.config["data"]["spk2id"]
193
  self.speakers = list(self.spk2id.keys())
 
194
 
 
195
  self.model = SynthesizerTrn(
196
  len(symbols),
197
  self.config["data"]["filter_length"] // 2 + 1,
 
199
  n_speakers=self.config["data"]["n_speakers"],
200
  **self.config["model"]
201
  ).to(self.device)
202
+
203
  state = torch.load(ckpt, map_location=self.device)["model"]
204
  self.model.load_state_dict({k.replace("module.", ""): v for k,v in state.items()}, strict=False)
205
  self.model.eval()
206
 
207
+ def synthesize(self, text, speaker, speed):
208
  from src.text import cleaned_text_to_sequence
209
  from src.nn import commons
210
 
 
225
  with torch.no_grad():
226
  bert = torch.zeros(1024, len(phone_ids)).unsqueeze(0).to(self.device)
227
  ja_bert = torch.zeros(768, len(phone_ids)).unsqueeze(0).to(self.device)
228
+ outputs = self.model.infer(x, x_len, sid, tone, lang, bert, ja_bert, noise_scale=0.667, noise_scale_w=0.8, length_scale=speed)
229
  audio = outputs[0][0,0].detach().cpu().numpy()
230
 
231
  return audio, self.config["data"]["sampling_rate"]
232
 
233
  # =========================================================
234
+ # 5) UI
235
  # =========================================================
236
  def create_ui(manager: TTSManager):
237
+ def run(text, spk, spd):
238
+ start = time.time()
239
+ path, msg = manager.synthesize(text, spk, spd)
240
+ dur = time.time() - start
241
+ return path, f"<div style='padding:10px; color:#38bdf8'>⏱️ {dur:.2f}s | {msg}</div>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
+ with gr.Blocks(css=NEON_CSS, title="Fixed TTS") as app:
244
+ gr.Markdown("## 🎛️ CVNSS4.0 TTS (Python 3.12 Patched)")
245
+
246
+ with gr.Row():
247
+ with gr.Column():
248
+ with gr.Group(elem_classes=["panelNeon"]):
249
+ txt = gr.Textbox(label="Văn bản", value="Xin chào, hệ thống đã sửa lỗi thư viện imp.", lines=4)
250
+ spk = gr.Dropdown(choices=manager.tts.speakers, value=manager.tts.speakers[0], label="Người đọc")
251
+ spd = gr.Slider(0.5, 2.0, 1.0, step=0.1, label="Tốc độ")
252
+ btn = gr.Button("Đọc ngay", variant="primary")
253
+ with gr.Column():
254
+ out_audio = gr.Audio(label="Audio", type="filepath")
255
+ out_msg = gr.HTML()
256
+
257
+ btn.click(run, [txt, spk, spd], [out_audio, out_msg])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  return app
259
 
260
  # =========================================================
261
+ # 6) MAIN
262
  # =========================================================
263
  if __name__ == "__main__":
264
  try:
265
  manager = TTSManager()
 
266
  app = create_ui(manager)
 
267
  app.launch()
268
  except Exception as e:
269
+ print(f"🔥 Lỗi khởi động cuối cùng: {e}")