Spaces:

Arrcttacsrks
/

VieNeu-TTS-Run-On-CPU2

Running

App Files Community

Arrcttacsrks committed about 17 hours ago

Commit

6b200af

verified ·

1 Parent(s): 86fdb38

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -40

app.py CHANGED Viewed

@@ -354,40 +354,63 @@ def decode_audio(codes_str, codec):
     return recon[0, 0, :]
 # --- MODEL LOADING ---
-print("📦 Đang tải model Q4 GGUF và Codec ONNX...")
-model_loaded = False
-backbone = None
-codec = None
-try:
-    backbone = Llama.from_pretrained(
-        repo_id=BACKBONE_REPO,
-        filename="*.gguf",
-        verbose=False,
-        n_gpu_layers=-1,
-        n_ctx=2048,
-        mlock=True,
-        flash_attn=True,
-    )
-    codec = NeuCodecOnnxDecoder.from_pretrained(CODEC_REPO)
-    print("✅ Model đã tải thành công!")
-    model_loaded = True
-except Exception as e:
-    import traceback
-    traceback.print_exc()
-    print(f"❌ Lỗi khi tải model: {e}")
-    model_loaded = False
 # --- SYNTHESIS FUNCTION (Internal) ---
 def synthesize_speech_internal(text, voice_choice):
     """Internal synthesis function không phụ thuộc UI"""
-    global backbone, codec, model_loaded
-    if not model_loaded:
-        print("❌ Model chưa được tải")
-        return None
     if not text or text.strip() == "":
         print("❌ Text rỗng")
@@ -451,7 +474,7 @@ def synthesize_speech_internal(text, voice_choice):
             )
             # Generate
-            output = backbone(
                 prompt,
                 max_tokens=2048,
                 temperature=1.0,
@@ -461,7 +484,7 @@ def synthesize_speech_internal(text, voice_choice):
             output_str = output["choices"][0]["text"]
             # Decode
-            chunk_wav = decode_audio(output_str, codec)
             if chunk_wav is not None and len(chunk_wav) > 0:
                 all_audio_segments.append(chunk_wav)
@@ -495,11 +518,13 @@ def synthesize_speech_internal(text, voice_choice):
 # --- SYNTHESIS FUNCTION (UI) ---
 def synthesize_speech(text, voice_choice):
     """Main synthesis function với UI feedback"""
-    global backbone, codec, model_loaded
-    if not model_loaded:
-        yield None, "⚠️ Model chưa tải. Vui lòng kiểm tra lỗi console!"
-        return
     if not text or text.strip() == "":
         yield None, "⚠️ Vui lòng nhập văn bản!"
@@ -553,7 +578,8 @@ def synthesize_speech(text, voice_choice):
     try:
         for i, chunk in enumerate(text_chunks):
-            yield None, f"⏳ Đang xử lý đoạn {i+1}/{total_chunks}... ({int((i/total_chunks)*100)}%)"
             # Phonemize
             ref_text_phoneme = phonemize_with_dict(ref_text_raw)
@@ -568,7 +594,7 @@ def synthesize_speech(text, voice_choice):
             )
             # Generate
-            output = backbone(
                 prompt,
                 max_tokens=2048,
                 temperature=1.0,
@@ -578,7 +604,7 @@ def synthesize_speech(text, voice_choice):
             output_str = output["choices"][0]["text"]
             # Decode
-            chunk_wav = decode_audio(output_str, codec)
             if chunk_wav is not None and len(chunk_wav) > 0:
                 all_audio_segments.append(chunk_wav)
@@ -598,11 +624,13 @@ def synthesize_speech(text, voice_choice):
             output_path = tmp.name
         process_time = time.time() - start_time
         # Lưu vào lịch sử
         permanent_path = add_to_history(raw_text, voice_choice, output_path, process_time, "Thành công")
-        yield permanent_path, f"✅ Hoàn tất! (Tổng thời gian: {process_time:.2f}s | RTF: {process_time/(len(final_wav)/SAMPLE_RATE):.3f})"
     except Exception as e:
         import traceback
@@ -731,7 +759,7 @@ EXAMPLES_LIST = [
     ["Thành phố Hồ Chí Minh luôn chuyển mình không ngừng với nhịp sống hối hả, năng động.", "Dung (nữ miền Nam)"],
 ]
-initial_status = f"✅ Model đã tải thành công! (Chạy trên **{DEVICE_INFO}**). Hỗ trợ xử lý background và lưu lịch sử." if model_loaded else "❌ Lỗi khi tải model."
 with gr.Blocks(title="VieNeu-TTS", theme=theme, css=css) as demo:
     with gr.Column(elem_classes="container"):
@@ -757,6 +785,22 @@ with gr.Blocks(title="VieNeu-TTS", theme=theme, css=css) as demo:
         """)
         status_banner = gr.Markdown(initial_status)
         # --- TABS ---
         with gr.Tabs():
@@ -778,7 +822,7 @@ with gr.Blocks(title="VieNeu-TTS", theme=theme, css=css) as demo:
                         )
                         with gr.Row():
-                            btn_generate = gr.Button("🎵 Bắt đầu tổng hợp", variant="primary", size="lg", interactive=model_loaded)
                             btn_clear = gr.Button("🗑️ Xóa", variant="secondary", size="lg")
                     with gr.Column(scale=2):
@@ -905,6 +949,7 @@ with gr.Blocks(title="VieNeu-TTS", theme=theme, css=css) as demo:
                 - ✅ Chia chunk thông minh
                 - ✅ Thread-safe operations
                 - ✅ Tự động xóa file cũ khi vượt quá 100 bản ghi
                 ### 📊 Thống kê
                 {get_processing_stats()}
@@ -969,6 +1014,7 @@ if __name__ == "__main__":
     print(f"📂 Lịch sử lưu tại: {HISTORY_DIR}")
     print(f"🎭 Số giọng mẫu: {len(VOICE_SAMPLES)}")
     print(f"⚙️  Chế độ: {DEVICE_INFO}")
     print(f"{'='*60}\n")
     demo.queue().launch(

     return recon[0, 0, :]
 # --- MODEL LOADING ---
+# Sử dụng class để giữ state của model
+class ModelManager:
+    def __init__(self):
+        self.backbone = None
+        self.codec = None
+        self.model_loaded = False
+        self.loading_lock = threading.Lock()
+        self.load_models()
+    def load_models(self):
+        """Tải models với thread safety"""
+        with self.loading_lock:
+            if self.model_loaded:
+                print("✅ Models đã được tải trước đó")
+                return True
+            print("📦 Đang tải model Q4 GGUF và Codec ONNX...")
+            try:
+                self.backbone = Llama.from_pretrained(
+                    repo_id=BACKBONE_REPO,
+                    filename="*.gguf",
+                    verbose=False,
+                    n_gpu_layers=-1,
+                    n_ctx=2048,
+                    mlock=True,
+                    flash_attn=True,
+                )
+                self.codec = NeuCodecOnnxDecoder.from_pretrained(CODEC_REPO)
+                self.model_loaded = True
+                print("✅ Model đã tải thành công!")
+                return True
+            except Exception as e:
+                import traceback
+                traceback.print_exc()
+                print(f"❌ Lỗi khi tải model: {e}")
+                self.model_loaded = False
+                return False
+    def is_ready(self):
+        """Kiểm tra xem model đã sẵn sàng chưa"""
+        return self.model_loaded and self.backbone is not None and self.codec is not None
+# Create the module-level ModelManager instance; constructing it triggers the first load attempt
+model_manager = ModelManager()
 # --- SYNTHESIS FUNCTION (Internal) ---
 def synthesize_speech_internal(text, voice_choice):
     """Internal synthesis function không phụ thuộc UI"""
+    if not model_manager.is_ready():
+        print("❌ Model chưa được tải, đang thử tải lại...")
+        if not model_manager.load_models():
+            print("❌ Không thể tải model")
+            return None
     if not text or text.strip() == "":
         print("❌ Text rỗng")
             )
             # Generate
+            output = model_manager.backbone(
                 prompt,
                 max_tokens=2048,
                 temperature=1.0,
             output_str = output["choices"][0]["text"]
             # Decode
+            chunk_wav = decode_audio(output_str, model_manager.codec)
             if chunk_wav is not None and len(chunk_wav) > 0:
                 all_audio_segments.append(chunk_wav)
 # --- SYNTHESIS FUNCTION (UI) ---
 def synthesize_speech(text, voice_choice):
     """Main synthesis function với UI feedback"""
+    if not model_manager.is_ready():
+        yield None, "⚠️ Model chưa tải. Đang thử tải lại..."
+        if not model_manager.load_models():
+            yield None, "❌ Không thể tải model. Vui lòng kiểm tra console!"
+            return
+        yield None, "✅ Model đã tải thành công!"
     if not text or text.strip() == "":
         yield None, "⚠️ Vui lòng nhập văn bản!"
     try:
         for i, chunk in enumerate(text_chunks):
+            progress = int((i/total_chunks)*100)
+            yield None, f"⏳ Đang xử lý đoạn {i+1}/{total_chunks}... ({progress}%)"
             # Phonemize
             ref_text_phoneme = phonemize_with_dict(ref_text_raw)
             )
             # Generate
+            output = model_manager.backbone(
                 prompt,
                 max_tokens=2048,
                 temperature=1.0,
             output_str = output["choices"][0]["text"]
             # Decode
+            chunk_wav = decode_audio(output_str, model_manager.codec)
             if chunk_wav is not None and len(chunk_wav) > 0:
                 all_audio_segments.append(chunk_wav)
             output_path = tmp.name
         process_time = time.time() - start_time
+        audio_duration = len(final_wav) / SAMPLE_RATE
+        rtf = process_time / audio_duration
         # Lưu vào lịch sử
         permanent_path = add_to_history(raw_text, voice_choice, output_path, process_time, "Thành công")
+        yield permanent_path, f"✅ Hoàn tất! (Thời gian: {process_time:.2f}s | Audio: {audio_duration:.2f}s | RTF: {rtf:.3f})"
     except Exception as e:
         import traceback
     ["Thành phố Hồ Chí Minh luôn chuyển mình không ngừng với nhịp sống hối hả, năng động.", "Dung (nữ miền Nam)"],
 ]
+initial_status = f"✅ Model đã tải thành công! (Chạy trên **{DEVICE_INFO}**). Hỗ trợ xử lý background và lưu lịch sử." if model_manager.is_ready() else "⚠️ Model đang được tải hoặc chưa sẵn sàng..."
 with gr.Blocks(title="VieNeu-TTS", theme=theme, css=css) as demo:
     with gr.Column(elem_classes="container"):
         """)
         status_banner = gr.Markdown(initial_status)
+        # Thêm nút kiểm tra model status
+        with gr.Row():
+            btn_check_model = gr.Button("🔍 Kiểm tra trạng thái Model", size="sm", variant="secondary")
+            model_status_output = gr.Textbox(label="", show_label=False, interactive=False, container=False)
+        def check_model_status():
+            if model_manager.is_ready():
+                return f"✅ Model sẵn sàng | Backbone: {'Loaded' if model_manager.backbone else 'Not loaded'} | Codec: {'Loaded' if model_manager.codec else 'Not loaded'}"
+            else:
+                return "⚠️ Model chưa sẵn sàng. Nhấn nút tổng hợp để tự động tải model."
+        btn_check_model.click(
+            fn=check_model_status,
+            outputs=model_status_output
+        )
         # --- TABS ---
         with gr.Tabs():
                         )
                         with gr.Row():
+                            btn_generate = gr.Button("🎵 Bắt đầu tổng hợp", variant="primary", size="lg")
                             btn_clear = gr.Button("🗑️ Xóa", variant="secondary", size="lg")
                     with gr.Column(scale=2):
                 - ✅ Chia chunk thông minh
                 - ✅ Thread-safe operations
                 - ✅ Tự động xóa file cũ khi vượt quá 100 bản ghi
+                - ✅ Auto-reload model sau khi reload trang
                 ### 📊 Thống kê
                 {get_processing_stats()}
     print(f"📂 Lịch sử lưu tại: {HISTORY_DIR}")
     print(f"🎭 Số giọng mẫu: {len(VOICE_SAMPLES)}")
     print(f"⚙️  Chế độ: {DEVICE_INFO}")
+    print(f"✅ Model status: {'Ready' if model_manager.is_ready() else 'Not ready'}")
     print(f"{'='*60}\n")
     demo.queue().launch(