Spaces:

nhantrungsp
/

FSub

Running on Zero

App Files Files Community

nhantrungsp committed on 1 day ago

Commit

d46de93

verified ·

1 Parent(s): 847b717

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +20 -23

gradio_app.py CHANGED Viewed

@@ -1,21 +1,24 @@
-import gradio as gr
-import soundfile as sf
-import tempfile
-import torch
-from vieneu_tts import VieNeuTTS
 import os
 import time
 import threading
 import pickle
 import hashlib
 import numpy as np
 from pydub import AudioSegment
 from fastapi import FastAPI, HTTPException
-from fastapi.responses import FileResponse
 from pydantic import BaseModel
-import base64
-import io
-import spaces  # <--- THÊM THƯ VIỆN NÀY
 # --- KHỞI TẠO FASTAPI ---
 app = FastAPI()
@@ -23,7 +26,6 @@ app = FastAPI()
 print("⏳ Đang khởi động VieNeu-TTS...")
 # --- 1. SETUP MODEL ---
-# Trên ZeroGPU, ban đầu có thể nó nhận là CPU, nhưng @spaces.GPU sẽ lo phần chuyển đổi sau
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🖥️ Sử dụng thiết bị (Global): {device.upper()}")
@@ -54,6 +56,7 @@ def save_cache_to_disk(cache_key, ref_codes):
 # Load Model
 try:
     tts = VieNeuTTS(
         backbone_repo="pnnbao-ump/VieNeu-TTS",
         backbone_device=device,
@@ -81,7 +84,7 @@ VOICE_SAMPLES = {
 # --- 3. CORE LOGIC (Dùng chung cho cả API và UI) ---
-# QUAN TRỌNG: Thêm @spaces.GPU vào hàm này để báo cho HF biết đây là hàm cần GPU
 @spaces.GPU
 def core_synthesize(text, voice_choice, speed_factor):
     # Lấy thông tin giọng
@@ -104,16 +107,16 @@ def core_synthesize(text, voice_choice, speed_factor):
         else:
             ref_codes = load_cache_from_disk(cache_key)
             if ref_codes is None:
-                # Đảm bảo model đang ở đúng device trước khi encode
                 if torch.cuda.is_available():
-                    # Move model components to GPU if needed inside the decorated function
-                    # (Usually VieNeuTTS handles this based on init, but we double check)
-                    pass
                 ref_codes = tts.encode_reference(ref_audio_path)
                 save_cache_to_disk(cache_key, ref_codes)
             reference_cache[cache_key] = ref_codes
     # Infer
     wav = tts.infer(text, ref_codes, ref_text_raw)
     # Speed
@@ -137,6 +140,8 @@ def core_synthesize(text, voice_choice, speed_factor):
 # Custom-voice synthesis also needs a GPU, so it gets its own decorated function.
 @spaces.GPU
 def custom_synthesize_logic(text, ref_audio_path, ref_text_raw):
     """Synthesize `text` using a caller-supplied reference voice.

     The reference clip at `ref_audio_path` is encoded with
     `tts.encode_reference`, and the resulting codes are passed to
     `tts.infer` together with `text` and `ref_text_raw`. Whatever
     `tts.infer` returns is handed back unchanged.
     """
     voice_codes = tts.encode_reference(ref_audio_path)
     return tts.infer(text, voice_codes, ref_text_raw)
@@ -175,19 +180,14 @@ async def fast_tts(request: FastTTSRequest):
         raise HTTPException(status_code=500, detail=str(e))
 # --- 5. GRADIO UI SETUP ---
-# Dùng theme Soft để tránh lỗi
 theme = gr.themes.Soft()
-# CSS
 css = ".container { max-width: 900px; margin: auto; }"
 def ui_synthesize(text, voice, custom_audio, custom_text, mode, speed):
     try:
         start = time.time()
-        # Logic riêng cho UI (hỗ trợ custom voice)
         if mode == "custom_mode":
             wav = custom_synthesize_logic(text, custom_audio, custom_text)
-            # (Bỏ qua speed control cho custom để code gọn)
         else:
             wav = core_synthesize(text, voice, speed)
@@ -219,7 +219,6 @@ with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS") as demo:
             out_audio = gr.Audio(label="Kết quả", autoplay=True)
             out_status = gr.Textbox(label="Trạng thái")
-    # Ẩn hiện mode
     mode_state = gr.Textbox(visible=False, value="preset_mode")
     tabs.children[0].select(lambda: "preset_mode", None, mode_state)
     tabs.children[1].select(lambda: "custom_mode", None, mode_state)
@@ -227,11 +226,9 @@ with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS") as demo:
     btn.click(ui_synthesize, [inp_text, inp_voice, inp_audio, inp_ref_text, mode_state, inp_speed], [out_audio, out_status])
 # --- 6. MOUNT GRADIO VÀO FASTAPI ---
-# Đây là bước quan trọng nhất để chạy cả 2 cùng lúc
 app = gr.mount_gradio_app(app, demo, path="/")
 # --- 7. CHẠY SERVER ---
 if __name__ == "__main__":
     import uvicorn
-    # 0.0.0.0 Mở port ra ngoài internet
     uvicorn.run(app, host="0.0.0.0", port=7860)

+import spaces  # <--- QUAN TRỌNG: PHẢI ĐỂ DÒNG ĐẦU TIÊN
 import os
 import time
 import threading
 import pickle
 import hashlib
+import base64
+import io
+import tempfile
 import numpy as np
+# Các thư viện khác import sau spaces
+import torch
+import soundfile as sf
 from pydub import AudioSegment
+import gradio as gr
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+# Import thư viện nội bộ
+from vieneu_tts import VieNeuTTS
 # --- KHỞI TẠO FASTAPI ---
 app = FastAPI()
 print("⏳ Đang khởi động VieNeu-TTS...")
 # --- 1. SETUP MODEL ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"🖥️ Sử dụng thiết bị (Global): {device.upper()}")
 # Load Model
 try:
+    print("📦 Đang tải model vào bộ nhớ...")
     tts = VieNeuTTS(
         backbone_repo="pnnbao-ump/VieNeu-TTS",
         backbone_device=device,
 # --- 3. CORE LOGIC (Dùng chung cho cả API và UI) ---
+# QUAN TRỌNG: Decorator GPU
 @spaces.GPU
 def core_synthesize(text, voice_choice, speed_factor):
     # Lấy thông tin giọng
         else:
             ref_codes = load_cache_from_disk(cache_key)
             if ref_codes is None:
+                # Đảm bảo dọn dẹp bộ nhớ trước khi encode
                 if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
                 ref_codes = tts.encode_reference(ref_audio_path)
                 save_cache_to_disk(cache_key, ref_codes)
             reference_cache[cache_key] = ref_codes
     # Infer
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
     wav = tts.infer(text, ref_codes, ref_text_raw)
     # Speed
 # Hàm riêng cho Custom Voice cũng cần GPU
 @spaces.GPU
 def custom_synthesize_logic(text, ref_audio_path, ref_text_raw):
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
     ref_codes = tts.encode_reference(ref_audio_path)
     wav = tts.infer(text, ref_codes, ref_text_raw)
     return wav
         raise HTTPException(status_code=500, detail=str(e))
 # --- 5. GRADIO UI SETUP ---
 theme = gr.themes.Soft()
 css = ".container { max-width: 900px; margin: auto; }"
 def ui_synthesize(text, voice, custom_audio, custom_text, mode, speed):
     try:
         start = time.time()
         if mode == "custom_mode":
             wav = custom_synthesize_logic(text, custom_audio, custom_text)
         else:
             wav = core_synthesize(text, voice, speed)
             out_audio = gr.Audio(label="Kết quả", autoplay=True)
             out_status = gr.Textbox(label="Trạng thái")
     mode_state = gr.Textbox(visible=False, value="preset_mode")
     tabs.children[0].select(lambda: "preset_mode", None, mode_state)
     tabs.children[1].select(lambda: "custom_mode", None, mode_state)
     btn.click(ui_synthesize, [inp_text, inp_voice, inp_audio, inp_ref_text, mode_state, inp_speed], [out_audio, out_status])
 # --- 6. MOUNT GRADIO VÀO FASTAPI ---
 app = gr.mount_gradio_app(app, demo, path="/")
 # --- 7. CHẠY SERVER ---
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)