Spaces:

nhantrungsp
/

FSub

Running on Zero

App Files Community

nhantrungsp committed 1 day ago

Commit

847b717

verified ·

1 Parent(s): 962fbc7

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +21 -6

gradio_app.py CHANGED Viewed

@@ -15,6 +15,7 @@ from fastapi.responses import FileResponse
 from pydantic import BaseModel
 import base64
 import io
 # --- KHỞI TẠO FASTAPI ---
 app = FastAPI()
@@ -22,8 +23,9 @@ app = FastAPI()
 print("⏳ Đang khởi động VieNeu-TTS...")
 # --- 1. SETUP MODEL ---
 device = "cuda" if torch.cuda.is_available() else "cpu"
-print(f"🖥️ Sử dụng thiết bị: {device.upper()}")
 # Cache
 CACHE_DIR = "./reference_cache"
@@ -78,6 +80,9 @@ VOICE_SAMPLES = {
 }
 # --- 3. CORE LOGIC (Dùng chung cho cả API và UI) ---
 def core_synthesize(text, voice_choice, speed_factor):
     # Lấy thông tin giọng
     voice_info = VOICE_SAMPLES.get(voice_choice)
@@ -99,6 +104,11 @@ def core_synthesize(text, voice_choice, speed_factor):
         else:
             ref_codes = load_cache_from_disk(cache_key)
             if ref_codes is None:
                 ref_codes = tts.encode_reference(ref_audio_path)
                 save_cache_to_disk(cache_key, ref_codes)
             reference_cache[cache_key] = ref_codes
@@ -124,6 +134,13 @@ def core_synthesize(text, voice_choice, speed_factor):
     return wav
 # --- 4. API ENDPOINTS (Cho Client App kết nối) ---
 class FastTTSRequest(BaseModel):
     text: str
@@ -139,6 +156,7 @@ async def get_voices():
 async def fast_tts(request: FastTTSRequest):
     try:
         start = time.time()
         wav = core_synthesize(request.text, request.voice_choice, request.speed_factor)
         process_time = time.time() - start
@@ -168,10 +186,7 @@ def ui_synthesize(text, voice, custom_audio, custom_text, mode, speed):
         start = time.time()
         # Logic riêng cho UI (hỗ trợ custom voice)
         if mode == "custom_mode":
-            ref_audio_path = custom_audio
-            ref_text_raw = custom_text
-            ref_codes = tts.encode_reference(ref_audio_path) # Không cache custom
-            wav = tts.infer(text, ref_codes, ref_text_raw)
             # (Bỏ qua speed control cho custom để code gọn)
         else:
             wav = core_synthesize(text, voice, speed)
@@ -218,5 +233,5 @@ app = gr.mount_gradio_app(app, demo, path="/")
 # --- 7. CHẠY SERVER ---
 if __name__ == "__main__":
     import uvicorn
-    # Chạy uvicorn thay vì demo.launch()
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from pydantic import BaseModel
 import base64
 import io
+import spaces  # <--- THÊM THƯ VIỆN NÀY
 # --- KHỞI TẠO FASTAPI ---
 app = FastAPI()
 print("⏳ Đang khởi động VieNeu-TTS...")
 # --- 1. SETUP MODEL ---
+# Trên ZeroGPU, ban đầu có thể nó nhận là CPU, nhưng @spaces.GPU sẽ lo phần chuyển đổi sau
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"🖥️ Sử dụng thiết bị (Global): {device.upper()}")
 # Cache
 CACHE_DIR = "./reference_cache"
 }
 # --- 3. CORE LOGIC (Dùng chung cho cả API và UI) ---
+# QUAN TRỌNG: Thêm @spaces.GPU vào hàm này để báo cho HF biết đây là hàm cần GPU
+@spaces.GPU
 def core_synthesize(text, voice_choice, speed_factor):
     # Lấy thông tin giọng
     voice_info = VOICE_SAMPLES.get(voice_choice)
         else:
             ref_codes = load_cache_from_disk(cache_key)
             if ref_codes is None:
+                # Đảm bảo model đang ở đúng device trước khi encode
+                if torch.cuda.is_available():
+                    # Move model components to GPU if needed inside the decorated function
+                    # (Usually VieNeuTTS handles this based on init, but we double check)
+                    pass
                 ref_codes = tts.encode_reference(ref_audio_path)
                 save_cache_to_disk(cache_key, ref_codes)
             reference_cache[cache_key] = ref_codes
     return wav
+# Hàm riêng cho Custom Voice cũng cần GPU
+@spaces.GPU
+def custom_synthesize_logic(text, ref_audio_path, ref_text_raw):
+    ref_codes = tts.encode_reference(ref_audio_path)
+    wav = tts.infer(text, ref_codes, ref_text_raw)
+    return wav
 # --- 4. API ENDPOINTS (Cho Client App kết nối) ---
 class FastTTSRequest(BaseModel):
     text: str
 async def fast_tts(request: FastTTSRequest):
     try:
         start = time.time()
+        # Gọi hàm đã được decorate @spaces.GPU
         wav = core_synthesize(request.text, request.voice_choice, request.speed_factor)
         process_time = time.time() - start
         start = time.time()
         # Logic riêng cho UI (hỗ trợ custom voice)
         if mode == "custom_mode":
+            wav = custom_synthesize_logic(text, custom_audio, custom_text)
             # (Bỏ qua speed control cho custom để code gọn)
         else:
             wav = core_synthesize(text, voice, speed)
 # --- 7. CHẠY SERVER ---
 if __name__ == "__main__":
     import uvicorn
+    # 0.0.0.0 Mở port ra ngoài internet
     uvicorn.run(app, host="0.0.0.0", port=7860)