Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# 🚀 v136: ZEROGPU HOPPER ULTIMATE (PERSISTENT GPU)
|
| 2 |
try:
|
| 3 |
import spaces
|
| 4 |
except ImportError:
|
|
@@ -21,17 +20,9 @@ import time
|
|
| 21 |
import gc
|
| 22 |
import traceback
|
| 23 |
import soundfile as sf
|
| 24 |
-
from faster_whisper import WhisperModel
|
| 25 |
-
|
| 26 |
-
# 🛡️ 0. INFRASTRUCTURE PURIST (v136)
|
| 27 |
import numpy as np
|
| 28 |
-
import uvicorn
|
| 29 |
-
from fastapi import FastAPI, Request
|
| 30 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 31 |
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
| 32 |
from TTS.api import TTS
|
| 33 |
-
import gradio as gr
|
| 34 |
-
import json # Added for gradio interface
|
| 35 |
|
| 36 |
# ==========================================
|
| 37 |
# 🚀 v137 - HOPPER NATIVE (Transformers + Persistent VRAM)
|
|
@@ -48,19 +39,18 @@ os.environ["PYTHONWARNINGS"] = "ignore"
|
|
| 48 |
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
| 49 |
torch.backends.cuda.matmul.allow_tf32 = False
|
| 50 |
torch.backends.cudnn.allow_tf32 = False
|
| 51 |
-
torch.use_deterministic_algorithms(False) # Some kernels might need this, but let's keep it flexible
|
| 52 |
|
| 53 |
app = FastAPI()
|
| 54 |
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
| 55 |
|
| 56 |
-
MODELS = {"stt": None, "tts": None
|
| 57 |
|
| 58 |
def load_gpu_models():
|
| 59 |
"""Persistent loading into GPU VRAM. Only runs once per worker."""
|
| 60 |
global MODELS
|
| 61 |
device = "cuda"
|
| 62 |
|
| 63 |
-
if MODELS
|
| 64 |
print("--- [v137] 🔥 LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
|
| 65 |
model_id = "openai/whisper-large-v3-turbo"
|
| 66 |
torch_dtype = torch.float16
|
|
@@ -82,7 +72,7 @@ def load_gpu_models():
|
|
| 82 |
)
|
| 83 |
print("--- [v137] ✅ WHISPER LOADED ---")
|
| 84 |
|
| 85 |
-
if MODELS
|
| 86 |
print("--- [v137] 🔥 LOADING XTTS (VRAM STABLE) ---")
|
| 87 |
MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
| 88 |
print("--- [v137] ✅ XTTS LOADED ---")
|
|
@@ -179,7 +169,8 @@ def health(): return {"status": "ok", "v": "137"}
|
|
| 179 |
|
| 180 |
# Gradio interface for debugging
|
| 181 |
with gr.Blocks() as demo:
|
| 182 |
-
gr.Markdown("
|
|
|
|
| 183 |
with gr.Row():
|
| 184 |
audio_in = gr.Audio(type="filepath", label="Input Audio")
|
| 185 |
stt_btn = gr.Button("STT")
|
|
|
|
|
|
|
| 1 |
try:
|
| 2 |
import spaces
|
| 3 |
except ImportError:
|
|
|
|
| 20 |
import gc
|
| 21 |
import traceback
|
| 22 |
import soundfile as sf
|
|
|
|
|
|
|
|
|
|
| 23 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
| 24 |
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
|
| 25 |
from TTS.api import TTS
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# ==========================================
|
| 28 |
# 🚀 v137 - HOPPER NATIVE (Transformers + Persistent VRAM)
|
|
|
|
| 39 |
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
| 40 |
torch.backends.cuda.matmul.allow_tf32 = False
|
| 41 |
torch.backends.cudnn.allow_tf32 = False
|
|
|
|
| 42 |
|
| 43 |
app = FastAPI()
|
| 44 |
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
| 45 |
|
| 46 |
+
MODELS = {"stt": None, "tts": None}
|
| 47 |
|
| 48 |
def load_gpu_models():
|
| 49 |
"""Persistent loading into GPU VRAM. Only runs once per worker."""
|
| 50 |
global MODELS
|
| 51 |
device = "cuda"
|
| 52 |
|
| 53 |
+
if MODELS.get("stt") is None:
|
| 54 |
print("--- [v137] 🔥 LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
|
| 55 |
model_id = "openai/whisper-large-v3-turbo"
|
| 56 |
torch_dtype = torch.float16
|
|
|
|
| 72 |
)
|
| 73 |
print("--- [v137] ✅ WHISPER LOADED ---")
|
| 74 |
|
| 75 |
+
if MODELS.get("tts") is None:
|
| 76 |
print("--- [v137] 🔥 LOADING XTTS (VRAM STABLE) ---")
|
| 77 |
MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
|
| 78 |
print("--- [v137] ✅ XTTS LOADED ---")
|
|
|
|
| 169 |
|
| 170 |
# Gradio interface for debugging
|
| 171 |
with gr.Blocks() as demo:
|
| 172 |
+
gr.Markdown("# 🚀 v137 HOPPER NATIVE (H200 Stable)")
|
| 173 |
+
gr.Markdown("Direct GPU path | Transformers Whisper | XTTS-v2 Singleton")
|
| 174 |
with gr.Row():
|
| 175 |
audio_in = gr.Audio(type="filepath", label="Input Audio")
|
| 176 |
stt_btn = gr.Button("STT")
|