TGPro1 committed on
Commit
7454cce
·
verified ·
1 Parent(s): 5713fb8

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +40 -33
app.py CHANGED
@@ -1,4 +1,10 @@
1
- print("--- [v137-clean] πŸš€ BOOTING APP.PY ---")
 
 
 
 
 
 
2
  try:
3
  import spaces
4
  except ImportError:
@@ -11,16 +17,13 @@ except ImportError:
11
  import gradio as gr
12
  from fastapi import FastAPI, Request
13
  from fastapi.middleware.cors import CORSMiddleware
14
- import uvicorn
15
  import base64
16
  import torch
17
- import os
18
  import tempfile
19
  import json
20
  import time
21
  import gc
22
  import traceback
23
- import soundfile as sf
24
  import numpy as np
25
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
26
  from TTS.api import TTS
@@ -35,9 +38,6 @@ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
35
  torch.backends.cuda.matmul.allow_tf32 = False
36
  torch.backends.cudnn.allow_tf32 = False
37
 
38
- app = FastAPI()
39
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
40
-
41
  MODELS = {"stt": None, "tts": None}
42
 
43
  def load_gpu_models():
@@ -48,7 +48,6 @@ def load_gpu_models():
48
  if MODELS.get("stt") is None:
49
  print("--- [v137] πŸ“₯ LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
50
  model_id = "openai/whisper-large-v3-turbo"
51
- # Load model with SDPA (Flash Attention) for H200
52
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
53
  model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True
54
  ).to(device)
@@ -147,7 +146,33 @@ def core_process(request_dict):
147
  print(f"--- [v137] ✨ DONE ({time.time()-t1:.1f}s) ---")
148
  torch.cuda.empty_cache()
149
 
150
- @app.post("/process")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  async def api_process(request: Request):
152
  try:
153
  data = await request.json()
@@ -155,29 +180,11 @@ async def api_process(request: Request):
155
  return core_process(data)
156
  except Exception as e: return {"error": str(e)}
157
 
158
- @app.get("/health")
159
- def health(): return {"status": "ok", "v": "137"}
160
-
161
- # Named function for Gradio to avoid lambda schema issues
162
- def gradio_stt(audio_path):
163
- if not audio_path: return ""
164
- with open(audio_path, "rb") as f:
165
- b64 = base64.b64encode(f.read()).decode()
166
- res = core_process({"action": "stt", "file": b64})
167
- return res.get("text", f"Error: {res.get('error')}")
168
-
169
- with gr.Blocks() as demo:
170
- gr.Markdown("# πŸš€ v137 HOPPER NATIVE (H200 Stable)")
171
- gr.Markdown("Direct GPU path | Transformers Whisper | XTTS-v2 Singleton")
172
- with gr.Row():
173
- audio_in = gr.Audio(type="filepath", label="Input Audio")
174
- stt_btn = gr.Button("STT")
175
- txt_out = gr.Textbox(label="STT Result")
176
- stt_btn.click(fn=gradio_stt, inputs=audio_in, outputs=txt_out)
177
-
178
- print("--- [v137-clean] πŸ”§ MOUNTING GRADIO ---")
179
- app = gr.mount_gradio_app(app, demo, path="/")
180
 
 
181
  if __name__ == "__main__":
182
- print("--- [v137-clean] πŸ“‘ STARTING UVICORN ---")
183
- uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
 
1
import os
import sys

# --- [v137-clean-v3] 🚀 INITIALIZING SYSTEM ---
# Log interpreter version and working directory up front to ease debugging
# of container boots (e.g. on Hugging Face Spaces).
print(f"PYTHON VERSION: {sys.version}")
print(f"WORKING DIR: {os.getcwd()}")
7
+
8
  try:
9
  import spaces
10
  except ImportError:
 
17
  import gradio as gr
18
  from fastapi import FastAPI, Request
19
  from fastapi.middleware.cors import CORSMiddleware
 
20
  import base64
21
  import torch
 
22
  import tempfile
23
  import json
24
  import time
25
  import gc
26
  import traceback
 
27
  import numpy as np
28
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
29
  from TTS.api import TTS
 
38
  torch.backends.cuda.matmul.allow_tf32 = False
39
  torch.backends.cudnn.allow_tf32 = False
40
 
 
 
 
41
  MODELS = {"stt": None, "tts": None}
42
 
43
  def load_gpu_models():
 
48
  if MODELS.get("stt") is None:
49
  print("--- [v137] πŸ“₯ LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
50
  model_id = "openai/whisper-large-v3-turbo"
 
51
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
52
  model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True
53
  ).to(device)
 
146
  print(f"--- [v137] ✨ DONE ({time.time()-t1:.1f}s) ---")
147
  torch.cuda.empty_cache()
148
 
149
# --- Gradio UI Logic ---
def gradio_stt_fn(audio_path) -> str:
    """Gradio callback: read an audio file, base64-encode it, and run STT.

    Delegates the actual transcription to ``core_process`` with an
    ``{"action": "stt", "file": <b64>}`` request.

    Returns the transcribed text on success, or a human-readable error
    string — this function never raises into the UI.
    """
    if not audio_path:
        return "No audio provided."
    try:
        with open(audio_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        res = core_process({"action": "stt", "file": b64})
        # Build the error message lazily, only when "text" is absent
        # (the original eagerly formatted it on every call).
        if "text" in res:
            return res["text"]
        return f"Error: {res.get('error')}"
    except Exception as e:
        # Surface any failure (bad path, decode error, backend crash) in the textbox.
        return f"UI Error: {str(e)}"
159
+
160
# --- Interface Definition ---
# Minimal test UI: one audio input, one button, one textbox wired to the
# same STT path the HTTP API uses.
with gr.Blocks(title="S2ST H200 v137") as demo:
    gr.Markdown("# 🚀 S2ST AI Engine v137 (HOPPER NATIVE)")
    gr.Markdown("**H200 Stable | Transformers Whisper | XTTS-v2 VRAM Singleton**")
    with gr.Row():
        audio_in = gr.Audio(type="filepath", label="Input Audio")
        stt_btn = gr.Button("Transcribe (STT)")
    txt_out = gr.Textbox(label="Result")
    stt_btn.click(fn=gradio_stt_fn, inputs=audio_in, outputs=txt_out)
169
+
170
# --- FastAPI Route Integration ---
print("--- [v137-clean-v3] 🔧 INTEGRATING FASTAPI ROUTES ---")
# NOTE(review): `demo.app` is typically populated by Gradio only after
# launch()/mount_gradio_app — confirm this attribute exists at import time
# for the Gradio version pinned in this Space.
fastapi_app = demo.app  # Access Gradio's internal FastAPI app
fastapi_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
174
+
175
+ @fastapi_app.post("/process")
176
  async def api_process(request: Request):
177
  try:
178
  data = await request.json()
 
180
  return core_process(data)
181
  except Exception as e: return {"error": str(e)}
182
 
183
@fastapi_app.get("/api/v137_health")
def api_v137_health():
    """Liveness probe: static version metadata only — touches no models/GPU."""
    return {"status": "ok", "v": "137", "details": "NATIVE_INTEGRATION"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
# --- Start System ---
if __name__ == "__main__":
    print("--- [v137-clean-v3] 📡 LAUNCHING SYSTEM ---")
    # Bind on all interfaces; port 7860 is the Hugging Face Spaces convention.
    demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, quiet=True)