TGPro1 committed on
Commit
811f60e
·
verified ·
1 Parent(s): af35fde

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +74 -75
app.py CHANGED
@@ -1,43 +1,42 @@
1
  import os
2
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- # --- [v137-clean-v3] πŸš€ INITIALIZING SYSTEM ---
5
- print(f"PYTHON VERSION: {sys.version}")
6
- print(f"WORKING DIR: {os.getcwd()}")
7
 
8
  try:
9
  import spaces
 
10
  except ImportError:
 
11
  class spaces:
12
  @staticmethod
13
  def GPU(duration=60, f=None):
14
  if f is None: return lambda x: x
15
  return f
16
 
17
- import gradio as gr
18
- from fastapi import FastAPI, Request
19
- from fastapi.middleware.cors import CORSMiddleware
20
- import base64
21
- import torch
22
- import tempfile
23
- import json
24
- import time
25
- import gc
26
- import traceback
27
- import numpy as np
28
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
29
- from TTS.api import TTS
30
-
31
- # ==========================================
32
- # πŸš€ v137 - HOPPER NATIVE (Transformers + Persistent VRAM)
33
- # ==========================================
34
-
35
  os.environ["COQUI_TOS_AGREED"] = "1"
36
  os.environ["PYTHONWARNINGS"] = "ignore"
37
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
38
  torch.backends.cuda.matmul.allow_tf32 = False
39
  torch.backends.cudnn.allow_tf32 = False
40
 
 
 
 
41
  MODELS = {"stt": None, "tts": None}
42
 
43
  def load_gpu_models():
@@ -46,7 +45,7 @@ def load_gpu_models():
46
  device = "cuda"
47
 
48
  if MODELS.get("stt") is None:
49
- print("--- [v137] πŸ“₯ LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
50
  model_id = "openai/whisper-large-v3-turbo"
51
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
52
  model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True
@@ -62,18 +61,18 @@ def load_gpu_models():
62
  device=device,
63
  model_kwargs={"attn_implementation": "sdpa"}
64
  )
65
- print("--- [v137] βœ… WHISPER LOADED ---")
66
 
67
  if MODELS.get("tts") is None:
68
- print("--- [v137] πŸ“₯ LOADING XTTS (VRAM STABLE) ---")
69
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
70
- print("--- [v137] βœ… XTTS LOADED ---")
71
 
72
  @spaces.GPU(duration=120)
73
  def core_process(request_dict):
74
  global MODELS
75
  action = request_dict.get("action")
76
- print(f"--- [v137] πŸ› οΈ HOPPER ENGINE: {action} ---")
77
  t1 = time.time()
78
 
79
  try:
@@ -114,14 +113,8 @@ def core_process(request_dict):
114
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
115
 
116
  if mapped_lang:
117
- speaker_wav_path = None
118
- if request_dict.get("speaker_wav"):
119
- sb = base64.b64decode(request_dict.get("speaker_wav"))
120
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
121
- f.write(sb); speaker_wav_path = f.name
122
- else:
123
- speaker_wav_path = "default_speaker.wav"
124
- if not os.path.exists(speaker_wav_path): speaker_wav_path = None
125
 
126
  try:
127
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
@@ -129,62 +122,68 @@ def core_process(request_dict):
129
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
130
  with open(out_p, "rb") as f: audio_b64 = base64.b64encode(f.read()).decode()
131
  finally:
132
- if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
133
  if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
134
- else:
135
- import chatterbox_utils
136
- audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
137
- audio_b64 = base64.b64encode(audio_bytes).decode()
138
 
139
  if action == "tts": return {"audio": audio_b64}
140
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
141
 
142
  except Exception as e:
143
- print(f"❌ [v137] ERROR: {traceback.format_exc()}")
144
  return {"error": str(e)}
145
  finally:
146
- print(f"--- [v137] ✨ DONE ({time.time()-t1:.1f}s) ---")
147
  torch.cuda.empty_cache()
148
 
149
- # --- Gradio UI Logic ---
150
- def gradio_stt_fn(audio_path) -> str:
151
- if not audio_path: return "No audio provided."
152
- try:
153
- with open(audio_path, "rb") as f:
154
- b64 = base64.b64encode(f.read()).decode()
155
- res = core_process({"action": "stt", "file": b64})
156
- return res.get("text", f"Error: {res.get('error')}")
157
- except Exception as e:
158
- return f"UI Error: {str(e)}"
159
-
160
- # --- Interface Definition ---
161
- with gr.Blocks(title="S2ST H200 v137") as demo:
162
- gr.Markdown("# πŸš€ S2ST AI Engine v137 (HOPPER NATIVE)")
163
- gr.Markdown("**H200 Stable | Transformers Whisper | XTTS-v2 VRAM Singleton**")
164
- with gr.Row():
165
- audio_in = gr.Audio(type="filepath", label="Input Audio")
166
- stt_btn = gr.Button("Transcribe (STT)")
167
- txt_out = gr.Textbox(label="Result")
168
- stt_btn.click(fn=gradio_stt_fn, inputs=audio_in, outputs=txt_out)
169
-
170
- # --- FastAPI Route Integration ---
171
- print("--- [v137-clean-v3] πŸ”§ INTEGRATING FASTAPI ROUTES ---")
172
- fastapi_app = demo.app # Access Gradio's internal FastAPI app
173
- fastapi_app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
174
-
175
- @fastapi_app.post("/process")
176
  async def api_process(request: Request):
177
  try:
178
  data = await request.json()
179
- if data.get("action") == "health": return {"status": "awake", "v": "137"}
180
  return core_process(data)
181
  except Exception as e: return {"error": str(e)}
182
 
183
- @fastapi_app.get("/api/v137_health")
184
- def api_v137_health():
185
- return {"status": "ok", "v": "137", "details": "NATIVE_INTEGRATION"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
- # --- Start System ---
188
  if __name__ == "__main__":
189
- print("--- [v137-clean-v3] πŸ“‘ LAUNCHING SYSTEM ---")
190
- demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False, quiet=True)
 
1
  import os
2
  import sys
3
+ import time
4
+ import base64
5
+ import torch
6
+ import tempfile
7
+ import traceback
8
+ import gc
9
+ from fastapi import FastAPI, Request, Response
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from fastapi.responses import HTMLResponse
12
+ import uvicorn
13
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
14
+ from TTS.api import TTS
15
 
16
+ # --- [v138] πŸš€ ZEROGPU LEGACY-FREE ENGINE ---
17
+ print(f"--- [v138] πŸ“‘ BOOTING API ENGINE ---")
 
18
 
19
  try:
20
  import spaces
21
+ HAS_SPACES = True
22
  except ImportError:
23
+ HAS_SPACES = False
24
  class spaces:
25
  @staticmethod
26
  def GPU(duration=60, f=None):
27
  if f is None: return lambda x: x
28
  return f
29
 
30
+ # --- System Config ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  os.environ["COQUI_TOS_AGREED"] = "1"
32
  os.environ["PYTHONWARNINGS"] = "ignore"
33
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
34
  torch.backends.cuda.matmul.allow_tf32 = False
35
  torch.backends.cudnn.allow_tf32 = False
36
 
37
+ app = FastAPI()
38
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
39
+
40
  MODELS = {"stt": None, "tts": None}
41
 
42
  def load_gpu_models():
 
45
  device = "cuda"
46
 
47
  if MODELS.get("stt") is None:
48
+ print("--- [v138] πŸ“₯ LOADING NATIVE WHISPER (Large-v3-Turbo) ---")
49
  model_id = "openai/whisper-large-v3-turbo"
50
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
51
  model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True
 
61
  device=device,
62
  model_kwargs={"attn_implementation": "sdpa"}
63
  )
64
+ print("--- [v138] βœ… WHISPER LOADED ---")
65
 
66
  if MODELS.get("tts") is None:
67
+ print("--- [v138] πŸ“₯ LOADING XTTS (VRAM STABLE) ---")
68
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
69
+ print("--- [v138] βœ… XTTS LOADED ---")
70
 
71
  @spaces.GPU(duration=120)
72
  def core_process(request_dict):
73
  global MODELS
74
  action = request_dict.get("action")
75
+ print(f"--- [v138] πŸ› οΈ HOPPER ENGINE: {action} ---")
76
  t1 = time.time()
77
 
78
  try:
 
113
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
114
 
115
  if mapped_lang:
116
+ speaker_wav_path = "default_speaker.wav"
117
+ if not os.path.exists(speaker_wav_path): speaker_wav_path = None
 
 
 
 
 
 
118
 
119
  try:
120
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
 
122
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
123
  with open(out_p, "rb") as f: audio_b64 = base64.b64encode(f.read()).decode()
124
  finally:
 
125
  if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
126
+ else: return {"error": f"Language {clean_lang} not supported."}
 
 
 
127
 
128
  if action == "tts": return {"audio": audio_b64}
129
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
130
 
131
  except Exception as e:
132
+ print(f"❌ [v138] ERROR: {traceback.format_exc()}")
133
  return {"error": str(e)}
134
  finally:
135
+ print(f"--- [v138] ✨ DONE ({time.time()-t1:.1f}s) ---")
136
  torch.cuda.empty_cache()
137
 
138
+ # --- API Endpoints ---
139
+ @app.post("/process")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  async def api_process(request: Request):
141
  try:
142
  data = await request.json()
143
+ if data.get("action") == "health": return {"status": "awake", "v": "138"}
144
  return core_process(data)
145
  except Exception as e: return {"error": str(e)}
146
 
147
+ @app.get("/health")
148
+ def health(): return {"status": "ok", "v": "138", "gpu": HAS_SPACES}
149
+
150
+ # --- Minimal UI ---
151
+ @app.get("/", response_class=HTMLResponse)
152
+ def root():
153
+ return """
154
+ <html>
155
+ <head>
156
+ <title>S2ST v138</title>
157
+ <style>
158
+ body { font-family: sans-serif; background: #111; color: #eee; text-align: center; padding-top: 50px; }
159
+ .card { background: #222; border: 1px solid #444; padding: 20px; border-radius: 10px; display: inline-block; }
160
+ button { background: #007bff; color: #fff; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; }
161
+ #log { margin-top: 20px; color: #aaa; font-family: monospace; }
162
+ </style>
163
+ </head>
164
+ <body>
165
+ <div class="card">
166
+ <h1>πŸš€ AI Engine v138</h1>
167
+ <p>HOPPER NATIVE - FASTAPI ONLY</p>
168
+ <button onclick="checkHealth()">Test API Connectivity</button>
169
+ <div id="log">Status: Awaiting test...</div>
170
+ </div>
171
+ <script>
172
+ async function checkHealth() {
173
+ const log = document.getElementById('log');
174
+ log.innerText = 'Checking...';
175
+ try {
176
+ const res = await fetch('/health');
177
+ const data = await res.json();
178
+ log.innerText = 'Response: ' + JSON.stringify(data);
179
+ } catch (e) {
180
+ log.innerText = 'Error: ' + e;
181
+ }
182
+ }
183
+ </script>
184
+ </body>
185
+ </html>
186
+ """
187
 
 
188
  if __name__ == "__main__":
189
+ uvicorn.run(app, host="0.0.0.0", port=7860)