TGPro1 committed on
Commit
c2b0ea6
Β·
verified Β·
1 Parent(s): 0f73429

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +93 -89
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V107: ZEROGPU HARDENING
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
@@ -10,7 +10,7 @@ except ImportError:
10
  if f is None: return lambda x: x
11
  return f
12
 
13
- from fastapi import FastAPI, Request, Response
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from contextlib import asynccontextmanager
16
  import gradio as gr
@@ -19,7 +19,6 @@ import base64
19
  import torch
20
  import os
21
  import tempfile
22
- import traceback
23
  import json
24
  import time
25
  import torchaudio
@@ -30,16 +29,15 @@ import logging
30
  from threading import Thread, Lock
31
  from huggingface_hub import snapshot_download
32
 
33
- # πŸ›‘οΈ 1. SILENCE LOGS & WARNINGS (v107: Stability Milestone)
34
  logging.getLogger("transformers").setLevel(logging.ERROR)
35
  logging.getLogger("TTS").setLevel(logging.ERROR)
36
- logging.getLogger("onnxruntime").setLevel(logging.ERROR)
37
  os.environ["CT2_VERBOSE"] = "0"
38
  os.environ["ORT_LOGGING_LEVEL"] = "3"
39
- os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
40
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
41
 
42
- # πŸ› οΈ 2. COMPATIBILITY PATCHES
43
  if "torchaudio.backend" not in sys.modules:
44
  backend = types.ModuleType("torchaudio.backend")
45
  common = types.ModuleType("torchaudio.backend.common")
@@ -84,8 +82,8 @@ from TTS.api import TTS
84
  from df.enhance import init_df
85
  import deep_translator
86
 
87
- # FORCE BUILD TRIGGER: 17:10:00 Jan 21 2026
88
- # v107: Whisper int8 stability. Gradio 5.9.1.
89
 
90
  os.environ["COQUI_TOS_AGREED"] = "1"
91
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
@@ -94,7 +92,7 @@ WARMUP_STATUS = {"complete": False, "in_progress": False}
94
  WARMUP_LOCK = Lock()
95
 
96
  def activate_gpu_models(action):
97
- """v107: Safe Hardware Activation"""
98
  global MODELS, WARMUP_STATUS
99
  local_only = WARMUP_STATUS["complete"]
100
 
@@ -103,19 +101,16 @@ def activate_gpu_models(action):
103
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
104
  except: pass
105
  if not stt_on_gpu:
106
- print(f"πŸŽ™οΈ [v107] Activating Whisper (GPU: int8 Protocol)...")
107
  try:
108
  if MODELS["stt"]: del MODELS["stt"]
109
  gc.collect(); torch.cuda.empty_cache()
110
  time.sleep(0.5)
111
- # v107: Using 'int8' for guaranteed stability on H200 MIG.
112
- # Removed device_index to allow driver-level discovery.
113
  MODELS["stt"] = WhisperModel(
114
  "large-v3",
115
  device="cuda",
116
- compute_type="int8",
117
  num_workers=1,
118
- cpu_threads=1,
119
  local_files_only=local_only
120
  )
121
  except Exception as e:
@@ -129,7 +124,7 @@ def activate_gpu_models(action):
129
  tts_on_gpu = "cuda" in curr
130
  except: pass
131
  if MODELS["tts"] is None or not tts_on_gpu:
132
- print(f"πŸ”Š [v107] Activating XTTS-v2 (GPU)...")
133
  try:
134
  if MODELS["tts"] is None:
135
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
@@ -144,9 +139,9 @@ def activate_gpu_models(action):
144
  if MODELS["translate"] is None: MODELS["translate"] = "active"
145
 
146
  def release_gpu_models():
147
- """v107: GPU Cleanup"""
148
  global MODELS
149
- print("🧹 [v107] Releasing GPU resources.")
150
  try:
151
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
152
  del MODELS["stt"]
@@ -161,83 +156,101 @@ def release_gpu_models():
161
  time.sleep(0.5)
162
 
163
  def warmup_task():
164
- """Silent Warmup (v107)"""
165
  global WARMUP_STATUS
166
  with WARMUP_LOCK:
167
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
168
  WARMUP_STATUS["in_progress"] = True
169
- print("\nπŸ”₯ --- V107: ZEROGPU RECOVERY STARTED ---")
170
  try:
 
171
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
172
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
 
173
  chatterbox_utils.warmup_chatterbox()
174
  WARMUP_STATUS["complete"] = True
175
- print(f"βœ… --- SYSTEM READY: v107 --- \n")
176
- except: pass
 
177
  finally: WARMUP_STATUS["in_progress"] = False
178
 
179
- def _stt_logic(request_dict):
180
- audio_bytes = base64.b64decode(request_dict.get("file"))
181
- lang = request_dict.get("lang")
182
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
183
- f.write(audio_bytes); temp_path = f.name
184
- try:
185
- segments, _ = MODELS["stt"].transcribe(temp_path, language=lang, beam_size=1)
186
- return {"text": " ".join([s.text for s in segments]).strip()}
187
- finally:
188
- if os.path.exists(temp_path): os.unlink(temp_path)
189
-
190
- def _translate_logic(text, target_lang):
191
- return deep_translator.GoogleTranslator(source='auto', target=target_lang).translate(text)
192
-
193
- def _tts_logic(text, lang, speaker_wav_b64):
194
- XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
195
- clean_lang = lang.strip().lower().split('-')[0]
196
- mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
197
- if mapped_lang:
198
- speaker_wav_path = None
199
- if speaker_wav_b64:
200
- sb = base64.b64decode(speaker_wav_b64)
201
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
202
- f.write(sb); speaker_wav_path = f.name
203
- else: speaker_wav_path = "default_speaker.wav"
204
- try:
205
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
206
- output_path = output_file.name
207
- MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=output_path, speaker_wav=speaker_wav_path)
208
- with open(output_path, "rb") as f: return {"audio": base64.b64encode(f.read()).decode()}
209
- finally:
210
- if speaker_wav_path and "default_speaker" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
211
- if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
212
- try:
213
- temp_ref = None
214
- if speaker_wav_b64:
215
- sb = base64.b64decode(speaker_wav_b64)
216
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
217
- f.write(sb); temp_ref = f.name
218
- audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang, speaker_wav_path=temp_ref)
219
- if temp_ref and os.path.exists(temp_ref): os.unlink(temp_ref)
220
- return {"audio": base64.b64encode(audio_bytes).decode()}
221
- except: return {"error": "TTS Failure"}
222
-
223
  @spaces.GPU(duration=150)
224
  def core_process(request_dict):
225
  action = request_dict.get("action")
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  t1 = time.time()
227
- print(f"--- [v107] πŸš€ GPU SESSION: {action} ---")
228
  activate_gpu_models(action)
229
  try:
230
- if action == "stt": res = _stt_logic(request_dict)
231
- elif action == "translate": res = {"translated": _translate_logic(request_dict.get("text"), request_dict.get("target_lang", "en"))}
232
- elif action == "tts": res = _tts_logic(request_dict.get("text"), request_dict.get("lang"), request_dict.get("speaker_wav"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  elif action == "s2st":
234
- stt_res = _stt_logic({"file": request_dict.get("file"), "lang": request_dict.get("source_lang")})
235
- translated = _translate_logic(stt_res.get("text", ""), request_dict.get("target_lang"))
236
- tts_res = _tts_logic(translated, request_dict.get("target_lang"), request_dict.get("speaker_wav"))
 
 
 
 
 
 
 
237
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
238
  else: res = {"error": f"Unknown action: {action}"}
 
 
 
239
  finally:
240
- print(f"--- [v107] ✨ SUCCESS: {action} ({time.time()-t1:.2f}s) ---")
241
  release_gpu_models()
242
  return res
243
 
@@ -246,7 +259,7 @@ async def lifespan(app: FastAPI):
246
  Thread(target=warmup_task, daemon=True).start()
247
  yield
248
 
249
- # πŸš€ STEP 1: DEFINE FASTAPI
250
  app = FastAPI(lifespan=lifespan)
251
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
252
 
@@ -255,31 +268,22 @@ async def api_process(request: Request):
255
  try:
256
  req_data = await request.json()
257
  if req_data.get("action") == "health":
258
- return {"status": "awake", "warm": WARMUP_STATUS["complete"]}
259
  return core_process(req_data)
260
  except Exception as e: return {"error": str(e)}
261
 
262
  @app.get("/health")
263
- def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "107"}
264
-
265
- @app.post("/api/v1/clear_cache")
266
- async def clear_cache_api():
267
- try:
268
- release_gpu_models()
269
- return {"status": "success"}
270
- except: return {"status": "error"}
271
 
272
- # πŸš€ STEP 2: DEFINE GRADIO
273
  def gradio_fn(req_json):
274
  try: return json.dumps(core_process(json.loads(req_json)))
275
  except Exception as e: return json.dumps({"error": str(e)})
276
 
277
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v107")
278
  demo.queue()
279
 
280
- # MOUNT
281
  app = gr.mount_gradio_app(app, demo, path="/")
282
 
283
  if __name__ == "__main__":
284
- print("πŸš€ [v107] Starting Unified Server (ZeroGPU Recovery)...")
285
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")
 
1
+ # πŸš€ V108: ZEROGPU REQUEST-WAIT PROTOCOL
2
  # Must be first to patch environment correctly
3
  try:
4
  import spaces
 
10
  if f is None: return lambda x: x
11
  return f
12
 
13
+ from fastapi import FastAPI, Request
14
  from fastapi.middleware.cors import CORSMiddleware
15
  from contextlib import asynccontextmanager
16
  import gradio as gr
 
19
  import torch
20
  import os
21
  import tempfile
 
22
  import json
23
  import time
24
  import torchaudio
 
29
  from threading import Thread, Lock
30
  from huggingface_hub import snapshot_download
31
 
32
+ # πŸ›‘οΈ 1. SILENCE & ENV (v108)
33
  logging.getLogger("transformers").setLevel(logging.ERROR)
34
  logging.getLogger("TTS").setLevel(logging.ERROR)
 
35
  os.environ["CT2_VERBOSE"] = "0"
36
  os.environ["ORT_LOGGING_LEVEL"] = "3"
37
+ os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
38
+ os.environ["GRADIO_SERVER_PORT"] = "7860"
39
 
40
+ # πŸ› οΈ 2. COMPATIBILITY PATCHES (v108)
41
  if "torchaudio.backend" not in sys.modules:
42
  backend = types.ModuleType("torchaudio.backend")
43
  common = types.ModuleType("torchaudio.backend.common")
 
82
  from df.enhance import init_df
83
  import deep_translator
84
 
85
+ # FORCE REBUILD: 17:18:00 Jan 21 2026
86
+ # v108: Rebuilt with full requirements. Wait protocol active.
87
 
88
  os.environ["COQUI_TOS_AGREED"] = "1"
89
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
 
92
  WARMUP_LOCK = Lock()
93
 
94
  def activate_gpu_models(action):
95
+ """v108: Optimized Activation"""
96
  global MODELS, WARMUP_STATUS
97
  local_only = WARMUP_STATUS["complete"]
98
 
 
101
  try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
102
  except: pass
103
  if not stt_on_gpu:
104
+ print(f"πŸŽ™οΈ [v108] Activating Whisper (GPU: int8_float16)...")
105
  try:
106
  if MODELS["stt"]: del MODELS["stt"]
107
  gc.collect(); torch.cuda.empty_cache()
108
  time.sleep(0.5)
 
 
109
  MODELS["stt"] = WhisperModel(
110
  "large-v3",
111
  device="cuda",
112
+ compute_type="int8_float16",
113
  num_workers=1,
 
114
  local_files_only=local_only
115
  )
116
  except Exception as e:
 
124
  tts_on_gpu = "cuda" in curr
125
  except: pass
126
  if MODELS["tts"] is None or not tts_on_gpu:
127
+ print(f"πŸ”Š [v108] Activating XTTS-v2 (GPU)...")
128
  try:
129
  if MODELS["tts"] is None:
130
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
 
139
  if MODELS["translate"] is None: MODELS["translate"] = "active"
140
 
141
  def release_gpu_models():
142
+ """v108: Resilient Release"""
143
  global MODELS
144
+ print("🧹 [v108] Releasing GPU resources.")
145
  try:
146
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
147
  del MODELS["stt"]
 
156
  time.sleep(0.5)
157
 
158
  def warmup_task():
159
+ """v108: System Preparation"""
160
  global WARMUP_STATUS
161
  with WARMUP_LOCK:
162
  if WARMUP_STATUS["complete"] or WARMUP_STATUS["in_progress"]: return
163
  WARMUP_STATUS["in_progress"] = True
164
+ print("\nπŸ”₯ --- V108: ZEROGPU WARMUP STARTED ---")
165
  try:
166
+ # Pre-download everything to CPU first
167
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
168
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
169
+ MODELS["denoiser"] = init_df()
170
  chatterbox_utils.warmup_chatterbox()
171
  WARMUP_STATUS["complete"] = True
172
+ print(f"βœ… --- SYSTEM READY: v108 --- \n")
173
+ except Exception as e:
174
+ print(f"❌ Warmup Error: {e}")
175
  finally: WARMUP_STATUS["in_progress"] = False
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  @spaces.GPU(duration=150)
178
  def core_process(request_dict):
179
  action = request_dict.get("action")
180
+ print(f"--- [v108] πŸš€ REQUEST: {action} ---")
181
+
182
+ # πŸ”₯ v108: WAIT PROTOCOL
183
+ max_wait = 180 # 3 minutes for absolute first-time build/download
184
+ waited = 0
185
+ while not WARMUP_STATUS["complete"] and waited < max_wait:
186
+ if waited % 5 == 0: print(f"⏳ System warming up... (waited {waited}s)")
187
+ time.sleep(1)
188
+ waited += 1
189
+
190
+ if not WARMUP_STATUS["complete"]:
191
+ return {"error": "System still warming up. Please try again in 30 seconds."}
192
+
193
  t1 = time.time()
 
194
  activate_gpu_models(action)
195
  try:
196
+ if action == "stt":
197
+ audio_bytes = base64.b64decode(request_dict.get("file"))
198
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
199
+ f.write(audio_bytes); temp_path = f.name
200
+ try:
201
+ segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
202
+ res = {"text": " ".join([s.text for s in segments]).strip()}
203
+ finally:
204
+ if os.path.exists(temp_path): os.unlink(temp_path)
205
+
206
+ elif action == "translate":
207
+ res = {"translated": deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang", "en")).translate(request_dict.get("text"))}
208
+
209
+ elif action == "tts":
210
+ text = request_dict.get("text")
211
+ lang = request_dict.get("lang", "en")
212
+ XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
213
+ clean_lang = lang.strip().lower().split('-')[0]
214
+ mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
215
+
216
+ if mapped_lang:
217
+ speaker_wav_path = None
218
+ if request_dict.get("speaker_wav"):
219
+ sb = base64.b64decode(request_dict.get("speaker_wav"))
220
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
221
+ f.write(sb); speaker_wav_path = f.name
222
+ else: speaker_wav_path = "default_speaker.wav"
223
+
224
+ try:
225
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
226
+ output_path = output_file.name
227
+ MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=output_path, speaker_wav=speaker_wav_path)
228
+ with open(output_path, "rb") as f: res = {"audio": base64.b64encode(f.read()).decode()}
229
+ finally:
230
+ if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
231
+ if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
232
+ else:
233
+ audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
234
+ res = {"audio": base64.b64encode(audio_bytes).decode()}
235
+
236
  elif action == "s2st":
237
+ # Combined logic
238
+ req_copy = request_dict.copy()
239
+ req_copy["action"] = "stt"
240
+ stt_res = core_process.__wrapped__(req_copy) # Recursive but without double GPU wrapper
241
+
242
+ translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
243
+
244
+ req_tts = {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")}
245
+ tts_res = core_process.__wrapped__(req_tts)
246
+
247
  res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
248
  else: res = {"error": f"Unknown action: {action}"}
249
+ except Exception as e:
250
+ print(f"❌ Error in core_process: {traceback.format_exc()}")
251
+ res = {"error": str(e)}
252
  finally:
253
+ print(f"--- [v108] ✨ DONE ({time.time()-t1:.2f}s) ---")
254
  release_gpu_models()
255
  return res
256
 
 
259
  Thread(target=warmup_task, daemon=True).start()
260
  yield
261
 
262
+ # πŸš€ 4. FastAPI & Gradio Unified
263
  app = FastAPI(lifespan=lifespan)
264
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
265
 
 
268
  try:
269
  req_data = await request.json()
270
  if req_data.get("action") == "health":
271
+ return {"status": "awake", "warm": WARMUP_STATUS["complete"], "v": "108"}
272
  return core_process(req_data)
273
  except Exception as e: return {"error": str(e)}
274
 
275
  @app.get("/health")
276
+ def health(): return {"status": "ok", "warm": WARMUP_STATUS["complete"], "v": "108"}
 
 
 
 
 
 
 
277
 
 
278
  def gradio_fn(req_json):
279
  try: return json.dumps(core_process(json.loads(req_json)))
280
  except Exception as e: return json.dumps({"error": str(e)})
281
 
282
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v108")
283
  demo.queue()
284
 
 
285
  app = gr.mount_gradio_app(app, demo, path="/")
286
 
287
  if __name__ == "__main__":
288
+ print("πŸš€ [v108] Starting System...")
289
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="error")