TGPro1 committed on
Commit
b9d3269
Β·
verified Β·
1 Parent(s): c4fd68e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +65 -68
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V114: ZEROGPU BULLETPROOF CACHE
2
  try:
3
  import spaces
4
  except ImportError:
@@ -28,7 +28,7 @@ import traceback
28
  from threading import Thread, Lock
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
- # πŸ›‘οΈ 1. SILENCE & ENV (v114)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
@@ -36,7 +36,7 @@ os.environ["ORT_LOGGING_LEVEL"] = "3"
36
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
37
  os.environ["GRADIO_SERVER_PORT"] = "7860"
38
 
39
- # πŸ› οΈ 2. COMPATIBILITY PATCHES (v114)
40
  if "torchaudio.backend" not in sys.modules:
41
  backend = types.ModuleType("torchaudio.backend")
42
  common = types.ModuleType("torchaudio.backend.common")
@@ -81,62 +81,61 @@ from TTS.api import TTS
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
- # FORCE BUILD TRIGGER: 18:35:00 Jan 21 2026
85
- # v114: Bulletproof Flag-File Sync.
86
 
87
  os.environ["COQUI_TOS_AGREED"] = "1"
88
  MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
89
  READY_FLAG = os.path.expanduser("~/.engine_ready")
90
-
91
- # Cleanup flag on boot
92
- if os.path.exists(READY_FLAG): os.remove(READY_FLAG)
93
 
94
  def is_system_ready():
95
- """v114: Process-Safe Readiness Check"""
96
  return os.path.exists(READY_FLAG)
97
 
98
  def activate_gpu_models(action):
99
- """v114: Instant Promotion (RAM -> GPU)"""
100
  global MODELS
101
-
102
- if action in ["stt", "s2st"]:
103
- stt_on_gpu = False
104
- try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
105
- except: pass
106
-
107
- if not stt_on_gpu:
108
- print(f"πŸŽ™οΈ [v114] Promoting Whisper (2 Workers, int8_float16)...")
109
- try:
110
- # Fast re-init (local_files_only=True ensures no network lag)
111
- MODELS["stt"] = WhisperModel(
112
- "large-v3",
113
- device="cuda",
114
- compute_type="int8_float16",
115
- num_workers=2,
116
- local_files_only=True
117
- )
118
- except: MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8", local_files_only=True)
 
 
119
 
120
- if action in ["tts", "s2st"]:
121
- tts_on_gpu = False
122
- try:
123
- params = next(MODELS["tts"].synthesizer.tts_model.parameters())
124
- tts_on_gpu = "cuda" in str(params.device)
125
- except: pass
126
-
127
- if MODELS["tts"] is not None and not tts_on_gpu:
128
- print(f"πŸ”Š [v114] Promoting XTTS-v2 to GPU...")
129
- try: MODELS["tts"].to("cuda")
130
  except: pass
 
 
 
 
 
131
 
132
- chatterbox_utils.load_chatterbox(device="cpu")
133
- if MODELS["denoiser"] is None:
134
- try: MODELS["denoiser"] = init_df()
135
- except: pass
136
- if MODELS["translate"] is None: MODELS["translate"] = "active"
137
 
138
  def release_gpu_models():
139
- """v114: Smooth Offloading"""
140
  global MODELS
141
  try:
142
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
@@ -145,36 +144,30 @@ def release_gpu_models():
145
  try: MODELS["tts"].to("cpu")
146
  except: pass
147
  except: pass
 
148
  if torch.cuda.is_available(): torch.cuda.empty_cache()
149
 
150
  def warmup_task():
151
- """v114: Heavy RAM Loading (Worker Thread)"""
152
- print("\nπŸ”₯ --- V114: BULLETPROOF WARMUP (RAM) ---")
 
153
  try:
154
- # 1. Load Whisper to RAM
155
  MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
156
- # 2. Load TTS to RAM
157
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
158
- # 3. Cache ONNX models
159
  chatterbox_utils.warmup_chatterbox()
160
  chatterbox_utils.load_chatterbox(device="cpu")
161
-
162
- # Write the Flag-File!
163
  with open(READY_FLAG, "w") as f: f.write("READY")
164
-
165
- print(f"βœ… --- ENGINE READY: v114 --- \n")
166
- except Exception as e:
167
- print(f"❌ Warmup Fault: {e}")
168
 
169
  @spaces.GPU(duration=150)
170
  def core_process(request_dict):
171
  action = request_dict.get("action")
172
- print(f"--- [v114] πŸš€ REQUEST: {action} ---")
173
 
174
- # πŸ”₯ v114: ZERO-WAIT (Flag Check)
175
  waited = 0
176
  while not is_system_ready() and waited < 300:
177
- if waited % 10 == 0: print(f"⏳ Syncing assets... ({waited}s)")
178
  time.sleep(1)
179
  waited += 1
180
 
@@ -186,7 +179,6 @@ def core_process(request_dict):
186
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
187
  f.write(audio_bytes); temp_path = f.name
188
  try:
189
- # v114: Explicit beam_size=1 for max speed
190
  segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
191
  res = {"text": " ".join([s.text for s in segments]).strip()}
192
  finally:
@@ -222,17 +214,22 @@ def core_process(request_dict):
222
  res = {"audio": base64.b64encode(audio_bytes).decode()}
223
 
224
  elif action == "s2st":
225
- # Combined logic (process-safe)
226
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
227
- translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_res.get("text", ""))
 
 
 
 
 
228
  tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")} )
229
- res = {"text": stt_res.get("text"), "translated": translated, "audio": tts_res.get("audio")}
230
  else: res = {"error": f"Unknown action: {action}"}
231
  except Exception as e:
232
- print(f"❌ Fault: {traceback.format_exc()}")
233
  res = {"error": str(e)}
234
  finally:
235
- print(f"--- [v114] ✨ DONE ({time.time()-t1:.2f}s) ---")
236
  release_gpu_models()
237
  return res
238
 
@@ -241,7 +238,7 @@ async def lifespan(app: FastAPI):
241
  Thread(target=warmup_task, daemon=True).start()
242
  yield
243
 
244
- # πŸš€ Server Lifecycle
245
  app = FastAPI(lifespan=lifespan)
246
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
247
 
@@ -250,18 +247,18 @@ async def api_process(request: Request):
250
  try:
251
  req_data = await request.json()
252
  if req_data.get("action") == "health":
253
- return {"status": "awake", "warm": is_system_ready(), "v": "114"}
254
  return core_process(req_data)
255
  except Exception as e: return {"error": str(e)}
256
 
257
  @app.get("/health")
258
- def health(): return {"status": "ok", "warm": is_system_ready(), "v": "114"}
259
 
260
  def gradio_fn(req_json):
261
  try: return json.dumps(core_process(json.loads(req_json)))
262
  except Exception as e: return json.dumps({"error": str(e)})
263
 
264
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v114")
265
  demo.queue()
266
  app = gr.mount_gradio_app(app, demo, path="/")
267
 
 
1
+ # πŸš€ V115: ZEROGPU HOPPER STABILITY (STABLE-FAST)
2
  try:
3
  import spaces
4
  except ImportError:
 
28
  from threading import Thread, Lock
29
  from huggingface_hub import snapshot_download, hf_hub_download
30
 
31
+ # πŸ›‘οΈ 1. SILENCE & ENV (v115)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  logging.getLogger("TTS").setLevel(logging.ERROR)
34
  os.environ["CT2_VERBOSE"] = "0"
 
36
  os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
37
  os.environ["GRADIO_SERVER_PORT"] = "7860"
38
 
39
+ # πŸ› οΈ 2. COMPATIBILITY PATCHES
40
  if "torchaudio.backend" not in sys.modules:
41
  backend = types.ModuleType("torchaudio.backend")
42
  common = types.ModuleType("torchaudio.backend.common")
 
81
  from df.enhance import init_df
82
  import deep_translator
83
 
84
+ # FORCE BUILD TRIGGER: 18:55:00 Jan 21 2026
85
+ # v115: Hopper Stability. num_workers=1. Serial Activation.
86
 
87
os.environ["COQUI_TOS_AGREED"] = "1"  # auto-accept the Coqui TTS license prompt at model load
# Shared model registry; entries stay None until warmup_task / activate_gpu_models fill them.
MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
# Flag file written by warmup_task once all models are in RAM; its existence is the
# process-safe "engine ready" signal checked by is_system_ready().
READY_FLAG = os.path.expanduser("~/.engine_ready")
# Serializes GPU promotion inside activate_gpu_models (v115 "serial safe activation").
ACTIVATE_LOCK = Lock()
 
 
91
 
92
def is_system_ready():
    """Process-safe readiness probe: True once the warmup flag file exists."""
    flag_path = READY_FLAG
    return os.path.exists(flag_path)
94
 
95
def activate_gpu_models(action):
    """v115: Serial Safe Activation.

    Promote the model(s) required for `action` ("stt", "tts" or "s2st")
    from their warm CPU/RAM copies onto the GPU. All promotion is
    serialized through ACTIVATE_LOCK so concurrent requests cannot race
    each other. If the GPU Whisper init crashes, falls back to a CPU
    instance so the request can still be served.
    """
    global MODELS
    with ACTIVATE_LOCK:
        if action in ["stt", "s2st"]:
            # Detect whether Whisper is already resident on the GPU.
            stt_on_gpu = False
            try:
                stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
            except Exception:
                pass  # not loaded yet, or backend lacks .model.device — treat as "not on GPU"

            if not stt_on_gpu:
                print("🎙️ [v115] Activating Whisper (Stable: 1 Worker)...")
                try:
                    # Free as much VRAM as possible before re-initializing.
                    gc.collect()
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    # local_files_only=True: weights were cached by warmup_task,
                    # so this re-init never hits the network.
                    MODELS["stt"] = WhisperModel(
                        "large-v3",
                        device="cuda",
                        compute_type="int8_float16",
                        num_workers=1,
                        local_files_only=True,
                    )
                except Exception as e:
                    print(f"⚠️ GPU STT Init crash: {e}")
                    # Best-effort fallback: keep serving on CPU rather than failing.
                    MODELS["stt"] = WhisperModel(
                        "large-v3",
                        device="cpu",
                        compute_type="int8",
                        local_files_only=True,
                    )

        if action in ["tts", "s2st"]:
            # Detect whether the XTTS weights already live on a CUDA device.
            tts_on_gpu = False
            try:
                params = next(MODELS["tts"].synthesizer.tts_model.parameters())
                tts_on_gpu = "cuda" in str(params.device)
            except Exception:
                pass  # TTS not loaded yet — leave tts_on_gpu False

            if MODELS["tts"] is not None and not tts_on_gpu:
                print("🔊 [v115] Activating XTTS-v2 (GPU)...")
                try:
                    MODELS["tts"].to("cuda")
                except Exception:
                    pass  # best-effort: the CPU copy remains usable if the move fails

        # Chatterbox stays on CPU (ONNX path); repeated loads are harmless here.
        chatterbox_utils.load_chatterbox(device="cpu")
        if MODELS["denoiser"] is None:
            try:
                MODELS["denoiser"] = init_df()
            except Exception:
                pass  # denoising is optional — continue without it
        if MODELS["translate"] is None:
            MODELS["translate"] = "active"
136
 
137
  def release_gpu_models():
138
+ """v115: Safe Offload"""
139
  global MODELS
140
  try:
141
  if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
 
144
  try: MODELS["tts"].to("cpu")
145
  except: pass
146
  except: pass
147
+ gc.collect()
148
  if torch.cuda.is_available(): torch.cuda.empty_cache()
149
 
150
def warmup_task():
    """v115: RAM Warming.

    Background worker (started as a daemon thread from the FastAPI
    lifespan hook) that loads every heavy model into CPU RAM, then
    writes READY_FLAG so is_system_ready() reports ready across
    processes.
    """
    # EAFP removal of a stale flag from a previous run: avoids the
    # exists()/remove() race if something else deletes it in between.
    try:
        os.remove(READY_FLAG)
    except FileNotFoundError:
        pass
    print("\n🔥 --- V115: HOPPER WARMUP ---")
    try:
        # 1. Whisper STT into RAM (CPU, int8).
        MODELS["stt"] = WhisperModel("large-v3", device="cpu", compute_type="int8")
        # 2. XTTS-v2 into RAM.
        MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
        # 3. Cache and load the chatterbox (ONNX) models on CPU.
        chatterbox_utils.warmup_chatterbox()
        chatterbox_utils.load_chatterbox(device="cpu")
        # Signal readiness to every worker process via the flag file.
        with open(READY_FLAG, "w") as f:
            f.write("READY")
        print("✅ --- ENGINE ON: v115 --- \n")
    except Exception as e:
        # Daemon thread: log the fault WITH its stack trace instead of
        # dying silently with only the message.
        print(f"❌ Warmup Fault: {e}")
        print(traceback.format_exc())
 
 
162
 
163
  @spaces.GPU(duration=150)
164
  def core_process(request_dict):
165
  action = request_dict.get("action")
166
+ print(f"--- [v115] πŸš€ START: {action} ---")
167
 
 
168
  waited = 0
169
  while not is_system_ready() and waited < 300:
170
+ if waited % 10 == 0: print(f"⏳ Waiting for Engine... ({waited}s)")
171
  time.sleep(1)
172
  waited += 1
173
 
 
179
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
180
  f.write(audio_bytes); temp_path = f.name
181
  try:
 
182
  segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
183
  res = {"text": " ".join([s.text for s in segments]).strip()}
184
  finally:
 
214
  res = {"audio": base64.b64encode(audio_bytes).decode()}
215
 
216
  elif action == "s2st":
217
+ print("πŸŽ™οΈ Step 1: Transcription...")
218
  stt_res = core_process.__wrapped__( {**request_dict, "action": "stt"} )
219
+ stt_text = stt_res.get("text", "")
220
+
221
+ print(f"🌍 Step 2: Translation ({request_dict.get('target_lang')})...")
222
+ translated = deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang")).translate(stt_text)
223
+
224
+ print("πŸ”Š Step 3: Synthesis...")
225
  tts_res = core_process.__wrapped__( {"action": "tts", "text": translated, "lang": request_dict.get("target_lang"), "speaker_wav": request_dict.get("speaker_wav")} )
226
+ res = {"text": stt_text, "translated": translated, "audio": tts_res.get("audio")}
227
  else: res = {"error": f"Unknown action: {action}"}
228
  except Exception as e:
229
+ print(f"❌ Error: {traceback.format_exc()}")
230
  res = {"error": str(e)}
231
  finally:
232
+ print(f"--- [v115] ✨ FINISH ({time.time()-t1:.2f}s) ---")
233
  release_gpu_models()
234
  return res
235
 
 
238
  Thread(target=warmup_task, daemon=True).start()
239
  yield
240
 
241
+ # πŸš€ Server
242
  app = FastAPI(lifespan=lifespan)
243
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
244
 
 
247
  try:
248
  req_data = await request.json()
249
  if req_data.get("action") == "health":
250
+ return {"status": "awake", "warm": is_system_ready(), "v": "115"}
251
  return core_process(req_data)
252
  except Exception as e: return {"error": str(e)}
253
 
254
@app.get("/health")
def health():
    """Lightweight liveness probe; `warm` tells whether warmup has finished."""
    warm = is_system_ready()
    return {"status": "ok", "warm": warm, "v": "115"}
256
 
257
def gradio_fn(req_json):
    """Gradio entry point: JSON string in, JSON string out.

    Parses the request, hands it to core_process, and serializes the
    result; any failure is reported as a JSON error object.
    """
    try:
        result = core_process(json.loads(req_json))
        return json.dumps(result)
    except Exception as e:
        return json.dumps({"error": str(e)})
260
 
261
# Gradio UI: a single text-in/text-out box that proxies raw JSON requests
# through gradio_fn (same request contract as the HTTP API).
demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="🚀 AI Engine v115")
demo.queue()  # enable Gradio's request queue for concurrent callers
app = gr.mount_gradio_app(app, demo, path="/")  # serve the UI at the site root
264