TGPro1 committed on
Commit
ad3d045
·
verified ·
1 Parent(s): 456b557

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +61 -139
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # 🚀 V118: ZEROGPU HOPPER STEADY (PRODUCTION GRADE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -8,10 +8,9 @@ except ImportError:
8
  if f is None: return lambda x: x
9
  return f
10
 
 
11
  from fastapi import FastAPI, Request
12
  from fastapi.middleware.cors import CORSMiddleware
13
- from contextlib import asynccontextmanager
14
- import gradio as gr
15
  import uvicorn
16
  import base64
17
  import torch
@@ -25,125 +24,58 @@ import sys
25
  import types
26
  import logging
27
  import traceback
 
28
  from huggingface_hub import snapshot_download, hf_hub_download
 
29
 
30
- # 🛡️ 1. SILENCE & ENV (v118)
31
  logging.getLogger("transformers").setLevel(logging.ERROR)
32
- logging.getLogger("TTS").setLevel(logging.ERROR)
33
- os.environ["CT2_VERBOSE"] = "0"
34
- os.environ["ORT_LOGGING_LEVEL"] = "3"
35
  os.environ["COQUI_TOS_AGREED"] = "1"
 
36
 
37
- # 🛠️ 2. TOP-LEVEL ASSET PREPARATION (Ensures HF Readiness)
38
- print("\nπŸ“¦ [v118] TOP-LEVEL: Preparing AI Assets...")
39
- try:
40
- WHISPER_PATH = snapshot_download("Systran/faster-whisper-large-v3")
41
- XTTS_PATH = snapshot_download("coqui/XTTS-v2")
42
- print("βœ… Assets cached on disk.")
43
- except Exception as e:
44
- print(f"⚠️ Pre-download warning: {e}")
45
- WHISPER_PATH = "large-v3"
46
-
47
- # 🛠️ 3. COMPATIBILITY PATCHES
48
- if "torchaudio.backend" not in sys.modules:
49
- backend = types.ModuleType("torchaudio.backend")
50
- common = types.ModuleType("torchaudio.backend.common")
51
- try: common.AudioMetaData = torchaudio.AudioMetaData
52
- except AttributeError:
53
- class AudioMetaData: pass
54
- common.AudioMetaData = AudioMetaData
55
- backend.common = common
56
- sys.modules["torchaudio.backend"] = backend
57
- sys.modules["torchaudio.backend.common"] = common
58
-
59
- if not hasattr(torchaudio, "info"):
60
- def mock_info(filepath, **kwargs):
61
- from types import SimpleNamespace
62
- import wave
63
- try:
64
- with wave.open(filepath, "rb") as f:
65
- return SimpleNamespace(sample_rate=f.getframerate(), num_frames=f.getnframes(), num_channels=f.getnchannels(), bits_per_sample=f.getsampwidth() * 8, encoding="PCM_S")
66
- except: return SimpleNamespace(sample_rate=48000, num_frames=0, num_channels=1)
67
- torchaudio.info = mock_info
68
-
69
- # 📦 4. AI LIBRARIES
70
- import chatterbox_utils
71
- from faster_whisper import WhisperModel
72
- from TTS.api import TTS
73
- from df.enhance import init_df
74
- import deep_translator
75
-
76
- # v118: Hopper Steady. Persistent RAM Init. int8 GPU.
77
- MODELS = {"stt": None, "translate": None, "tts": None, "denoiser": None}
78
-
79
- def activate_gpu_models(action):
80
- """v118: Robust GPU Promotion"""
81
- global MODELS
82
-
83
- if action in ["stt", "s2st"]:
84
- stt_on_gpu = False
85
- try: stt_on_gpu = MODELS["stt"] is not None and MODELS["stt"].model.device == "cuda"
86
- except: pass
87
-
88
- if not stt_on_gpu:
89
- print(f"πŸŽ™οΈ [v118] PROMOTE: Whisper (GPU, int8)...")
90
- try:
91
- gc.collect(); torch.cuda.empty_cache()
92
- MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cuda", compute_type="int8", num_workers=1)
93
- except Exception as e:
94
- print(f"⚠️ GPU STT Fail: {e}")
95
- MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
96
-
97
- if action in ["tts", "s2st"]:
98
- tts_on_gpu = False
99
- try:
100
- params = next(MODELS["tts"].synthesizer.tts_model.parameters())
101
- tts_on_gpu = "cuda" in str(params.device)
102
- except: pass
103
-
104
- if MODELS["tts"] is not None and not tts_on_gpu:
105
- print(f"πŸ”Š [v118] PROMOTE: XTTS to GPU...")
106
- try: MODELS["tts"].to("cuda")
107
- except: pass
108
-
109
- chatterbox_utils.load_chatterbox(device="cpu")
110
- if MODELS["denoiser"] is None:
111
- try: MODELS["denoiser"] = init_df()
112
- except: pass
113
- if MODELS["translate"] is None: MODELS["translate"] = "active"
114
-
115
- def release_gpu_models():
116
- """v118: Graceful Offload"""
117
- global MODELS
118
- try:
119
- if MODELS["stt"] and MODELS["stt"].model.device == "cuda":
120
- MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8", local_files_only=True)
121
- if MODELS["tts"]:
122
- try: MODELS["tts"].to("cpu")
123
- except: pass
124
- except: pass
125
- gc.collect()
126
- if torch.cuda.is_available(): torch.cuda.empty_cache()
127
 
 
128
  @spaces.GPU(duration=150)
129
  def core_process(request_dict):
 
130
  action = request_dict.get("action")
131
- print(f"--- [v118] πŸš€ REQUEST: {action} ---")
132
-
133
  t1 = time.time()
134
- activate_gpu_models(action)
135
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  if action == "stt":
137
  audio_bytes = base64.b64decode(request_dict.get("file"))
138
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
139
  f.write(audio_bytes); temp_path = f.name
140
  try:
141
- segments, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
142
- res = {"text": " ".join([s.text for s in segments]).strip()}
 
143
  finally:
144
  if os.path.exists(temp_path): os.unlink(temp_path)
145
 
146
  elif action == "translate":
 
147
  res = {"translated": deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang", "en")).translate(request_dict.get("text"))}
148
 
149
  elif action == "tts":
@@ -169,67 +101,57 @@ def core_process(request_dict):
169
  if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
170
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
171
  else:
 
172
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
173
  res = {"audio": base64.b64encode(audio_bytes).decode()}
174
 
175
  elif action == "s2st":
176
- # Direct logic sequence in v118 (No recursion)
177
- audio_bytes = base64.b64decode(request_dict.get("file"))
178
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
179
- f.write(audio_bytes); temp_path = f.name
180
- try:
181
- # 1. STT
182
- segs, _ = MODELS["stt"].transcribe(temp_path, language=request_dict.get("lang"), beam_size=1)
183
- stt_text = " ".join([s.text for s in segs]).strip()
184
- # 2. Translated
185
- target = request_dict.get("target_lang")
186
- translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(stt_text)
187
- # 3. TTS
188
- final_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
189
- res = {"text": stt_text, "translated": translated, "audio": final_res.get("audio")}
190
- finally:
191
- if os.path.exists(temp_path): os.unlink(temp_path)
192
- else: res = {"error": f"Unknown action: {action}"}
193
  except Exception as e:
194
- print(f"❌ Fault: {traceback.format_exc()}")
195
  res = {"error": str(e)}
196
  finally:
197
- print(f"--- [v118] ✨ FINISH ({time.time()-t1:.2f}s) ---")
198
- release_gpu_models()
 
 
199
  return res
200
 
201
- @asynccontextmanager
202
- async def lifespan(app: FastAPI):
203
- print("πŸ”₯ [v118] RAM Warming...")
204
- MODELS["stt"] = WhisperModel(WHISPER_PATH, device="cpu", compute_type="int8")
205
- MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
206
- chatterbox_utils.warmup_chatterbox()
207
- print("βœ… [v118] ENGINE READY.")
208
- yield
209
-
210
- # 🚀 FastAPI
211
- app = FastAPI(lifespan=lifespan)
212
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
213
 
214
  @app.post("/api/v1/process")
215
  async def api_process(request: Request):
216
  try:
217
- req_data = await request.json()
218
- if req_data.get("action") == "health": return {"status": "awake", "v": "118"}
219
- return core_process(req_data)
220
  except Exception as e: return {"error": str(e)}
221
 
222
  @app.get("/health")
223
- def health(): return {"status": "ok", "v": "118"}
224
 
225
  def gradio_fn(req_json):
226
  try: return json.dumps(core_process(json.loads(req_json)))
227
  except Exception as e: return json.dumps({"error": str(e)})
228
 
229
- # Unified UI mount
230
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v118")
231
  demo.queue()
232
  app = gr.mount_gradio_app(app, demo, path="/")
233
 
234
  if __name__ == "__main__":
 
235
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")
 
1
+ # 🚀 V119: ZEROGPU HOPPER RESILIENT (STABILITY OVERRIDE)
2
  try:
3
  import spaces
4
  except ImportError:
 
8
  if f is None: return lambda x: x
9
  return f
10
 
11
+ import gradio as gr
12
  from fastapi import FastAPI, Request
13
  from fastapi.middleware.cors import CORSMiddleware
 
 
14
  import uvicorn
15
  import base64
16
  import torch
 
24
  import types
25
  import logging
26
  import traceback
27
+ from threading import Thread
28
  from huggingface_hub import snapshot_download, hf_hub_download
29
+ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
30
 
31
+ # 🛡️ 1. SILENCE & ENV (v119)
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
 
 
 
33
  os.environ["COQUI_TOS_AGREED"] = "1"
34
+ os.environ["CT2_VERBOSE"] = "0"
35
 
36
+ # 📦 2. GLOBAL MODELS (LAZY LOAD)
37
+ MODELS = {"stt": None, "tts": None, "translate": None}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ # 🛠️ 3. CORE PROCESSING (v119: STABILITY FIRST)
40
  @spaces.GPU(duration=150)
41
  def core_process(request_dict):
42
+ global MODELS
43
  action = request_dict.get("action")
44
+ print(f"--- [v119] πŸš€ PROCESSING: {action} ---")
 
45
  t1 = time.time()
46
+
47
  try:
48
+ # v119: LAZY LOAD INSIDE GPU SESSION (Prevents Startup Hangs)
49
+ if action in ["stt", "s2st"] and MODELS["stt"] is None:
50
+ print("πŸŽ™οΈ Loading Whisper (Transformers Pipeline, float16)...")
51
+ # Using Transformers instead of faster-whisper for MIG stability
52
+ model_id = "openai/whisper-large-v3"
53
+ MODELS["stt"] = pipeline(
54
+ "automatic-speech-recognition",
55
+ model=model_id,
56
+ torch_dtype=torch.float16,
57
+ device="cuda"
58
+ )
59
+
60
+ if action in ["tts", "s2st"] and MODELS["tts"] is None:
61
+ print("πŸ”Š Loading XTTS-v2 (Native float16)...")
62
+ from TTS.api import TTS
63
+ MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
64
+
65
+ # 🛠️ Execute Logic
66
  if action == "stt":
67
  audio_bytes = base64.b64decode(request_dict.get("file"))
68
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
69
  f.write(audio_bytes); temp_path = f.name
70
  try:
71
+ # v119: Transcribe via Transformers
72
+ result = MODELS["stt"](temp_path, generate_kwargs={"language": request_dict.get("lang")})
73
+ res = {"text": result["text"].strip()}
74
  finally:
75
  if os.path.exists(temp_path): os.unlink(temp_path)
76
 
77
  elif action == "translate":
78
+ import deep_translator
79
  res = {"translated": deep_translator.GoogleTranslator(source='auto', target=request_dict.get("target_lang", "en")).translate(request_dict.get("text"))}
80
 
81
  elif action == "tts":
 
101
  if speaker_wav_path and "default" not in speaker_wav_path and os.path.exists(speaker_wav_path): os.unlink(speaker_wav_path)
102
  if 'output_path' in locals() and os.path.exists(output_path): os.unlink(output_path)
103
  else:
104
+ import chatterbox_utils
105
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
106
  res = {"audio": base64.b64encode(audio_bytes).decode()}
107
 
108
  elif action == "s2st":
109
+ print("πŸ”„ Step 1: STT...")
110
+ s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
111
+ text = s_res.get("text", "")
112
+ print(f"πŸ”„ Step 2: Translation to {request_dict.get('target_lang')}...")
113
+ import deep_translator
114
+ target = request_dict.get("target_lang")
115
+ translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
116
+ print("πŸ”„ Step 3: TTS...")
117
+ t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
118
+ res = {"text": text, "translated": translated, "audio": t_res.get("audio")}
119
+ else: res = {"error": "Invalid action"}
120
+
 
 
 
 
 
121
  except Exception as e:
122
+ print(f"❌ [v119] ERROR: {traceback.format_exc()}")
123
  res = {"error": str(e)}
124
  finally:
125
+ print(f"--- [v119] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
126
+ # Aggressive memory cleanup for ZeroGPU
127
+ gc.collect()
128
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
129
  return res
130
 
131
+ # 🚀 4. SERVER SETUP
132
+ app = FastAPI()
 
 
 
 
 
 
 
 
 
133
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
134
 
135
  @app.post("/api/v1/process")
136
  async def api_process(request: Request):
137
  try:
138
+ data = await request.json()
139
+ if data.get("action") == "health": return {"status": "awake", "v": "119"}
140
+ return core_process(data)
141
  except Exception as e: return {"error": str(e)}
142
 
143
  @app.get("/health")
144
+ def health(): return {"status": "ok", "v": "119"}
145
 
146
  def gradio_fn(req_json):
147
  try: return json.dumps(core_process(json.loads(req_json)))
148
  except Exception as e: return json.dumps({"error": str(e)})
149
 
150
+ # Unified UI
151
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v119")
152
  demo.queue()
153
  app = gr.mount_gradio_app(app, demo, path="/")
154
 
155
  if __name__ == "__main__":
156
+ print("πŸš€ [v119] Starting Resilient Server on Port 7860...")
157
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")