TGPro1 committed on
Commit
58069b9
Β·
verified Β·
1 Parent(s): 1936bbb

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +16 -24
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V120: ZEROGPU HOPPER TURBO (FLASH ATTENTION ENABLED)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -24,7 +24,7 @@ import traceback
24
  from huggingface_hub import snapshot_download
25
  from transformers import pipeline
26
 
27
- # πŸ›‘οΈ 1. SILENCE & ENV (v120)
28
  import logging
29
  logging.getLogger("transformers").setLevel(logging.ERROR)
30
  os.environ["COQUI_TOS_AGREED"] = "1"
@@ -33,31 +33,30 @@ os.environ["PYTHONWARNINGS"] = "ignore"
33
  # πŸ“¦ 2. GLOBAL MODELS (LAZY LOAD)
34
  MODELS = {"stt": None, "tts": None}
35
 
36
- # πŸ› οΈ 3. CORE PROCESSING (v120: FLASH SPEED)
37
  @spaces.GPU(duration=120)
38
  def core_process(request_dict):
39
  global MODELS
40
  action = request_dict.get("action")
41
- print(f"--- [v120] ⚑ HOPPER ACTIVATED: {action} ---")
42
  t1 = time.time()
43
 
44
  try:
45
- # v120: Whisper Large-v3-Turbo + Flash Attention 2 (H200 Optimized)
46
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
47
- print("πŸŽ™οΈ Loading Whisper Turbo (v3) + FlashAttention-2...")
48
  model_id = "openai/whisper-large-v3-turbo"
 
49
  MODELS["stt"] = pipeline(
50
  "automatic-speech-recognition",
51
  model=model_id,
52
  torch_dtype=torch.bfloat16,
53
- device="cuda",
54
- model_kwargs={"attn_implementation": "flash_attention_2"}
55
  )
56
 
57
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
58
- print("πŸ”Š Loading XTTS-v2 (Hopper BF16 Optimized)...")
59
  from TTS.api import TTS
60
- # Note: XTTS-v2 doesn't natively support bfloat16 in its loader yet, but we'll use gpu=True
61
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
62
 
63
  # πŸ› οΈ Execute Logic
@@ -66,7 +65,7 @@ def core_process(request_dict):
66
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
67
  f.write(audio_bytes); temp_path = f.name
68
  try:
69
- # v120: Optimized Transcription
70
  lang = request_dict.get("lang")
71
  gen_kwargs = {"language": lang} if lang and len(lang) <= 3 else {}
72
  result = MODELS["stt"](
@@ -97,11 +96,8 @@ def core_process(request_dict):
97
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
98
  f.write(sb); speaker_wav_path = f.name
99
  else:
100
- # Use a default speaker if available, or just use the first available
101
  speaker_wav_path = "default_speaker.wav"
102
- if not os.path.exists(speaker_wav_path):
103
- # Fallback to internal speaker if default not found
104
- speaker_wav_path = None
105
 
106
  try:
107
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
@@ -117,23 +113,20 @@ def core_process(request_dict):
117
  res = {"audio": base64.b64encode(audio_bytes).decode()}
118
 
119
  elif action == "s2st":
120
- print("πŸ”„ Step 1: STT...")
121
  s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
122
  text = s_res.get("text", "")
123
- print(f"πŸ”„ Step 2: Translation ({request_dict.get('target_lang')})...")
124
  import deep_translator
125
  target = request_dict.get("target_lang") or "en"
126
  translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
127
- print("πŸ”„ Step 3: TTS...")
128
  t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
129
  res = {"text": text, "translated": translated, "audio": t_res.get("audio")}
130
  else: res = {"error": "Invalid action"}
131
 
132
  except Exception as e:
133
- print(f"❌ [v120] ERROR: {traceback.format_exc()}")
134
  res = {"error": str(e)}
135
  finally:
136
- print(f"--- [v120] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
137
  gc.collect()
138
  if torch.cuda.is_available(): torch.cuda.empty_cache()
139
  return res
@@ -146,21 +139,20 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
146
  async def api_process(request: Request):
147
  try:
148
  data = await request.json()
149
- if data.get("action") == "health": return {"status": "awake", "v": "120"}
150
  return core_process(data)
151
  except Exception as e: return {"error": str(e)}
152
 
153
  @app.get("/health")
154
- def health(): return {"status": "ok", "v": "120"}
155
 
156
  def gradio_fn(req_json):
157
  try: return json.dumps(core_process(json.loads(req_json)))
158
  except Exception as e: return json.dumps({"error": str(e)})
159
 
160
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v120 (Hopper Turbo)")
161
  demo.queue()
162
  app = gr.mount_gradio_app(app, demo, path="/")
163
 
164
  if __name__ == "__main__":
165
- print("πŸš€ [v120] Starting Hopper Turbo Engine on Port 7860...")
166
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")
 
1
+ # πŸš€ V121: ZEROGPU HOPPER STABLE (BUILD FIX)
2
  try:
3
  import spaces
4
  except ImportError:
 
24
  from huggingface_hub import snapshot_download
25
  from transformers import pipeline
26
 
27
+ # πŸ›‘οΈ 1. SILENCE & ENV (v121)
28
  import logging
29
  logging.getLogger("transformers").setLevel(logging.ERROR)
30
  os.environ["COQUI_TOS_AGREED"] = "1"
 
33
  # πŸ“¦ 2. GLOBAL MODELS (LAZY LOAD)
34
  MODELS = {"stt": None, "tts": None}
35
 
36
+ # πŸ› οΈ 3. CORE PROCESSING (v121: STABLE SPEED)
37
  @spaces.GPU(duration=120)
38
  def core_process(request_dict):
39
  global MODELS
40
  action = request_dict.get("action")
41
+ print(f"--- [v121] ⚑ HOPPER ACTIVATED: {action} ---")
42
  t1 = time.time()
43
 
44
  try:
45
+ # v121: Whisper Large-v3-Turbo (Hopper BF16 Optimized)
46
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
47
+ print("πŸŽ™οΈ Loading Whisper Turbo (v3) [BF16]...")
48
  model_id = "openai/whisper-large-v3-turbo"
49
+ # We let transformers auto-select the best attention (SDPA/Flash)
50
  MODELS["stt"] = pipeline(
51
  "automatic-speech-recognition",
52
  model=model_id,
53
  torch_dtype=torch.bfloat16,
54
+ device="cuda"
 
55
  )
56
 
57
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
58
+ print("πŸ”Š Loading XTTS-v2 (GPU Optimized)...")
59
  from TTS.api import TTS
 
60
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
61
 
62
  # πŸ› οΈ Execute Logic
 
65
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
66
  f.write(audio_bytes); temp_path = f.name
67
  try:
68
+ # v121: Flexible Transcription
69
  lang = request_dict.get("lang")
70
  gen_kwargs = {"language": lang} if lang and len(lang) <= 3 else {}
71
  result = MODELS["stt"](
 
96
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
97
  f.write(sb); speaker_wav_path = f.name
98
  else:
 
99
  speaker_wav_path = "default_speaker.wav"
100
+ if not os.path.exists(speaker_wav_path): speaker_wav_path = None
 
 
101
 
102
  try:
103
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
 
113
  res = {"audio": base64.b64encode(audio_bytes).decode()}
114
 
115
  elif action == "s2st":
 
116
  s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
117
  text = s_res.get("text", "")
 
118
  import deep_translator
119
  target = request_dict.get("target_lang") or "en"
120
  translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
 
121
  t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
122
  res = {"text": text, "translated": translated, "audio": t_res.get("audio")}
123
  else: res = {"error": "Invalid action"}
124
 
125
  except Exception as e:
126
+ print(f"❌ [v121] ERROR: {traceback.format_exc()}")
127
  res = {"error": str(e)}
128
  finally:
129
+ print(f"--- [v121] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
130
  gc.collect()
131
  if torch.cuda.is_available(): torch.cuda.empty_cache()
132
  return res
 
139
  async def api_process(request: Request):
140
  try:
141
  data = await request.json()
142
+ if data.get("action") == "health": return {"status": "awake", "v": "121"}
143
  return core_process(data)
144
  except Exception as e: return {"error": str(e)}
145
 
146
  @app.get("/health")
147
+ def health(): return {"status": "ok", "v": "121"}
148
 
149
  def gradio_fn(req_json):
150
  try: return json.dumps(core_process(json.loads(req_json)))
151
  except Exception as e: return json.dumps({"error": str(e)})
152
 
153
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v121")
154
  demo.queue()
155
  app = gr.mount_gradio_app(app, demo, path="/")
156
 
157
  if __name__ == "__main__":
 
158
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")