TGPro1 committed on
Commit
81932e5
·
verified ·
1 Parent(s): ad3d045

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +44 -35
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # πŸš€ V119: ZEROGPU HOPPER RESILIENT (STABILITY OVERRIDE)
2
  try:
3
  import spaces
4
  except ImportError:
@@ -18,48 +18,46 @@ import os
18
  import tempfile
19
  import json
20
  import time
21
- import torchaudio
22
  import gc
23
  import sys
24
- import types
25
- import logging
26
  import traceback
27
- from threading import Thread
28
- from huggingface_hub import snapshot_download, hf_hub_download
29
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
30
 
31
- # πŸ›‘οΈ 1. SILENCE & ENV (v119)
 
32
  logging.getLogger("transformers").setLevel(logging.ERROR)
33
  os.environ["COQUI_TOS_AGREED"] = "1"
34
- os.environ["CT2_VERBOSE"] = "0"
35
 
36
  # πŸ“¦ 2. GLOBAL MODELS (LAZY LOAD)
37
- MODELS = {"stt": None, "tts": None, "translate": None}
38
 
39
- # πŸ› οΈ 3. CORE PROCESSING (v119: STABILITY FIRST)
40
- @spaces.GPU(duration=150)
41
  def core_process(request_dict):
42
  global MODELS
43
  action = request_dict.get("action")
44
- print(f"--- [v119] πŸš€ PROCESSING: {action} ---")
45
  t1 = time.time()
46
 
47
  try:
48
- # v119: LAZY LOAD INSIDE GPU SESSION (Prevents Startup Hangs)
49
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
50
- print("πŸŽ™οΈ Loading Whisper (Transformers Pipeline, float16)...")
51
- # Using Transformers instead of faster-whisper for MIG stability
52
- model_id = "openai/whisper-large-v3"
53
  MODELS["stt"] = pipeline(
54
  "automatic-speech-recognition",
55
  model=model_id,
56
- torch_dtype=torch.float16,
57
- device="cuda"
 
58
  )
59
 
60
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
61
- print("πŸ”Š Loading XTTS-v2 (Native float16)...")
62
  from TTS.api import TTS
 
63
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
64
 
65
  # πŸ› οΈ Execute Logic
@@ -68,8 +66,15 @@ def core_process(request_dict):
68
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
69
  f.write(audio_bytes); temp_path = f.name
70
  try:
71
- # v119: Transcribe via Transformers
72
- result = MODELS["stt"](temp_path, generate_kwargs={"language": request_dict.get("lang")})
 
 
 
 
 
 
 
73
  res = {"text": result["text"].strip()}
74
  finally:
75
  if os.path.exists(temp_path): os.unlink(temp_path)
@@ -81,7 +86,8 @@ def core_process(request_dict):
81
  elif action == "tts":
82
  text = request_dict.get("text")
83
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
84
- clean_lang = (request_dict.get("lang") or "en").strip().lower().split('-')[0]
 
85
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
86
 
87
  if mapped_lang:
@@ -90,8 +96,13 @@ def core_process(request_dict):
90
  sb = base64.b64decode(request_dict.get("speaker_wav"))
91
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
92
  f.write(sb); speaker_wav_path = f.name
93
- else: speaker_wav_path = "default_speaker.wav"
94
-
 
 
 
 
 
95
  try:
96
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
97
  output_path = output_file.name
@@ -109,9 +120,9 @@ def core_process(request_dict):
109
  print("πŸ”„ Step 1: STT...")
110
  s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
111
  text = s_res.get("text", "")
112
- print(f"πŸ”„ Step 2: Translation to {request_dict.get('target_lang')}...")
113
  import deep_translator
114
- target = request_dict.get("target_lang")
115
  translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
116
  print("πŸ”„ Step 3: TTS...")
117
  t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
@@ -119,11 +130,10 @@ def core_process(request_dict):
119
  else: res = {"error": "Invalid action"}
120
 
121
  except Exception as e:
122
- print(f"❌ [v119] ERROR: {traceback.format_exc()}")
123
  res = {"error": str(e)}
124
  finally:
125
- print(f"--- [v119] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
126
- # Aggressive memory cleanup for ZeroGPU
127
  gc.collect()
128
  if torch.cuda.is_available(): torch.cuda.empty_cache()
129
  return res
@@ -136,22 +146,21 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
136
  async def api_process(request: Request):
137
  try:
138
  data = await request.json()
139
- if data.get("action") == "health": return {"status": "awake", "v": "119"}
140
  return core_process(data)
141
  except Exception as e: return {"error": str(e)}
142
 
143
  @app.get("/health")
144
- def health(): return {"status": "ok", "v": "119"}
145
 
146
  def gradio_fn(req_json):
147
  try: return json.dumps(core_process(json.loads(req_json)))
148
  except Exception as e: return json.dumps({"error": str(e)})
149
 
150
- # Unified UI
151
- demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v119")
152
  demo.queue()
153
  app = gr.mount_gradio_app(app, demo, path="/")
154
 
155
  if __name__ == "__main__":
156
- print("πŸš€ [v119] Starting Resilient Server on Port 7860...")
157
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")
 
1
+ # πŸš€ V120: ZEROGPU HOPPER TURBO (FLASH ATTENTION ENABLED)
2
  try:
3
  import spaces
4
  except ImportError:
 
18
  import tempfile
19
  import json
20
  import time
 
21
  import gc
22
  import sys
 
 
23
  import traceback
24
+ from huggingface_hub import snapshot_download
25
+ from transformers import pipeline
 
26
 
27
+ # πŸ›‘οΈ 1. SILENCE & ENV (v120)
28
+ import logging
29
  logging.getLogger("transformers").setLevel(logging.ERROR)
30
  os.environ["COQUI_TOS_AGREED"] = "1"
31
+ os.environ["PYTHONWARNINGS"] = "ignore"
32
 
33
  # πŸ“¦ 2. GLOBAL MODELS (LAZY LOAD)
34
+ MODELS = {"stt": None, "tts": None}
35
 
36
+ # πŸ› οΈ 3. CORE PROCESSING (v120: FLASH SPEED)
37
+ @spaces.GPU(duration=120)
38
  def core_process(request_dict):
39
  global MODELS
40
  action = request_dict.get("action")
41
+ print(f"--- [v120] ⚑ HOPPER ACTIVATED: {action} ---")
42
  t1 = time.time()
43
 
44
  try:
45
+ # v120: Whisper Large-v3-Turbo + Flash Attention 2 (H200 Optimized)
46
  if action in ["stt", "s2st"] and MODELS["stt"] is None:
47
+ print("πŸŽ™οΈ Loading Whisper Turbo (v3) + FlashAttention-2...")
48
+ model_id = "openai/whisper-large-v3-turbo"
 
49
  MODELS["stt"] = pipeline(
50
  "automatic-speech-recognition",
51
  model=model_id,
52
+ torch_dtype=torch.bfloat16,
53
+ device="cuda",
54
+ model_kwargs={"attn_implementation": "flash_attention_2"}
55
  )
56
 
57
  if action in ["tts", "s2st"] and MODELS["tts"] is None:
58
+ print("πŸ”Š Loading XTTS-v2 (Hopper BF16 Optimized)...")
59
  from TTS.api import TTS
60
+ # Note: XTTS-v2 doesn't natively support bfloat16 in its loader yet, but we'll use gpu=True
61
  MODELS["tts"] = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
62
 
63
  # πŸ› οΈ Execute Logic
 
66
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
67
  f.write(audio_bytes); temp_path = f.name
68
  try:
69
+ # v120: Optimized Transcription
70
+ lang = request_dict.get("lang")
71
+ gen_kwargs = {"language": lang} if lang and len(lang) <= 3 else {}
72
+ result = MODELS["stt"](
73
+ temp_path,
74
+ chunk_length_s=30,
75
+ batch_size=8,
76
+ generate_kwargs=gen_kwargs
77
+ )
78
  res = {"text": result["text"].strip()}
79
  finally:
80
  if os.path.exists(temp_path): os.unlink(temp_path)
 
86
  elif action == "tts":
87
  text = request_dict.get("text")
88
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
89
+ raw_lang = (request_dict.get("lang") or "en").strip().lower()
90
+ clean_lang = raw_lang.split('-')[0]
91
  mapped_lang = XTTS_MAP.get(clean_lang) or ("zh-cn" if clean_lang == "zh" else None)
92
 
93
  if mapped_lang:
 
96
  sb = base64.b64decode(request_dict.get("speaker_wav"))
97
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
98
  f.write(sb); speaker_wav_path = f.name
99
+ else:
100
+ # Use a default speaker if available, or just use the first available
101
+ speaker_wav_path = "default_speaker.wav"
102
+ if not os.path.exists(speaker_wav_path):
103
+ # Fallback to internal speaker if default not found
104
+ speaker_wav_path = None
105
+
106
  try:
107
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
108
  output_path = output_file.name
 
120
  print("πŸ”„ Step 1: STT...")
121
  s_res = core_process.__wrapped__({**request_dict, "action": "stt"})
122
  text = s_res.get("text", "")
123
+ print(f"πŸ”„ Step 2: Translation ({request_dict.get('target_lang')})...")
124
  import deep_translator
125
+ target = request_dict.get("target_lang") or "en"
126
  translated = deep_translator.GoogleTranslator(source='auto', target=target).translate(text)
127
  print("πŸ”„ Step 3: TTS...")
128
  t_res = core_process.__wrapped__({"action": "tts", "text": translated, "lang": target, "speaker_wav": request_dict.get("speaker_wav")})
 
130
  else: res = {"error": "Invalid action"}
131
 
132
  except Exception as e:
133
+ print(f"❌ [v120] ERROR: {traceback.format_exc()}")
134
  res = {"error": str(e)}
135
  finally:
136
+ print(f"--- [v120] ✨ FINISHED IN {time.time()-t1:.2f}s ---")
 
137
  gc.collect()
138
  if torch.cuda.is_available(): torch.cuda.empty_cache()
139
  return res
 
146
  async def api_process(request: Request):
147
  try:
148
  data = await request.json()
149
+ if data.get("action") == "health": return {"status": "awake", "v": "120"}
150
  return core_process(data)
151
  except Exception as e: return {"error": str(e)}
152
 
153
  @app.get("/health")
154
+ def health(): return {"status": "ok", "v": "120"}
155
 
156
  def gradio_fn(req_json):
157
  try: return json.dumps(core_process(json.loads(req_json)))
158
  except Exception as e: return json.dumps({"error": str(e)})
159
 
160
+ demo = gr.Interface(fn=gradio_fn, inputs="text", outputs="text", title="πŸš€ AI Engine v120 (Hopper Turbo)")
 
161
  demo.queue()
162
  app = gr.mount_gradio_app(app, demo, path="/")
163
 
164
  if __name__ == "__main__":
165
+ print("πŸš€ [v120] Starting Hopper Turbo Engine on Port 7860...")
166
  uvicorn.run(app, host="0.0.0.0", port=7860, log_level="warning")