TGPro1 committed on
Commit
3d20be5
·
verified ·
1 Parent(s): ac7f38f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +19 -19
app.py CHANGED
@@ -10,8 +10,8 @@ from fastapi import FastAPI, Request
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
 
13
- # --- [v159] πŸš€ HERO STABILITY ENGINE (FINAL MISSION) ---
14
- print(f"--- [v159] πŸ“‘ BOOTING ENGINE ---")
15
 
16
  # πŸ› οΈ CRITICAL: TORCHAUDIO MONKEYPATCH πŸ› οΈ
17
  # This bypasses the 'torchcodec' ModuleNotFoundError on Hugging Face Spaces.
@@ -19,7 +19,7 @@ import torchaudio
19
  import soundfile as sf
20
  import numpy as np
21
 
22
- defHeroLoad(filepath, **kwargs):
23
  """Robust alternative to torchaudio.load using soundfile."""
24
  try:
25
  data, samplerate = sf.read(filepath)
@@ -37,7 +37,7 @@ defHeroLoad(filepath, **kwargs):
37
  if not hasattr(torchaudio, 'load_orig'):
38
  torchaudio.load_orig = torchaudio.load
39
  torchaudio.load = HeroLoad
40
- print("--- [v159] 🩹 TORCHAUDIO HERO PATCH APPLIED (soundfile) ---")
41
 
42
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
43
  from TTS.api import TTS
@@ -72,16 +72,16 @@ MODELS = {"stt": None, "tts": None}
72
  def load_tts_cpu():
73
  global MODELS
74
  if MODELS.get("tts") is None:
75
- print("--- [v159] πŸ“₯ LOADING XTTS V2 (CPU MODE) ---")
76
  # CPU loading is 100% stable on ZeroGPU H200
77
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cpu")
78
- print("--- [v159] βœ… XTTS READY (CPU) ---")
79
 
80
  @spaces.GPU(duration=60)
81
  def gpu_stt_base(temp_path, lang):
82
  global MODELS
83
  if MODELS.get("stt") is None:
84
- print("--- [v159] πŸ“₯ LOADING WHISPER (Base) ON GPU ---")
85
  model_id = "openai/whisper-base"
86
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch.float32).to("cuda")
87
  processor = AutoProcessor.from_pretrained(model_id)
@@ -92,7 +92,7 @@ def gpu_stt_base(temp_path, lang):
92
  feature_extractor=processor.feature_extractor,
93
  device="cuda"
94
  )
95
- print(f"--- [v159] πŸŽ™οΈ RUNNING WHISPER INFERENCE ---")
96
  res = MODELS["stt"](temp_path, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
97
  return res["text"].strip()
98
 
@@ -101,9 +101,9 @@ async def handle_process(request: Request):
101
  try:
102
  data = await request.json()
103
  action = data.get("action")
104
- if action == "health": return {"status": "awake", "v": "159"}
105
 
106
- print(f"--- [v159] πŸ› οΈ {action.upper()} REQUESTED ---")
107
 
108
  # 🟒 STT PATH
109
  stt_text = ""
@@ -117,7 +117,7 @@ async def handle_process(request: Request):
117
  temp_path = f.name
118
  try:
119
  stt_text = gpu_stt_base(temp_path, data.get("lang"))
120
- print(f"--- [v159] πŸŽ™οΈ TRANSCRIPT: {stt_text} ---")
121
  finally:
122
  if os.path.exists(temp_path): os.unlink(temp_path)
123
 
@@ -132,10 +132,10 @@ async def handle_process(request: Request):
132
  target = data.get("target_lang") or data.get("lang") or "en"
133
 
134
  if action == "s2st":
135
- print(f"--- [v159] 🌏 TRANSLATING TO {target} ---")
136
  trans_text = GoogleTranslator(source='auto', target=target).translate(stt_text)
137
  text = trans_text
138
- print(f"--- [v159] πŸ“ TRANSLATED: {text} ---")
139
 
140
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
141
  clean_lang = target.split('-')[0].lower()
@@ -143,7 +143,7 @@ async def handle_process(request: Request):
143
 
144
  if not mapped_lang:
145
  if HAS_CHATTERBOX:
146
- print(f"--- [v159] πŸ“¦ FALLBACK: CHATTERBOX FOR {clean_lang} ---")
147
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
148
  audio_b64 = base64.b64encode(audio_bytes).decode()
149
  else: return {"error": f"Lang {clean_lang} unsupported"}
@@ -162,7 +162,7 @@ async def handle_process(request: Request):
162
  try:
163
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
164
  out_p = out_f.name
165
- print(f"--- [v159] πŸ”Š XTTS INFERENCE (CPU) ---")
166
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_path)
167
  with open(out_p, "rb") as f:
168
  audio_b64 = base64.b64encode(f.read()).decode()
@@ -174,20 +174,20 @@ async def handle_process(request: Request):
174
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
175
 
176
  except Exception as e:
177
- print(f"❌ [v159] HERO ERROR: {traceback.format_exc()}")
178
  return {"error": str(e)}
179
  finally:
180
- print(f"--- [v159] ✨ DONE ({time.time()-t1:.1f}s) ---")
181
 
182
  @app.post("/process")
183
  @app.post("/api/v1/process")
184
  async def api_process(request: Request): return await handle_process(request)
185
 
186
  @app.get("/health")
187
- def health(): return {"status": "ok", "v": "159", "mode": "HERO_STABLE", "gpu": torch.cuda.is_available()}
188
 
189
  @app.get("/", response_class=HTMLResponse)
190
- def root(): return "<h1>πŸš€ AI Engine v159 (HERO STABLE)</h1>"
191
 
192
  if __name__ == "__main__":
193
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
 
13
+ # --- [v160] πŸš€ HERO STABILITY ENGINE (FINAL MISSION) ---
14
+ print(f"--- [v160] πŸ“‘ BOOTING ENGINE ---")
15
 
16
  # πŸ› οΈ CRITICAL: TORCHAUDIO MONKEYPATCH πŸ› οΈ
17
  # This bypasses the 'torchcodec' ModuleNotFoundError on Hugging Face Spaces.
 
19
  import soundfile as sf
20
  import numpy as np
21
 
22
+ def HeroLoad(filepath, **kwargs):
23
  """Robust alternative to torchaudio.load using soundfile."""
24
  try:
25
  data, samplerate = sf.read(filepath)
 
37
  if not hasattr(torchaudio, 'load_orig'):
38
  torchaudio.load_orig = torchaudio.load
39
  torchaudio.load = HeroLoad
40
+ print("--- [v160] 🩹 TORCHAUDIO HERO PATCH APPLIED (soundfile) ---")
41
 
42
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
43
  from TTS.api import TTS
 
72
  def load_tts_cpu():
73
  global MODELS
74
  if MODELS.get("tts") is None:
75
+ print("--- [v160] πŸ“₯ LOADING XTTS V2 (CPU MODE) ---")
76
  # CPU loading is 100% stable on ZeroGPU H200
77
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cpu")
78
+ print("--- [v160] βœ… XTTS READY (CPU) ---")
79
 
80
  @spaces.GPU(duration=60)
81
  def gpu_stt_base(temp_path, lang):
82
  global MODELS
83
  if MODELS.get("stt") is None:
84
+ print("--- [v160] πŸ“₯ LOADING WHISPER (Base) ON GPU ---")
85
  model_id = "openai/whisper-base"
86
  model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch.float32).to("cuda")
87
  processor = AutoProcessor.from_pretrained(model_id)
 
92
  feature_extractor=processor.feature_extractor,
93
  device="cuda"
94
  )
95
+ print(f"--- [v160] πŸŽ™οΈ RUNNING WHISPER INFERENCE ---")
96
  res = MODELS["stt"](temp_path, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
97
  return res["text"].strip()
98
 
 
101
  try:
102
  data = await request.json()
103
  action = data.get("action")
104
+ if action == "health": return {"status": "awake", "v": "160"}
105
 
106
+ print(f"--- [v160] πŸ› οΈ {action.upper()} REQUESTED ---")
107
 
108
  # 🟒 STT PATH
109
  stt_text = ""
 
117
  temp_path = f.name
118
  try:
119
  stt_text = gpu_stt_base(temp_path, data.get("lang"))
120
+ print(f"--- [v160] πŸŽ™οΈ TRANSCRIPT: {stt_text} ---")
121
  finally:
122
  if os.path.exists(temp_path): os.unlink(temp_path)
123
 
 
132
  target = data.get("target_lang") or data.get("lang") or "en"
133
 
134
  if action == "s2st":
135
+ print(f"--- [v160] 🌏 TRANSLATING TO {target} ---")
136
  trans_text = GoogleTranslator(source='auto', target=target).translate(stt_text)
137
  text = trans_text
138
+ print(f"--- [v160] πŸ“ TRANSLATED: {text} ---")
139
 
140
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
141
  clean_lang = target.split('-')[0].lower()
 
143
 
144
  if not mapped_lang:
145
  if HAS_CHATTERBOX:
146
+ print(f"--- [v160] πŸ“¦ FALLBACK: CHATTERBOX FOR {clean_lang} ---")
147
  audio_bytes = chatterbox_utils.run_chatterbox_inference(text, clean_lang)
148
  audio_b64 = base64.b64encode(audio_bytes).decode()
149
  else: return {"error": f"Lang {clean_lang} unsupported"}
 
162
  try:
163
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
164
  out_p = out_f.name
165
+ print(f"--- [v160] πŸ”Š XTTS INFERENCE (CPU) ---")
166
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_path)
167
  with open(out_p, "rb") as f:
168
  audio_b64 = base64.b64encode(f.read()).decode()
 
174
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
175
 
176
  except Exception as e:
177
+ print(f"❌ [v160] HERO ERROR: {traceback.format_exc()}")
178
  return {"error": str(e)}
179
  finally:
180
+ print(f"--- [v160] ✨ DONE ({time.time()-t1:.1f}s) ---")
181
 
182
  @app.post("/process")
183
  @app.post("/api/v1/process")
184
  async def api_process(request: Request): return await handle_process(request)
185
 
186
  @app.get("/health")
187
+ def health(): return {"status": "ok", "v": "160", "mode": "HERO_STABLE", "gpu": torch.cuda.is_available()}
188
 
189
  @app.get("/", response_class=HTMLResponse)
190
+ def root(): return "<h1>πŸš€ AI Engine v160 (HERO STABLE)</h1>"
191
 
192
  if __name__ == "__main__":
193
  uvicorn.run(app, host="0.0.0.0", port=7860)