TGPro1 committed on
Commit
22c6fab
·
verified ·
1 Parent(s): 202e15e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +38 -52
app.py CHANGED
@@ -11,10 +11,10 @@ from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
  import uvicorn
13
 
14
- # --- [v152] 🚀 REFINED HYBRID ENGINE (CPU-STT + CPU-TRANS + GPU-TTS) ---
15
- print(f"--- [v152] 📡 BOOTING REFINED HYBRID ENGINE ---")
16
 
17
- from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
18
  from TTS.api import TTS
19
  from deep_translator import GoogleTranslator
20
 
@@ -42,66 +42,52 @@ os.environ["PYTHONWARNINGS"] = "ignore"
42
  app = FastAPI()
43
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
44
 
45
- MODELS = {"stt": None, "tts": None}
46
 
47
  def load_stt_cpu():
48
- """STT on CPU is stable and fast for Whisper Base."""
49
  global MODELS
50
  if MODELS.get("stt") is None:
51
- print("--- [v152] 📥 LOADING WHISPER (Base) ON CPU ---")
52
- model_id = "openai/whisper-base"
53
- MODELS["stt"] = pipeline(
54
- "automatic-speech-recognition",
55
- model=model_id,
56
- device="cpu", # Force CPU for stability
57
- torch_dtype=torch.float32
58
- )
59
- print("--- [v152] ✅ WHISPER READY (CPU) ---")
60
-
61
- def load_tts_gpu():
62
- global MODELS
63
- if MODELS.get("tts") is None:
64
- print("--- [v152] 📥 LOADING XTTS V2 ---")
65
- # Load to CPU first, then move to CUDA inside the decorated function if needed,
66
- # or just load directly if ZeroGPU allows.
67
- MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
68
- print("--- [v152] ✅ XTTS READY (CPU MEMORY) ---")
69
-
70
- @spaces.GPU(duration=120)
71
- def gpu_tts_inference(text, mapped_lang, speaker_path):
72
- """Isolated GPU inference for XTTS."""
73
- global MODELS
74
- if MODELS["tts"] is None:
75
- load_tts_gpu()
76
-
77
- # Move to GPU inside the decorated scope
78
- MODELS["tts"].to("cuda")
79
-
80
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
81
- out_p = out_f.name
82
-
83
  try:
84
- MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_path)
 
 
 
 
 
 
 
 
85
  with open(out_p, "rb") as f:
86
  audio_b64 = base64.b64encode(f.read()).decode()
87
  return audio_b64
88
  finally:
89
- # Move back to CPU to release VRAM as per ZeroGPU guidelines
90
- MODELS["tts"].to("cpu")
91
- if os.path.exists(out_p): os.unlink(out_p)
 
 
92
  torch.cuda.empty_cache()
93
 
94
  async def handle_process(request: Request):
95
  try:
96
  data = await request.json()
97
  action = data.get("action")
98
- if action == "health": return {"status": "awake", "v": "152"}
99
 
100
- print(f"--- [v152] 🛠️ ENGINE ACTION: {action} ---")
101
  t1 = time.time()
102
 
103
  # πŸŽ™οΈ STT (CPU)
104
- stt_text = None
105
  if action in ["stt", "s2st"]:
106
  load_stt_cpu()
107
  audio_bytes = base64.b64decode(data.get("file"))
@@ -111,11 +97,12 @@ async def handle_process(request: Request):
111
  lang = data.get("lang")
112
  res = MODELS["stt"](temp_path, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
113
  stt_text = res["text"].strip()
114
- if action == "stt": return {"text": stt_text}
115
  finally:
116
  if os.path.exists(temp_path): os.unlink(temp_path)
 
 
117
 
118
- # 🔊 TTS (Hybrid GPU)
119
  if action in ["tts", "s2st"]:
120
  text = (data.get("text") if action == "tts" else stt_text).strip()
121
  trans_text = text
@@ -147,7 +134,7 @@ async def handle_process(request: Request):
147
  if not os.path.exists(speaker_path): speaker_path = None
148
 
149
  try:
150
- audio_b64 = gpu_tts_inference(text, mapped_lang, speaker_path)
151
  finally:
152
  if speaker_wav_b64 and speaker_path and os.path.exists(speaker_path): os.unlink(speaker_path)
153
 
@@ -155,21 +142,20 @@ async def handle_process(request: Request):
155
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
156
 
157
  except Exception as e:
158
- print(f"❌ [v152] ERROR: {traceback.format_exc()}")
159
  return {"error": str(e)}
160
  finally:
161
- print(f"--- [v152] ✨ DONE ({time.time()-t1:.1f}s) ---")
162
 
163
  @app.post("/process")
164
  @app.post("/api/v1/process")
165
  async def api_process(request: Request): return await handle_process(request)
166
 
167
  @app.get("/health")
168
- def health():
169
- return {"status": "ok", "v": "152", "gpu": torch.cuda.is_available()}
170
 
171
  @app.get("/", response_class=HTMLResponse)
172
- def root(): return "<h1>🚀 AI Engine v152 (REFINED HYBRID)</h1>"
173
 
174
  if __name__ == "__main__":
175
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
11
  from fastapi.responses import HTMLResponse
12
  import uvicorn
13
 
14
+ # --- [v153] 🚀 EPHEMERAL GPU ENGINE ---
15
+ print(f"--- [v153] 📡 BOOTING EPHEMERAL ENGINE ---")
16
 
17
+ from transformers import pipeline
18
  from TTS.api import TTS
19
  from deep_translator import GoogleTranslator
20
 
 
42
  app = FastAPI()
43
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
44
 
45
+ MODELS = {"stt": None}
46
 
47
  def load_stt_cpu():
 
48
  global MODELS
49
  if MODELS.get("stt") is None:
50
+ print("--- [v153] 📥 LOADING WHISPER (Base) ON CPU ---")
51
+ MODELS["stt"] = pipeline("automatic-speech-recognition", model="openai/whisper-base", device="cpu")
52
+ print("--- [v153] ✅ WHISPER READY (CPU) ---")
53
+
54
+ @spaces.GPU(duration=180)
55
+ def ephemeral_tts(text, mapped_lang, speaker_path):
56
+ """Pure ephemeral loading on GPU to bypass VRAM watchdogs."""
57
+ print(f"--- [v153] 📥 LOADING XTTS EPOCH... ---")
58
+ local_tts = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  try:
60
+ local_tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
61
+ local_tts.to(torch.float32)
62
+
63
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
64
+ out_p = out_f.name
65
+
66
+ print(f"--- [v153] 🔊 INFERENCE... ---")
67
+ local_tts.tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_path)
68
+
69
  with open(out_p, "rb") as f:
70
  audio_b64 = base64.b64encode(f.read()).decode()
71
  return audio_b64
72
  finally:
73
+ print(f"--- [v153] 🧹 CLEANUP ---")
74
+ if local_tts:
75
+ del local_tts
76
+ if 'out_p' in locals() and os.path.exists(out_p): os.unlink(out_p)
77
+ gc.collect()
78
  torch.cuda.empty_cache()
79
 
80
  async def handle_process(request: Request):
81
  try:
82
  data = await request.json()
83
  action = data.get("action")
84
+ if action == "health": return {"status": "awake", "v": "153"}
85
 
86
+ print(f"--- [v153] 🛠️ {action} ---")
87
  t1 = time.time()
88
 
89
  # πŸŽ™οΈ STT (CPU)
90
+ stt_text = ""
91
  if action in ["stt", "s2st"]:
92
  load_stt_cpu()
93
  audio_bytes = base64.b64decode(data.get("file"))
 
97
  lang = data.get("lang")
98
  res = MODELS["stt"](temp_path, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
99
  stt_text = res["text"].strip()
 
100
  finally:
101
  if os.path.exists(temp_path): os.unlink(temp_path)
102
+
103
+ if action == "stt": return {"text": stt_text}
104
 
105
+ # 🔊 TTS (GPU)
106
  if action in ["tts", "s2st"]:
107
  text = (data.get("text") if action == "tts" else stt_text).strip()
108
  trans_text = text
 
134
  if not os.path.exists(speaker_path): speaker_path = None
135
 
136
  try:
137
+ audio_b64 = ephemeral_tts(text, mapped_lang, speaker_path)
138
  finally:
139
  if speaker_wav_b64 and speaker_path and os.path.exists(speaker_path): os.unlink(speaker_path)
140
 
 
142
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
143
 
144
  except Exception as e:
145
+ print(f"❌ [v153] ERROR: {traceback.format_exc()}")
146
  return {"error": str(e)}
147
  finally:
148
+ print(f"--- [v153] ✨ DONE ({time.time()-t1:.1f}s) ---")
149
 
150
  @app.post("/process")
151
  @app.post("/api/v1/process")
152
  async def api_process(request: Request): return await handle_process(request)
153
 
154
  @app.get("/health")
155
+ def health(): return {"status": "ok", "v": "153", "gpu": torch.cuda.is_available()}
 
156
 
157
  @app.get("/", response_class=HTMLResponse)
158
+ def root(): return "<h1>🚀 AI Engine v153 (EPHEMERAL)</h1>"
159
 
160
  if __name__ == "__main__":
161
  uvicorn.run(app, host="0.0.0.0", port=7860)