TGPro1 committed on
Commit
ff68d4e
Β·
verified Β·
1 Parent(s): 65a6e6f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +16 -24
app.py CHANGED
@@ -5,14 +5,14 @@ import base64
5
  import torch
6
  import tempfile
7
  import traceback
8
- import gc
9
  from fastapi import FastAPI, Request
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
  import uvicorn
13
 
14
- # --- [v154] πŸš€ PRO STABLE ENGINE (GPU-STT + CPU-TTS) ---
15
- print(f"--- [v154] πŸ“‘ BOOTING PRO STABLE ENGINE ---")
 
16
 
17
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
18
  from TTS.api import TTS
@@ -47,30 +47,25 @@ MODELS = {"stt": None, "tts": None}
47
def load_tts_cpu():
    """Lazily build the XTTS v2 synthesiser on CPU and cache it in MODELS.

    No-op if the "tts" slot is already populated; safe to call per request.
    """
    global MODELS
    if MODELS.get("tts") is not None:
        return  # already loaded — reuse the cached engine
    print("--- [v154] 📥 LOADING XTTS V2 (CPU MODE) ---")
    # XTTS on CPU is stable and avoids ZeroGPU kernel crashes
    engine = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    MODELS["tts"] = engine.to("cpu")
    print("--- [v154] ✅ XTTS READY (CPU) ---")
54
 
55
@spaces.GPU(duration=60)
def gpu_stt_inference(temp_path, lang):
    """Transcribe the audio file at *temp_path* with Whisper large-v3-turbo on GPU.

    The ASR pipeline is constructed once and cached in MODELS["stt"].
    *lang* is forwarded to Whisper only when it looks like a short language
    code (non-empty, <= 3 chars); otherwise language auto-detection is used.
    Returns the transcript with surrounding whitespace stripped.
    """
    global MODELS
    pipe = MODELS.get("stt")
    if pipe is None:
        print("--- [v154] 📥 LOADING WHISPER (Large-v3-Turbo) ON GPU ---")
        model_id = "openai/whisper-large-v3-turbo"
        asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            use_safetensors=True,
        ).to("cuda")
        asr_processor = AutoProcessor.from_pretrained(model_id)
        pipe = pipeline(
            "automatic-speech-recognition",
            model=asr_model,
            tokenizer=asr_processor.tokenizer,
            feature_extractor=asr_processor.feature_extractor,
            torch_dtype=torch.float16,
            device="cuda",
        )
        MODELS["stt"] = pipe
    language = lang if lang and len(lang) <= 3 else None
    result = pipe(temp_path, generate_kwargs={"language": language})
    return result["text"].strip()
76
 
@@ -78,25 +73,22 @@ async def handle_process(request: Request):
78
  try:
79
  data = await request.json()
80
  action = data.get("action")
81
- if action == "health": return {"status": "awake", "v": "154"}
82
 
83
- print(f"--- [v154] πŸ› οΈ {action} ---")
84
  t1 = time.time()
85
 
86
- # πŸŽ™οΈ STT (GPU)
87
  stt_text = ""
88
  if action in ["stt", "s2st"]:
89
  audio_bytes = base64.b64decode(data.get("file"))
90
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
91
  f.write(audio_bytes); temp_path = f.name
92
  try:
93
- stt_text = gpu_stt_inference(temp_path, data.get("lang"))
94
  finally:
95
  if os.path.exists(temp_path): os.unlink(temp_path)
96
-
97
  if action == "stt": return {"text": stt_text}
98
 
99
- # πŸ”Š TTS (CPU)
100
  if action in ["tts", "s2st"]:
101
  load_tts_cpu()
102
  text = (data.get("text") if action == "tts" else stt_text).strip()
@@ -138,20 +130,20 @@ async def handle_process(request: Request):
138
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
139
 
140
  except Exception as e:
141
- print(f"❌ [v154] ERROR: {traceback.format_exc()}")
142
  return {"error": str(e)}
143
  finally:
144
- print(f"--- [v154] ✨ DONE ({time.time()-t1:.1f}s) ---")
145
 
146
@app.post("/process")
@app.post("/api/v1/process")
async def api_process(request: Request):
    """Thin HTTP entry point: both POST routes delegate to handle_process."""
    return await handle_process(request)
149
 
150
@app.get("/health")
def health():
    """Liveness probe: reports engine version and whether CUDA is visible."""
    return {"status": "ok", "v": "154", "gpu": torch.cuda.is_available()}
152
 
153
@app.get("/", response_class=HTMLResponse)
def root():
    """Minimal landing page served at the Space's root URL."""
    return "<h1>🚀 AI Engine v154 (PRO STABLE)</h1>"
155
 
156
if __name__ == "__main__":
    # Bind on all interfaces at 7860, the Hugging Face Spaces default port.
    uvicorn.run(app, host="0.0.0.0", port=7860)
 
5
  import torch
6
  import tempfile
7
  import traceback
 
8
  from fastapi import FastAPI, Request
9
  from fastapi.middleware.cors import CORSMiddleware
10
  from fastapi.responses import HTMLResponse
11
  import uvicorn
12
 
13
+ # --- [v155] πŸš€ FINAL STABLE ENGINE (GPU-STT-Base + CPU-TTS-XTTS) ---
14
+ # This version uses the proven stable components for H200 ZeroGPU.
15
+ print(f"--- [v155] πŸ“‘ BOOTING FINAL ENGINE ---")
16
 
17
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
18
  from TTS.api import TTS
 
47
def load_tts_cpu():
    """Lazily build the XTTS v2 synthesiser on CPU and cache it in MODELS.

    No-op if the "tts" slot is already populated; safe to call per request.
    """
    global MODELS
    if MODELS.get("tts") is not None:
        return  # already loaded — reuse the cached engine
    print("--- [v155] 📥 LOADING XTTS V2 (CPU) ---")
    engine = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
    MODELS["tts"] = engine.to("cpu")
    print("--- [v155] ✅ XTTS READY ---")
53
 
54
@spaces.GPU(duration=60)
def gpu_stt_base(temp_path, lang):
    """Transcribe the audio file at *temp_path* with Whisper base on GPU.

    The ASR pipeline is constructed once and cached in MODELS["stt"].
    *lang* is forwarded to Whisper only when it looks like a short language
    code (non-empty, <= 3 chars); otherwise language auto-detection is used.
    Returns the transcript with surrounding whitespace stripped.
    """
    global MODELS
    pipe = MODELS.get("stt")
    if pipe is None:
        print("--- [v155] 📥 LOADING WHISPER (Base) ON GPU ---")
        model_id = "openai/whisper-base"
        asr_model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id, torch_dtype=torch.float32
        ).to("cuda")
        asr_processor = AutoProcessor.from_pretrained(model_id)
        pipe = pipeline(
            "automatic-speech-recognition",
            model=asr_model,
            tokenizer=asr_processor.tokenizer,
            feature_extractor=asr_processor.feature_extractor,
            device="cuda",
        )
        MODELS["stt"] = pipe
    language = lang if lang and len(lang) <= 3 else None
    result = pipe(temp_path, generate_kwargs={"language": language})
    return result["text"].strip()
71
 
 
73
  try:
74
  data = await request.json()
75
  action = data.get("action")
76
+ if action == "health": return {"status": "awake", "v": "155"}
77
 
78
+ print(f"--- [v155] πŸ› οΈ {action} ---")
79
  t1 = time.time()
80
 
 
81
  stt_text = ""
82
  if action in ["stt", "s2st"]:
83
  audio_bytes = base64.b64decode(data.get("file"))
84
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
85
  f.write(audio_bytes); temp_path = f.name
86
  try:
87
+ stt_text = gpu_stt_base(temp_path, data.get("lang"))
88
  finally:
89
  if os.path.exists(temp_path): os.unlink(temp_path)
 
90
  if action == "stt": return {"text": stt_text}
91
 
 
92
  if action in ["tts", "s2st"]:
93
  load_tts_cpu()
94
  text = (data.get("text") if action == "tts" else stt_text).strip()
 
130
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
131
 
132
  except Exception as e:
133
+ print(f"❌ [v155] ERROR: {traceback.format_exc()}")
134
  return {"error": str(e)}
135
  finally:
136
+ print(f"--- [v155] ✨ DONE ({time.time()-t1:.1f}s) ---")
137
 
138
@app.post("/process")
@app.post("/api/v1/process")
async def api_process(request: Request):
    """Thin HTTP entry point: both POST routes delegate to handle_process."""
    return await handle_process(request)
141
 
142
@app.get("/health")
def health():
    """Liveness probe: reports engine version and whether CUDA is visible."""
    return {"status": "ok", "v": "155", "gpu": torch.cuda.is_available()}
144
 
145
@app.get("/", response_class=HTMLResponse)
def root():
    """Minimal landing page served at the Space's root URL."""
    return "<h1>🚀 AI Engine v155 (STABLE)</h1>"
147
 
148
if __name__ == "__main__":
    # Bind on all interfaces at 7860, the Hugging Face Spaces default port.
    uvicorn.run(app, host="0.0.0.0", port=7860)