TGPro1 committed on
Commit
d2aef21
·
verified ·
1 Parent(s): 25dea23

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +26 -41
app.py CHANGED
@@ -6,15 +6,15 @@ import torch
6
  import tempfile
7
  import traceback
8
  import gc
9
- from fastapi import FastAPI, Request
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
  import uvicorn
13
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
14
  from TTS.api import TTS
15
 
16
- # --- [v139] πŸš€ H200 SAFEST MODE (FP32 + Standard Attention) ---
17
- print(f"--- [v139] πŸ“‘ BOOTING SAFEST ENGINE ---")
18
 
19
  try:
20
  import spaces
@@ -27,14 +27,13 @@ except ImportError:
27
  if f is None: return lambda x: x
28
  return f
29
 
30
- # --- Strict Stability Config ---
31
  os.environ["COQUI_TOS_AGREED"] = "1"
32
  os.environ["PYTHONWARNINGS"] = "ignore"
33
- # Disable all hardware acceleration that might cause kernel alignment errors
34
- os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
35
- torch.backends.cuda.matmul.allow_tf32 = False
36
- torch.backends.cudnn.allow_tf32 = False
37
- torch.backends.cudnn.deterministic = True # Extra safety
38
 
39
  app = FastAPI()
40
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
@@ -42,17 +41,18 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], all
42
  MODELS = {"stt": None, "tts": None}
43
 
44
  def load_gpu_models():
45
- """Persistent loading into GPU VRAM (FP32 focus)."""
46
  global MODELS
47
  device = "cuda"
48
 
49
  if MODELS.get("stt") is None:
50
- print("--- [v139] πŸ“₯ LOADING NATIVE WHISPER (FP32 / No-SDPA) ---")
51
  model_id = "openai/whisper-large-v3-turbo"
52
- # Force float32 to avoid CUBLAS_STATUS_INVALID_VALUE on H200 MIG
53
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
54
- model_id, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
55
- ).to(device)
 
56
  processor = AutoProcessor.from_pretrained(model_id)
57
 
58
  MODELS["stt"] = pipeline(
@@ -62,36 +62,31 @@ def load_gpu_models():
62
  feature_extractor=processor.feature_extractor,
63
  torch_dtype=torch.float32,
64
  device=device,
65
- # Explicitly avoid SDPA/Flash Attention to dodge kernel bugs
66
- model_kwargs={"attn_implementation": "eager"}
67
  )
68
- print("--- [v139] βœ… WHISPER LOADED (FP32) ---")
69
 
70
  if MODELS.get("tts") is None:
71
- print("--- [v139] πŸ“₯ LOADING XTTS (SINGLETON) ---")
72
- # XTTS is generally stable if in VRAM
73
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
74
- print("--- [v139] βœ… XTTS LOADED ---")
75
 
76
  @spaces.GPU(duration=120)
77
  def core_process(request_dict):
78
  global MODELS
79
  action = request_dict.get("action")
80
- print(f"--- [v139] πŸ› οΈ SAFE ENGINE: {action} ---")
81
  t1 = time.time()
82
 
83
  try:
84
  load_gpu_models()
85
 
86
- # πŸŽ™οΈ STT PATH
87
  if action in ["stt", "s2st"]:
88
  audio_bytes = base64.b64decode(request_dict.get("file"))
89
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
90
  f.write(audio_bytes); temp_path = f.name
91
-
92
  try:
93
  lang = request_dict.get("lang")
94
- # batch_size=1 for maximum stability
95
  result = MODELS["stt"](temp_path, batch_size=1, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
96
  stt_text = result["text"].strip()
97
  finally:
@@ -99,19 +94,16 @@ def core_process(request_dict):
99
 
100
  if action == "stt": return {"text": stt_text}
101
 
102
- # πŸ”Š TTS PATH
103
  if action in ["tts", "s2st"]:
104
  text = (request_dict.get("text") if action == "tts" else stt_text).strip()
105
  trans_text = text
106
-
107
  if action == "s2st":
108
  from deep_translator import GoogleTranslator
109
  target = request_dict.get("target_lang") or "en"
110
  text = GoogleTranslator(source='auto', target=target).translate(stt_text)
111
  trans_text = text
112
 
113
- if len(text) < 2 or not any(c.isalnum() for c in text):
114
- return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
115
 
116
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
117
  raw_lang = (request_dict.get("lang") if action == "tts" else target).strip().lower()
@@ -121,10 +113,8 @@ def core_process(request_dict):
121
  if mapped_lang:
122
  speaker_wav_path = "default_speaker.wav"
123
  if not os.path.exists(speaker_wav_path): speaker_wav_path = None
124
-
125
  try:
126
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f:
127
- out_p = out_f.name
128
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
129
  with open(out_p, "rb") as f: audio_b64 = base64.b64encode(f.read()).decode()
130
  finally:
@@ -135,31 +125,26 @@ def core_process(request_dict):
135
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
136
 
137
  except Exception as e:
138
- print(f"❌ [v139] ERROR: {traceback.format_exc()}")
139
  return {"error": str(e)}
140
  finally:
141
- print(f"--- [v139] ✨ DONE ({time.time()-t1:.1f}s) ---")
142
  torch.cuda.empty_cache()
143
 
144
  @app.post("/process")
145
  async def api_process(request: Request):
146
  try:
147
  data = await request.json()
148
- if data.get("action") == "health": return {"status": "awake", "v": "139"}
149
  return core_process(data)
150
  except Exception as e: return {"error": str(e)}
151
 
152
  @app.get("/health")
153
- def health(): return {"status": "ok", "v": "139", "gpu": HAS_SPACES}
154
 
155
  @app.get("/", response_class=HTMLResponse)
156
  def root():
157
- return """
158
- <html><head><title>S2ST v139</title><style>body { font-family: sans-serif; background: #111; color: #eee; text-align: center; padding-top: 50px; }</style></head>
159
- <body><h1>πŸš€ AI Engine v139 (FP32 SAFE)</h1><p>H200 Native Stability Test</p><div id="log">Awaiting test...</div>
160
- <script>fetch('/health').then(r=>r.json()).then(d=>document.getElementById('log').innerText=JSON.stringify(d));</script>
161
- </body></html>
162
- """
163
 
164
  if __name__ == "__main__":
165
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
6
  import tempfile
7
  import traceback
8
  import gc
9
+ from fastapi import FastAPI, Request, Response
10
  from fastapi.middleware.cors import CORSMiddleware
11
  from fastapi.responses import HTMLResponse
12
  import uvicorn
13
  from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
14
  from TTS.api import TTS
15
 
16
+ # --- [v140] πŸš€ H200 CORE STABILIZATION (Dynamic Workspace + Explicit FP32) ---
17
+ print(f"--- [v140] πŸ“‘ BOOTING CORE ENGINE ---")
18
 
19
  try:
20
  import spaces
 
27
  if f is None: return lambda x: x
28
  return f
29
 
30
+ # --- System Config ---
31
  os.environ["COQUI_TOS_AGREED"] = "1"
32
  os.environ["PYTHONWARNINGS"] = "ignore"
33
+ # REMOVED: CUBLAS_WORKSPACE_CONFIG (Let the driver decide)
34
+ torch.backends.cuda.matmul.allow_tf32 = True # Allow TF32 for better alignment on Hopper
35
+ torch.backends.cudnn.allow_tf32 = True
36
+ torch.backends.cudnn.benchmark = False # Avoid erratic kernel selection
 
37
 
38
  app = FastAPI()
39
  app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
41
  MODELS = {"stt": None, "tts": None}
42
 
43
  def load_gpu_models():
44
+ """Persistent loading with explicit casting."""
45
  global MODELS
46
  device = "cuda"
47
 
48
  if MODELS.get("stt") is None:
49
+ print("--- [v140] πŸ“₯ LOADING WHISPER (EXPLICIT FP32) ---")
50
  model_id = "openai/whisper-large-v3-turbo"
51
+ # Load and force cast to float()
52
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
53
+ model_id, low_cpu_mem_usage=True, use_safetensors=True
54
+ ).to(device).float() # FORCE FLOAT32
55
+
56
  processor = AutoProcessor.from_pretrained(model_id)
57
 
58
  MODELS["stt"] = pipeline(
 
62
  feature_extractor=processor.feature_extractor,
63
  torch_dtype=torch.float32,
64
  device=device,
65
+ model_kwargs={"attn_implementation": "eager"}
 
66
  )
67
+ print("--- [v140] βœ… WHISPER LOADED (FORCED FP32) ---")
68
 
69
  if MODELS.get("tts") is None:
70
+ print("--- [v140] πŸ“₯ LOADING XTTS ---")
 
71
  MODELS["tts"] = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
72
+ print("--- [v140] βœ… XTTS LOADED ---")
73
 
74
  @spaces.GPU(duration=120)
75
  def core_process(request_dict):
76
  global MODELS
77
  action = request_dict.get("action")
78
+ print(f"--- [v140] πŸ› οΈ CORE ENGINE: {action} ---")
79
  t1 = time.time()
80
 
81
  try:
82
  load_gpu_models()
83
 
 
84
  if action in ["stt", "s2st"]:
85
  audio_bytes = base64.b64decode(request_dict.get("file"))
86
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
87
  f.write(audio_bytes); temp_path = f.name
 
88
  try:
89
  lang = request_dict.get("lang")
 
90
  result = MODELS["stt"](temp_path, batch_size=1, generate_kwargs={"language": lang if lang and len(lang) <= 3 else None})
91
  stt_text = result["text"].strip()
92
  finally:
 
94
 
95
  if action == "stt": return {"text": stt_text}
96
 
 
97
  if action in ["tts", "s2st"]:
98
  text = (request_dict.get("text") if action == "tts" else stt_text).strip()
99
  trans_text = text
 
100
  if action == "s2st":
101
  from deep_translator import GoogleTranslator
102
  target = request_dict.get("target_lang") or "en"
103
  text = GoogleTranslator(source='auto', target=target).translate(stt_text)
104
  trans_text = text
105
 
106
+ if len(text) < 2: return {"audio": ""} if action == "tts" else {"text": stt_text, "translated": "", "audio": ""}
 
107
 
108
  XTTS_MAP = {"en": "en", "de": "de", "fr": "fr", "es": "es", "it": "it", "pl": "pl", "pt": "pt", "tr": "tr", "ru": "ru", "nl": "nl", "cs": "cs", "ar": "ar", "hu": "hu", "ko": "ko", "hi": "hi", "zh": "zh-cn"}
109
  raw_lang = (request_dict.get("lang") if action == "tts" else target).strip().lower()
 
113
  if mapped_lang:
114
  speaker_wav_path = "default_speaker.wav"
115
  if not os.path.exists(speaker_wav_path): speaker_wav_path = None
 
116
  try:
117
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_f: out_p = out_f.name
 
118
  MODELS["tts"].tts_to_file(text=text, language=mapped_lang, file_path=out_p, speaker_wav=speaker_wav_path)
119
  with open(out_p, "rb") as f: audio_b64 = base64.b64encode(f.read()).decode()
120
  finally:
 
125
  return {"text": stt_text, "translated": trans_text, "audio": audio_b64}
126
 
127
  except Exception as e:
128
+ print(f"❌ [v140] ERROR: {traceback.format_exc()}")
129
  return {"error": str(e)}
130
  finally:
131
+ print(f"--- [v140] ✨ DONE ({time.time()-t1:.1f}s) ---")
132
  torch.cuda.empty_cache()
133
 
134
@app.post("/process")
async def api_process(request: Request):
    """JSON entry point for all engine actions.

    Health pings are answered immediately without touching the GPU
    worker; every other action is forwarded to ``core_process``.
    Any failure is reported as ``{"error": ...}`` rather than raised.
    """
    try:
        payload = await request.json()
        # Short-circuit health probes so they never spin up the models.
        if payload.get("action") == "health":
            return {"status": "awake", "v": "140"}
        return core_process(payload)
    except Exception as exc:
        return {"error": str(exc)}
141
 
142
@app.get("/health")
def health():
    """Liveness probe: reports API version and whether the Spaces GPU
    decorator was importable (``HAS_SPACES``)."""
    status = {"status": "ok", "v": "140", "gpu": HAS_SPACES}
    return status
144
 
145
@app.get("/", response_class=HTMLResponse)
def root():
    """Minimal HTML status page for the v140 engine."""
    # Assemble the page in two steps; the rendered bytes are identical
    # to the original single f-string.
    body = f"<h1>πŸš€ AI Engine v140 (STABLE BASELINE)</h1><p>GPU: {HAS_SPACES}</p>"
    return f"<html><body>{body}</body></html>"
 
 
 
 
 
148
 
149
if __name__ == "__main__":
    # Serve on all interfaces at the Hugging Face Spaces default port.
    uvicorn.run(app, host="0.0.0.0", port=7860)