SmartHeal committed on
Commit a4ec7ff · verified · 1 Parent(s): b8b442a

Update src/ai_processor.py

Files changed (1)
  1. src/ai_processor.py +42 -8
src/ai_processor.py CHANGED
@@ -29,7 +29,7 @@ def _log_kv(prefix: str, kv: Dict):
 # --- Spaces GPU decorator (REQUIRED) ---
 from spaces import GPU as _SPACES_GPU
 
-@_SPACES_GPU(enable_queue=True)
+@_SPACES_GPU(enable_queue=True)  # enable_queue ignored by ZeroGPU but explicit is fine
 def smartheal_gpu_stub(ping: int = 0) -> str:
     return "ready"
 
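For context: on ZeroGPU ("Stateless GPU") Spaces, the spaces.GPU decorator is what attaches a GPU to the decorated call, and CUDA may only be initialized inside such a function. A minimal sketch of the pattern, with the duration value purely illustrative:

import spaces

@spaces.GPU(duration=120)  # request a GPU slice only while this call runs (value illustrative)
def gpu_entrypoint(payload: str) -> str:
    # Safe to import torch / touch CUDA here, and only here, on ZeroGPU.
    return "ready"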
@@ -94,7 +94,11 @@ def _import_hf_cls():
     return pipeline
 
 def _import_embeddings():
-    from langchain_community.embeddings import HuggingFaceEmbeddings
+    # Prefer the new package if available; fall back to community to avoid deprecation warnings
+    try:
+        from langchain_huggingface import HuggingFaceEmbeddings  # type: ignore
+    except Exception:
+        from langchain_community.embeddings import HuggingFaceEmbeddings  # type: ignore
     return HuggingFaceEmbeddings
 
 def _import_langchain_pdf():
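A quick usage sketch of what this helper returns; the embedding model name below is illustrative, not taken from this file:

HuggingFaceEmbeddings = _import_embeddings()
# Hypothetical model choice, for illustration only.
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vec = emb.embed_query("wound healing progress")  # -> list[float] for a vector store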
@@ -148,11 +152,34 @@ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional
     """
     Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
     """
+    import torch
+    if not torch.cuda.is_available():
+        raise RuntimeError("CUDA not available in worker (check ZeroGPU torch version).")
     from transformers import pipeline
     pipe = pipeline(
         task="image-text-to-text",
         model=model_id,
-        device_map="cpu",  # CUDA init happens here, safely in GPU worker
+        device_map={"": 0},  # be explicit: put everything on cuda:0
+        token=token,
+        trust_remote_code=True,
+        model_kwargs={"low_cpu_mem_usage": True},
+    )
+    out = pipe(text=messages, max_new_tokens=max_new_tokens, do_sample=False, temperature=0.2)
+    try:
+        txt = out[0]["generated_text"][-1].get("content", "")
+    except Exception:
+        txt = out[0].get("generated_text", "")
+    return (txt or "").strip() or "⚠️ Empty response"
+
+def _vlm_infer_cpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]) -> str:
+    """
+    CPU fallback path when the ZeroGPU grant fails or a CUDA wheel is unavailable.
+    """
+    from transformers import pipeline
+    pipe = pipeline(
+        task="image-text-to-text",
+        model=model_id,
+        device_map="cpu",
         token=token,
         trust_remote_code=True,
         model_kwargs={"low_cpu_mem_usage": True},
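The messages argument is the chat-style structure that image-text-to-text pipelines accept; a minimal sketch of what the caller assembles (image path and prompt are illustrative):

from PIL import Image

image = Image.open("wound.jpg")  # illustrative input
messages = [
    {"role": "user", "content": [
        {"type": "image", "image": image},
        {"type": "text", "text": "Describe the wound and suggest next steps."},
    ]},
]
# then: _vlm_infer_gpu(messages, "Qwen/Qwen2-VL-2B-Instruct", max_new_tokens=256, token=None)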
@@ -174,6 +201,7 @@ def generate_medgemma_report( # kept name so callers don't change
     """
     MedGemma replacement using Qwen/Qwen2-VL-2B-Instruct via image-text-to-text.
     Loads & runs ONLY inside a GPU worker to satisfy Stateless GPU constraints.
+    Falls back to CPU pipeline if a GPU grant/initialization fails.
     """
     if os.getenv("SMARTHEAL_ENABLE_VLM", "1") != "1":
         return "⚠️ VLM disabled"
@@ -200,12 +228,16 @@ def generate_medgemma_report( # kept name so callers don't change
         ]},
     ]
 
+    # Try GPU worker first, then CPU fallback
     try:
-        # IMPORTANT: do not import transformers or touch CUDA here. Only call the GPU worker.
         return _vlm_infer_gpu(messages, model_id, max_new_tokens, HF_TOKEN)
     except Exception as e:
-        logging.error(f"VLM call failed: {e}")
-        return "⚠️ VLM error"
+        logging.warning(f"GPU VLM failed; falling back to CPU: {e}")
+        try:
+            return _vlm_infer_cpu(messages, model_id, max_new_tokens, HF_TOKEN)
+        except Exception as e2:
+            logging.error(f"CPU VLM also failed: {e2}")
+            return "⚠️ VLM error"
 
 # ---------- Initialize CPU models ----------
 def load_yolo_model():
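The same primary/fallback cascade generalizes beyond this call site; a compact sketch with hypothetical names, mirroring the warning-then-error logging levels used above:

import logging

def infer_with_fallback(primary, fallback, *args):
    # Hypothetical helper: try the preferred path, degrade gracefully.
    try:
        return primary(*args)
    except Exception as e:
        logging.warning(f"primary failed; falling back: {e}")
        return fallback(*args)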
@@ -217,7 +249,7 @@ def load_yolo_model():
 
 def load_segmentation_model():
     load_model = _import_tf_loader()
-    return load_model(SEG_MODEL_PATH, compile=False, custom_objects={'InputLayer': tf.keras.layers.InputLayer})
+    return load_model(SEG_MODEL_PATH, compile=False)
 
 def load_classification_pipeline():
     pipe = _import_hf_cls()
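Dropping custom_objects is reasonable here: InputLayer is a built-in Keras layer, so overriding it usually just masks a TF/Keras version mismatch rather than fixing it. Assuming _import_tf_loader returns the stock Keras loader, the call reduces to:

import tensorflow as tf

# Assumed shape of the helper above: it returns Keras's standard loader.
model = tf.keras.models.load_model("models/seg_model.h5", compile=False)  # path illustrative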
@@ -255,6 +287,7 @@ def initialize_cpu_models() -> None:
         models_cache["seg"] = None
         logging.warning("Segmentation model file missing; skipping.")
     except Exception as e:
+        # Typical with Keras/TF version mismatch; pin TF/Keras 2.15 in requirements.
         models_cache["seg"] = None
         logging.warning(f"Segmentation unavailable: {e}")
 
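When this branch fires, logging the runtime versions makes the mismatch diagnosable; a small sketch, assuming a TF 2.x / Keras 2 build where tf.keras exposes __version__:

import logging
import tensorflow as tf

# A mismatch between these and the versions the model was saved with
# is the usual cause of the except branch above.
logging.info("TF %s / Keras %s", tf.__version__, tf.keras.__version__)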
@@ -419,7 +452,8 @@ def _grabcut_refine(bgr: np.ndarray, seed01: np.ndarray, iters: int = 3) -> np.n
     seed_dil = cv2.dilate(seed01, k, iterations=1)
     gc[seed01.astype(bool)] = cv2.GC_PR_FGD
     gc[seed_dil.astype(bool)] = cv2.GC_FGD
-    gc[0, :], gc[-1, :], gc[:, 0], gc[:, 1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
+    # force borders to background
+    gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
     bgdModel = np.zeros((1, 65), np.float64)
     fgdModel = np.zeros((1, 65), np.float64)
     cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
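The removed line cleared gc[:, 1] (the second column), so the right border was never forced to background; a tiny NumPy check makes the off-by-index visible:

import numpy as np

gc = np.full((4, 5), 3, dtype=np.uint8)  # 3 == cv2.GC_PR_FGD, for illustration
gc[:, 1] = 0    # buggy: clears the SECOND column
gc[:, -1] = 0   # fixed: clears the LAST column (the right border)
print(gc)       # columns 1 and -1 are now 0 (cv2.GC_BGD)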
 