Update src/ai_processor.py

src/ai_processor.py  CHANGED  (+42 -8)

@@ -29,7 +29,7 @@ def _log_kv(prefix: str, kv: Dict):
 # --- Spaces GPU decorator (REQUIRED) ---
 from spaces import GPU as _SPACES_GPU
 
-@_SPACES_GPU(enable_queue=True)
+@_SPACES_GPU(enable_queue=True)  # enable_queue ignored by ZeroGPU but explicit is fine
 def smartheal_gpu_stub(ping: int = 0) -> str:
     return "ready"
 
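
Note (not part of the diff): the stub above follows the standard ZeroGPU pattern, in which any function that touches CUDA must run inside a spaces.GPU-decorated worker. A minimal sketch, with the worker function and the duration value as illustrative assumptions:

import spaces  # preinstalled on ZeroGPU Spaces

@spaces.GPU(duration=60)  # optional hint: expected runtime in seconds (assumed value)
def run_on_gpu(prompt: str) -> str:
    import torch  # CUDA may only be initialized inside the GPU worker
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"{prompt!r} handled on {device}"
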
@@ -94,7 +94,11 @@ def _import_hf_cls():
     return pipeline
 
 def _import_embeddings():
-
+    # Prefer the new package if available; fall back to community to avoid deprecation warnings
+    try:
+        from langchain_huggingface import HuggingFaceEmbeddings  # type: ignore
+    except Exception:
+        from langchain_community.embeddings import HuggingFaceEmbeddings  # type: ignore
     return HuggingFaceEmbeddings
 
 def _import_langchain_pdf():
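
Note (not part of the diff): a typical call site for the import shim above; the embedding model name is only an illustrative assumption:

HuggingFaceEmbeddings = _import_embeddings()
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")  # assumed model
vector = embeddings.embed_query("wound care follow-up instructions")
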
@@ -148,11 +152,34 @@ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]) -> str:
     """
     Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
     """
+    import torch
+    if not torch.cuda.is_available():
+        raise RuntimeError("CUDA not available in worker (check ZeroGPU torch version).")
     from transformers import pipeline
     pipe = pipeline(
         task="image-text-to-text",
         model=model_id,
-        device_map="
+        device_map={"": 0},  # be explicit: put everything on cuda:0
+        token=token,
+        trust_remote_code=True,
+        model_kwargs={"low_cpu_mem_usage": True},
+    )
+    out = pipe(text=messages, max_new_tokens=max_new_tokens, do_sample=False, temperature=0.2)
+    try:
+        txt = out[0]["generated_text"][-1].get("content", "")
+    except Exception:
+        txt = out[0].get("generated_text", "")
+    return (txt or "").strip() or "⚠️ Empty response"
+
+def _vlm_infer_cpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]) -> str:
+    """
+    CPU fallback path when ZeroGPU grant fails or CUDA wheel is unavailable.
+    """
+    from transformers import pipeline
+    pipe = pipeline(
+        task="image-text-to-text",
+        model=model_id,
+        device_map="cpu",
         token=token,
         trust_remote_code=True,
         model_kwargs={"low_cpu_mem_usage": True},
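
Note (not part of the diff): the messages argument consumed by _vlm_infer_gpu / _vlm_infer_cpu is the chat-style payload that transformers image-text-to-text pipelines accept. A rough sketch, with the image reference and prompt as placeholders:

messages = [
    {"role": "user", "content": [
        {"type": "image", "image": "wound_photo.jpg"},  # placeholder; a PIL image or URL also works
        {"type": "text", "text": "Describe the wound and suggest next steps."},
    ]},
]
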
@@ -174,6 +201,7 @@ def generate_medgemma_report(  # kept name so callers don't change
     """
     MedGemma replacement using Qwen/Qwen2-VL-2B-Instruct via image-text-to-text.
     Loads & runs ONLY inside a GPU worker to satisfy Stateless GPU constraints.
+    Falls back to CPU pipeline if a GPU grant/initialization fails.
     """
     if os.getenv("SMARTHEAL_ENABLE_VLM", "1") != "1":
         return "⚠️ VLM disabled"
@@ -200,12 +228,16 @@ def generate_medgemma_report(  # kept name so callers don't change
         ]},
     ]
 
+    # Try GPU worker first, then CPU fallback
     try:
-        # IMPORTANT: do not import transformers or touch CUDA here. Only call the GPU worker.
         return _vlm_infer_gpu(messages, model_id, max_new_tokens, HF_TOKEN)
     except Exception as e:
-        logging.
-
+        logging.warning(f"GPU VLM failed; falling back to CPU: {e}")
+        try:
+            return _vlm_infer_cpu(messages, model_id, max_new_tokens, HF_TOKEN)
+        except Exception as e2:
+            logging.error(f"CPU VLM also failed: {e2}")
+            return "⚠️ VLM error"
 
 # ---------- Initialize CPU models ----------
 def load_yolo_model():
@@ -217,7 +249,7 @@ def load_yolo_model():
 
 def load_segmentation_model():
     load_model = _import_tf_loader()
-    return load_model(SEG_MODEL_PATH, compile=False
+    return load_model(SEG_MODEL_PATH, compile=False)
 
 def load_classification_pipeline():
     pipe = _import_hf_cls()
@@ -255,6 +287,7 @@ def initialize_cpu_models() -> None:
             models_cache["seg"] = None
             logging.warning("Segmentation model file missing; skipping.")
     except Exception as e:
+        # Typical with Keras/TF version mismatch; pin TF/Keras 2.15 in requirements.
         models_cache["seg"] = None
         logging.warning(f"Segmentation unavailable: {e}")
 
@@ -419,7 +452,8 @@ def _grabcut_refine(bgr: np.ndarray, seed01: np.ndarray, iters: int = 3) -> np.ndarray:
     seed_dil = cv2.dilate(seed01, k, iterations=1)
     gc[seed01.astype(bool)] = cv2.GC_PR_FGD
     gc[seed_dil.astype(bool)] = cv2.GC_FGD
-
+    # force borders to background
+    gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
     bgdModel = np.zeros((1, 65), np.float64)
     fgdModel = np.zeros((1, 65), np.float64)
     cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
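
Note (not part of the diff): _grabcut_refine presumably finishes by collapsing the GrabCut labels back into a binary mask (not shown in this hunk); the conventional way to do that is:

mask01 = np.where((gc == cv2.GC_FGD) | (gc == cv2.GC_PR_FGD), 1, 0).astype(np.uint8)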