Spaces:

SmartHeal
/

SmartHeal-Agentic-AI

Sleeping

SmartHeal committed on Aug 18, 2025

Commit

6091286

verified ·

1 Parent(s): d323f56

Update src/ai_processor.py

Files changed (1) hide show

src/ai_processor.py CHANGED Viewed

@@ -139,7 +139,6 @@ Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
 """
 # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
-@_SPACES_GPU(enable_queue=True)
 def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
     """
     Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
@@ -150,7 +149,6 @@ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional
         task="image-text-to-text",
         model=model_id,
         torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
-        device_map="auto",            # CUDA init happens here, safely in GPU worker
         token=token,
         trust_remote_code=True,
         model_kwargs={"low_cpu_mem_usage": True},

 """
 # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
 def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
     """
     Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
         task="image-text-to-text",
         model=model_id,
         torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
         token=token,
         trust_remote_code=True,
         model_kwargs={"low_cpu_mem_usage": True},