SmartHeal committed on
Commit
6091286
·
verified ·
1 Parent(s): d323f56

Update src/ai_processor.py

Browse files
Files changed (1) hide show
  1. src/ai_processor.py +0 -2
src/ai_processor.py CHANGED
@@ -139,7 +139,6 @@ Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
139
  """
140
 
141
  # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
142
- @_SPACES_GPU(enable_queue=True)
143
  def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
144
  """
145
  Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
@@ -150,7 +149,6 @@ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional
150
  task="image-text-to-text",
151
  model=model_id,
152
  torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
153
- device_map="auto", # CUDA init happens here, safely in GPU worker
154
  token=token,
155
  trust_remote_code=True,
156
  model_kwargs={"low_cpu_mem_usage": True},
 
139
  """
140
 
141
  # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
 
142
  def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
143
  """
144
  Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
 
149
  task="image-text-to-text",
150
  model=model_id,
151
  torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
 
152
  token=token,
153
  trust_remote_code=True,
154
  model_kwargs={"low_cpu_mem_usage": True},