Spaces:
Sleeping
Sleeping
Update src/ai_processor.py
Browse files
- src/ai_processor.py +0 -2
src/ai_processor.py
CHANGED
|
@@ -139,7 +139,6 @@ Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
|
|
| 139 |
"""
|
| 140 |
|
| 141 |
# ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
|
| 142 |
-
@_SPACES_GPU(enable_queue=True)
|
| 143 |
def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
|
| 144 |
"""
|
| 145 |
Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
|
|
@@ -150,7 +149,6 @@ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional
|
|
| 150 |
task="image-text-to-text",
|
| 151 |
model=model_id,
|
| 152 |
torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
|
| 153 |
-
device_map="auto", # CUDA init happens here, safely in GPU worker
|
| 154 |
token=token,
|
| 155 |
trust_remote_code=True,
|
| 156 |
model_kwargs={"low_cpu_mem_usage": True},
|
|
|
|
| 139 |
"""
|
| 140 |
|
| 141 |
# ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
|
|
|
|
| 142 |
def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
|
| 143 |
"""
|
| 144 |
Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
|
|
|
|
| 149 |
task="image-text-to-text",
|
| 150 |
model=model_id,
|
| 151 |
torch_dtype=torch.bfloat16, # Use torch_dtype from the working example
|
|
|
|
| 152 |
token=token,
|
| 153 |
trust_remote_code=True,
|
| 154 |
model_kwargs={"low_cpu_mem_usage": True},
|