Darius Morawiec committed on
Commit
a84c724
·
1 Parent(s): bc51dfa

Load model on CPU during download and move to CUDA if available

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -184,11 +184,10 @@ with gr.Blocks() as demo:
184
  elif model_id.startswith("Qwen/Qwen3-VL"):
185
  model_loader = Qwen3VLForConditionalGeneration
186
  assert model_loader is not None, f"Unsupported model ID: {model_id}"
 
187
  current_model = model_loader.from_pretrained(
188
- model_id,
189
- torch_dtype="auto",
190
- device_map="auto",
191
- ).eval()
192
  current_processor = AutoProcessor.from_pretrained(model_id)
193
  current_model_id = model_id
194
  return current_model, current_processor
@@ -205,6 +204,10 @@ with gr.Blocks() as demo:
205
  image_resize: str,
206
  image_target_size: int | None,
207
  ):
 
 
 
 
208
  base64_image = image_to_base64(
209
  resize_image(image, image_target_size)
210
  if image_resize == "Yes" and image_target_size
 
184
  elif model_id.startswith("Qwen/Qwen3-VL"):
185
  model_loader = Qwen3VLForConditionalGeneration
186
  assert model_loader is not None, f"Unsupported model ID: {model_id}"
187
+ # Load model on CPU to avoid using CUDA resources during download
188
  current_model = model_loader.from_pretrained(
189
+ model_id, torch_dtype=torch.bfloat16, device_map="cpu"
190
+ )
 
 
191
  current_processor = AutoProcessor.from_pretrained(model_id)
192
  current_model_id = model_id
193
  return current_model, current_processor
 
204
  image_resize: str,
205
  image_target_size: int | None,
206
  ):
207
+ # Move model to CUDA if available (inside @spaces.GPU decorated function)
208
+ model = model.to(DEVICE)
209
+ model.eval()
210
+
211
  base64_image = image_to_base64(
212
  resize_image(image, image_target_size)
213
  if image_resize == "Yes" and image_target_size