prithivMLmods committed on
Commit
2b8b50a
·
verified ·
1 Parent(s): bea725a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -88,6 +88,7 @@ logger.info(f"Loading model 1: {MODEL_ID_1}")
88
  processor_1 = AutoProcessor.from_pretrained(MODEL_ID_1, trust_remote_code=True)
89
  model_1 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
90
  MODEL_ID_1,
 
91
  trust_remote_code=True,
92
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
93
  ).to(device).eval()
@@ -99,6 +100,7 @@ logger.info(f"Loading model 2: {MODEL_ID_2}")
99
  processor_2 = AutoProcessor.from_pretrained(MODEL_ID_2, trust_remote_code=True)
100
  model_2 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
101
  MODEL_ID_2,
 
102
  trust_remote_code=True,
103
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
104
  ).to(device).eval()
@@ -110,6 +112,7 @@ logger.info(f"Loading model 3: {MODEL_ID_3}")
110
  processor_3 = AutoProcessor.from_pretrained(MODEL_ID_3, trust_remote_code=True)
111
  model_3 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
112
  MODEL_ID_3,
 
113
  trust_remote_code=True,
114
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
115
  ).to(device).eval()
 
88
  processor_1 = AutoProcessor.from_pretrained(MODEL_ID_1, trust_remote_code=True)
89
  model_1 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
90
  MODEL_ID_1,
91
+ attn_implementation="flash_attention_2",
92
  trust_remote_code=True,
93
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
94
  ).to(device).eval()
 
100
  processor_2 = AutoProcessor.from_pretrained(MODEL_ID_2, trust_remote_code=True)
101
  model_2 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
102
  MODEL_ID_2,
103
+ attn_implementation="flash_attention_2",
104
  trust_remote_code=True,
105
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
106
  ).to(device).eval()
 
112
  processor_3 = AutoProcessor.from_pretrained(MODEL_ID_3, trust_remote_code=True)
113
  model_3 = Qwen2_5_VLForConditionalGeneration.from_pretrained(
114
  MODEL_ID_3,
115
+ attn_implementation="flash_attention_2",
116
  trust_remote_code=True,
117
  torch_dtype=torch.float16 if device == "cuda" else torch.float32
118
  ).to(device).eval()