Spaces:

IFMedTechdemo
/

Multi-Model-OCR

Runtime error

App Files Files Community

IFMedTechdemo committed on Oct 27

Commit

6193aca

verified ·

1 Parent(s): 1ed4a63

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -7

app.py CHANGED Viewed

@@ -27,7 +27,8 @@ processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
 model_v = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
     trust_remote_code=True,
-    torch_dtype=torch.float16
 ).to(device).eval()
 # Load Nanonets-OCR2-3B
@@ -36,15 +37,16 @@ processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
-    torch_dtype=torch.float16
 ).to(device).eval()
-# Load Dots.OCR
 MODEL_PATH_D = "strangervisionhf/dots.ocr-base-fix"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
-    attn_implementation="flash_attention_2",
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
@@ -56,15 +58,16 @@ processor_m = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", trust
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
     trust_remote_code=True,
-    torch_dtype=torch.bfloat16
 ).to(device).eval()
-# Load DeepSeek-OCR
 MODEL_ID_DS = "deepseek-ai/DeepSeek-OCR"
 tokenizer_ds = AutoTokenizer.from_pretrained(MODEL_ID_DS, trust_remote_code=True)
 model_ds = AutoModel.from_pretrained(
     MODEL_ID_DS,
-    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     use_safetensors=True
 ).eval().to(device).to(torch.bfloat16)

 model_v = Qwen3VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
     trust_remote_code=True,
+    torch_dtype=torch.float16,
+    attn_implementation="sdpa"  # Use PyTorch's native scaled dot product attention
 ).to(device).eval()
 # Load Nanonets-OCR2-3B
 model_x = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
     trust_remote_code=True,
+    torch_dtype=torch.float16,
+    attn_implementation="sdpa"  # Use PyTorch's native attention
 ).to(device).eval()
+# Load Dots.OCR - REMOVE flash_attention_2 parameter
 MODEL_PATH_D = "strangervisionhf/dots.ocr-base-fix"
 processor_d = AutoProcessor.from_pretrained(MODEL_PATH_D, trust_remote_code=True)
 model_d = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_D,
+    attn_implementation="sdpa",  # Changed from flash_attention_2
     torch_dtype=torch.bfloat16,
     device_map="auto",
     trust_remote_code=True
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
     trust_remote_code=True,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="sdpa"  # Use PyTorch's native attention
 ).to(device).eval()
+# Load DeepSeek-OCR - REMOVE flash_attention_2 parameter
 MODEL_ID_DS = "deepseek-ai/DeepSeek-OCR"
 tokenizer_ds = AutoTokenizer.from_pretrained(MODEL_ID_DS, trust_remote_code=True)
 model_ds = AutoModel.from_pretrained(
     MODEL_ID_DS,
+    attn_implementation="sdpa",  # Changed from flash_attention_2
     trust_remote_code=True,
     use_safetensors=True
 ).eval().to(device).to(torch.bfloat16)