MultiVLM-OCR

Running on Zero

App Files Files Community

Geraldine committed on 18 days ago

Commit

d5d699b

verified ·

1 Parent(s): 9704588

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -10

app.py CHANGED Viewed

@@ -43,6 +43,35 @@ if torch.cuda.is_available():
 print("Using device:", device)
 def patch_dots_ocr_configuration(repo_path: str) -> None:
     config_path = Path(repo_path) / "configuration_dots.py"
     if not config_path.exists():
@@ -92,9 +121,9 @@ def resolve_dots_ocr_model_path(repo_id: str) -> str:
 MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
-model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_V,
-    attn_implementation="kernels-community/flash-attn2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -102,36 +131,36 @@ model_v = Qwen2_5_VLForConditionalGeneration.from_pretrained(
 MODEL_ID_Y = "rednote-hilab/dots.ocr"
 MODEL_PATH_Y = resolve_dots_ocr_model_path(MODEL_ID_Y)
 processor_y = AutoProcessor.from_pretrained(MODEL_PATH_Y, trust_remote_code=True)
-model_y = AutoModelForCausalLM.from_pretrained(
     MODEL_PATH_Y,
-    attn_implementation="kernels-community/flash-attn2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 ).to(device).eval()
 MODEL_ID_X = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     MODEL_ID_X,
-    attn_implementation="kernels-community/flash-attn2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_W = "allenai/olmOCR-7B-0725"
 processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
-model_w = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_W,
-    attn_implementation="kernels-community/flash-attn2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_M = "reducto/RolmOCR"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
-model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
-    attn_implementation="kernels-community/flash-attn2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()

 print("Using device:", device)
+def get_attention_fallbacks() -> list[str | None]:
+    """Return the attention backends to try, in order of preference.
+
+    The order is:
+      1. "kernels-community/flash-attn2" -- only when CUDA is available and
+         the USE_FLASH_ATTN environment variable is exactly "1".
+      2. "sdpa" -- only when CUDA is available.
+      3. "eager" -- always.
+      4. None -- always; load_model_with_attention_fallback treats it as
+         "do not pass attn_implementation at all".
+    """
+    fallbacks = []
+    # Flash attention is opt-in: USE_FLASH_ATTN defaults to "0" when unset.
+    if torch.cuda.is_available() and os.getenv("USE_FLASH_ATTN", "0") == "1":
+        fallbacks.append("kernels-community/flash-attn2")
+    if torch.cuda.is_available():
+        fallbacks.append("sdpa")
+    fallbacks.append("eager")
+    # Last resort: no explicit backend.
+    fallbacks.append(None)
+    return fallbacks
+def load_model_with_attention_fallback(model_cls, model_id, **kwargs):
+    """Load a model, trying each attention backend until one succeeds.
+
+    Args:
+        model_cls: Object exposing ``from_pretrained``; it is called as
+            ``model_cls.from_pretrained(model_id, **load_kwargs)``.
+        model_id: First positional argument forwarded to ``from_pretrained``.
+        **kwargs: Extra keyword arguments forwarded to ``from_pretrained``.
+            Any ``attn_implementation`` entry is overridden (or removed) on
+            each attempt.
+
+    Returns:
+        Whatever ``from_pretrained`` returns on the first successful attempt.
+
+    Raises:
+        Exception: The error from the last attempted backend, re-raised when
+            every backend from ``get_attention_fallbacks()`` fails.
+    """
+    last_error = None
+    for attn_impl in get_attention_fallbacks():
+        # Work on a copy so one attempt's override does not leak into the next.
+        load_kwargs = dict(kwargs)
+        label = attn_impl or "default"
+        if attn_impl is None:
+            # None means "no explicit backend": drop the key entirely.
+            load_kwargs.pop("attn_implementation", None)
+        else:
+            load_kwargs["attn_implementation"] = attn_impl
+        try:
+            print(f"Loading {model_id} with attention backend: {label}")
+            return model_cls.from_pretrained(model_id, **load_kwargs)
+        except Exception as exc:
+            # Any failure moves on to the next backend; keep the error so the
+            # final one can be re-raised if nothing works.
+            last_error = exc
+            print(f"Failed loading {model_id} with attention backend {label}: {exc}")
+    raise last_error
 def patch_dots_ocr_configuration(repo_path: str) -> None:
     config_path = Path(repo_path) / "configuration_dots.py"
     if not config_path.exists():
 MODEL_ID_V = "nanonets/Nanonets-OCR2-3B"
 processor_v = AutoProcessor.from_pretrained(MODEL_ID_V, trust_remote_code=True)
+model_v = load_model_with_attention_fallback(
+    Qwen2_5_VLForConditionalGeneration,
     MODEL_ID_V,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_Y = "rednote-hilab/dots.ocr"
 MODEL_PATH_Y = resolve_dots_ocr_model_path(MODEL_ID_Y)
 processor_y = AutoProcessor.from_pretrained(MODEL_PATH_Y, trust_remote_code=True)
+model_y = load_model_with_attention_fallback(
+    AutoModelForCausalLM,
     MODEL_PATH_Y,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 ).to(device).eval()
 MODEL_ID_X = "prithivMLmods/Qwen2-VL-OCR-2B-Instruct"
 processor_x = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+model_x = load_model_with_attention_fallback(
+    Qwen2VLForConditionalGeneration,
     MODEL_ID_X,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_W = "allenai/olmOCR-7B-0725"
 processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
+model_w = load_model_with_attention_fallback(
+    Qwen2_5_VLForConditionalGeneration,
     MODEL_ID_W,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_M = "reducto/RolmOCR"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
+model_m = load_model_with_attention_fallback(
+    Qwen2_5_VLForConditionalGeneration,
     MODEL_ID_M,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()