Spaces:

openpecha
/

bec-dot.orc-api

Sleeping

ta4tsering committed on Feb 23

Commit

15c943a

1 Parent(s): 0ea2759

fix: update DotsVLProcessor class to handle processor attributes and adjust model loading requirements

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import sys
 import torch
 import gradio as gr
 from PIL import Image
@@ -24,22 +25,32 @@ def patch_configuration_dots(model_path: str) -> None:
     if not os.path.exists(config_path):
         return
-    with open(config_path, "r") as f:
         source = f.read()
-    if 'attributes = ["image_processor", "tokenizer"]' in source:
-        return  # already patched
-    patched = source.replace(
-        "class DotsVLProcessor(Qwen2_5_VLProcessor):\n"
-        "    def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):",
-        "class DotsVLProcessor(Qwen2_5_VLProcessor):\n"
-        '    attributes = ["image_processor", "tokenizer"]\n'
-        "    def __init__(self, image_processor=None, tokenizer=None, video_processor=None, chat_template=None, **kwargs):",
     )
-    with open(config_path, "w") as f:
         f.write(patched)
@@ -73,6 +84,7 @@ def load_model():
     processor = AutoProcessor.from_pretrained(
         model_path,
         trust_remote_code=True,
     )
     return model, processor

 import os
 import sys
+import re
 import torch
 import gradio as gr
 from PIL import Image
     if not os.path.exists(config_path):
         return
+    with open(config_path, "r", encoding="utf-8") as f:
         source = f.read()
+    patched = source
+    # Force processor mixin to treat dots.ocr as image+tokenizer only.
+    # This avoids newer transformers requiring BaseVideoProcessor.
+    if 'attributes = ["image_processor", "tokenizer"]' not in patched:
+        patched = re.sub(
+            r"(class\s+DotsVLProcessor\(Qwen2_5_VLProcessor\):\n)",
+            r'\1    attributes = ["image_processor", "tokenizer"]\n',
+            patched,
+            count=1,
+        )
+    # Handle both older and newer remote class signatures.
+    patched = patched.replace(
+        "def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):",
+        "def __init__(self, image_processor=None, tokenizer=None, video_processor=None, chat_template=None, **kwargs):",
     )
+    if patched == source:
+        print("No dots.ocr processor patch changes were required.")
+        return
+    with open(config_path, "w", encoding="utf-8") as f:
         f.write(patched)
     processor = AutoProcessor.from_pretrained(
         model_path,
         trust_remote_code=True,
+        use_fast=False,
     )
     return model, processor

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-transformers>=4.57.0
 torch>=2.4.0
 torchvision>=0.19.0
 Pillow>=10.0.0

+transformers==4.56.2
 torch>=2.4.0
 torchvision>=0.19.0
 Pillow>=10.0.0