chitrark committed on
Commit
6ba0575
·
verified ·
1 Parent(s): 92e866a

updated minor warnings

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -1,12 +1,18 @@
1
  import os
2
  import base64
3
  from io import BytesIO
 
4
 
5
  import torch
6
  from PIL import Image
7
  import gradio as gr
8
  from transformers import AutoProcessor, AutoModelForVision2Seq
9
 
 
 
 
 
 
10
  # IMPORTANT: Load processor+model from the olmOCR checkpoint itself
11
  MODEL_ID = "allenai/olmOCR-2-7B-1025"
12
 
@@ -19,9 +25,6 @@ def load_model():
19
  if processor is not None and model is not None:
20
  return
21
 
22
- # Silence the libgomp warning in Spaces
23
- os.environ["OMP_NUM_THREADS"] = "1"
24
-
25
  # trust_remote_code is often required for VLM checkpoints
26
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
27
 
@@ -95,7 +98,9 @@ def ocr_image(img: Image.Image) -> str:
95
  padding=True,
96
  return_tensors="pt",
97
  )
98
- # NOTE: don't move inputs to cuda manually when using device_map="auto"
 
 
99
 
100
  with torch.inference_mode():
101
  output_ids = model.generate(
@@ -121,7 +126,7 @@ with gr.Blocks(title="BookReader OCR API (olmOCR2)") as demo:
121
  with gr.Row():
122
  with gr.Column():
123
  image_input = gr.Image(type="pil", label="Upload image")
124
- run_btn = gr.Button("Run OCR")
125
  with gr.Column():
126
  output = gr.Textbox(label="Extracted text", lines=20)
127
 
@@ -132,5 +137,5 @@ with gr.Blocks(title="BookReader OCR API (olmOCR2)") as demo:
132
  api_name="/ocr",
133
  )
134
 
135
- demo.queue().launch()
136
-
 
1
  import os
2
  import base64
3
  from io import BytesIO
4
+ import warnings
5
 
6
  import torch
7
  from PIL import Image
8
  import gradio as gr
9
  from transformers import AutoProcessor, AutoModelForVision2Seq
10
 
11
+ # Suppress warnings at startup
12
+ os.environ["OMP_NUM_THREADS"] = "1"
13
+ os.environ["TRANSFORMERS_VERBOSITY"] = "error"
14
+ warnings.filterwarnings("ignore", category=FutureWarning)
15
+
16
  # IMPORTANT: Load processor+model from the olmOCR checkpoint itself
17
  MODEL_ID = "allenai/olmOCR-2-7B-1025"
18
 
 
25
  if processor is not None and model is not None:
26
  return
27
 
 
 
 
28
  # trust_remote_code is often required for VLM checkpoints
29
  processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
30
 
 
98
  padding=True,
99
  return_tensors="pt",
100
  )
101
+
102
+ # FIX: Move inputs to model's device (eliminates the warning)
103
+ inputs = {k: v.to(model.device) if torch.is_tensor(v) else v for k, v in inputs.items()}
104
 
105
  with torch.inference_mode():
106
  output_ids = model.generate(
 
126
  with gr.Row():
127
  with gr.Column():
128
  image_input = gr.Image(type="pil", label="Upload image")
129
+ run_btn = gr.Button("Run OCR", variant="primary")
130
  with gr.Column():
131
  output = gr.Textbox(label="Extracted text", lines=20)
132
 
 
137
  api_name="/ocr",
138
  )
139
 
140
+ if __name__ == "__main__":
141
+ demo.queue().launch()