wealthcoders committed on
Commit
c35506c
·
verified ·
1 Parent(s): 25f0959

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +50 -0
handler.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
+ from typing import Dict, List, Any
3
+ import torch
4
+ import base64
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ import os
8
+
9
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for DeepSeek-OCR.

    Loads the OCR model once at construction time and, on each call,
    converts a base64-encoded document image into markdown text.
    """

    def __init__(self, model_name: str = 'deepseek-ai/DeepSeek-OCR'):
        """Load the tokenizer and model.

        Args:
            model_name: Hub id of the OCR checkpoint. Defaults to the
                previously hard-coded model so existing callers are
                unaffected.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
        # BUG FIX: the original called .cuda() unconditionally and crashed on
        # CPU-only hosts (its own comment said "Use .cpu() if no GPU").
        # Auto-detect instead: bfloat16 on GPU, default dtype on CPU.
        if torch.cuda.is_available():
            self.model = model.eval().cuda().to(torch.bfloat16)
        else:
            self.model = model.eval()

    def __call__(self, data: Dict[str, Any]) -> str:
        """Run OCR on a base64-encoded image and return markdown text.

        Args:
            data: Request payload. Expects key "base64" holding the image,
                optionally as a data URL ("data:image/png;base64,...").
                Optional key "output_path" makes the model save its results
                to that directory; omitted means no on-disk output.

        Returns:
            The model's markdown transcription, or None on failure (errors
            are printed rather than raised, preserving the original
            best-effort contract).
        """
        try:
            # BUG FIX: the original read from an undefined name `inputs`;
            # the payload is the `data` argument.
            base64_string = data["base64"]
            # Strip a data-URL prefix if present.
            if ',' in base64_string:
                base64_string = base64_string.split(',')[1]

            # Decode base64 into a PIL image.
            image_data = base64.b64decode(base64_string)
            image = Image.open(BytesIO(image_data))

            # Normalize to RGB (handles paletted PNG, RGBA, grayscale, ...).
            if image.mode != 'RGB':
                image = image.convert('RGB')

            # BUG FIX: `output_path` was referenced but never defined in the
            # original (NameError). Take it from the payload; default None
            # means "do not save results".
            output_path = data.get("output_path")

            # Grounded prompt instructing DeepSeek-OCR to emit markdown.
            prompt = "<image>\n<|grounding|>Convert the document to markdown."

            # Run OCR inference via the model's custom `infer` entry point.
            result = self.model.infer(
                self.tokenizer,
                prompt=prompt,
                image_file=image,  # pass the PIL Image directly
                output_path=output_path,
                base_size=1024,
                image_size=640,
                crop_mode=True,
                save_results=output_path is not None
            )

            return result

        except Exception as e:
            # Best-effort contract: log and return None instead of raising.
            print(f"Error processing image: {e}")
            return None