wealthcoders committed on
Commit
c35506c
·
verified ·
1 Parent(s): 25f0959

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +50 -0
handler.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel, AutoTokenizer
2
+ from typing import Dict, List, Any
3
+ import torch
4
+ import base64
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ import os
8
+
9
class EndpointHandler:
    """Hugging Face Inference Endpoints handler for DeepSeek-OCR.

    Loads the OCR model once at construction time and, on each call,
    converts a base64-encoded document image into markdown text.
    """

    def __init__(self, model_name: str = 'deepseek-ai/DeepSeek-OCR'):
        """Load the tokenizer and model.

        Args:
            model_name: Hub id of the OCR checkpoint. Defaults to the
                previously hard-coded model so existing callers are
                unaffected.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
        # BUG FIX: the original called .cuda() unconditionally and crashed on
        # CPU-only hosts (its own comment said "Use .cpu() if no GPU").
        # Auto-detect instead: bfloat16 on GPU, default dtype on CPU.
        if torch.cuda.is_available():
            self.model = model.eval().cuda().to(torch.bfloat16)
        else:
            self.model = model.eval()

    def __call__(self, data: Dict[str, Any]) -> str:
        """Run OCR on a base64-encoded image and return markdown text.

        Args:
            data: Request payload. Expects key "base64" holding the image,
                optionally as a data URL ("data:image/png;base64,...").
                Optional key "output_path" makes the model save its results
                to that directory; omitted means no on-disk output.

        Returns:
            The model's markdown transcription, or None on failure (errors
            are printed rather than raised, preserving the original
            best-effort contract).
        """
        try:
            # BUG FIX: the original read from an undefined name `inputs`;
            # the payload is the `data` argument.
            base64_string = data["base64"]
            # Strip a data-URL prefix if present.
            if ',' in base64_string:
                base64_string = base64_string.split(',')[1]

            # Decode base64 into a PIL image.
            image_data = base64.b64decode(base64_string)
            image = Image.open(BytesIO(image_data))

            # Normalize to RGB (handles paletted PNG, RGBA, grayscale, ...).
            if image.mode != 'RGB':
                image = image.convert('RGB')

            # BUG FIX: `output_path` was referenced but never defined in the
            # original (NameError). Take it from the payload; default None
            # means "do not save results".
            output_path = data.get("output_path")

            # Grounded prompt instructing DeepSeek-OCR to emit markdown.
            prompt = "<image>\n<|grounding|>Convert the document to markdown."

            # Run OCR inference via the model's custom `infer` entry point.
            result = self.model.infer(
                self.tokenizer,
                prompt=prompt,
                image_file=image,  # pass the PIL Image directly
                output_path=output_path,
                base_size=1024,
                image_size=640,
                crop_mode=True,
                save_results=output_path is not None
            )

            return result

        except Exception as e:
            # Best-effort contract: log and return None instead of raising.
            print(f"Error processing image: {e}")
            return None