wealthcoders
/

deepseek-OCR

Model card Files Files and versions

wealthcoders committed on Nov 28, 2025

Commit

8a4adbe

·

verified ·

1 Parent(s): 0c1f2c5

Update handler.py

Files changed (1) hide show

handler.py +17 -7

handler.py CHANGED Viewed

@@ -69,19 +69,29 @@ class EndpointHandler:
             # Decode base64 to image
             image_data = base64.b64decode(base64_string)
-            image = Image.open(BytesIO(image_data))
-            # Convert to RGB if necessary (handles PNG, JPEG, etc.)
-            if image.mode != 'RGB':
-                image = image.convert('RGB')
             # Define the prompt for Markdown conversion
             prompt = "<image>\n<|grounding|>Convert the document to markdown."
             with tempfile.TemporaryDirectory() as temp_dir:
-                print(f"Using temporary directory: {temp_dir}")
-                    # Run OCR inference
                 result = self.model.infer(
                     self.tokenizer,
                     prompt=prompt,

             # Decode base64 to image
             image_data = base64.b64decode(base64_string)
             # Define the prompt for Markdown conversion
             prompt = "<image>\n<|grounding|>Convert the document to markdown."
             with tempfile.TemporaryDirectory() as temp_dir:
+                image_path = os.path.join(temp_dir, "input_image.png")
+                with open(image_path, "wb") as f:
+                    f.write(image_data)
+                print(f"Image saved to: {image_path}")
+                # Verify the image can be opened
+                try:
+                    test_image = Image.open(image_path)
+                    if test_image.mode != 'RGB':
+                        test_image = test_image.convert('RGB')
+                        test_image.save(image_path)  # Save converted version
+                    print(f"Image verified: {test_image.size}, mode: {test_image.mode}")
+                except Exception as img_error:
+                    return {"error": f"Invalid image: {str(img_error)}"}
+                # Run OCR inference
                 result = self.model.infer(
                     self.tokenizer,
                     prompt=prompt,