my-smoldocling-demo

Sleeping

bharatcoder committed on Oct 25

Commit

178bba5

verified ·

1 Parent(s): 1f42ce9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ def convert_to_pil(image_input: str) -> Image.Image:
     raise ValueError(f"Could not convert image input to PIL.Image: {type(image_input)}")
-def smoldocling_readimage(image: str, prompt_text: str) -> str:
     """
     Extract text and structured content from document images using SmolDocling model.
@@ -51,8 +51,7 @@ def smoldocling_readimage(image: str, prompt_text: str) -> str:
     the SmolDocling-256M-preview model for image-to-text conversion with chat-based prompting.
     Args:
-        image (str): The input document image as base64 encoded string or file path.
-            MCP clients will send this as base64.
         prompt_text (str): The instruction or prompt text that guides the model's output format.
             Supported prompts include:
@@ -86,7 +85,7 @@ def smoldocling_readimage(image: str, prompt_text: str) -> str:
         - Maximum output length is limited to 1024 new tokens
     """
     # Convert string input (base64 or path) to PIL.Image
-    pil_image = convert_to_pil(image)
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}

     raise ValueError(f"Could not convert image input to PIL.Image: {type(image_input)}")
+def smoldocling_readimage(image: Image.Image, prompt_text: str) -> str:
     """
     Extract text and structured content from document images using SmolDocling model.
     the SmolDocling-256M-preview model for image-to-text conversion with chat-based prompting.
     Args:
+        image (Image.Image): The input document image
         prompt_text (str): The instruction or prompt text that guides the model's output format.
             Supported prompts include:
         - Maximum output length is limited to 1024 new tokens
     """
     # Convert string input (base64 or path) to PIL.Image
+    # pil_image = convert_to_pil(image)
     messages = [
         {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}