File size: 1,574 Bytes
e1f3958
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import pytesseract
from PIL import Image
from io import BytesIO
import base64
import os


def load_image(image_input):
    """Load an image from one of several input forms and return it in RGB mode.

    Args:
        image_input: One of
            - ``bytes`` / ``bytearray`` / ``memoryview``: encoded image data;
            - ``PIL.Image.Image``: an already-opened image;
            - ``str`` starting with ``"data:"``: a base64 data URL, where
              everything after the first ``,`` is the base64 payload;
            - any other ``str``: a file name joined onto the ``LLMFiles``
              directory.

    Returns:
        The result of ``convert("RGB")`` on the loaded image.

    Raises:
        ValueError: If the input type is unsupported, or a data URL has no
            ``,`` separating its header from the payload. Errors raised by
            ``base64.b64decode`` and ``Image.open`` propagate unchanged.
    """
    # A caller-supplied image is not ours to close; just convert it.
    if isinstance(image_input, Image.Image):
        return image_input.convert("RGB")

    if isinstance(image_input, (bytes, bytearray, memoryview)):
        source = BytesIO(image_input)
    elif isinstance(image_input, str):
        if image_input.startswith("data:"):   # base64 data URL
            _, separator, b64 = image_input.partition(",")
            if not separator:
                raise ValueError(
                    "Malformed data URL: missing ',' before the base64 payload"
                )
            source = BytesIO(base64.b64decode(b64))
        else:
            # NOTE(review): os.path.join drops "LLMFiles" when image_input is
            # an absolute path, so this does not confine reads to that
            # directory — confirm whether callers rely on that.
            source = os.path.join("LLMFiles", image_input)
    else:
        raise ValueError("Unsupported image input type")

    # convert() forces the pixel data to load, so the source image (and any
    # file handle it owns) can be released as soon as the copy is made.
    with Image.open(source) as opened:
        return opened.convert("RGB")


def ocr_image_tool(payload: dict) -> dict:
    """
    Extract text from an image using pytesseract OCR.

    Expected payload:
    {
        "image": bytes | base64 data URL | file path | PIL.Image,
        "lang": "eng" (optional)
    }

    Returns:
    {
        "text": "<extracted text>",
        "engine": "pytesseract"
    }

    On failure the same dict shape is returned with an empty "text" and an
    additional "error" key holding the message "Error occurred: <reason>",
    so callers always receive a dict.

    Use this tool when the user wants to read or extract text from an image.
    """
    try:
        image_data = payload["image"]
        lang = payload.get("lang", "eng")

        img = load_image(image_data)
        text = pytesseract.image_to_string(img, lang=lang)

        return {
            "text": text.strip(),
            "engine": "pytesseract",
        }
    except Exception as e:
        # Tool boundary: report any failure (missing key, unreadable image,
        # OCR engine error) in-band rather than raising, while keeping the
        # declared dict return type.
        return {
            "text": "",
            "engine": "pytesseract",
            "error": f"Error occurred: {e}",
        }