File size: 1,574 Bytes
e1f3958 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | import pytesseract
from PIL import Image
from io import BytesIO
import base64
import os
def load_image(image_input):
    """Internal helper to load an image and return it as an RGB ``PIL.Image``.

    Accepted inputs:
      * bytes-like (``bytes``, ``bytearray``, ``memoryview``): encoded image
        data, decoded from memory.
      * ``PIL.Image.Image``: converted to RGB.
      * ``str`` starting with ``"data:"``: treated as a data URL; everything
        after the first comma is base64-decoded and opened as image data.
      * any other ``str``: treated as a file path joined onto ``LLMFiles``.

    Raises:
        ValueError: if the input type is unsupported, or a ``data:`` string
            has no comma separating the header from the payload.
        Exceptions raised by base64 decoding or by Pillow while opening the
        image propagate unchanged.
    """
    if isinstance(image_input, (bytes, bytearray, memoryview)):
        with Image.open(BytesIO(image_input)) as img:
            return img.convert("RGB")

    if isinstance(image_input, Image.Image):
        return image_input.convert("RGB")

    if isinstance(image_input, str):
        if image_input.startswith("data:"):  # base64 data URL
            _, sep, b64 = image_input.partition(",")
            if not sep:
                raise ValueError("Malformed data URL: missing ',' before payload")
            with Image.open(BytesIO(base64.b64decode(b64))) as img:
                return img.convert("RGB")

        # NOTE(review): os.path.join discards "LLMFiles" when image_input is an
        # absolute path and does not reject ".." segments, so this does not
        # confine reads to that directory - confirm whether that is intended.
        path = os.path.join("LLMFiles", image_input)
        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle once convert() has produced an independent copy.
        with Image.open(path) as img:
            return img.convert("RGB")

    raise ValueError("Unsupported image input type")
def ocr_image_tool(payload: dict) -> dict:
    """
    Extract text from an image using pytesseract OCR.

    Expected payload:
    {
        "image": bytes | base64 data URL string | file path | PIL.Image,
        "lang": "eng" (optional)
    }

    Returns:
    {
        "text": "<extracted text>",
        "engine": "pytesseract"
    }

    On failure (missing "image" key, unreadable image, OCR error, ...) no
    exception is raised; the result is still a dict:
    {
        "text": "",
        "engine": "pytesseract",
        "error": "Error occurred: <details>"
    }

    Use this tool when the user wants to read or extract text from an image.
    """
    try:
        image_data = payload["image"]
        lang = payload.get("lang", "eng")
        img = load_image(image_data)
        text = pytesseract.image_to_string(img, lang=lang).strip()
    except Exception as e:
        # Tool boundary: report the failure to the caller instead of raising,
        # but keep the documented dict shape so result["text"] never breaks.
        return {
            "text": "",
            "engine": "pytesseract",
            "error": f"Error occurred: {e}",
        }

    return {
        "text": text,
        "engine": "pytesseract",
    }
|