File size: 2,201 Bytes
998967a
 
856f7b8
 
 
 
 
 
 
 
 
 
 
 
f4c14e9
 
856f7b8
f4c14e9
 
856f7b8
f4c14e9
 
856f7b8
f4c14e9
856f7b8
 
 
 
 
 
 
 
 
 
 
 
 
 
f4c14e9
856f7b8
f4c14e9
 
 
 
 
 
 
856f7b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4c14e9
 
 
856f7b8
f4c14e9
856f7b8
f4c14e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from smolagents import tool

@tool
def image_to_text(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    The OCR dependencies are imported lazily inside the function, so the
    module can be loaded even when they are not installed. Every failure is
    reported through the returned string; nothing is raised to the caller.

    Args:
        image_path: Path to the image file

    Returns:
        The recognised text prefixed with "Extracted text from image: ", or
        a message starting with "Error" when a dependency cannot be imported
        or the image cannot be opened or processed
    """
    # Resolve the optional dependencies first so that a missing package is
    # reported distinctly from a failure while reading or processing the image.
    try:
        import pytesseract
        from PIL import Image
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."

    try:
        # The context manager closes the underlying file handle even when
        # the OCR step raises, instead of leaving it to garbage collection.
        with Image.open(image_path) as img:
            extracted_text = pytesseract.image_to_string(img)
    except Exception as e:
        # Tool boundary: hand the problem back as text rather than raising.
        return f"Error extracting text from image: {str(e)}"

    return f"Extracted text from image: {extracted_text}"

@tool
def pdf_to_text(pdf_file_path: str) -> str:
    """
    Reads a PDF file from the given path and returns its content as text.

    The plain text of each page is concatenated in page order, with a
    newline appended after every page. Every failure is reported through
    the returned string; nothing is raised to the caller.

    Args:
        pdf_file_path (str): The path to the PDF file.

    Returns:
        str: The text content of the PDF, or an error message when the file
        does not exist or cannot be processed.
    """
    try:
        import os

        # Check for the file explicitly: PyMuPDF may signal a missing file
        # with its own exception type rather than the builtin
        # FileNotFoundError (TODO confirm), in which case the handler below
        # would never produce the "not found" message.
        if not os.path.exists(pdf_file_path):
            return f"Error: The file at '{pdf_file_path}' was not found."

        import pymupdf

        # The context manager closes the document once the text is extracted,
        # even if reading a page fails part-way through.
        with pymupdf.open(pdf_file_path) as doc:
            return "".join(page.get_text("text") + "\n" for page in doc)
    except FileNotFoundError:
        # Still reachable if the file disappears between the check and the open.
        return f"Error: The file at '{pdf_file_path}' was not found."
    except Exception as e:
        # Tool boundary: hand the problem back as text rather than raising.
        return f"An error occurred: {e}"

@tool
def text_file_to_string(path: str) -> str:
    """
    Reads any plain text file and returns its content as a string.

    Works for:
    - .txt
    - .md
    - .json / .jsonl
    - .html
    - .csv (as raw text)
    - any UTF-8 or ASCII compatible text file

    A leading UTF-8 byte order mark, if present, is removed from the result.
    If the file contains binary data, the returned string may be partially
    decoded, because bytes that are not valid UTF-8 are dropped.

    Args:
        path (str): The path to the text file.

    Returns:
        str: The decoded file content, or an error message when the file
        does not exist or cannot be read.
    """
    try:
        # "utf-8-sig" decodes exactly like "utf-8" but also strips a leading
        # BOM, so the result does not start with a stray U+FEFF character.
        with open(path, "r", encoding="utf-8-sig", errors="ignore") as f:
            return f.read()
    except FileNotFoundError:
        return f"Error: The file at '{path}' was not found."
    except Exception as e:
        # Tool boundary: hand the problem back as text rather than raising.
        return f"An error occurred: {e}"