File size: 4,290 Bytes
ce79b68
 
 
 
8dfc9f8
 
b5fafa1
6c0aeb9
b5fafa1
 
 
 
 
 
 
6c0aeb9
b5fafa1
 
8dfc9f8
 
 
 
0868311
8dfc9f8
b5fafa1
6c0aeb9
8dfc9f8
 
 
 
b5fafa1
8dfc9f8
abd6d4e
8dfc9f8
b5fafa1
 
 
 
8dfc9f8
 
b5fafa1
 
6c0aeb9
8dfc9f8
 
 
 
b5fafa1
8dfc9f8
abd6d4e
8dfc9f8
 
b5fafa1
8dfc9f8
 
b5fafa1
 
6c0aeb9
8dfc9f8
 
 
 
 
b5fafa1
8dfc9f8
 
 
b5fafa1
8dfc9f8
abd6d4e
8dfc9f8
 
 
 
b5fafa1
 
6c0aeb9
0868311
8dfc9f8
 
 
 
b5fafa1
8dfc9f8
e003639
8dfc9f8
b5fafa1
8dfc9f8
abd6d4e
8dfc9f8
b5fafa1
8dfc9f8
0868311
8dfc9f8
 
 
e040f4f
8dfc9f8
 
 
0868311
8dfc9f8
 
 
0868311
e003639
0868311
e003639
0868311
e003639
 
 
0868311
 
e003639
b5fafa1
 
 
e003639
b5fafa1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Simple agent to analyse a markdown document, just to test some ideas.
"""

from enum import Enum

from mistralai import OCRResponse
from smolagents import CodeAgent, LiteLLMModel

from deepengineer.webcrawler.pdf_utils import (
    convert_ocr_response_to_markdown,
    find_in_markdown,
    get_markdown_by_page_numbers,
    get_table_of_contents_per_page_markdown,
)
from deepengineer.logging_tools import LoggingTool


class ToolNames(Enum):
    """Names under which the markdown tools in this module are registered.

    Each value is used as the ``name`` attribute of the matching tool class
    and is interpolated into the other tools' descriptions so they can
    refer to one another.
    """

    # Name of GetTableOfContentsTool.
    GET_TABLE_OF_CONTENTS = "get_table_of_contents"
    # Name of GetMarkdownTool.
    GET_MARKDOWN = "get_markdown"
    # Name of GetPagesContentTool.
    GET_PAGES_CONTENT = "get_pages_content"
    # Name of FindInMarkdownTool.
    FIND_IN_MARKDOWN = "find_in_markdown"


class GetTableOfContentsTool(LoggingTool):
    """Tool exposing the table of contents of an OCR'd document.

    The table of contents is built once, at construction time, by
    ``get_table_of_contents_per_page_markdown``; ``forward`` only hands
    back the stored string.
    """

    name = ToolNames.GET_TABLE_OF_CONTENTS.value
    description = "Returns all of the titles in the document along with the page number they are on."
    inputs = {}
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        """Keep the OCR response and precompute its table of contents.

        Args:
            markdown: OCR response of the document this tool describes.
        """
        super().__init__()
        self.markdown: OCRResponse = markdown
        # Computed eagerly so that repeated tool calls do not redo the work.
        toc: str = get_table_of_contents_per_page_markdown(self.markdown)
        self.table_of_contents: str = toc

    def forward(self) -> str:
        """Return the table of contents computed in ``__init__``."""
        return self.table_of_contents


class GetMarkdownTool(LoggingTool):
    """Tool exposing the whole document as a single markdown string.

    The conversion is done once, at construction time, by
    ``convert_ocr_response_to_markdown``; ``forward`` only hands back the
    stored string.
    """

    name = ToolNames.GET_MARKDOWN.value
    description = f"Returns the markdown entire content of the document. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages."
    inputs = {}
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        """Keep the OCR response and precompute its markdown rendering.

        Args:
            markdown: OCR response of the document to render.
        """
        super().__init__()
        self.markdown: OCRResponse = markdown
        # Rendered eagerly so that repeated tool calls do not redo the work.
        rendered: str = convert_ocr_response_to_markdown(self.markdown)
        self.markdown_content: str = rendered

    def forward(self) -> str:
        """Return the markdown rendering computed in ``__init__``."""
        return self.markdown_content


class GetPagesContentTool(LoggingTool):
    """Tool returning the content of selected pages of an OCR'd document.

    Every call is delegated to ``get_markdown_by_page_numbers`` with the
    OCR response stored at construction time.
    """

    name = ToolNames.GET_PAGES_CONTENT.value
    description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages. Expects a list of page numbers as integers as input."
    inputs = {
        "page_numbers": {"type": "array", "description": "The page numbers to get the content of."},
    }
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        """Keep the OCR response that later calls will read from.

        Args:
            markdown: OCR response of the document to read pages from.
        """
        super().__init__()
        self.markdown: OCRResponse = markdown

    def forward(self, page_numbers: list[int]) -> str:
        """Return the content of the requested pages.

        Args:
            page_numbers: Page numbers to fetch; passed through unchanged
                to ``get_markdown_by_page_numbers``.
        """
        pages_markdown = get_markdown_by_page_numbers(self.markdown, page_numbers)
        return pages_markdown


class FindInMarkdownTool(LoggingTool):
    """Tool locating the pages of an OCR'd document that match search queries.

    Every call is delegated to ``find_in_markdown`` with the OCR response
    stored at construction time.
    """

    name = ToolNames.FIND_IN_MARKDOWN.value
    description = f"Finds the page numbers of the document that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the document that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages."
    inputs = {
        "search_queries": {"type": "array", "description": "The search queries to find in the document. List of strings."},
    }
    output_type = "array"

    def __init__(self, markdown: OCRResponse):
        """Keep the OCR response that later searches will run against.

        Args:
            markdown: OCR response of the document to search.
        """
        super().__init__()
        self.markdown: OCRResponse = markdown

    def forward(self, search_queries: list[str]) -> list[int]:
        """Return the result of ``find_in_markdown`` for the given queries.

        Args:
            search_queries: Search strings; passed through unchanged to
                ``find_in_markdown``.
        """
        matching_pages = find_in_markdown(self.markdown, search_queries)
        return matching_pages


def create_agent(markdown: OCRResponse, model_id="deepseek/deepseek-chat"):
    """Build a ``CodeAgent`` equipped with the markdown tools of this module.

    This agent is only an experiment; the main agent is not meant to use it
    as is.

    Args:
        markdown: OCR response handed to every tool.
        model_id: Model identifier forwarded to ``LiteLLMModel``.

    Returns:
        The configured ``CodeAgent`` named ``"markdown_agent"``.
    """
    llm = LiteLLMModel(model_id=model_id)

    # One instance of each tool, all reading from the same OCR response.
    tool_classes = (
        GetTableOfContentsTool,
        GetMarkdownTool,
        GetPagesContentTool,
        FindInMarkdownTool,
    )
    markdown_tools = [tool_cls(markdown) for tool_cls in tool_classes]

    agent = CodeAgent(
        name="markdown_agent",
        description="""A team member that can analyse a markdown.""",
        model=llm,
        tools=markdown_tools,
        max_steps=20,
        planning_interval=4,
        verbosity_level=2,
    )

    # Extend the task template used when this agent runs as a managed agent.
    # NOTE(review): the appended sentence mentions online .txt files although
    # none of the tools registered above navigate the web -- confirm whether
    # it is intentional before relying on it.
    managed_templates = agent.prompt_templates["managed_agent"]
    managed_templates["task"] = managed_templates["task"] + "You can navigate to .txt online files."

    return agent