Charles Azam committed on
Commit
8dfc9f8
·
1 Parent(s): 1ed26ea

feat: add pdf agent

Browse files
pyproject.toml CHANGED
@@ -26,6 +26,7 @@ dependencies = [
26
  "mistralai>=1.9.1",
27
  "fastapi>=0.115.14",
28
  "supabase>=2.16.0",
 
29
  ]
30
 
31
  [project.scripts]
 
26
  "mistralai>=1.9.1",
27
  "fastapi>=0.115.14",
28
  "supabase>=2.16.0",
29
+ "ipython>=9.4.0",
30
  ]
31
 
32
  [project.scripts]
src/deepengineer/deepsearch/analyse_markdown_agent.py CHANGED
@@ -1,52 +1,98 @@
1
- def create_agent(model_id="o1"):
2
- model_params = {
3
- "model_id": model_id,
4
- "custom_role_conversions": custom_role_conversions,
5
- "max_completion_tokens": 8192,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
7
- if model_id == "o1":
8
- model_params["reasoning_effort"] = "high"
9
- model = LiteLLMModel(model_id="deepseek/deepseek-chat")
10
-
11
- text_limit = 100000
12
- browser = SimpleTextBrowser(**BROWSER_CONFIG)
13
- WEB_TOOLS = [
14
- GoogleSearchTool(provider="serper"),
15
- VisitTool(browser),
16
- PageUpTool(browser),
17
- PageDownTool(browser),
18
- FinderTool(browser),
19
- FindNextTool(browser),
20
- ArchiveSearchTool(browser),
21
- TextInspectorTool(model, text_limit),
 
 
 
 
22
  ]
23
- text_webbrowser_agent = ToolCallingAgent(
24
  model=model,
25
- tools=WEB_TOOLS,
26
  max_steps=20,
27
  verbosity_level=2,
28
  planning_interval=4,
29
- name="search_agent",
30
- description="""A team member that will search the internet to answer your question.
31
- Ask him for all your questions that require browsing the web.
32
- Provide him as much context as possible, in particular if you need to search on a specific timeframe!
33
- And don't hesitate to provide him with a complex search task, like finding a difference between two webpages.
34
- Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords.
35
- """,
36
  provide_run_summary=True,
37
  )
38
- text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += """You can navigate to .txt online files.
39
- If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it.
40
- Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information."""
41
-
42
- manager_agent = CodeAgent(
43
- model=model,
44
- tools=[visualizer, TextInspectorTool(model, text_limit)],
45
- max_steps=12,
46
- verbosity_level=2,
47
- additional_authorized_imports=["*"],
48
- planning_interval=4,
49
- managed_agents=[text_webbrowser_agent],
50
- )
51
 
52
- return manager_agent
 
1
+ from smolagents import CodeAgent, tool, Tool, LiteLLMModel
2
+ from deepengineer.webcrawler.pdf_utils import get_markdown_by_page_numbers, get_table_of_contents_per_page_pdf, find_in_pdf, convert_ocr_response_to_markdown
3
+ from mistralai import OCRResponse
4
+ from enum import Enum
5
+ from pathlib import Path
6
+
7
+ class ToolNames(Enum):
8
+ GET_TABLE_OF_CONTENTS = "get_table_of_contents"
9
+ GET_MARKDOWN = "get_markdown"
10
+ GET_PAGES_CONTENT = "get_pages_content"
11
+ FIND_IN_PDF = "find_in_pdf"
12
+
13
class GetTableOfContentsTool(Tool):
    """Tool that returns the table of contents of one OCR'd document."""

    name = ToolNames.GET_TABLE_OF_CONTENTS.value
    description = "Returns all of the titles in the document along with the page number they are on."
    inputs = {}
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        # Run the base initializer first: smolagents' ``Tool.__init__`` sets up
        # state (``is_initialized``) that the tool needs when it is called.
        super().__init__()
        self.markdown: OCRResponse = markdown
        # Built once here; ``forward`` only hands back the stored string.
        self.table_of_contents: str = get_table_of_contents_per_page_pdf(self.markdown)

    def forward(self) -> str:
        """Return the table of contents computed at construction time."""
        return self.table_of_contents
25
+
26
class GetMarkdownTool(Tool):
    """Tool that returns the whole document as a single markdown string."""

    name = ToolNames.GET_MARKDOWN.value
    description = f"Returns the markdown entire content of the document. Beware this might be too long to be useful, except for small documents, use {ToolNames.GET_PAGES_CONTENT.value} instead. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages."
    inputs = {}
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        # Run the base initializer first: smolagents' ``Tool.__init__`` sets up
        # state (``is_initialized``) that the tool needs when it is called.
        super().__init__()
        self.markdown: OCRResponse = markdown
        # Converted once here; ``forward`` only hands back the stored string.
        self.markdown_content: str = convert_ocr_response_to_markdown(self.markdown)

    def forward(self) -> str:
        """Return the full markdown computed at construction time."""
        return self.markdown_content
38
+
39
+
40
class GetPagesContentTool(Tool):
    """Tool that returns the markdown of a chosen list of pages."""

    name = ToolNames.GET_PAGES_CONTENT.value
    description = f"Returns the content of the pages. You can use {ToolNames.GET_TABLE_OF_CONTENTS.value} to get the table of contents of the document including the number of pages. Expects a list of page numbers as integers as input."
    inputs = {
        "page_numbers": {
            "type": "array",
            "description": "The page numbers to get the content of.",
        },
    }
    output_type = "string"

    def __init__(self, markdown: OCRResponse):
        # Run the base initializer first: smolagents' ``Tool.__init__`` sets up
        # state (``is_initialized``) that the tool needs when it is called.
        super().__init__()
        self.markdown: OCRResponse = markdown

    def forward(self, page_numbers: list[int]) -> str:
        """Return the markdown of ``page_numbers`` via ``get_markdown_by_page_numbers``."""
        return get_markdown_by_page_numbers(self.markdown, page_numbers)
56
+
57
class FindInPdfTool(Tool):
    """Tool that finds the pages containing any of the given search queries."""

    name = ToolNames.FIND_IN_PDF.value
    description = f"Finds the page numbers of the document that contain the search queries. If you are looking for a specific information, you can use this tool to find the page numbers of the document that contain the information and then use {ToolNames.GET_PAGES_CONTENT.value} to get the content of the pages."
    inputs = {
        "search_queries": {
            "type": "array",
            "description": "The search queries to find in the document. List of strings.",
        },
    }
    output_type = "array"

    def __init__(self, markdown: OCRResponse):
        # Run the base initializer first: smolagents' ``Tool.__init__`` sets up
        # state (``is_initialized``) that the tool needs when it is called.
        super().__init__()
        self.markdown: OCRResponse = markdown

    def forward(self, search_queries: list[str]) -> list[int]:
        """Return the page numbers found by ``find_in_pdf`` for ``search_queries``."""
        return find_in_pdf(self.markdown, search_queries)
73
+
74
+
75
+
76
def create_agent(markdown: OCRResponse, model_id="deepseek/deepseek-chat"):
    """Build a ``CodeAgent`` that answers questions about one OCR'd document.

    Args:
        markdown: OCR response of the document; passed to every PDF tool.
        model_id: Model identifier passed to ``LiteLLMModel``.

    Returns:
        The configured agent, named ``"pdf_agent"``.
    """
    model = LiteLLMModel(model_id=model_id)

    # All four tools are bound to the same document.
    pdf_tools = [
        GetTableOfContentsTool(markdown),
        GetMarkdownTool(markdown),
        GetPagesContentTool(markdown),
        FindInPdfTool(markdown),
    ]
    pdf_agent = CodeAgent(
        model=model,
        tools=pdf_tools,
        max_steps=20,
        verbosity_level=2,
        planning_interval=4,
        name="pdf_agent",
        # This text is what a managing agent reads to decide when to delegate,
        # so it must describe document analysis rather than web search.
        description="""A team member that reads a PDF document (already converted to markdown) to answer your question.
Ask him for all your questions about the content of that document.
Your request must be a real sentence, not a few keywords.""",
        provide_run_summary=True,
    )
    # Extra guidance appended to the managed-agent task prompt; it names only
    # tools this agent actually has.
    pdf_agent.prompt_templates["managed_agent"]["task"] += (
        f"\nUse `{ToolNames.GET_TABLE_OF_CONTENTS.value}` or `{ToolNames.FIND_IN_PDF.value}` "
        f"to locate the relevant pages, then `{ToolNames.GET_PAGES_CONTENT.value}` to read them."
    )

    return pdf_agent
src/deepengineer/webcrawler/pdf_utils.py CHANGED
@@ -7,7 +7,7 @@ from mistralai import Mistral
7
  import os
8
  from litellm import completion
9
 
10
- from mistralai.models import OCRResponse
11
  import yaml
12
  from tenacity import retry, stop_after_attempt, wait_fixed, RetryError
13
  from litellm.exceptions import BadRequestError
@@ -52,34 +52,45 @@ def convert_ocr_response_to_markdown(
52
 
53
  return "\n\n".join(markdowns)
54
 
55
- def get_markdown_by_page_numbers(markdown: OCRResponse, page_numbers: list[int]) -> str:
56
  markdowns: list[str] = []
57
- for page_number in page_numbers:
 
 
 
 
58
  markdowns.append(f"*Page {page_number}*\n{markdown.pages[page_number].markdown}")
59
  return "\n\n".join(markdowns)
60
 
61
- def find_in_pdf(markdown: OCRResponse, search_query: str) -> list[int]:
62
  """
63
  Find the page numbers of the pdf that contain the search query.
64
 
65
  Args:
66
  markdown (OCRResponse): The markdown of the pdf.
67
- search_query (str): The search query.
68
 
69
  Returns:
70
  list[int]: The page numbers of the pdf that contain the search query.
71
  """
72
  page_numbers: list[int] = []
73
  for page_number, page in enumerate(markdown.pages):
74
- if search_query.lower() in page.markdown.lower():
75
- page_numbers.append(page_number)
 
76
  return page_numbers
77
 
78
- def table_of_contents_per_page_pdf(markdown: OCRResponse) -> str:
79
  """
80
  Get the table of contents of the pdf.
81
 
82
  Finds all the titles of the pdf to reconstruct the table of contents.
 
 
 
 
 
 
83
  """
84
  title_to_page_number: dict[str, int] = {}
85
  for page_number, page in enumerate(markdown.pages):
@@ -92,6 +103,13 @@ def table_of_contents_per_page_pdf(markdown: OCRResponse) -> str:
92
  table_of_contents = "\n".join([f"{title} - Page {page_number}" for title, page_number in title_to_page_number.items()])
93
  return table_of_contents
94
 
 
 
 
 
 
 
 
95
  def get_images_from_pdf(pdf_path: Path, image_ids: list[str]) -> list[str]:
96
  raise NotImplementedError("Not implemented")
97
 
 
7
  import os
8
  from litellm import completion
9
 
10
+ from mistralai.models import OCRResponse, OCRPageObject
11
  import yaml
12
  from tenacity import retry, stop_after_attempt, wait_fixed, RetryError
13
  from litellm.exceptions import BadRequestError
 
52
 
53
  return "\n\n".join(markdowns)
54
 
55
def get_markdown_by_page_numbers(markdown: OCRResponse, page_numbers: list[int], get_full_content: bool = False) -> str:
    """Return the markdown of the selected pages, each under a ``*Page N*`` header.

    Each page is emitted once, in ascending page order, regardless of the
    order or repetition of ``page_numbers``.

    Args:
        markdown (OCRResponse): The OCR response; ``markdown.pages`` is indexed by page number.
        page_numbers (list[int]): Indices into ``markdown.pages`` to include.
        get_full_content (bool): When true, ``page_numbers`` is ignored and every page is returned.

    Returns:
        str: The page sections joined by blank lines (empty string when no page is selected).

    Raises:
        IndexError: If a requested page number is beyond the bounds of ``markdown.pages``.
    """
    if get_full_content:
        selected = range(len(markdown.pages))
    else:
        # A set removes duplicates but iterates in an unspecified order;
        # sort so the output always follows the document's page order.
        selected = sorted(set(page_numbers))

    return "\n\n".join(
        f"*Page {page_number}*\n{markdown.pages[page_number].markdown}"
        for page_number in selected
    )
64
 
65
def find_in_pdf(markdown: OCRResponse, search_queries: list[str]) -> list[int]:
    """
    Find the page numbers of the pdf that contain at least one of the search queries.

    Matching is a case-insensitive substring test against each page's markdown.

    Args:
        markdown (OCRResponse): The markdown of the pdf.
        search_queries (list[str]): The search queries.

    Returns:
        list[int]: The page numbers, in ascending order and without duplicates,
            of the pages that contain at least one search query.
    """
    # Lower-case the queries once instead of once per page.
    lowered_queries = [search_query.lower() for search_query in search_queries]

    page_numbers: list[int] = []
    for page_number, page in enumerate(markdown.pages):
        page_text = page.markdown.lower()
        # ``any`` records a page once even when several queries match it.
        if any(query in page_text for query in lowered_queries):
            page_numbers.append(page_number)
    return page_numbers
82
 
83
+ def get_table_of_contents_per_page_pdf(markdown: OCRResponse) -> str:
84
  """
85
  Get the table of contents of the pdf.
86
 
87
  Finds all the titles of the pdf to reconstruct the table of contents.
88
+
89
+ Args:
90
+ markdown (OCRResponse): The markdown of the pdf.
91
+
92
+ Returns:
93
+ str: The table of contents of the pdf.
94
  """
95
  title_to_page_number: dict[str, int] = {}
96
  for page_number, page in enumerate(markdown.pages):
 
103
  table_of_contents = "\n".join([f"{title} - Page {page_number}" for title, page_number in title_to_page_number.items()])
104
  return table_of_contents
105
 
106
def convert_raw_markdown_to_ocr_response(raw_markdown: str) -> OCRResponse:
    """
    Wrap raw markdown in an OCRResponse, starting a new page at each top-level title.

    A new page starts at every line beginning with "# ". Text before the first
    such line is kept as its own page unless it is empty or whitespace only.

    Args:
        raw_markdown (str): The markdown text to split.

    Returns:
        OCRResponse: A response with one page per chunk, numbered from 0.
    """
    # ``split`` consumes the "\n# " separator, so the title marker is restored
    # on every chunk after the first. The first chunk never lost a marker and
    # is kept verbatim.
    leading, *sections = raw_markdown.split("\n# ")
    pages = ["# " + section for section in sections]
    if leading.strip():
        pages.insert(0, leading)

    # NOTE(review): confirm that OCRPageObject accepts ``page_number`` and that
    # OCRResponse can be built from ``pages`` alone in the pinned mistralai version.
    return OCRResponse(pages=[OCRPageObject(markdown=page, page_number=i) for i, page in enumerate(pages)])
110
+
111
+
112
+
113
  def get_images_from_pdf(pdf_path: Path, image_ids: list[str]) -> list[str]:
114
  raise NotImplementedError("Not implemented")
115
 
src/deepengineer/webcrawler/tools.py CHANGED
@@ -1,14 +1,17 @@
1
  from deepengineer.webcrawler.async_search import linkup_search_async, tavily_search_async, arxiv_search_async, pubmed_search_async, sciencedirect_search_async, scientific_search_async
2
  from deepengineer.webcrawler.async_crawl import crawl4ai_extract_markdown_of_url_async, arxiv_download_pdf_async, download_pdf_async
 
 
 
3
 
 
 
4
 
5
 
 
 
 
 
 
6
 
7
-
8
-
9
-
10
-
11
-
12
-
13
-
14
-
 
1
  from deepengineer.webcrawler.async_search import linkup_search_async, tavily_search_async, arxiv_search_async, pubmed_search_async, sciencedirect_search_async, scientific_search_async
2
  from deepengineer.webcrawler.async_crawl import crawl4ai_extract_markdown_of_url_async, arxiv_download_pdf_async, download_pdf_async
3
+ from deepengineer.webcrawler.pdf_utils import get_table_of_contents_per_page_pdf
4
+ from typing import Callable
5
+ from smolagents.tools import get_json_schema
6
 
7
+ def print_function_signature_smolagents(tool_function: Callable):
8
+ tool_json_schema = get_json_schema(tool_function)["function"]
9
 
10
 
11
+ # Set the class attributes
12
+ print("name: ", tool_json_schema["name"])
13
+ print("description: ", tool_json_schema["description"])
14
+ print("inputs: ", tool_json_schema["parameters"]["properties"])
15
+ print("output_type: ", tool_json_schema["return"]["type"])
16
 
17
+ print_function_signature_smolagents(get_table_of_contents_per_page_pdf)
 
 
 
 
 
 
 
tests/webcrawler/test_pdf_agent.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from deepengineer.deepsearch.analyse_markdown_agent import create_agent
2
+ from mistralai import OCRResponse
3
+ from deepengineer.common_path import DATA_DIR
4
+
5
+ def load_mock_ocr_response() -> OCRResponse:
6
+ with open(DATA_DIR / "report_thermal_neutron.json", "r") as f:
7
+ return OCRResponse.model_validate_json(f.read())
8
+
9
+
10
+ def test_pdf_agent():
11
+ ocr_response = load_mock_ocr_response()
12
+ pdf_agent = create_agent(ocr_response)
13
+ assert pdf_agent is not None
14
+ assert pdf_agent.name == "pdf_agent"
15
+ assert pdf_agent.tools is not None
16
+ assert len(pdf_agent.tools) == 4 + 1 # +1 for the final answer
17
+
18
+ test_pdf_agent()
tests/webcrawler/test_pdfs_utils.py CHANGED
@@ -1,4 +1,4 @@
1
- from deepengineer.webcrawler.pdf_utils import convert_pdf_to_markdown_async, convert_ocr_response_to_markdown, find_in_pdf, table_of_contents_per_page_pdf, get_markdown_by_page_numbers
2
  from mistralai import OCRResponse
3
  from deepengineer.common_path import DATA_DIR
4
  import pytest
@@ -22,7 +22,7 @@ async def test_convert_pdf_to_markdown_async():
22
 
23
  def test_table_of_contents_per_page_pdf():
24
  ocr_response = load_mock_ocr_response()
25
- table_of_contents = table_of_contents_per_page_pdf(ocr_response)
26
  assert "References - Page 15" in table_of_contents
27
 
28
  def test_find_in_pdf():
 
1
+ from deepengineer.webcrawler.pdf_utils import convert_pdf_to_markdown_async, convert_ocr_response_to_markdown, find_in_pdf, get_table_of_contents_per_page_pdf, get_markdown_by_page_numbers
2
  from mistralai import OCRResponse
3
  from deepengineer.common_path import DATA_DIR
4
  import pytest
 
22
 
23
  def test_table_of_contents_per_page_pdf():
24
  ocr_response = load_mock_ocr_response()
25
+ table_of_contents = get_table_of_contents_per_page_pdf(ocr_response)
26
  assert "References - Page 15" in table_of_contents
27
 
28
  def test_find_in_pdf():
uv.lock CHANGED
@@ -100,6 +100,15 @@ wheels = [
100
  { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 },
101
  ]
102
 
 
 
 
 
 
 
 
 
 
103
  [[package]]
104
  name = "attrs"
105
  version = "25.3.0"
@@ -373,6 +382,15 @@ wheels = [
373
  { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 },
374
  ]
375
 
 
 
 
 
 
 
 
 
 
376
  [[package]]
377
  name = "deepengineer"
378
  version = "0.1.0"
@@ -384,6 +402,7 @@ dependencies = [
384
  { name = "fasttext-wheel" },
385
  { name = "gradio" },
386
  { name = "httpx" },
 
387
  { name = "langchain" },
388
  { name = "litellm" },
389
  { name = "mistralai" },
@@ -406,6 +425,7 @@ requires-dist = [
406
  { name = "fasttext-wheel" },
407
  { name = "gradio" },
408
  { name = "httpx" },
 
409
  { name = "langchain" },
410
  { name = "litellm" },
411
  { name = "mistralai", specifier = ">=1.9.1" },
@@ -459,6 +479,15 @@ wheels = [
459
  { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830 },
460
  ]
461
 
 
 
 
 
 
 
 
 
 
462
  [[package]]
463
  name = "fake-http-header"
464
  version = "0.3.5"
@@ -826,6 +855,51 @@ wheels = [
826
  { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 },
827
  ]
828
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
829
  [[package]]
830
  name = "jinja2"
831
  version = "3.1.6"
@@ -1084,6 +1158,18 @@ wheels = [
1084
  { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 },
1085
  ]
1086
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  [[package]]
1088
  name = "mdurl"
1089
  version = "0.1.2"
@@ -1293,6 +1379,27 @@ wheels = [
1293
  { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847 },
1294
  ]
1295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1296
  [[package]]
1297
  name = "pillow"
1298
  version = "10.4.0"
@@ -1354,6 +1461,18 @@ wheels = [
1354
  { url = "https://files.pythonhosted.org/packages/a4/71/188a50ea64c17f73ff4df5196ec1553a8f1723421eb2d1069c73bab47d78/postgrest-1.1.1-py3-none-any.whl", hash = "sha256:98a6035ee1d14288484bfe36235942c5fb2d26af6d8120dfe3efbe007859251a", size = 22366 },
1355
  ]
1356
 
 
 
 
 
 
 
 
 
 
 
 
 
1357
  [[package]]
1358
  name = "propcache"
1359
  version = "0.3.2"
@@ -1410,6 +1529,24 @@ wheels = [
1410
  { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
1411
  ]
1412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1413
  [[package]]
1414
  name = "pyarrow"
1415
  version = "20.0.0"
@@ -2018,6 +2155,20 @@ wheels = [
2018
  { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224 },
2019
  ]
2020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2021
  [[package]]
2022
  name = "starlette"
2023
  version = "0.46.2"
@@ -2169,6 +2320,15 @@ wheels = [
2169
  { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 },
2170
  ]
2171
 
 
 
 
 
 
 
 
 
 
2172
  [[package]]
2173
  name = "transformers"
2174
  version = "4.53.0"
@@ -2257,6 +2417,15 @@ wheels = [
2257
  { url = "https://files.pythonhosted.org/packages/d2/e2/dc81b1bd1dcfe91735810265e9d26bc8ec5da45b4c0f6237e286819194c3/uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a", size = 66406 },
2258
  ]
2259
 
 
 
 
 
 
 
 
 
 
2260
  [[package]]
2261
  name = "websockets"
2262
  version = "15.0.1"
 
100
  { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 },
101
  ]
102
 
103
+ [[package]]
104
+ name = "asttokens"
105
+ version = "3.0.0"
106
+ source = { registry = "https://pypi.org/simple" }
107
+ sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 }
108
+ wheels = [
109
+ { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 },
110
+ ]
111
+
112
  [[package]]
113
  name = "attrs"
114
  version = "25.3.0"
 
382
  { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 },
383
  ]
384
 
385
+ [[package]]
386
+ name = "decorator"
387
+ version = "5.2.1"
388
+ source = { registry = "https://pypi.org/simple" }
389
+ sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711 }
390
+ wheels = [
391
+ { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190 },
392
+ ]
393
+
394
  [[package]]
395
  name = "deepengineer"
396
  version = "0.1.0"
 
402
  { name = "fasttext-wheel" },
403
  { name = "gradio" },
404
  { name = "httpx" },
405
+ { name = "ipython" },
406
  { name = "langchain" },
407
  { name = "litellm" },
408
  { name = "mistralai" },
 
425
  { name = "fasttext-wheel" },
426
  { name = "gradio" },
427
  { name = "httpx" },
428
+ { name = "ipython", specifier = ">=9.4.0" },
429
  { name = "langchain" },
430
  { name = "litellm" },
431
  { name = "mistralai", specifier = ">=1.9.1" },
 
479
  { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830 },
480
  ]
481
 
482
+ [[package]]
483
+ name = "executing"
484
+ version = "2.2.0"
485
+ source = { registry = "https://pypi.org/simple" }
486
+ sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 }
487
+ wheels = [
488
+ { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
489
+ ]
490
+
491
  [[package]]
492
  name = "fake-http-header"
493
  version = "0.3.5"
 
855
  { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 },
856
  ]
857
 
858
+ [[package]]
859
+ name = "ipython"
860
+ version = "9.4.0"
861
+ source = { registry = "https://pypi.org/simple" }
862
+ dependencies = [
863
+ { name = "colorama", marker = "sys_platform == 'win32'" },
864
+ { name = "decorator" },
865
+ { name = "ipython-pygments-lexers" },
866
+ { name = "jedi" },
867
+ { name = "matplotlib-inline" },
868
+ { name = "pexpect", marker = "sys_platform != 'emscripten' and sys_platform != 'win32'" },
869
+ { name = "prompt-toolkit" },
870
+ { name = "pygments" },
871
+ { name = "stack-data" },
872
+ { name = "traitlets" },
873
+ ]
874
+ sdist = { url = "https://files.pythonhosted.org/packages/54/80/406f9e3bde1c1fd9bf5a0be9d090f8ae623e401b7670d8f6fdf2ab679891/ipython-9.4.0.tar.gz", hash = "sha256:c033c6d4e7914c3d9768aabe76bbe87ba1dc66a92a05db6bfa1125d81f2ee270", size = 4385338 }
875
+ wheels = [
876
+ { url = "https://files.pythonhosted.org/packages/63/f8/0031ee2b906a15a33d6bfc12dd09c3dfa966b3cb5b284ecfb7549e6ac3c4/ipython-9.4.0-py3-none-any.whl", hash = "sha256:25850f025a446d9b359e8d296ba175a36aedd32e83ca9b5060430fe16801f066", size = 611021 },
877
+ ]
878
+
879
+ [[package]]
880
+ name = "ipython-pygments-lexers"
881
+ version = "1.1.1"
882
+ source = { registry = "https://pypi.org/simple" }
883
+ dependencies = [
884
+ { name = "pygments" },
885
+ ]
886
+ sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393 }
887
+ wheels = [
888
+ { url = "https://files.pythonhosted.org/packages/d9/33/1f075bf72b0b747cb3288d011319aaf64083cf2efef8354174e3ed4540e2/ipython_pygments_lexers-1.1.1-py3-none-any.whl", hash = "sha256:a9462224a505ade19a605f71f8fa63c2048833ce50abc86768a0d81d876dc81c", size = 8074 },
889
+ ]
890
+
891
+ [[package]]
892
+ name = "jedi"
893
+ version = "0.19.2"
894
+ source = { registry = "https://pypi.org/simple" }
895
+ dependencies = [
896
+ { name = "parso" },
897
+ ]
898
+ sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 }
899
+ wheels = [
900
+ { url = "https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 },
901
+ ]
902
+
903
  [[package]]
904
  name = "jinja2"
905
  version = "3.1.6"
 
1158
  { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 },
1159
  ]
1160
 
1161
+ [[package]]
1162
+ name = "matplotlib-inline"
1163
+ version = "0.1.7"
1164
+ source = { registry = "https://pypi.org/simple" }
1165
+ dependencies = [
1166
+ { name = "traitlets" },
1167
+ ]
1168
+ sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 }
1169
+ wheels = [
1170
+ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 },
1171
+ ]
1172
+
1173
  [[package]]
1174
  name = "mdurl"
1175
  version = "0.1.2"
 
1379
  { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847 },
1380
  ]
1381
 
1382
+ [[package]]
1383
+ name = "parso"
1384
+ version = "0.8.4"
1385
+ source = { registry = "https://pypi.org/simple" }
1386
+ sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 }
1387
+ wheels = [
1388
+ { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 },
1389
+ ]
1390
+
1391
+ [[package]]
1392
+ name = "pexpect"
1393
+ version = "4.9.0"
1394
+ source = { registry = "https://pypi.org/simple" }
1395
+ dependencies = [
1396
+ { name = "ptyprocess" },
1397
+ ]
1398
+ sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 }
1399
+ wheels = [
1400
+ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 },
1401
+ ]
1402
+
1403
  [[package]]
1404
  name = "pillow"
1405
  version = "10.4.0"
 
1461
  { url = "https://files.pythonhosted.org/packages/a4/71/188a50ea64c17f73ff4df5196ec1553a8f1723421eb2d1069c73bab47d78/postgrest-1.1.1-py3-none-any.whl", hash = "sha256:98a6035ee1d14288484bfe36235942c5fb2d26af6d8120dfe3efbe007859251a", size = 22366 },
1462
  ]
1463
 
1464
+ [[package]]
1465
+ name = "prompt-toolkit"
1466
+ version = "3.0.51"
1467
+ source = { registry = "https://pypi.org/simple" }
1468
+ dependencies = [
1469
+ { name = "wcwidth" },
1470
+ ]
1471
+ sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940 }
1472
+ wheels = [
1473
+ { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810 },
1474
+ ]
1475
+
1476
  [[package]]
1477
  name = "propcache"
1478
  version = "0.3.2"
 
1529
  { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
1530
  ]
1531
 
1532
+ [[package]]
1533
+ name = "ptyprocess"
1534
+ version = "0.7.0"
1535
+ source = { registry = "https://pypi.org/simple" }
1536
+ sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 }
1537
+ wheels = [
1538
+ { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 },
1539
+ ]
1540
+
1541
+ [[package]]
1542
+ name = "pure-eval"
1543
+ version = "0.2.3"
1544
+ source = { registry = "https://pypi.org/simple" }
1545
+ sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 }
1546
+ wheels = [
1547
+ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 },
1548
+ ]
1549
+
1550
  [[package]]
1551
  name = "pyarrow"
1552
  version = "20.0.0"
 
2155
  { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224 },
2156
  ]
2157
 
2158
+ [[package]]
2159
+ name = "stack-data"
2160
+ version = "0.6.3"
2161
+ source = { registry = "https://pypi.org/simple" }
2162
+ dependencies = [
2163
+ { name = "asttokens" },
2164
+ { name = "executing" },
2165
+ { name = "pure-eval" },
2166
+ ]
2167
+ sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 }
2168
+ wheels = [
2169
+ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 },
2170
+ ]
2171
+
2172
  [[package]]
2173
  name = "starlette"
2174
  version = "0.46.2"
 
2320
  { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 },
2321
  ]
2322
 
2323
+ [[package]]
2324
+ name = "traitlets"
2325
+ version = "5.14.3"
2326
+ source = { registry = "https://pypi.org/simple" }
2327
+ sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 }
2328
+ wheels = [
2329
+ { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 },
2330
+ ]
2331
+
2332
  [[package]]
2333
  name = "transformers"
2334
  version = "4.53.0"
 
2417
  { url = "https://files.pythonhosted.org/packages/d2/e2/dc81b1bd1dcfe91735810265e9d26bc8ec5da45b4c0f6237e286819194c3/uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a", size = 66406 },
2418
  ]
2419
 
2420
+ [[package]]
2421
+ name = "wcwidth"
2422
+ version = "0.2.13"
2423
+ source = { registry = "https://pypi.org/simple" }
2424
+ sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 }
2425
+ wheels = [
2426
+ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
2427
+ ]
2428
+
2429
  [[package]]
2430
  name = "websockets"
2431
  version = "15.0.1"