first transition to clean tools
Browse files- .gitignore +0 -1
- agent.py +14 -2
- requirements.txt +5 -4
- tools/csv_reader.py +21 -0
- tools/excel_reader.py +21 -0
- tools/file_downloader.py +24 -0
- tools/img_txt_extractor.py +24 -0
- tools/pdf_reader.py +19 -0
.gitignore
CHANGED
|
@@ -1,3 +1,2 @@
|
|
| 1 |
.env
|
| 2 |
-
pg.py
|
| 3 |
*.json
|
|
|
|
| 1 |
.env
|
|
|
|
| 2 |
*.json
|
agent.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
| 1 |
import os
|
| 2 |
-
from smolagents import CodeAgent, tool, DuckDuckGoSearchTool, OpenAIServerModel, VisitWebpageTool,
|
| 3 |
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
@tool
|
| 6 |
def add(a:int, b:int) -> int:
|
|
@@ -113,6 +118,13 @@ def arvix_search(query: str) -> str:
|
|
| 113 |
def get_agent() -> CodeAgent:
|
| 114 |
search_tool = DuckDuckGoSearchTool()
|
| 115 |
web_page_tool = VisitWebpageTool()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
api_key = os.getenv('CODESTRAL_API_KEY')
|
| 118 |
|
|
@@ -121,4 +133,4 @@ def get_agent() -> CodeAgent:
|
|
| 121 |
api_base="https://codestral.mistral.ai/v1/",
|
| 122 |
api_key=api_key)
|
| 123 |
|
| 124 |
-
return CodeAgent(tools=[add, subtract, multiply, divide, modulus, rounder, search_tool, web_page_tool, wiki_search, arvix_search], model=model)
|
|
|
|
| 1 |
import os
|
| 2 |
+
from smolagents import CodeAgent, tool, DuckDuckGoSearchTool, OpenAIServerModel, VisitWebpageTool, PythonInterpreterTool
|
| 3 |
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
|
| 4 |
+
from tools.csv_reader import CsvReaderTool
|
| 5 |
+
from tools.excel_reader import ExcelReaderTool
|
| 6 |
+
from tools.file_downloader import FileDownloaderTool
|
| 7 |
+
from tools.img_txt_extractor import ImageTextExtractorTool
|
| 8 |
+
from tools.pdf_reader import PdfReaderTool
|
| 9 |
|
| 10 |
@tool
|
| 11 |
def add(a:int, b:int) -> int:
|
|
|
|
| 118 |
def get_agent() -> CodeAgent:
|
| 119 |
search_tool = DuckDuckGoSearchTool()
|
| 120 |
web_page_tool = VisitWebpageTool()
|
| 121 |
+
py_interpreter = PythonInterpreterTool()
|
| 122 |
+
|
| 123 |
+
csv_reader = CsvReaderTool()
|
| 124 |
+
excel_reader = ExcelReaderTool()
|
| 125 |
+
file_downloader = FileDownloaderTool()
|
| 126 |
+
img_txt_extractor = ImageTextExtractorTool()
|
| 127 |
+
pdf_reader = PdfReaderTool()
|
| 128 |
|
| 129 |
api_key = os.getenv('CODESTRAL_API_KEY')
|
| 130 |
|
|
|
|
| 133 |
api_base="https://codestral.mistral.ai/v1/",
|
| 134 |
api_key=api_key)
|
| 135 |
|
| 136 |
+
return CodeAgent(tools=[add, subtract, multiply, divide, modulus, rounder, search_tool, web_page_tool, py_interpreter, wiki_search, arvix_search, csv_reader, excel_reader, file_downloader, img_txt_extractor, pdf_reader], model=model)
|
requirements.txt
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
gradio
|
| 2 |
requests
|
|
|
|
|
|
|
| 3 |
smolagents[openai]
|
| 4 |
-
langchain_community
|
| 5 |
-
wikipedia
|
| 6 |
duckduckgo_search
|
| 7 |
requests
|
| 8 |
markdownify
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
| 1 |
gradio
|
| 2 |
requests
|
| 3 |
+
pandas
|
| 4 |
+
smolagents
|
| 5 |
smolagents[openai]
|
|
|
|
|
|
|
| 6 |
duckduckgo_search
|
| 7 |
requests
|
| 8 |
markdownify
|
| 9 |
+
openpyxl
|
| 10 |
+
easyocr
|
| 11 |
+
pdfminer.six
|
tools/csv_reader.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from smolagents import Tool
|
| 3 |
+
|
| 4 |
+
class CsvReaderTool(Tool):
    """Agent tool that loads a CSV file with pandas and returns it as JSON text."""

    name = "csv_reader"
    description = "Extract CSV file content. Supported file extensions: .csv"
    inputs = {
        "file_path": {"type": "string", "description": "Path to the CSV file"},
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Read the CSV at *file_path* and serialize it with ``DataFrame.to_json``.

        A ``describe()`` summary of the loaded frame is printed before the
        JSON is returned. Any exception raised while reading, summarizing or
        serializing is reported as an error string instead of propagating.
        """
        try:
            frame = pd.read_csv(file_path)
            summary = frame.describe()
            print(f"Describe CSV file:\n {summary}")
            return frame.to_json()
        except Exception as err:
            # Tool boundary: hand the failure back to the caller as text.
            return f"Error processing CSV file: {err!s}"
|
tools/excel_reader.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from smolagents import Tool
|
| 3 |
+
|
| 4 |
+
class ExcelReaderTool(Tool):
    """Agent tool that loads a spreadsheet with pandas and returns it as JSON text."""

    name = "excel_reader"
    description = "Extract Excel file content. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt"
    inputs = {
        "file_path": {"type": "string", "description": "Path to the Excel file"},
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Read the workbook at *file_path* and serialize it with ``DataFrame.to_json``.

        The file is loaded through ``pd.read_excel`` with default arguments.
        A ``describe()`` summary of the loaded frame is printed before the
        JSON is returned. Any exception raised along the way is reported as
        an error string instead of propagating.
        """
        try:
            frame = pd.read_excel(file_path)
            summary = frame.describe()
            print(f"Describe Excel file:\n {summary}")
            return frame.to_json()
        except Exception as err:
            # Tool boundary: hand the failure back to the caller as text.
            return f"Error processing Excel file: {err!s}"
|
tools/file_downloader.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import urllib.request
|
| 2 |
+
from smolagents import Tool
|
| 3 |
+
|
| 4 |
+
class FileDownloaderTool(Tool):
    """Agent tool that downloads a URL to a temporary file and returns its path."""

    name = "file_downloader"
    description = "Download a file from Internet by URL provided, save it into temp dir and return file path"
    inputs = {
        "url": {
            "type": "string",
            "description": "URL to download from",
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Download *url* and return the local path of the saved file.

        Args:
            url: Address of the resource to fetch.

        Returns:
            The path reported by ``urllib.request.urlretrieve`` on success.
            On failure, an ``"Error downloading file: ..."`` message, so the
            declared string output is always honoured and the caller sees
            why the download failed (same convention as the reader tools).
        """
        try:
            # With no target filename, urlretrieve picks a temporary location
            # and returns a (filename, headers) pair.
            file_path, _headers = urllib.request.urlretrieve(url)
        except Exception as e:
            # Previously the error was only printed and None was returned,
            # which contradicts output_type "string".
            return f"Error downloading file: {str(e)}"

        return file_path
|
tools/img_txt_extractor.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import Tool
|
| 2 |
+
import easyocr
|
| 3 |
+
|
| 4 |
+
class ImageTextExtractorTool(Tool):
    """Agent tool that runs an easyocr reader over an image file."""

    name = "img_txt_extractor"
    description = """
    Multilingual OCR tool to extract key information or presented text from any image.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {"type": "string", "description": "The path to the image file"},
    }
    output_type = "array"

    def __init__(self):
        """Initialize the base tool and build the easyocr reader once per instance."""
        super().__init__()
        languages = ['ch_sim', 'en']
        self.reader = easyocr.Reader(languages)

    def forward(self, image_path: str) -> list[str]:
        """Return what ``reader.readtext`` yields for *image_path* with ``detail=False``."""
        return self.reader.readtext(image_path, detail=False)
|
tools/pdf_reader.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pdfminer.high_level import extract_text
|
| 2 |
+
from smolagents import Tool
|
| 3 |
+
|
| 4 |
+
class PdfReaderTool(Tool):
    """Agent tool that extracts the text of a PDF file via pdfminer."""

    name = "pdf_reader"
    description = "Extract PDF content. Supported file extensions: .pdf"
    inputs = {
        "file_path": {"type": "string", "description": "Path to the PDF file"},
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Return the result of ``extract_text`` for *file_path*.

        Any exception raised during extraction is reported as an error
        string instead of propagating.
        """
        try:
            content = extract_text(file_path)
        except Exception as err:
            # Tool boundary: hand the failure back to the caller as text.
            return f"Error processing PDF file: {err!s}"
        return content
|