Nioi committed on
Commit
726d2d9
·
1 Parent(s): bbf20c6
.gitignore CHANGED
@@ -1,2 +1,6 @@
1
  .env
2
- *.json
 
 
 
 
 
1
  .env
2
+ *.json
3
+ TODO.md
4
+ .venv
5
+ test.py
6
+ __pycache__
agent.py CHANGED
@@ -1,132 +1,13 @@
1
  import os
2
- from smolagents import CodeAgent, tool, DuckDuckGoSearchTool, OpenAIServerModel, VisitWebpageTool, PythonInterpreterTool, SpeechToTextTool
3
- from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
4
- from tools.csv_reader import CsvReaderTool
5
- from tools.excel_reader import ExcelReaderTool
6
- from tools.file_downloader import FileDownloaderTool
7
- from tools.img_txt_extractor import ImageTextExtractorTool
8
- from tools.pdf_reader import PdfReaderTool
9
-
10
- @tool
11
- def add(a:int, b:int) -> int:
12
- """
13
- This tool returns the sum of two numbers.
14
-
15
- Args:
16
- a: first number
17
- b: second number
18
- """
19
-
20
- return a+b
21
-
22
-
23
- @tool
24
- def subtract(a:int, b:int) -> int:
25
- """
26
- This tool returns the difference between two numbers.
27
-
28
- Args:
29
- a: first number
30
- b: second number
31
- """
32
-
33
- return a-b
34
-
35
-
36
- @tool
37
- def multiply(a:int, b:int) -> int:
38
- """
39
- This tool multiplies two numbers.
40
-
41
- Args:
42
- a: first number
43
- b: second number
44
- """
45
-
46
- return a*b
47
-
48
-
49
- @tool
50
- def divide(a:int, b:int) -> float:
51
- """
52
- This tool divides two numbers.
53
-
54
- Args:
55
- a: first number
56
- b: second number
57
- """
58
-
59
- if b==0: raise ValueError('Cannot divide by zero')
60
- return a/b
61
-
62
-
63
- @tool
64
- def modulus(a:int, b:int) -> int:
65
- """
66
- This tool returns the modulus of two numbers.
67
-
68
- Args:
69
- a: first number
70
- b: second number
71
- """
72
-
73
- return a%b
74
-
75
-
76
- @tool
77
- def rounder(a:float, n:int) -> float:
78
- """
79
- This tool return a number rounded to a certain number of decimals.
80
-
81
- Args:
82
- a: number to be rounded
83
- n: number of decimals to use when rounding the number
84
- """
85
-
86
- return round(a,n)
87
-
88
- @tool
89
- def wiki_search(query: str) -> str:
90
- """Search Wikipedia for a query and return maximum 2 results.
91
-
92
- Args:
93
- query: The search query."""
94
- search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
95
- formatted_search_docs = "\n\n---\n\n".join(
96
- [
97
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
98
- for doc in search_docs
99
- ])
100
- return {"wiki_results": formatted_search_docs}
101
-
102
-
103
- @tool
104
- def arvix_search(query: str) -> str:
105
- """Search Arxiv for a query and return maximum 3 result.
106
-
107
- Args:
108
- query: The search query."""
109
- search_docs = ArxivLoader(query=query, load_max_docs=3).load()
110
- formatted_search_docs = "\n\n---\n\n".join(
111
- [
112
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
113
- for doc in search_docs
114
- ])
115
- return {"arvix_results": formatted_search_docs}
116
-
117
 
118
  def get_agent() -> CodeAgent:
119
  search_tool = DuckDuckGoSearchTool(max_results=5)
120
  web_page_tool = VisitWebpageTool(max_output_length=1_000_000)
121
- py_interpreter = PythonInterpreterTool()
122
-
123
- csv_reader = CsvReaderTool()
124
- excel_reader = ExcelReaderTool()
125
- file_downloader = FileDownloaderTool()
126
- img_txt_extractor = ImageTextExtractorTool()
127
- pdf_reader = PdfReaderTool()
128
-
129
- speech_txt = SpeechToTextTool()
130
 
131
  api_key = os.getenv('CODESTRAL_API_KEY')
132
 
@@ -135,4 +16,4 @@ def get_agent() -> CodeAgent:
135
  api_base="https://codestral.mistral.ai/v1/",
136
  api_key=api_key)
137
 
138
- return CodeAgent(tools=[add, subtract, multiply, divide, modulus, rounder, search_tool, web_page_tool, py_interpreter, wiki_search, arvix_search, csv_reader, excel_reader, file_downloader, img_txt_extractor, pdf_reader, speech_txt], model=model)
 
1
  import os
2
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel, VisitWebpageTool
3
+ from tools.attached_files import csv_reader, download_file_from_url, excel_reader
4
+ from tools.basic_math import *
5
+ from tools.browser import arvix_search, wiki_search
6
+ from tools.media import transcribe_audio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  def get_agent() -> CodeAgent:
9
  search_tool = DuckDuckGoSearchTool(max_results=5)
10
  web_page_tool = VisitWebpageTool(max_output_length=1_000_000)
 
 
 
 
 
 
 
 
 
11
 
12
  api_key = os.getenv('CODESTRAL_API_KEY')
13
 
 
16
  api_base="https://codestral.mistral.ai/v1/",
17
  api_key=api_key)
18
 
19
+ return CodeAgent(tools=[search_tool, web_page_tool, add, subtract, multiply, divide, modulus, rounder, power, square_root, download_file_from_url, csv_reader, excel_reader, transcribe_audio, wiki_search, arvix_search], model=model, planning_interval=None, additional_authorized_imports=['random', 'time'], verbosity_level=1, max_steps=5, add_base_tools=True)
app.py CHANGED
@@ -72,7 +72,7 @@ def run_random_question(profile: gr.OAuthProfile | None):
72
  return "Random question is missing task_id or question", None
73
  try:
74
  with open('system_prompt.txt') as f:
75
- submitted_answer = agent(f.readline()+"\n\n"+question_text)
76
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
77
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
78
  except Exception as e:
 
72
  return "Random question is missing task_id or question", None
73
  try:
74
  with open('system_prompt.txt') as f:
75
+ submitted_answer = agent(f.read()+"\n\n"+question_text)
76
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
77
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
78
  except Exception as e:
requirements.txt CHANGED
@@ -2,15 +2,14 @@ gradio
2
  requests
3
  pandas
4
  smolagents
 
5
  smolagents[openai]
6
  smolagents[transformers]
7
- smolagents[audio]
8
  langchain_community
9
  wikipedia
10
  arxiv
11
  pymupdf
12
  duckduckgo_search
13
- requests
14
  markdownify
15
  openpyxl
16
  easyocr
 
2
  requests
3
  pandas
4
  smolagents
5
+ smolagents[toolkit]
6
  smolagents[openai]
7
  smolagents[transformers]
 
8
  langchain_community
9
  wikipedia
10
  arxiv
11
  pymupdf
12
  duckduckgo_search
 
13
  markdownify
14
  openpyxl
15
  easyocr
system_prompt.txt CHANGED
@@ -1 +1,5 @@
1
- You are a helpful assistant tasked with answering questions using a set of tools. Now, I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
 
 
 
 
 
1
+ You are a helpful assistant tasked with answering questions using a set of tools.
2
+ Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
3
+ FINAL ANSWER: [YOUR FINAL ANSWER].
4
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use commas to write your number, and don't use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the rules above to each element (number or string) and ensure there is exactly one space after each comma.
5
+ Your answer should only start with "FINAL ANSWER: ", followed by the answer.
tools/attached_files.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import urllib
3
+ import pandas as pd
4
+
5
@tool
def download_file_from_url(url: str) -> str:
    """
    Download a file from a URL and save it to a temporary location.
    On success the path of the downloaded temporary file is returned; on failure
    a message starting with "Error downloading file:" is returned instead.

    Args:
        url: the URL of the file to download.
    """
    # A bare `import urllib` does not load the `request` submodule, so
    # `urllib.request` is only reachable if some other module imported it
    # first. Import it explicitly here so the tool never depends on that.
    import urllib.request

    try:
        # urlretrieve returns (local_filename, headers); only the path is needed.
        file_path, _headers = urllib.request.urlretrieve(url)
    except Exception as e:
        # Report the failure as text so the calling agent can read and react to it.
        return f"Error downloading file: {str(e)}"

    return file_path
21
+
22
@tool
def csv_reader(file_path: str) -> str:
    """
    Extract CSV file content and return it in a json format. Supported file extensions: .csv

    Args:
        file_path: the path to the CSV file.
    """
    try:
        # Load the file into a DataFrame and serialise it to JSON in one step.
        return pd.read_csv(file_path).to_json()
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return f"Error analyzing CSV file: {str(e)}"
36
+
37
@tool
def excel_reader(file_path: str) -> str:
    """
    Extract Excel file content and return it in a json format. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt

    Args:
        file_path: the path to the Excel file.
    """
    try:
        # Load the workbook into a DataFrame and serialise it to JSON in one step.
        return pd.read_excel(file_path).to_json()
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return f"Error analyzing Excel file: {str(e)}"
tools/basic_math.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import cmath
3
+
4
@tool
def add(a: int, b: int) -> int:
    """
    This tool returns the sum of two numbers.

    Args:
        a: first number
        b: second number
    """
    total = a + b
    return total
15
+
16
+
17
@tool
def subtract(a: int, b: int) -> int:
    """
    This tool returns the difference between two numbers (first minus second).

    Args:
        a: first number
        b: second number
    """
    difference = a - b
    return difference
28
+
29
+
30
@tool
def multiply(a: int, b: int) -> int:
    """
    This tool multiplies two numbers and returns their product.

    Args:
        a: first number
        b: second number
    """
    product = a * b
    return product
41
+
42
+
43
@tool
def divide(a: int, b: int) -> float:
    """
    This tool divides the first number by the second one.
    A ValueError is raised when the second number is zero.

    Args:
        a: first number
        b: second number
    """
    if b == 0:
        # Fail with an explicit message instead of Python's ZeroDivisionError.
        raise ValueError('Cannot divide by zero')

    quotient = a / b
    return quotient
55
+
56
+
57
@tool
def modulus(a: int, b: int) -> int:
    """
    This tool returns the modulus of two numbers (first modulo second).

    Args:
        a: first number
        b: second number
    """
    remainder = a % b
    return remainder
68
+
69
+
70
@tool
def rounder(a: float, n: int) -> float:
    """
    This tool returns a number rounded to a certain number of decimals.

    Args:
        a: number to be rounded
        n: number of decimals to use when rounding the number
    """
    rounded = round(a, n)
    return rounded
81
+
82
+
83
@tool
def power(a: float, b: float) -> float:
    """
    Get the power of two numbers: the first number raised to the second.

    Args:
        a: the first number
        b: the second number
    """
    # Two-argument pow() is the function form of the ** operator.
    return pow(a, b)
93
+
94
+
95
@tool
def square_root(a: float) -> float | complex:
    """
    Get the square root of a number.

    Args:
        a: the number to get the square root of
    """
    # Non-negative input uses real exponentiation; everything else is handed
    # to cmath so a negative number yields a complex root.
    return a**0.5 if a >= 0 else cmath.sqrt(a)
tools/browser.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
3
+
4
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results as text.
    Each result is a <Document> block holding the source and page metadata
    followed by the page content; results are separated by a "---" line.

    Args:
        query: The search query."""
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    # Return the formatted text itself: the tool is annotated as returning
    # `str`, so wrapping the text in a dict contradicted the declared output.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
17
+
18
+
19
@tool
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 results as text.
    Each result is a <Document> block holding the source and page metadata
    followed by the first 1000 characters of the content; results are
    separated by a "---" line.

    Args:
        query: The search query."""
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    # Return the formatted text itself: the tool is annotated as returning
    # `str`, so wrapping the text in a dict contradicted the declared output.
    return "\n\n---\n\n".join(
        # Content is truncated to 1000 characters to keep the output short.
        f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
tools/csv_reader.py DELETED
@@ -1,21 +0,0 @@
1
- import pandas as pd
2
- from smolagents import Tool
3
-
4
- class CsvReaderTool(Tool):
5
- name = "csv_reader"
6
- description = "Extract CSV file content. Supported file extensions: .csv"
7
- inputs = {
8
- "file_path": {
9
- "type": "string",
10
- "description": "Path to the CSV file",
11
- }
12
- }
13
- output_type = "string"
14
-
15
- def forward(self, file_path) -> str:
16
- try:
17
- df = pd.read_csv(file_path)
18
- print(f"Describe CSV file:\n {df.describe()}")
19
- return df.to_json()
20
- except Exception as e:
21
- return f"Error processing CSV file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/excel_reader.py DELETED
@@ -1,21 +0,0 @@
1
- import pandas as pd
2
- from smolagents import Tool
3
-
4
- class ExcelReaderTool(Tool):
5
- name = "excel_reader"
6
- description = "Extract Excel file content. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt"
7
- inputs = {
8
- "file_path": {
9
- "type": "string",
10
- "description": "Path to the Excel file",
11
- }
12
- }
13
- output_type = "string"
14
-
15
- def forward(self, file_path) -> str:
16
- try:
17
- df = pd.read_excel(file_path)
18
- print(f"Describe Excel file:\n {df.describe()}")
19
- return df.to_json()
20
- except Exception as e:
21
- return f"Error processing Excel file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/file_downloader.py DELETED
@@ -1,24 +0,0 @@
1
- import urllib.request
2
- from smolagents import Tool
3
-
4
- class FileDownloaderTool(Tool):
5
- name = "file_downloader"
6
- description = "Download a file from Internet by URL provided, save it into temp dir and return file path"
7
- inputs = {
8
- "url": {
9
- "type": "string",
10
- "description": "URL to download from",
11
- }
12
- }
13
- output_type = "string"
14
-
15
- def forward(self, url: str) -> str:
16
- file_path = None
17
-
18
- try:
19
- result = urllib.request.urlretrieve(url)
20
- file_path = result[0]
21
- except Exception as e:
22
- print(f"Error downloading file: {str(e)}")
23
-
24
- return file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/img_txt_extractor.py DELETED
@@ -1,24 +0,0 @@
1
- from smolagents import Tool
2
- import easyocr
3
-
4
- class ImageTextExtractorTool(Tool):
5
- name = "img_txt_extractor"
6
- description = """
7
- Multilingual OCR tool to extract key information or presented text from any image.
8
- Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
9
- """
10
- inputs = {
11
- "image_path": {
12
- "type": "string",
13
- "description": "The path to the image file",
14
- }
15
- }
16
- output_type = "array"
17
-
18
- def __init__(self):
19
- super().__init__()
20
- self.reader = easyocr.Reader(['ch_sim', 'en'])
21
-
22
- def forward(self, image_path: str) -> list[str]:
23
- result = self.reader.readtext(image_path, detail=False)
24
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/media.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import whisper
3
+ import requests
4
+ from board_to_fen.predict import get_fen_from_image_path
5
+
6
+ STOCKFISH_API_URL = "https://stockfish.online/api/s/v2.php"
7
+ MODEL = whisper.load_model("tiny")
8
+
9
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Extract MP3 file content and return it as text. Supported file extensions: .mp3

    Args:
        file_path: the path to the mp3 file.
    """
    try:
        # MODEL is the module-level Whisper model loaded once at import time.
        transcription = MODEL.transcribe(file_path)
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return f"Error transcribing file: {str(e)}"

    return transcription['text']
25
+
26
+ #@tool
27
def get_fen(file_path:str) -> str:
    """
    Extract Chess Board Image file content and return a string representing the board in FEN notation. Supported file extensions: .png

    Args:
        file_path: the path to the chess board image file.
    """
    try:
        # Delegate the image recognition to board_to_fen.
        return get_fen_from_image_path(file_path)
    except Exception as e:
        # Any failure is reported as text rather than raised.
        return f"Error decoding image file: {str(e)}"
41
+
42
+ #@tool
43
def get_best_chess_move(fen: str) -> str:
    """
    Return the best chess move provided the FEN notation of the board.
    The position is sent to the API at STOCKFISH_API_URL with a search depth
    of 8. Any failure (network error, HTTP error, unusable response) is
    reported as a message starting with "Error fetching best move:".

    Args:
        fen: FEN string to analyze.
    """
    try:
        response = requests.get(
            STOCKFISH_API_URL,
            params={"fen": fen, "depth": 8},
            timeout=30,  # without a timeout an unresponsive API would hang the caller
        )
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        return f"Error fetching best move: {str(e)}"

    # NOTE(review): the field is presumably shaped like
    # "bestmove e2e4 ponder e7e5" (the move is the second token) - confirm
    # against the API documentation.
    best_move = data.get('bestmove') if isinstance(data, dict) else None
    if not isinstance(best_move, str):
        # Missing field (e.g. the API rejected the FEN): report it instead of
        # crashing with AttributeError on None.
        return f"Error fetching best move: unexpected API response: {data}"

    tokens = best_move.split(' ')
    if len(tokens) < 2:
        return f"Error fetching best move: unexpected 'bestmove' value: {best_move}"

    return tokens[1].strip()
tools/pdf_reader.py DELETED
@@ -1,19 +0,0 @@
1
- from pdfminer.high_level import extract_text
2
- from smolagents import Tool
3
-
4
- class PdfReaderTool(Tool):
5
- name = "pdf_reader"
6
- description = "Extract PDF content. Supported file extensions: .pdf"
7
- inputs = {
8
- "file_path": {
9
- "type": "string",
10
- "description": "Path to the PDF file",
11
- }
12
- }
13
- output_type = "string"
14
-
15
- def forward(self, file_path) -> str:
16
- try:
17
- return extract_text(file_path)
18
- except Exception as e:
19
- return f"Error processing PDF file: {str(e)}"