zaldivards committed on
Commit
df6c855
·
1 Parent(s): 10c9801

Fix tools bugs

Browse files
Files changed (3) hide show
  1. requirements.txt +11 -0
  2. tools.py +56 -3
  3. utils.py +1 -4
requirements.txt CHANGED
@@ -1,16 +1,21 @@
 
1
  aiofiles==24.1.0 ; python_version >= '3.8'
2
  annotated-types==0.7.0 ; python_version >= '3.8'
3
  anyio==4.9.0 ; python_version >= '3.9'
 
4
  boto3==1.38.23
5
  botocore==1.38.23 ; python_version >= '3.9'
6
  certifi==2025.4.26 ; python_version >= '3.6'
7
  charset-normalizer==3.4.2 ; python_version >= '3.7'
8
  click==8.2.1 ; python_version >= '3.10'
 
 
9
  et-xmlfile==2.0.0 ; python_version >= '3.8'
10
  fastapi==0.115.12 ; python_version >= '3.8'
11
  ffmpy==0.5.0 ; python_version >= '3.8' and python_version < '4.0'
12
  filelock==3.18.0 ; python_version >= '3.9'
13
  fsspec==2025.5.1 ; python_version >= '3.9'
 
14
  gradio==5.31.0
15
  gradio-client==1.10.1 ; python_version >= '3.10'
16
  groovy==0.1.2 ; python_version >= '3.10'
@@ -21,6 +26,7 @@ httpx==0.28.1 ; python_version >= '3.8'
21
  huggingface-hub==0.32.1 ; python_full_version >= '3.8.0'
22
  idna==3.10 ; python_version >= '3.6'
23
  jinja2==3.1.6 ; python_version >= '3.7'
 
24
  jmespath==1.0.1 ; python_version >= '3.7'
25
  jsonpatch==1.33 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
26
  jsonpointer==3.0.0 ; python_version >= '3.7'
@@ -30,16 +36,20 @@ langgraph-checkpoint==2.0.26 ; python_version >= '3.9'
30
  langgraph-prebuilt==0.2.1 ; python_version >= '3.9'
31
  langgraph-sdk==0.1.70 ; python_version >= '3.9'
32
  langsmith==0.3.42 ; python_version >= '3.9'
 
33
  markdown-it-py==3.0.0 ; python_version >= '3.8'
 
34
  markupsafe==3.0.2 ; python_version >= '3.9'
35
  mdurl==0.1.2 ; python_version >= '3.7'
36
  numpy==2.2.6 ; python_version >= '3.10'
 
37
  openpyxl==3.1.5
38
  orjson==3.10.18 ; python_version >= '3.9'
39
  ormsgpack==1.10.0 ; python_version >= '3.9'
40
  packaging==24.2 ; python_version >= '3.8'
41
  pandas==2.2.3 ; python_version >= '3.9'
42
  pillow==11.2.1 ; python_version >= '3.9'
 
43
  pydantic==2.11.5 ; python_version >= '3.9'
44
  pydantic-core==2.33.2 ; python_version >= '3.9'
45
  pydub==0.25.1
@@ -61,6 +71,7 @@ shellingham==1.5.4 ; python_version >= '3.7'
61
  six==1.17.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
62
  smolagents==1.16.1
63
  sniffio==1.3.1 ; python_version >= '3.7'
 
64
  starlette==0.46.2 ; sys_platform != 'emscripten'
65
  tenacity==9.1.2 ; python_version >= '3.9'
66
  tomlkit==0.13.2 ; python_version >= '3.8'
 
1
+ -i https://pypi.org/simple
2
  aiofiles==24.1.0 ; python_version >= '3.8'
3
  annotated-types==0.7.0 ; python_version >= '3.8'
4
  anyio==4.9.0 ; python_version >= '3.9'
5
+ beautifulsoup4==4.13.4 ; python_full_version >= '3.7.0'
6
  boto3==1.38.23
7
  botocore==1.38.23 ; python_version >= '3.9'
8
  certifi==2025.4.26 ; python_version >= '3.6'
9
  charset-normalizer==3.4.2 ; python_version >= '3.7'
10
  click==8.2.1 ; python_version >= '3.10'
11
+ distro==1.9.0 ; python_version >= '3.6'
12
+ duckduckgo-search==8.0.2
13
  et-xmlfile==2.0.0 ; python_version >= '3.8'
14
  fastapi==0.115.12 ; python_version >= '3.8'
15
  ffmpy==0.5.0 ; python_version >= '3.8' and python_version < '4.0'
16
  filelock==3.18.0 ; python_version >= '3.9'
17
  fsspec==2025.5.1 ; python_version >= '3.9'
18
+ googlesearch-python==1.3.0
19
  gradio==5.31.0
20
  gradio-client==1.10.1 ; python_version >= '3.10'
21
  groovy==0.1.2 ; python_version >= '3.10'
 
26
  huggingface-hub==0.32.1 ; python_full_version >= '3.8.0'
27
  idna==3.10 ; python_version >= '3.6'
28
  jinja2==3.1.6 ; python_version >= '3.7'
29
+ jiter==0.10.0 ; python_version >= '3.9'
30
  jmespath==1.0.1 ; python_version >= '3.7'
31
  jsonpatch==1.33 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'
32
  jsonpointer==3.0.0 ; python_version >= '3.7'
 
36
  langgraph-prebuilt==0.2.1 ; python_version >= '3.9'
37
  langgraph-sdk==0.1.70 ; python_version >= '3.9'
38
  langsmith==0.3.42 ; python_version >= '3.9'
39
+ lxml==5.4.0 ; python_version >= '3.6'
40
  markdown-it-py==3.0.0 ; python_version >= '3.8'
41
+ markdownify==1.1.0
42
  markupsafe==3.0.2 ; python_version >= '3.9'
43
  mdurl==0.1.2 ; python_version >= '3.7'
44
  numpy==2.2.6 ; python_version >= '3.10'
45
+ openai==1.82.0
46
  openpyxl==3.1.5
47
  orjson==3.10.18 ; python_version >= '3.9'
48
  ormsgpack==1.10.0 ; python_version >= '3.9'
49
  packaging==24.2 ; python_version >= '3.8'
50
  pandas==2.2.3 ; python_version >= '3.9'
51
  pillow==11.2.1 ; python_version >= '3.9'
52
+ primp==0.15.0 ; python_version >= '3.8'
53
  pydantic==2.11.5 ; python_version >= '3.9'
54
  pydantic-core==2.33.2 ; python_version >= '3.9'
55
  pydub==0.25.1
 
71
  six==1.17.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
72
  smolagents==1.16.1
73
  sniffio==1.3.1 ; python_version >= '3.7'
74
+ soupsieve==2.7 ; python_version >= '3.8'
75
  starlette==0.46.2 ; sys_platform != 'emscripten'
76
  tenacity==9.1.2 ; python_version >= '3.9'
77
  tomlkit==0.13.2 ; python_version >= '3.8'
tools.py CHANGED
@@ -8,8 +8,13 @@ from uuid import uuid4
8
 
9
  import boto3
10
  import fitz
 
 
 
11
  from pandas import read_excel
12
  from smolagents import tool, Tool
 
 
13
 
14
  from definitions import TranscriptionJob
15
  from utils import get_file, s3_upload_file, s3_download_file, bedrock_runtime, BEDROCK_MODEL_ID
@@ -45,6 +50,7 @@ def excel_reader(task_id: str, file_name: str) -> str:
45
  return f"Error reading Excel file {file_name}: {e}"
46
 
47
 
 
48
  def txt_reader(task_id: str, file_name: str) -> str:
49
  """Reads a text file and returns its content as a string.
50
 
@@ -59,6 +65,7 @@ def txt_reader(task_id: str, file_name: str) -> str:
59
  return f"Error reading file {file_name}: {e}"
60
 
61
 
 
62
  def pdf_reader(task_id: str, file_name: str) -> str:
63
  """Reads a PDF file and returns its content as a string.
64
 
@@ -91,6 +98,7 @@ class AudioTranscriber(Tool): # pylint: disable=C0115
91
  "description": "The name of the audio file to transcribe.",
92
  },
93
  }
 
94
 
95
  def __init__(self, *args, **kwargs):
96
  super().__init__(*args, **kwargs)
@@ -151,7 +159,7 @@ def image_transcriber(text_prompt: str, task_id: str, file_name: str) -> str:
151
  """
152
  try:
153
  file_content = get_file(task_id)
154
- base64_image = base64.b64encode(file_content).decode("utf-8")
155
  response = bedrock_runtime.invoke_model(
156
  modelId=BEDROCK_MODEL_ID,
157
  body=json.dumps(
@@ -176,7 +184,52 @@ def image_transcriber(text_prompt: str, task_id: str, file_name: str) -> str:
176
  ],
177
  }
178
  ),
179
- ).read()
180
- return json.loads(response)["message"]["content"][0]["text"]
181
  except Exception as e:
182
  return f"Error processing image file {file_name}: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  import boto3
10
  import fitz
11
+ import requests
12
+ from bs4 import BeautifulSoup
13
+ from googlesearch import search
14
  from pandas import read_excel
15
  from smolagents import tool, Tool
16
+ from requests.exceptions import HTTPError
17
+ from urllib3.exceptions import ReadTimeoutError
18
 
19
  from definitions import TranscriptionJob
20
  from utils import get_file, s3_upload_file, s3_download_file, bedrock_runtime, BEDROCK_MODEL_ID
 
50
  return f"Error reading Excel file {file_name}: {e}"
51
 
52
 
53
+ @tool
54
  def txt_reader(task_id: str, file_name: str) -> str:
55
  """Reads a text file and returns its content as a string.
56
 
 
65
  return f"Error reading file {file_name}: {e}"
66
 
67
 
68
+ @tool
69
  def pdf_reader(task_id: str, file_name: str) -> str:
70
  """Reads a PDF file and returns its content as a string.
71
 
 
98
  "description": "The name of the audio file to transcribe.",
99
  },
100
  }
101
+ output_type = "string"
102
 
103
  def __init__(self, *args, **kwargs):
104
  super().__init__(*args, **kwargs)
 
159
  """
160
  try:
161
  file_content = get_file(task_id)
162
+ base64_image = base64.b64encode(file_content.getvalue()).decode("utf-8")
163
  response = bedrock_runtime.invoke_model(
164
  modelId=BEDROCK_MODEL_ID,
165
  body=json.dumps(
 
184
  ],
185
  }
186
  ),
187
+ )["body"].read()
188
+ return json.loads(response)["content"][0]["text"]
189
  except Exception as e:
190
  return f"Error processing image file {file_name}: {e}"
191
+
192
+
193
def _get_content(url: str, timeout: int = 5) -> bytes:
    """Download ``url`` with an HTTP GET and hand back the raw response body.

    Args:
        url: Address to fetch.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``.

    Returns:
        The response payload as bytes.

    Raises:
        requests.exceptions.HTTPError: Raised by ``raise_for_status`` when the
            server answers with an error status.
    """
    response = requests.get(url=url, timeout=timeout)
    # Turn 4xx/5xx answers into exceptions instead of returning an error page.
    response.raise_for_status()
    payload = response.content
    return payload
197
+
198
+
199
+ def _js_disable_message(text: str) -> bool:
200
+ return "JavaScript is disabled in this browser" in text
201
+
202
+
203
@tool
def search_engine(search_term: str) -> str:
    """Search for the provided search term in Google Search

    The result URLs are tried in order; the text of the first page that can be
    downloaded and does not show a "JavaScript is disabled" notice is returned.

    Args:
        search_term (str): The term to search for on the web.

    Returns:
        str: The page text with newlines removed, or a short error message when
            no result page could be retrieved.
    """
    http_error_message = "Sorry, got an HTTP error when requesting the internet"

    try:
        # `search` yields its results lazily; materialize them so the HTTP
        # request it performs happens inside this try block.
        urls = list(search(search_term, num_results=5))
    except requests.exceptions.RequestException as ex:
        print(f"Got HTTP error when searching for {search_term}. Error {ex}")
        return http_error_message

    for url in urls:
        try:
            html_content = BeautifulSoup(_get_content(url), "html.parser")
        except (requests.exceptions.RequestException, ReadTimeoutError) as ex:
            # RequestException also covers timeouts and connection errors, not
            # only the HTTPError raised by `raise_for_status`.
            print(f"Got HTTP error when requesting {url}. Error {ex}")
            continue

        # Remove headers and footers
        for tag in html_content.find_all(["header", "footer", "nav", "aside"]):
            tag.decompose()

        html_text = html_content.text
        if _js_disable_message(html_text):
            # The page needs JavaScript to render; try the next result.
            continue

        return html_text.replace("\n", "")

    if urls:
        # Every URL failed: return a direct response as if it was the final answer.
        return http_error_message
    return "Could not retrieve any content from the search results."
utils.py CHANGED
@@ -3,12 +3,9 @@ from io import BytesIO
3
 
4
  import boto3
5
  import requests
6
- from dotenv import load_dotenv
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
- BEDROCK_MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"
10
-
11
- load_dotenv()
12
 
13
  bedrock_runtime = boto3.client("bedrock-runtime", region_name=os.getenv("AWS_REGION"))
14
 
 
3
 
4
  import boto3
5
  import requests
 
6
 
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
+ BEDROCK_MODEL_ID = "anthropic.claude-3-5-sonnet-20240620-v1:0"
 
 
9
 
10
  bedrock_runtime = boto3.client("bedrock-runtime", region_name=os.getenv("AWS_REGION"))
11