zaldivards committed on
Commit
98e87f9
·
1 Parent(s): d1b9537

add image transcriber function

Browse files
Files changed (3) hide show
  1. agent.py +2 -1
  2. tools.py +47 -4
  3. utils.py +2 -7
agent.py CHANGED
@@ -1,6 +1,7 @@
1
  from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, AmazonBedrockServerModel
2
 
3
  from tools import AudioTranscriber, txt_reader, pdf_reader, excel_reader, math_calculator
 
4
 
5
  MAIN_PROMPT = """
6
  You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
@@ -20,7 +21,7 @@ class MainAgent:
20
  """Main agent that orchestrates file handling and web search tasks."""
21
 
22
  def __init__(self):
23
- model = AmazonBedrockServerModel(model_id="anthropic.claude-3-5-sonnet-20241022-v2:0")
24
  file_handler_agent = ToolCallingAgent(
25
  name="FileHandlerAgent",
26
  description="Handles file-related tasks such as reading Excel, text, PDF files, etc.",
 
1
  from smolagents import ToolCallingAgent, DuckDuckGoSearchTool, VisitWebpageTool, AmazonBedrockServerModel
2
 
3
  from tools import AudioTranscriber, txt_reader, pdf_reader, excel_reader, math_calculator
4
+ from utils import BEDROCK_MODEL_ID
5
 
6
  MAIN_PROMPT = """
7
  You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
 
21
  """Main agent that orchestrates file handling and web search tasks."""
22
 
23
  def __init__(self):
24
+ model = AmazonBedrockServerModel(model_id=BEDROCK_MODEL_ID)
25
  file_handler_agent = ToolCallingAgent(
26
  name="FileHandlerAgent",
27
  description="Handles file-related tasks such as reading Excel, text, PDF files, etc.",
tools.py CHANGED
@@ -2,6 +2,7 @@
2
  import ast
3
  import json
4
  import os
 
5
  from time import sleep
6
  from uuid import uuid4
7
 
@@ -11,18 +12,18 @@ from pandas import read_excel
11
  from smolagents import tool, Tool
12
 
13
  from definitions import TranscriptionJob
14
- from utils import get_file, s3_upload_file, s3_download_file
15
 
16
 
@tool
def math_calculator(query: str) -> str:
    """A simple calculator tool that evaluates mathematical expressions.

    Supports numeric literals, parentheses, unary plus/minus and the binary
    operators +, -, *, /, //, % and **. Names, function calls and any other
    Python syntax are rejected, so the input is never executed as code.

    Args:
        query (str): A mathematical expression as a string, e.g., "2 + 2 * 3".

    Returns:
        str: The result converted to a string, or a message starting with
            "Error evaluating expression:" when the input cannot be evaluated.
    """
    import operator

    max_exponent = 10_000

    def checked_pow(base, exponent):
        # Refuse enormous exponents so one call cannot stall the agent.
        if isinstance(exponent, (int, float)) and abs(exponent) > max_exponent:
            raise ValueError(f"exponent too large (limit {max_exponent})")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: checked_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Walk the parsed tree and allow only whitelisted arithmetic nodes.
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
                return value
            raise ValueError(f"unsupported literal: {value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # ast.literal_eval only accepts literals and rejects "2 + 2 * 3",
        # so parse the expression and evaluate the arithmetic nodes explicitly.
        result = evaluate(ast.parse(query.strip(), mode="eval"))
        return str(result)
    except Exception as e:
        return f"Error evaluating expression: {e}"
@@ -137,3 +138,45 @@ class AudioTranscriber(Tool): # pylint: disable=C0115
137
  return transcription
138
  except Exception as e:
139
  return f"Error starting transcription job for {file_name}: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import ast
3
  import json
4
  import os
5
+ import base64
6
  from time import sleep
7
  from uuid import uuid4
8
 
 
12
  from smolagents import tool, Tool
13
 
14
  from definitions import TranscriptionJob
15
+ from utils import get_file, s3_upload_file, s3_download_file, bedrock_runtime, BEDROCK_MODEL_ID
16
 
17
 
@tool
def math_calculator(expression: str) -> str:
    """A simple calculator tool that evaluates mathematical expressions.

    Supports numeric literals, parentheses, unary plus/minus and the binary
    operators +, -, *, /, //, % and **. Names, function calls and any other
    Python syntax are rejected, so the input is never executed as code.

    Args:
        expression (str): A mathematical expression as a string, e.g., "2 + 2 * 3".

    Returns:
        str: The result converted to a string, or a message starting with
            "Error evaluating expression:" when the input cannot be evaluated.
    """
    import operator

    max_exponent = 10_000

    def checked_pow(base, exponent):
        # Refuse enormous exponents so one call cannot stall the agent.
        if isinstance(exponent, (int, float)) and abs(exponent) > max_exponent:
            raise ValueError(f"exponent too large (limit {max_exponent})")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: checked_pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        # Walk the parsed tree and allow only whitelisted arithmetic nodes.
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
                return value
            raise ValueError(f"unsupported literal: {value!r}")
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError(f"unsupported expression element: {type(node).__name__}")

    try:
        # ast.literal_eval only accepts literals and rejects "2 + 2 * 3",
        # so parse the expression and evaluate the arithmetic nodes explicitly.
        result = evaluate(ast.parse(expression.strip(), mode="eval"))
        return str(result)
    except Exception as e:
        return f"Error evaluating expression: {e}"
 
138
  return transcription
139
  except Exception as e:
140
  return f"Error starting transcription job for {file_name}: {e}"
141
+
142
+
@tool
def image_transcriber(text_prompt: str, task_id: str, file_name: str) -> str:
    """Transcribes text from an image file

    The image is fetched with ``get_file(task_id)``, base64-encoded and sent
    together with the prompt to the Bedrock model identified by
    ``BEDROCK_MODEL_ID``.

    Args:
        text_prompt (str): The text prompt to guide the transcription.
        task_id (str): The ID of the task associated with the image file.
        file_name (str): The name of the image file to transcribe.

    Returns:
        str: The text of the first content block in the model response, or a
            message starting with "Error processing image file" on failure.
    """
    try:
        file_content = get_file(task_id)
        # get_file is annotated as returning BytesIO, which b64encode cannot
        # consume directly; unwrap it while still accepting raw bytes.
        image_bytes = file_content.getvalue() if hasattr(file_content, "getvalue") else file_content
        base64_image = base64.b64encode(image_bytes).decode("utf-8")

        # "image/jpg" is not a valid media type; JPEG files must be sent as "image/jpeg".
        extension = file_name.rsplit(".", 1)[-1].lower()
        media_type = "image/jpeg" if extension == "jpg" else f"image/{extension}"

        request_body = {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 4096,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": base64_image,
                            },
                        },
                        {"type": "text", "text": text_prompt},
                    ],
                }
            ],
        }
        response = bedrock_runtime.invoke_model(
            modelId=BEDROCK_MODEL_ID,
            body=json.dumps(request_body),
        )
        # invoke_model returns a dict; the JSON payload is streamed under "body".
        payload = json.loads(response["body"].read())
        # The Messages API response keeps its content blocks at the top level.
        return payload["content"][0]["text"]
    except Exception as e:
        return f"Error processing image file {file_name}: {e}"
utils.py CHANGED
@@ -6,16 +6,11 @@ import requests
6
  from dotenv import load_dotenv
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
-
10
 
11
  load_dotenv()
12
 
13
- bedrock_client = boto3.client(
14
- "bedrock-runtime",
15
- region_name=os.getenv("AWS_REGION"),
16
- aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
17
- aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
18
- )
19
 
20
 
21
  def get_file(task_id: str) -> BytesIO:
 
6
  from dotenv import load_dotenv
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
+ BEDROCK_MODEL_ID = "anthropic.claude-3-5-sonnet-20241022-v2:0"
10
 
11
  load_dotenv()
12
 
13
+ bedrock_runtime = boto3.client("bedrock-runtime", region_name=os.getenv("AWS_REGION"))
 
 
 
 
 
14
 
15
 
16
  def get_file(task_id: str) -> BytesIO: