RCaz committed on
Commit
3e94ec9
·
verified ·
1 Parent(s): 54317b5

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +86 -45
agent.py CHANGED
@@ -3,7 +3,8 @@ from typing import Optional, Tuple, Literal
3
  from smolagents import tool
4
  import base64
5
  from openai import OpenAI
6
-
 
7
 
8
  @tool
9
  def download_and_get_path_for_provided_file(path: str):
@@ -29,7 +30,7 @@ def download_and_get_path_for_provided_file(path: str):
29
  @tool
30
  def extract_text_from_audio(file_path: str) -> str:
31
  """
32
- Extract and return text transcription from an audio file.
33
 
34
  Args:
35
  file_path (str): Path to the audio file to be transcribed.
@@ -47,21 +48,25 @@ def extract_text_from_audio(file_path: str) -> str:
47
  >>> extract_text_from_audio("/path/to/audio/interview.mp3")
48
  "Could you please introduce yourself and your background?"
49
  """
50
-
51
- client = OpenAI()
52
- audio_file = open(file_path, "rb")
53
 
54
- transcription = client.audio.transcriptions.create(
55
- model="gpt-4o-transcribe",
56
- file=audio_file,
57
- response_format="text"
58
- )
59
- return transcription
 
 
 
 
 
60
 
61
 
62
  def describe_image(request:str, file_path: str) -> str:
63
  """
64
- Extract and return the requested information from an image.
65
 
66
  Args:
67
  request: The information to retreive from the image. The request must be simple, short and precise.
@@ -79,44 +84,76 @@ def describe_image(request:str, file_path: str) -> str:
79
  "Qd3"
80
  """
81
 
82
- client = OpenAI()
83
-
84
- # Function to encode the image
85
- def encode_image(image_path):
86
- with open(image_path, "rb") as image_file:
87
- return base64.b64encode(image_file.read()).decode("utf-8")
88
 
89
- # Getting the Base64 string
90
- base64_image = encode_image(file_path)
91
-
92
-
93
- response = client.responses.create(
94
- model="gpt-4.1",
95
- input=[
96
- {
97
- "role": "user",
98
- "content": [
99
- { "type": "input_text", "text": request },
100
- {
101
- "type": "input_image",
102
- "image_url": f"data:image/jpeg;base64,{base64_image}",
103
- },
104
- ],
105
- }
106
- ],
107
- )
108
 
109
- return response.output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
 
112
  @tool
113
- def get_transcript(file_id:str) -> str:
 
 
 
 
 
 
 
 
 
 
 
114
  from youtube_transcript_api import YouTubeTranscriptApi
115
  ytt_api = YouTubeTranscriptApi()
116
- transcript = ytt_api.fetch(video_id)
117
  return transcript
118
 
 
 
 
 
 
 
 
 
119
 
 
 
 
 
 
 
 
 
120
  class TestAgent:
121
  def __init__(self):
122
 
@@ -143,11 +180,15 @@ class TestAgent:
143
  #model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
144
  # Instantiate the agent
145
  self.agent = CodeAgent(
146
- tools=[extract_text_from_audio, # homemade tool
147
- DuckDuckGoSearchTool(), # basic tools from smolagent
148
  VisitWebpageTool(),
149
- wikipedia_tool, # tool from langchain with extra parmaeters
150
- #youtube_tools, # tool from MCP server
 
 
 
 
151
  FinalAnswerTool()],
152
  additional_authorized_imports=["pandas","markdownify","requests"], # V2 add markdownify & requests
153
  model=model,
@@ -157,7 +198,7 @@ class TestAgent:
157
  use_structured_outputs_internally=True # V3. Adds structure
158
  )
159
  # V3. add Guidance
160
- prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
161
  #self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
162
 
163
  # V4. use prompt from the paper as guidance
 
3
  from smolagents import tool
4
  import base64
5
  from openai import OpenAI
6
+ import joblib
7
+ from openai import OpenAI
8
 
9
  @tool
10
  def download_and_get_path_for_provided_file(path: str):
 
30
  @tool
31
  def extract_text_from_audio(file_path: str) -> str:
32
  """
33
+ Extract and return text transcription from an audio file given its path.
34
 
35
  Args:
36
  file_path (str): Path to the audio file to be transcribed.
 
48
  >>> extract_text_from_audio("/path/to/audio/interview.mp3")
49
  "Could you please introduce yourself and your background?"
50
  """
51
+ try:
52
+ return joblib.load(f"cahced_files/{file_path}")
 
53
 
54
+ except:
55
+ client = OpenAI()
56
+ audio_file = open(file_path, "rb")
57
+
58
+ transcription = client.audio.transcriptions.create(
59
+ model="gpt-4o-transcribe",
60
+ file=audio_file,
61
+ response_format="text"
62
+ )
63
+ joblib.dump(transcription, f"cahced_files/{file_path}")
64
+ return transcription
65
 
66
 
67
  def describe_image(request:str, file_path: str) -> str:
68
  """
69
+ Extract and return the requested information from an image given its path.
70
 
71
  Args:
72
  request: The information to retreive from the image. The request must be simple, short and precise.
 
84
  "Qd3"
85
  """
86
 
87
+ try
88
+ return joblib.load(f"cahced_files/{file_path}")
 
 
 
 
89
 
90
+ except:
91
+ client = OpenAI()
92
+
93
+ # Function to encode the image
94
+ def encode_image(image_path):
95
+ with open(image_path, "rb") as image_file:
96
+ return base64.b64encode(image_file.read()).decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # Getting the Base64 string
99
+ base64_image = encode_image(file_path)
100
+
101
+
102
+ response = client.responses.create(
103
+ model="gpt-4.1",
104
+ input=[
105
+ {
106
+ "role": "user",
107
+ "content": [
108
+ { "type": "input_text", "text": request },
109
+ {
110
+ "type": "input_image",
111
+ "image_url": f"data:image/jpeg;base64,{base64_image}",
112
+ },
113
+ ],
114
+ }
115
+ ],
116
+ )
117
+ joblib.dump(response.output_text,f"cahced_files/{file_path}")
118
+ return response.output_text
119
+
120
 
121
 
122
  @tool
123
def get_transcript_from_youtube_file_id(file_id: str) -> str:
    """
    Retrieve the transcript for a YouTube video given its id.

    Args:
        file_id (str): The YouTube video ID (the alphanumeric string that appears after
            'v=' in a YouTube URL, e.g., 'dQw4w9WgXcQ').

    Returns:
        str: The transcript as plain text, one line per snippet, formatted as
            ``[<start in seconds>s] <snippet text>``.
    """
    from youtube_transcript_api import YouTubeTranscriptApi

    fetched = YouTubeTranscriptApi().fetch(file_id)
    # fetch() returns a transcript object, not a string; render its snippets
    # so the result matches the declared ``str`` return type.
    return "\n".join(
        f"[{snippet.start:.2f}s] {snippet.text}" for snippet in fetched
    )
139
 
140
+
141
+ @tool
142
def parse_python_file(path: str) -> str:
    """
    Read and return the contents of a Python file from its path.

    Args:
        path (str): The file path to the Python file to be read.

    Returns:
        str: The complete contents of the Python file as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Python source is UTF-8 by default, so decode explicitly instead of
    # relying on the platform's locale encoding.
    with open(path, "r", encoding="utf-8") as py_file:
        return py_file.read()
155
+
156
+
157
  class TestAgent:
158
  def __init__(self):
159
 
 
180
  #model = InferenceClientModel("Qwen/Qwen2.5-Coder-32B-Instruct")
181
  # Instantiate the agent
182
  self.agent = CodeAgent(
183
+ tools=[download_and_get_path_for_provided_file, # V4. get attached file
184
+ DuckDuckGoSearchTool(), # basic tools from smolagent
185
  VisitWebpageTool(),
186
+ wikipedia_tool, # tool from langchain with extra parmaeters
187
+ #youtube_tools, # tool from MCP server
188
+ get_transcript_from_youtube_file_id, # V4
189
+ parse_python_file, # V4
190
+ describe_image, # V4
191
+ extract_text_from_audio, # V4
192
  FinalAnswerTool()],
193
  additional_authorized_imports=["pandas","markdownify","requests"], # V2 add markdownify & requests
194
  model=model,
 
198
  use_structured_outputs_internally=True # V3. Adds structure
199
  )
200
  # V3. add Guidance
201
+ #prompt_for_guidance = "\n10. Provide the answer axactly as it is asked, be concise and precise\n\nNow Begin!"
202
  #self.agent.prompt_templates['system_prompt'] = self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
203
 
204
  # V4. use prompt from the paper as guidance