Scott Cogan committed on
Commit
72ec790
·
1 Parent(s): 292d225

latest requirements

Browse files
Files changed (1) hide show
  1. app.py +104 -154
app.py CHANGED
@@ -5,234 +5,184 @@ import inspect
5
  import pandas as pd
6
  import asyncio
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
- from typing import IO, Dict
9
  from io import BytesIO
10
- from langchain_core.messages import HumanMessage, SystemMessage
11
- from langgraph.graph import StateGraph
12
  import base64
13
  from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
14
  import google.generativeai as genai
15
- import os
16
- from pydantic import BaseModel
17
- from typing import List, Any
 
18
 
 
19
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
20
-
21
- # (Keep Constants as is)
22
- # --- Constants ---
23
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
  GEMINI_API_KEY = os.getenv("Gemini_API_key")
25
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
26
 
27
- # --- Basic Agent Definition ---
28
- # Agent capabilities required: Search the web, listen to audio recordings, watch YouTube videos (process the footage, not the transcript), work with Excel spreadsheets
29
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
30
-
31
- def get_file(task_id: str) -> IO:
32
- '''
33
- Downloads the file associated with the given task_id, if one exists and is mapped.
34
- If the question mentions an attachment, use this function.
35
- Args:
36
- task_id: Id of the question.
37
- Returns:
38
- The file associated with the question.
39
- '''
40
- file_request = requests.get(url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}')
41
- file_request.raise_for_status()
42
-
43
- return BytesIO(file_request.content)
44
 
 
 
45
  def analyse_excel(task_id: str) -> Dict[str, float]:
46
- '''
47
- Analyzes the Excel file associated with the given task_id and returns the sum of each numeric column.
48
- Args:
49
- task_id: Id of the question.
50
- Returns:
51
- A dictionary with the sum of each numeric column.
52
- '''
53
  excel_file = get_file(task_id)
54
  df = pd.read_excel(excel_file, sheet_name=0)
55
-
56
  return df.select_dtypes(include='number').sum().to_dict()
57
 
 
58
  def add_numbers(a: float, b: float) -> float:
59
- '''
60
- Adds two numbers together.
61
- Args:
62
- a: First number.
63
- b: Second number.
64
- Returns:
65
- The sum of the two numbers.
66
- '''
67
  return a + b
68
 
 
69
  def transcribe_audio(task_id: str) -> HumanMessage:
70
- '''
71
- Opens an audio file and returns its content as a string.
72
- Args:
73
- file: The audio file to be opened.
74
- Returns:
75
- The content of the audio file as a string.
76
- '''
77
  audio_file = get_file(task_id)
78
  if audio_file is None:
79
  raise ValueError("No audio file found for the given task_id.")
80
- # Encode the audio file to base64
81
- audio_file.seek(0) # Ensure the file pointer is at the beginning
82
  encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
83
-
84
  return HumanMessage(
85
  content=[
86
  {"type": "text", "text": "Transcribe the audio."},
87
  {
88
  "type": "media",
89
- "data": encoded_audio, # Use base64 string directly
90
  "mime_type": "audio/mpeg",
91
  },
92
  ]
93
  )
94
 
 
95
  def python_code(task_id: str) -> str:
96
- '''
97
- Returns the Python code associated with the given task_id.
98
- Args:
99
- task_id: Id of the question.
100
- Returns:
101
- The Python code associated with the question.
102
- '''
103
- code_request = requests.get(url=f'https://agents-course-unit4-scoring.hf.space/files/{task_id}')
104
  code_request.raise_for_status()
105
-
106
  return code_request.text
107
 
 
108
  def open_image(task_id: str) -> str:
109
- '''
110
- Opens an image file associated with the given task_id.
111
- Args:
112
- task_id: Id of the question.
113
- Returns:
114
- The base64 encoded string of the image file.
115
- '''
116
  image_file = get_file(task_id)
117
  if image_file is None:
118
  raise ValueError("No image file found for the given task_id.")
119
-
120
  return base64.b64encode(image_file.read()).decode("utf-8")
121
 
122
- def open_youtube_video(url: str, query:str) -> str:
123
- '''
124
- Answers a question about a video from the given URL.
125
- Args:
126
- url: The URL of the video file.
127
- query: The question to be answered about the video.
128
- Returns:
129
- Answer to the question about the video.
130
- '''
131
-
132
  client = genai.Client(api_key=GOOGLE_API_KEY)
133
-
134
  response = client.models.generate_content(
135
- model='models/gemini-2.0-flash',
136
- contents=types.Content(
137
- parts=[
138
- types.Part(
139
- file_data=types.FileData(file_uri=url)
140
- ),
141
- types.Part(text=f'''{query} YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
142
- list of numbers and/or strings.''')
143
  ]
144
- )
145
- )
146
-
147
  return response.text
148
 
 
149
  def google_search(query: str) -> str:
150
- '''
151
- Performs a Google search for the given query.
152
- Args:
153
- query: The search query.
154
- Returns:
155
- The search results as a string.
156
- '''
157
  llm = ChatGoogleGenerativeAI(
158
- model="gemini-2.5-flash-preview-05-20",
159
- max_tokens=8192,
160
- temperature=0
161
- )
162
- response = llm.invoke(query,
163
- tools=[GenAITool(google_search={})]
164
  )
165
-
166
  return response.content
167
 
168
- class AgentState(BaseModel):
169
- messages: List[Any]
170
-
171
  class BasicAgent:
172
  def __init__(self):
173
  self.llm = ChatGoogleGenerativeAI(
174
  model="gemini-2.5-flash-preview-05-20",
175
  max_tokens=8192,
176
  temperature=0
177
- )
178
- self.tools = [get_file, analyse_excel, add_numbers, transcribe_audio, python_code, open_image, open_youtube_video
179
- , google_search
180
- ]
181
 
182
- self.agent = self.llm # Use the LLM directly
183
-
 
 
 
 
 
 
184
  self.sys_msg = SystemMessage('''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
185
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
186
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
187
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
188
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
189
- You have access to multiple tools and should use as many as you need to answer the question.
190
- If you are asked to analyze an Excel file, use the 'analyse_excel' tool.
191
- If you are asked to download a file, use the 'get_file' tool.
192
- If you are asked to add two numbers, use the 'add_numbers' tool. If you need to add more than two numbers, use the 'add_numbers'
193
- tool multiple times.
194
- If you are asked to transcribe an audio file, use the 'transcribe_audio' tool.
195
- If you are asked to run a Python code, use the 'python_code' tool.
196
- If you are asked to open an image, use the 'open_image' tool.
197
- If you were given a link with www.youtube.com, use the 'open_youtube_video' tool.
198
- If the question requires a web search because your internal knowledge doesn't have the information, use the 'google_search' tool.
199
- ''')
200
 
201
- # Graph
202
- self.builder = StateGraph(state_schema=AgentState)
203
- # Define nodes: these do the work
204
- self.builder.add_node("START", lambda state: AgentState(**state), input=AgentState)
205
- self.builder.add_node("assistant", self.assistant)
206
- self.builder.add_node("tools", self.tools_node)
207
- # Define edges: these determine how the control flow moves
208
- self.builder.add_edge("START", "assistant")
209
- self.builder.add_edge("assistant", "tools")
210
- self.builder.add_edge("tools", "assistant")
211
- print("Nodes:", self.builder.nodes)
212
- print("Edges:", self.builder.edges)
213
- self.react_graph = self.builder.compile()
 
 
 
214
 
215
  print("BasicAgent initialized.")
216
 
217
- def assistant(self, state):
218
- return {"messages": [self.llm.invoke([self.sys_msg] + state["messages"])]}
 
 
 
219
 
220
- def tools_node(self, state):
221
- # Execute the tool and return the result
222
- tool_name = state["messages"][-1].content
223
- for tool in self.tools:
224
- if tool.__name__ == tool_name:
225
- return {"messages": [tool(*state["args"])]}
226
- return {"messages": [f"Tool {tool_name} not found"]}
 
 
 
 
 
 
 
 
 
227
 
228
  async def __call__(self, question: str, task_id: str) -> str:
 
229
  print(f"Agent received question (first 50 chars): {question[:50]}...")
230
- fixed_answer = "This is a default answer."
231
-
232
- await asyncio.sleep(60)
233
- messages = self.react_graph.invoke({"messages": [f'Task id: {task_id}\n {question}']}, node="START")
234
- return messages["messages"][-1].content if messages["messages"] else fixed_answer
235
-
 
 
 
 
 
 
 
236
 
237
  def run_and_submit_all(profile):
238
  """
 
5
  import pandas as pd
6
  import asyncio
7
  from langchain_google_genai import ChatGoogleGenerativeAI
8
+ from typing import IO, Dict, TypedDict, Annotated, Sequence
9
  from io import BytesIO
10
+ from langchain_core.messages import HumanMessage, SystemMessage, BaseMessage, AIMessage
11
+ from langgraph.graph import StateGraph, END
12
  import base64
13
  from google.ai.generativelanguage_v1beta.types import Tool as GenAITool
14
  import google.generativeai as genai
15
+ import operator
16
+ from langgraph.prebuilt import ToolExecutor
17
+ from langchain_core.tools import tool
18
+ from utilities import get_file
19
 
20
+ # Constants
21
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
 
 
22
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
23
  GEMINI_API_KEY = os.getenv("Gemini_API_key")
24
  SERPER_API_KEY = os.getenv("SERPER_API_KEY")
25
 
26
+ # Define the state type
27
+ class AgentState(TypedDict):
28
+ messages: Annotated[Sequence[BaseMessage], operator.add]
29
+ next: str
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # Convert existing functions to tools
32
+ @tool
33
  def analyse_excel(task_id: str) -> Dict[str, float]:
34
+ '''Analyzes the Excel file associated with the given task_id.'''
 
 
 
 
 
 
35
  excel_file = get_file(task_id)
36
  df = pd.read_excel(excel_file, sheet_name=0)
 
37
  return df.select_dtypes(include='number').sum().to_dict()
38
 
39
+ @tool
40
  def add_numbers(a: float, b: float) -> float:
41
+ '''Adds two numbers together.'''
 
 
 
 
 
 
 
42
  return a + b
43
 
44
+ @tool
45
  def transcribe_audio(task_id: str) -> HumanMessage:
46
+ '''Transcribes an audio file.'''
 
 
 
 
 
 
47
  audio_file = get_file(task_id)
48
  if audio_file is None:
49
  raise ValueError("No audio file found for the given task_id.")
50
+ audio_file.seek(0)
 
51
  encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")
 
52
  return HumanMessage(
53
  content=[
54
  {"type": "text", "text": "Transcribe the audio."},
55
  {
56
  "type": "media",
57
+ "data": encoded_audio,
58
  "mime_type": "audio/mpeg",
59
  },
60
  ]
61
  )
62
 
63
+ @tool
64
  def python_code(task_id: str) -> str:
65
+ '''Returns the Python code associated with the given task_id.'''
66
+ code_request = requests.get(url=f'{DEFAULT_API_URL}/files/{task_id}')
 
 
 
 
 
 
67
  code_request.raise_for_status()
 
68
  return code_request.text
69
 
70
+ @tool
71
  def open_image(task_id: str) -> str:
72
+ '''Opens an image file associated with the given task_id.'''
 
 
 
 
 
 
73
  image_file = get_file(task_id)
74
  if image_file is None:
75
  raise ValueError("No image file found for the given task_id.")
 
76
  return base64.b64encode(image_file.read()).decode("utf-8")
77
 
78
+ @tool
79
+ def open_youtube_video(url: str, query: str) -> str:
80
+ '''Answers a question about a video from the given URL.'''
 
 
 
 
 
 
 
81
  client = genai.Client(api_key=GOOGLE_API_KEY)
 
82
  response = client.models.generate_content(
83
+ model='models/gemini-2.0-flash',
84
+ contents=types.Content(
85
+ parts=[
86
+ types.Part(file_data=types.FileData(file_uri=url)),
87
+ types.Part(text=f'''{query} YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated
88
+ list of numbers and/or strings.''')
 
 
89
  ]
90
+ )
91
+ )
 
92
  return response.text
93
 
94
+ @tool
95
  def google_search(query: str) -> str:
96
+ '''Performs a Google search for the given query.'''
 
 
 
 
 
 
97
  llm = ChatGoogleGenerativeAI(
98
+ model="gemini-2.5-flash-preview-05-20",
99
+ max_tokens=8192,
100
+ temperature=0
 
 
 
101
  )
102
+ response = llm.invoke(query, tools=[GenAITool(google_search={})])
103
  return response.content
104
 
 
 
 
105
  class BasicAgent:
106
  def __init__(self):
107
  self.llm = ChatGoogleGenerativeAI(
108
  model="gemini-2.5-flash-preview-05-20",
109
  max_tokens=8192,
110
  temperature=0
111
+ )
 
 
 
112
 
113
+ # Create tool executor
114
+ self.tools = [
115
+ get_file, analyse_excel, add_numbers, transcribe_audio,
116
+ python_code, open_image, open_youtube_video, google_search
117
+ ]
118
+ self.tool_executor = ToolExecutor(self.tools)
119
+
120
+ # System message
121
  self.sys_msg = SystemMessage('''You are a general AI assistant. I will ask you a question. Only provide YOUR FINAL ANSWER and nothing else.
122
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
123
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
124
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
125
  If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
126
+ You have access to multiple tools and should use as many as you need to answer the question.''')
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # Create the graph
129
+ self.workflow = StateGraph(AgentState)
130
+
131
+ # Add nodes
132
+ self.workflow.add_node("agent", self.call_model)
133
+ self.workflow.add_node("tools", self.call_tools)
134
+
135
+ # Add edges
136
+ self.workflow.add_edge("agent", "tools")
137
+ self.workflow.add_edge("tools", "agent")
138
+
139
+ # Set entry point
140
+ self.workflow.set_entry_point("agent")
141
+
142
+ # Compile the graph
143
+ self.app = self.workflow.compile()
144
 
145
  print("BasicAgent initialized.")
146
 
147
+ def call_model(self, state: AgentState) -> AgentState:
148
+ """Call the model to generate a response."""
149
+ messages = state["messages"]
150
+ response = self.llm.invoke([self.sys_msg] + messages)
151
+ return {"messages": [response], "next": "tools"}
152
 
153
+ def call_tools(self, state: AgentState) -> AgentState:
154
+ """Call the tools based on the model's response."""
155
+ messages = state["messages"]
156
+ last_message = messages[-1]
157
+
158
+ if isinstance(last_message, AIMessage):
159
+ # Extract tool calls from the message
160
+ tool_calls = last_message.tool_calls
161
+ if tool_calls:
162
+ for tool_call in tool_calls:
163
+ tool_name = tool_call.name
164
+ tool_args = tool_call.args
165
+ result = self.tool_executor.invoke(tool_name, tool_args)
166
+ messages.append(AIMessage(content=f"Tool result: {result}"))
167
+
168
+ return {"messages": messages, "next": "agent"}
169
 
170
  async def __call__(self, question: str, task_id: str) -> str:
171
+ """Process a question and return the answer."""
172
  print(f"Agent received question (first 50 chars): {question[:50]}...")
173
+
174
+ # Create initial state
175
+ initial_state = {
176
+ "messages": [HumanMessage(content=f'Task id: {task_id}\n {question}')],
177
+ "next": "agent"
178
+ }
179
+
180
+ # Process through the graph
181
+ result = self.app.invoke(initial_state)
182
+
183
+ # Extract the final answer
184
+ final_message = result["messages"][-1]
185
+ return final_message.content if isinstance(final_message, AIMessage) else "No answer generated."
186
 
187
  def run_and_submit_all(profile):
188
  """