David committed on
Commit
3f771a9
·
1 Parent(s): edf3100

Still implementing and trying

Browse files
Files changed (5) hide show
  1. agent.py +125 -21
  2. app.py +12 -5
  3. gaia_system_prompt.py +18 -1
  4. requirements.txt +6 -0
  5. tools.py +127 -9
agent.py CHANGED
@@ -1,50 +1,154 @@
1
  from llama_index.llms.google_genai import GoogleGenAI
 
 
 
2
  from llama_index.tools.arxiv import ArxivToolSpec
3
  from llama_index.tools.wikipedia import WikipediaToolSpec
4
- from llama_index.tools.duckduckgo import DuckDuckGoSearchResultsToolSpec
5
  from llama_index.core.tools import FunctionTool
6
- from llama_index.core.agent.workflow import AgentWorkflow
 
 
 
 
 
 
 
7
 
8
  from tools import interpret_python_math_code
9
- from gaia_system_prompt import GAIA_SYSTEM_PROMPT
10
 
11
  import os
 
12
 
 
13
  GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
 
 
14
  GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
 
 
15
 
16
  class FinalAgent:
17
  def __init__(self):
18
  # LLM Initialization
19
- self.llm = GoogleGenAI(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
 
 
 
 
20
 
21
  # Tool Initialization
22
  self.tools = [
23
  FunctionTool.from_defaults(
24
- func=interpret_python_math_code,
25
  name="InterpretPythonMathCode",
26
  description="Interprets Python code for mathematical expressions."
27
- ),
28
- DuckDuckGoSearchResultsToolSpec(),
29
- WikipediaToolSpec(),
30
- ArxivToolSpec()
31
  ]
 
 
 
 
 
 
 
 
 
32
 
33
  # Agent Workflow Initialization
34
- self.agent = AgentWorkflow(
 
 
 
 
 
 
 
35
  llm=self.llm,
36
- tools=self.tools,
37
- system_prompt=GAIA_SYSTEM_PROMPT
 
 
38
  )
39
 
40
  print("FinalAgent initialized.")
41
- def __call__(self, question: str) -> str:
42
- # Example
43
- print(f"Agent received question (first 50 chars): {question[:50]}...")
44
- fixed_answer = "This is a default answer."
45
- print(f"Agent returning fixed answer: {fixed_answer}")
46
-
47
- # Implement agent logic here
48
- response = self.agent.run(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from llama_index.llms.google_genai import GoogleGenAI
2
+ from llama_index.llms.gemini import Gemini
3
+ from llama_index.llms.groq import Groq
4
+ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
5
  from llama_index.tools.arxiv import ArxivToolSpec
6
  from llama_index.tools.wikipedia import WikipediaToolSpec
7
+ from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
8
  from llama_index.core.tools import FunctionTool
9
+ from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
10
+ from llama_index.llms.lmstudio import LMStudio
11
+ from llama_index.core.agent.workflow import (
12
+ AgentStream,
13
+ AgentOutput
14
+ )
15
+ from gradio import ChatMessage
16
+ from llama_index.core.base.llms.types import ChatMessage as llama_index_chat_message
17
 
18
  from tools import interpret_python_math_code
19
+ from gaia_system_prompt import SYSTEM_PROMPT as GAIA_SYSTEM_PROMPT
20
 
21
  import os
22
+ import asyncio
23
 
24
+ TIMEOUT=180 # Timeout for agent execution in seconds
25
  GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
26
+ GROQ_API_KEY = os.getenv("GROQ_TOKEN")
27
+ GEMINI_OPENAI_API_DIR = "https://generativelanguage.googleapis.com/v1beta/openai/"
28
  GEMINI_MODEL_NAME = "gemini-2.5-flash-preview-04-17"
29
+ LMSTUDIO_MODEL_NAME = "gemma-3-12B-it-qat-GGUF"
30
+ API_DIR = "http://host.docker.internal:1234/v1" # LM Studio API URL
31
 
32
  class FinalAgent:
33
  def __init__(self):
34
  # LLM Initialization
35
+ # self.llm = GoogleGenAI(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
36
+ # self.llm = Gemini(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
37
+ # self.llm = Groq(model="meta-llama/llama-4-maverick-17b-128e-instruct", api_key=GROQ_API_KEY)
38
+ # self.llm = LMStudio(model_name=LMSTUDIO_MODEL_NAME, base_url=API_DIR, request_timeout=180, temperature=0.1)
39
+ self.llm = HuggingFaceInferenceAPI(model_name="meta-llama/Llama-3.3-70B-Instruct", timeout=TIMEOUT)
40
 
41
  # Tool Initialization
42
  self.tools = [
43
  FunctionTool.from_defaults(
44
+ fn=interpret_python_math_code,
45
  name="InterpretPythonMathCode",
46
  description="Interprets Python code for mathematical expressions."
47
+ )
 
 
 
48
  ]
49
+ self.tools.extend(
50
+ ArxivToolSpec().to_tool_list()
51
+ )
52
+ self.tools.extend(
53
+ WikipediaToolSpec().to_tool_list()
54
+ )
55
+ self.tools.extend(
56
+ DuckDuckGoSearchToolSpec().to_tool_list()
57
+ )
58
 
59
  # Agent Workflow Initialization
60
+ # self.agent = AgentWorkflow.from_tools_or_functions(
61
+ # tools_or_functions=self.tools,
62
+ # llm=self.llm,
63
+ # system_prompt=GAIA_SYSTEM_PROMPT,
64
+ # timeout=TIMEOUT
65
+ # )
66
+
67
+ self.agent = ReActAgent(
68
  llm=self.llm,
69
+ verbose=True,
70
+ max_iterations=5,
71
+ system_prompt=GAIA_SYSTEM_PROMPT,
72
+ tools=self.tools
73
  )
74
 
75
  print("FinalAgent initialized.")
76
+ # async def __call__(self, question: str) -> str:
77
+ # # Example
78
+ # print(f"Agent received question: {question}")
79
+ # # fixed_answer = "This is a default answer."
80
+ # # print(f"Agent returning fixed answer: {fixed_answer}")
81
+ # # response = fixed_answer
82
+
83
+ # # Implement agent logic here
84
+ # response = ""
85
+ # # Run the agent with the question
86
+ # stream = await self.agent.run(question)
87
+ # response = stream.response.content
88
+ # # async for event in stream.stream_events():
89
+ # # if isinstance(event, AgentStream):
90
+ # # # Check if delta is empty
91
+ # # if event.raw["choices"][0]["delta"] != {}:
92
+ # # response += event.raw["choices"][0]["delta"]["content"]
93
+
94
+ # print(f"Agent response: {response}")
95
+
96
+ # return response
97
+
98
+ async def __call__(self, question: str) -> str:
99
+ print(f"Agent received question: {question}")
100
+
101
+ response_str = ""
102
+ try:
103
+ # Use arun for an async method.
104
+ agent_chat_response = await self.agent.run(question)
105
+
106
+ potential_response_obj = agent_chat_response.response
107
+
108
+ if isinstance(potential_response_obj, ChatMessage):
109
+ # If it's a ChatMessage, its .content attribute should hold the string
110
+ print(f"DEBUG: Response object is ChatMessage. Role: {potential_response_obj.role}")
111
+ response_str = potential_response_obj.content
112
+ if response_str is None: # Handle cases where content might be None
113
+ print("DEBUG: ChatMessage content is None, defaulting to empty string.")
114
+ response_str = ""
115
+ elif isinstance(potential_response_obj, str):
116
+ # If it's already a string
117
+ print("DEBUG: Response object is str.")
118
+ response_str = potential_response_obj
119
+ elif isinstance(potential_response_obj, llama_index_chat_message):
120
+ # If it's a llama_index ChatMessage, use its .content attribute
121
+ print(f"DEBUG: Response object is llama_index ChatMessage. Role: {potential_response_obj.role}")
122
+ response_str = potential_response_obj.content
123
+ if response_str is None:
124
+ print("DEBUG: llama_index ChatMessage content is None, defaulting to empty string.")
125
+ response_str = ""
126
+ else:
127
+ # Fallback if it's some other type
128
+ print(f"Warning: Agent response was of unexpected type: {type(potential_response_obj)}. Converting to string.")
129
+ response_str = str(potential_response_obj)
130
+
131
+ except Exception as e:
132
+ print(f"Error during agent execution with LLM {self.llm.__class__.__name__}: {e}")
133
+ # Depending on requirements, you might want to return an error message or re-raise
134
+ response_str = f"Agent error: {e}"
135
 
136
+ # Get the agent's final response string from FINAL ANSWER:
137
+ if "FINAL ANSWER: " in response_str:
138
+ response_str = response_str.split("FINAL ANSWER: ")[-1].strip()
139
+ else:
140
+ print("Warning: 'FINAL ANSWER:' not found in response string. Returning full response.")
141
+
142
+ print(f"Agent final response: {response_str}")
143
+ return response_str
144
+
145
+
146
+ async def main():
147
+ # Example usage
148
+ agent = FinalAgent()
149
+ question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
150
+ answer = await agent(question)
151
+ print(f"Final answer: {answer}")
152
+
153
+ if __name__ == "__main__":
154
+ asyncio.run(main())
app.py CHANGED
@@ -4,6 +4,9 @@ import requests
4
  import inspect
5
  import pandas as pd
6
 
 
 
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -19,7 +22,7 @@ class BasicAgent:
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
@@ -40,7 +43,9 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -80,7 +85,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
@@ -171,7 +176,7 @@ with gr.Blocks() as demo:
171
  outputs=[status_output, results_table]
172
  )
173
 
174
- if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
@@ -193,4 +198,6 @@ if __name__ == "__main__":
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
 
 
4
  import inspect
5
  import pandas as pd
6
 
7
+ from agent import FinalAgent
8
+ import asyncio
9
+
10
  # (Keep Constants as is)
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
22
  print(f"Agent returning fixed answer: {fixed_answer}")
23
  return fixed_answer
24
 
25
+ async def run_and_submit_all( profile: gr.OAuthProfile | None):
26
  """
27
  Fetches all questions, runs the BasicAgent on them, submits all answers,
28
  and displays the results.
 
43
 
44
  # 1. Instantiate Agent ( modify this part to create your agent)
45
  try:
46
+ # agent = BasicAgent()
47
+ agent = FinalAgent() # Use your custom agent class here
48
+ print(f"Agent instantiated successfully: {agent}")
49
  except Exception as e:
50
  print(f"Error instantiating agent: {e}")
51
  return f"Error initializing agent: {e}", None
 
85
  print(f"Skipping item with missing task_id or question: {item}")
86
  continue
87
  try:
88
+ submitted_answer = await agent(question_text)
89
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
90
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
91
  except Exception as e:
 
176
  outputs=[status_output, results_table]
177
  )
178
 
179
+ async def main():
180
  print("\n" + "-"*30 + " App Starting " + "-"*30)
181
  # Check for SPACE_HOST and SPACE_ID at startup for information
182
  space_host_startup = os.getenv("SPACE_HOST")
 
198
  print("-"*(60 + len(" App Starting ")) + "\n")
199
 
200
  print("Launching Gradio Interface for Basic Agent Evaluation...")
201
+ demo.launch(debug=True, share=False)
202
+ if __name__ == "__main__":
203
+ asyncio.run(main())
gaia_system_prompt.py CHANGED
@@ -3,4 +3,21 @@ FINAL ANSWER: [YOUR FINAL ANSWER].
3
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
4
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
4
  If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
5
  If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
6
+ If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string."""
7
+
8
+ SYSTEM_PROMPT = """
9
+ You are a general AI assistant. Answer my question directly, following these strict rules. Your entire output must be *only* the template below.
10
+
11
+ **Rules:**
12
+ * No thoughts, explanations, or extra text.
13
+ * The *only* output is: FINAL ANSWER: [YOUR SHORT ANSWER]
14
+ * [YOUR SHORT ANSWER] is a number, string, or comma-separated list.
15
+ * Numbers: No commas, no units (unless specified).
16
+ * Strings: No articles, no abbreviations, digits as words (unless specified).
17
+ * Lists: Apply number/string rules to items.
18
+
19
+ **Example:**
20
+ User: What is the capital of France?
21
+ Assistant:
22
+ FINAL ANSWER: Paris
23
+ """
requirements.txt CHANGED
@@ -3,7 +3,13 @@ requests
3
  numpy
4
  pandas
5
  scipy
 
6
  llama-index
 
 
 
 
 
7
  llama-index-llms-gemini
8
  llama-index-llms-google-genai
9
  llama-index-utils-workflow
 
3
  numpy
4
  pandas
5
  scipy
6
+ groq
7
  llama-index
8
+ llama-index-llms-huggingface
9
+ llama-index-llms-huggingface-api
10
+ llama-index-llms-groq
11
+ llama-index-utils-workflow
12
+ llama-index-llms-lmstudio
13
  llama-index-llms-gemini
14
  llama-index-llms-google-genai
15
  llama-index-utils-workflow
tools.py CHANGED
@@ -5,6 +5,12 @@ import sys
5
  import numpy as np
6
  import pandas as pd
7
  import scipy
 
 
 
 
 
 
8
 
9
  ALLOWED_MODULES = {"numpy", "pandas", "scipy"}
10
 
@@ -113,12 +119,124 @@ def interpret_python_math_code(python_code: str) -> str:
113
  sys.stdout = old_stdout
114
 
115
 
116
- # Example usage:
117
- if __name__ == "__main__":
118
- code = """
119
- import numpy as np
120
- # import os # This should trigger an error since 'os' is not allowed
121
- arr = np.array([1, 2, 3, 4, 5])
122
- _result = arr.mean()
123
- """
124
- print(interpret_python_math_code(code))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import numpy as np
6
  import pandas as pd
7
  import scipy
8
+ from groq import Groq
9
+
10
+ from pathlib import Path
11
+ import pandas as pd
12
+ import mimetypes
13
+ import base64
14
 
15
  ALLOWED_MODULES = {"numpy", "pandas", "scipy"}
16
 
 
119
  sys.stdout = old_stdout
120
 
121
 
122
+ ## STT tool
123
+ def convert_audio_to_text(path_to_audio: str) -> str:
124
+ """
125
+ Converts speech from an audio file into text.
126
+ Args:
127
+ path_to_audio (str): The path to the audio file to be transcribed.
128
+ Returns:
129
+ str: The transcribed text content of the audio file.
130
+ """
131
+
132
+ # Validate audio file
133
+ if not isinstance(path_to_audio, str):
134
+ raise TypeError(
135
+ "Parameter 'path_to_audio' must be a string containing the file path."
136
+ )
137
+ path = Path(path_to_audio).expanduser().resolve()
138
+ if not path.is_file():
139
+ raise FileNotFoundError(f"No such audio file: {path}")
140
+
141
+ # Initialize the Groq client
142
+ client = Groq()
143
+
144
+ # Open the audio file
145
+ with open(path_to_audio, "rb") as audio_file:
146
+ # Create a transcription of the audio file
147
+ transcription = client.audio.transcriptions.create(
148
+ file=audio_file,
149
+ model="whisper-large-v3-turbo",
150
+ response_format="text", # Returns plain text instead of JSON
151
+ language="en",
152
+ temperature=0.1
153
+ )
154
+
155
+ return transcription
156
+
157
+ ## Analyze image tool
158
+ def analyze_image(path_to_image: str, question: str) -> str:
159
+ """
160
+ Analyzes an image and generates a response to a given question based on the image's content.
161
+
162
+ Args:
163
+ path_to_image (str): The path to the image file to be analyzed.
164
+ question (str): The question to be answered, based on the contents of the image.
165
+
166
+ Returns:
167
+ str: The response from a VLM, typically a textual analysis or description based on the image.
168
+ """
169
+
170
+ def encode_image(image_path):
171
+ with open(image_path, "rb") as image_file:
172
+ return base64.b64encode(image_file.read()).decode('utf-8')
173
+
174
+ # Get the MIME type (e.g., image/png, image/jpeg)
175
+ mime_type, _ = mimetypes.guess_type(path_to_image)
176
+ if mime_type is None:
177
+ raise ValueError("Unsupported file type. Please provide a valid image.")
178
+
179
+ base64_image = encode_image(path_to_image)
180
+
181
+ # Initialize the Groq client
182
+ client = Groq()
183
+
184
+ chat_completion = client.chat.completions.create(
185
+ messages=[
186
+ {
187
+ "role": "user",
188
+ "content": [
189
+ {"type": "text", "text": question},
190
+ {
191
+ "type": "image_url",
192
+ "image_url": {
193
+ "url": f"data:{mime_type};base64,{base64_image}",
194
+ },
195
+ },
196
+ ],
197
+ }
198
+ ],
199
+ model="meta-llama/llama-4-scout-17b-16e-instruct",
200
+ )
201
+
202
+ return chat_completion.choices[0].message.content
203
+
204
+ ## Read .csv file tool
205
+ def read_csv_file(path_to_csv: str) -> str:
206
+ """
207
+ Reads a CSV file from the specified path and returns its content as plain text.
208
+
209
+ Args:
210
+ path_to_csv (str): The file path to the CSV file.
211
+
212
+ Returns:
213
+ str: Content of the CSV file as plain text.
214
+ """
215
+ try:
216
+ # Read the CSV file using pandas
217
+ df = pd.read_csv(path_to_csv)
218
+
219
+ # Return df as plain text
220
+ return df.to_string(index=False)
221
+ except Exception as e:
222
+ return f"Error reading the CSV file: {e}"
223
+
224
+ ## Read .xlsx file tool
225
+ def read_xlsx_file(path_to_xlsx: str) -> str:
226
+ """
227
+ Reads a XLSX file from the specified path and returns its content as plain text.
228
+
229
+ Args:
230
+ path_to_xlsx (str): The file path to the XLSX file.
231
+
232
+ Returns:
233
+ str: Content of the XLSX file as plain text.
234
+ """
235
+ try:
236
+ # Read the XLSX file using pandas
237
+ df = pd.read_excel(path_to_xlsx)
238
+
239
+ # Return df as plain text
240
+ return df.to_string(index=False)
241
+ except Exception as e:
242
+ return f"Error reading the XLSX file: {e}"