datdevsteve committed on
Commit
3010658
·
verified ·
1 Parent(s): fd73fb1

created the agent

Browse files
Files changed (1) hide show
  1. app.py +253 -46
app.py CHANGED
@@ -1,34 +1,212 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,15 +216,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
 
50
 
51
  # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
@@ -55,16 +234,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -73,26 +252,40 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
82
  try:
83
  submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
  # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
@@ -142,28 +335,29 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
-
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
 
 
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
  run_button.click(
@@ -173,9 +367,12 @@ with gr.Blocks() as demo:
173
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
 
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
 
 
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +380,24 @@ if __name__ == "__main__":
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
 
 
 
 
 
 
 
 
192
 
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
  demo.launch(debug=True, share=False)
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from langchain.agents import create_agent
6
+ from langchain_google_genai import ChatGoogleGenerativeAI
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain.tools import tool
9
+ from dotenv import load_dotenv
10
+ from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
11
+ from ddgs import DDGS
12
+
13
+ # Load environment variables
14
+ load_dotenv()
15
 
 
16
  # --- Constants ---
17
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
18
 
19
+ # --- Agent Setup ---
20
# API keys are read from the environment here; neither variable is passed to
# the model constructor below.
# NOTE(review): presumably the Google client picks up GOOGLE_API_KEY from the
# environment on its own -- confirm.
openai_key = os.getenv("OPENAI_API_KEY")
googleai_key = os.getenv("GOOGLE_API_KEY")

# Initialize the model
# Shared chat model handed to create_agent() by GAIAAgent.__init__.
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_tokens=5000,
    timeout=None,
    max_retries=2,
)
31
+
32
+ # --- Tools Definition ---
33
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers.
    Args:
        a: first int
        b: second int
    """
    product = a * b
    return product
41
+
42
@tool
def add(a: int, b: int) -> int:
    """Add two numbers.

    Args:
        a: first int
        b: second int
    """
    total = a + b
    return total
51
+
52
@tool
def subtract(a: int, b: int) -> int:
    """Subtract two numbers.

    Args:
        a: first int
        b: second int
    """
    difference = a - b
    return difference
61
+
62
@tool
def divide(a: int, b: int) -> float:
    """Divide two numbers.

    Args:
        a: first int (the dividend)
        b: second int (the divisor); must not be zero

    Returns:
        The quotient ``a / b`` as a float.

    Raises:
        ValueError: if ``b`` is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide by zero.")
    # True division always yields a float, hence the float return annotation.
    return a / b
73
+
74
@tool
def modulus(a: int, b: int) -> int:
    """Get the modulus of two numbers.

    Args:
        a: first int
        b: second int; must not be zero

    Returns:
        The remainder ``a % b``.

    Raises:
        ValueError: if ``b`` is zero.
    """
    # Report a zero divisor the same way divide() does instead of leaking a
    # raw ZeroDivisionError out of the tool.
    if b == 0:
        raise ValueError("Cannot take modulus by zero.")
    return a % b
83
+
84
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        The matching documents as one text block, separated by '---' lines,
        or a short notice when nothing was found.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    if not search_docs:
        # Give the model an explicit signal rather than an empty payload.
        return "No Wikipedia results found."

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    # Return the text itself (matching the declared ``str`` return type and
    # web_search) rather than wrapping it in a dict.
    return formatted_search_docs
97
+
98
@tool
def web_search(query: str) -> str:
    """Search DDGS for a query and return maximum 3 results.

    Args:
        query: The search query."""
    hits = DDGS().text(query, max_results=3)

    # One "Title/Content" entry per hit, joined with the same separator the
    # other search tools use.
    entries = []
    for hit in hits:
        entries.append(f'Title:{hit["title"]}\nContent:{hit["body"]}\n--\n')
    return "\n\n---\n\n".join(entries)
111
+
112
# The Python identifier keeps its original spelling because the module-level
# `tools` list refers to it; the tool is exposed to the model as
# "arxiv_search", which is the name the system prompt advertises.
@tool("arxiv_search")
def arvix_search(query: str) -> str:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        Up to 3 documents (first 1000 characters of each) as one text block,
        separated by '---' lines, or a short notice when nothing was found.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    if not search_docs:
        # Give the model an explicit signal rather than an empty payload.
        return "No arXiv results found."

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        for doc in search_docs
    )
    # Return the text itself (matching the declared ``str`` return type)
    # rather than wrapping it in a dict.
    return formatted_search_docs
125
+
126
@tool
def image_search(query: str) -> str:
    """Searches DDGS for an image query and returns maximum 10 image results

    Args:
        query: The image search query.

    Returns:
        One "Image Title / Image URL" entry per result, separated by '---'
        lines, or a short notice when nothing was found.
    """
    # Pass the limit explicitly so the result count matches the documented
    # maximum of 10.
    search_images = DDGS().images(query=query, max_results=10)
    if not search_images:
        return "No image results found."

    # NOTE(review): "url" is the key the original code reads; confirm against
    # the ddgs result schema whether it holds the image file or the hosting
    # page.
    formatted_result = "\n\n---\n\n".join(
        f'Image Title:{image["title"]}\nImage URL: {image["url"]}'
        for image in search_images
    )
    # The original built this string but never returned it, so the tool
    # always produced None.
    return formatted_result
135
+
136
+
137
+ # Tools list
138
# Every tool handed to the agent: calculator operations first, then searches.
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    web_search,
    arvix_search,
    image_search,
]
142
+
143
+ # System prompt
144
# Passed to create_agent() as `system_prompt` by GAIAAgent.__init__. It lists
# the available tools and requires every reply to end with a "[TOOLS USED ...]"
# line. The text is sent to the model at runtime, so its wording is part of the
# program's behavior.
sys_prompt = """You are a helpful agent, please provide clear and concise answers to asked questions.
Keep your word limit for answers as minimum as you can. You are equipped with the following tools:
1. [multiply], [add], [subtract], [divide], [modulus] - basic calculator operations.
2. [wiki_search] - search Wikipedia and return up to 2 documents as text.
3. [web_search] - perform a web search and return up to 3 documents as text.
4. [arxiv_search] - search arXiv and return up to 3 documents as text.
5. [image_search] - Searches the internet for an image query and returns maximum 10 image results

Under any circumstances, if you fail to provide the accurate answer expected by the user, you may say the same to the user and provide a similar answer which is approximately the closest. Disregard spelling mistakes and provide answer with results retreived from the correct spelling.

For every tool you use, append a single line at the end of your response exactly in this format:
[TOOLS USED: (tool_name)]
When no tools are used, append:
[TOOLS USED WERE NONE]"""
158
+
159
+ # --- Agent Class ---
160
class GAIAAgent:
    """Callable wrapper around a LangChain agent.

    The agent is built from the module-level ``model``, ``tools`` and
    ``sys_prompt``. Calling an instance with a question returns the text of
    the agent's last message.
    """

    def __init__(self):
        """Create the underlying agent; re-raises any construction error."""
        print("GAIAAgent initialized with LangChain agent.")
        try:
            self.agent = create_agent(model, tools=tools, system_prompt=sys_prompt)
            print("Agent created successfully.")
        except Exception as e:
            print(f"Error creating agent: {e}")
            raise

    @staticmethod
    def _extract_text(raw_content) -> str:
        """Flatten a message ``content`` value into plain text.

        Handles a plain string, or a list whose items are strings or dicts
        carrying a string under ``'text'``. All text parts are concatenated in
        order, so a multi-part reply is not truncated to its first part.
        Anything else (including a list with no text parts) falls back to
        ``str(raw_content)``.
        """
        if isinstance(raw_content, str):
            return raw_content
        if isinstance(raw_content, list):
            pieces = []
            for part in raw_content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            if pieces:
                return "".join(pieces)
        return str(raw_content)

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer text.

        Args:
            question: The user question forwarded as a single user message.

        Returns:
            The text of the last message produced by the agent, or
            ``"Error: <message>"`` if the agent raised. Failures are reported
            in the return value rather than propagated.
        """
        print(f"Agent received question (first 100 chars): {question[:100]}...")
        try:
            result = self.agent.invoke({
                "messages": [{"role": "user", "content": question}]
            })

            # The answer lives in the content of the last message.
            answer = self._extract_text(result["messages"][-1].content)

            print(f"Agent returning answer (first 100 chars): {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Error in agent execution: {e}")
            import traceback
            traceback.print_exc()
            return f"Error: {str(e)}"
199
 
200
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
201
  """
202
+ Fetches all questions, runs the GAIAAgent on them, submits all answers,
203
  and displays the results.
204
  """
205
  # --- Determine HF Space Runtime URL and Repo URL ---
206
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
207
 
208
  if profile:
209
+ username = f"{profile.username}"
210
  print(f"User logged in: {username}")
211
  else:
212
  print("User not logged in.")
 
216
  questions_url = f"{api_url}/questions"
217
  submit_url = f"{api_url}/submit"
218
 
219
+ # 1. Instantiate Agent
220
  try:
221
+ agent = GAIAAgent()
222
  except Exception as e:
223
  print(f"Error instantiating agent: {e}")
224
  return f"Error initializing agent: {e}", None
225
+
226
+ # In the case of an app running as a Hugging Face space, this link points toward your codebase
227
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local"
228
+ print(f"Agent code location: {agent_code}")
229
 
230
  # 2. Fetch Questions
231
  print(f"Fetching questions from: {questions_url}")
 
234
  response.raise_for_status()
235
  questions_data = response.json()
236
  if not questions_data:
237
+ print("Fetched questions list is empty.")
238
+ return "Fetched questions list is empty or invalid format.", None
239
  print(f"Fetched {len(questions_data)} questions.")
240
  except requests.exceptions.RequestException as e:
241
  print(f"Error fetching questions: {e}")
242
  return f"Error fetching questions: {e}", None
243
  except requests.exceptions.JSONDecodeError as e:
244
+ print(f"Error decoding JSON response from questions endpoint: {e}")
245
+ print(f"Response text: {response.text[:500]}")
246
+ return f"Error decoding server response for questions: {e}", None
247
  except Exception as e:
248
  print(f"An unexpected error occurred fetching questions: {e}")
249
  return f"An unexpected error occurred fetching questions: {e}", None
 
252
  results_log = []
253
  answers_payload = []
254
  print(f"Running agent on {len(questions_data)} questions...")
255
+ for idx, item in enumerate(questions_data, 1):
256
  task_id = item.get("task_id")
257
  question_text = item.get("question")
258
  if not task_id or question_text is None:
259
  print(f"Skipping item with missing task_id or question: {item}")
260
  continue
261
+
262
+ print(f"Processing question {idx}/{len(questions_data)} - Task ID: {task_id}")
263
  try:
264
  submitted_answer = agent(question_text)
265
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
266
+ results_log.append({
267
+ "Task ID": task_id,
268
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
269
+ "Submitted Answer": submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
270
+ })
271
  except Exception as e:
272
+ print(f"Error running agent on task {task_id}: {e}")
273
+ results_log.append({
274
+ "Task ID": task_id,
275
+ "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
276
+ "Submitted Answer": f"AGENT ERROR: {e}"
277
+ })
278
 
279
  if not answers_payload:
280
  print("Agent did not produce any answers to submit.")
281
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
282
 
283
  # 4. Prepare Submission
284
+ submission_data = {
285
+ "username": username.strip(),
286
+ "agent_code": agent_code,
287
+ "answers": answers_payload
288
+ }
289
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
290
  print(status_update)
291
 
 
335
 
336
  # --- Build Gradio Interface using Blocks ---
337
  with gr.Blocks() as demo:
338
+ gr.Markdown("# GAIA Benchmark Agent Evaluation")
339
  gr.Markdown(
340
  """
341
  **Instructions:**
342
+ 1. This app integrates a LangChain agent with multiple tools (calculator, Wikipedia, web search, Arxiv).
343
+ 2. Log in to your Hugging Face account using the button below.
344
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch GAIA questions, run your agent, and submit answers.
345
+
346
+ **Agent Tools:**
347
+ - Mathematical operations (add, subtract, multiply, divide, modulus)
348
+ - Wikipedia search
349
+ - Web search (Tavily)
350
+ - Arxiv academic paper search
351
+
352
+ **Note:** Processing all questions may take several minutes depending on the number of questions and API response times.
353
  """
354
  )
355
 
356
  gr.LoginButton()
357
 
358
+ run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
359
 
360
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
361
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
362
 
363
  run_button.click(
 
367
 
368
  if __name__ == "__main__":
369
  print("\n" + "-"*30 + " App Starting " + "-"*30)
370
+
371
+ # Check for required environment variables
372
  space_host_startup = os.getenv("SPACE_HOST")
373
+ space_id_startup = os.getenv("SPACE_ID")
374
+ google_api_key = os.getenv("GOOGLE_API_KEY")
375
+ tavily_api_key = os.getenv("TAVILY_API_KEY")
376
 
377
  if space_host_startup:
378
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
380
  else:
381
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
382
 
383
+ if space_id_startup:
384
  print(f"✅ SPACE_ID found: {space_id_startup}")
385
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
386
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
387
  else:
388
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
389
+
390
+ if google_api_key:
391
+ print("✅ GOOGLE_API_KEY found")
392
+ else:
393
+ print("⚠️ GOOGLE_API_KEY not found - agent will not work without it!")
394
+
395
+ if tavily_api_key:
396
+ print("✅ TAVILY_API_KEY found")
397
+ else:
398
+ print("⚠️ TAVILY_API_KEY not found - web search will not work!")
399
 
400
  print("-"*(60 + len(" App Starting ")) + "\n")
401
 
402
+ print("Launching Gradio Interface for GAIA Agent Evaluation...")
403
  demo.launch(debug=True, share=False)