lethaq commited on
Commit
5d7f198
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -38
app.py CHANGED
@@ -3,25 +3,202 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
@@ -38,13 +215,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
 
 
 
 
 
 
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
 
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
  print(agent_code)
50
 
@@ -80,9 +264,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
 
83
  submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
86
  except Exception as e:
87
  print(f"Error running agent on task {task_id}: {e}")
88
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -91,23 +278,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
-
99
- # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
 
103
  response.raise_for_status()
104
  result_data = response.json()
105
  final_status = (
106
  f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
  print("Submission successful.")
113
  results_df = pd.DataFrame(results_log)
@@ -142,19 +323,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
-
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
@@ -163,13 +342,41 @@ with gr.Blocks() as demo:
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  run_button.click(
170
  fn=run_and_submit_all,
 
171
  outputs=[status_output, results_table]
172
  )
 
 
 
 
 
 
173
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
@@ -192,5 +399,7 @@ if __name__ == "__main__":
192
 
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
 
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ import json
7
+ import time
8
+ from typing import List, Dict, Any, Optional
9
+ from litellm import completion
10
+ from duckduckgo_search import DDGS
11
 
 
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
+ # --- Tool Implementations ---
16
class DuckDuckGoSearchTool:
    """Web-search tool backed by the `DDGS` client.

    The instance is callable: calling it runs a search and wraps the raw
    results together with some metadata (tool name, query, result count and
    elapsed time).
    """

    def __init__(self):
        self.name = "duckduckgo_search"
        self.description = "Search the web using DuckDuckGo"

    def search(self, query: str, max_results: int = 5) -> List[Dict[str, str]]:
        """
        Search the web using DuckDuckGo and return results.

        Args:
            query: The search query
            max_results: Maximum number of results to return

        Returns:
            List of dictionaries with search results. This method never
            raises: if anything goes wrong the error is printed and a single
            placeholder entry is returned whose ``title`` carries the error
            message and whose ``body`` and ``href`` are empty strings.
        """
        try:
            with DDGS() as ddgs:
                # Materialise the results while the client is still open.
                return list(ddgs.text(query, max_results=max_results))
        except Exception as e:
            # Best-effort by design: a failed search must not abort the agent.
            print(f"DuckDuckGo search error: {e}")
            return [{"title": f"Search error: {e}", "body": "", "href": ""}]

    def __call__(self, query: str, max_results: int = 5) -> Dict[str, Any]:
        """
        Execute the search and return results in a structured format.

        Args:
            query: The search query
            max_results: Maximum number of results to return

        Returns:
            Dictionary with the keys ``tool_name``, ``query``, ``results``,
            ``result_count`` and ``time_taken`` (seconds, never negative).
        """
        # perf_counter() is monotonic; the wall clock (time.time()) can be
        # adjusted while the search is running and produce a bogus or even
        # negative duration.
        started = time.perf_counter()
        results = self.search(query, max_results)
        elapsed = time.perf_counter() - started

        return {
            "tool_name": self.name,
            "query": query,
            "results": results,
            "result_count": len(results),
            "time_taken": elapsed,
        }
62
+
63
+ # --- LiteLLM Model Wrapper ---
64
class LiteLLMModel:
    """Minimal chat-completion wrapper around the `completion` function.

    Stores the model identifier and API key and exposes a single
    `generate` method that always returns a string.
    """

    def __init__(self, model_id: str, api_key: str):
        self.model_id = model_id
        self.api_key = api_key
        print(f"Initialized LiteLLM with model: {model_id}")

    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
        """
        Generate text using the LiteLLM model.

        Args:
            prompt: The user prompt
            system_prompt: Optional system prompt; omitted from the request
                when empty or None

        Returns:
            Generated text response. If the call fails, the error is printed
            and a string starting with "Error generating response: " is
            returned instead of raising. If the response carries no text
            content, an empty string is returned.
        """
        # Building the message list cannot fail, so it stays outside the try.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = completion(
                model=self.model_id,
                messages=messages,
                api_key=self.api_key,
            )
            content = response.choices[0].message.content
        except Exception as e:
            print(f"LiteLLM generation error: {e}")
            return f"Error generating response: {str(e)}"

        # The content field may be None; callers rely on the declared `str`
        # return type (they call .strip() on it), so normalise to "".
        return "" if content is None else content
97
+
98
+ # --- Advanced Agent Implementation ---
99
class CodeAgent:
    """Question-answering agent: optional web search followed by one model call.

    For questions that look like factual look-ups, the agent first runs the
    DuckDuckGo tool (if one was supplied) and embeds the results in the
    prompt. The model's reply is then cleaned of common lead-in phrases and
    wrapping quotes before being returned.
    """

    # Substrings (matched against the lower-cased question) that trigger a
    # web search before the model is asked.
    _SEARCH_TRIGGERS = ("what is", "who is", "when", "where", "how many", "which")

    # Lead-ins that models tend to add; stripped (case-sensitively, in this
    # order) from the start of the reply.
    _ANSWER_PREFIXES = (
        "Answer:", "The answer is:", "I believe", "I think",
        "Based on", "According to", "The answer would be",
    )

    def __init__(self, tools: List[Any], model: "LiteLLMModel"):
        self.tools = tools
        self.model = model
        # First DuckDuckGo tool in the list, or None when there is none.
        self.search_tool = next(
            (tool for tool in tools if isinstance(tool, DuckDuckGoSearchTool)),
            None,
        )
        print(f"CodeAgent initialized with {len(tools)} tools and model {model.model_id}")

    @staticmethod
    def _field(result: Dict[str, Any], key: str, default: str) -> Any:
        """Return result[key], using `default` when the key is missing or None."""
        value = result.get(key, default)
        return default if value is None else value

    def format_search_results(self, results: List[Dict[str, str]]) -> str:
        """Format search results into a readable, numbered string.

        Each entry shows the title, the first 200 characters of the body
        followed by "...", and the URL. Missing or None fields fall back to
        "No title" / "No description" / "No URL".
        """
        parts = ["Search Results:\n"]
        for i, result in enumerate(results, 1):
            title = self._field(result, "title", "No title")
            # str() guards the slice against non-string bodies.
            body = str(self._field(result, "body", "No description"))
            href = self._field(result, "href", "No URL")
            parts.append(f"{i}. {title}\n")
            parts.append(f" {body[:200]}...\n")
            parts.append(f" URL: {href}\n\n")
        return "".join(parts)

    def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
        """Create a prompt for the model with optional search results.

        Args:
            question: The question to answer
            search_results: Results to embed; skipped when None or empty

        Returns:
            The full user prompt, ending with "Answer: "
        """
        parts = [f"Question: {question}\n\n"]

        if search_results:
            parts.append(self.format_search_results(search_results))

        parts.extend((
            "\nPlease provide a concise, factual answer to the question. ",
            "Your answer should be direct and to the point, without any explanations or reasoning. ",
            "For example, if asked 'What is the capital of France?', just answer 'Paris'. ",
            "If asked for a numerical value, provide only the number. ",
            "If asked for a list, provide comma-separated values without numbering. ",
            "If you don't know the answer, respond with 'Unknown' rather than speculating.\n\n",
            "Answer: ",
        ))

        return "".join(parts)

    def create_system_prompt(self) -> str:
        """Create a system prompt for the model"""
        return (
            "You are a helpful AI assistant specialized in answering factual questions. "
            "You always provide direct, concise answers without explanations or reasoning. "
            "Your answers are factual, accurate, and to the point. "
            "For questions requiring specific formats, you follow those formats exactly. "
            "You never include phrases like 'the answer is' or 'I believe' in your responses."
        )

    def __call__(self, question: str) -> str:
        """
        Process a question and return an answer.

        Args:
            question: The question to answer

        Returns:
            The cleaned answer to the question (an empty string when the
            model produced no text)
        """
        print(f"Agent received question: {question[:100]}...")

        # Determine if we should use search for this question.
        lowered = question.lower()
        should_search = any(trigger in lowered for trigger in self._SEARCH_TRIGGERS)

        search_results = None
        if should_search and self.search_tool:
            print(f"Searching for information about: {question}")
            search_response = self.search_tool(question, max_results=3)
            search_results = search_response.get("results", [])
            print(f"Found {len(search_results)} search results")

        # Create prompt and generate response
        prompt = self.create_prompt(question, search_results)
        system_prompt = self.create_system_prompt()

        print("Generating response with LLM...")
        response = self.model.generate(prompt, system_prompt)

        # Clean up the response; a None reply is treated as empty text
        # instead of crashing on .strip().
        answer = (response or "").strip()

        # Remove common prefixes that models tend to add
        for prefix in self._ANSWER_PREFIXES:
            if answer.startswith(prefix):
                answer = answer[len(prefix):].strip()

        # Remove quotes if they wrap the entire answer
        first = answer[:1]
        if first in ('"', "'") and answer.endswith(first):
            answer = answer[1:-1].strip()

        print(f"Final answer: {answer[:100]}...")
        return answer
198
 
199
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
200
  """
201
+ Fetches all questions, runs the Agent on them, submits all answers,
202
  and displays the results.
203
  """
204
  # --- Determine HF Space Runtime URL and Repo URL ---
 
215
  questions_url = f"{api_url}/questions"
216
  submit_url = f"{api_url}/submit"
217
 
218
+ # 1. Instantiate Agent with Gemini model and DuckDuckGo search
219
  try:
220
+ # Get API key from environment variable
221
+ api_key = os.getenv("GEMINI_API_KEY")
222
+ if not api_key:
223
+ return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
224
+
225
+ model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=api_key)
226
+ agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
227
  except Exception as e:
228
  print(f"Error instantiating agent: {e}")
229
  return f"Error initializing agent: {e}", None
230
+
231
+ # In the case of an app running as a hugging Face space, this link points toward your codebase
232
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
233
  print(agent_code)
234
 
 
264
  print(f"Skipping item with missing task_id or question: {item}")
265
  continue
266
  try:
267
+ print(f"Processing task {task_id}: {question_text[:50]}...")
268
  submitted_answer = agent(question_text)
269
+ # Important: Use "model_answer" as the key, not "submitted_answer"
270
+ answers_payload.append({"task_id": task_id, "model_answer": submitted_answer})
271
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
272
+ print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
273
  except Exception as e:
274
  print(f"Error running agent on task {task_id}: {e}")
275
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
278
  print("Agent did not produce any answers to submit.")
279
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
280
 
281
+ # 4. Submit answers directly as a list of dictionaries
 
 
 
 
 
282
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
283
  try:
284
+ # Important: Submit the answers_payload directly as JSON
285
+ response = requests.post(submit_url, json=answers_payload, timeout=60)
286
  response.raise_for_status()
287
  result_data = response.json()
288
  final_status = (
289
  f"Submission Successful!\n"
290
+ f"Score: {result_data.get('score', 'N/A')}% "
 
291
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
292
  )
293
  print("Submission successful.")
294
  results_df = pd.DataFrame(results_log)
 
323
 
324
  # --- Build Gradio Interface using Blocks ---
325
  with gr.Blocks() as demo:
326
+ gr.Markdown("# Gemini Agent for GAIA Benchmark")
327
  gr.Markdown(
328
  """
329
  **Instructions:**
330
+ 1. Make sure you have set the GEMINI_API_KEY environment variable in your Space settings.
331
+ 2. Log in to your Hugging Face account using the button below.
332
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.
333
+
334
+ This agent uses:
335
+ - Gemini 2.0 Flash Lite model for reasoning
336
+ - DuckDuckGo search for retrieving information
 
 
337
  """
338
  )
339
 
 
342
  run_button = gr.Button("Run Evaluation & Submit All Answers")
343
 
344
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
345
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
346
 
347
+ # Add a single question test feature
348
+ gr.Markdown("## Test Single Question")
349
+ with gr.Row():
350
+ question_in = gr.Textbox(label="Question", lines=3)
351
+ answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
352
+
353
+ test_btn = gr.Button("Test Question", variant="secondary")
354
+
355
+ # Add a function to test a single question
356
def test_single_question(question):
    """Answer one ad-hoc question with a freshly built agent.

    Reads the Gemini API key from the GEMINI_API_KEY environment variable,
    builds a LiteLLMModel and a CodeAgent with the DuckDuckGo tool, and runs
    the agent on `question`.

    Args:
        question: The question text entered in the UI

    Returns:
        The agent's answer, or a string starting with "Error: " when the key
        is missing or anything raises while building/running the agent.
    """
    try:
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            return "Error: GEMINI_API_KEY environment variable not found"

        # Pass the key obtained from the environment. A credential must never
        # be written into the source: besides leaking the secret, an unquoted
        # token here is an undefined name and fails on every call.
        model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=api_key)
        agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
        return agent(question)
    except Exception as e:
        return f"Error: {str(e)}"
368
+
369
# No `inputs` here: `run_and_submit_all` takes only the `gr.OAuthProfile | None`
# parameter, which Gradio fills in from the type hint for the logged-in user.
# `gr.OAuthProfile` is not an input component and cannot be instantiated
# without profile data, so it must not be listed as an input.
run_button.click(
    fn=run_and_submit_all,
    outputs=[status_output, results_table]
)
374
+
375
+ test_btn.click(
376
+ fn=test_single_question,
377
+ inputs=[question_in],
378
+ outputs=[answer_out]
379
+ )
380
 
381
  if __name__ == "__main__":
382
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
399
 
400
  print("-"*(60 + len(" App Starting ")) + "\n")
401
 
402
+ print("Launching Gradio Interface for Gemini Agent Evaluation...")
403
+ demo.launch(debug=True, share=False)
404
+
405
+