bhotta committed on
Commit
5ebb577
·
verified ·
1 Parent(s): d6e9174

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -50
app.py CHANGED
@@ -2,76 +2,215 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
 
 
6
 
7
- # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
- # --- OpenAI-Powered Agent Definition ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  class BasicAgent:
12
  def __init__(self):
13
- # 1. Get the key from the environment (Must be set in HF Space Secrets)
14
  api_key = os.getenv("OPENAI_API_KEY")
15
-
16
  if not api_key:
17
- raise ValueError("OPENAI_API_KEY is missing! Add it to your Space Secrets.")
18
 
19
- # 2. Initialize the Model (GPT-4o is recommended for GAIA tasks)
20
  self.model = OpenAIServerModel(
21
- model_id="gpt-4o",
22
  api_key=api_key
23
  )
24
 
25
- # 3. Initialize the Agent with tools
26
  self.agent = CodeAgent(
27
- tools=[DuckDuckGoSearchTool()],
 
 
 
 
 
 
28
  model=self.model,
29
- add_base_tools=True
 
30
  )
31
  print("✅ OpenAI-powered Agent initialized.")
32
 
33
- def __call__(self, question: str) -> str:
34
  print(f"DEBUG: Agent received question: {question[:100]}...")
35
-
36
- # Formatting the prompt for precise GAIA evaluation
37
  prompt = (
38
- f"You are a helpful agent. Task: {question}\n\n"
39
- "Provide ONLY the final direct answer. No explanations, no 'The answer is...', "
40
- "just the value or fact requested."
 
 
 
 
 
 
41
  )
42
-
43
  try:
44
  result = self.agent.run(prompt)
45
  return str(result).strip()
46
  except Exception as e:
47
- print(f"❌ Error during agent execution: {e}")
48
  return "Error finding answer."
49
 
 
 
 
50
  def run_and_submit_all(profile: gr.OAuthProfile | None):
51
- """
52
- Fetches GAIA questions, runs the BasicAgent, and submits to the leaderboard.
53
- """
54
- # 1. Check Login
55
  if profile:
56
  username = f"{profile.username}"
57
  print(f"Logged in as: {username}")
58
  else:
59
- return "Please Login to Hugging Face with the button above first.", None
60
 
61
- # 2. Setup URLs and Paths
62
- space_id = os.getenv("SPACE_ID")
63
  api_url = DEFAULT_API_URL
64
  questions_url = f"{api_url}/questions"
65
  submit_url = f"{api_url}/submit"
66
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
67
 
68
- # 3. Instantiate Agent
69
  try:
70
  agent = BasicAgent()
71
  except Exception as e:
72
  return f"Initialization Failed: {e}", None
73
 
74
- # 4. Fetch Questions
75
  try:
76
  response = requests.get(questions_url, timeout=15)
77
  response.raise_for_status()
@@ -79,26 +218,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
79
  except Exception as e:
80
  return f"Error fetching questions: {e}", None
81
 
82
- # 5. Run Agent on Questions
83
  results_log = []
84
  answers_payload = []
85
-
86
- # NOTE: This loop can take several minutes!
87
  for item in questions_data:
88
- task_id = item.get("task_id")
89
- question_text = item.get("question")
90
-
91
  try:
92
- submitted_answer = agent(question_text)
93
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
94
- results_log.append({"Task ID": task_id, "Question": question_text, "Answer": submitted_answer})
95
  except Exception as e:
96
- results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"Error: {e}"})
97
 
98
- # 6. Submit to Leaderboard
99
  submission_data = {
100
- "username": username.strip(),
101
- "agent_code": agent_code,
102
  "answers": answers_payload
103
  }
104
 
@@ -106,7 +241,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
106
  response = requests.post(submit_url, json=submission_data, timeout=60)
107
  response.raise_for_status()
108
  res = response.json()
109
-
110
  status = (
111
  f"Submission Successful!\n"
112
  f"Score: {res.get('score')}% ({res.get('correct_count')}/{res.get('total_attempted')})\n"
@@ -116,21 +250,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
116
  except Exception as e:
117
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
118
 
119
- # --- Gradio UI ---
120
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
121
  gr.Markdown("# 🤖 GAIA Agent Evaluation")
122
  gr.Markdown("Click Login, then Run to evaluate your agent on the GAIA dataset.")
123
-
124
  gr.LoginButton()
125
  run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
126
-
127
  status_output = gr.Textbox(label="Status", lines=4)
128
  results_table = gr.DataFrame(label="Agent Performance Log")
129
-
130
- run_button.click(
131
- fn=run_and_submit_all,
132
- outputs=[status_output, results_table]
133
- )
134
 
135
  if __name__ == "__main__":
136
- demo.launch()
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ from smolagents import CodeAgent, OpenAIServerModel, tool
6
+ from openai import OpenAI
7
+ import base64
8
 
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
+ # --- Custom Tools ---
12
+
13
@tool
def search_web(query: str) -> str:
    """Search the web with DuckDuckGo and return the top results as text.

    Args:
        query: The search query string to look up.

    Returns:
        Up to five results, each formatted as a "Title / URL / Snippet" block
        and separated by blank lines; "No results found." when the search
        yields nothing; or a "Search failed: ..." message if anything raised.
    """
    try:
        # Imported inside the try so an ImportError is reported to the agent
        # as a failed search rather than propagating to the caller.
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=5))

        if not results:
            # An empty string gives the agent nothing to reason about.
            return "No results found."

        return "\n\n".join(
            f"Title: {r.get('title', '')}\n"
            f"URL: {r.get('href', '')}\n"
            f"Snippet: {r.get('body', '')}"
            for r in results
        )
    except Exception as e:
        return f"Search failed: {e}"
26
+
27
@tool
def visit_url(url: str) -> str:
    """Fetch a webpage and return its text content with the markup removed.

    Args:
        url: The full URL (including the scheme) of the page to download.

    Returns:
        At most the first 5000 characters of the page text with tags removed,
        HTML entities decoded and whitespace collapsed, or a
        "Failed to fetch URL: ..." message if the request or parsing raised.
    """
    import html
    import re

    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        resp = requests.get(url, headers=headers, timeout=15)
        resp.raise_for_status()

        # Drop <script>/<style> elements together with their bodies first;
        # otherwise inline JS/CSS survives tag stripping and uses up the
        # character budget below.
        text = re.sub(r"(?is)<(script|style)\b.*?</\1\s*>", " ", resp.text)
        # Basic HTML stripping: replace every remaining tag with a space.
        text = re.sub(r"<[^>]+>", " ", text)
        # Decode entities such as &amp; / &nbsp; before collapsing whitespace.
        text = html.unescape(text)
        text = re.sub(r"\s+", " ", text).strip()
        return text[:5000]  # limit to 5000 chars
    except Exception as e:
        return f"Failed to fetch URL: {e}"
41
+
42
@tool
def wikipedia_search(topic: str) -> str:
    """Look up a topic on English Wikipedia and return its summary extract.

    The topic is used as an article title for the REST "page summary"
    endpoint; it is not a full-text search.

    Args:
        topic: The Wikipedia article title to look up, e.g. "Alan Turing".

    Returns:
        The summary extract of the article, "No summary found." when the
        response carries no extract, or a "Wikipedia search failed: ..."
        message if the request raised (including HTTP error statuses).
    """
    from urllib.parse import quote

    try:
        # Percent-encode the title so characters like "/", "?" or "#" stay
        # part of the path segment instead of altering the request URL.
        title = quote(topic.strip().replace(" ", "_"), safe="")
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"
        # Send the same User-Agent visit_url uses; Wikimedia asks API clients
        # to identify themselves.
        headers = {"User-Agent": "Mozilla/5.0"}
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        # "or" also covers an extract that is present but empty.
        return data.get("extract") or "No summary found."
    except Exception as e:
        return f"Wikipedia search failed: {e}"
53
+
54
@tool
def analyze_image_from_url(image_url: str, question: str) -> str:
    """Answer a question about an image at a URL using GPT-4o vision.

    Args:
        image_url: URL of the image to analyze.
        question: The question to ask about the image.

    Returns:
        The model's answer text (empty string if the model returned no
        content), or an "Image analysis failed: ..." message if client
        creation or the API call raised.
    """
    try:
        # Build the client inside the try so a missing or invalid API key is
        # reported as a tool failure string rather than an uncaught exception.
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        user_message = {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": question},
            ],
        }
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[user_message],
            max_tokens=500,
        )
        # message.content may be None; keep the declared str return type.
        return response.choices[0].message.content or ""
    except Exception as e:
        return f"Image analysis failed: {e}"
73
+
74
@tool
def analyze_task_file(task_id: str, question: str) -> str:
    """Download the file attached to a GAIA task and answer a question about it.

    The response's content type decides how the file is handled: images go to
    GPT-4o vision, text-like files (text/CSV/JSON/HTML) are sent to GPT-4o as
    text truncated to 8000 characters, and audio is transcribed with Whisper.
    Any other content type is reported as unsupported.

    Args:
        task_id: The GAIA task ID whose attached file should be downloaded.
        question: What to ask about the file's contents.

    Returns:
        The model's answer, the audio transcript, a note that the format is
        not supported, or a "File analysis failed: ..." message on any error.
    """
    import tempfile

    file_url = f"{DEFAULT_API_URL}/files/{task_id}"

    try:
        resp = requests.get(file_url, timeout=30)
        resp.raise_for_status()

        content_type = resp.headers.get("content-type", "")
        file_bytes = resp.content

        lowered = content_type.lower()
        # Bare media type without parameters such as "; charset=utf-8", so
        # they cannot leak into the data URL or the audio file suffix.
        mime_type = lowered.split(";")[0].strip()

        is_image = any(x in lowered for x in ("image", "png", "jpeg", "jpg", "gif", "webp"))
        is_text = any(x in lowered for x in ("text", "csv", "json", "html"))
        is_audio = any(x in lowered for x in ("audio", "mp3", "wav", "m4a", "ogg"))

        if not (is_image or is_text or is_audio):
            return f"File downloaded ({len(file_bytes)} bytes, type: {content_type}) but format not supported for analysis."

        # Created inside the try so a missing API key yields a failure string.
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

        # Branch priority matches the original chain: image, then text, then audio.
        if is_image:
            # Image files -> vision, passed inline as a base64 data URL.
            b64 = base64.b64encode(file_bytes).decode()
            content = [
                {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64}"}},
                {"type": "text", "text": question},
            ]
        elif is_text:
            # Text/CSV/code files, truncated to 8000 characters.
            text_content = file_bytes.decode("utf-8", errors="ignore")[:8000]
            content = f"File content:\n{text_content}\n\nQuestion: {question}"
        else:
            # Audio -> Whisper transcription. The bytes are written to a file
            # inside a temporary directory so that both the file handle and
            # the file on disk are released when the blocks exit.
            suffix = "." + mime_type.split("/")[-1]
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, "audio" + suffix)
                with open(audio_path, "wb") as out:
                    out.write(file_bytes)
                with open(audio_path, "rb") as audio:
                    transcript = client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio,
                    )
            return f"Transcript: {transcript.text}\n\nAnswer to '{question}': {transcript.text}"

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": content}],
            max_tokens=500,
        )
        # message.content may be None; keep the declared str return type.
        return response.choices[0].message.content or ""

    except Exception as e:
        return f"File analysis failed: {e}"
142
+
143
+
144
+ # --- Agent ---
145
+
146
class BasicAgent:
    """GPT-4o backed smolagents ``CodeAgent`` equipped with this module's tools."""

    def __init__(self):
        """Create the model and the agent.

        Raises:
            ValueError: if the ``OPENAI_API_KEY`` environment variable is
                unset or empty.
        """
        openai_key = os.getenv("OPENAI_API_KEY")
        if not openai_key:
            raise ValueError("OPENAI_API_KEY is missing!")

        self.model = OpenAIServerModel(model_id="gpt-4o", api_key=openai_key)

        toolbox = [
            search_web,
            visit_url,
            wikipedia_search,
            analyze_image_from_url,
            analyze_task_file,
        ]
        self.agent = CodeAgent(
            tools=toolbox,
            model=self.model,
            add_base_tools=True,
            max_steps=12,
        )
        print("✅ OpenAI-powered Agent initialized.")

    def __call__(self, question: str, task_id: str = "") -> str:
        """Run the agent on one task and return its answer as stripped text.

        Args:
            question: The task text.
            task_id: The task ID, embedded in the prompt so the agent can pass
                it to ``analyze_task_file``.

        Returns:
            The agent's result converted to ``str`` and stripped, or
            "Error finding answer." if the run raised.
        """
        print(f"DEBUG: Agent received question: {question[:100]}...")

        # One entry per prompt line; the empty entry yields the blank line
        # between the task text and the instruction list.
        prompt_lines = [
            "You are a precise research agent solving GAIA benchmark tasks.",
            f"Task ID: {task_id}",
            f"Task: {question}",
            "",
            "Instructions:",
            f"- If the task mentions a file or attachment, use analyze_task_file(task_id='{task_id}', question=...) first.",
            "- If the task mentions a YouTube URL, search for information about its content.",
            "- Use wikipedia_search for factual lookups before broader web search.",
            "- Provide ONLY the final direct answer. No explanations, no 'The answer is...', "
            "just the exact value or fact requested.",
        ]
        prompt = "\n".join(prompt_lines)

        try:
            answer = self.agent.run(prompt)
            return str(answer).strip()
        except Exception as e:
            print(f"❌ Error: {e}")
            return "Error finding answer."
192
 
193
+
194
+ # --- Gradio + Submission ---
195
+
196
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
197
  if profile:
198
  username = f"{profile.username}"
199
  print(f"Logged in as: {username}")
200
  else:
201
+ return "Please Login to Hugging Face first.", None
202
 
203
+ space_id = os.getenv("SPACE_ID")
 
204
  api_url = DEFAULT_API_URL
205
  questions_url = f"{api_url}/questions"
206
  submit_url = f"{api_url}/submit"
207
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
208
 
 
209
  try:
210
  agent = BasicAgent()
211
  except Exception as e:
212
  return f"Initialization Failed: {e}", None
213
 
 
214
  try:
215
  response = requests.get(questions_url, timeout=15)
216
  response.raise_for_status()
 
218
  except Exception as e:
219
  return f"Error fetching questions: {e}", None
220
 
 
221
  results_log = []
222
  answers_payload = []
223
+
 
224
  for item in questions_data:
225
+ task_id = item.get("task_id", "")
226
+ question_text = item.get("question", "")
 
227
  try:
228
+ submitted_answer = agent(question_text, task_id=task_id)
229
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
230
+ results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": submitted_answer})
231
  except Exception as e:
232
+ results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": f"Error: {e}"})
233
 
 
234
  submission_data = {
235
+ "username": username.strip(),
236
+ "agent_code": agent_code,
237
  "answers": answers_payload
238
  }
239
 
 
241
  response = requests.post(submit_url, json=submission_data, timeout=60)
242
  response.raise_for_status()
243
  res = response.json()
 
244
  status = (
245
  f"Submission Successful!\n"
246
  f"Score: {res.get('score')}% ({res.get('correct_count')}/{res.get('total_attempted')})\n"
 
250
  except Exception as e:
251
  return f"Submission Failed: {e}", pd.DataFrame(results_log)
252
 
253
+
254
# Gradio front end: a login button, a run button, and two outputs (status
# text and the per-task results table) filled in by run_and_submit_all.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    for markdown_text in (
        "# 🤖 GAIA Agent Evaluation",
        "Click Login, then Run to evaluate your agent on the GAIA dataset.",
    ):
        gr.Markdown(markdown_text)

    gr.LoginButton()
    run_button = gr.Button(value="🚀 Run Evaluation & Submit", variant="primary")

    status_output = gr.Textbox(label="Status", lines=4)
    results_table = gr.DataFrame(label="Agent Performance Log")

    # The handler returns (status message, results table), in that order.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)