Raj989898 committed on
Commit
df815fa
·
verified ·
1 Parent(s): 600fd70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +174 -189
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
@@ -6,183 +7,176 @@ import tempfile
6
 
7
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
8
 
9
- SYSTEM_PROMPT = """You are an expert question-answering agent participating in the GAIA benchmark.
10
 
11
- CRITICAL RULES:
12
- 1. Your final response must be ONLY the answer β€” no explanation, no preamble, no "The answer is", no "FINAL ANSWER:"
13
- 2. Be as concise as possible: just the name, number, word, or phrase requested
14
- 3. For numbers: use digits (e.g., "42" not "forty-two") unless the question asks for words
15
- 4. For lists: separate items with commas
16
- 5. Always search the web for factual questions you're not 100% sure about
17
- 6. For files (Excel, Python, etc.): download them and analyze carefully
18
- 7. Think carefully before answering β€” exact match grading is used
19
- """
 
 
 
 
 
 
 
 
 
20
 
21
- class BasicAgent:
22
- def __init__(self):
23
- print("Initializing GAIA Agent...")
24
- from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
25
- import urllib.request
26
-
27
- # Tool to download task files from the API
28
- @tool
29
- def download_task_file(task_id: str) -> str:
30
- """
31
- Downloads a file associated with a GAIA task and saves it locally.
32
- Returns the local file path where it was saved.
33
- Args:
34
- task_id: The GAIA task ID whose file you want to download
35
- """
36
- url = f"{DEFAULT_API_URL}/files/{task_id}"
37
- try:
38
- resp = requests.get(url, timeout=30)
39
- if resp.status_code == 200:
40
- # Try to get filename from headers
41
- cd = resp.headers.get("content-disposition", "")
42
- fname = "task_file"
43
- if "filename=" in cd:
44
- fname = cd.split("filename=")[-1].strip().strip('"')
45
-
46
- ext = os.path.splitext(fname)[-1] or ".bin"
47
- tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
48
- tmp.write(resp.content)
49
- tmp.close()
50
- return f"File saved to: {tmp.name} (original name: {fname})"
51
- else:
52
- return f"No file found for task_id={task_id} (status {resp.status_code})"
53
- except Exception as e:
54
- return f"Error downloading file: {e}"
55
-
56
- @tool
57
- def read_excel_file(file_path: str) -> str:
58
- """
59
- Reads an Excel file and returns its contents as a string summary.
60
- Args:
61
- file_path: Local path to the Excel file
62
- """
63
- try:
64
- df = pd.read_excel(file_path)
65
- return f"Shape: {df.shape}\nColumns: {list(df.columns)}\n\nFirst rows:\n{df.head(20).to_string()}\n\nFull data:\n{df.to_string()}"
66
- except Exception as e:
67
- return f"Error reading Excel: {e}"
68
-
69
- @tool
70
- def read_python_file(file_path: str) -> str:
71
- """
72
- Reads a Python file and returns its contents.
73
- Args:
74
- file_path: Local path to the Python file
75
- """
76
- try:
77
- with open(file_path, "r") as f:
78
- return f.read()
79
- except Exception as e:
80
- return f"Error reading file: {e}"
81
-
82
- @tool
83
- def fetch_webpage(url: str) -> str:
84
- """
85
- Fetches the text content of a webpage.
86
- Args:
87
- url: The URL to fetch
88
- """
89
- try:
90
- headers = {"User-Agent": "Mozilla/5.0"}
91
- resp = requests.get(url, headers=headers, timeout=15)
92
- resp.raise_for_status()
93
- # Basic HTML stripping
94
- text = resp.text
95
- import re
96
- text = re.sub(r'<[^>]+>', ' ', text)
97
- text = re.sub(r'\s+', ' ', text).strip()
98
- return text[:5000]
99
- except Exception as e:
100
- return f"Error fetching webpage: {e}"
101
-
102
- # Try stronger models in order of preference
103
- model = None
104
- for model_id in [
105
- "Qwen/Qwen2.5-72B-Instruct",
106
- "meta-llama/Llama-3.3-70B-Instruct",
107
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
108
- "meta-llama/Meta-Llama-3-8B-Instruct",
109
- ]:
110
- try:
111
- from smolagents import InferenceClientModel
112
- model = InferenceClientModel(model_id=model_id)
113
- # Quick test
114
- print(f"Using model: {model_id}")
115
- break
116
- except Exception as e:
117
- print(f"Model {model_id} failed: {e}, trying next...")
118
-
119
- if model is None:
120
- raise RuntimeError("No model could be loaded!")
121
-
122
- self.agent = CodeAgent(
123
- tools=[
124
- DuckDuckGoSearchTool(),
125
- download_task_file,
126
- read_excel_file,
127
- read_python_file,
128
- fetch_webpage,
129
- ],
130
- model=model,
131
- max_steps=10,
132
- verbosity_level=1,
133
- )
134
- print("Agent ready.")
135
 
136
- def __call__(self, question: str, task_id: str = "") -> str:
137
- print(f"\n{'='*60}")
138
- print(f"Task ID: {task_id}")
139
- print(f"Question: {question[:200]}")
140
- print('='*60)
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- try:
143
- # Include task_id in prompt so agent can download files if needed
144
- task_context = f"[Task ID for file download: {task_id}]\n\n" if task_id else ""
145
-
146
- prompt = (
147
- f"{task_context}"
148
- f"Question: {question}\n\n"
149
- f"Instructions: Answer with ONLY the final answer. "
150
- f"No explanation. No prefix like 'The answer is'. "
151
- f"Just the bare answer that would pass exact-match grading. "
152
- f"If the question involves a file, use the download_task_file tool with the task ID above."
153
- )
154
 
155
- result = self.agent.run(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
- if result is None:
158
- return ""
 
 
 
 
 
 
 
 
 
159
 
160
- answer = str(result).strip()
161
 
162
- # Strip common unwanted prefixes
163
- for prefix in [
164
- "FINAL ANSWER:", "Final Answer:", "Answer:",
165
- "The answer is:", "The answer is",
166
- "**Answer:**", "**Final Answer:**",
167
- ]:
168
- if answer.lower().startswith(prefix.lower()):
169
- answer = answer[len(prefix):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- # Take only first line
172
- answer = answer.split("\n")[0].strip()
173
- # Strip surrounding quotes/asterisks
174
- answer = answer.strip('"').strip("'").strip("*").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- print(f"ANSWER: {answer}")
 
 
 
 
 
 
 
 
 
 
 
 
177
  return answer
178
 
179
  except Exception as e:
180
- print(f"Agent error: {e}")
181
- import traceback
182
- traceback.print_exc()
183
  return ""
184
 
185
 
 
 
186
  def run_and_submit_all(profile: gr.OAuthProfile | None):
187
  space_id = os.getenv("SPACE_ID", "")
188
 
@@ -190,18 +184,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
190
  return "⚠️ Please log in with Hugging Face first.", None
191
 
192
  username = profile.username
193
- print(f"Logged in as: {username}")
194
-
195
- api_url = DEFAULT_API_URL
196
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
197
 
198
  try:
199
  agent = BasicAgent()
200
- except Exception as e:
201
- return f"❌ Error initializing agent: {e}", None
202
 
203
  try:
204
- resp = requests.get(f"{api_url}/questions", timeout=15)
205
  resp.raise_for_status()
206
  questions_data = resp.json()
207
  print(f"Fetched {len(questions_data)} questions.")
@@ -217,34 +208,29 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
217
  if not task_id or not question_text:
218
  continue
219
 
220
- print(f"\n[{i+1}/{len(questions_data)}] Task: {task_id}")
221
  try:
222
  submitted_answer = agent(question_text, task_id=task_id)
223
  except Exception as e:
224
  submitted_answer = ""
225
- print(f"Error on task {task_id}: {e}")
226
 
227
- answers_payload.append({
228
- "task_id": task_id,
229
- "submitted_answer": submitted_answer
230
- })
231
  results_log.append({
232
  "Task ID": task_id,
233
- "Question": question_text[:120] + "..." if len(question_text) > 120 else question_text,
234
  "Submitted Answer": submitted_answer,
235
  })
236
 
237
  if not answers_payload:
238
  return "❌ No answers generated.", None
239
 
240
- submission_data = {
241
- "username": username,
242
- "agent_code": agent_code,
243
- "answers": answers_payload,
244
- }
245
-
246
  try:
247
- resp = requests.post(f"{api_url}/submit", json=submission_data, timeout=120)
 
 
 
 
248
  resp.raise_for_status()
249
  result = resp.json()
250
  status = (
@@ -254,7 +240,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
254
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
255
  f"Message: {result.get('message', '')}"
256
  )
257
- print(status)
258
  return status, pd.DataFrame(results_log)
259
  except Exception as e:
260
  return f"❌ Submission failed: {e}", pd.DataFrame(results_log)
@@ -262,17 +247,17 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
262
 
263
  with gr.Blocks() as demo:
264
  gr.Markdown("# πŸ€– GAIA Agent β€” Final Assignment")
265
- gr.Markdown("Log in with your Hugging Face account, then click **Run & Submit** to evaluate your agent.")
266
-
 
 
 
 
267
  gr.LoginButton()
268
  run_button = gr.Button("πŸš€ Run Agent & Submit Answers", variant="primary")
269
  status_output = gr.Textbox(label="Status / Score", lines=6, interactive=False)
270
  results_table = gr.DataFrame(label="Results", wrap=True)
271
-
272
- run_button.click(
273
- fn=run_and_submit_all,
274
- outputs=[status_output, results_table],
275
- )
276
 
277
  if __name__ == "__main__":
278
  demo.launch()
 
1
  import os
2
+ import re
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
 
7
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # ── file helpers ──────────────────────────────────────────────────────────────
11
 
12
def download_task_file(task_id: str) -> tuple[str | None, str | None]:
    """Download the file attached to a GAIA task into a local temporary file.

    Args:
        task_id: Identifier of the task; the file is requested from
            ``{DEFAULT_API_URL}/files/{task_id}``.

    Returns:
        ``(local_path, original_name)`` on success, or ``(None, None)`` when
        the server answers with anything other than HTTP 200 or when the
        download / write to disk raises. The temporary file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    # Function-scope import keeps this block self-contained.
    from email.message import Message

    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        resp = requests.get(url, timeout=30)
        if resp.status_code != 200:
            return None, None

        fname = "task_file"
        cd = resp.headers.get("content-disposition", "")
        if cd:
            # Let the stdlib header parser deal with quoting, extra parameters
            # (e.g. a trailing "; filename*=...") and RFC 2231 encoding instead
            # of splitting on "filename=" by hand.
            header = Message()
            header["Content-Disposition"] = cd
            parsed = (header.get_filename() or "").strip().replace("\\", "/")
            # basename() drops any directory part so the suffix derived below
            # can never contain a path separator.
            fname = os.path.basename(parsed) or fname

        ext = os.path.splitext(fname)[-1] or ".bin"
        # The context manager closes the handle even if write() raises.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp.write(resp.content)
        return tmp.name, fname
    except Exception as e:
        # Deliberate best-effort boundary: a missing attachment must not stop
        # the question from being answered.
        print(f"File download error: {e}")
        return None, None
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
def read_file_contents(local_path: str, fname: str) -> str:
    """Return a text rendering of a downloaded task file.

    The format is chosen from the extension of ``fname`` (case-insensitive),
    not from ``local_path``.

    Args:
        local_path: Path of the file on disk.
        fname: Original file name, used only to determine the file type.

    Returns:
        - ``.xlsx`` / ``.xls``: shape, column list and the full table as text.
        - ``.csv``: shape, column list and the full table as text.
        - ``.py`` / ``.txt`` / ``.md`` / ``.json``: the file's text.
        - anything else: ``"Binary file: <fname>"`` (the file is not opened).
        - on any read/parse failure: ``"Error reading file: <error>"``.
    """
    ext = os.path.splitext(fname)[-1].lower()
    try:
        if ext in (".xlsx", ".xls"):
            df = pd.read_excel(local_path)
            return f"Excel file — shape {df.shape}\nColumns: {list(df.columns)}\n\n{df.to_string()}"
        if ext == ".csv":
            df = pd.read_csv(local_path)
            return f"CSV — shape {df.shape}\nColumns: {list(df.columns)}\n\n{df.to_string()}"
        if ext in (".py", ".txt", ".md", ".json"):
            # Explicit UTF-8 avoids depending on the platform default encoding;
            # errors="replace" keeps the readable part of a file that contains
            # a few undecodable bytes instead of discarding all of it.
            with open(local_path, encoding="utf-8", errors="replace") as f:
                return f.read()
        return f"Binary file: {fname}"
    except Exception as e:
        # Best-effort: the caller embeds whatever string comes back in a prompt.
        return f"Error reading file: {e}"
48
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
def search_web(query: str) -> str:
    """Run a DuckDuckGo text search and format the top hits as plain text.

    Args:
        query: Search string. An empty or whitespace-only query is not sent
            to the search backend.

    Returns:
        Up to five ``"Title: ...\\nSnippet: ..."`` entries separated by blank
        lines; ``"No results found."`` when there is nothing to report; or
        ``"Search error: <error>"`` if the import or the search raises.
    """
    if not query or not query.strip():
        return "No results found."
    try:
        # Imported lazily so a missing package turns into a "Search error"
        # string instead of breaking module import.
        from duckduckgo_search import DDGS

        with DDGS() as ddgs:
            results = list(ddgs.text(query, max_results=5))
        if not results:
            return "No results found."
        return "\n\n".join(
            f"Title: {r.get('title','')}\nSnippet: {r.get('body','')}"
            for r in results
        )
    except Exception as e:
        # Best-effort: callers look for the "Search error" marker.
        return f"Search error: {e}"
63
+
64
 
65
# Lead-in phrases models tend to put before the answer, most specific first.
# Matching is case-insensitive.
_ANSWER_PREFIXES = (
    "**Final Answer:**",
    "**Answer:**",
    "Final Answer:",
    "Answer:",
    "The answer is:",
    "The answer is",
)

# Characters that may wrap an answer: whitespace, quotes, markdown emphasis.
_ANSWER_WRAPPERS = " \t\r\"'*"


def _strip_answer_prefixes(text: str) -> str:
    """Remove every leading lead-in phrase (repeatedly) from ``text``."""
    stripped = True
    while stripped:
        stripped = False
        for prefix in _ANSWER_PREFIXES:
            if text[:len(prefix)].lower() != prefix.lower():
                continue
            rest = text[len(prefix):]
            # A prefix ending in a letter must stop at a word boundary, so
            # "The answer isn't ..." is not truncated to "n't ...".
            if prefix[-1].isalnum() and rest[:1].isalnum():
                continue
            text = rest.strip()
            stripped = True
            break
    return text


def clean_answer(text: str) -> str:
    """Reduce a raw model reply to the bare answer used for exact-match grading.

    Steps: trim whitespace, drop known lead-in phrases such as
    ``"FINAL ANSWER:"``, keep only the first line, then remove surrounding
    quotes / asterisks in any order or nesting.

    Args:
        text: Raw model output. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned single-line answer.
    """
    if not text:
        return ""
    first_line = _strip_answer_prefixes(text.strip()).partition("\n")[0]
    first_line = first_line.strip(_ANSWER_WRAPPERS)
    # A lead-in can hide inside markdown emphasis (e.g. "**Answer: 42**"), so
    # try the prefixes once more now that the wrappers are gone.
    return _strip_answer_prefixes(first_line).strip(_ANSWER_WRAPPERS)
76
 
 
77
 
78
+ # ── Gemini agent ──────────────────────────────────────────────────────────────
79
+
80
class BasicAgent:
    """Question-answering agent backed by the Gemini ``generateContent`` REST API.

    For each question the agent optionally attaches the task's file contents
    or web-search snippets to the prompt, asks Gemini, and post-processes the
    reply with ``clean_answer``.
    """

    # Substrings (matched against the lower-cased question) that trigger a
    # web search when no file context is available.
    _SEARCH_TRIGGERS = (
        "who", "what", "when", "where", "how many", "which", "name",
        "album", "studio", "olympic", "pitcher", "actor", "article",
        "specimen", "country", "competition", "veterinarian", "surname",
        "wikipedia", "published", "season", "walks", "bats", "malko",
        "vietnamese", "kuznetzov", "tamai", "petersen", "polish",
    )

    # Marker that search_web() puts at the start of its failure string.
    _SEARCH_ERROR_PREFIX = "Search error:"

    def __init__(self):
        """Read ``GEMINI_API_KEY`` from the environment and prepare the request target.

        Raises:
            RuntimeError: If ``GEMINI_API_KEY`` is unset or empty.
        """
        self.api_key = os.getenv("GEMINI_API_KEY", "")
        if not self.api_key:
            raise RuntimeError(
                "GEMINI_API_KEY secret is not set!\n"
                "Get a FREE key at https://aistudio.google.com/app/apikey\n"
                "Then add it in Space Settings → Secrets → GEMINI_API_KEY"
            )
        # The key travels in a request header rather than the query string:
        # requests' exception messages include the URL, and __call__ prints
        # those messages, which would otherwise leak the key into the logs.
        self.endpoint = (
            "https://generativelanguage.googleapis.com/v1beta/models/"
            "gemini-1.5-flash:generateContent"
        )
        self.headers = {"x-goog-api-key": self.api_key}
        print("Agent ready (Gemini 1.5 Flash — free tier)")

    def _ask_gemini(self, prompt: str) -> str:
        """Send ``prompt`` to Gemini and return the first candidate's text.

        Returns:
            The stripped reply text, or ``""`` if the response JSON does not
            have the expected ``candidates[0].content.parts[0].text`` shape.

        Raises:
            requests.RequestException: On network failure or a non-2xx status
                (via ``raise_for_status``).
        """
        body = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "temperature": 0.1,
                "maxOutputTokens": 512,
            },
            "systemInstruction": {
                "parts": [{
                    "text": (
                        "You are a precise answer extraction agent for the GAIA benchmark. "
                        "Reply with ONLY the final answer — no explanation, no preamble, "
                        "no 'The answer is', no 'FINAL ANSWER'. "
                        "Give the shortest correct answer: a name, number, word, or short phrase. "
                        "Exact match grading is used."
                    )
                }]
            },
        }
        resp = requests.post(self.endpoint, headers=self.headers, json=body, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        try:
            return data["candidates"][0]["content"]["parts"][0]["text"].strip()
        except (KeyError, IndexError, TypeError) as e:
            # TypeError covers a response whose nesting has unexpected types.
            print(f"Gemini parse error: {e} | Response: {data}")
            return ""

    def __call__(self, question: str, task_id: str = "") -> str:
        """Answer one question.

        Args:
            question: The question text.
            task_id: Optional task identifier; when given, the task's attached
                file (if any) is downloaded and added to the prompt.

        Returns:
            The cleaned answer, or ``""`` if the Gemini request fails.
        """
        print(f"\n{'='*60}\nTask: {task_id}\nQ: {question[:200]}")

        # 1. Try to get attached file
        file_context = ""
        if task_id:
            local_path, fname = download_task_file(task_id)
            if local_path and fname:
                try:
                    contents = read_file_contents(local_path, fname)
                finally:
                    # The download is written with delete=False; remove it once
                    # read so temp files do not pile up across questions.
                    try:
                        os.remove(local_path)
                    except OSError:
                        pass  # cleanup is best-effort
                file_context = f"\n\n--- Attached file ({fname}) ---\n{contents[:3000]}\n---\n"
                print(f"Downloaded: {fname}")

        # 2. Web search for factual questions (only when no file was attached)
        search_context = ""
        lowered = question.lower()
        if not file_context and any(t in lowered for t in self._SEARCH_TRIGGERS):
            print("Searching web...")
            search_results = search_web(question[:200])
            # Check for the failure marker itself: a plain "error" substring
            # test would also throw away valid snippets that mention the word.
            if not search_results.startswith(self._SEARCH_ERROR_PREFIX):
                search_context = f"\n\n--- Web search results ---\n{search_results[:2000]}\n---\n"

        # 3. Build prompt and ask Gemini
        prompt = (
            f"Question: {question}"
            f"{file_context}"
            f"{search_context}"
            "\n\nAnswer with ONLY the final answer. No explanation whatsoever."
        )

        try:
            answer = self._ask_gemini(prompt)

            # If answer is too long, ask Gemini to extract just the key part
            if len(answer.split()) > 15:
                extract_prompt = (
                    f"Extract ONLY the final short answer (name, number, or brief phrase) "
                    f"from this text. Nothing else:\n\n{answer}"
                )
                answer = self._ask_gemini(extract_prompt)

            answer = clean_answer(answer)
            print(f"ANSWER: '{answer}'")
            return answer

        except Exception as e:
            # Best-effort boundary: one failed question must not abort the run.
            print(f"Gemini error: {e}")
            return ""
176
 
177
 
178
+ # ── gradio app ────────────────────────────────────────────────────────────────
179
+
180
  def run_and_submit_all(profile: gr.OAuthProfile | None):
181
  space_id = os.getenv("SPACE_ID", "")
182
 
 
184
  return "⚠️ Please log in with Hugging Face first.", None
185
 
186
  username = profile.username
 
 
 
187
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
188
 
189
  try:
190
  agent = BasicAgent()
191
+ except RuntimeError as e:
192
+ return f"❌ {e}", None
193
 
194
  try:
195
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
196
  resp.raise_for_status()
197
  questions_data = resp.json()
198
  print(f"Fetched {len(questions_data)} questions.")
 
208
  if not task_id or not question_text:
209
  continue
210
 
211
+ print(f"\n[{i+1}/{len(questions_data)}]")
212
  try:
213
  submitted_answer = agent(question_text, task_id=task_id)
214
  except Exception as e:
215
  submitted_answer = ""
216
+ print(f"Error: {e}")
217
 
218
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
 
 
219
  results_log.append({
220
  "Task ID": task_id,
221
+ "Question": question_text[:120] + ("..." if len(question_text) > 120 else ""),
222
  "Submitted Answer": submitted_answer,
223
  })
224
 
225
  if not answers_payload:
226
  return "❌ No answers generated.", None
227
 
 
 
 
 
 
 
228
  try:
229
+ resp = requests.post(
230
+ f"{DEFAULT_API_URL}/submit",
231
+ json={"username": username, "agent_code": agent_code, "answers": answers_payload},
232
+ timeout=120,
233
+ )
234
  resp.raise_for_status()
235
  result = resp.json()
236
  status = (
 
240
  f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
241
  f"Message: {result.get('message', '')}"
242
  )
 
243
  return status, pd.DataFrame(results_log)
244
  except Exception as e:
245
  return f"❌ Submission failed: {e}", pd.DataFrame(results_log)
 
247
 
248
# Gradio UI: setup instructions, Hugging Face login, a run button, and two
# outputs (status text and per-question results) filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Agent — Final Assignment")
    gr.Markdown(
        "### Setup (one-time, free!)\n"
        "1. Get a **free** Gemini API key at [aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)\n"
        "2. In your Space → **Settings → Secrets** → add `GEMINI_API_KEY`\n"
        "3. Log in below and click Run!"
    )
    gr.LoginButton()
    run_button = gr.Button("🚀 Run Agent & Submit Answers", variant="primary")
    status_output = gr.Textbox(label="Status / Score", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Results", wrap=True)
    # No explicit inputs: run_and_submit_all's only parameter is annotated
    # gr.OAuthProfile | None — presumably injected by Gradio from the login
    # session; TODO confirm against the Gradio OAuth docs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])


if __name__ == "__main__":
    demo.launch()