Raj989898 committed on
Commit
d5a51fc
·
verified ·
1 Parent(s): 67eace3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -47
app.py CHANGED
@@ -1,11 +1,10 @@
1
- # app.py — safe GAIA runner (paste entire file, replace existing)
2
  import os
3
  import time
4
  import requests
5
  import pandas as pd
6
  import gradio as gr
7
 
8
- # ddgs (DuckDuckGo search) — safe, lightweight
9
  try:
10
  from ddgs import DDGS
11
  except Exception:
@@ -14,15 +13,13 @@ except Exception:
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
  # -------------------------
17
- # LLM (Groq) caller (safe)
18
  # -------------------------
19
  _last_call = 0
20
- def call_groq(api_key, prompt, max_tokens=128):
21
- """
22
- Call Groq API. Raises on network/HTTP failure.
23
- """
24
  global _last_call
25
- # rate limit tiny delay
26
  if time.time() - _last_call < 1.5:
27
  time.sleep(1.5)
28
  _last_call = time.time()
@@ -41,27 +38,37 @@ def call_groq(api_key, prompt, max_tokens=128):
41
  r = requests.post(url, headers=headers, json=body, timeout=60)
42
  r.raise_for_status()
43
  data = r.json()
44
- # defensive
45
- return data["choices"][0]["message"]["content"].strip()
 
 
 
 
 
46
 
47
  # -------------------------
48
- # Clean / normalise answers
49
  # -------------------------
50
  def clean_answer(text: str) -> str:
51
  if text is None:
52
  return ""
53
  text = str(text).strip()
54
  prefixes = [
55
- "FINAL ANSWER:", "Final Answer:", "Answer:", "The answer is", "Result:"
 
 
 
 
56
  ]
57
  for p in prefixes:
58
  if text.lower().startswith(p.lower()):
59
- text = text[len(p):].strip()
60
  # only first line
61
  text = text.splitlines()[0].strip()
62
- # strip quotes/asterisks
63
  return text.strip('"').strip("'").strip("*").strip()
64
 
 
65
  # -------------------------
66
  # Web search (ddgs)
67
  # -------------------------
@@ -72,31 +79,25 @@ def web_search_snippets(query: str, max_results: int = 5) -> str:
72
  try:
73
  with DDGS() as ddgs:
74
  for i, r in enumerate(ddgs.text(query, max_results=max_results)):
75
- # r typically contains 'title' and 'body'
76
  title = r.get("title", "")
77
  body = r.get("body", "")
78
  snippets.append(f"{title} — {body}")
79
- if i+1 >= max_results:
80
  break
81
  except Exception:
82
- # swallow search errors
83
  return ""
84
  return "\n".join(snippets)
85
 
 
86
  # -------------------------
87
  # Download task file helper
88
  # -------------------------
89
  def download_task_file(task_id: str):
90
- """
91
- Returns (local_path, filename) or (None, None) if not found.
92
- Saves into /tmp and returns path.
93
- """
94
  try:
95
  url = f"{DEFAULT_API_URL}/files/{task_id}/download"
96
  r = requests.get(url, timeout=20)
97
  if r.status_code != 200:
98
  return None, None
99
- # try to derive filename
100
  cd = r.headers.get("content-disposition", "")
101
  filename = ""
102
  if "filename=" in cd:
@@ -112,14 +113,13 @@ def download_task_file(task_id: str):
112
  except Exception:
113
  return None, None
114
 
 
115
  # -------------------------
116
- # BasicAgent (safe, retry)
117
  # -------------------------
118
  class BasicAgent:
119
  def __init__(self):
120
- # pick up key if available
121
  self.key = os.getenv("GROQ_API_KEY", "").strip() or None
122
- # quick status printed to logs
123
  print("BasicAgent initializing. GROQ key present:", bool(self.key), "DDGS available:", DDGS is not None)
124
 
125
  def ask_llm(self, prompt: str, max_tokens: int = 128) -> str:
@@ -140,25 +140,20 @@ class BasicAgent:
140
  return ""
141
 
142
  def fallback_from_search(self, question: str) -> str:
143
- # If no key or LLM fails, return the first useful snippet from web search
144
  snippets = web_search_snippets(question, max_results=4)
145
  if not snippets:
146
  return ""
147
- # pick first non-empty line and clean
148
  for line in snippets.splitlines():
149
  s = line.strip()
150
  if len(s) > 3:
151
- # take first sentence-like chunk
152
  sentence = s.split(".")[0].strip()
153
  return clean_answer(sentence)
154
  return ""
155
 
156
  def __call__(self, question: str, task_id: str = "") -> str:
157
  print("Received question:", question[:200])
158
- # prepare short context (search + file)
159
  context_parts = []
160
 
161
- # file if present
162
  if task_id:
163
  lp, fn = download_task_file(task_id)
164
  if lp and fn:
@@ -167,17 +162,14 @@ class BasicAgent:
167
  txt = f.read(4000)
168
  context_parts.append(f"File {fn} contents (truncated):\n{txt}")
169
  except Exception:
170
- # binary file or not readable; ignore
171
  context_parts.append(f"File {fn} exists but not included in context.")
172
 
173
- # web snippets
174
  search_snip = web_search_snippets(question, max_results=4)
175
  if search_snip:
176
  context_parts.append("Web snippets:\n" + search_snip[:3000])
177
 
178
  context = "\n\n".join(context_parts).strip()
179
 
180
- # construct LLM prompt
181
  prompt = f"""You are solving a GAIA benchmark question. Return ONLY the final answer, nothing else.
182
 
183
  Question:
@@ -187,12 +179,10 @@ Context:
187
  {context}
188
 
189
  Return ONLY the final answer."""
190
- # try LLM if key present
191
  if self.key:
192
  ans = self.solve_with_retries(prompt, attempts=3)
193
  if ans:
194
  return ans
195
- # fallback try one more time shorter prompt
196
  try:
197
  ans2 = self.ask_llm("Extract the single final short answer only:\n" + prompt, max_tokens=48)
198
  ans2 = clean_answer(ans2)
@@ -201,13 +191,12 @@ Return ONLY the final answer."""
201
  except Exception as e:
202
  print("LLM final fallback failed:", e)
203
 
204
- # final fallback from web search
205
  fb = self.fallback_from_search(question)
206
  if fb:
207
  return fb
208
- # last resort: empty string (the grader tolerates empties)
209
  return ""
210
 
 
211
  # -------------------------
212
  # Evaluation runner used by UI
213
  # -------------------------
@@ -215,9 +204,17 @@ def run_and_submit_all(profile):
215
  if not profile:
216
  return "Please login first", None
217
 
218
- username = profile.username
219
- print("User:", username)
 
 
 
 
 
 
 
220
 
 
221
  agent = BasicAgent()
222
 
223
  try:
@@ -230,15 +227,15 @@ def run_and_submit_all(profile):
230
  for q in questions:
231
  task_id = q.get("task_id")
232
  question = q.get("question", "")
233
- ans = agent(question, task_id)
 
 
 
 
234
  answers.append({"task_id": task_id, "submitted_answer": ans})
235
  logs.append({"task_id": task_id, "question": question, "answer": ans})
236
 
237
- payload = {
238
- "username": username,
239
- "agent_code": "", # optional: your space repo link
240
- "answers": answers
241
- }
242
  try:
243
  resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=30)
244
  resp.raise_for_status()
@@ -248,6 +245,7 @@ def run_and_submit_all(profile):
248
  except Exception as e:
249
  return f"Submission failed: {e}", pd.DataFrame(logs)
250
 
 
251
  # -------------------------
252
  # UI (minimal)
253
  # -------------------------
@@ -256,9 +254,9 @@ with gr.Blocks() as demo:
256
  gr.Markdown("Make sure you added `GROQ_API_KEY` in Settings → Secrets for best results.")
257
  gr.LoginButton()
258
  run_btn = gr.Button("Run Evaluation")
259
- status = gr.Textbox(label="Run status", lines=4)
260
  table = gr.DataFrame(label="Logs")
261
- run_btn.click(run_and_submit_all, outputs=[status, table])
262
 
263
  if __name__ == "__main__":
264
  demo.launch()
 
 
1
  import os
2
  import time
3
  import requests
4
  import pandas as pd
5
  import gradio as gr
6
 
7
+ # optional ddgs (duckduckgo) search
8
  try:
9
  from ddgs import DDGS
10
  except Exception:
 
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
  # -------------------------
16
+ # GROQ / LLM caller (safe)
17
  # -------------------------
18
  _last_call = 0
19
+
20
+
21
+ def call_groq(api_key: str, prompt: str, max_tokens: int = 128) -> str:
 
22
  global _last_call
 
23
  if time.time() - _last_call < 1.5:
24
  time.sleep(1.5)
25
  _last_call = time.time()
 
38
  r = requests.post(url, headers=headers, json=body, timeout=60)
39
  r.raise_for_status()
40
  data = r.json()
41
+ # defensive access
42
+ choice = data.get("choices") and data["choices"][0]
43
+ if not choice:
44
+ return ""
45
+ msg = choice.get("message") or {}
46
+ return msg.get("content", "").strip()
47
+
48
 
49
  # -------------------------
50
+ # Clean / normalize answers
51
  # -------------------------
def clean_answer(text: str) -> str:
    """Normalize a raw model reply into a bare answer string.

    Leading answer labels (for example ``FINAL ANSWER:``) are removed
    case-insensitively, only the first line of what remains is kept, and
    surrounding double quotes, single quotes and asterisks are stripped.

    Args:
        text: Raw reply. ``None`` yields an empty string; any other value
            is converted with ``str()`` before cleaning.

    Returns:
        The cleaned answer, or ``""`` when nothing is left after cleaning.
    """
    if text is None:
        return ""
    text = str(text).strip()
    prefixes = [
        "FINAL ANSWER:",
        "Final Answer:",
        "Answer:",
        "The answer is",
        "Result:",
    ]
    # Every label is tried in turn, so stacked labels such as
    # "FINAL ANSWER: Answer: 42" are peeled off one after another.
    for p in prefixes:
        if text.lower().startswith(p.lower()):
            text = text[len(p):].strip()
    # An empty reply, or one made up only of a label, has no first line:
    # "".splitlines() is [], so indexing it would raise IndexError.
    if not text:
        return ""
    # only first line
    text = text.splitlines()[0].strip()
    # strip common quoting characters
    return text.strip('"').strip("'").strip("*").strip()
70
 
71
+
72
  # -------------------------
73
  # Web search (ddgs)
74
  # -------------------------
 
79
  try:
80
  with DDGS() as ddgs:
81
  for i, r in enumerate(ddgs.text(query, max_results=max_results)):
 
82
  title = r.get("title", "")
83
  body = r.get("body", "")
84
  snippets.append(f"{title} — {body}")
85
+ if i + 1 >= max_results:
86
  break
87
  except Exception:
 
88
  return ""
89
  return "\n".join(snippets)
90
 
91
+
92
  # -------------------------
93
  # Download task file helper
94
  # -------------------------
95
  def download_task_file(task_id: str):
 
 
 
 
96
  try:
97
  url = f"{DEFAULT_API_URL}/files/{task_id}/download"
98
  r = requests.get(url, timeout=20)
99
  if r.status_code != 200:
100
  return None, None
 
101
  cd = r.headers.get("content-disposition", "")
102
  filename = ""
103
  if "filename=" in cd:
 
113
  except Exception:
114
  return None, None
115
 
116
+
117
  # -------------------------
118
+ # BasicAgent with retries and fallback
119
  # -------------------------
120
  class BasicAgent:
121
  def __init__(self):
 
122
  self.key = os.getenv("GROQ_API_KEY", "").strip() or None
 
123
  print("BasicAgent initializing. GROQ key present:", bool(self.key), "DDGS available:", DDGS is not None)
124
 
125
  def ask_llm(self, prompt: str, max_tokens: int = 128) -> str:
 
140
  return ""
141
 
142
  def fallback_from_search(self, question: str) -> str:
 
143
  snippets = web_search_snippets(question, max_results=4)
144
  if not snippets:
145
  return ""
 
146
  for line in snippets.splitlines():
147
  s = line.strip()
148
  if len(s) > 3:
 
149
  sentence = s.split(".")[0].strip()
150
  return clean_answer(sentence)
151
  return ""
152
 
153
  def __call__(self, question: str, task_id: str = "") -> str:
154
  print("Received question:", question[:200])
 
155
  context_parts = []
156
 
 
157
  if task_id:
158
  lp, fn = download_task_file(task_id)
159
  if lp and fn:
 
162
  txt = f.read(4000)
163
  context_parts.append(f"File {fn} contents (truncated):\n{txt}")
164
  except Exception:
 
165
  context_parts.append(f"File {fn} exists but not included in context.")
166
 
 
167
  search_snip = web_search_snippets(question, max_results=4)
168
  if search_snip:
169
  context_parts.append("Web snippets:\n" + search_snip[:3000])
170
 
171
  context = "\n\n".join(context_parts).strip()
172
 
 
173
  prompt = f"""You are solving a GAIA benchmark question. Return ONLY the final answer, nothing else.
174
 
175
  Question:
 
179
  {context}
180
 
181
  Return ONLY the final answer."""
 
182
  if self.key:
183
  ans = self.solve_with_retries(prompt, attempts=3)
184
  if ans:
185
  return ans
 
186
  try:
187
  ans2 = self.ask_llm("Extract the single final short answer only:\n" + prompt, max_tokens=48)
188
  ans2 = clean_answer(ans2)
 
191
  except Exception as e:
192
  print("LLM final fallback failed:", e)
193
 
 
194
  fb = self.fallback_from_search(question)
195
  if fb:
196
  return fb
 
197
  return ""
198
 
199
+
200
  # -------------------------
201
  # Evaluation runner used by UI
202
  # -------------------------
 
204
  if not profile:
205
  return "Please login first", None
206
 
207
+ username = getattr(profile, "username", None) or profile.get("username") if isinstance(profile, dict) else None
208
+ if not username:
209
+ # sometimes gradio returns OAuthProfile object; fallback
210
+ try:
211
+ username = profile.username
212
+ except Exception:
213
+ username = None
214
+ if not username:
215
+ return "Unable to get username from profile. Please try logging out and back in.", None
216
 
217
+ print("User:", username)
218
  agent = BasicAgent()
219
 
220
  try:
 
227
  for q in questions:
228
  task_id = q.get("task_id")
229
  question = q.get("question", "")
230
+ try:
231
+ ans = agent(question, task_id)
232
+ except Exception as e:
233
+ print("Agent execution error:", e)
234
+ ans = ""
235
  answers.append({"task_id": task_id, "submitted_answer": ans})
236
  logs.append({"task_id": task_id, "question": question, "answer": ans})
237
 
238
+ payload = {"username": username, "agent_code": "", "answers": answers}
 
 
 
 
239
  try:
240
  resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=30)
241
  resp.raise_for_status()
 
245
  except Exception as e:
246
  return f"Submission failed: {e}", pd.DataFrame(logs)
247
 
248
+
249
  # -------------------------
250
  # UI (minimal)
251
  # -------------------------
 
254
  gr.Markdown("Make sure you added `GROQ_API_KEY` in Settings → Secrets for best results.")
255
  gr.LoginButton()
256
  run_btn = gr.Button("Run Evaluation")
257
+ status = gr.Textbox(label="Run status", lines=6)
258
  table = gr.DataFrame(label="Logs")
259
+ run_btn.click(run_and_submit_all, inputs=gr.OAuthProfile(), outputs=[status, table])
260
 
261
  if __name__ == "__main__":
262
  demo.launch()