Raj989898 committed on
Commit
99671ca
·
verified ·
1 Parent(s): 4edf2a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -21
app.py CHANGED
@@ -34,7 +34,6 @@ def rate_limited_groq(api_key, prompt, system="", max_tokens=128):
34
  return resp.json()["choices"][0]["message"]["content"].strip()
35
 
36
  def download_task_file(task_id, hf_token=None):
37
- """Download with HF OAuth token for authentication."""
38
  url = f"{DEFAULT_API_URL}/files/{task_id}"
39
  headers = {}
40
  if hf_token:
@@ -42,7 +41,7 @@ def download_task_file(task_id, hf_token=None):
42
  try:
43
  resp = requests.get(url, headers=headers, timeout=30)
44
  print(f" File [{task_id[:8]}]: HTTP {resp.status_code}, "
45
- f"size={len(resp.content)}, ct={resp.headers.get('content-type','?')[:40]}")
46
  if resp.status_code != 200 or len(resp.content) == 0:
47
  return None, None
48
  cd = resp.headers.get("content-disposition", "")
@@ -62,7 +61,7 @@ def download_task_file(task_id, hf_token=None):
62
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext, prefix="gaia_")
63
  tmp.write(resp.content)
64
  tmp.close()
65
- print(f" Saved: {fname} ({len(resp.content)} bytes)")
66
  return tmp.name, fname
67
  except Exception as e:
68
  print(f" Download error: {e}")
@@ -131,9 +130,9 @@ def test_api():
131
  except Exception as e:
132
  return f"❌ {e}"
133
 
134
- SYSTEM = """You are a GAIA benchmark agent. Exact match grading β€” your answer must match exactly.
135
  Reply with ONLY the final answer. No explanation. No prefix. No "The answer is".
136
- Give the bare answer: a name, number, word, or short phrase only."""
137
 
138
  class BasicAgent:
139
  def __init__(self, hf_token=None):
@@ -141,7 +140,7 @@ class BasicAgent:
141
  if not self.key:
142
  raise RuntimeError("GROQ_API_KEY not set!")
143
  self.hf_token = hf_token
144
- print(f"Agent ready. Groq key: {self.key[:8]}... | HF token: {'YES' if hf_token else 'NO'}")
145
 
146
  def ask(self, prompt, max_tokens=128):
147
  return clean_answer(rate_limited_groq(self.key, prompt, SYSTEM, max_tokens))
@@ -157,7 +156,7 @@ class BasicAgent:
157
  file_ctx = ""
158
  is_py = False
159
 
160
- # Download file with HF token
161
  if task_id:
162
  lp, fn = download_task_file(task_id, self.hf_token)
163
  if lp and fn:
@@ -183,7 +182,7 @@ class BasicAgent:
183
  if results and "error" not in results.lower():
184
  search_ctx = f"\n[Search]\n{results[:3500]}\n"
185
 
186
- # Format hints
187
  q = question.lower()
188
  fmt = ""
189
  if "studio album" in q:
@@ -196,20 +195,20 @@ class BasicAgent:
196
  fmt = "\nSingle integer only."
197
  elif "how many" in q:
198
  fmt = "\nSingle integer only."
199
- elif "ioc" in q or ("country" in q and "olympic" in q):
200
- fmt = "\nIOC country code only (3 letters). If tied, alphabetically first country."
201
  elif "excel" in q or ("sale" in q and "food" in q):
202
- fmt = "\nNumber with exactly two decimal places. No $ sign, no commas (e.g. 89.50)."
203
  elif "chess" in q:
204
- fmt = "\nChess move in algebraic notation only (e.g. Qd8, e5)."
205
  elif "pitcher" in q and "number" in q:
206
- fmt = "\nTwo last names, comma-separated. Pitcher with lower jersey number first."
207
  elif "wikipedia" in q and "nominat" in q:
208
  fmt = "\nWikipedia username only."
209
  elif "grocery" in q or ("shopping" in q and "list" in q):
210
  fmt = "\nComma-separated list, alphabetical order."
211
  elif "youtube" in q or "video" in q:
212
- fmt = "\nExact short answer β€” quote, number, or brief phrase only."
213
 
214
  prompt = (
215
  f"Question: {question}"
@@ -232,15 +231,20 @@ class BasicAgent:
232
  print(f" Error: {e}")
233
  return ""
234
 
235
- def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
236
  space_id = os.getenv("SPACE_ID")
237
  if not profile:
238
  return "Please Login to Hugging Face.", None
239
 
240
  username = profile.username
241
- # Get the HF OAuth token from the login profile
242
- hf_token = getattr(profile, "token", None)
243
- print(f"User: {username}, HF token present: {bool(hf_token)}")
244
 
245
  try:
246
  agent = BasicAgent(hf_token=hf_token)
@@ -280,7 +284,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
280
 
281
  try:
282
  resp = requests.post(f"{DEFAULT_API_URL}/submit",
283
- json={"username": username.strip(), "agent_code": agent_code, "answers": answers_payload},
 
284
  timeout=60)
285
  resp.raise_for_status()
286
  r = resp.json()
@@ -292,7 +297,10 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
292
 
293
  with gr.Blocks() as demo:
294
  gr.Markdown("# Basic Agent Evaluation Runner")
295
- gr.Markdown("**Setup:** `GROQ_API_KEY` in Space Settings β†’ Secrets. Free at [console.groq.com](https://console.groq.com)")
 
 
 
296
  gr.LoginButton()
297
  with gr.Row():
298
  test_btn = gr.Button("πŸ”¬ Test Groq API", variant="secondary")
@@ -302,7 +310,12 @@ with gr.Blocks() as demo:
302
  run_button = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")
303
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
304
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
305
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
306
 
307
  if __name__ == "__main__":
308
  key = os.getenv("GROQ_API_KEY", "")
 
34
  return resp.json()["choices"][0]["message"]["content"].strip()
35
 
36
  def download_task_file(task_id, hf_token=None):
 
37
  url = f"{DEFAULT_API_URL}/files/{task_id}"
38
  headers = {}
39
  if hf_token:
 
41
  try:
42
  resp = requests.get(url, headers=headers, timeout=30)
43
  print(f" File [{task_id[:8]}]: HTTP {resp.status_code}, "
44
+ f"size={len(resp.content)}, ct={resp.headers.get('content-type','?')[:50]}")
45
  if resp.status_code != 200 or len(resp.content) == 0:
46
  return None, None
47
  cd = resp.headers.get("content-disposition", "")
 
61
  tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext, prefix="gaia_")
62
  tmp.write(resp.content)
63
  tmp.close()
64
+ print(f" Saved: {fname} ({len(resp.content)} bytes) -> {tmp.name}")
65
  return tmp.name, fname
66
  except Exception as e:
67
  print(f" Download error: {e}")
 
130
  except Exception as e:
131
  return f"❌ {e}"
132
 
133
+ SYSTEM = """You are a GAIA benchmark agent. Exact match grading is used.
134
  Reply with ONLY the final answer. No explanation. No prefix. No "The answer is".
135
+ Give only: a name, number, word, or short phrase."""
136
 
137
  class BasicAgent:
138
  def __init__(self, hf_token=None):
 
140
  if not self.key:
141
  raise RuntimeError("GROQ_API_KEY not set!")
142
  self.hf_token = hf_token
143
+ print(f"Agent ready. Groq: {self.key[:8]}... | HF token: {'YES βœ…' if hf_token else 'NO ❌'}")
144
 
145
  def ask(self, prompt, max_tokens=128):
146
  return clean_answer(rate_limited_groq(self.key, prompt, SYSTEM, max_tokens))
 
156
  file_ctx = ""
157
  is_py = False
158
 
159
+ # Download file using HF OAuth token
160
  if task_id:
161
  lp, fn = download_task_file(task_id, self.hf_token)
162
  if lp and fn:
 
182
  if results and "error" not in results.lower():
183
  search_ctx = f"\n[Search]\n{results[:3500]}\n"
184
 
185
+ # Format hints per question type
186
  q = question.lower()
187
  fmt = ""
188
  if "studio album" in q:
 
195
  fmt = "\nSingle integer only."
196
  elif "how many" in q:
197
  fmt = "\nSingle integer only."
198
+ elif "ioc" in q:
199
+ fmt = "\nIOC country code only (3 letters e.g. USA). If tied on athlete count, alphabetically first."
200
  elif "excel" in q or ("sale" in q and "food" in q):
201
+ fmt = "\nUSD with two decimal places, no $ sign, no commas (e.g. 8945.50)."
202
  elif "chess" in q:
203
+ fmt = "\nChess move in algebraic notation only (e.g. Qd8)."
204
  elif "pitcher" in q and "number" in q:
205
+ fmt = "\nTwo last names comma-separated. Lower jersey number pitcher first."
206
  elif "wikipedia" in q and "nominat" in q:
207
  fmt = "\nWikipedia username only."
208
  elif "grocery" in q or ("shopping" in q and "list" in q):
209
  fmt = "\nComma-separated list, alphabetical order."
210
  elif "youtube" in q or "video" in q:
211
+ fmt = "\nExact short answer only."
212
 
213
  prompt = (
214
  f"Question: {question}"
 
231
  print(f" Error: {e}")
232
  return ""
233
 
234
+ def run_and_submit_all(profile: gr.OAuthProfile | None,
235
+ oauth_token: gr.OAuthToken | None):
236
+ """
237
+ IMPORTANT: oauth_token gives us the actual HF bearer token
238
+ needed to download task files from the scoring API.
239
+ """
240
  space_id = os.getenv("SPACE_ID")
241
  if not profile:
242
  return "Please Login to Hugging Face.", None
243
 
244
  username = profile.username
245
+ # Extract the actual token string
246
+ hf_token = oauth_token.token if oauth_token else None
247
+ print(f"User: {username} | HF token present: {'YES βœ…' if hf_token else 'NO ❌'}")
248
 
249
  try:
250
  agent = BasicAgent(hf_token=hf_token)
 
284
 
285
  try:
286
  resp = requests.post(f"{DEFAULT_API_URL}/submit",
287
+ json={"username": username.strip(), "agent_code": agent_code,
288
+ "answers": answers_payload},
289
  timeout=60)
290
  resp.raise_for_status()
291
  r = resp.json()
 
297
 
298
  with gr.Blocks() as demo:
299
  gr.Markdown("# Basic Agent Evaluation Runner")
300
+ gr.Markdown(
301
+ "**Setup:** Add `GROQ_API_KEY` in Space Settings β†’ Secrets. "
302
+ "Free key at [console.groq.com](https://console.groq.com)"
303
+ )
304
  gr.LoginButton()
305
  with gr.Row():
306
  test_btn = gr.Button("πŸ”¬ Test Groq API", variant="secondary")
 
310
  run_button = gr.Button("πŸš€ Run Evaluation & Submit All Answers", variant="primary")
311
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
312
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
313
+ # Pass BOTH profile AND oauth_token so we can use the HF bearer token
314
+ run_button.click(
315
+ fn=run_and_submit_all,
316
+
317
+ outputs=[status_output, results_table]
318
+ )
319
 
320
  if __name__ == "__main__":
321
  key = os.getenv("GROQ_API_KEY", "")