SantoshKumar1310 committed on
Commit
6bfe482
·
verified ·
1 Parent(s): 82eec24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -496
app.py CHANGED
@@ -2,551 +2,128 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import re
6
- from typing import Dict, List, Any, Optional
7
- import json
8
 
9
  # --- Constants ---
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
- # --- Enhanced GAIA Agent ---
13
- class GAIAAgent:
14
- """
15
- Enhanced agent optimized for GAIA Level 1 questions.
16
- Targets 30%+ accuracy through multi-tool integration.
17
- """
18
-
19
  def __init__(self):
20
- print("βœ… GAIA Agent initialized with enhanced capabilities.")
21
- self.api_url = DEFAULT_API_URL
22
-
23
- def __call__(self, question: str, task_id: str = None) -> str:
24
- """
25
- Main entry point - processes a question and returns a precise answer.
26
- """
27
- print(f"\n{'='*60}")
28
- print(f"🧠 Processing Task: {task_id}")
29
- print(f"πŸ“ Question: {question[:100]}...")
30
- print(f"{'='*60}")
31
-
32
- try:
33
- # Step 1: Classify question type
34
- q_type = self._classify_question(question)
35
- print(f"πŸ“Š Question Type: {q_type}")
36
-
37
- # Step 2: Route to specialized handler
38
- answer = self._route_to_handler(question, q_type, task_id)
39
-
40
- # Step 3: Clean and format answer
41
- final_answer = self._clean_answer(answer, question)
42
-
43
- print(f"βœ… Final Answer: {final_answer}")
44
- return final_answer
45
-
46
- except Exception as e:
47
- print(f"❌ Error: {e}")
48
- # Return a safe fallback
49
- return "Unable to determine answer"
50
-
51
- def _classify_question(self, question: str) -> str:
52
- """Classify question to route to appropriate handler"""
53
- q_lower = question.lower()
54
-
55
- # Math/calculation questions
56
- if any(word in q_lower for word in ["calculate", "sum", "total", "multiply", "divide", "average", "mean"]):
57
- return "math"
58
-
59
- # Questions with numbers/operators
60
- if any(op in question for op in ["+", "-", "Γ—", "Γ·", "*", "/"]) and any(c.isdigit() for c in question):
61
- return "math"
62
-
63
- # Counting questions
64
- if any(word in q_lower for word in ["how many", "count", "number of"]):
65
- return "counting"
66
-
67
- # Date/time questions
68
- if any(word in q_lower for word in ["year", "date", "when", "month", "day"]):
69
- return "date"
70
-
71
- # Location questions
72
- if any(word in q_lower for word in ["where", "location", "city", "country", "capital"]):
73
- return "location"
74
-
75
- # Definition/what is questions
76
- if q_lower.startswith("what is") or q_lower.startswith("what's"):
77
- return "definition"
78
-
79
- # Who questions
80
- if q_lower.startswith("who"):
81
- return "person"
82
-
83
- # File-based questions
84
- if any(word in q_lower for word in ["file", "document", "image", "picture", "photo"]):
85
- return "file"
86
-
87
- return "general"
88
-
89
- def _route_to_handler(self, question: str, q_type: str, task_id: str) -> str:
90
- """Route question to appropriate specialized handler"""
91
-
92
- if q_type == "math":
93
- return self._handle_math(question)
94
- elif q_type == "counting":
95
- return self._handle_counting(question)
96
- elif q_type == "date":
97
- return self._handle_date(question)
98
- elif q_type == "location":
99
- return self._handle_location(question)
100
- elif q_type == "definition":
101
- return self._handle_definition(question)
102
- elif q_type == "person":
103
- return self._handle_person(question)
104
- elif q_type == "file":
105
- return self._handle_file(question, task_id)
106
- else:
107
- return self._handle_general(question)
108
-
109
- def _handle_math(self, question: str) -> str:
110
- """Handle mathematical calculations"""
111
- try:
112
- # Extract numbers
113
- numbers = re.findall(r'-?\d+\.?\d*', question)
114
- if not numbers:
115
- return "0"
116
-
117
- nums = [float(n) for n in numbers]
118
- q_lower = question.lower()
119
-
120
- # Detect operation
121
- if "sum" in q_lower or "total" in q_lower or "+" in question or "add" in q_lower:
122
- result = sum(nums)
123
- elif "difference" in q_lower or "-" in question or "subtract" in q_lower:
124
- result = nums[0] - sum(nums[1:]) if len(nums) > 1 else nums[0]
125
- elif "product" in q_lower or "*" in question or "Γ—" in question or "multiply" in q_lower:
126
- result = 1
127
- for n in nums:
128
- result *= n
129
- elif "divide" in q_lower or "/" in question or "Γ·" in question:
130
- result = nums[0] / nums[1] if len(nums) >= 2 and nums[1] != 0 else nums[0]
131
- elif "average" in q_lower or "mean" in q_lower:
132
- result = sum(nums) / len(nums)
133
- else:
134
- # Try to evaluate the expression safely
135
- expr = re.sub(r'[^0-9+\-*/().\s]', '', question)
136
- result = eval(expr, {"__builtins__": {}}, {})
137
-
138
- # Format result
139
- if result == int(result):
140
- return str(int(result))
141
- else:
142
- return f"{result:.2f}"
143
-
144
- except Exception as e:
145
- print(f"Math error: {e}")
146
- return "0"
147
-
148
- def _handle_counting(self, question: str) -> str:
149
- """Handle counting questions"""
150
- # Extract the first number found (often the answer)
151
- numbers = re.findall(r'\d+', question)
152
- return numbers[0] if numbers else "0"
153
-
154
- def _handle_date(self, question: str) -> str:
155
- """Handle date/year questions"""
156
- # Look for 4-digit years
157
- years = re.findall(r'\b(19|20)\d{2}\b', question)
158
- if years:
159
- return years[0]
160
-
161
- # Look for dates
162
- dates = re.findall(r'\b\d{1,2}/\d{1,2}/\d{4}\b', question)
163
- if dates:
164
- return dates[0]
165
-
166
- return "Unknown"
167
-
168
- def _handle_location(self, question: str) -> str:
169
- """Handle location questions using knowledge base"""
170
- q_lower = question.lower()
171
-
172
- # Common capitals and locations
173
- location_kb = {
174
- "france": "Paris",
175
- "paris": "France",
176
- "england": "London",
177
- "london": "England",
178
- "usa": "Washington D.C.",
179
- "united states": "Washington D.C.",
180
- "japan": "Tokyo",
181
- "tokyo": "Japan",
182
- "germany": "Berlin",
183
- "berlin": "Germany",
184
- "italy": "Rome",
185
- "rome": "Italy",
186
- "spain": "Madrid",
187
- "madrid": "Spain",
188
- }
189
-
190
- for key, value in location_kb.items():
191
- if key in q_lower:
192
- return value
193
-
194
- return "Unknown"
195
-
196
- def _handle_definition(self, question: str) -> str:
197
- """Handle 'What is' questions"""
198
- # Extract the subject
199
- match = re.search(r"what (?:is|was|are) (?:the |an? )?(.+?)(?:\?|$)", question, re.IGNORECASE)
200
- if match:
201
- subject = match.group(1).strip()
202
- return f"{subject}"
203
- return "Unknown"
204
-
205
- def _handle_person(self, question: str) -> str:
206
- """Handle 'Who' questions using knowledge base"""
207
- q_lower = question.lower()
208
-
209
- # Famous people knowledge base
210
- people_kb = {
211
- "romeo and juliet": "William Shakespeare",
212
- "hamlet": "William Shakespeare",
213
- "mona lisa": "Leonardo da Vinci",
214
- "starry night": "Vincent van Gogh",
215
- "theory of relativity": "Albert Einstein",
216
- "evolution": "Charles Darwin",
217
- "telephone": "Alexander Graham Bell",
218
- "light bulb": "Thomas Edison",
219
- "first president": "George Washington",
220
- }
221
-
222
- for key, value in people_kb.items():
223
- if key in q_lower:
224
- return value
225
-
226
- return "Unknown"
227
-
228
- def _handle_file(self, question: str, task_id: str) -> str:
229
- """Handle questions that require file access"""
230
- if not task_id:
231
- return "No file available"
232
-
233
- try:
234
- # Download the file from API
235
- file_url = f"{self.api_url}/files/{task_id}"
236
- print(f"πŸ“₯ Downloading file from: {file_url}")
237
-
238
- response = requests.get(file_url, timeout=30)
239
- if response.status_code == 200:
240
- # Process file based on type
241
- content_type = response.headers.get('Content-Type', '')
242
-
243
- if 'text' in content_type or 'json' in content_type:
244
- # Text-based file
245
- content = response.text
246
- return self._analyze_text_file(content, question)
247
- elif 'image' in content_type:
248
- # Image file
249
- return "Image analysis not implemented"
250
- else:
251
- return "Unknown file type"
252
- else:
253
- print(f"File download failed: {response.status_code}")
254
- return "File not found"
255
-
256
- except Exception as e:
257
- print(f"File handling error: {e}")
258
- return "File processing failed"
259
-
260
- def _analyze_text_file(self, content: str, question: str) -> str:
261
- """Analyze text file content to answer question"""
262
- q_lower = question.lower()
263
-
264
- # Counting items in file
265
- if "how many" in q_lower:
266
- lines = content.strip().split('\n')
267
- return str(len(lines))
268
-
269
- # Finding specific text
270
- if "find" in q_lower or "search" in q_lower:
271
- # Extract search term
272
- match = re.search(r"(?:find|search for) ['\"](.+?)['\"]", question, re.IGNORECASE)
273
- if match:
274
- term = match.group(1)
275
- if term in content:
276
- return "Found"
277
- else:
278
- return "Not found"
279
-
280
- # Return first line as fallback
281
- lines = content.strip().split('\n')
282
- return lines[0] if lines else "Empty file"
283
-
284
- def _handle_general(self, question: str) -> str:
285
- """Handle general questions with basic reasoning"""
286
- # Try to extract any numbers or dates
287
- numbers = re.findall(r'\d+', question)
288
- if numbers:
289
- return numbers[0]
290
-
291
- # Look for yes/no questions
292
- if question.strip().endswith('?') and any(word in question.lower() for word in ['is', 'are', 'was', 'were', 'can', 'could', 'will', 'would']):
293
- return "Yes"
294
-
295
- return "Unable to determine"
296
-
297
- def _clean_answer(self, answer: str, question: str) -> str:
298
- """
299
- Clean and format answer according to GAIA requirements.
300
- GAIA requires exact matches, so formatting is critical.
301
- """
302
- # Remove extra whitespace
303
- answer = answer.strip()
304
-
305
- # Remove "The answer is" or similar phrases
306
- answer = re.sub(r'^(?:the answer is|it is|result is)[:\s]+', '', answer, flags=re.IGNORECASE)
307
-
308
- # Remove trailing punctuation (except for decimals)
309
- answer = re.sub(r'[.!?,;]+$', '', answer)
310
-
311
- # Handle comma-separated lists
312
- if "comma-separated" in question.lower() or "list" in question.lower():
313
- # Ensure proper comma-space formatting
314
- answer = re.sub(r'\s*,\s*', ', ', answer)
315
-
316
- # Handle number formatting
317
- if re.match(r'^-?\d+\.?\d*$', answer):
318
- # It's a number
319
- num = float(answer)
320
- # If it's a whole number, format without decimals
321
- if num == int(num):
322
- answer = str(int(num))
323
- else:
324
- # Keep minimal decimal places
325
- answer = f"{num:.10g}"
326
-
327
- return answer
328
 
329
 
 
330
  def run_and_submit_all(profile: gr.OAuthProfile | None):
331
- """
332
- Fetch all questions, run the agent, submit answers, and show results.
333
- """
334
- space_id = os.getenv("SPACE_ID")
335
 
336
  if profile:
337
- username = profile.username
338
- print(f"πŸ‘€ User logged in: {username}")
339
  else:
340
- print("❌ User not logged in.")
341
- return "❌ Please login to Hugging Face first.", None
342
 
343
  api_url = DEFAULT_API_URL
344
  questions_url = f"{api_url}/questions"
345
  submit_url = f"{api_url}/submit"
346
 
347
- # Create Agent
348
  try:
349
- agent = GAIAAgent()
350
  except Exception as e:
351
- return f"❌ Agent initialization failed: {e}", None
352
 
353
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "No_Space_ID"
354
- print(f"πŸ“ Agent code link: {agent_code}")
355
 
356
- # Fetch Questions
 
357
  try:
358
- print("πŸ“‘ Fetching questions from API...")
359
- response = requests.get(questions_url, timeout=30)
360
  response.raise_for_status()
361
  questions_data = response.json()
362
-
363
  if not questions_data:
364
- return "⚠️ No questions received from API.", None
365
-
366
- print(f"βœ… Retrieved {len(questions_data)} questions.")
367
-
368
- except requests.exceptions.RequestException as e:
369
- return f"❌ Error fetching questions: {e}\n\nPlease check if the API is available.", None
370
 
371
- # Run Agent on all questions
372
  results_log = []
373
  answers_payload = []
374
-
375
- print(f"\nπŸ€– Running agent on {len(questions_data)} questions...\n")
376
-
377
- for i, item in enumerate(questions_data, 1):
378
  task_id = item.get("task_id")
379
  question_text = item.get("question")
380
-
381
- if not task_id or not question_text:
382
  continue
383
-
384
  try:
385
- print(f"\n[{i}/{len(questions_data)}] Processing: {task_id}")
386
- submitted_answer = agent(question_text, task_id)
387
-
388
- answers_payload.append({
389
- "task_id": task_id,
390
- "submitted_answer": submitted_answer
391
- })
392
-
393
- results_log.append({
394
- "Task ID": task_id,
395
- "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
396
- "Your Answer": submitted_answer
397
- })
398
-
399
  except Exception as e:
400
- error_msg = f"ERROR: {e}"
401
- print(f"❌ {error_msg}")
402
- results_log.append({
403
- "Task ID": task_id,
404
- "Question": question_text[:80] + "..." if len(question_text) > 80 else question_text,
405
- "Your Answer": error_msg
406
- })
407
 
408
  if not answers_payload:
409
- return "⚠️ No answers generated.", pd.DataFrame(results_log)
410
-
411
- results_df = pd.DataFrame(results_log)
412
 
413
- # Submit Answers
414
- submission_data = {
415
- "username": username.strip(),
416
- "agent_code": agent_code,
417
- "answers": answers_payload
418
- }
419
 
 
420
  try:
421
- print(f"\nπŸ“€ Submitting {len(answers_payload)} answers to API...")
422
- response = requests.post(submit_url, json=submission_data, timeout=120)
423
  response.raise_for_status()
424
  result_data = response.json()
425
-
426
- score = result_data.get('score', 0)
427
- correct = result_data.get('correct_count', 0)
428
- total = result_data.get('total_attempted', len(answers_payload))
429
-
430
- # Determine emoji based on score
431
- if score >= 30:
432
- emoji = "πŸŽ‰πŸ†"
433
- elif score >= 20:
434
- emoji = "🎯"
435
- elif score >= 10:
436
- emoji = "πŸ“ˆ"
437
- else:
438
- emoji = "πŸ’ͺ"
439
-
440
  final_status = (
441
- f"{emoji} Submission Complete!\n\n"
442
- f"πŸ‘€ Username: {result_data.get('username')}\n"
443
- f"🏁 Score: {score}% ({correct}/{total} correct)\n"
444
- f"πŸ“Š Target: 30% for certification\n\n"
445
- f"πŸ“ {result_data.get('message', '')}\n\n"
446
- f"πŸ”— Check the leaderboard: https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard"
447
  )
448
-
449
- return final_status, results_df
450
-
451
- except requests.exceptions.RequestException as e:
452
- return f"❌ Submission failed: {e}\n\nβœ… Generated {len(answers_payload)} answers (see table)", results_df
453
 
454
 
455
- # --- Gradio Interface ---
456
- with gr.Blocks(theme=gr.themes.Soft(), title="GAIA Agent Evaluation") as demo:
457
- gr.Markdown(
458
- """
459
- # πŸ€– GAIA Agent Evaluation System
460
-
461
- ### 🎯 Goal: Achieve 30%+ accuracy on GAIA Level 1 questions
462
-
463
- This agent evaluates your AI assistant on 20 carefully selected questions from GAIA's validation set.
464
- The questions test reasoning, calculation, factual knowledge, and tool usage.
465
-
466
- ---
467
-
468
- ### πŸ“‹ How to Submit:
469
-
470
- 1. **Clone this Space** to your Hugging Face profile
471
- 2. **Keep your Space public** (required for leaderboard verification)
472
- 3. **Login** using the button below
473
- 4. **Click "Run Evaluation"** and wait for results
474
- 5. **Check your score** on the [leaderboard](https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard)
475
-
476
- ---
477
-
478
- ### πŸ’‘ Tips for Improvement:
479
-
480
- - Study the question types and patterns
481
- - Add web search capabilities (DuckDuckGo, Wikipedia)
482
- - Implement better answer formatting
483
- - Test individual questions using `/random-question` endpoint
484
- - Focus on precise, exact-match answers
485
-
486
- ---
487
-
488
- ### ⚠️ Important Notes:
489
-
490
- - Processing takes 2-5 minutes (20 questions)
491
- - Answers must be **exact matches** (case-sensitive, format-sensitive)
492
- - Keep your Space public for leaderboard verification
493
- - The SPACE_ID environment variable is set automatically by HF Spaces
494
-
495
- """
496
- )
497
-
498
- with gr.Row():
499
- gr.LoginButton()
500
-
501
- gr.Markdown("---")
502
-
503
- run_button = gr.Button(
504
- "πŸš€ Run Evaluation & Submit All Answers",
505
- variant="primary",
506
- size="lg"
507
- )
508
-
509
- status_output = gr.Textbox(
510
- label="πŸ“Š Evaluation Results",
511
- lines=12,
512
- interactive=False,
513
- show_copy_button=True
514
- )
515
-
516
- results_table = gr.DataFrame(
517
- label="πŸ“ Questions and Your Answers",
518
- wrap=True,
519
- interactive=False
520
- )
521
-
522
  gr.Markdown(
523
  """
 
 
 
 
 
524
  ---
525
-
526
- ### πŸ”— Resources:
527
-
528
- - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
529
- - [Leaderboard](https://huggingface.co/spaces/agents-course/agents-course-unit4-leaderboard)
530
- - [Course Materials](https://huggingface.co/learn/cookbook/agents)
531
- - [API Documentation](https://agents-course-unit4-scoring.hf.space/docs)
532
-
533
- ### πŸ† Score Interpretation:
534
-
535
- - **30%+**: Excellent! You've achieved certification level βœ…
536
- - **20-29%**: Good progress! Keep improving πŸ“ˆ
537
- - **10-19%**: On the right track! Add more tools πŸ”§
538
- - **0-9%**: Keep experimenting! Study the questions πŸ’ͺ
539
-
540
- Remember: Human performance is ~92%, GPT-4 with plugins is ~15%. You're competing with AI systems!
541
  """
542
  )
543
 
544
- run_button.click(
545
- fn=run_and_submit_all,
546
- outputs=[status_output, results_table]
547
- )
 
548
 
 
549
 
 
550
  if __name__ == "__main__":
551
- print("πŸš€ Launching GAIA Agent Evaluation Interface...")
552
- demo.launch(debug=True, share=False)
 
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
 
5
 
6
  # --- Constants ---
7
+ # ✅ correct backend API base URL
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # --- Basic Agent Definition ---
11
+ # 👇 customize this class to make your own agent smarter
12
+ class BasicAgent:
 
 
 
 
13
  def __init__(self):
14
+ print("✅ BasicAgent initialized.")
15
+
16
+ def __call__(self, question: str) -> str:
17
+ print(f"Agent received question: {question[:50]}...")
18
+ # For now, it returns a placeholder answer
19
+ fixed_answer = "This is a default answer."
20
+ print(f"Agent returning: {fixed_answer}")
21
+ return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
+ # --- Evaluation Logic ---
25
  def run_and_submit_all(profile: gr.OAuthProfile | None):
26
+ """Fetches all questions, runs agent, submits answers, shows results."""
27
+ space_id = os.getenv("SPACE_ID") # for linking to code repo
 
 
28
 
29
  if profile:
30
+ username = f"{profile.username}"
31
+ print(f"👤 Logged in as: {username}")
32
  else:
33
+ return "Please log in with your Hugging Face account.", None
 
34
 
35
  api_url = DEFAULT_API_URL
36
  questions_url = f"{api_url}/questions"
37
  submit_url = f"{api_url}/submit"
38
 
39
+ # --- Instantiate your agent ---
40
  try:
41
+ agent = BasicAgent()
42
  except Exception as e:
43
+ return f"Error initializing agent: {e}", None
44
 
45
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "N/A"
46
+ print(f"🔗 Code link: {agent_code}")
47
 
48
+ # --- Fetch Questions ---
49
+ print(f"📡 Fetching from {questions_url}")
50
  try:
51
+ response = requests.get(questions_url, timeout=15)
 
52
  response.raise_for_status()
53
  questions_data = response.json()
 
54
  if not questions_data:
55
+ return "No questions fetched.", None
56
+ print(f"✅ {len(questions_data)} questions retrieved.")
57
+ except Exception as e:
58
+ return f"Error fetching questions: {e}", None
 
 
59
 
60
+ # --- Run Agent ---
61
  results_log = []
62
  answers_payload = []
63
+ print(f"🤖 Running agent on {len(questions_data)} questions...")
64
+ for item in questions_data:
 
 
65
  task_id = item.get("task_id")
66
  question_text = item.get("question")
67
+ if not task_id or question_text is None:
 
68
  continue
 
69
  try:
70
+ submitted_answer = agent(question_text)
71
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
72
+ results_log.append({"Task ID": task_id, "Question": question_text, "Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
73
  except Exception as e:
74
+ results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"ERROR: {e}"})
 
 
 
 
 
 
75
 
76
  if not answers_payload:
77
+ return "No answers produced by the agent.", pd.DataFrame(results_log)
 
 
78
 
79
+ # --- Prepare Submission ---
80
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
81
+ print(f"🚀 Submitting {len(answers_payload)} answers...")
 
 
 
82
 
83
+ # --- Submit ---
84
  try:
85
+ response = requests.post(submit_url, json=submission_data, timeout=60)
 
86
  response.raise_for_status()
87
  result_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  final_status = (
89
+ f"✅ Submission Successful!\n"
90
+ f"User: {result_data.get('username')}\n"
91
+ f"Score: {result_data.get('score', 'N/A')}%\n"
92
+ f"Correct: {result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')}\n"
93
+ f"Message: {result_data.get('message', 'No message received.')}"
 
94
  )
95
+ return final_status, pd.DataFrame(results_log)
96
+ except Exception as e:
97
+ return f"Submission failed: {e}", pd.DataFrame(results_log)
 
 
98
 
99
 
100
+ # --- Build Gradio Interface ---
101
+ with gr.Blocks() as demo:
102
+ gr.Markdown("# 🧠 Basic Agent Evaluation Runner")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  gr.Markdown(
104
  """
105
+ ### Instructions
106
+ 1️⃣ Clone this space on your Hugging Face profile.
107
+ 2️⃣ Modify the `BasicAgent` class to add your logic.
108
+ 3️⃣ Log in below, then click **Run Evaluation & Submit All Answers**.
109
+
110
  ---
111
+ The process might take a few minutes while the agent runs all questions.
112
+ You can enhance your agent with reasoning, web tools, or retrieval modules.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  """
114
  )
115
 
116
+ gr.LoginButton()
117
+ run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
118
+
119
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
120
+ results_table = gr.DataFrame(label="🧾 Questions and Agent Answers")
121
 
122
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
123
 
124
+ # --- Run ---
125
  if __name__ == "__main__":
126
+ print("\n" + "-" * 40)
127
+ print("🌐 App Starting")
128
+ print("-" * 40)
129
+ demo.launch(debug=True, share=False)