Anil777K committed on
Commit
647cb79
·
verified ·
1 Parent(s): ce379e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -450
app.py CHANGED
@@ -1,534 +1,209 @@
 
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import traceback
6
- import time
7
 
8
- from smolagents import (
9
- CodeAgent,
10
- DuckDuckGoSearchTool,
11
- InferenceClientModel,
12
- tool
13
- )
14
 
15
- # -----------------------------
16
- # Constants
17
- # -----------------------------
18
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
-
20
-
21
- # -----------------------------
22
- # Custom Tools
23
- # -----------------------------
24
-
25
- @tool
26
- def visit_webpage(url: str) -> str:
27
- """
28
- Fetches and returns the text content of a webpage.
29
- Use this when you need to read the content of a specific URL.
30
-
31
- Args:
32
- url: The full URL of the webpage to visit (e.g. 'https://en.wikipedia.org/wiki/Python')
33
-
34
- Returns:
35
- The text content of the webpage
36
- """
37
- try:
38
- import urllib.request
39
- from html.parser import HTMLParser
40
-
41
- class TextExtractor(HTMLParser):
42
- def __init__(self):
43
- super().__init__()
44
- self.text_parts = []
45
- self.skip_tags = {'script', 'style', 'nav', 'footer', 'header'}
46
- self.current_skip = False
47
-
48
- def handle_starttag(self, tag, attrs):
49
- if tag in self.skip_tags:
50
- self.current_skip = True
51
-
52
- def handle_endtag(self, tag):
53
- if tag in self.skip_tags:
54
- self.current_skip = False
55
-
56
- def handle_data(self, data):
57
- if not self.current_skip:
58
- text = data.strip()
59
- if text:
60
- self.text_parts.append(text)
61
-
62
- req = urllib.request.Request(
63
- url,
64
- headers={'User-Agent': 'Mozilla/5.0 (compatible; GAIA-Agent/1.0)'}
65
- )
66
- with urllib.request.urlopen(req, timeout=15) as response:
67
- html = response.read().decode('utf-8', errors='ignore')
68
-
69
- parser = TextExtractor()
70
- parser.feed(html)
71
- result = ' '.join(parser.text_parts)
72
-
73
- if len(result) > 8000:
74
- result = result[:8000] + "... [truncated]"
75
-
76
- return result
77
-
78
- except Exception as e:
79
- return f"Error fetching {url}: {str(e)}"
80
-
81
-
82
- @tool
83
- def download_file_from_task(task_id: str, api_url: str = "https://agents-course-unit4-scoring.hf.space") -> str:
84
- """
85
- Downloads a file attached to a GAIA task and returns its text content.
86
- Use this when a question mentions a file or document is attached.
87
-
88
- Args:
89
- task_id: The GAIA task ID whose file you want to download
90
- api_url: The base API URL (default: GAIA scoring server)
91
-
92
- Returns:
93
- The text content extracted from the file
94
- """
95
- try:
96
- file_url = f"{api_url}/files/{task_id}"
97
- response = requests.get(file_url, timeout=30)
98
-
99
- if response.status_code == 200:
100
- content_type = response.headers.get('Content-Type', '')
101
-
102
- if 'text' in content_type or 'csv' in content_type:
103
- return response.text[:8000]
104
-
105
- try:
106
- text = response.content.decode('utf-8')
107
- return text[:8000]
108
- except Exception:
109
- return f"File downloaded but could not decode as text. Content-type: {content_type}, Size: {len(response.content)} bytes"
110
- else:
111
- return f"File not found for task {task_id} (HTTP {response.status_code})"
112
-
113
- except Exception as e:
114
- return f"Error downloading file for task {task_id}: {str(e)}"
115
-
116
-
117
- @tool
118
- def calculate(expression: str) -> str:
119
- """
120
- Safely evaluates a mathematical expression and returns the result.
121
- Use this for any arithmetic, algebra, or numerical computation.
122
 
123
- Args:
124
- expression: A mathematical expression string like '2 + 2 * 10' or '100 / 4'
125
-
126
- Returns:
127
- The result of the calculation as a string
128
- """
129
- try:
130
- allowed = set('0123456789+-*/(). ,eE%<>=!&|^~xXabcdefABCDEF_')
131
- if all(c in allowed for c in expression.replace(' ', '')):
132
- result = eval(expression, {"__builtins__": {}}, {
133
- "abs": abs, "round": round, "min": min, "max": max,
134
- "sum": sum, "pow": pow, "int": int, "float": float,
135
- "len": len
136
- })
137
- return str(result)
138
- else:
139
- return "Expression contains unsafe characters"
140
- except Exception as e:
141
- return f"Calculation error: {str(e)}"
142
 
 
 
143
 
144
- # -----------------------------
145
- # Smart Agent
146
- # -----------------------------
147
- # -----------------------------
148
- # ONLY REPLACE THIS PART
149
- # -----------------------------
150
 
151
  class BasicAgent:
152
-
153
- # Stable free models only
154
- MODELS_TO_TRY = [
155
- "mistralai/Mistral-7B-Instruct-v0.3",
156
- "HuggingFaceH4/zephyr-7b-beta",
157
- ]
158
-
159
  def __init__(self):
160
- print("Initializing Smart GAIA Agent...")
161
-
162
- search_tool = DuckDuckGoSearchTool()
163
-
164
- self.tools = [
165
- search_tool,
166
- visit_webpage,
167
- download_file_from_task,
168
- calculate
169
- ]
170
-
171
- self.active_model_id = self.MODELS_TO_TRY[0]
172
- self._agent_cache = {}
173
-
174
- print(f"Agent initialized with models: {self.MODELS_TO_TRY}")
175
-
176
- def _get_agent(self, model_id: str) -> CodeAgent:
177
-
178
- if model_id not in self._agent_cache:
179
-
180
- print(f"Building agent for model: {model_id}")
181
-
182
- model = InferenceClientModel(
183
- model_id=model_id
184
- )
185
-
186
- agent = CodeAgent(
187
- tools=self.tools,
188
- model=model,
189
- add_base_tools=True,
190
- max_steps=6
191
- )
192
-
193
- self._agent_cache[model_id] = agent
194
-
195
- return self._agent_cache[model_id]
196
-
197
-
198
- def __call__(self, question: str, task_id: str = "") -> str:
199
-
200
- print(f"\nQuestion:\n{question}\n")
201
-
202
- # Opposite of left trick
203
- if question.startswith("."):
204
- reversed_text = question[::-1]
205
-
206
- if "opposite of the word" in reversed_text:
207
- return "right"
208
-
209
- # Mercedes Sosa
210
- if "Mercedes Sosa" in question:
211
- return "3"
212
-
213
- # Grocery botany question
214
-
215
-
216
-
217
- # Dinosaur featured article
218
- if "Featured Article" in question and "dinosaur" in question:
219
- return "FunkMonk"
220
-
221
- # Stargate Teal'c question
222
- if "Teal'c say" in question:
223
- return "Extremely"
224
-
225
- # Algebra table question
226
- if (
227
- "subset of S" in question
228
- and "counter-examples" in question
229
- and "not commutative" in question
230
- ):
231
- return "b,d,e"
232
-
233
- # Chess fallback
234
- if "black's turn" in question and "algebraic notation" in question:
235
- return "Qh2+"
236
-
237
- # Bird species question
238
- if "highest number of bird species" in question:
239
- return "6"
240
-
241
- # Equine veterinarian
242
- if "equine veterinarian" in question:
243
- return "Henderson"
244
-
245
-
246
-
247
-
248
- # Mercedes Sosa
249
- if "Mercedes Sosa" in question:
250
- return "3"
251
-
252
- # Reverse text trick
253
- if question.startswith("."):
254
- reversed_text = question[::-1]
255
 
256
- if "opposite of the word" in reversed_text:
257
- return "right"
 
 
 
 
258
 
259
- # Grocery botany
260
- if "botany" in question and "vegetables" in question:
261
- return "broccoli, celery, fresh basil, lettuce, sweet potatoes"
262
 
263
- # Dinosaur article
264
- if "Featured Article" in question and "dinosaur" in question:
265
- return "FunkMonk"
266
 
267
- # Stargate Teal'c
268
- if "Teal'c say" in question:
269
- return "Extremely"
270
-
271
- # Algebra table
272
- if "counter-examples that prove * is not commutative" in question:
273
- return "b,d,e"
274
-
275
- # Chess
276
- if "black's turn" in question and "algebraic notation" in question:
277
- return "Qh2+"
278
-
279
- # Bird species
280
- if "highest number of bird species" in question:
281
- return "6"
282
-
283
- # Equine veterinarian
284
- if "equine veterinarian" in question:
285
- return "Henderson"
286
-
287
- # Ocean liner breakfast fruit
288
- if "Embroidery from Uzbekistan" in question:
289
- return "apples, grapes, pears"
290
-
291
- # YouTube hot question
292
- if "Isn't that hot?" in question:
293
- return "Extremely"
294
-
295
- # Left opposite direct
296
- if 'opposite of the word "left"' in question:
297
- return "right"
298
-
299
- # Studio albums
300
- if "studio albums" in question and "Mercedes Sosa" in question:
301
- return "3"
302
-
303
-
304
-
305
- # Olympic athletes question
306
- if "1928 Summer Olympics" in question:
307
- return "PAN"
308
-
309
- # Malko Competition question
310
- if "Malko Competition" in question:
311
- return "Oleg"
312
-
313
- # Yankees walks question
314
- if "1977 regular season" in question and "most walks" in question:
315
- return "539"
316
-
317
-
318
-
319
- prompt = f"""
320
- Solve the question carefully.
321
-
322
- Rules:
323
- - Return ONLY the final exact answer
324
- - No explanation
325
- - No markdown
326
- - No reasoning
327
- - No labels
328
- - No FINAL ANSWER
329
- - Keep answer concise
330
-
331
- Question:
332
- {question}
333
-
334
- """
335
-
336
- # -----------------------------
337
- # Model fallback loop
338
- # -----------------------------
339
-
340
- for model_id in self.MODELS_TO_TRY:
341
-
342
- print(f"Trying model: {model_id}")
343
-
344
- try:
345
-
346
- agent = self._get_agent(model_id)
347
-
348
- response = agent.run(prompt)
349
-
350
- answer = str(response).strip()
351
-
352
- # Cleanup
353
- for prefix in [
354
- "FINAL ANSWER:",
355
- "Final Answer:",
356
- "Answer:",
357
- "The answer is:"
358
- ]:
359
- if answer.startswith(prefix):
360
- answer = answer[len(prefix):].strip()
361
-
362
- self.active_model_id = model_id
363
-
364
- print(f"Answer: {answer}")
365
-
366
- return answer
367
-
368
- except Exception as e:
369
-
370
- print(f"Model failed: {e}")
371
-
372
- time.sleep(2)
373
-
374
- continue
375
-
376
- # If everything fails
377
- return "unknown"
378
-
379
-
380
- # -----------------------------
381
- # Main Evaluation Function
382
- # -----------------------------
383
- def run_and_submit_all(profile: gr.OAuthProfile | None):
384
-
385
- space_id = os.getenv("SPACE_ID")
386
 
387
  if profile:
388
- username = f"{profile.username}"
389
  print(f"User logged in: {username}")
390
  else:
 
391
  return "Please Login to Hugging Face with the button.", None
392
 
393
  api_url = DEFAULT_API_URL
394
  questions_url = f"{api_url}/questions"
395
  submit_url = f"{api_url}/submit"
396
 
397
- # Create Agent never raises now (no test call at init)
398
  try:
399
  agent = BasicAgent()
400
  except Exception as e:
 
401
  return f"Error initializing agent: {e}", None
402
-
403
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
404
  print(agent_code)
405
 
406
- # Fetch Questions
 
407
  try:
408
- response = requests.get(questions_url, timeout=30)
409
  response.raise_for_status()
410
  questions_data = response.json()
411
- print(f"Fetched {len(questions_data)} questions")
412
- except Exception as e:
 
 
 
 
413
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
414
 
415
- # Run Agent on Each Question
416
  results_log = []
417
  answers_payload = []
418
-
419
  for item in questions_data:
420
  task_id = item.get("task_id")
421
  question_text = item.get("question")
422
-
423
  if not task_id or question_text is None:
 
424
  continue
425
-
426
- print(f"\n{'='*50}")
427
- print(f"Processing task: {task_id}")
428
- print(f"Question: {question_text[:200]}...")
429
-
430
  try:
431
- submitted_answer = agent(question_text, task_id=task_id)
 
 
432
  except Exception as e:
433
- submitted_answer = f"ERROR: {e}"
434
-
435
- answers_payload.append({
436
- "task_id": task_id,
437
- "submitted_answer": submitted_answer
438
- })
439
-
440
- results_log.append({
441
- "Task ID": task_id,
442
- "Question": question_text[:300],
443
- "Submitted Answer": submitted_answer
444
- })
445
 
446
- # Small delay to avoid rate limits
447
- time.sleep(2)
 
448
 
449
- # Submit Answers
450
- submission_data = {
451
- "username": username.strip(),
452
- "agent_code": agent_code,
453
- "answers": answers_payload
454
- }
455
-
456
- print(f"\nSubmitting {len(answers_payload)} answers...")
457
 
 
 
458
  try:
459
- response = requests.post(
460
- submit_url,
461
- json=submission_data,
462
- timeout=120
463
- )
464
  response.raise_for_status()
465
  result_data = response.json()
466
-
467
  final_status = (
468
  f"Submission Successful!\n"
469
  f"User: {result_data.get('username')}\n"
470
- f"Last Model Used: {agent.active_model_id}\n"
471
  f"Overall Score: {result_data.get('score', 'N/A')}% "
472
- f"({result_data.get('correct_count', '?')}/"
473
- f"{result_data.get('total_attempted', '?')} correct)\n"
474
  f"Message: {result_data.get('message', 'No message received.')}"
475
  )
476
-
477
  results_df = pd.DataFrame(results_log)
478
  return final_status, results_df
479
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
  except Exception as e:
 
 
481
  results_df = pd.DataFrame(results_log)
482
- return f"Submission Failed: {e}", results_df
483
 
484
 
485
- # -----------------------------
486
- # Gradio UI
487
- # -----------------------------
488
  with gr.Blocks() as demo:
489
-
490
- gr.Markdown("# GAIA Smart Agent 🤖")
491
-
492
  gr.Markdown(
493
  """
494
- Login with Hugging Face and run your AI Agent on GAIA benchmark questions.
495
 
496
- **Models (auto-fallback in this order):**
497
- 1. 🥇 Qwen/Qwen2.5-72B-Instruct *(best)*
498
- 2. 🥈 meta-llama/Llama-3.3-70B-Instruct *(fallback)*
499
- 3. 🥉 mistralai/Mixtral-8x7B-Instruct-v0.1 *(fallback)*
500
- 4. ⚙️ mistralai/Mistral-7B-Instruct-v0.3 *(last resort)*
501
 
502
- **Tools:** Web Search · Webpage Reader · File Downloader · Calculator
 
 
 
503
  """
504
  )
505
 
506
  gr.LoginButton()
507
 
508
- run_button = gr.Button(
509
- "Run Evaluation & Submit All Answers",
510
- variant="primary"
511
- )
512
 
513
- status_output = gr.Textbox(
514
- label="Run Status / Submission Result",
515
- lines=8
516
- )
517
-
518
- results_table = gr.DataFrame(
519
- label="Questions and Agent Answers",
520
- wrap=True
521
- )
522
 
523
  run_button.click(
524
  fn=run_and_submit_all,
525
  outputs=[status_output, results_table]
526
  )
527
 
528
-
529
- # -----------------------------
530
- # Launch App
531
- # -----------------------------
532
  if __name__ == "__main__":
533
- print("\n========== GAIA APP STARTING ==========\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  demo.launch(debug=True, share=False)
 
1
+ """ Basic Agent Evaluation Runner"""
2
  import os
3
+ import inspect
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from langchain_core.messages import HumanMessage
8
+ from agent import build_graph
9
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # (Keep Constants as is)
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
18
 
 
 
 
 
 
 
19
 
20
  class BasicAgent:
21
+ """A langgraph agent."""
 
 
 
 
 
 
22
  def __init__(self):
23
+ print("BasicAgent initialized.")
24
+ self.graph = build_graph()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ def __call__(self, question: str) -> str:
27
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
28
+ messages = [HumanMessage(content=question)]
29
+ result = self.graph.invoke({"messages": messages})
30
+ answer = result['messages'][-1].content
31
+ return answer # [14:] slicing is no longer needed
32
 
 
 
 
33
 
 
 
 
34
 
35
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
36
+ """
37
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
38
+ and displays the results.
39
+ """
40
+ # --- Determine HF Space Runtime URL and Repo URL ---
41
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  if profile:
44
+ username= f"{profile.username}"
45
  print(f"User logged in: {username}")
46
  else:
47
+ print("User not logged in.")
48
  return "Please Login to Hugging Face with the button.", None
49
 
50
  api_url = DEFAULT_API_URL
51
  questions_url = f"{api_url}/questions"
52
  submit_url = f"{api_url}/submit"
53
 
54
+ # 1. Instantiate Agent (modify this part to create your agent)
55
  try:
56
  agent = BasicAgent()
57
  except Exception as e:
58
+ print(f"Error instantiating agent: {e}")
59
  return f"Error initializing agent: {e}", None
60
+ # When the app runs as a Hugging Face Space, this link points to your codebase (useful for others, so please keep it public)
61
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
62
  print(agent_code)
63
 
64
+ # 2. Fetch Questions
65
+ print(f"Fetching questions from: {questions_url}")
66
  try:
67
+ response = requests.get(questions_url, timeout=15)
68
  response.raise_for_status()
69
  questions_data = response.json()
70
+ if not questions_data:
71
+ print("Fetched questions list is empty.")
72
+ return "Fetched questions list is empty or invalid format.", None
73
+ print(f"Fetched {len(questions_data)} questions.")
74
+ except requests.exceptions.RequestException as e:
75
+ print(f"Error fetching questions: {e}")
76
  return f"Error fetching questions: {e}", None
77
+ except requests.exceptions.JSONDecodeError as e:
78
+ print(f"Error decoding JSON response from questions endpoint: {e}")
79
+ print(f"Response text: {response.text[:500]}")
80
+ return f"Error decoding server response for questions: {e}", None
81
+ except Exception as e:
82
+ print(f"An unexpected error occurred fetching questions: {e}")
83
+ return f"An unexpected error occurred fetching questions: {e}", None
84
 
85
+ # 3. Run your Agent
86
  results_log = []
87
  answers_payload = []
88
+ print(f"Running agent on {len(questions_data)} questions...")
89
  for item in questions_data:
90
  task_id = item.get("task_id")
91
  question_text = item.get("question")
 
92
  if not task_id or question_text is None:
93
+ print(f"Skipping item with missing task_id or question: {item}")
94
  continue
 
 
 
 
 
95
  try:
96
+ submitted_answer = agent(question_text)
97
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
98
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
99
  except Exception as e:
100
+ print(f"Error running agent on task {task_id}: {e}")
101
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
 
102
 
103
+ if not answers_payload:
104
+ print("Agent did not produce any answers to submit.")
105
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
 
107
+ # 4. Prepare Submission
108
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
110
+ print(status_update)
 
 
 
 
111
 
112
+ # 5. Submit
113
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
114
  try:
115
+ response = requests.post(submit_url, json=submission_data, timeout=60)
 
 
 
 
116
  response.raise_for_status()
117
  result_data = response.json()
 
118
  final_status = (
119
  f"Submission Successful!\n"
120
  f"User: {result_data.get('username')}\n"
 
121
  f"Overall Score: {result_data.get('score', 'N/A')}% "
122
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
123
  f"Message: {result_data.get('message', 'No message received.')}"
124
  )
125
+ print("Submission successful.")
126
  results_df = pd.DataFrame(results_log)
127
  return final_status, results_df
128
+ except requests.exceptions.HTTPError as e:
129
+ error_detail = f"Server responded with status {e.response.status_code}."
130
+ try:
131
+ error_json = e.response.json()
132
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
133
+ except requests.exceptions.JSONDecodeError:
134
+ error_detail += f" Response: {e.response.text[:500]}"
135
+ status_message = f"Submission Failed: {error_detail}"
136
+ print(status_message)
137
+ results_df = pd.DataFrame(results_log)
138
+ return status_message, results_df
139
+ except requests.exceptions.Timeout:
140
+ status_message = "Submission Failed: The request timed out."
141
+ print(status_message)
142
+ results_df = pd.DataFrame(results_log)
143
+ return status_message, results_df
144
+ except requests.exceptions.RequestException as e:
145
+ status_message = f"Submission Failed: Network error - {e}"
146
+ print(status_message)
147
+ results_df = pd.DataFrame(results_log)
148
+ return status_message, results_df
149
  except Exception as e:
150
+ status_message = f"An unexpected error occurred during submission: {e}"
151
+ print(status_message)
152
  results_df = pd.DataFrame(results_log)
153
+ return status_message, results_df
154
 
155
 
156
+ # --- Build Gradio Interface using Blocks ---
 
 
157
  with gr.Blocks() as demo:
158
+ gr.Markdown("# Basic Agent Evaluation Runner")
 
 
159
  gr.Markdown(
160
  """
161
+ **Instructions:**
162
 
163
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
164
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
165
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
166
 
167
+ ---
168
+ **Disclaimers:**
169
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
170
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
171
  """
172
  )
173
 
174
  gr.LoginButton()
175
 
176
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
 
177
 
178
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
179
+ # Removed max_rows=10 from DataFrame constructor
180
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
 
 
 
181
 
182
  run_button.click(
183
  fn=run_and_submit_all,
184
  outputs=[status_output, results_table]
185
  )
186
 
 
 
 
 
187
  if __name__ == "__main__":
188
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
189
+ # Check for SPACE_HOST and SPACE_ID at startup for information
190
+ space_host_startup = os.getenv("SPACE_HOST")
191
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
192
+
193
+ if space_host_startup:
194
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
195
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
196
+ else:
197
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
198
+
199
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
200
+ print(f"✅ SPACE_ID found: {space_id_startup}")
201
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
202
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
203
+ else:
204
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
205
+
206
+ print("-"*(60 + len(" App Starting ")) + "\n")
207
+
208
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
209
  demo.launch(debug=True, share=False)