Nitinguleria committed on
Commit
79ad221
·
verified ·
1 Parent(s): 46188de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -363
app.py CHANGED
@@ -1,284 +1,47 @@
 
1
  import os
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- import sympy
6
- import re
7
- from duckduckgo_search import DDGS
8
- from langgraph.graph import StateGraph, END
9
- from typing import TypedDict, Literal
10
 
11
- # Default API URL - you may need to update this
12
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
-
14
- # --- Enhanced Tools for GAIA Benchmark ---
15
-
16
def wikipedia_search_tool(input: str) -> str:
    """Run a DuckDuckGo text search and return the leading result snippets.

    Despite its name, this tool queries ``DDGS().text`` rather than Wikipedia.
    Five results are requested; only the first three are considered, and a
    snippet is kept only when its ``body`` is longer than ten characters.

    Args:
        input: The search query.

    Returns:
        The kept snippets, each labelled ``Source N`` and separated by blank
        lines; ``"No relevant information found."`` when nothing usable came
        back; or ``"Search Error: ..."`` if the search raised.
    """
    try:
        hits = DDGS().text(input, max_results=5)
        if not hits:
            return "No relevant information found."

        snippets = []
        for position, hit in enumerate(hits[:3], start=1):
            text = hit.get("body", "")
            if text and len(text) > 10:
                snippets.append(f"Source {position}: {text}")

        if not snippets:
            return "No relevant information found."
        return "\n\n".join(snippets)
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Search Error: {e}"
34
-
35
def math_solver_tool(input: str) -> str:
    """Evaluate a mathematical expression found in ``input`` with SymPy.

    The text is normalised (``^`` becomes ``**`` and ``÷`` becomes ``/``),
    then candidate expressions are tried in this order:

    1. spans matched by each regex pattern, longest first, skipping spans
       that hold no digit or (unless the span is the whole input) no
       arithmetic operator;
    2. the whole normalised input.

    The first candidate that SymPy can parse and evaluate wins.

    Args:
        input: A bare expression or a sentence containing one.

    Returns:
        ``str`` of the ``evalf()`` result, or
        ``"Could not solve mathematical expression: ..."`` on failure.
    """
    cleaned_input = input.replace("^", "**").replace("÷", "/")
    whole = cleaned_input.strip()

    math_patterns = [
        r'[\d\+\-\*/\^\(\)\.\s]+',
        r'[a-zA-Z\d\+\-\*/\^\(\)\.\s]+=.*',
    ]

    # Previously only the first regex match was tried, which for ordinary
    # sentences is a lone space; rank every plausible span instead.
    candidates = []
    for pattern in math_patterns:
        spans = [m.strip() for m in re.findall(pattern, cleaned_input)]
        plausible = [
            span for span in spans
            if any(ch.isdigit() for ch in span)
            and (span == whole or any(op in span for op in "+-*/"))
        ]
        candidates.extend(sorted(plausible, key=len, reverse=True))
    candidates.append(cleaned_input)  # direct attempt on the full text, last

    last_error = None
    for candidate in candidates:
        try:
            return str(sympy.sympify(candidate).evalf())
        except Exception as exc:  # sympify raises several unrelated types
            last_error = exc

    # Fallback for plain arithmetic only.
    # NOTE(review): eval() on external text is risky even with this character
    # allow-list (e.g. huge exponents); consider removing this path.
    safe_chars = set('0123456789+-*/.() ')
    if all(c in safe_chars for c in input):
        try:
            return str(eval(input))
        except Exception:
            pass  # fall through to the error message below

    return f"Could not solve mathematical expression: {last_error}"
73
-
74
def code_execution_tool(input: str) -> str:
    """Run a small Python snippet with a restricted set of builtins.

    The snippet is first evaluated as an expression. If it is not a valid
    expression it is executed as statements. Anything printed is captured.

    Result selection:
      * expression: ``str`` of its value, or the printed text when the value
        is ``None`` and something was printed (e.g. ``print(6)``);
      * statements: the variable ``result`` if the snippet set one, else the
        printed text, else ``"Code executed successfully"``.

    NOTE(review): exec/eval with a trimmed ``__builtins__`` is NOT a security
    sandbox; do not feed this tool untrusted input without real isolation.

    Args:
        input: Python source to run.

    Returns:
        The result text described above, or ``"Code execution error: ..."``
        if the snippet raised.
    """
    import contextlib
    import io

    # One namespace for globals and locals, so that functions defined by the
    # snippet can see each other (split dicts break such lookups).
    namespace = {
        '__builtins__': {
            'len': len, 'str': str, 'int': int, 'float': float,
            'list': list, 'dict': dict, 'tuple': tuple, 'set': set,
            'sum': sum, 'max': max, 'min': min, 'abs': abs,
            'round': round, 'range': range, 'enumerate': enumerate,
            'zip': zip, 'sorted': sorted, 'reversed': reversed,
            'print': print,
        },
        'math': __import__('math'),
        're': __import__('re'),
    }

    captured = io.StringIO()
    try:
        with contextlib.redirect_stdout(captured):
            try:
                value = eval(input, namespace)
                is_expression = True
            except SyntaxError:
                # Not an expression: nothing has run yet, so exec is safe.
                exec(input, namespace)
                value = None
                is_expression = False

        printed = captured.getvalue().strip()
        if is_expression:
            if value is None and printed:
                return printed
            return str(value)
        if 'result' in namespace:
            return str(namespace['result'])
        return printed or "Code executed successfully"

    except Exception as e:
        return f"Code execution error: {e}"
107
-
108
def general_reasoning_tool(input: str) -> str:
    """Return a canned, keyword-based framing of the question.

    This is a placeholder, not real reasoning: it classifies the text by
    substring keywords and echoes an excerpt of it.

    Args:
        input: The question text.

    Returns:
        A string prefixed ``Analysis:``, ``Reasoning:`` or
        ``General analysis:`` followed by an excerpt of ``input``. The
        excerpt ends with ``...`` only when the text was actually cut.
    """
    def excerpt(limit: int) -> str:
        # Mark truncation only when it happened; an unconditional "..."
        # misrepresents short inputs as cut off.
        return input[:limit] + "..." if len(input) > limit else input

    lowered = input.lower()
    if any(word in lowered for word in ('compare', 'difference', 'similar', 'contrast')):
        return f"Analysis: This appears to be a comparison question. Key factors to consider: {excerpt(200)}"
    if any(word in lowered for word in ('cause', 'reason', 'why', 'because')):
        return f"Reasoning: This is asking about causation. Consider multiple factors that might contribute to: {excerpt(200)}"
    return f"General analysis: {excerpt(300)}"
120
-
121
- # --- State definition ---
122
-
123
class AgentState(TypedDict):
    """State dictionary passed through the agent graph."""
    question: str   # the question being answered
    response: str   # text produced by the tool node
    tool_used: str  # label of the node that produced the response


# --- Enhanced Routing logic for GAIA ---

def _mentions_any(text: str, keywords) -> bool:
    """Return True if ``text`` contains any keyword as a whole word.

    Alphabetic keywords (including multi-word ones) must sit on word
    boundaries, with common inflections allowed (-s, -d, -es, -ed, -ing).
    Symbol keywords such as ``%`` or ``=`` are matched as plain substrings.
    """
    for keyword in keywords:
        escaped = re.escape(keyword)
        if not keyword.replace(" ", "").isalpha():
            if keyword in text:
                return True
            continue
        stem = re.escape(keyword[:-1]) if keyword.endswith("e") else escaped
        pattern = rf"\b(?:{escaped}(?:s|d)?|{stem}(?:es|ed|ing))\b"
        if re.search(pattern, text):
            return True
    return False


def route_question(state: AgentState) -> Literal["math", "code", "search", "reasoning"]:
    """Pick the tool node for a question using keyword heuristics.

    Categories are checked in priority order math, code, search; anything
    unmatched goes to "reasoning". Keywords match whole words, so "sum" no
    longer fires on "summary", nor "run" on "brunch".

    Args:
        state: Graph state; only ``state["question"]`` is read.

    Returns:
        One of ``"math"``, ``"code"``, ``"search"`` or ``"reasoning"``.
    """
    q = state["question"].lower()

    math_keywords = [
        "solve", "calculate", "evaluate", "compute", "sum", "multiply",
        "divide", "percentage", "%", "=", "equation", "formula", "average",
        "total", "cost", "price", "number", "how many", "how much",
    ]
    code_keywords = [
        "python", "code", "function", "return", "algorithm", "program",
        "script", "execute", "run", "implementation",
    ]
    search_keywords = [
        "what", "who", "when", "where", "which", "capital", "country",
        "invented", "created", "founded", "established", "located", "known for",
    ]

    # Inline arithmetic ("12*4") or a dollar amount ("$5") also counts as math.
    if (_mentions_any(q, math_keywords)
            or re.search(r'\d+[\+\-\*/\^]\d+', q)
            or re.search(r'\$\d+', q)):
        return "math"
    if _mentions_any(q, code_keywords):
        return "code"
    if _mentions_any(q, search_keywords):
        return "search"
    return "reasoning"
165
-
166
- # --- Node functions ---
167
-
168
def math_node(state: AgentState) -> AgentState:
    """Graph node: answer the question with ``math_solver_tool``."""
    answer = math_solver_tool(state["question"])
    return AgentState(
        question=state["question"],
        response=answer,
        tool_used="math",
    )
175
-
176
def code_node(state: AgentState) -> AgentState:
    """Graph node: pass the question text to ``code_execution_tool``."""
    answer = code_execution_tool(state["question"])
    return AgentState(
        question=state["question"],
        response=answer,
        tool_used="code",
    )
183
-
184
def search_node(state: AgentState) -> AgentState:
    """Graph node: look the question up with ``wikipedia_search_tool``."""
    answer = wikipedia_search_tool(state["question"])
    return AgentState(
        question=state["question"],
        response=answer,
        tool_used="search",
    )
191
 
192
def reasoning_node(state: AgentState) -> AgentState:
    """Graph node: fall back to ``general_reasoning_tool``."""
    answer = general_reasoning_tool(state["question"])
    return AgentState(
        question=state["question"],
        response=answer,
        tool_used="reasoning",
    )
199
 
200
- # --- LangGraph setup with corrected API ---
201
-
202
def create_agent_graph():
    """Build and compile the single-step routing graph.

    ``route_question`` picks one of four tool nodes from the start of the
    graph; each node then leads straight to ``END``.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    # Node label -> handler. Each label is also the routing key returned by
    # route_question, so the conditional-edge map is the identity.
    handlers = {
        "math": math_node,
        "code": code_node,
        "search": search_node,
        "reasoning": reasoning_node,
    }

    workflow = StateGraph(AgentState)
    for label, handler in handlers.items():
        workflow.add_node(label, handler)

    workflow.add_conditional_edges(
        "__start__",
        route_question,
        {label: label for label in handlers},
    )

    for label in handlers:
        workflow.add_edge(label, END)

    return workflow.compile()
233
 
234
- # Create the compiled graph
235
- app_graph = create_agent_graph()
236
 
237
- # --- Enhanced Agent wrapper ---
238
 
239
class BasicAgent:
    """Callable wrapper around the compiled routing graph (``app_graph``)."""

    def __init__(self):
        self.graph = app_graph
        print("Enhanced LangGraph Agent initialized for GAIA benchmark.")

    @staticmethod
    def _tidy_number(token: str) -> str:
        """Drop zero padding from a decimal string (``4.000`` -> ``4``)."""
        if "." in token:
            token = token.rstrip("0").rstrip(".")
        return "0" if token in ("", "-", "-0") else token

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the answer text.

        For the math tool, the last number in the response is returned with
        trailing zero padding removed. Solver error messages are passed
        through untouched rather than mined for stray digits.

        Args:
            question: The question to answer.

        Returns:
            The answer string, or ``"Error: Could not process the question
            - ..."`` if the graph raised.
        """
        try:
            state = {
                "question": question,
                "response": "",
                "tool_used": ""
            }

            result = self.graph.invoke(state)

            response = result.get("response", "No response generated")
            tool_used = result.get("tool_used", "unknown")

            if tool_used == "math" and response:
                text = str(response)
                # An error message is not an answer; do not extract from it.
                if not text.startswith("Could not solve"):
                    # Require digits after the dot so a sentence-ending
                    # period is not captured as part of the number.
                    numbers = re.findall(r'-?\d+(?:\.\d+)?', text)
                    if numbers:
                        return self._tidy_number(numbers[-1])

            return str(response)

        except Exception as e:
            print(f"Error in agent processing: {e}")
            return f"Error: Could not process the question - {e}"
271
 
272
- def run_and_submit_all(profile: gr.OAuthProfile | None):
 
273
  """
274
  Fetches all questions, runs the BasicAgent on them, submits all answers,
275
  and displays the results.
276
  """
277
  # --- Determine HF Space Runtime URL and Repo URL ---
278
- space_id = os.getenv("SPACE_ID")
279
 
280
  if profile:
281
- username = f"{profile.username}"
282
  print(f"User logged in: {username}")
283
  else:
284
  print("User not logged in.")
@@ -288,15 +51,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
288
  questions_url = f"{api_url}/questions"
289
  submit_url = f"{api_url}/submit"
290
 
291
- # 1. Instantiate Agent
292
  try:
293
  agent = BasicAgent()
294
  except Exception as e:
295
  print(f"Error instantiating agent: {e}")
296
  return f"Error initializing agent: {e}", None
297
-
298
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "local"
299
- print(f"Agent code location: {agent_code}")
300
 
301
  # 2. Fetch Questions
302
  print(f"Fetching questions from: {questions_url}")
@@ -305,78 +68,56 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
305
  response.raise_for_status()
306
  questions_data = response.json()
307
  if not questions_data:
308
- print("Fetched questions list is empty.")
309
- return "Fetched questions list is empty or invalid format.", None
310
  print(f"Fetched {len(questions_data)} questions.")
311
  except requests.exceptions.RequestException as e:
312
  print(f"Error fetching questions: {e}")
313
  return f"Error fetching questions: {e}", None
 
 
 
 
314
  except Exception as e:
315
  print(f"An unexpected error occurred fetching questions: {e}")
316
  return f"An unexpected error occurred fetching questions: {e}", None
317
 
318
- # 3. Run Agent on all questions
319
  results_log = []
320
  answers_payload = []
321
  print(f"Running agent on {len(questions_data)} questions...")
322
-
323
- for i, item in enumerate(questions_data):
324
  task_id = item.get("task_id")
325
  question_text = item.get("question")
326
-
327
  if not task_id or question_text is None:
328
  print(f"Skipping item with missing task_id or question: {item}")
329
  continue
330
-
331
- print(f"Processing question {i+1}/{len(questions_data)}: {task_id}")
332
-
333
  try:
334
  submitted_answer = agent(question_text)
335
- answers_payload.append({
336
- "task_id": task_id,
337
- "submitted_answer": submitted_answer
338
- })
339
- results_log.append({
340
- "Task ID": task_id,
341
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
342
- "Submitted Answer": submitted_answer
343
- })
344
  except Exception as e:
345
- print(f"Error running agent on task {task_id}: {e}")
346
- error_answer = f"AGENT ERROR: {e}"
347
- answers_payload.append({
348
- "task_id": task_id,
349
- "submitted_answer": error_answer
350
- })
351
- results_log.append({
352
- "Task ID": task_id,
353
- "Question": question_text[:100] + "..." if len(question_text) > 100 else question_text,
354
- "Submitted Answer": error_answer
355
- })
356
 
357
  if not answers_payload:
358
  print("Agent did not produce any answers to submit.")
359
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
360
 
361
- # 4. Prepare Submission
362
- submission_data = {
363
- "username": username.strip(),
364
- "agent_code": agent_code,
365
- "answers": answers_payload
366
- }
367
-
368
- print(f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'...")
369
 
370
- # 5. Submit answers
371
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
372
  try:
373
- response = requests.post(submit_url, json=submission_data, timeout=120)
374
  response.raise_for_status()
375
  result_data = response.json()
376
-
377
  final_status = (
378
  f"Submission Successful!\n"
379
- f"User: {result_data.get('username', username)}\n"
380
  f"Overall Score: {result_data.get('score', 'N/A')}% "
381
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
382
  f"Message: {result_data.get('message', 'No message received.')}"
@@ -384,96 +125,85 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
384
  print("Submission successful.")
385
  results_df = pd.DataFrame(results_log)
386
  return final_status, results_df
387
-
388
  except requests.exceptions.HTTPError as e:
389
  error_detail = f"Server responded with status {e.response.status_code}."
390
  try:
391
  error_json = e.response.json()
392
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
393
- except:
394
  error_detail += f" Response: {e.response.text[:500]}"
395
  status_message = f"Submission Failed: {error_detail}"
396
  print(status_message)
397
  results_df = pd.DataFrame(results_log)
398
  return status_message, results_df
399
-
 
 
 
 
 
 
 
 
 
400
  except Exception as e:
401
- status_message = f"Submission error: {e}"
402
  print(status_message)
403
  results_df = pd.DataFrame(results_log)
404
  return status_message, results_df
405
 
406
- # --- Gradio Interface ---
407
# Gradio UI: a title, an instructions panel, a login button, and a single
# run button whose click runs run_and_submit_all and fills the two outputs.
with gr.Blocks(title="GAIA Benchmark Agent") as demo:
    gr.Markdown("# Enhanced GAIA Benchmark Agent")
    gr.Markdown(
        """
        **Enhanced Agent for GAIA Benchmark - Targeting 60% Accuracy**

        **Features:**
        - Enhanced mathematical problem solving with symbolic computation
        - Improved search capabilities with multiple source aggregation
        - Safe code execution environment
        - Smart question routing (math/code/search/reasoning)
        - Better answer formatting and extraction

        **Instructions:**
        1. Log in to your Hugging Face account using the button below
        2. Click 'Run Evaluation & Submit All Answers' to start the benchmark
        3. The agent will process all questions and submit answers automatically

        **Note:** Processing may take several minutes depending on the number of questions.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")

    # Read-only box for the status string returned by run_and_submit_all.
    status_output = gr.Textbox(
        label="Status & Results",
        lines=8,
        interactive=False,
        placeholder="Click the button above to start the evaluation..."
    )

    # Read-only table for the results DataFrame returned by run_and_submit_all.
    results_table = gr.DataFrame(
        label="Questions and Agent Responses",
        wrap=True,
        interactive=False
    )

    # No explicit inputs are wired; run_and_submit_all declares a
    # gr.OAuthProfile | None parameter (presumably supplied by Gradio from
    # the login session - confirm against the Gradio OAuth docs).
    run_button.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status_output, results_table]
    )
451
 
452
if __name__ == "__main__":
    # Startup banner with environment details, then launch the Gradio app.
    print("\n" + "="*50)
    print("🚀 GAIA Benchmark Agent Starting")
    print("="*50)

    # Environment info
    space_host = os.getenv("SPACE_HOST")
    space_id = os.getenv("SPACE_ID")

    if space_host:
        print(f"✅ SPACE_HOST: {space_host}")
        # SPACE_HOST normally already ends in ".hf.space"; only add the
        # suffix when it is missing so the URL is not doubled.
        runtime_host = space_host if space_host.endswith(".hf.space") else f"{space_host}.hf.space"
        print(f"   Runtime URL: https://{runtime_host}")
    else:
        print("ℹ️  Running locally (SPACE_HOST not found)")

    if space_id:
        print(f"✅ SPACE_ID: {space_id}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id}")
    else:
        print("ℹ️  SPACE_ID not found")

    print("="*50 + "\n")

    print("🎯 Target: 60% accuracy on GAIA benchmark")
    print("🔧 Enhanced tools: Math, Code, Search, Reasoning")
    print("\nLaunching Gradio interface...")

    demo.launch(debug=True, share=False)
 
1
+ """ Basic Agent Evaluation Runner"""
2
  import os
3
+ import inspect
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
7
+ from langchain_core.messages import HumanMessage
8
+ from agent import build_graph
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
 
11
 
12
+ # (Keep Constants as is)
13
+ # --- Constants ---
14
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
+ # --- Basic Agent Definition ---
17
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
18
 
 
19
 
20
class BasicAgent:
    """A LangGraph agent: wraps the graph returned by ``build_graph()``."""

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    def __call__(self, question: str) -> str:
        """Send ``question`` through the graph and return the final answer.

        The question is wrapped in a single ``HumanMessage``; the answer is
        the ``content`` of the last message in the graph's result.

        Args:
            question: The question text.

        Returns:
            The answer as a string.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        messages = [HumanMessage(content=question)]
        result = self.graph.invoke({"messages": messages})
        answer = result['messages'][-1].content

        # Slicing off a fixed prefix ([14:]) is no longer needed.
        if isinstance(answer, str):
            return answer

        # Message content may be a list of content blocks rather than a
        # string; flatten the text parts so the declared str return holds.
        if isinstance(answer, list):
            parts = []
            for block in answer:
                if isinstance(block, str):
                    parts.append(block)
                elif isinstance(block, dict) and isinstance(block.get("text"), str):
                    parts.append(block["text"])
            return "".join(parts)

        return str(answer)
32
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+
35
+ def run_and_submit_all( profile: gr.OAuthProfile | None):
36
  """
37
  Fetches all questions, runs the BasicAgent on them, submits all answers,
38
  and displays the results.
39
  """
40
  # --- Determine HF Space Runtime URL and Repo URL ---
41
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
42
 
43
  if profile:
44
+ username= f"{profile.username}"
45
  print(f"User logged in: {username}")
46
  else:
47
  print("User not logged in.")
 
51
  questions_url = f"{api_url}/questions"
52
  submit_url = f"{api_url}/submit"
53
 
54
+ # 1. Instantiate Agent ( modify this part to create your agent)
55
  try:
56
  agent = BasicAgent()
57
  except Exception as e:
58
  print(f"Error instantiating agent: {e}")
59
  return f"Error initializing agent: {e}", None
60
+ # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
61
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
62
+ print(agent_code)
63
 
64
  # 2. Fetch Questions
65
  print(f"Fetching questions from: {questions_url}")
 
68
  response.raise_for_status()
69
  questions_data = response.json()
70
  if not questions_data:
71
+ print("Fetched questions list is empty.")
72
+ return "Fetched questions list is empty or invalid format.", None
73
  print(f"Fetched {len(questions_data)} questions.")
74
  except requests.exceptions.RequestException as e:
75
  print(f"Error fetching questions: {e}")
76
  return f"Error fetching questions: {e}", None
77
+ except requests.exceptions.JSONDecodeError as e:
78
+ print(f"Error decoding JSON response from questions endpoint: {e}")
79
+ print(f"Response text: {response.text[:500]}")
80
+ return f"Error decoding server response for questions: {e}", None
81
  except Exception as e:
82
  print(f"An unexpected error occurred fetching questions: {e}")
83
  return f"An unexpected error occurred fetching questions: {e}", None
84
 
85
+ # 3. Run your Agent
86
  results_log = []
87
  answers_payload = []
88
  print(f"Running agent on {len(questions_data)} questions...")
89
+ for item in questions_data:
 
90
  task_id = item.get("task_id")
91
  question_text = item.get("question")
 
92
  if not task_id or question_text is None:
93
  print(f"Skipping item with missing task_id or question: {item}")
94
  continue
 
 
 
95
  try:
96
  submitted_answer = agent(question_text)
97
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
98
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
99
  except Exception as e:
100
+ print(f"Error running agent on task {task_id}: {e}")
101
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
 
102
 
103
  if not answers_payload:
104
  print("Agent did not produce any answers to submit.")
105
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
 
107
+ # 4. Prepare Submission
108
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
110
+ print(status_update)
 
 
 
 
111
 
112
+ # 5. Submit
113
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
114
  try:
115
+ response = requests.post(submit_url, json=submission_data, timeout=60)
116
  response.raise_for_status()
117
  result_data = response.json()
 
118
  final_status = (
119
  f"Submission Successful!\n"
120
+ f"User: {result_data.get('username')}\n"
121
  f"Overall Score: {result_data.get('score', 'N/A')}% "
122
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
123
  f"Message: {result_data.get('message', 'No message received.')}"
 
125
  print("Submission successful.")
126
  results_df = pd.DataFrame(results_log)
127
  return final_status, results_df
 
128
  except requests.exceptions.HTTPError as e:
129
  error_detail = f"Server responded with status {e.response.status_code}."
130
  try:
131
  error_json = e.response.json()
132
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
133
+ except requests.exceptions.JSONDecodeError:
134
  error_detail += f" Response: {e.response.text[:500]}"
135
  status_message = f"Submission Failed: {error_detail}"
136
  print(status_message)
137
  results_df = pd.DataFrame(results_log)
138
  return status_message, results_df
139
+ except requests.exceptions.Timeout:
140
+ status_message = "Submission Failed: The request timed out."
141
+ print(status_message)
142
+ results_df = pd.DataFrame(results_log)
143
+ return status_message, results_df
144
+ except requests.exceptions.RequestException as e:
145
+ status_message = f"Submission Failed: Network error - {e}"
146
+ print(status_message)
147
+ results_df = pd.DataFrame(results_log)
148
+ return status_message, results_df
149
  except Exception as e:
150
+ status_message = f"An unexpected error occurred during submission: {e}"
151
  print(status_message)
152
  results_df = pd.DataFrame(results_log)
153
  return status_message, results_df
154
 
155
+
156
+ # --- Build Gradio Interface using Blocks ---
157
# Gradio UI: a title, an instructions panel, a login button, and a single
# run button whose click runs run_and_submit_all and fills the two outputs.
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Read-only box for the status string returned by run_and_submit_all.
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No explicit inputs are wired; run_and_submit_all declares a
    # gr.OAuthProfile | None parameter (presumably supplied by Gradio from
    # the login session - confirm against the Gradio OAuth docs).
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )
186
 
187
if __name__ == "__main__":
    # Startup banner with environment details, then launch the Gradio app.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    # Check for SPACE_HOST and SPACE_ID at startup for information
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; only add the
        # suffix when it is missing so the URL is not doubled.
        runtime_host = (
            space_host_startup
            if space_host_startup.endswith(".hf.space")
            else f"{space_host_startup}.hf.space"
        )
        print(f"   Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule is as wide as the opening banner line.
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)