Shaukat39 committed on
Commit
0aeee3d
·
verified ·
1 Parent(s): 7a23afa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +300 -172
app.py CHANGED
@@ -1,241 +1,369 @@
1
- """ Basic Agent Evaluation Runner"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
- import re
4
- import inspect
5
  import gradio as gr
6
  import requests
7
  import pandas as pd
8
  from langchain_core.messages import HumanMessage
9
  from agent import build_graph
 
 
10
 
11
-
12
-
13
- # (Keep Constants as is)
14
- # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
- # --- Basic Agent Definition ---
18
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
19
-
20
 
21
  class BasicAgent:
22
- """A langgraph agent with debug logging."""
23
  def __init__(self):
24
- print("BasicAgent initialized.")
25
  self.graph = build_graph()
26
 
27
  def __call__(self, question: str) -> str:
28
- print(f"\n📥 Question {repr(question)}")
29
  messages = [HumanMessage(content=question)]
30
  response = self.graph.invoke({"messages": messages})
31
-
32
  raw_output = response["messages"][-1].content
33
- print("📦 Raw model output", repr(raw_output))
34
 
35
- import re
36
  match = re.search(r"FINAL ANSWER:\s*(.+)", raw_output, re.IGNORECASE)
37
  if match:
38
  final_answer = match.group(1).strip()
39
  else:
40
  final_answer = raw_output.strip()
41
- print("⚠️ Output missing 'FINAL ANSWER:' prefix. Using fallback.")
42
-
43
- print("✅ Final parsed answer:", repr(final_answer))
44
- return final_answer
45
-
46
 
 
 
 
 
47
 
48
 
49
-
50
- def run_and_submit_all( profile: gr.OAuthProfile | None):
51
- """
52
- Fetches all questions, runs the BasicAgent on them, submits all answers,
53
- and displays the results.
54
- """
55
- # --- Determine HF Space Runtime URL and Repo URL ---
56
  space_id = os.getenv("SPACE_ID", "").strip()
57
- # Get the SPACE_ID for sending link to the code
58
 
59
  if profile:
60
- username= f"{profile.username.strip()}"
61
- print(f"User logged in: {username}")
62
  else:
63
- print("User not logged in.")
64
- return "Please Login to Hugging Face with the button.", None
 
 
65
 
66
- api_url = DEFAULT_API_URL
67
- questions_url = f"{api_url}/questions"
68
- submit_url = f"{api_url}/submit"
69
 
70
- # 1. Instantiate Agent ( modify this part to create your agent)
71
  try:
72
  agent = BasicAgent()
73
  except Exception as e:
74
- print(f"Error instantiating agent: {e}")
75
- return f"Error initializing agent: {e}", None
76
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
77
-
78
 
79
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
80
-
81
- agent_code = agent_code.strip()
82
-
83
- print(agent_code)
84
-
85
- # 2. Fetch Questions
86
- print(f"Fetching questions from: {questions_url}")
87
  try:
88
  response = requests.get(questions_url, timeout=15)
89
  response.raise_for_status()
90
  questions_data = response.json()
91
- if not questions_data:
92
- print("Fetched questions list is empty.")
93
- return "Fetched questions list is empty or invalid format.", None
94
- print(f"Fetched {len(questions_data)} questions.")
95
- except requests.exceptions.RequestException as e:
96
- print(f"Error fetching questions: {e}")
97
- return f"Error fetching questions: {e}", None
98
- except requests.exceptions.JSONDecodeError as e:
99
- print(f"Error decoding JSON response from questions endpoint: {e}")
100
- print(f"Response text: {response.text[:500]}")
101
- return f"Error decoding server response for questions: {e}", None
102
  except Exception as e:
103
- print(f"An unexpected error occurred fetching questions: {e}")
104
- return f"An unexpected error occurred fetching questions: {e}", None
105
 
106
- # 3. Run your Agent
107
  results_log = []
108
  answers_payload = []
109
- print(f"Running agent on {len(questions_data)} questions...")
 
110
  for item in questions_data:
111
  task_id = item.get("task_id")
112
  question_text = item.get("question")
113
- if not task_id or question_text is None:
114
- print(f"Skipping item with missing task_id or question: {item}")
115
  continue
116
  try:
117
- submitted_answer = agent(question_text)
118
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
119
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
120
  except Exception as e:
121
- print(f"Error running agent on task {task_id}: {e}")
122
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
123
 
124
  if not answers_payload:
125
- print("Agent did not produce any answers to submit.")
126
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
127
 
128
- # 4. Prepare Submission
129
- print("Sanitized submission preview:")
130
- print("username →", repr(username))
131
- print("agent_code →", repr(agent_code))
132
- for a in answers_payload[:3]: # Show first few answers
133
- print("sample answer →", repr(a))
134
 
 
 
135
 
136
- submission_data = {
137
- "username": username.strip(),
138
- "agent_code": agent_code.strip(), # <<< prevent newline issues here too
139
- "answers": answers_payload
140
- }
141
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
142
- print(status_update)
143
- import json
144
- print("📤 SUBMISSION PAYLOAD →", json.dumps(submission_data, indent=2))
145
-
146
- # 5. Submit
147
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
148
  try:
149
  response = requests.post(submit_url, json=submission_data, timeout=60)
150
  response.raise_for_status()
151
- result_data = response.json()
152
- final_status = (
153
- f"Submission Successful!\n"
154
- f"User: {result_data.get('username')}\n"
155
- f"Overall Score: {result_data.get('score', 'N/A')}% "
156
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
157
- f"Message: {result_data.get('message', 'No message received.')}"
158
  )
159
- print("Submission successful.")
160
- results_df = pd.DataFrame(results_log)
161
- return final_status, results_df
162
- except requests.exceptions.HTTPError as e:
163
- error_detail = f"Server responded with status {e.response.status_code}."
164
- try:
165
- error_json = e.response.json()
166
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
167
- except requests.exceptions.JSONDecodeError:
168
- error_detail += f" Response: {e.response.text[:500]}"
169
- status_message = f"Submission Failed: {error_detail}"
170
- print(status_message)
171
- results_df = pd.DataFrame(results_log)
172
- return status_message, results_df
173
- except requests.exceptions.Timeout:
174
- status_message = "Submission Failed: The request timed out."
175
- print(status_message)
176
- results_df = pd.DataFrame(results_log)
177
- return status_message, results_df
178
- except requests.exceptions.RequestException as e:
179
- status_message = f"Submission Failed: Network error - {e}"
180
- print(status_message)
181
- results_df = pd.DataFrame(results_log)
182
- return status_message, results_df
183
  except Exception as e:
184
- status_message = f"An unexpected error occurred during submission: {e}"
185
- print(status_message)
186
- results_df = pd.DataFrame(results_log)
187
- return status_message, results_df
188
 
189
 
190
- # --- Build Gradio Interface using Blocks ---
191
  with gr.Blocks() as demo:
192
- gr.Markdown("# Basic Agent Evaluation Runner")
193
- gr.Markdown(
194
- """
195
- **Instructions:**
196
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
197
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
198
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
199
- ---
200
- **Disclaimers:**
201
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
202
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
203
- """
204
- )
205
-
206
  gr.LoginButton()
207
-
208
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
 
209
 
210
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
211
- # Removed max_rows=10 from DataFrame constructor
212
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
213
-
214
- run_button.click(
215
- fn=run_and_submit_all,
216
- outputs=[status_output, results_table]
217
- )
218
 
219
  if __name__ == "__main__":
220
- print("\n" + "-"*30 + " App Starting " + "-"*30)
221
- # Check for SPACE_HOST and SPACE_ID at startup for information
222
- space_host_startup = os.getenv("SPACE_HOST")
223
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
224
-
225
- if space_host_startup:
226
- print(f"✅ SPACE_HOST found: {space_host_startup}")
227
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
228
- else:
229
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
230
-
231
- if space_id_startup: # Print repo URLs if SPACE_ID is found
232
- print(f"✅ SPACE_ID found: {space_id_startup}")
233
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
234
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
235
- else:
236
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
237
-
238
- print("-"*(60 + len(" App Starting ")) + "\n")
239
-
240
- print("Launching Gradio Interface for Basic Agent Evaluation...")
241
- demo.launch(debug=True, share=False)
 
1
+ # """ Basic Agent Evaluation Runner"""
2
+ # import os
3
+ # import re
4
+ # import inspect
5
+ # import gradio as gr
6
+ # import requests
7
+ # import pandas as pd
8
+ # from langchain_core.messages import HumanMessage
9
+ # from agent import build_graph
10
+
11
+
12
+
13
+ # # (Keep Constants as is)
14
+ # # --- Constants ---
15
+ # DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
+
17
+ # # --- Basic Agent Definition ---
18
+ # # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
19
+
20
+
21
+ # class BasicAgent:
22
+ # """A langgraph agent with debug logging."""
23
+ # def __init__(self):
24
+ # print("BasicAgent initialized.")
25
+ # self.graph = build_graph()
26
+
27
+ # def __call__(self, question: str) -> str:
28
+ # print(f"\n📥 Question → {repr(question)}")
29
+ # messages = [HumanMessage(content=question)]
30
+ # response = self.graph.invoke({"messages": messages})
31
+
32
+ # raw_output = response["messages"][-1].content
33
+ # print("📦 Raw model output →", repr(raw_output))
34
+
35
+ # import re
36
+ # match = re.search(r"FINAL ANSWER:\s*(.+)", raw_output, re.IGNORECASE)
37
+ # if match:
38
+ # final_answer = match.group(1).strip()
39
+ # else:
40
+ # final_answer = raw_output.strip()
41
+ # print("⚠️ Output missing 'FINAL ANSWER:' prefix. Using fallback.")
42
+
43
+ # print("✅ Final parsed answer:", repr(final_answer))
44
+ # return final_answer
45
+
46
+
47
+
48
+
49
+
50
+ # def run_and_submit_all( profile: gr.OAuthProfile | None):
51
+ # """
52
+ # Fetches all questions, runs the BasicAgent on them, submits all answers,
53
+ # and displays the results.
54
+ # """
55
+ # # --- Determine HF Space Runtime URL and Repo URL ---
56
+ # space_id = os.getenv("SPACE_ID", "").strip()
57
+ # # Get the SPACE_ID for sending link to the code
58
+
59
+ # if profile:
60
+ # username= f"{profile.username.strip()}"
61
+ # print(f"User logged in: {username}")
62
+ # else:
63
+ # print("User not logged in.")
64
+ # return "Please Login to Hugging Face with the button.", None
65
+
66
+ # api_url = DEFAULT_API_URL
67
+ # questions_url = f"{api_url}/questions"
68
+ # submit_url = f"{api_url}/submit"
69
+
70
+ # # 1. Instantiate Agent ( modify this part to create your agent)
71
+ # try:
72
+ # agent = BasicAgent()
73
+ # except Exception as e:
74
+ # print(f"Error instantiating agent: {e}")
75
+ # return f"Error initializing agent: {e}", None
76
+ # # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
77
+
78
+
79
+ # agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
80
+
81
+ # agent_code = agent_code.strip()
82
+
83
+ # print(agent_code)
84
+
85
+ # # 2. Fetch Questions
86
+ # print(f"Fetching questions from: {questions_url}")
87
+ # try:
88
+ # response = requests.get(questions_url, timeout=15)
89
+ # response.raise_for_status()
90
+ # questions_data = response.json()
91
+ # if not questions_data:
92
+ # print("Fetched questions list is empty.")
93
+ # return "Fetched questions list is empty or invalid format.", None
94
+ # print(f"Fetched {len(questions_data)} questions.")
95
+ # except requests.exceptions.RequestException as e:
96
+ # print(f"Error fetching questions: {e}")
97
+ # return f"Error fetching questions: {e}", None
98
+ # except requests.exceptions.JSONDecodeError as e:
99
+ # print(f"Error decoding JSON response from questions endpoint: {e}")
100
+ # print(f"Response text: {response.text[:500]}")
101
+ # return f"Error decoding server response for questions: {e}", None
102
+ # except Exception as e:
103
+ # print(f"An unexpected error occurred fetching questions: {e}")
104
+ # return f"An unexpected error occurred fetching questions: {e}", None
105
+
106
+ # # 3. Run your Agent
107
+ # results_log = []
108
+ # answers_payload = []
109
+ # print(f"Running agent on {len(questions_data)} questions...")
110
+ # for item in questions_data:
111
+ # task_id = item.get("task_id")
112
+ # question_text = item.get("question")
113
+ # if not task_id or question_text is None:
114
+ # print(f"Skipping item with missing task_id or question: {item}")
115
+ # continue
116
+ # try:
117
+ # submitted_answer = agent(question_text)
118
+ # answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
119
+ # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
120
+ # except Exception as e:
121
+ # print(f"Error running agent on task {task_id}: {e}")
122
+ # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
123
+
124
+ # if not answers_payload:
125
+ # print("Agent did not produce any answers to submit.")
126
+ # return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
127
+
128
+ # # 4. Prepare Submission
129
+ # print("Sanitized submission preview:")
130
+ # print("username →", repr(username))
131
+ # print("agent_code →", repr(agent_code))
132
+ # for a in answers_payload[:3]: # Show first few answers
133
+ # print("sample answer →", repr(a))
134
+
135
+
136
+ # submission_data = {
137
+ # "username": username.strip(),
138
+ # "agent_code": agent_code.strip(), # <<< prevent newline issues here too
139
+ # "answers": answers_payload
140
+ # }
141
+ # status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
142
+ # print(status_update)
143
+ # import json
144
+ # print("📤 SUBMISSION PAYLOAD →", json.dumps(submission_data, indent=2))
145
+
146
+ # # 5. Submit
147
+ # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
148
+ # try:
149
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
150
+ # response.raise_for_status()
151
+ # result_data = response.json()
152
+ # final_status = (
153
+ # f"Submission Successful!\n"
154
+ # f"User: {result_data.get('username')}\n"
155
+ # f"Overall Score: {result_data.get('score', 'N/A')}% "
156
+ # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
157
+ # f"Message: {result_data.get('message', 'No message received.')}"
158
+ # )
159
+ # print("Submission successful.")
160
+ # results_df = pd.DataFrame(results_log)
161
+ # return final_status, results_df
162
+ # except requests.exceptions.HTTPError as e:
163
+ # error_detail = f"Server responded with status {e.response.status_code}."
164
+ # try:
165
+ # error_json = e.response.json()
166
+ # error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
167
+ # except requests.exceptions.JSONDecodeError:
168
+ # error_detail += f" Response: {e.response.text[:500]}"
169
+ # status_message = f"Submission Failed: {error_detail}"
170
+ # print(status_message)
171
+ # results_df = pd.DataFrame(results_log)
172
+ # return status_message, results_df
173
+ # except requests.exceptions.Timeout:
174
+ # status_message = "Submission Failed: The request timed out."
175
+ # print(status_message)
176
+ # results_df = pd.DataFrame(results_log)
177
+ # return status_message, results_df
178
+ # except requests.exceptions.RequestException as e:
179
+ # status_message = f"Submission Failed: Network error - {e}"
180
+ # print(status_message)
181
+ # results_df = pd.DataFrame(results_log)
182
+ # return status_message, results_df
183
+ # except Exception as e:
184
+ # status_message = f"An unexpected error occurred during submission: {e}"
185
+ # print(status_message)
186
+ # results_df = pd.DataFrame(results_log)
187
+ # return status_message, results_df
188
+
189
+
190
+ # # --- Build Gradio Interface using Blocks ---
191
+ # with gr.Blocks() as demo:
192
+ # gr.Markdown("# Basic Agent Evaluation Runner")
193
+ # gr.Markdown(
194
+ # """
195
+ # **Instructions:**
196
+ # 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
197
+ # 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
198
+ # 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
199
+ # ---
200
+ # **Disclaimers:**
201
+ # Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
202
+ # This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
203
+ # """
204
+ # )
205
+
206
+ # gr.LoginButton()
207
+
208
+ # run_button = gr.Button("Run Evaluation & Submit All Answers")
209
+
210
+ # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
211
+ # # Removed max_rows=10 from DataFrame constructor
212
+ # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
213
+
214
+ # run_button.click(
215
+ # fn=run_and_submit_all,
216
+ # outputs=[status_output, results_table]
217
+ # )
218
+
219
+ # if __name__ == "__main__":
220
+ # print("\n" + "-"*30 + " App Starting " + "-"*30)
221
+ # # Check for SPACE_HOST and SPACE_ID at startup for information
222
+ # space_host_startup = os.getenv("SPACE_HOST")
223
+ # space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
224
+
225
+ # if space_host_startup:
226
+ # print(f"✅ SPACE_HOST found: {space_host_startup}")
227
+ # print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
228
+ # else:
229
+ # print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
230
+
231
+ # if space_id_startup: # Print repo URLs if SPACE_ID is found
232
+ # print(f"✅ SPACE_ID found: {space_id_startup}")
233
+ # print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
234
+ # print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
235
+ # else:
236
+ # print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
237
+
238
+ # print("-"*(60 + len(" App Starting ")) + "\n")
239
+
240
+ # print("Launching Gradio Interface for Basic Agent Evaluation...")
241
+ # demo.launch(debug=True, share=False)
242
+
243
  import os
 
 
244
  import gradio as gr
245
  import requests
246
  import pandas as pd
247
  from langchain_core.messages import HumanMessage
248
  from agent import build_graph
249
+ import json
250
+ import re
251
 
 
 
 
 
252
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
253
 
 
 
 
254
 
255
  class BasicAgent:
256
+ """LangGraph agent with enhanced debug logging and answer sanitization."""
257
  def __init__(self):
258
+ print("🧠 Initializing BasicAgent...")
259
  self.graph = build_graph()
260
 
261
  def __call__(self, question: str) -> str:
262
+ print(f"\n📥 Received question: {repr(question)}")
263
  messages = [HumanMessage(content=question)]
264
  response = self.graph.invoke({"messages": messages})
 
265
  raw_output = response["messages"][-1].content
266
+ print("📦 Raw LLM output:", repr(raw_output))
267
 
 
268
  match = re.search(r"FINAL ANSWER:\s*(.+)", raw_output, re.IGNORECASE)
269
  if match:
270
  final_answer = match.group(1).strip()
271
  else:
272
  final_answer = raw_output.strip()
273
+ print("⚠️ 'FINAL ANSWER:' prefix missing. Using fallback.")
 
 
 
 
274
 
275
+ # Sanitize for newline bugs
276
+ clean_answer = final_answer.replace("\n", " ").replace("\r", " ").strip()
277
+ print("✅ Final cleaned answer:", repr(clean_answer))
278
+ return clean_answer
279
 
280
 
281
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
 
282
  space_id = os.getenv("SPACE_ID", "").strip()
 
283
 
284
  if profile:
285
+ username = f"{profile.username.strip()}"
286
+ print(f"🙋 User logged in: {username}")
287
  else:
288
+ return "⚠️ Please log in to Hugging Face first.", None
289
+
290
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main".strip()
291
+ print("🛰️ Submitting from agent_code →", repr(agent_code))
292
 
293
+ questions_url = f"{DEFAULT_API_URL}/questions"
294
+ submit_url = f"{DEFAULT_API_URL}/submit"
 
295
 
 
296
  try:
297
  agent = BasicAgent()
298
  except Exception as e:
299
+ return f"🚫 Failed to initialize agent: {e}", None
 
 
 
300
 
301
+ print(f"📡 Fetching questions from → {questions_url}")
 
 
 
 
 
 
 
302
  try:
303
  response = requests.get(questions_url, timeout=15)
304
  response.raise_for_status()
305
  questions_data = response.json()
 
 
 
 
 
 
 
 
 
 
 
306
  except Exception as e:
307
+ return f" Failed to fetch questions: {e}", None
 
308
 
 
309
  results_log = []
310
  answers_payload = []
311
+ print(f"🤖 Running agent on {len(questions_data)} questions...")
312
+
313
  for item in questions_data:
314
  task_id = item.get("task_id")
315
  question_text = item.get("question")
316
+ if not task_id or not question_text:
 
317
  continue
318
  try:
319
+ answer = agent(question_text)
320
+ # Check for newlines explicitly
321
+ if "\n" in answer:
322
+ print(f"⚠️ Found newline in answer [{task_id}] →", repr(answer))
323
+ answers_payload.append({"task_id": task_id.strip(), "submitted_answer": answer})
324
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
325
  except Exception as e:
326
+ err_msg = f"AGENT ERROR: {e}"
327
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": err_msg})
328
 
329
  if not answers_payload:
330
+ return "🚫 Agent produced no valid answers.", pd.DataFrame(results_log)
 
331
 
332
+ submission_data = {
333
+ "username": username,
334
+ "agent_code": agent_code,
335
+ "answers": answers_payload
336
+ }
 
337
 
338
+ print("\n📤 SUBMISSION PAYLOAD:")
339
+ print(json.dumps(submission_data, indent=2))
340
 
 
 
 
 
 
 
 
 
 
 
 
 
341
  try:
342
  response = requests.post(submit_url, json=submission_data, timeout=60)
343
  response.raise_for_status()
344
+ result = response.json()
345
+ summary = (
346
+ f"Submission Successful!\n"
347
+ f"User: {result.get('username')}\n"
348
+ f"Score: {result.get('score', 'N/A')}% "
349
+ f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})\n"
350
+ f"Message: {result.get('message', 'No message returned.')}"
351
  )
352
+ return summary, pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  except Exception as e:
354
+ return f"🚨 Submission failed: {e}", pd.DataFrame(results_log)
 
 
 
355
 
356
 
 
357
  with gr.Blocks() as demo:
358
+ gr.Markdown("## 🧪 GAIA Evaluation Runner with Debug Mode")
359
+ gr.Markdown("Log in, run your agent, submit answers, and review results with logging enabled.")
 
 
 
 
 
 
 
 
 
 
 
 
360
  gr.LoginButton()
 
361
  run_button = gr.Button("Run Evaluation & Submit All Answers")
362
+ status_output = gr.Textbox(label="Status / Result", lines=4)
363
+ results_table = gr.DataFrame(label="QA Log")
364
 
365
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
 
 
 
 
366
 
367
  if __name__ == "__main__":
368
+ print("🚀 Launching Gradio app...")
369
+ demo.launch(debug=True, share=False)