dracero commited on
Commit
75cf299
·
verified ·
1 Parent(s): 5a2474d
Files changed (1) hide show
  1. app.py +817 -4
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -118,7 +119,239 @@ class BasicAgent:
118
 
119
  # Usar la respuesta genérica para el tipo de pregunta identificado
120
  if question_type in self.generic_responses:
121
- answer = self.generic_responses[question_type]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  else:
123
  answer = self.generic_responses["default"]
124
 
@@ -128,7 +361,123 @@ class BasicAgent:
128
  # Personalizar la primera frase con las palabras clave
129
  if "in the context of" not in answer:
130
  answer = f"{keywords[0].capitalize()} {answer[0].lower()}{answer[1:]}"
131
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  # Guardar en caché para futuras consultas
133
  self.response_cache[normalized_question] = answer
134
 
@@ -143,7 +492,239 @@ class BasicAgent:
143
 
144
  # Calcular intersección y unión
145
  intersection = words1.intersection(words2)
146
- union = words1.union(words2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  # Coeficiente de Jaccard
149
  if len(union) == 0:
@@ -165,7 +746,123 @@ class BasicAgent:
165
  "the", "a", "an", "of", "in", "on", "at", "to", "for", "with", "by", "about",
166
  "and", "or", "but", "if", "then", "than", "so", "no", "not", "this", "that",
167
  "these", "those", "there", "here", "some", "any", "can", "could", "should",
168
- "would", "may", "might", "must", "will", "shall", "do", "does", "did"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  # Dividir la pregunta en palabras y filtrar las palabras comunes
171
  words = question.lower().split()
@@ -298,7 +995,123 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
298
  status_message = "Submission Failed: The request timed out."
299
  print(status_message)
300
  results_df = pd.DataFrame(results_log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  return status_message, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  except requests.exceptions.RequestException as e:
303
  status_message = f"Submission Failed: Network error - {e}"
304
  print(status_message)
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ import re
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
119
 
120
  # Usar la respuesta genérica para el tipo de pregunta identificado
121
  if question_type in self.generic_responses:
122
+ answer = self.generclass BasicAgent:
123
+ if not task_id or question_text is None:
124
+ print(f"Skipping item with missing task_id or question: {item}")
125
+ continue
126
+ try:
127
+ submitted_answer = agent(question_text)
128
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
129
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
130
+ except Exception as e:
131
+ print(f"Error running agent on task {task_id}: {e}")
132
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
133
+
134
+ if not answers_payload:
135
+ print("Agent did not produce any answers to submit.")
136
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
137
+
138
+ # 4. Prepare Submission
139
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
140
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
141
+ print(status_update)
142
+
143
+ # 5. Submit
144
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
145
+ try:
146
+ response = requests.post(submit_url, json=submission_data, timeout=60)
147
+ response.raise_for_status()
148
+ result_data = response.json()
149
+ final_status = (
150
+ f"Submission Successful!\n"
151
+ f"User: {result_data.get('username')}\n"
152
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
153
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
154
+ f"Message: {result_data.get('message', 'No message received.')}"
155
+ )
156
+ print("Submission successful.")
157
+ results_df = pd.DataFrame(results_log)
158
+ return final_status, results_df
159
+ except requests.exceptions.HTTPError as e:
160
+ error_detail = f"Server responded with status {e.response.status_code}."
161
+ try:
162
+ error_json = e.response.json()
163
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
164
+ except requests.exceptions.JSONDecodeError:
165
+ error_detail += f" Response: {e.response.text[:500]}"
166
+ status_message = f"Submission Failed: {error_detail}"
167
+ print(status_message)
168
+ results_df = pd.DataFrame(results_log)
169
+ return status_message, results_df
170
+ except requests.exceptions.Timeout:
171
+ status_message = "Submission Failed: The request timed out."
172
+ print(status_message)
173
+ results_df = pd.DataFrame(results_log)
174
+ return status_message, results_df
175
+ except requests.exceptions.RequestException as e:
176
+ status_message = f"Submission Failed: Network error - {e}"
177
+ print(status_message)
178
+ results_df = pd.DataFrame(results_log)
179
+ return status_message, results_df
180
+ except Exception as e:
181
+ status_message = f"An unexpected error occurred during submission: {e}"
182
+ print(status_message)
183
+ results_df = pd.DataFrame(results_log)
184
+ return status_message, results_df
185
+
186
+
187
+ # --- Build Gradio Interface using Blocks ---
188
+ with gr.Blocks() as demo:
189
+ gr.Markdown("# Basic Agent Evaluation Runner")
190
+ gr.Markdown(
191
+ """
192
+ **Instructions:**
193
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
194
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
195
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
196
+ ---
197
+ **Disclaimers:**
198
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
199
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
200
+ """
201
+ )
202
+
203
+ gr.LoginButton()
204
+
205
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
206
+
207
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
208
+ # Removed max_rows=10 from DataFrame constructor
209
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
210
+
211
+ run_button.click(
212
+ fn=run_and_submit_all,
213
+ outputs=[status_output, results_table]
214
+ )
215
+
216
+ if __name__ == "__main__":
217
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
218
+ # Check for SPACE_HOST and SPACE_ID at startup for information
219
+ space_host_startup = os.getenv("SPACE_HOST")
220
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
221
+
222
+ if space_host_startup:
223
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
224
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
225
+ else:
226
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
227
+
228
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
229
+ print(f"✅ SPACE_ID found: {space_id_startup}")
230
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
231
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
232
+ else:
233
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
234
+
235
+ print("-"*(60 + len(" App Starting ")) + "\n")
236
+
237
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
238
+ demo.launch(debug=True, share=False)class BasicAgent:
239
+ if not task_id or question_text is None:
240
+ print(f"Skipping item with missing task_id or question: {item}")
241
+ continue
242
+ try:
243
+ submitted_answer = agent(question_text)
244
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
245
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
246
+ except Exception as e:
247
+ print(f"Error running agent on task {task_id}: {e}")
248
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
249
+
250
+ if not answers_payload:
251
+ print("Agent did not produce any answers to submit.")
252
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
253
+
254
+ # 4. Prepare Submission
255
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
256
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
257
+ print(status_update)
258
+
259
+ # 5. Submit
260
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
261
+ try:
262
+ response = requests.post(submit_url, json=submission_data, timeout=60)
263
+ response.raise_for_status()
264
+ result_data = response.json()
265
+ final_status = (
266
+ f"Submission Successful!\n"
267
+ f"User: {result_data.get('username')}\n"
268
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
269
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
270
+ f"Message: {result_data.get('message', 'No message received.')}"
271
+ )
272
+ print("Submission successful.")
273
+ results_df = pd.DataFrame(results_log)
274
+ return final_status, results_df
275
+ except requests.exceptions.HTTPError as e:
276
+ error_detail = f"Server responded with status {e.response.status_code}."
277
+ try:
278
+ error_json = e.response.json()
279
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
280
+ except requests.exceptions.JSONDecodeError:
281
+ error_detail += f" Response: {e.response.text[:500]}"
282
+ status_message = f"Submission Failed: {error_detail}"
283
+ print(status_message)
284
+ results_df = pd.DataFrame(results_log)
285
+ return status_message, results_df
286
+ except requests.exceptions.Timeout:
287
+ status_message = "Submission Failed: The request timed out."
288
+ print(status_message)
289
+ results_df = pd.DataFrame(results_log)
290
+ return status_message, results_df
291
+ except requests.exceptions.RequestException as e:
292
+ status_message = f"Submission Failed: Network error - {e}"
293
+ print(status_message)
294
+ results_df = pd.DataFrame(results_log)
295
+ return status_message, results_df
296
+ except Exception as e:
297
+ status_message = f"An unexpected error occurred during submission: {e}"
298
+ print(status_message)
299
+ results_df = pd.DataFrame(results_log)
300
+ return status_message, results_df
301
+
302
+
303
+ # --- Build Gradio Interface using Blocks ---
304
+ with gr.Blocks() as demo:
305
+ gr.Markdown("# Basic Agent Evaluation Runner")
306
+ gr.Markdown(
307
+ """
308
+ **Instructions:**
309
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
310
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
311
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
312
+ ---
313
+ **Disclaimers:**
314
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
315
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
316
+ """
317
+ )
318
+
319
+ gr.LoginButton()
320
+
321
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
322
+
323
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
324
+ # Removed max_rows=10 from DataFrame constructor
325
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
326
+
327
+ run_button.click(
328
+ fn=run_and_submit_all,
329
+ outputs=[status_output, results_table]
330
+ )
331
+
332
+ if __name__ == "__main__":
333
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
334
+ # Check for SPACE_HOST and SPACE_ID at startup for information
335
+ space_host_startup = os.getenv("SPACE_HOST")
336
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
337
+
338
+ if space_host_startup:
339
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
340
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
341
+ else:
342
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
343
+
344
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
345
+ print(f"✅ SPACE_ID found: {space_id_startup}")
346
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
347
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
348
+ else:
349
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
350
+
351
+ print("-"*(60 + len(" App Starting ")) + "\n")
352
+
353
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
354
+ demo.launch(debug=True, share=False)ic_responses[question_type]
355
  else:
356
  answer = self.generic_responses["default"]
357
 
 
361
  # Personalizar la primera frase con las palabras clave
362
  if "in the context of" not in answer:
363
  answer = f"{keywords[0].capitalize()} {answer[0].lower()}{answer[1:]}"
364
+ class BasicAgent:
365
+ if not task_id or question_text is None:
366
+ print(f"Skipping item with missing task_id or question: {item}")
367
+ continue
368
+ try:
369
+ submitted_answer = agent(question_text)
370
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
371
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
372
+ except Exception as e:
373
+ print(f"Error running agent on task {task_id}: {e}")
374
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
375
+
376
+ if not answers_payload:
377
+ print("Agent did not produce any answers to submit.")
378
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
379
+
380
+ # 4. Prepare Submission
381
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
382
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
383
+ print(status_update)
384
+
385
+ # 5. Submit
386
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
387
+ try:
388
+ response = requests.post(submit_url, json=submission_data, timeout=60)
389
+ response.raise_for_status()
390
+ result_data = response.json()
391
+ final_status = (
392
+ f"Submission Successful!\n"
393
+ f"User: {result_data.get('username')}\n"
394
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
395
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
396
+ f"Message: {result_data.get('message', 'No message received.')}"
397
+ )
398
+ print("Submission successful.")
399
+ results_df = pd.DataFrame(results_log)
400
+ return final_status, results_df
401
+ except requests.exceptions.HTTPError as e:
402
+ error_detail = f"Server responded with status {e.response.status_code}."
403
+ try:
404
+ error_json = e.response.json()
405
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
406
+ except requests.exceptions.JSONDecodeError:
407
+ error_detail += f" Response: {e.response.text[:500]}"
408
+ status_message = f"Submission Failed: {error_detail}"
409
+ print(status_message)
410
+ results_df = pd.DataFrame(results_log)
411
+ return status_message, results_df
412
+ except requests.exceptions.Timeout:
413
+ status_message = "Submission Failed: The request timed out."
414
+ print(status_message)
415
+ results_df = pd.DataFrame(results_log)
416
+ return status_message, results_df
417
+ except requests.exceptions.RequestException as e:
418
+ status_message = f"Submission Failed: Network error - {e}"
419
+ print(status_message)
420
+ results_df = pd.DataFrame(results_log)
421
+ return status_message, results_df
422
+ except Exception as e:
423
+ status_message = f"An unexpected error occurred during submission: {e}"
424
+ print(status_message)
425
+ results_df = pd.DataFrame(results_log)
426
+ return status_message, results_df
427
+
428
+
429
+ # --- Build Gradio Interface using Blocks ---
430
+ with gr.Blocks() as demo:
431
+ gr.Markdown("# Basic Agent Evaluation Runner")
432
+ gr.Markdown(
433
+ """
434
+ **Instructions:**
435
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
436
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
437
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
438
+ ---
439
+ **Disclaimers:**
440
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
441
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
442
+ """
443
+ )
444
+
445
+ gr.LoginButton()
446
+
447
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
448
+
449
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
450
+ # Removed max_rows=10 from DataFrame constructor
451
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
452
+
453
+ run_button.click(
454
+ fn=run_and_submit_all,
455
+ outputs=[status_output, results_table]
456
+ )
457
+
458
+ if __name__ == "__main__":
459
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
460
+ # Check for SPACE_HOST and SPACE_ID at startup for information
461
+ space_host_startup = os.getenv("SPACE_HOST")
462
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
463
+
464
+ if space_host_startup:
465
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
466
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
467
+ else:
468
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
469
+
470
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
471
+ print(f"✅ SPACE_ID found: {space_id_startup}")
472
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
473
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
474
+ else:
475
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
476
+
477
+ print("-"*(60 + len(" App Starting ")) + "\n")
478
+
479
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
480
+ demo.launch(debug=True, share=False)
481
  # Guardar en caché para futuras consultas
482
  self.response_cache[normalized_question] = answer
483
 
 
492
 
493
  # Calcular intersección y unión
494
  intersection = words1.intersection(words2)
495
+ union = words1.union(wclass BasicAgent:
496
+ if not task_id or question_text is None:
497
+ print(f"Skipping item with missing task_id or question: {item}")
498
+ continue
499
+ try:
500
+ submitted_answer = agent(question_text)
501
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
502
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
503
+ except Exception as e:
504
+ print(f"Error running agent on task {task_id}: {e}")
505
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
506
+
507
+ if not answers_payload:
508
+ print("Agent did not produce any answers to submit.")
509
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
510
+
511
+ # 4. Prepare Submission
512
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
513
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
514
+ print(status_update)
515
+
516
+ # 5. Submit
517
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
518
+ try:
519
+ response = requests.post(submit_url, json=submission_data, timeout=60)
520
+ response.raise_for_status()
521
+ result_data = response.json()
522
+ final_status = (
523
+ f"Submission Successful!\n"
524
+ f"User: {result_data.get('username')}\n"
525
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
526
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
527
+ f"Message: {result_data.get('message', 'No message received.')}"
528
+ )
529
+ print("Submission successful.")
530
+ results_df = pd.DataFrame(results_log)
531
+ return final_status, results_df
532
+ except requests.exceptions.HTTPError as e:
533
+ error_detail = f"Server responded with status {e.response.status_code}."
534
+ try:
535
+ error_json = e.response.json()
536
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
537
+ except requests.exceptions.JSONDecodeError:
538
+ error_detail += f" Response: {e.response.text[:500]}"
539
+ status_message = f"Submission Failed: {error_detail}"
540
+ print(status_message)
541
+ results_df = pd.DataFrame(results_log)
542
+ return status_message, results_df
543
+ except requests.exceptions.Timeout:
544
+ status_message = "Submission Failed: The request timed out."
545
+ print(status_message)
546
+ results_df = pd.DataFrame(results_log)
547
+ return status_message, results_df
548
+ except requests.exceptions.RequestException as e:
549
+ status_message = f"Submission Failed: Network error - {e}"
550
+ print(status_message)
551
+ results_df = pd.DataFrame(results_log)
552
+ return status_message, results_df
553
+ except Exception as e:
554
+ status_message = f"An unexpected error occurred during submission: {e}"
555
+ print(status_message)
556
+ results_df = pd.DataFrame(results_log)
557
+ return status_message, results_df
558
+
559
+
560
+ # --- Build Gradio Interface using Blocks ---
561
+ with gr.Blocks() as demo:
562
+ gr.Markdown("# Basic Agent Evaluation Runner")
563
+ gr.Markdown(
564
+ """
565
+ **Instructions:**
566
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
567
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
568
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
569
+ ---
570
+ **Disclaimers:**
571
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
572
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
573
+ """
574
+ )
575
+
576
+ gr.LoginButton()
577
+
578
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
579
+
580
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
581
+ # Removed max_rows=10 from DataFrame constructor
582
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
583
+
584
+ run_button.click(
585
+ fn=run_and_submit_all,
586
+ outputs=[status_output, results_table]
587
+ )
588
+
589
+ if __name__ == "__main__":
590
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
591
+ # Check for SPACE_HOST and SPACE_ID at startup for information
592
+ space_host_startup = os.getenv("SPACE_HOST")
593
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
594
+
595
+ if space_host_startup:
596
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
597
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
598
+ else:
599
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
600
+
601
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
602
+ print(f"✅ SPACE_ID found: {space_id_startup}")
603
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
604
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
605
+ else:
606
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
607
+
608
+ print("-"*(60 + len(" App Starting ")) + "\n")
609
+
610
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
611
+ demo.launch(debug=True, share=False)class BasicAgent:
612
+ if not task_id or question_text is None:
613
+ print(f"Skipping item with missing task_id or question: {item}")
614
+ continue
615
+ try:
616
+ submitted_answer = agent(question_text)
617
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
618
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
619
+ except Exception as e:
620
+ print(f"Error running agent on task {task_id}: {e}")
621
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
622
+
623
+ if not answers_payload:
624
+ print("Agent did not produce any answers to submit.")
625
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
626
+
627
+ # 4. Prepare Submission
628
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
629
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
630
+ print(status_update)
631
+
632
+ # 5. Submit
633
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
634
+ try:
635
+ response = requests.post(submit_url, json=submission_data, timeout=60)
636
+ response.raise_for_status()
637
+ result_data = response.json()
638
+ final_status = (
639
+ f"Submission Successful!\n"
640
+ f"User: {result_data.get('username')}\n"
641
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
642
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
643
+ f"Message: {result_data.get('message', 'No message received.')}"
644
+ )
645
+ print("Submission successful.")
646
+ results_df = pd.DataFrame(results_log)
647
+ return final_status, results_df
648
+ except requests.exceptions.HTTPError as e:
649
+ error_detail = f"Server responded with status {e.response.status_code}."
650
+ try:
651
+ error_json = e.response.json()
652
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
653
+ except requests.exceptions.JSONDecodeError:
654
+ error_detail += f" Response: {e.response.text[:500]}"
655
+ status_message = f"Submission Failed: {error_detail}"
656
+ print(status_message)
657
+ results_df = pd.DataFrame(results_log)
658
+ return status_message, results_df
659
+ except requests.exceptions.Timeout:
660
+ status_message = "Submission Failed: The request timed out."
661
+ print(status_message)
662
+ results_df = pd.DataFrame(results_log)
663
+ return status_message, results_df
664
+ except requests.exceptions.RequestException as e:
665
+ status_message = f"Submission Failed: Network error - {e}"
666
+ print(status_message)
667
+ results_df = pd.DataFrame(results_log)
668
+ return status_message, results_df
669
+ except Exception as e:
670
+ status_message = f"An unexpected error occurred during submission: {e}"
671
+ print(status_message)
672
+ results_df = pd.DataFrame(results_log)
673
+ return status_message, results_df
674
+
675
+
676
+ # --- Build Gradio Interface using Blocks ---
677
+ with gr.Blocks() as demo:
678
+ gr.Markdown("# Basic Agent Evaluation Runner")
679
+ gr.Markdown(
680
+ """
681
+ **Instructions:**
682
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
683
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
684
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
685
+ ---
686
+ **Disclaimers:**
687
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
688
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
689
+ """
690
+ )
691
+
692
+ gr.LoginButton()
693
+
694
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
695
+
696
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
697
+ # Removed max_rows=10 from DataFrame constructor
698
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
699
+
700
+ run_button.click(
701
+ fn=run_and_submit_all,
702
+ outputs=[status_output, results_table]
703
+ )
704
+
705
+ if __name__ == "__main__":
706
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
707
+ # Check for SPACE_HOST and SPACE_ID at startup for information
708
+ space_host_startup = os.getenv("SPACE_HOST")
709
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
710
+
711
+ if space_host_startup:
712
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
713
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
714
+ else:
715
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
716
+
717
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
718
+ print(f"✅ SPACE_ID found: {space_id_startup}")
719
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
720
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
721
+ else:
722
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
723
+
724
+ print("-"*(60 + len(" App Starting ")) + "\n")
725
+
726
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
727
+ demo.launch(debug=True, share=False)ords2)
728
 
729
  # Coeficiente de Jaccard
730
  if len(union) == 0:
 
746
  "the", "a", "an", "of", "in", "on", "at", "to", "for", "with", "by", "about",
747
  "and", "or", "but", "if", "then", "than", "so", "no", "not", "this", "that",
748
  "these", "those", "there", "here", "some", "any", "can", "could", "should",
749
+ "would",class BasicAgent:
750
+ if not task_id or question_text is None:
751
+ print(f"Skipping item with missing task_id or question: {item}")
752
+ continue
753
+ try:
754
+ submitted_answer = agent(question_text)
755
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
756
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
757
+ except Exception as e:
758
+ print(f"Error running agent on task {task_id}: {e}")
759
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
760
+
761
+ if not answers_payload:
762
+ print("Agent did not produce any answers to submit.")
763
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
764
+
765
+ # 4. Prepare Submission
766
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
767
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
768
+ print(status_update)
769
+
770
+ # 5. Submit
771
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
772
+ try:
773
+ response = requests.post(submit_url, json=submission_data, timeout=60)
774
+ response.raise_for_status()
775
+ result_data = response.json()
776
+ final_status = (
777
+ f"Submission Successful!\n"
778
+ f"User: {result_data.get('username')}\n"
779
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
780
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
781
+ f"Message: {result_data.get('message', 'No message received.')}"
782
+ )
783
+ print("Submission successful.")
784
+ results_df = pd.DataFrame(results_log)
785
+ return final_status, results_df
786
+ except requests.exceptions.HTTPError as e:
787
+ error_detail = f"Server responded with status {e.response.status_code}."
788
+ try:
789
+ error_json = e.response.json()
790
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
791
+ except requests.exceptions.JSONDecodeError:
792
+ error_detail += f" Response: {e.response.text[:500]}"
793
+ status_message = f"Submission Failed: {error_detail}"
794
+ print(status_message)
795
+ results_df = pd.DataFrame(results_log)
796
+ return status_message, results_df
797
+ except requests.exceptions.Timeout:
798
+ status_message = "Submission Failed: The request timed out."
799
+ print(status_message)
800
+ results_df = pd.DataFrame(results_log)
801
+ return status_message, results_df
802
+ except requests.exceptions.RequestException as e:
803
+ status_message = f"Submission Failed: Network error - {e}"
804
+ print(status_message)
805
+ results_df = pd.DataFrame(results_log)
806
+ return status_message, results_df
807
+ except Exception as e:
808
+ status_message = f"An unexpected error occurred during submission: {e}"
809
+ print(status_message)
810
+ results_df = pd.DataFrame(results_log)
811
+ return status_message, results_df
812
+
813
+
814
+ # --- Build Gradio Interface using Blocks ---
815
+ with gr.Blocks() as demo:
816
+ gr.Markdown("# Basic Agent Evaluation Runner")
817
+ gr.Markdown(
818
+ """
819
+ **Instructions:**
820
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
821
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
822
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
823
+ ---
824
+ **Disclaimers:**
825
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
826
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
827
+ """
828
+ )
829
+
830
+ gr.LoginButton()
831
+
832
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
833
+
834
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
835
+ # Removed max_rows=10 from DataFrame constructor
836
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
837
+
838
+ run_button.click(
839
+ fn=run_and_submit_all,
840
+ outputs=[status_output, results_table]
841
+ )
842
+
843
+ if __name__ == "__main__":
844
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
845
+ # Check for SPACE_HOST and SPACE_ID at startup for information
846
+ space_host_startup = os.getenv("SPACE_HOST")
847
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
848
+
849
+ if space_host_startup:
850
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
851
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
852
+ else:
853
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
854
+
855
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
856
+ print(f"✅ SPACE_ID found: {space_id_startup}")
857
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
858
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
859
+ else:
860
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
861
+
862
+ print("-"*(60 + len(" App Starting ")) + "\n")
863
+
864
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
865
+ demo.launch(debug=True, share=False) "may", "might", "must", "will", "shall", "do", "does", "did"]
866
 
867
  # Dividir la pregunta en palabras y filtrar las palabras comunes
868
  words = question.lower().split()
 
995
  status_message = "Submission Failed: The request timed out."
996
  print(status_message)
997
  results_df = pd.DataFrame(results_log)
998
+ return status_messaclass BasicAgent:
999
+ if not task_id or question_text is None:
1000
+ print(f"Skipping item with missing task_id or question: {item}")
1001
+ continue
1002
+ try:
1003
+ submitted_answer = agent(question_text)
1004
+ answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
1005
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
1006
+ except Exception as e:
1007
+ print(f"Error running agent on task {task_id}: {e}")
1008
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
1009
+
1010
+ if not answers_payload:
1011
+ print("Agent did not produce any answers to submit.")
1012
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
1013
+
1014
+ # 4. Prepare Submission
1015
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
1016
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
1017
+ print(status_update)
1018
+
1019
+ # 5. Submit
1020
+ print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
1021
+ try:
1022
+ response = requests.post(submit_url, json=submission_data, timeout=60)
1023
+ response.raise_for_status()
1024
+ result_data = response.json()
1025
+ final_status = (
1026
+ f"Submission Successful!\n"
1027
+ f"User: {result_data.get('username')}\n"
1028
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
1029
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
1030
+ f"Message: {result_data.get('message', 'No message received.')}"
1031
+ )
1032
+ print("Submission successful.")
1033
+ results_df = pd.DataFrame(results_log)
1034
+ return final_status, results_df
1035
+ except requests.exceptions.HTTPError as e:
1036
+ error_detail = f"Server responded with status {e.response.status_code}."
1037
+ try:
1038
+ error_json = e.response.json()
1039
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
1040
+ except requests.exceptions.JSONDecodeError:
1041
+ error_detail += f" Response: {e.response.text[:500]}"
1042
+ status_message = f"Submission Failed: {error_detail}"
1043
+ print(status_message)
1044
+ results_df = pd.DataFrame(results_log)
1045
+ return status_message, results_df
1046
+ except requests.exceptions.Timeout:
1047
+ status_message = "Submission Failed: The request timed out."
1048
+ print(status_message)
1049
+ results_df = pd.DataFrame(results_log)
1050
+ return status_message, results_df
1051
+ except requests.exceptions.RequestException as e:
1052
+ status_message = f"Submission Failed: Network error - {e}"
1053
+ print(status_message)
1054
+ results_df = pd.DataFrame(results_log)
1055
+ return status_message, results_df
1056
+ except Exception as e:
1057
+ status_message = f"An unexpected error occurred during submission: {e}"
1058
+ print(status_message)
1059
+ results_df = pd.DataFrame(results_log)
1060
  return status_message, results_df
1061
+
1062
+
1063
+ # --- Build Gradio Interface using Blocks ---
1064
+ with gr.Blocks() as demo:
1065
+ gr.Markdown("# Basic Agent Evaluation Runner")
1066
+ gr.Markdown(
1067
+ """
1068
+ **Instructions:**
1069
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
1070
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
1071
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
1072
+ ---
1073
+ **Disclaimers:**
1074
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
1075
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
1076
+ """
1077
+ )
1078
+
1079
+ gr.LoginButton()
1080
+
1081
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
1082
+
1083
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
1084
+ # Removed max_rows=10 from DataFrame constructor
1085
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
1086
+
1087
+ run_button.click(
1088
+ fn=run_and_submit_all,
1089
+ outputs=[status_output, results_table]
1090
+ )
1091
+
1092
+ if __name__ == "__main__":
1093
+ print("\n" + "-"*30 + " App Starting " + "-"*30)
1094
+ # Check for SPACE_HOST and SPACE_ID at startup for information
1095
+ space_host_startup = os.getenv("SPACE_HOST")
1096
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
1097
+
1098
+ if space_host_startup:
1099
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
1100
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
1101
+ else:
1102
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
1103
+
1104
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
1105
+ print(f"✅ SPACE_ID found: {space_id_startup}")
1106
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
1107
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
1108
+ else:
1109
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
1110
+
1111
+ print("-"*(60 + len(" App Starting ")) + "\n")
1112
+
1113
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
1114
+ demo.launch(debug=True, share=False)ge, results_df
1115
  except requests.exceptions.RequestException as e:
1116
  status_message = f"Submission Failed: Network error - {e}"
1117
  print(status_message)