Thanh Vinh Vo committed on
Commit
9fab94b
·
1 Parent(s): a3aa7a4
Files changed (2) hide show
  1. NOTES +16 -0
  2. app.py +10 -11
NOTES CHANGED
@@ -3,3 +3,19 @@
3
  - Don't give the master agent any tools: when it has them, it tries to delegate smaller pieces of work to the code agent, which then misses context
4
  - Temperature to 0
5
  - BeautifulSoup too bad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  - Don't give the master agent any tools: when it has them, it tries to delegate smaller pieces of work to the code agent, which then misses context
4
  - Temperature to 0
5
  - BeautifulSoup too bad
6
+
7
+
8
+
9
+ TASKS
10
+ - MATH: 6f37996b-2ac7-44b0-8e68-6d28256631b4
11
+ - 305ac316-eef6-4446-960a-92d80d542f82
12
+ - 7bd855d8-463d-4ed5-93ca-5fe35145f733
13
+ - cf106601-ab4f-4af9-b045-5295fe67b37d
14
+ - bda648d7-d618-4883-88f4-3466eabd860e
15
+ - 1f975693-876d-457b-a649-393859e79bf3
16
+ - f918266a-b3e0-4914-865d-4faa564f1aef
17
+ - 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3
18
+ - 5a0c1adf-205e-4841-a666-7c3ef95def9d
19
+ - 1f975693-876d-457b-a649-393859e79bf3
20
+ - 3f57289b-8c60-48be-bd80-01f8099ca449
21
+ - 9d191bce-651d-4746-be2d-7ef8ecadb9c2
app.py CHANGED
@@ -207,6 +207,7 @@ class BasicAgent:
207
  description="""
208
  This is a powerful agent, it specializes in:
209
  - Writing code to solve problem.
 
210
  - Browse the web to find information.
211
  - Reason across audio, vision, and text, a.k.a multimodal agent. """,
212
  add_base_tools=True,
@@ -249,20 +250,20 @@ class BasicAgent:
249
  2. Please answer as concisely as possible.
250
  3. If the question asks for a number, please return a numerical answer without unit (unless unit is specifically asked for). For example: 3 instead of three, 0 instead of None, 3 instead of $3.
251
  4. `pandas` package is available for reading table data from HTML content or URL. It is useful for extracting tabular data from web pages (including Wikipedia pages).
 
252
  """
253
  result = self.manager_agent.run(prompt)
254
  print(f"Agent responded with: {result}")
255
  return result
256
 
257
 
258
- def run_and_submit_all(questions_index: str, profile: gr.OAuthProfile | None):
259
  """
260
  Fetches all questions, runs the BasicAgent on them, submits all answers,
261
  and displays the results.
262
  """
263
  # --- Determine HF Space Runtime URL and Repo URL ---
264
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
265
- QUESTION_INDEX = int(questions_index)
266
 
267
  if profile:
268
  username = f"{profile.username}"
@@ -291,11 +292,9 @@ def run_and_submit_all(questions_index: str, profile: gr.OAuthProfile | None):
291
  try:
292
  response = requests.get(questions_url, timeout=15)
293
  response.raise_for_status()
294
- questions_data = (
295
- [response.json()[QUESTION_INDEX]]
296
- if QUESTION_INDEX >= 0
297
- else response.json()
298
- )
299
  if not questions_data:
300
  print("Fetched questions list is empty.")
301
  return "Fetched questions list is empty or invalid format.", None
@@ -422,11 +421,11 @@ with gr.Blocks() as demo:
422
 
423
  gr.LoginButton()
424
 
425
- questions_limit = gr.Textbox(
426
- label="Question index to solve (-1 to solve all)",
427
  lines=1,
428
  interactive=True,
429
- value="0",
430
  )
431
  run_button = gr.Button("Run Evaluation & Submit All Answers")
432
  status_output = gr.Textbox(
@@ -437,7 +436,7 @@ with gr.Blocks() as demo:
437
 
438
  run_button.click(
439
  fn=run_and_submit_all,
440
- inputs=[questions_limit],
441
  outputs=[status_output, results_table],
442
  )
443
 
 
207
  description="""
208
  This is a powerful agent, it specializes in:
209
  - Writing code to solve problem.
210
+ - Solving Maths problems.
211
  - Browse the web to find information.
212
  - Reason across audio, vision, and text, a.k.a multimodal agent. """,
213
  add_base_tools=True,
 
250
  2. Please answer as concisely as possible.
251
  3. If the question asks for a number, please return a numerical answer without unit (unless unit is specifically asked for). For example: 3 instead of three, 0 instead of None, 3 instead of $3.
252
  4. `pandas` package is available for reading table data from HTML content or URL. It is useful for extracting tabular data from web pages (including Wikipedia pages).
253
+ 5. `multimodal_agent` is good at Maths, use it when facing Maths questions.
254
  """
255
  result = self.manager_agent.run(prompt)
256
  print(f"Agent responded with: {result}")
257
  return result
258
 
259
 
260
+ def run_and_submit_all(question_id: str, profile: gr.OAuthProfile | None):
261
  """
262
  Fetches all questions, runs the BasicAgent on them, submits all answers,
263
  and displays the results.
264
  """
265
  # --- Determine HF Space Runtime URL and Repo URL ---
266
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
267
 
268
  if profile:
269
  username = f"{profile.username}"
 
292
  try:
293
  response = requests.get(questions_url, timeout=15)
294
  response.raise_for_status()
295
+ questions_data = response.json()
296
+ if question_id:
297
+ questions_data = [item for item in questions_data if item.get("task_id") == question_id]
 
 
298
  if not questions_data:
299
  print("Fetched questions list is empty.")
300
  return "Fetched questions list is empty or invalid format.", None
 
421
 
422
  gr.LoginButton()
423
 
424
+ question_id = gr.Textbox(
425
+ label="Question id to solve (empty to solve all)",
426
  lines=1,
427
  interactive=True,
428
+ value="6f37996b-2ac7-44b0-8e68-6d28256631b4",
429
  )
430
  run_button = gr.Button("Run Evaluation & Submit All Answers")
431
  status_output = gr.Textbox(
 
436
 
437
  run_button.click(
438
  fn=run_and_submit_all,
439
+ inputs=[question_id],
440
  outputs=[status_output, results_table],
441
  )
442