Thanh Vinh Vo
committed on
Commit
·
9fab94b
1
Parent(s):
a3aa7a4
update
Browse files
NOTES
CHANGED
|
@@ -3,3 +3,19 @@
|
|
| 3 |
- Don't give the master any tool, since it will try to delegate smaller work to the code agent, miss context
|
| 4 |
- Temperature to 0
|
| 5 |
- BeautifulSoup too bad
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
- Don't give the master any tool, since it will try to delegate smaller work to the code agent, miss context
|
| 4 |
- Temperature to 0
|
| 5 |
- BeautifulSoup too bad
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
TASKS
|
| 10 |
+
- MATH: 6f37996b-2ac7-44b0-8e68-6d28256631b4
|
| 11 |
+
- 305ac316-eef6-4446-960a-92d80d542f82
|
| 12 |
+
- 7bd855d8-463d-4ed5-93ca-5fe35145f733
|
| 13 |
+
- cf106601-ab4f-4af9-b045-5295fe67b37d
|
| 14 |
+
- bda648d7-d618-4883-88f4-3466eabd860e
|
| 15 |
+
- 1f975693-876d-457b-a649-393859e79bf3
|
| 16 |
+
- f918266a-b3e0-4914-865d-4faa564f1aef
|
| 17 |
+
- 99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3
|
| 18 |
+
- 5a0c1adf-205e-4841-a666-7c3ef95def9d
|
| 19 |
+
- 1f975693-876d-457b-a649-393859e79bf3
|
| 20 |
+
- 3f57289b-8c60-48be-bd80-01f8099ca449
|
| 21 |
+
- 9d191bce-651d-4746-be2d-7ef8ecadb9c2
|
app.py
CHANGED
|
@@ -207,6 +207,7 @@ class BasicAgent:
|
|
| 207 |
description="""
|
| 208 |
This is a powerful agent, it specializes in:
|
| 209 |
- Writing code to solve problem.
|
|
|
|
| 210 |
- Browse the web to find information.
|
| 211 |
- Reason across audio, vision, and text, a.k.a multimodal agent. """,
|
| 212 |
add_base_tools=True,
|
|
@@ -249,20 +250,20 @@ class BasicAgent:
|
|
| 249 |
2. Please answer as concisely as possible.
|
| 250 |
3. If the question asks for a number, please return a numerical answer without unit (unless unit is specifically asked for). For example: 3 instead of three, 0 instead of None, 3 instead of $3.
|
| 251 |
4. `pandas` package is available for reading table data from HTML content or URL. It is useful for extracting tabular data from web pages (including Wikipedia pages).
|
|
|
|
| 252 |
"""
|
| 253 |
result = self.manager_agent.run(prompt)
|
| 254 |
print(f"Agent responded with: {result}")
|
| 255 |
return result
|
| 256 |
|
| 257 |
|
| 258 |
-
def run_and_submit_all(
|
| 259 |
"""
|
| 260 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 261 |
and displays the results.
|
| 262 |
"""
|
| 263 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 264 |
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 265 |
-
QUESTION_INDEX = int(questions_index)
|
| 266 |
|
| 267 |
if profile:
|
| 268 |
username = f"{profile.username}"
|
|
@@ -291,11 +292,9 @@ def run_and_submit_all(questions_index: str, profile: gr.OAuthProfile | None):
|
|
| 291 |
try:
|
| 292 |
response = requests.get(questions_url, timeout=15)
|
| 293 |
response.raise_for_status()
|
| 294 |
-
questions_data = (
|
| 295 |
-
|
| 296 |
-
if
|
| 297 |
-
else response.json()
|
| 298 |
-
)
|
| 299 |
if not questions_data:
|
| 300 |
print("Fetched questions list is empty.")
|
| 301 |
return "Fetched questions list is empty or invalid format.", None
|
|
@@ -422,11 +421,11 @@ with gr.Blocks() as demo:
|
|
| 422 |
|
| 423 |
gr.LoginButton()
|
| 424 |
|
| 425 |
-
|
| 426 |
-
label="Question
|
| 427 |
lines=1,
|
| 428 |
interactive=True,
|
| 429 |
-
value="
|
| 430 |
)
|
| 431 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 432 |
status_output = gr.Textbox(
|
|
@@ -437,7 +436,7 @@ with gr.Blocks() as demo:
|
|
| 437 |
|
| 438 |
run_button.click(
|
| 439 |
fn=run_and_submit_all,
|
| 440 |
-
inputs=[
|
| 441 |
outputs=[status_output, results_table],
|
| 442 |
)
|
| 443 |
|
|
|
|
| 207 |
description="""
|
| 208 |
This is a powerful agent, it specializes in:
|
| 209 |
- Writing code to solve problem.
|
| 210 |
+
- Solving Maths problems.
|
| 211 |
- Browse the web to find information.
|
| 212 |
- Reason across audio, vision, and text, a.k.a multimodal agent. """,
|
| 213 |
add_base_tools=True,
|
|
|
|
| 250 |
2. Please answer as concisely as possible.
|
| 251 |
3. If the question asks for a number, please return a numerical answer without unit (unless unit is specifically asked for). For example: 3 instead of three, 0 instead of None, 3 instead of $3.
|
| 252 |
4. `pandas` package is available for reading table data from HTML content or URL. It is useful for extracting tabular data from web pages (including Wikipedia pages).
|
| 253 |
+
5. `multimodal_agent` is good at Maths, use it when facing Maths questions.
|
| 254 |
"""
|
| 255 |
result = self.manager_agent.run(prompt)
|
| 256 |
print(f"Agent responded with: {result}")
|
| 257 |
return result
|
| 258 |
|
| 259 |
|
| 260 |
+
def run_and_submit_all(question_id: str, profile: gr.OAuthProfile | None):
|
| 261 |
"""
|
| 262 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 263 |
and displays the results.
|
| 264 |
"""
|
| 265 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 266 |
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
|
|
|
| 267 |
|
| 268 |
if profile:
|
| 269 |
username = f"{profile.username}"
|
|
|
|
| 292 |
try:
|
| 293 |
response = requests.get(questions_url, timeout=15)
|
| 294 |
response.raise_for_status()
|
| 295 |
+
questions_data = response.json()
|
| 296 |
+
if question_id:
|
| 297 |
+
questions_data = [item for item in questions_data if item.get("task_id") == question_id]
|
|
|
|
|
|
|
| 298 |
if not questions_data:
|
| 299 |
print("Fetched questions list is empty.")
|
| 300 |
return "Fetched questions list is empty or invalid format.", None
|
|
|
|
| 421 |
|
| 422 |
gr.LoginButton()
|
| 423 |
|
| 424 |
+
question_id = gr.Textbox(
|
| 425 |
+
label="Question id to solve (empty to solve all)",
|
| 426 |
lines=1,
|
| 427 |
interactive=True,
|
| 428 |
+
value="6f37996b-2ac7-44b0-8e68-6d28256631b4",
|
| 429 |
)
|
| 430 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 431 |
status_output = gr.Textbox(
|
|
|
|
| 436 |
|
| 437 |
run_button.click(
|
| 438 |
fn=run_and_submit_all,
|
| 439 |
+
inputs=[question_id],
|
| 440 |
outputs=[status_output, results_table],
|
| 441 |
)
|
| 442 |
|