Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,12 @@
|
|
| 5 |
import gradio.utils, os
|
| 6 |
import gradio as gr
|
| 7 |
from agents.crew import run_crew
|
| 8 |
-
from utils.utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# MCP server functions
|
| 11 |
|
|
@@ -72,9 +77,6 @@ gradio.utils.watchfn_spaces = watchfn
|
|
| 72 |
|
| 73 |
# Graphical user interface
|
| 74 |
|
| 75 |
-
QUESTION_FILE_PATH_GAIA = "files/gaia_validation.jsonl"
|
| 76 |
-
QUESTION_FILE_PATH_HLE = "files/hle_validation.jsonl"
|
| 77 |
-
|
| 78 |
DEFAULT_QUESTION = "In MCP's 1st Birthday Hackathon, hosted by Anthropic and Gradio, what percentage of participants submitted a solution so far?"
|
| 79 |
|
| 80 |
CSS_FULL_WIDTH = """
|
|
@@ -183,7 +185,7 @@ with gr.Blocks(elem_classes=["full-width-app"]) as gaia:
|
|
| 183 |
with gr.Tabs():
|
| 184 |
with gr.TabItem("GAIA Benchmark Level 1"):
|
| 185 |
gr.Examples(
|
| 186 |
-
examples=
|
| 187 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 188 |
examples_per_page=3,
|
| 189 |
cache_examples=False
|
|
@@ -191,7 +193,7 @@ with gr.Blocks(elem_classes=["full-width-app"]) as gaia:
|
|
| 191 |
|
| 192 |
with gr.TabItem("GAIA Benchmark Level 2"):
|
| 193 |
gr.Examples(
|
| 194 |
-
examples=
|
| 195 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 196 |
examples_per_page=3,
|
| 197 |
cache_examples=False
|
|
@@ -199,7 +201,7 @@ with gr.Blocks(elem_classes=["full-width-app"]) as gaia:
|
|
| 199 |
|
| 200 |
with gr.TabItem("GAIA Benchmark Level 3"):
|
| 201 |
gr.Examples(
|
| 202 |
-
examples=
|
| 203 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 204 |
examples_per_page=3,
|
| 205 |
cache_examples=False
|
|
@@ -207,7 +209,7 @@ with gr.Blocks(elem_classes=["full-width-app"]) as gaia:
|
|
| 207 |
|
| 208 |
with gr.TabItem("Humanity's Last Exam"):
|
| 209 |
gr.Examples(
|
| 210 |
-
examples=
|
| 211 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 212 |
examples_per_page=3,
|
| 213 |
cache_examples=False
|
|
|
|
| 5 |
import gradio.utils, os
|
| 6 |
import gradio as gr
|
| 7 |
from agents.crew import run_crew
|
| 8 |
+
from utils.utils import (
|
| 9 |
+
QUESTION_TYPE_GAIA,
|
| 10 |
+
QUESTION_TYPE_HLE,
|
| 11 |
+
#get_questions_from_dataset,
|
| 12 |
+
get_questions_from_file
|
| 13 |
+
)
|
| 14 |
|
| 15 |
# MCP server functions
|
| 16 |
|
|
|
|
| 77 |
|
| 78 |
# Graphical user interface
|
| 79 |
|
|
|
|
|
|
|
|
|
|
| 80 |
DEFAULT_QUESTION = "In MCP's 1st Birthday Hackathon, hosted by Anthropic and Gradio, what percentage of participants submitted a solution so far?"
|
| 81 |
|
| 82 |
CSS_FULL_WIDTH = """
|
|
|
|
| 185 |
with gr.Tabs():
|
| 186 |
with gr.TabItem("GAIA Benchmark Level 1"):
|
| 187 |
gr.Examples(
|
| 188 |
+
examples=get_questions_from_file(QUESTION_FILE_PATH_GAIA, 1),
|
| 189 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 190 |
examples_per_page=3,
|
| 191 |
cache_examples=False
|
|
|
|
| 193 |
|
| 194 |
with gr.TabItem("GAIA Benchmark Level 2"):
|
| 195 |
gr.Examples(
|
| 196 |
+
examples=get_questions_from_file(QUESTION_FILE_PATH_GAIA, 2),
|
| 197 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 198 |
examples_per_page=3,
|
| 199 |
cache_examples=False
|
|
|
|
| 201 |
|
| 202 |
with gr.TabItem("GAIA Benchmark Level 3"):
|
| 203 |
gr.Examples(
|
| 204 |
+
examples=get_questions_from_file(QUESTION_FILE_PATH_GAIA, 3),
|
| 205 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 206 |
examples_per_page=3,
|
| 207 |
cache_examples=False
|
|
|
|
| 209 |
|
| 210 |
with gr.TabItem("Humanity's Last Exam"):
|
| 211 |
gr.Examples(
|
| 212 |
+
examples=get_questions_from_file(QUESTION_FILE_PATH_HLE, 0),
|
| 213 |
inputs=[question, ground_truth, file_name, openai_api_key, gemini_api_key, anthropic_api_key],
|
| 214 |
examples_per_page=3,
|
| 215 |
cache_examples=False
|