Update utils/utils.py
Browse files
- utils/utils.py +9 -2
utils/utils.py
CHANGED
|
@@ -19,6 +19,7 @@ def get_questions_from_file(file_path, level):
|
|
| 19 |
def get_questions_from_dataset(file_path, level):
|
| 20 |
# Extract dataset type from file path (e.g., "gaia" or "hle")
|
| 21 |
basename = os.path.splitext(os.path.basename(file_path))[0]
|
|
|
|
| 22 |
|
| 23 |
# Get space ID from environment, defaulting to "bstraehle/gaia"
|
| 24 |
space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
|
|
@@ -26,10 +27,16 @@ def get_questions_from_dataset(file_path, level):
|
|
| 26 |
username = space_id.split("/")[0]
|
| 27 |
dataset_repo = f"{username}/validation"
|
| 28 |
|
| 29 |
-
# Load dataset from Hugging Face
|
| 30 |
-
dataset = load_dataset(dataset_repo, split=
|
| 31 |
df = dataset.to_pandas()
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# Filter by level if level > 0 (for GAIA benchmark)
|
| 34 |
if level > 0:
|
| 35 |
df = df[df["Level"] == level]
|
|
|
|
| 19 |
def get_questions_from_dataset(file_path, level):
|
| 20 |
# Extract dataset type from file path (e.g., "gaia" or "hle")
|
| 21 |
basename = os.path.splitext(os.path.basename(file_path))[0]
|
| 22 |
+
dataset_type = basename.replace("_validation", "")
|
| 23 |
|
| 24 |
# Get space ID from environment, defaulting to "bstraehle/gaia"
|
| 25 |
space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
|
|
|
|
| 27 |
username = space_id.split("/")[0]
|
| 28 |
dataset_repo = f"{username}/validation"
|
| 29 |
|
| 30 |
+
# Load dataset from Hugging Face
|
| 31 |
+
dataset = load_dataset(dataset_repo, split="validation")
|
| 32 |
df = dataset.to_pandas()
|
| 33 |
|
| 34 |
+
# Filter by dataset type using the task_id prefix
|
| 35 |
+
if dataset_type == "gaia":
|
| 36 |
+
df = df[df["task_id"].str.startswith("gaia-")]
|
| 37 |
+
elif dataset_type == "hle":
|
| 38 |
+
df = df[df["task_id"].str.startswith("hle-")]
|
| 39 |
+
|
| 40 |
# Filter by level if level > 0 (for GAIA benchmark)
|
| 41 |
if level > 0:
|
| 42 |
df = df[df["Level"] == level]
|