bstraehle committed on
Commit
acaa214
·
verified ·
1 Parent(s): 17b7166

Update utils/utils.py

Browse files
Files changed (1) hide show
  1. utils/utils.py +9 -2
utils/utils.py CHANGED
@@ -19,6 +19,7 @@ def get_questions_from_file(file_path, level):
19
  def get_questions_from_dataset(file_path, level):
20
  # Extract dataset type from file path (e.g., "gaia" or "hle")
21
  basename = os.path.splitext(os.path.basename(file_path))[0]
 
22
 
23
  # Get space ID from environment, defaulting to "bstraehle/gaia"
24
  space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
@@ -26,10 +27,16 @@ def get_questions_from_dataset(file_path, level):
26
  username = space_id.split("/")[0]
27
  dataset_repo = f"{username}/validation"
28
 
29
- # Load dataset from Hugging Face - use the basename as the split name
30
- dataset = load_dataset(dataset_repo, split=basename)
31
  df = dataset.to_pandas()
32
 
 
 
 
 
 
 
33
  # Filter by level if level > 0 (for GAIA benchmark)
34
  if level > 0:
35
  df = df[df["Level"] == level]
 
19
  def get_questions_from_dataset(file_path, level):
20
  # Extract dataset type from file path (e.g., "gaia" or "hle")
21
  basename = os.path.splitext(os.path.basename(file_path))[0]
22
+ dataset_type = basename.replace("_validation", "")
23
 
24
  # Get space ID from environment, defaulting to "bstraehle/gaia"
25
  space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
 
27
  username = space_id.split("/")[0]
28
  dataset_repo = f"{username}/validation"
29
 
30
+ # Load dataset from Hugging Face
31
+ dataset = load_dataset(dataset_repo, split="validation")
32
  df = dataset.to_pandas()
33
 
34
+ # Filter by dataset type using the task_id prefix
35
+ if dataset_type == "gaia":
36
+ df = df[df["task_id"].str.startswith("gaia-")]
37
+ elif dataset_type == "hle":
38
+ df = df[df["task_id"].str.startswith("hle-")]
39
+
40
  # Filter by level if level > 0 (for GAIA benchmark)
41
  if level > 0:
42
  df = df[df["Level"] == level]