bstraehle committed on
Commit
8ef6b6e
·
verified ·
1 Parent(s): 0d4f176

Update utils/utils.py

Browse files
Files changed (1) hide show
  1. utils/utils.py +1 -10
utils/utils.py CHANGED
@@ -34,20 +34,12 @@ def get_dataset_from_file(dataset_type, level):
34
  return result
35
 
36
  def get_dataset(dataset_type, level):
37
- # Get space ID from environment, defaulting to "bstraehle/gaia"
38
  space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
39
- # Extract username from space_id
40
  username = space_id.split("/")[0]
41
  dataset_repo = f"{username}/validation"
42
- print(f"space_id={space_id}")
43
- print(f"username={username}")
44
- print(f"dataset_repo={dataset_repo}")
45
-
46
- # Load dataset from Hugging Face
47
  dataset = load_dataset(dataset_repo, split="validation")
48
  df = dataset.to_pandas()
49
 
50
- # Filter by dataset type using the task_id prefix
51
  if dataset_type == DATASET_TYPE_GAIA:
52
  print(f"filtering for dataset_type={dataset_type}")
53
  df = df[df["task_id"].str.startswith("gaia-")]
@@ -55,14 +47,13 @@ def get_dataset(dataset_type, level):
55
  print(f"filtering for dataset_type={dataset_type}")
56
  df = df[df["task_id"].str.startswith("hle-")]
57
 
58
- # Filter by level if level > 0 (for GAIA benchmark)
59
- print(f"level={level}")
60
  if level > 0:
61
  df = df[df["Level"] == level]
62
 
63
  result=[]
64
 
65
  for _, row in df.iterrows():
 
66
  result.append([row["Question"], row["Final answer"], row["file_name"]])
67
 
68
  return result
 
34
  return result
35
 
36
  def get_dataset(dataset_type, level):
 
37
  space_id = os.environ.get("SPACE_ID", "bstraehle/gaia")
 
38
  username = space_id.split("/")[0]
39
  dataset_repo = f"{username}/validation"
 
 
 
 
 
40
  dataset = load_dataset(dataset_repo, split="validation")
41
  df = dataset.to_pandas()
42
 
 
43
  if dataset_type == DATASET_TYPE_GAIA:
44
  print(f"filtering for dataset_type={dataset_type}")
45
  df = df[df["task_id"].str.startswith("gaia-")]
 
47
  print(f"filtering for dataset_type={dataset_type}")
48
  df = df[df["task_id"].str.startswith("hle-")]
49
 
 
 
50
  if level > 0:
51
  df = df[df["Level"] == level]
52
 
53
  result=[]
54
 
55
  for _, row in df.iterrows():
56
+ print(row)
57
  result.append([row["Question"], row["Final answer"], row["file_name"]])
58
 
59
  return result