Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
awdwa committed on
Update dabstep_benchmark/leaderboard.py
Browse files
The submission validator rejects valid JSONL files with:
"Columns with non-string data type: task_id, agent_answer".
The issue seems to be in dabstep_benchmark/leaderboard.py:
submission_df = pd.read_json(submission_path, lines=True, dtype=str)
non_string_columns = [col for col in submission_df.columns if submission_df[col].dtype != 'object']
In newer pandas versions (or with the `future.infer_string` option enabled), string columns have dtype "str" / StringDtype instead of object, so the `dtype != 'object'` check rejects valid string columns.
Suggested fix:
from pandas.api.types import is_string_dtype
non_string_columns = [
col for col in submission_df.columns
if not is_string_dtype(submission_df[col])
]
dabstep_benchmark/leaderboard.py
CHANGED
|
@@ -11,6 +11,7 @@ from datasets import load_dataset
|
|
| 11 |
from huggingface_hub import HfApi
|
| 12 |
|
| 13 |
from dabstep_benchmark.utils import format_log, format_error, format_warning, is_valid_https_url, evaluate
|
|
|
|
| 14 |
|
| 15 |
OWNER = "adyen"
|
| 16 |
|
|
@@ -80,7 +81,10 @@ def validate_submission(submission_df: pd.DataFrame):
|
|
| 80 |
return format_error("Submission contains NaN values. Please ensure no missing data.")
|
| 81 |
|
| 82 |
# Check if all columns are of string type
|
| 83 |
-
non_string_columns = [
|
|
|
|
|
|
|
|
|
|
| 84 |
if non_string_columns:
|
| 85 |
return format_error(f"Columns with non-string data type: {', '.join(non_string_columns)}")
|
| 86 |
|
|
|
|
| 11 |
from huggingface_hub import HfApi
|
| 12 |
|
| 13 |
from dabstep_benchmark.utils import format_log, format_error, format_warning, is_valid_https_url, evaluate
|
| 14 |
+
from pandas.api.types import is_object_dtype, is_string_dtype
|
| 15 |
|
| 16 |
OWNER = "adyen"
|
| 17 |
|
|
|
|
| 81 |
return format_error("Submission contains NaN values. Please ensure no missing data.")
|
| 82 |
|
| 83 |
# Check if all columns are of string type
|
| 84 |
+
non_string_columns = [
|
| 85 |
+
col for col in submission_df.columns
|
| 86 |
+
if not (is_object_dtype(submission_df[col]) or is_string_dtype(submission_df[col]))
|
| 87 |
+
]
|
| 88 |
if non_string_columns:
|
| 89 |
return format_error(f"Columns with non-string data type: {', '.join(non_string_columns)}")
|
| 90 |
|