Commit
·
0dfb8fc1
Parent(s):
c7b592e
handle different version names in dataset and in storage
Browse files
- src/config.py +1 -1
- src/eval.py +6 -3
- src/hf_utils.py +16 -1
- src/ui.py +2 -2
- src/user_eval.py +1 -1
- template.py +1 -1
src/config.py
CHANGED
|
@@ -17,7 +17,7 @@ LDB_COLS = [
|
|
| 17 |
]
|
| 18 |
|
| 19 |
# dataset versions
|
| 20 |
-
DATASET_VERSIONS = ["v1", "v1_verified"]  # [reconstructed: removed line was truncated in extraction; old names inferred from the original→v1 / verified→v1_verified mapping added in this commit]
|
| 21 |
DEFAULT_DATASET_VERSION = DATASET_VERSIONS[-1]
|
| 22 |
|
| 23 |
# modelling frameworks
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
# dataset versions
|
| 20 |
+
DATASET_VERSIONS = ["original", "verified"]
|
| 21 |
DEFAULT_DATASET_VERSION = DATASET_VERSIONS[-1]
|
| 22 |
|
| 23 |
# modelling frameworks
|
src/eval.py
CHANGED
|
@@ -29,8 +29,8 @@ def start_background_evaluation(submission_path, dataset_version):
|
|
| 29 |
|
| 30 |
# --- Main Evaluation Logic ---
|
| 31 |
def main_eval(
|
| 32 |
-
submission_path_in_dataset: str, # e.g., "submissions/v1/submission_name" [reconstructed: removed line truncated in extraction]
|
| 33 |
-
dataset_version: str # e.g., "v1", "v1_verified" [reconstructed: removed line truncated in extraction]
|
| 34 |
):
|
| 35 |
start_time = time.time()
|
| 36 |
# Infer submission name for logging and result path generation
|
|
@@ -39,7 +39,6 @@ def main_eval(
|
|
| 39 |
print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
|
| 40 |
print(f" User Data Repo: {DATASET_REPO_ID}", flush=True)
|
| 41 |
print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
|
| 42 |
-
print(f" Results to upload to: {DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}", flush=True)
|
| 43 |
|
| 44 |
hf_api = HfApi() # Will use HF_TOKEN from environment
|
| 45 |
|
|
@@ -103,6 +102,10 @@ def main_eval(
|
|
| 103 |
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir, dataset_version)
|
| 104 |
|
| 105 |
# Upload the entire local_result_dir_for_upload to HF Dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
result_path_on_hub = f"{DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}"
|
| 107 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 108 |
flush=True)
|
|
|
|
| 29 |
|
| 30 |
# --- Main Evaluation Logic ---
|
| 31 |
def main_eval(
|
| 32 |
+
submission_path_in_dataset: str, # e.g., "submissions/verified/submission_name"
|
| 33 |
+
dataset_version: str # e.g., "original", "verified"
|
| 34 |
):
|
| 35 |
start_time = time.time()
|
| 36 |
# Infer submission name for logging and result path generation
|
|
|
|
| 39 |
print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
|
| 40 |
print(f" User Data Repo: {DATASET_REPO_ID}", flush=True)
|
| 41 |
print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
|
|
|
|
| 42 |
|
| 43 |
hf_api = HfApi() # Will use HF_TOKEN from environment
|
| 44 |
|
|
|
|
| 102 |
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir, dataset_version)
|
| 103 |
|
| 104 |
# Upload the entire local_result_dir_for_upload to HF Dataset
|
| 105 |
+
if dataset_version == "original":
|
| 106 |
+
dataset_version = "v1"
|
| 107 |
+
elif dataset_version == "verified":
|
| 108 |
+
dataset_version = "v1_verified"
|
| 109 |
result_path_on_hub = f"{DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}"
|
| 110 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 111 |
flush=True)
|
src/hf_utils.py
CHANGED
|
@@ -29,7 +29,12 @@ def load_leaderboard_data(dataset_version):
|
|
| 29 |
try:
|
| 30 |
# List all files in the results path of the dataset
|
| 31 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
version_submissions_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}"
|
| 34 |
version_results_path = f"{DS_RESULTS_PATH}/{dataset_version}"
|
| 35 |
|
|
@@ -133,6 +138,11 @@ def upload_submission(uploaded_file, dir_name, report_file, model_framework, bas
|
|
| 133 |
if not HF_API:
|
| 134 |
return False, "Hugging Face API not initialized"
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
submission_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}/{dir_name}"
|
| 138 |
HF_API.upload_file(
|
|
@@ -175,6 +185,11 @@ def check_name_exists(submission_name, dataset_version):
|
|
| 175 |
if not HF_API:
|
| 176 |
return False
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
try:
|
| 179 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 180 |
for file_path in repo_files:
|
|
|
|
| 29 |
try:
|
| 30 |
# List all files in the results path of the dataset
|
| 31 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 32 |
+
|
| 33 |
+
if dataset_version == "original":
|
| 34 |
+
dataset_version = "v1"
|
| 35 |
+
elif dataset_version == "verified":
|
| 36 |
+
dataset_version = "v1_verified"
|
| 37 |
+
|
| 38 |
version_submissions_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}"
|
| 39 |
version_results_path = f"{DS_RESULTS_PATH}/{dataset_version}"
|
| 40 |
|
|
|
|
| 138 |
if not HF_API:
|
| 139 |
return False, "Hugging Face API not initialized"
|
| 140 |
|
| 141 |
+
if dataset_version == "original":
|
| 142 |
+
dataset_version = "v1"
|
| 143 |
+
elif dataset_version == "verified":
|
| 144 |
+
dataset_version = "v1_verified"
|
| 145 |
+
|
| 146 |
try:
|
| 147 |
submission_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}/{dir_name}"
|
| 148 |
HF_API.upload_file(
|
|
|
|
| 185 |
if not HF_API:
|
| 186 |
return False
|
| 187 |
|
| 188 |
+
if dataset_version == "original":
|
| 189 |
+
dataset_version = "v1"
|
| 190 |
+
elif dataset_version == "verified":
|
| 191 |
+
dataset_version = "v1_verified"
|
| 192 |
+
|
| 193 |
try:
|
| 194 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 195 |
for file_path in repo_files:
|
src/ui.py
CHANGED
|
@@ -99,9 +99,9 @@ def create_ui():
|
|
| 99 |
"## Important Notes\n"
|
| 100 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 101 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 102 |
-
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results before submission. You can run the script as follows:\n"
|
| 103 |
" ```bash\n"
|
| 104 |
-
"  python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy --dataset_version v1_verified\n" [reconstructed: removed line truncated in extraction; old version name inferred from the verified→v1_verified mapping added in this commit]
|
| 105 |
" ```\n"
|
| 106 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 107 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests)."
|
|
|
|
| 99 |
"## Important Notes\n"
|
| 100 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 101 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 102 |
+
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results locally before submission. You can run the script as follows:\n"
|
| 103 |
" ```bash\n"
|
| 104 |
+
" python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy --dataset_version verified\n"
|
| 105 |
" ```\n"
|
| 106 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 107 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests)."
|
src/user_eval.py
CHANGED
|
@@ -16,7 +16,7 @@ CPMPY_FRAMEWORK = "CPMpy"
|
|
| 16 |
MINIZINC_FRAMEWORK = "MiniZinc"
|
| 17 |
ORTOOLS_FRAMEWORK = "OR-Tools"
|
| 18 |
|
| 19 |
-
DATASET_VERSIONS = ["v1", "v1_verified"]  # [reconstructed: removed line was truncated in extraction; old names inferred from the original→v1 / verified→v1_verified mapping added in this commit]
|
| 20 |
|
| 21 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 22 |
GT_PROBLEM_NAME_COLUMN = "id"
|
|
|
|
| 16 |
MINIZINC_FRAMEWORK = "MiniZinc"
|
| 17 |
ORTOOLS_FRAMEWORK = "OR-Tools"
|
| 18 |
|
| 19 |
+
DATASET_VERSIONS = ["original", "verified"]
|
| 20 |
|
| 21 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 22 |
GT_PROBLEM_NAME_COLUMN = "id"
|
template.py
CHANGED
|
@@ -5,7 +5,7 @@ from together import Together
|
|
| 5 |
|
| 6 |
# === DATASET CONFIGURATION (Do not change, except potentially the split) ===
|
| 7 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 8 |
-
DATASET_SPLIT = "v1_verified" [reconstructed: removed line truncated in extraction; old split name inferred from the storage-name mapping added in this commit]
|
| 9 |
PROBLEM_ID_COLUMN = "id"
|
| 10 |
PROBLEM_DESCRIPTION_COLUMN = "description"
|
| 11 |
PROBLEM_DATA_COLUMN = "input_data"
|
|
|
|
| 5 |
|
| 6 |
# === DATASET CONFIGURATION (Do not change, except potentially the split) ===
|
| 7 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 8 |
+
DATASET_SPLIT = "verified" # Choose the appropriate version/split for your submission
|
| 9 |
PROBLEM_ID_COLUMN = "id"
|
| 10 |
PROBLEM_DESCRIPTION_COLUMN = "description"
|
| 11 |
PROBLEM_DATA_COLUMN = "input_data"
|