Commit
·
0dfb8fc1
Parent(s):
c7b592e
handle different version names in dataset and in storage
Browse files
- src/config.py +1 -1
- src/eval.py +6 -3
- src/hf_utils.py +16 -1
- src/ui.py +2 -2
- src/user_eval.py +1 -1
- template.py +1 -1
src/config.py
CHANGED
|
@@ -17,7 +17,7 @@ LDB_COLS = [
|
|
| 17 |
]
|
| 18 |
|
| 19 |
# dataset versions
|
| 20 |
-
DATASET_VERSIONS = ["v1", "v1_verified"]  # [reconstructed: removed line was truncated in extraction; old names inferred from the original→v1 / verified→v1_verified mapping added in this commit]
|
| 21 |
DEFAULT_DATASET_VERSION = DATASET_VERSIONS[-1]
|
| 22 |
|
| 23 |
# modelling frameworks
|
|
|
|
| 17 |
]
|
| 18 |
|
| 19 |
# dataset versions
|
| 20 |
+
DATASET_VERSIONS = ["original", "verified"]
|
| 21 |
DEFAULT_DATASET_VERSION = DATASET_VERSIONS[-1]
|
| 22 |
|
| 23 |
# modelling frameworks
|
src/eval.py
CHANGED
|
@@ -29,8 +29,8 @@ def start_background_evaluation(submission_path, dataset_version):
|
|
| 29 |
|
| 30 |
# --- Main Evaluation Logic ---
|
| 31 |
def main_eval(
|
| 32 |
-
submission_path_in_dataset: str, # e.g., "submissions/v1/submission_name" [reconstructed: removed line truncated in extraction]
|
| 33 |
-
dataset_version: str # e.g., "v1", "v1_verified" [reconstructed: removed line truncated in extraction]
|
| 34 |
):
|
| 35 |
start_time = time.time()
|
| 36 |
# Infer submission name for logging and result path generation
|
|
@@ -39,7 +39,6 @@ def main_eval(
|
|
| 39 |
print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
|
| 40 |
print(f" User Data Repo: {DATASET_REPO_ID}", flush=True)
|
| 41 |
print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
|
| 42 |
-
print(f" Results to upload to: {DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}", flush=True)
|
| 43 |
|
| 44 |
hf_api = HfApi() # Will use HF_TOKEN from environment
|
| 45 |
|
|
@@ -103,6 +102,10 @@ def main_eval(
|
|
| 103 |
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir, dataset_version)
|
| 104 |
|
| 105 |
# Upload the entire local_result_dir_for_upload to HF Dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
result_path_on_hub = f"{DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}"
|
| 107 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 108 |
flush=True)
|
|
|
|
| 29 |
|
| 30 |
# --- Main Evaluation Logic ---
|
| 31 |
def main_eval(
|
| 32 |
+
submission_path_in_dataset: str, # e.g., "submissions/verified/submission_name"
|
| 33 |
+
dataset_version: str # e.g., "original", "verified"
|
| 34 |
):
|
| 35 |
start_time = time.time()
|
| 36 |
# Infer submission name for logging and result path generation
|
|
|
|
| 39 |
print(f"eval.py: Starting evaluation for submission: '{submission_name_for_files}'", flush=True)
|
| 40 |
print(f" User Data Repo: {DATASET_REPO_ID}", flush=True)
|
| 41 |
print(f" Submission to download from: {submission_path_in_dataset}", flush=True)
|
|
|
|
| 42 |
|
| 43 |
hf_api = HfApi() # Will use HF_TOKEN from environment
|
| 44 |
|
|
|
|
| 102 |
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir, dataset_version)
|
| 103 |
|
| 104 |
# Upload the entire local_result_dir_for_upload to HF Dataset
|
| 105 |
+
if dataset_version == "original":
|
| 106 |
+
dataset_version = "v1"
|
| 107 |
+
elif dataset_version == "verified":
|
| 108 |
+
dataset_version = "v1_verified"
|
| 109 |
result_path_on_hub = f"{DS_RESULTS_PATH}/{dataset_version}/{submission_name_for_files}"
|
| 110 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 111 |
flush=True)
|
src/hf_utils.py
CHANGED
|
@@ -29,7 +29,12 @@ def load_leaderboard_data(dataset_version):
|
|
| 29 |
try:
|
| 30 |
# List all files in the results path of the dataset
|
| 31 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
version_submissions_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}"
|
| 34 |
version_results_path = f"{DS_RESULTS_PATH}/{dataset_version}"
|
| 35 |
|
|
@@ -133,6 +138,11 @@ def upload_submission(uploaded_file, dir_name, report_file, model_framework, bas
|
|
| 133 |
if not HF_API:
|
| 134 |
return False, "Hugging Face API not initialized"
|
| 135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
try:
|
| 137 |
submission_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}/{dir_name}"
|
| 138 |
HF_API.upload_file(
|
|
@@ -175,6 +185,11 @@ def check_name_exists(submission_name, dataset_version):
|
|
| 175 |
if not HF_API:
|
| 176 |
return False
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
try:
|
| 179 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 180 |
for file_path in repo_files:
|
|
|
|
| 29 |
try:
|
| 30 |
# List all files in the results path of the dataset
|
| 31 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 32 |
+
|
| 33 |
+
if dataset_version == "original":
|
| 34 |
+
dataset_version = "v1"
|
| 35 |
+
elif dataset_version == "verified":
|
| 36 |
+
dataset_version = "v1_verified"
|
| 37 |
+
|
| 38 |
version_submissions_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}"
|
| 39 |
version_results_path = f"{DS_RESULTS_PATH}/{dataset_version}"
|
| 40 |
|
|
|
|
| 138 |
if not HF_API:
|
| 139 |
return False, "Hugging Face API not initialized"
|
| 140 |
|
| 141 |
+
if dataset_version == "original":
|
| 142 |
+
dataset_version = "v1"
|
| 143 |
+
elif dataset_version == "verified":
|
| 144 |
+
dataset_version = "v1_verified"
|
| 145 |
+
|
| 146 |
try:
|
| 147 |
submission_path = f"{DS_SUBMISSIONS_PATH}/{dataset_version}/{dir_name}"
|
| 148 |
HF_API.upload_file(
|
|
|
|
| 185 |
if not HF_API:
|
| 186 |
return False
|
| 187 |
|
| 188 |
+
if dataset_version == "original":
|
| 189 |
+
dataset_version = "v1"
|
| 190 |
+
elif dataset_version == "verified":
|
| 191 |
+
dataset_version = "v1_verified"
|
| 192 |
+
|
| 193 |
try:
|
| 194 |
repo_files = HF_API.list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
|
| 195 |
for file_path in repo_files:
|
src/ui.py
CHANGED
|
@@ -99,9 +99,9 @@ def create_ui():
|
|
| 99 |
"## Important Notes\n"
|
| 100 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 101 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 102 |
-
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results before submission. You can run the script as follows:\n"
|
| 103 |
" ```bash\n"
|
| 104 |
-
"  python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy --dataset_version v1_verified\n" [reconstructed: removed line truncated in extraction; old version name inferred from the verified→v1_verified mapping added in this commit]
|
| 105 |
" ```\n"
|
| 106 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 107 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests)."
|
|
|
|
| 99 |
"## Important Notes\n"
|
| 100 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 101 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 102 |
+
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results locally before submission. You can run the script as follows:\n"
|
| 103 |
" ```bash\n"
|
| 104 |
+
" python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy --dataset_version verified\n"
|
| 105 |
" ```\n"
|
| 106 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 107 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests)."
|
src/user_eval.py
CHANGED
|
@@ -16,7 +16,7 @@ CPMPY_FRAMEWORK = "CPMpy"
|
|
| 16 |
MINIZINC_FRAMEWORK = "MiniZinc"
|
| 17 |
ORTOOLS_FRAMEWORK = "OR-Tools"
|
| 18 |
|
| 19 |
-
DATASET_VERSIONS = ["v1", "v1_verified"]  # [reconstructed: removed line was truncated in extraction; old names inferred from the original→v1 / verified→v1_verified mapping added in this commit]
|
| 20 |
|
| 21 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 22 |
GT_PROBLEM_NAME_COLUMN = "id"
|
|
|
|
| 16 |
MINIZINC_FRAMEWORK = "MiniZinc"
|
| 17 |
ORTOOLS_FRAMEWORK = "OR-Tools"
|
| 18 |
|
| 19 |
+
DATASET_VERSIONS = ["original", "verified"]
|
| 20 |
|
| 21 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 22 |
GT_PROBLEM_NAME_COLUMN = "id"
|
template.py
CHANGED
|
@@ -5,7 +5,7 @@ from together import Together
|
|
| 5 |
|
| 6 |
# === DATASET CONFIGURATION (Do not change, except potentially the split) ===
|
| 7 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 8 |
-
DATASET_SPLIT = "v1_verified" [reconstructed: removed line truncated in extraction; old split name inferred from the storage-name mapping added in this commit]
|
| 9 |
PROBLEM_ID_COLUMN = "id"
|
| 10 |
PROBLEM_DESCRIPTION_COLUMN = "description"
|
| 11 |
PROBLEM_DATA_COLUMN = "input_data"
|
|
|
|
| 5 |
|
| 6 |
# === DATASET CONFIGURATION (Do not change, except potentially the split) ===
|
| 7 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 8 |
+
DATASET_SPLIT = "verified" # Choose the appropriate version/split for your submission
|
| 9 |
PROBLEM_ID_COLUMN = "id"
|
| 10 |
PROBLEM_DESCRIPTION_COLUMN = "description"
|
| 11 |
PROBLEM_DATA_COLUMN = "input_data"
|