Spaces:
Sleeping
Sleeping
Commit
·
70cc330
1
Parent(s):
f434b15
Refactor evaluation logic: streamline user_eval.py, update evaluation script references, and clean up eval.py
Browse files
- src/eval.py +10 -332
- src/hf_utils.py +0 -4
- src/ui.py +3 -3
- user_eval.py → src/user_eval.py +211 -113
- template_submission.jsonl +18 -18
src/eval.py
CHANGED
|
@@ -1,30 +1,14 @@
|
|
| 1 |
-
import datetime
|
| 2 |
import time
|
| 3 |
import json
|
| 4 |
import tempfile
|
| 5 |
|
| 6 |
-
import minizinc
|
| 7 |
-
from datasets import load_dataset
|
| 8 |
from huggingface_hub import HfApi, hf_hub_download
|
| 9 |
import os
|
| 10 |
-
import sys
|
| 11 |
-
import subprocess
|
| 12 |
import threading
|
| 13 |
from pathlib import Path
|
| 14 |
|
| 15 |
-
from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# --- Configuration ---
|
| 19 |
-
|
| 20 |
-
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 21 |
-
|
| 22 |
-
# Column names in the Hugging Face dataset for problem identifier and model script
|
| 23 |
-
GT_PROBLEM_NAME_COLUMN = "id"
|
| 24 |
-
GT_MODEL_CODE_COLUMN = "model"
|
| 25 |
-
|
| 26 |
-
# Timeout for running individual model scripts (both generated and modified ground-truth)
|
| 27 |
-
SCRIPT_EXECUTION_TIMEOUT = 60 # seconds
|
| 28 |
|
| 29 |
|
| 30 |
def run_evaluation(submission_path):
|
|
@@ -43,171 +27,6 @@ def start_background_evaluation(submission_path):
|
|
| 43 |
return True
|
| 44 |
|
| 45 |
|
| 46 |
-
def extract_json_from_code_output(output: str):
    """Extract the JSON object embedded in a script's textual output.

    The text between the first ``{`` and the last ``}`` is parsed first. If
    that span is not valid JSON (for example because a log line containing
    braces precedes the solution), each ``{`` is tried in turn as the start of
    an object and the first one that decodes is returned.

    :param output: Captured output of the executed model.
    :return: The decoded JSON object, or None when ``output`` is not a string
        or contains no decodable JSON object.
    """
    if not isinstance(output, str):
        return None

    start_index = output.find('{')
    end_index = output.rfind('}') + 1
    # find() yields -1 when there is no opening brace; bail out explicitly
    # instead of relying on what a negative slice start happens to produce.
    if start_index == -1 or end_index <= start_index:
        return None

    try:
        return json.loads(output[start_index:end_index])
    except json.JSONDecodeError:
        pass  # fall through to the brace-by-brace scan below

    # The greedy span was not a single JSON document; look for the first
    # opening brace from which a complete object can be decoded.
    decoder = json.JSONDecoder()
    position = start_index
    while position != -1:
        try:
            candidate, _ = decoder.raw_decode(output, position)
        except json.JSONDecodeError:
            position = output.find('{', position + 1)
        else:
            return candidate
    return None
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
def exec_code_minizinc(code: str, timeout_sec):
    """
    Solve a MiniZinc model given as source text via the minizinc-python library.

    :param code: The MiniZinc model code as a string.
    :param timeout_sec: The maximum time to wait for the solver in seconds.
    :return: A tuple of (success, output, timeout_occured). On success the
        output is the stringified solution (empty if there is none); otherwise
        it is a message describing the failure.
    """
    time_limit = datetime.timedelta(seconds=timeout_sec)

    try:
        # Build the model from the raw source text.
        mzn_model = minizinc.Model()
        mzn_model.add_string(code)

        # The Gecode solver is requested by name.
        solver = minizinc.Solver.lookup("gecode")
        if solver is None:
            raise RuntimeError("No suitable solver found. Please install a MiniZinc solver.")

        # The time limit is handed to solve() itself.
        result = minizinc.Instance(solver, mzn_model).solve(timeout=time_limit)

        solved_statuses = {minizinc.Status.SATISFIED, minizinc.Status.OPTIMAL_SOLUTION}
        if result.status in solved_statuses:
            solution_text = "" if result.solution is None else str(result.solution)
            return True, solution_text, False

        # An UNKNOWN status is reported to the caller as a timeout.
        if result.status == minizinc.Status.UNKNOWN:
            return (
                False,
                f"Timeout Error: Solver stopped after {timeout_sec} seconds (Status: UNKNOWN).",
                True,
            )

        # Any other status (UNSAT, ERROR, etc.) is a plain failure.
        return False, f"Solving failed. Status: {result.status}", False

    except minizinc.MiniZincError as e:
        # MiniZinc-specific failures raised by the library.
        return False, f"MiniZinc Error: {e}", False
    except Exception as e:
        # Anything else, including the RuntimeError raised above.
        return False, f"Unexpected Error during MiniZinc execution: {e}", False
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
def exec_code(code: str, timeout=10, modelling_language='cpmpy'):
    """
    Execute the given code and return the output

    The code is written to a temporary file inside ``temp_dir_for_exec_code``
    (created under the current working directory). CPMpy and OR-Tools code is
    run as a Python script in a subprocess; MiniZinc code is delegated to
    ``exec_code_minizinc``. The temporary file is always removed, even when
    writing it fails or execution is interrupted.

    :param code: The code to execute as a string
    :param timeout: The maximum time to wait for the code to execute in seconds
    :param modelling_language: The language to use for execution (cpmpy, minizinc, or-tools)
    :return: A tuple of (success, output, timeout_occured). ``output`` is stdout
        on success, stderr on a non-zero exit code, or an error message.
    """
    # create a temp directory to store the temporary file
    temp_dir = os.path.join(os.getcwd(), "temp_dir_for_exec_code")
    os.makedirs(temp_dir, exist_ok=True)

    # NOTE(review): CPMPY_FRAMEWORK / ORTOOLS_FRAMEWORK / MINIZINC_FRAMEWORK are
    # not defined in the visible part of this file — confirm they are in scope.
    runs_as_python = modelling_language in (CPMPY_FRAMEWORK, ORTOOLS_FRAMEWORK)
    suffix = '.__hidden_py__' if runs_as_python else '.mzn'

    temp_instance_path = None
    try:
        # write the code to a temporary file
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix, dir=temp_dir,
                                         encoding='utf-8') as temp_file:
            temp_instance_path = temp_file.name
            temp_file.write(code)

        try:
            # execute the code
            if runs_as_python:
                command = [sys.executable, temp_instance_path]
                result = subprocess.run(command, capture_output=True, text=True, timeout=timeout,
                                        encoding='utf-8')

                successfully_executed = (result.returncode == 0)
                output = result.stdout if successfully_executed else result.stderr
                timeout_occurred = False
            elif modelling_language == MINIZINC_FRAMEWORK:
                successfully_executed, output, timeout_occurred = exec_code_minizinc(code, timeout)
            else:
                raise ValueError(f"MODELLING_LANGUAGE not supported: {modelling_language}")

        except subprocess.TimeoutExpired:
            successfully_executed = False
            output = f"Timeout Error: Execution time exceeded {timeout} seconds"
            timeout_occurred = True
        except Exception as e:
            # Includes the ValueError raised above for unsupported languages.
            successfully_executed = False
            output = f"Error: {e}"
            timeout_occurred = False
    finally:
        # Clean up on every exit path (the file was created with delete=False),
        # including a failed write or an interruption such as KeyboardInterrupt.
        if temp_instance_path is not None:
            os.remove(temp_instance_path)

    return successfully_executed, output, timeout_occurred
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
def add_constraints_as_string(solution):
    """Generate constraints as a string to be added to the original script.

    Each ``key: value`` pair of ``solution`` becomes one line of the form
    ``model += (key == value)``, preceded by a newline.

    :param solution: Mapping of variable name to value, or None/empty.
    :return: The concatenated constraint lines; an empty string when
        ``solution`` is None or empty.
    """
    if not solution:  # Ensure solution is not None (or empty)
        return ""

    lines = []
    for key, value in solution.items():
        if isinstance(value, str):
            # json.dumps yields a double-quoted literal with quotes, backslashes
            # and control characters escaped, so the generated line stays valid
            # Python. Plain strings come out exactly as before.
            literal = json.dumps(value, ensure_ascii=False)
        else:
            literal = f"{value}"
        lines.append(f"\nmodel += ({key} == {literal})")
    return "".join(lines)
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
def get_modified_script(script_content, solution):
    """Return the script extended with solution constraints and self-consistency checks.

    The result is the original script, then the constraint lines produced by
    ``add_constraints_as_string(solution)``, then a fixed snippet that prints
    the script path, re-solves ``model`` and prints SUCCESS/ERROR lines for the
    consistency and objective checks.
    """
    # Snippet appended verbatim; it expects the script to define `model`.
    self_consistency_checks = """
# Print the absolute path of the current directory along with the script name
import os
print(os.path.abspath(__file__))

# Keep old objective
old_objective = None
if hasattr(model, 'objective_is_min') and model.objective_is_min is not None:
    old_objective = model.objective_value()

# Check self-consistency
if not model.solve():
    print('ERROR: The model is unsatisfiable with the self-consistency constraints')
else:
    print('SUCCESS: Model is consistent')

# Check if the objective value is the same
if old_objective is None:
    print('SUCCESS: No objective defined')
elif model.objective_value() != old_objective:
    print('ERROR: The objective value has changed')
else:
    print('SUCCESS: Objective value is consistent')
"""
    pinned_solution = add_constraints_as_string(solution)
    return f"{script_content}\n{pinned_solution}{self_consistency_checks}"
|
| 209 |
-
|
| 210 |
-
|
| 211 |
# --- Main Evaluation Logic ---
|
| 212 |
def main_eval(
|
| 213 |
user_dataset_repo_id: str,
|
|
@@ -237,7 +56,7 @@ def main_eval(
|
|
| 237 |
# Path for the summary file within the local temporary result directory
|
| 238 |
summary_file_path = local_result_dir_for_upload / "summary.txt"
|
| 239 |
|
| 240 |
-
#
|
| 241 |
print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
|
| 242 |
flush=True)
|
| 243 |
try:
|
|
@@ -262,28 +81,11 @@ def main_eval(
|
|
| 262 |
print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
|
| 263 |
return 1
|
| 264 |
|
| 265 |
-
# 2. Load ground-truth dataset
|
| 266 |
-
print(f" Loading ground-truth dataset '{GT_DATASET_NAME}'...", flush=True)
|
| 267 |
-
try:
|
| 268 |
-
gt_dataset = load_dataset(GT_DATASET_NAME, split="train", trust_remote_code=True)
|
| 269 |
-
ground_truth_models = {
|
| 270 |
-
item[GT_PROBLEM_NAME_COLUMN]: item[GT_MODEL_CODE_COLUMN]
|
| 271 |
-
for item in gt_dataset if
|
| 272 |
-
GT_PROBLEM_NAME_COLUMN in item and GT_MODEL_CODE_COLUMN in item and item[GT_MODEL_CODE_COLUMN]
|
| 273 |
-
}
|
| 274 |
-
if not ground_truth_models: raise ValueError("No models in GT dataset.")
|
| 275 |
-
print(f" Loaded {len(ground_truth_models)} ground-truth models.", flush=True)
|
| 276 |
-
except Exception as e_gt:
|
| 277 |
-
print(f" CRITICAL ERROR - Failed to load ground-truth dataset: {e_gt}", flush=True)
|
| 278 |
-
with open(summary_file_path, "w") as f:
|
| 279 |
-
f.write(f"CRITICAL ERROR: Failed to load ground-truth dataset '{GT_DATASET_NAME}'.\nError: {e_gt}\n")
|
| 280 |
-
# (Attempt to upload error summary)
|
| 281 |
-
return 1
|
| 282 |
-
|
| 283 |
# load generated models from jsonl to memory
|
| 284 |
print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
|
| 285 |
submitted_models = []
|
| 286 |
-
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r",
|
|
|
|
| 287 |
for line in f:
|
| 288 |
try:
|
| 289 |
json_obj = json.loads(line)
|
|
@@ -292,124 +94,16 @@ def main_eval(
|
|
| 292 |
print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
|
| 293 |
|
| 294 |
# load metadata file
|
| 295 |
-
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "metadata.json"), "r",
|
|
|
|
| 296 |
metadata = json.load(f)
|
| 297 |
|
| 298 |
print(f" Loaded {len(submitted_models)} generated models.", flush=True)
|
| 299 |
|
|
|
|
|
|
|
| 300 |
|
| 301 |
-
#
|
| 302 |
-
total_submitted_models = 0
|
| 303 |
-
models_ran_successfully = 0
|
| 304 |
-
consistency_checks_passed = 0
|
| 305 |
-
objective_checks_passed = 0
|
| 306 |
-
all_checks_passed = 0
|
| 307 |
-
gt_models_found = 0
|
| 308 |
-
|
| 309 |
-
with open(summary_file_path, "w", encoding="utf-8") as summary_f:
|
| 310 |
-
summary_f.write(f"Evaluation Summary for Submission: {submission_name_for_files}\n")
|
| 311 |
-
summary_f.write(f"User Data Repo: {user_dataset_repo_id}\n")
|
| 312 |
-
summary_f.write(f"Submission Path in Dataset: {submission_path_in_dataset}\n")
|
| 313 |
-
summary_f.write(f"Ground-Truth Dataset: {GT_DATASET_NAME}\n")
|
| 314 |
-
summary_f.write("-" * 30 + "\n")
|
| 315 |
-
|
| 316 |
-
# Iterate through downloaded submitted models
|
| 317 |
-
for submitted_model in submitted_models:
|
| 318 |
-
curr_model = submitted_model[GT_MODEL_CODE_COLUMN]
|
| 319 |
-
|
| 320 |
-
total_submitted_models += 1
|
| 321 |
-
problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
|
| 322 |
-
print(f"\n Processing downloaded model: {problem_name}", flush=True)
|
| 323 |
-
summary_f.write(f"\n--- Model: {problem_name} ---\n")
|
| 324 |
-
|
| 325 |
-
summary_f.write(" 1. Running submitted model...\n")
|
| 326 |
-
|
| 327 |
-
succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=SCRIPT_EXECUTION_TIMEOUT, modelling_language=metadata["modelling_framework"])
|
| 328 |
-
|
| 329 |
-
if succ_exec:
|
| 330 |
-
models_ran_successfully += 1
|
| 331 |
-
summary_f.write(" - SUCCESS: Model executed successfully.\n")
|
| 332 |
-
|
| 333 |
-
if timeout_occurred:
|
| 334 |
-
summary_f.write(f" - TIMEOUT: Execution time exceeded {SCRIPT_EXECUTION_TIMEOUT} seconds.\n")
|
| 335 |
-
continue
|
| 336 |
-
if not succ_exec:
|
| 337 |
-
summary_f.write(f" - FAILED: Execution failed with error: {output}\n")
|
| 338 |
-
continue
|
| 339 |
-
if output is None or not output.strip():
|
| 340 |
-
summary_f.write(f" - FAILED: No output from execution.\n")
|
| 341 |
-
continue
|
| 342 |
-
# Attempt to extract JSON from stdout
|
| 343 |
-
generated_solution = extract_json_from_code_output(output)
|
| 344 |
-
if generated_solution is None:
|
| 345 |
-
summary_f.write(f" - FAILED: Could not extract JSON solution from output: {output}\n")
|
| 346 |
-
continue
|
| 347 |
-
summary_f.write(f" - SUCCESS: Got solution: {generated_solution}\n")
|
| 348 |
-
|
| 349 |
-
summary_f.write(f" 2. Checking against ground-truth for '{problem_name}'...\n")
|
| 350 |
-
if problem_name not in ground_truth_models:
|
| 351 |
-
summary_f.write(f" - FAILED: Ground-truth model for '{problem_name}' not found in dataset.\n")
|
| 352 |
-
continue
|
| 353 |
-
gt_models_found += 1
|
| 354 |
-
ground_truth_script_content = ground_truth_models[problem_name]
|
| 355 |
-
summary_f.write(" - SUCCESS: Found ground-truth model.\n")
|
| 356 |
-
|
| 357 |
-
summary_f.write(" 3. Performing self-consistency check on ground-truth model...\n")
|
| 358 |
-
modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)
|
| 359 |
-
|
| 360 |
-
try:
|
| 361 |
-
with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
|
| 362 |
-
dir=top_level_temp_dir) as tmp_file:
|
| 363 |
-
tmp_file.write(modified_gt_script)
|
| 364 |
-
tmp_file_path_str = tmp_file.name
|
| 365 |
-
|
| 366 |
-
gt_check_result = subprocess.run(
|
| 367 |
-
[sys.executable, tmp_file_path_str],
|
| 368 |
-
capture_output=True, text=True, timeout=SCRIPT_EXECUTION_TIMEOUT, encoding='utf-8',
|
| 369 |
-
)
|
| 370 |
-
os.unlink(tmp_file_path_str)
|
| 371 |
-
|
| 372 |
-
gt_stdout = gt_check_result.stdout
|
| 373 |
-
if "SUCCESS: Model is consistent" in gt_stdout:
|
| 374 |
-
summary_f.write(" - CONSISTENCY: PASSED\n")
|
| 375 |
-
consistency_checks_passed += 1
|
| 376 |
-
else:
|
| 377 |
-
summary_f.write(
|
| 378 |
-
" - CONSISTENCY: FAILED (Details in logs or stdout)\n")
|
| 379 |
-
|
| 380 |
-
if "SUCCESS: No objective defined" in gt_stdout or "SUCCESS: Objective value is consistent" in gt_stdout:
|
| 381 |
-
summary_f.write(" - OBJECTIVE: PASSED\n")
|
| 382 |
-
objective_checks_passed += 1
|
| 383 |
-
else:
|
| 384 |
-
summary_f.write(" - OBJECTIVE: FAILED (Details in logs or stdout)\n")
|
| 385 |
-
|
| 386 |
-
if "SUCCESS: Model is consistent" in gt_stdout and ("SUCCESS: No objective defined" in gt_stdout or "SUCCESS: Objective value is consistent" in gt_stdout):
|
| 387 |
-
summary_f.write(" - SELF-CONSISTENCY CHECK: PASSED fully\n")
|
| 388 |
-
all_checks_passed += 1
|
| 389 |
-
|
| 390 |
-
except Exception as e_gt_run:
|
| 391 |
-
summary_f.write(f" - SELF-CONSISTENCY CHECK: FAILED (Error: {e_gt_run})\n")
|
| 392 |
-
|
| 393 |
-
# Final statistics (write to summary_f)
|
| 394 |
-
summary_f.write("\n" + "=" * 30 + "\n")
|
| 395 |
-
summary_f.write("Overall Evaluation Statistics:\n")
|
| 396 |
-
summary_f.write(f" Total Submitted Models Parsed: {total_submitted_models}\n")
|
| 397 |
-
summary_f.write(f" Models That Ran Successfully: {models_ran_successfully}/{total_submitted_models}\n")
|
| 398 |
-
summary_f.write(f" Ground-Truth Models Found: {gt_models_found}/{models_ran_successfully}\n")
|
| 399 |
-
summary_f.write(f" Consistency Checks Passed: {consistency_checks_passed}/{gt_models_found}\n")
|
| 400 |
-
summary_f.write(f" Objective Value Checks Passed: {objective_checks_passed}/{gt_models_found}\n")
|
| 401 |
-
summary_f.write("=" * 30 + "\n")
|
| 402 |
-
summary_f.write("Final Evaluation Summary:\n")
|
| 403 |
-
summary_f.write(f" Submission coverage perc: {float(total_submitted_models) / len(ground_truth_models) * 100:.2f}%\n")
|
| 404 |
-
summary_f.write(f" Execution perc: {models_ran_successfully / len(ground_truth_models) * 100:.2f}%\n")
|
| 405 |
-
summary_f.write(f" Error perc: {(total_submitted_models - models_ran_successfully) / len(ground_truth_models) * 100:.2f}%\n")
|
| 406 |
-
summary_f.write(f" Consistency perc: {consistency_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 407 |
-
summary_f.write(f" Objective perc: {objective_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 408 |
-
summary_f.write(f" Final Solution Accuracy perc: {all_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 409 |
-
summary_f.write("-" * 30 + "\n")
|
| 410 |
-
|
| 411 |
-
# 4. Upload the entire local_result_dir_for_upload to HF Dataset
|
| 412 |
-
# This directory contains summary.txt and could contain other result files.
|
| 413 |
result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
|
| 414 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 415 |
flush=True)
|
|
@@ -428,19 +122,3 @@ def main_eval(
|
|
| 428 |
|
| 429 |
elapsed_time = time.time() - start_time
|
| 430 |
print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)
|
| 431 |
-
# return 0
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
# if __name__ == "__main__":
|
| 435 |
-
# if len(sys.argv) < 4:
|
| 436 |
-
# print(
|
| 437 |
-
# "Usage: python eval.py <user_dataset_repo_id> <submission_path_in_dataset> <results_base_path_in_dataset>")
|
| 438 |
-
# print("Example: python eval.py your-username/my-storage submissions/run123 results")
|
| 439 |
-
# sys.exit(1)
|
| 440 |
-
#
|
| 441 |
-
# arg_user_dataset_repo_id = sys.argv[1]
|
| 442 |
-
# arg_submission_path_in_dataset = sys.argv[2]
|
| 443 |
-
# arg_results_base_path_in_dataset = sys.argv[3]
|
| 444 |
-
#
|
| 445 |
-
# exit_code = main(arg_user_dataset_repo_id, arg_submission_path_in_dataset, arg_results_base_path_in_dataset)
|
| 446 |
-
# sys.exit(exit_code)
|
|
|
|
|
|
|
| 1 |
import time
|
| 2 |
import json
|
| 3 |
import tempfile
|
| 4 |
|
|
|
|
|
|
|
| 5 |
from huggingface_hub import HfApi, hf_hub_download
|
| 6 |
import os
|
|
|
|
|
|
|
| 7 |
import threading
|
| 8 |
from pathlib import Path
|
| 9 |
|
| 10 |
+
from src.config import DATASET_REPO_ID, DS_RESULTS_PATH
|
| 11 |
+
from src.user_eval import evaluate_submission
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def run_evaluation(submission_path):
|
|
|
|
| 27 |
return True
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# --- Main Evaluation Logic ---
|
| 31 |
def main_eval(
|
| 32 |
user_dataset_repo_id: str,
|
|
|
|
| 56 |
# Path for the summary file within the local temporary result directory
|
| 57 |
summary_file_path = local_result_dir_for_upload / "summary.txt"
|
| 58 |
|
| 59 |
+
# Download submitted files from HF Dataset
|
| 60 |
print(f" Downloading submission files from '{submission_path_in_dataset}' to '{local_submission_dir}'...",
|
| 61 |
flush=True)
|
| 62 |
try:
|
|
|
|
| 81 |
print(f" CRITICAL ERROR - Failed to download submission files: {e_download}", flush=True)
|
| 82 |
return 1
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
# load generated models from jsonl to memory
|
| 85 |
print(f" Loading generated models from '{local_submission_dir}'...", flush=True)
|
| 86 |
submitted_models = []
|
| 87 |
+
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "submission.jsonl"), "r",
|
| 88 |
+
encoding="utf-8") as f:
|
| 89 |
for line in f:
|
| 90 |
try:
|
| 91 |
json_obj = json.loads(line)
|
|
|
|
| 94 |
print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
|
| 95 |
|
| 96 |
# load metadata file
|
| 97 |
+
with open(os.path.join(local_submission_dir, submission_path_in_dataset, "metadata.json"), "r",
|
| 98 |
+
encoding="utf-8") as f:
|
| 99 |
metadata = json.load(f)
|
| 100 |
|
| 101 |
print(f" Loaded {len(submitted_models)} generated models.", flush=True)
|
| 102 |
|
| 103 |
+
# Writes stats to the summary file
|
| 104 |
+
evaluate_submission(submitted_models, summary_file_path, metadata["modelling_framework"], top_level_temp_dir)
|
| 105 |
|
| 106 |
+
# Upload the entire local_result_dir_for_upload to HF Dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
result_path_on_hub = f"{results_base_path_in_dataset}/{submission_name_for_files}"
|
| 108 |
print(f" Uploading results from '{local_result_dir_for_upload}' to '{result_path_on_hub}' on dataset...",
|
| 109 |
flush=True)
|
|
|
|
| 122 |
|
| 123 |
elapsed_time = time.time() - start_time
|
| 124 |
print(f"eval.py: Evaluation finished in {elapsed_time:.2f} seconds.", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/hf_utils.py
CHANGED
|
@@ -73,10 +73,6 @@ def load_leaderboard_data():
|
|
| 73 |
for line in f:
|
| 74 |
if 'Error perc' in line:
|
| 75 |
entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 76 |
-
# if 'Execution perc' in line:
|
| 77 |
-
# entry[LDB_COLS[1]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 78 |
-
# if 'Consistency perc' in line:
|
| 79 |
-
# entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 80 |
if 'Final Solution Accuracy' in line:
|
| 81 |
entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 82 |
if 'Submission coverage perc' in line:
|
|
|
|
| 73 |
for line in f:
|
| 74 |
if 'Error perc' in line:
|
| 75 |
entry[LDB_COLS[3]] = float(line.split(":")[1].strip().replace("%", ""))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
if 'Final Solution Accuracy' in line:
|
| 77 |
entry[LDB_COLS[2]] = float(line.split(":")[1].strip().replace("%", ""))
|
| 78 |
if 'Submission coverage perc' in line:
|
src/ui.py
CHANGED
|
@@ -97,14 +97,14 @@ def create_ui():
|
|
| 97 |
"## Important Notes\n"
|
| 98 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 99 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 100 |
-
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/user_eval.py) to check your results before submission. You can run the script as follows:\n"
|
| 101 |
" ```bash\n"
|
| 102 |
-
" python user_eval.py --submission_file path/to/my/submission.jsonl\n"
|
| 103 |
" ```\n"
|
| 104 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 105 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests).\n"
|
| 106 |
"\n\n"
|
| 107 |
-
"### If you have any questions or issues,
|
| 108 |
"---\n"
|
| 109 |
)
|
| 110 |
|
|
|
|
| 97 |
"## Important Notes\n"
|
| 98 |
"1. **Submission Name**: The submission name must be different from any existing submission names.\n"
|
| 99 |
"2. **File Format**: Ensure that the uploaded files are in the correct format. The submission file must be a `.jsonl` file, and the report must be a `pdf` file.\n"
|
| 100 |
+
"3. **Evaluation Script**: It is highly recommended to use the evaluation script provided [here](https://huggingface.co/spaces/kostis-init/CP-Bench-competition/blob/main/src/user_eval.py) to check your results before submission. You can run the script as follows:\n"
|
| 101 |
" ```bash\n"
|
| 102 |
+
" python user_eval.py --submission_file path/to/my/submission.jsonl --modelling_framework CPMpy\n"
|
| 103 |
" ```\n"
|
| 104 |
" This will evaluate your submission locally and print the results to the console.\n"
|
| 105 |
"4. **Modelling Frameworks**: Currently, the supported modelling frameworks are MiniZinc, CPMpy and OR-Tools. More frameworks can be added (feel free to submit pull requests).\n"
|
| 106 |
"\n\n"
|
| 107 |
+
"### If you have any questions or issues, feel free to reach out to us.\n"
|
| 108 |
"---\n"
|
| 109 |
)
|
| 110 |
|
user_eval.py → src/user_eval.py
RENAMED
|
@@ -1,17 +1,85 @@
|
|
|
|
|
| 1 |
import json
|
| 2 |
import os
|
| 3 |
import subprocess
|
| 4 |
import sys
|
| 5 |
import tempfile
|
| 6 |
-
|
| 7 |
import click
|
| 8 |
from pathlib import Path
|
| 9 |
-
|
|
|
|
| 10 |
from datasets import load_dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 13 |
GT_PROBLEM_NAME_COLUMN = "id"
|
| 14 |
GT_MODEL_CODE_COLUMN = "model"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def exec_code(code: str, timeout=10, modelling_language='cpmpy'):
|
|
@@ -25,27 +93,28 @@ def exec_code(code: str, timeout=10, modelling_language='cpmpy'):
|
|
| 25 |
"""
|
| 26 |
|
| 27 |
# create a temp directory to store the temporary file
|
| 28 |
-
temp_dir_name = "
|
| 29 |
temp_dir = os.path.join(os.getcwd(), temp_dir_name)
|
| 30 |
os.makedirs(temp_dir, exist_ok=True)
|
| 31 |
|
| 32 |
# write the code to a temporary file
|
| 33 |
-
suffix = '.__hidden_py__' if modelling_language ==
|
| 34 |
-
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix, dir=temp_dir,
|
|
|
|
| 35 |
temp_instance_path = temp_file.name
|
| 36 |
temp_file.write(code)
|
| 37 |
|
| 38 |
try:
|
| 39 |
# execute the code
|
| 40 |
-
if modelling_language ==
|
| 41 |
command = [sys.executable, temp_instance_path]
|
| 42 |
result = subprocess.run(command, capture_output=True, text=True, timeout=timeout, encoding='utf-8')
|
| 43 |
|
| 44 |
successfully_executed = (result.returncode == 0)
|
| 45 |
output = result.stdout if successfully_executed else result.stderr
|
| 46 |
timeout_occurred = False
|
| 47 |
-
|
| 48 |
-
|
| 49 |
else:
|
| 50 |
raise ValueError(f"MODELLING_LANGUAGE not supported: {modelling_language}")
|
| 51 |
|
|
@@ -153,10 +222,125 @@ else:
|
|
| 153 |
"""
|
| 154 |
return modified_script
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
@click.command()
|
| 157 |
@click.option('--submission_file', required=True, type=click.Path(exists=True, path_type=Path),
|
| 158 |
help='Path to the submission JSONL file')
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
| 160 |
"""Evaluate a submission file for the CP-Bench competition."""
|
| 161 |
is_valid, message = validate_submission_file(submission_file)
|
| 162 |
if not is_valid:
|
|
@@ -177,116 +361,30 @@ def main(submission_file: Path):
|
|
| 177 |
print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
|
| 178 |
print(f" Loaded {len(submitted_models)} generated models.", flush=True)
|
| 179 |
|
|
|
|
|
|
|
| 180 |
|
| 181 |
-
# eval
|
| 182 |
-
total_submitted_models = 0
|
| 183 |
-
models_ran_successfully = 0
|
| 184 |
-
consistency_checks_passed = 0
|
| 185 |
-
objective_checks_passed = 0
|
| 186 |
-
all_checks_passed = 0
|
| 187 |
-
gt_models_found = 0
|
| 188 |
-
|
| 189 |
-
# Load ground-truth models
|
| 190 |
-
print(f" Loading ground-truth dataset '{GT_DATASET_NAME}'...", flush=True)
|
| 191 |
try:
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
}
|
| 198 |
-
if not ground_truth_models: raise ValueError("No models in GT dataset.")
|
| 199 |
-
print(f" Loaded {len(ground_truth_models)} ground-truth models.", flush=True)
|
| 200 |
-
except Exception as e_gt:
|
| 201 |
-
print(f" CRITICAL ERROR - Failed to load ground-truth dataset: {e_gt}", flush=True)
|
| 202 |
return
|
| 203 |
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
total_submitted_models += 1
|
| 209 |
-
problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
|
| 210 |
-
print(f"\n Processing model: {problem_name}", flush=True)
|
| 211 |
-
print(f"\n--- Model: {problem_name} ---\n")
|
| 212 |
-
|
| 213 |
-
print(" 1. Running submitted model...\n")
|
| 214 |
-
|
| 215 |
-
succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=60)
|
| 216 |
-
|
| 217 |
-
if timeout_occurred:
|
| 218 |
-
print(f" - TIMEOUT: Execution time exceeded 60 seconds.\n")
|
| 219 |
-
continue
|
| 220 |
-
if not succ_exec:
|
| 221 |
-
print(f" - FAILED: Execution failed with error: {output}\n")
|
| 222 |
-
continue
|
| 223 |
-
if output is None or not output.strip():
|
| 224 |
-
print(f" - FAILED: No output from execution.\n")
|
| 225 |
-
continue
|
| 226 |
-
# Attempt to extract JSON from stdout
|
| 227 |
-
generated_solution = extract_json_from_code_output(output)
|
| 228 |
-
if generated_solution is None:
|
| 229 |
-
print(f" - FAILED: Could not extract JSON solution from output: {output}\n")
|
| 230 |
-
continue
|
| 231 |
-
|
| 232 |
-
models_ran_successfully += 1
|
| 233 |
-
print(f" - SUCCESS: Got solution: {generated_solution}\n")
|
| 234 |
-
|
| 235 |
-
print(f" 2. Checking against ground-truth for '{problem_name}'...\n")
|
| 236 |
-
if problem_name not in ground_truth_models:
|
| 237 |
-
print(f" - FAILED: Ground-truth model for '{problem_name}' not found in dataset.\n")
|
| 238 |
-
continue
|
| 239 |
-
gt_models_found += 1
|
| 240 |
-
ground_truth_script_content = ground_truth_models[problem_name]
|
| 241 |
-
print(" - SUCCESS: Found ground-truth model.\n")
|
| 242 |
-
|
| 243 |
-
print(" 3. Performing self-consistency check on ground-truth model...\n")
|
| 244 |
-
modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)
|
| 245 |
|
|
|
|
|
|
|
| 246 |
try:
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
capture_output=True, text=True, timeout=60, encoding='utf-8',
|
| 254 |
-
)
|
| 255 |
-
os.unlink(tmp_file_path_str)
|
| 256 |
-
|
| 257 |
-
gt_stdout = gt_check_result.stdout
|
| 258 |
-
if "SUCCESS: Model is consistent" in gt_stdout:
|
| 259 |
-
print(" - CONSISTENCY: PASSED\n")
|
| 260 |
-
consistency_checks_passed += 1
|
| 261 |
-
else:
|
| 262 |
-
print(
|
| 263 |
-
" - CONSISTENCY: FAILED (Details in logs or stdout)\n")
|
| 264 |
-
|
| 265 |
-
if "SUCCESS: No objective defined" in gt_stdout or "SUCCESS: Objective value is consistent" in gt_stdout:
|
| 266 |
-
print(" - OBJECTIVE: PASSED\n")
|
| 267 |
-
objective_checks_passed += 1
|
| 268 |
-
else:
|
| 269 |
-
print(" - OBJECTIVE: FAILED (Details in logs or stdout)\n")
|
| 270 |
-
|
| 271 |
-
if "SUCCESS: Model is consistent" in gt_stdout and (
|
| 272 |
-
"SUCCESS: No objective defined" in gt_stdout or "SUCCESS: Objective value is consistent" in gt_stdout):
|
| 273 |
-
print(" - SELF-CONSISTENCY CHECK: PASSED fully\n")
|
| 274 |
-
all_checks_passed += 1
|
| 275 |
-
|
| 276 |
-
except Exception as e_gt_run:
|
| 277 |
-
print(f" - SELF-CONSISTENCY CHECK: FAILED (Error: {e_gt_run})\n")
|
| 278 |
-
|
| 279 |
-
# Final statistics (write to summary_f)
|
| 280 |
-
print("\n" + "=" * 30 + "\n")
|
| 281 |
-
print("Overall Evaluation:\n")
|
| 282 |
-
print(f" Total Submitted Models Parsed: {total_submitted_models}\n")
|
| 283 |
-
print(f" Execution perc: {models_ran_successfully / len(ground_truth_models) * 100:.2f}%\n")
|
| 284 |
-
print(f" Consistency perc: {consistency_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 285 |
-
print(f" Objective perc: {objective_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 286 |
-
print(f" Final Solution Accuracy perc: {all_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
|
| 287 |
-
print("-" * 30 + "\n")
|
| 288 |
-
|
| 289 |
-
click.echo("Evaluation complete!")
|
| 290 |
|
| 291 |
|
| 292 |
if __name__ == "__main__":
|
|
|
|
| 1 |
+
import time
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
import subprocess
|
| 5 |
import sys
|
| 6 |
import tempfile
|
|
|
|
| 7 |
import click
|
| 8 |
from pathlib import Path
|
| 9 |
+
import minizinc
|
| 10 |
+
import datetime
|
| 11 |
from datasets import load_dataset
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
|
| 14 |
+
CPMPY_FRAMEWORK = "CPMpy"
|
| 15 |
+
MINIZINC_FRAMEWORK = "MiniZinc"
|
| 16 |
+
ORTOOLS_FRAMEWORK = "OR-Tools"
|
| 17 |
|
| 18 |
GT_DATASET_NAME = "kostis-init/CP-Bench"
|
| 19 |
GT_PROBLEM_NAME_COLUMN = "id"
|
| 20 |
GT_MODEL_CODE_COLUMN = "model"
|
| 21 |
+
SCRIPT_EXECUTION_TIMEOUT = 60 # seconds
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def exec_code_minizinc(code: str, timeout_sec):
    """
    Executes a MiniZinc model string using the minizinc-python library.

    The model is solved with the Gecode solver (looked up by the tag "gecode").

    :param code: The MiniZinc model code as a string.
    :param timeout_sec: The maximum time to wait for the solver in seconds.
    :return: A tuple of (success, output, timeout_occurred):
        - success is True only when the solver reports SATISFIED or
          OPTIMAL_SOLUTION;
        - output is the stringified solution on success, otherwise a
          human-readable error message;
        - timeout_occurred is True when the solver status is UNKNOWN, which
          this function reports as a timeout.
    """
    # Pessimistic defaults: each failure path below only has to fill in `output`.
    successfully_executed = False
    output = ""
    timeout_occurred = False
    timeout_duration = datetime.timedelta(seconds=timeout_sec)

    try:
        # 1. Create a MiniZinc model instance from the submitted source
        model = minizinc.Model()
        model.add_string(code)

        # 2. Locate the Gecode solver.
        # Solver.lookup signals a missing solver by raising LookupError (see the
        # minizinc-python API reference) instead of returning None, so translate
        # that into the intended, more helpful message. The None check is kept
        # as a cheap safeguard.
        try:
            gecode = minizinc.Solver.lookup("gecode")
        except LookupError as lookup_err:
            raise RuntimeError(
                "No suitable solver found. Please install a MiniZinc solver."
            ) from lookup_err
        if gecode is None:
            raise RuntimeError("No suitable solver found. Please install a MiniZinc solver.")

        # 3. Create an Instance to solve
        instance = minizinc.Instance(gecode, model)

        # 4. Solve the instance; the timeout is passed to solve() itself
        result = instance.solve(timeout=timeout_duration)

        # 5. Process the result. `output` is computed before the flags are
        # flipped so that an exception while formatting still leaves the
        # failure defaults in place.
        if result.status in {minizinc.Status.SATISFIED, minizinc.Status.OPTIMAL_SOLUTION}:
            output = str(result.solution) if result.solution is not None else ""
            successfully_executed = True
        elif result.status == minizinc.Status.UNKNOWN:
            output = f"Timeout Error: Solver stopped after {timeout_sec} seconds (Status: UNKNOWN)."
            timeout_occurred = True
        else:
            # Handle other non-success statuses (UNSAT, ERROR, etc.)
            output = f"Solving failed. Status: {result.status}"

    except minizinc.MiniZincError as e:
        # MiniZinc specific errors (e.g., syntax errors in the model)
        output = f"MiniZinc Error: {e}"
    except Exception as e:
        # Anything else, including the missing-solver RuntimeError raised above
        output = f"Unexpected Error during MiniZinc execution: {e}"

    return successfully_executed, output, timeout_occurred
|
| 83 |
|
| 84 |
|
| 85 |
def exec_code(code: str, timeout=10, modelling_language='cpmpy'):
|
|
|
|
| 93 |
"""
|
| 94 |
|
| 95 |
# create a temp directory to store the temporary file
|
| 96 |
+
temp_dir_name = "temp_dir_for_exec_code"
|
| 97 |
temp_dir = os.path.join(os.getcwd(), temp_dir_name)
|
| 98 |
os.makedirs(temp_dir, exist_ok=True)
|
| 99 |
|
| 100 |
# write the code to a temporary file
|
| 101 |
+
suffix = '.__hidden_py__' if modelling_language == CPMPY_FRAMEWORK or modelling_language == ORTOOLS_FRAMEWORK else '.mzn'
|
| 102 |
+
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix, dir=temp_dir,
|
| 103 |
+
encoding='utf-8') as temp_file:
|
| 104 |
temp_instance_path = temp_file.name
|
| 105 |
temp_file.write(code)
|
| 106 |
|
| 107 |
try:
|
| 108 |
# execute the code
|
| 109 |
+
if modelling_language == CPMPY_FRAMEWORK or modelling_language == ORTOOLS_FRAMEWORK:
|
| 110 |
command = [sys.executable, temp_instance_path]
|
| 111 |
result = subprocess.run(command, capture_output=True, text=True, timeout=timeout, encoding='utf-8')
|
| 112 |
|
| 113 |
successfully_executed = (result.returncode == 0)
|
| 114 |
output = result.stdout if successfully_executed else result.stderr
|
| 115 |
timeout_occurred = False
|
| 116 |
+
elif modelling_language == MINIZINC_FRAMEWORK:
|
| 117 |
+
successfully_executed, output, timeout_occurred = exec_code_minizinc(code, timeout)
|
| 118 |
else:
|
| 119 |
raise ValueError(f"MODELLING_LANGUAGE not supported: {modelling_language}")
|
| 120 |
|
|
|
|
| 222 |
"""
|
| 223 |
return modified_script
|
| 224 |
|
| 225 |
+
|
| 226 |
+
def evaluate_submission(submitted_models, summary_file_path, modelling_framw, top_lvl_temp_dir):
    """
    Evaluate submitted models against the ground-truth dataset and write a summary.

    For every submitted model the function:
      1. executes it via ``exec_code`` with ``SCRIPT_EXECUTION_TIMEOUT``;
      2. extracts a JSON solution from its output;
      3. injects that solution into the matching ground-truth model (via
         ``get_modified_script``) and runs the resulting script, looking for
         the "SUCCESS: ..." markers in its stdout.

    Per-model details and the overall statistics are written to
    ``summary_file_path`` (the file is overwritten).

    :param submitted_models: Iterable of mappings indexed with
        ``GT_PROBLEM_NAME_COLUMN`` (problem id) and ``GT_MODEL_CODE_COLUMN``
        (model source code).
    :param summary_file_path: Path of the summary text file to write.
    :param modelling_framw: Modelling framework name, forwarded to ``exec_code``
        as ``modelling_language``.
    :param top_lvl_temp_dir: Directory in which the temporary ground-truth
        check scripts are created.
    :return: ``1`` if the ground-truth dataset could not be loaded, otherwise
        ``None``.
    """
    # Load ground-truth dataset
    print(f" Loading ground-truth dataset '{GT_DATASET_NAME}'...", flush=True)
    try:
        gt_dataset = load_dataset(GT_DATASET_NAME, split="train", trust_remote_code=True)
        # Keep only rows that have both columns and a non-empty model.
        ground_truth_models = {
            item[GT_PROBLEM_NAME_COLUMN]: item[GT_MODEL_CODE_COLUMN]
            for item in gt_dataset
            if GT_PROBLEM_NAME_COLUMN in item and GT_MODEL_CODE_COLUMN in item and item[GT_MODEL_CODE_COLUMN]
        }
        if not ground_truth_models:
            raise ValueError("No models in GT dataset.")
        print(f" Loaded {len(ground_truth_models)} ground-truth models.", flush=True)
    except Exception as e_gt:
        print(f" CRITICAL ERROR - Failed to load ground-truth dataset: {e_gt}", flush=True)
        # Same encoding as the regular summary below, so the file is always UTF-8.
        with open(summary_file_path, "w", encoding="utf-8") as f:
            f.write(f"CRITICAL ERROR: Failed to load ground-truth dataset '{GT_DATASET_NAME}'.\nError: {e_gt}\n")
        return 1

    # Statistics
    total_submitted_models = 0
    models_ran_successfully = 0
    consistency_checks_passed = 0
    all_checks_passed = 0

    with open(summary_file_path, "w", encoding="utf-8") as summary_f:
        summary_f.write(f"Ground-Truth Dataset: {GT_DATASET_NAME}\n")
        summary_f.write("-" * 30 + "\n")

        # Iterate through the submitted models
        for submitted_model in tqdm(submitted_models):
            curr_model = submitted_model[GT_MODEL_CODE_COLUMN]

            total_submitted_models += 1
            problem_name = submitted_model[GT_PROBLEM_NAME_COLUMN]
            print(f"\n Processing model: {problem_name}", flush=True)
            summary_f.write(f"\n--- Model: {problem_name} ---\n")

            summary_f.write(" 1. Running submitted model...\n")

            succ_exec, output, timeout_occurred = exec_code(curr_model, timeout=SCRIPT_EXECUTION_TIMEOUT,
                                                            modelling_language=modelling_framw)

            if succ_exec:
                models_ran_successfully += 1
                summary_f.write(" - SUCCESS: Model executed successfully.\n")

            if timeout_occurred:
                summary_f.write(f" - TIMEOUT: Execution time exceeded {SCRIPT_EXECUTION_TIMEOUT} seconds.\n")
                continue
            if not succ_exec:
                summary_f.write(f" - FAILED: Execution failed with error: {output}\n")
                continue
            if output is None or not output.strip():
                summary_f.write(f" - FAILED: No output from execution.\n")
                continue
            # Attempt to extract JSON from stdout
            generated_solution = extract_json_from_code_output(output)
            if generated_solution is None:
                summary_f.write(f" - FAILED: Could not extract JSON solution from output: {output}\n")
                continue
            summary_f.write(f" - SUCCESS: Got solution: {generated_solution}\n")

            summary_f.write(f" 2. Checking against ground-truth for '{problem_name}'...\n")
            if problem_name not in ground_truth_models:
                summary_f.write(f" - FAILED: Ground-truth model for '{problem_name}' not found in dataset.\n")
                continue
            ground_truth_script_content = ground_truth_models[problem_name]
            summary_f.write(" - SUCCESS: Found ground-truth model.\n")

            summary_f.write(" 3. Performing self-consistency check on ground-truth model...\n")
            modified_gt_script = get_modified_script(ground_truth_script_content, generated_solution)

            tmp_file_path_str = None
            try:
                with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8',
                                                 dir=top_lvl_temp_dir) as tmp_file:
                    # Record the path first so the cleanup below still finds the
                    # file if writing fails.
                    tmp_file_path_str = tmp_file.name
                    tmp_file.write(modified_gt_script)

                gt_check_result = subprocess.run(
                    [sys.executable, tmp_file_path_str],
                    capture_output=True, text=True, timeout=SCRIPT_EXECUTION_TIMEOUT, encoding='utf-8',
                )

                gt_stdout = gt_check_result.stdout
                is_consistent = "SUCCESS: Model is consistent" in gt_stdout
                objective_ok = ("SUCCESS: No objective defined" in gt_stdout
                                or "SUCCESS: Objective value is consistent" in gt_stdout)

                if is_consistent:
                    summary_f.write(" - CONSISTENCY: PASSED\n")
                    consistency_checks_passed += 1
                else:
                    summary_f.write(" - CONSISTENCY: FAILED (Details in logs or stdout)\n")

                if is_consistent and objective_ok:
                    summary_f.write(" - SELF-CONSISTENCY CHECK: PASSED fully\n")
                    all_checks_passed += 1

            except Exception as e_gt_run:
                summary_f.write(f" - SELF-CONSISTENCY CHECK: FAILED (Error: {e_gt_run})\n")
            finally:
                # Always remove the temporary script, including when the
                # subprocess timed out or failed; otherwise stale files pile up
                # in top_lvl_temp_dir and the directory cannot be removed later.
                if tmp_file_path_str is not None and os.path.exists(tmp_file_path_str):
                    try:
                        os.unlink(tmp_file_path_str)
                    except OSError as e_unlink:
                        print(f" WARNING: Could not remove temporary file {tmp_file_path_str}: {e_unlink}",
                              flush=True)

        # Final statistics; percentages are relative to the number of ground-truth models
        summary_f.write("\n" + "=" * 30 + "\n")
        summary_f.write("Overall Evaluation Statistics:\n")
        summary_f.write(f" Total Submitted Models Parsed: {total_submitted_models}\n")
        summary_f.write(f" Models That Ran Successfully (out of the submitted models): {models_ran_successfully}/{total_submitted_models}\n")
        summary_f.write(f" Submission coverage perc: {float(total_submitted_models) / len(ground_truth_models) * 100:.2f}%\n")
        summary_f.write(f" Error perc: {(total_submitted_models - models_ran_successfully) / len(ground_truth_models) * 100:.2f}%\n")
        summary_f.write(f" Consistency perc: {consistency_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
        summary_f.write(f" Final Solution Accuracy perc: {all_checks_passed / len(ground_truth_models) * 100:.2f}%\n")
        summary_f.write("-" * 30 + "\n")
|
| 335 |
+
|
| 336 |
+
|
| 337 |
@click.command()
|
| 338 |
@click.option('--submission_file', required=True, type=click.Path(exists=True, path_type=Path),
|
| 339 |
help='Path to the submission JSONL file')
|
| 340 |
+
@click.option('--modelling_framework', required=True,
|
| 341 |
+
type=click.Choice([CPMPY_FRAMEWORK, ORTOOLS_FRAMEWORK, MINIZINC_FRAMEWORK]),
|
| 342 |
+
help='Modelling framework used in the submission')
|
| 343 |
+
def main(submission_file: Path, modelling_framework: str):
|
| 344 |
"""Evaluate a submission file for the CP-Bench competition."""
|
| 345 |
is_valid, message = validate_submission_file(submission_file)
|
| 346 |
if not is_valid:
|
|
|
|
| 361 |
print(f" ERROR: Failed to parse JSON object from line: {line}. Error: {e}", flush=True)
|
| 362 |
print(f" Loaded {len(submitted_models)} generated models.", flush=True)
|
| 363 |
|
| 364 |
+
summary_file_path = Path("summary.txt")
|
| 365 |
+
top_level_temp_dir = tempfile.mkdtemp(prefix="cp_bench_eval_")
|
| 366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
try:
|
| 368 |
+
start_time = time.time()
|
| 369 |
+
evaluate_submission(submitted_models, summary_file_path, modelling_framework, top_level_temp_dir)
|
| 370 |
+
elapsed_time = time.time() - start_time
|
| 371 |
+
except Exception as e:
|
| 372 |
+
click.echo(f"Error during evaluation: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
return
|
| 374 |
|
| 375 |
+
click.echo("Evaluation complete!")
|
| 376 |
+
click.echo(f"Results written to {summary_file_path}")
|
| 377 |
+
click.echo(f"Total evaluation time: {elapsed_time:.2f} seconds")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
|
| 379 |
+
# Clean up temporary directory
|
| 380 |
+
if os.path.exists(top_level_temp_dir):
|
| 381 |
try:
|
| 382 |
+
os.rmdir(top_level_temp_dir)
|
| 383 |
+
except OSError as e:
|
| 384 |
+
click.echo(f"Warning: Could not remove temporary directory {top_level_temp_dir}: {e}")
|
| 385 |
+
else:
|
| 386 |
+
click.echo(f"Temporary directory {top_level_temp_dir} does not exist, nothing to clean up.")
|
| 387 |
+
click.echo("Thank you for your submission!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
|
| 390 |
if __name__ == "__main__":
|
template_submission.jsonl
CHANGED
|
@@ -81,21 +81,21 @@
|
|
| 81 |
{"id": "cpmpy_examples__who_killed_agatha", "model": "from cpmpy import *\nimport json\n\n# Data\nnames = [\"Agatha herself\", \"the butler\", \"Charles\"]\n\n# Variables\nkiller = int_var(0, 2, name=\"killer\")\n\n# Constraints\n# Agatha hates everybody except the butler\nagatha_hates = [1, 2] # indices of people Agatha hates\n# The butler hates everyone not richer than Aunt Agatha\nbutler_hates = [0, 2] # indices of people the butler hates\n# The butler hates everyone whom Agatha hates\n# This is already included in butler_hates\n# Charles hates noone that Agatha hates\ncharles_hates = [] # indices of people Charles hates\n\n# Model\nmodel = Model(\n # A killer always hates, and is no richer than his victim\n [killer in agatha_hates, \n killer != 0], # Agatha cannot kill herself\n # Additional constraints based on the problem description\n [killer in butler_hates if killer == 1 else True], # butler hates everyone not richer than Agatha\n [killer not in agatha_hates if killer == 2 else True] # Charles hates noone that Agatha hates\n)\n\n# Solve\nif model.solve():\n # Print solution as JSON\n solution = {\"killer\": names[killer.value()]}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 82 |
{"id": "cpmpy_examples__wolf_goat_cabbage", "model": "from cpmpy import *\nimport json\n\n# Define the number of stages\nstage = 8\n\n# Define the decision variables\nwolf_pos = boolvar(shape=stage+1, name=\"wolf_pos\")\ngoat_pos = boolvar(shape=stage+1, name=\"goat_pos\")\ncabbage_pos = boolvar(shape=stage+1, name=\"cabbage_pos\")\nboat_pos = boolvar(shape=stage+1, name=\"boat_pos\")\n\n# Model\nmodel = Model(\n # Initial positions\n wolf_pos[0] == 0,\n goat_pos[0] == 0,\n cabbage_pos[0] == 0,\n boat_pos[0] == 0,\n\n # Final positions\n wolf_pos[stage] == 1,\n goat_pos[stage] == 1,\n cabbage_pos[stage] == 1,\n boat_pos[stage] == 1,\n\n # Boat movement\n [boat_pos[t] != boat_pos[t+1] for t in range(stage)],\n\n # Farmer is with the boat\n [wolf_pos[t] == boat_pos[t] or goat_pos[t] == boat_pos[t] or cabbage_pos[t] == boat_pos[t] for t in range(1, stage+1)],\n\n # Wolf and goat are not left together\n [(wolf_pos[t] != goat_pos[t]) or boat_pos[t] == wolf_pos[t] or boat_pos[t] == goat_pos[t] for t in range(1, stage+1)],\n\n # Goat and cabbage are not left together\n [(goat_pos[t] != cabbage_pos[t]) or boat_pos[t] == goat_pos[t] or boat_pos[t] == cabbage_pos[t] for t in range(1, stage+1)],\n\n # Wolf and cabbage are not left together (this constraint is not necessary, but it helps the solver)\n [(wolf_pos[t] != cabbage_pos[t]) or boat_pos[t] == wolf_pos[t] or boat_pos[t] == cabbage_pos[t] for t in range(1, stage+1)]\n)\n\n# Solve the model\nif model.solve():\n # Create the solution dictionary\n solution = {\n \"boat_pos\": [int(boat_pos[t].value()) for t in range(stage+1)],\n \"0\": 0,\n \"cabbage_pos\": [int(cabbage_pos[t].value()) for t in range(stage+1)],\n \"goat_pos\": [int(goat_pos[t].value()) for t in range(stage+1)],\n \"wolf_pos\": [int(wolf_pos[t].value()) for t in range(stage+1)],\n \"1\": 1\n }\n\n # Print the solution as a JSON object\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 83 |
{"id": "cpmpy_examples__zebra", "model": "from cpmpy import *\nimport json\n\n# Define the variables\ncolors = [\"yellow\", \"green\", \"red\", \"white\", \"blue\"]\nnations = [\"italy\", \"spain\", \"japan\", \"england\", \"norway\"]\njobs = [\"painter\", \"sculptor\", \"diplomat\", \"pianist\", \"doctor\"]\npets = [\"cat\", \"zebra\", \"bear\", \"snails\", \"horse\"]\ndrinks = [\"milk\", \"water\", \"tea\", \"coffee\", \"juice\"]\n\n# Define the model\nmodel = Model()\n\n# Define the decision variables\ncolor_vars = intvar(1, 5, shape=5, name=\"color\")\nnation_vars = intvar(1, 5, shape=5, name=\"nation\")\njob_vars = intvar(1, 5, shape=5, name=\"job\")\npet_vars = intvar(1, 5, shape=5, name=\"pet\")\ndrink_vars = intvar(1, 5, shape=5, name=\"drink\")\n\n# Constraints\nmodel += [AllDifferent(color_vars)]\nmodel += [AllDifferent(nation_vars)]\nmodel += [AllDifferent(job_vars)]\nmodel += [AllDifferent(pet_vars)]\nmodel += [AllDifferent(drink_vars)]\n\n# The painter owns the horse\nmodel += [job_vars[0] == 1, pet_vars[0] == 5]\n\n# The diplomat drinks coffee\nmodel += [job_vars[2] == 3, drink_vars[2] == 4]\n\n# The one who drinks milk lives in the white house\nmodel += [drink_vars[3] == 1, color_vars[3] == 4]\n\n# The Spaniard is a painter\nmodel += [nation_vars[1] == 2, job_vars[1] == 1]\n\n# The Englishman lives in the red house\nmodel += [nation_vars[3] == 4, color_vars[3] == 3]\n\n# The snails are owned by the sculptor\nmodel += [pet_vars[2] == 4, job_vars[2] == 2]\n\n# The green house is on the left of the red one\nmodel += [color_vars[1] == 2, color_vars[2] == 3]\n\n# The Norwegian lives on the right of the blue house\nmodel += [color_vars[4] == 5, nation_vars[4] == 5]\n\n# The doctor drinks milk\nmodel += [job_vars[4] == 5, drink_vars[4] == 1]\n\n# The diplomat is Japanese\nmodel += [nation_vars[2] == 3, job_vars[2] == 3]\n\n# The Norwegian owns the zebra\nmodel += [pet_vars[4] == 2, nation_vars[4] == 5]\n\n# The green house is next to the white one\nmodel += 
[color_vars[2] == 2, color_vars[3] == 4]\n\n# The horse is owned by the neighbor of the diplomat\nmodel += [pet_vars[1] == 5, job_vars[2] == 3]\n\n# The Italian either lives in the red, white or green house\nmodel += [Or([nation_vars[1] == 1, nation_vars[2] == 1, nation_vars[3] == 1])]\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {\n \"colors\": [colors[i-1] for i in color_vars.value()],\n \"jobs\": [jobs[i-1] for i in job_vars.value()],\n \"nations\": [nations[i-1] for i in nation_vars.value()],\n \"pets\": [pets[i-1] for i in pet_vars.value()],\n \"drinks\": [drinks[i-1] for i in drink_vars.value()]\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 84 |
-
{"id": "
|
| 85 |
-
{"id": "
|
| 86 |
-
{"id": "
|
| 87 |
-
{"id": "
|
| 88 |
-
{"id": "
|
| 89 |
-
{"id": "
|
| 90 |
-
{"id": "
|
| 91 |
-
{"id": "
|
| 92 |
-
{"id": "
|
| 93 |
-
{"id": "
|
| 94 |
-
{"id": "
|
| 95 |
-
{"id": "
|
| 96 |
-
{"id": "
|
| 97 |
-
{"id": "
|
| 98 |
-
{"id": "
|
| 99 |
-
{"id": "
|
| 100 |
-
{"id": "
|
| 101 |
-
{"id": "
|
|
|
|
| 81 |
{"id": "cpmpy_examples__who_killed_agatha", "model": "from cpmpy import *\nimport json\n\n# Data\nnames = [\"Agatha herself\", \"the butler\", \"Charles\"]\n\n# Variables\nkiller = int_var(0, 2, name=\"killer\")\n\n# Constraints\n# Agatha hates everybody except the butler\nagatha_hates = [1, 2] # indices of people Agatha hates\n# The butler hates everyone not richer than Aunt Agatha\nbutler_hates = [0, 2] # indices of people the butler hates\n# The butler hates everyone whom Agatha hates\n# This is already included in butler_hates\n# Charles hates noone that Agatha hates\ncharles_hates = [] # indices of people Charles hates\n\n# Model\nmodel = Model(\n # A killer always hates, and is no richer than his victim\n [killer in agatha_hates, \n killer != 0], # Agatha cannot kill herself\n # Additional constraints based on the problem description\n [killer in butler_hates if killer == 1 else True], # butler hates everyone not richer than Agatha\n [killer not in agatha_hates if killer == 2 else True] # Charles hates noone that Agatha hates\n)\n\n# Solve\nif model.solve():\n # Print solution as JSON\n solution = {\"killer\": names[killer.value()]}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 82 |
{"id": "cpmpy_examples__wolf_goat_cabbage", "model": "from cpmpy import *\nimport json\n\n# Define the number of stages\nstage = 8\n\n# Define the decision variables\nwolf_pos = boolvar(shape=stage+1, name=\"wolf_pos\")\ngoat_pos = boolvar(shape=stage+1, name=\"goat_pos\")\ncabbage_pos = boolvar(shape=stage+1, name=\"cabbage_pos\")\nboat_pos = boolvar(shape=stage+1, name=\"boat_pos\")\n\n# Model\nmodel = Model(\n # Initial positions\n wolf_pos[0] == 0,\n goat_pos[0] == 0,\n cabbage_pos[0] == 0,\n boat_pos[0] == 0,\n\n # Final positions\n wolf_pos[stage] == 1,\n goat_pos[stage] == 1,\n cabbage_pos[stage] == 1,\n boat_pos[stage] == 1,\n\n # Boat movement\n [boat_pos[t] != boat_pos[t+1] for t in range(stage)],\n\n # Farmer is with the boat\n [wolf_pos[t] == boat_pos[t] or goat_pos[t] == boat_pos[t] or cabbage_pos[t] == boat_pos[t] for t in range(1, stage+1)],\n\n # Wolf and goat are not left together\n [(wolf_pos[t] != goat_pos[t]) or boat_pos[t] == wolf_pos[t] or boat_pos[t] == goat_pos[t] for t in range(1, stage+1)],\n\n # Goat and cabbage are not left together\n [(goat_pos[t] != cabbage_pos[t]) or boat_pos[t] == goat_pos[t] or boat_pos[t] == cabbage_pos[t] for t in range(1, stage+1)],\n\n # Wolf and cabbage are not left together (this constraint is not necessary, but it helps the solver)\n [(wolf_pos[t] != cabbage_pos[t]) or boat_pos[t] == wolf_pos[t] or boat_pos[t] == cabbage_pos[t] for t in range(1, stage+1)]\n)\n\n# Solve the model\nif model.solve():\n # Create the solution dictionary\n solution = {\n \"boat_pos\": [int(boat_pos[t].value()) for t in range(stage+1)],\n \"0\": 0,\n \"cabbage_pos\": [int(cabbage_pos[t].value()) for t in range(stage+1)],\n \"goat_pos\": [int(goat_pos[t].value()) for t in range(stage+1)],\n \"wolf_pos\": [int(wolf_pos[t].value()) for t in range(stage+1)],\n \"1\": 1\n }\n\n # Print the solution as a JSON object\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 83 |
{"id": "cpmpy_examples__zebra", "model": "from cpmpy import *\nimport json\n\n# Define the variables\ncolors = [\"yellow\", \"green\", \"red\", \"white\", \"blue\"]\nnations = [\"italy\", \"spain\", \"japan\", \"england\", \"norway\"]\njobs = [\"painter\", \"sculptor\", \"diplomat\", \"pianist\", \"doctor\"]\npets = [\"cat\", \"zebra\", \"bear\", \"snails\", \"horse\"]\ndrinks = [\"milk\", \"water\", \"tea\", \"coffee\", \"juice\"]\n\n# Define the model\nmodel = Model()\n\n# Define the decision variables\ncolor_vars = intvar(1, 5, shape=5, name=\"color\")\nnation_vars = intvar(1, 5, shape=5, name=\"nation\")\njob_vars = intvar(1, 5, shape=5, name=\"job\")\npet_vars = intvar(1, 5, shape=5, name=\"pet\")\ndrink_vars = intvar(1, 5, shape=5, name=\"drink\")\n\n# Constraints\nmodel += [AllDifferent(color_vars)]\nmodel += [AllDifferent(nation_vars)]\nmodel += [AllDifferent(job_vars)]\nmodel += [AllDifferent(pet_vars)]\nmodel += [AllDifferent(drink_vars)]\n\n# The painter owns the horse\nmodel += [job_vars[0] == 1, pet_vars[0] == 5]\n\n# The diplomat drinks coffee\nmodel += [job_vars[2] == 3, drink_vars[2] == 4]\n\n# The one who drinks milk lives in the white house\nmodel += [drink_vars[3] == 1, color_vars[3] == 4]\n\n# The Spaniard is a painter\nmodel += [nation_vars[1] == 2, job_vars[1] == 1]\n\n# The Englishman lives in the red house\nmodel += [nation_vars[3] == 4, color_vars[3] == 3]\n\n# The snails are owned by the sculptor\nmodel += [pet_vars[2] == 4, job_vars[2] == 2]\n\n# The green house is on the left of the red one\nmodel += [color_vars[1] == 2, color_vars[2] == 3]\n\n# The Norwegian lives on the right of the blue house\nmodel += [color_vars[4] == 5, nation_vars[4] == 5]\n\n# The doctor drinks milk\nmodel += [job_vars[4] == 5, drink_vars[4] == 1]\n\n# The diplomat is Japanese\nmodel += [nation_vars[2] == 3, job_vars[2] == 3]\n\n# The Norwegian owns the zebra\nmodel += [pet_vars[4] == 2, nation_vars[4] == 5]\n\n# The green house is next to the white one\nmodel += 
[color_vars[2] == 2, color_vars[3] == 4]\n\n# The horse is owned by the neighbor of the diplomat\nmodel += [pet_vars[1] == 5, job_vars[2] == 3]\n\n# The Italian either lives in the red, white or green house\nmodel += [Or([nation_vars[1] == 1, nation_vars[2] == 1, nation_vars[3] == 1])]\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {\n \"colors\": [colors[i-1] for i in color_vars.value()],\n \"jobs\": [jobs[i-1] for i in job_vars.value()],\n \"nations\": [nations[i-1] for i in nation_vars.value()],\n \"pets\": [pets[i-1] for i in pet_vars.value()],\n \"drinks\": [drinks[i-1] for i in drink_vars.value()]\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 84 |
+
{"id": "aplai_course__1_bank_card", "model": "from cpmpy import *\nimport json\n\n# Define the model\na, b, c, d = int_var(0, 9), int_var(0, 9), int_var(0, 9), int_var(0, 9)\n\nmodel = Model(\n [a != b, a != c, a != d, b != c, b != d, c != d], # all digits are different\n [10*a + b <= 9*10 + 9], # ab is at most 99\n [10*c + d <= 9*10 + 9], # cd is at most 99\n [10*d + a <= 9*10 + 9], # da is at most 99\n [10*b + c <= 9*10 + 9], # bc is at most 99\n [3*(10*a + b) == (10*c + d)], # cd is 3 times ab\n [2*(10*b + c) == (10*d + a)], # da is 2 times bc\n)\n\n# Solve the model\nif model.solve():\n solution = {\"a\": a.value(), \"b\": b.value(), \"c\": c.value(), \"d\": d.value()}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 85 |
+
{"id": "aplai_course__5_climbing_stairs", "model": "from cpmpy import *\nimport json\n\n# Define the model\nn = 20 # Total steps\nm1 = 3 # Minimum steps at a time\nm2 = 5 # Maximum steps at a time\n\n# Define the variable\nsteps = intvar(0, m2, shape=n)\n\n# Define the model\nmodel = Model(\n # Objective: sum of steps equals n\n [sum(steps) == n],\n # Each step is between m1 and m2 (inclusive) or 0\n [steps[i] in [0, m1, m2] for i in range(n)],\n # Cumulative sum of steps does not exceed n at any point\n [cumsum(steps[:i+1]) <= n for i in range(n)],\n # If a step is not 0, it must be m1 or m2\n [steps[i] >= m1 for i in range(n)]\n)\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n print(json.dumps({\"steps\": [steps[i].value() for i in range(n)]}))\nelse:\n print(\"No solution found\")"}
|
| 86 |
+
{"id": "aplai_course__2_color_simple", "model": "from cpmpy import *\nimport json\n\n# Define the adjacency list\ngraph = [\n [3, 1],\n [3, 6],\n [3, 4],\n [6, 4],\n [6, 1],\n [1, 5],\n [1, 4],\n [4, 5],\n [4, 2]\n]\n\n# Define the number of countries\nnum_countries = len(graph)\n\n# Define the number of colors\nnum_colors = num_countries\n\n# Define the color variables\ncolors = intvar(1, num_colors, shape=num_countries, name=\"colors\")\n\n# Define the model\nmodel = Model()\n\n# Add constraints: two neighbouring countries cannot have the same color\nfor i in range(num_countries):\n for j in graph[i]:\n model += colors[i] != colors[j-1]\n\n# Solve the model\nif model.solve():\n # Print the colors assigned to each country as a JSON object\n output = {\"colors\": [int(c) for c in colors.value()]}\n print(json.dumps(output))\nelse:\n print(\"No solution found\")"}
|
| 87 |
+
{"id": "aplai_course__3_exodus", "model": "from cpmpy import *\nimport json\n\n# Define the variables\nages = [3, 5, 7, 8, 10]\nchildren = [\"Bernice\", \"Carl\", \"Debby\", \"Sammy\", \"Ted\"]\ncountries = [\"Ethiopia\", \"Kazakhstan\", \"Lithuania\", \"Morocco\", \"Yemen\"]\nstories = [\"burning bush\", \"captivity\", \"Moses\u2019s youth\", \"Passover\", \"Ten Commandments\"]\n\n# Define the model\nmodel = Model()\n\n# Define the decision variables\nchild = intvar(1, 5, shape=5, name=\"child\")\ncountry = intvar(1, 5, shape=5, name=\"country\")\nstory = intvar(1, 5, shape=5, name=\"story\")\nage = intvar(1, 5, shape=5, name=\"age\")\n\n# Constraints\nmodel += [AllDifferent(child)]\nmodel += [AllDifferent(country)]\nmodel += [AllDifferent(story)]\nmodel += [AllDifferent(age)]\n\n# Debby\u2019s family is from Lithuania\nmodel += [country[children.index(\"Debby\")] == countries.index(\"Lithuania\") + 1]\n\n# The child who told the story of the Passover is two years older than Bernice\nmodel += [age[stories.index(\"Passover\")] - age[children.index(\"Bernice\")] == 2]\n\n# The child whose family is from Yemen is younger than the child from the Ethiopian family\nmodel += [age[countries.index(\"Yemen\")] < age[countries.index(\"Ethiopia\")]]\n\n# The child from the Moroccan family is three years older than Ted\nmodel += [age[countries.index(\"Morocco\")] - age[children.index(\"Ted\")] == 3]\n\n# Sammy is three years older than the child who told the story of Moses\u2019s youth\nmodel += [age[children.index(\"Sammy\")] - age[stories.index(\"Moses\u2019s youth\")] == 3]\n\n# Solve the model\nif model.solve():\n # Get the solution\n child_sol = [child[i].value() for i in range(5)]\n country_sol = [country[i].value() for i in range(5)]\n story_sol = [story[i].value() for i in range(5)]\n age_sol = [age[i].value() for i in range(5)]\n\n # Map the solution to the original lists\n countries_sol = [countries[i-1] for i in country_sol]\n children_sol = [children[i-1] for i in 
child_sol]\n stories_sol = [stories[i-1] for i in story_sol]\n ages_sol = [ages[i-1] for i in age_sol]\n\n # Print the solution as a JSON object\n print(json.dumps({\n \"countries\": [countries.index(c) + 1 for c in countries_sol],\n \"children\": [children.index(c) + 1 for c in children_sol],\n \"stories\": [stories.index(s) + 1 for s in stories_sol],\n \"ages\": [ages.index(a) + 1 for a in ages_sol]\n }))\nelse:\n print(\"No solution found\")"}
|
| 88 |
+
{"id": "aplai_course__3_farmer_and_cows", "model": "from cpmpy import *\nimport json\n\n# Data\nnum_cows = 25\nnum_sons = 5\ncows_per_son = [7, 6, 5, 4, 3]\nmilk_per_cow = list(range(1, num_cows + 1))\n\n# Model\nmodel = Model()\n\n# Variables\ncow_assignments = intvar(0, num_sons - 1, shape=num_cows, name=\"cow_assignments\")\n\n# Constraints\nfor i, num in enumerate(cows_per_son):\n model += sum([cow_assignments[j] == i for j in range(num_cows)]) == num\n\nmilk_per_son = [[milk_per_cow[j] for j in range(num_cows) if cow_assignments[j] == i] for i in range(num_sons)]\nfor i in range(1, num_sons):\n model += sum(milk_per_cow[j] for j in range(num_cows) if cow_assignments[j] == 0) == sum(milk_per_cow[j] for j in range(num_cows) if cow_assignments[j] == i)\n\n# Solve\nif model.solve():\n solution = {\"cow_assignments\": [int(x.value()) for x in cow_assignments]}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 89 |
+
{"id": "aplai_course__1_five_floors", "model": "from cpmpy import *\nimport json\n\n# Define the variables\nB = int_var(1, 5, name=\"B\")\nC = int_var(1, 5, name=\"C\")\nF = int_var(1, 5, name=\"F\")\nM = int_var(1, 5, name=\"M\")\nS = int_var(1, 5, name=\"S\")\n\n# Define the model\nmodel = Model(\n [B != 5, # Baker does not live on the fifth floor\n C != 1, # Cooper does not live on the first floor\n F != 1, F != 5, # Fletcher does not live on either the fifth or the first floor\n M > C, # Miller lives on a higher floor than does Cooper\n abs(S - F) > 1, # Smith does not live on a floor adjacent to Fletcher's\n abs(F - C) > 1, # Fletcher does not live on a floor adjacent to Cooper's\n AllDifferent([B, C, F, M, S]) # They all live on different floors\n ]\n)\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {\"B\": B.value(), \"C\": C.value(), \"F\": F.value(), \"M\": M.value(), \"S\": S.value()}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 90 |
+
{"id": "aplai_course__5_grocery", "model": "from cpmpy import *\nimport json\n\n# Define the model\nx = intvar(1, 100, shape=4, name=\"prices\")\n\n# Define the constraints\nmodel = Model(\n [x[0] * x[1] * x[2] * x[3] == 711],\n [sum(x) == 711]\n)\n\n# Solve the model\nif model.solve():\n # Get the solution\n solution = {f\"prices[{i}]\": x[i].value() for i in range(4)}\n solution[\"prices\"] = [x[i].value() for i in range(4)]\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 91 |
+
{"id": "aplai_course__1_guards_and_apples", "model": "from cpmpy import *\nimport json\n\n# Define the model\nx = intvar(1, 1000, shape=6, name=\"apples\")\n\n# Define the model constraints\nmodel = Model(\n [x[i] == (x[i-1] + 1) // 2 + (x[i-1] + 1) % 2 for i in range(1, 6)],\n [x[5] == 1]\n)\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n print(json.dumps({\"apples\": [int(val) for val in x.value]}))\nelse:\n print(\"No solution found\")"}
|
| 92 |
+
{"id": "aplai_course__5_hardy_1729_square", "model": "from cpmpy import *\nimport json\n\n# Model\na, b, c, d = intvar(1, 100, shape=4, name=\"vars\")\n\n# Constraints\nmodel = Model(\n AllDifferent([a, b, c, d]),\n a**2 + b**2 == c**2 + d**2\n)\n\n# Solve\nif model.solve():\n solution = {\n \"a\": a.value(),\n \"b\": b.value(),\n \"c\": c.value(),\n \"d\": d.value()\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 93 |
+
{"id": "aplai_course__3_kidney_exchange", "model": "from cpmpy import *\nimport json\n\n# Data\nnum_people = 8\ncompatible = [\n [2, 3],\n [1, 6],\n [1, 4, 7],\n [2],\n [2],\n [5],\n [8],\n [3]\n]\n\n# Model\nmodel = Model()\n\n# Variable: transplants[i][j] is 1 if person i donates to person j, and 0 otherwise\ntransplants = [[model.new_int(var=0, name=f\"transplants_{i}_{j}\") for j in range(num_people)] for i in range(num_people)]\n\n# Constraints\nfor i in range(num_people):\n for j in range(num_people):\n model += [transplants[i][j] == 1].only_if(compatible[i].__contains__(j + 1))\n\nfor i in range(num_people):\n model += sum(transplants[i]) <= 1 # person i donates to at most one person\n model += sum([transplants[j][i] for j in range(num_people)]) <= 1 # person i receives at most one kidney\n\nfor i in range(num_people):\n model += sum(transplants[i]) == sum([transplants[j][i] for j in range(num_people)]) # person i donates if and only if they receive\n\n# Objective\nmodel.maximize(sum([sum(transplants[i]) for i in range(num_people)]))\n\n# Solve\nif model.solve():\n # Extract solution\n transplants_solution = [[transplants[i][j].value() for j in range(num_people)] for i in range(num_people)]\n\n # Print solution as JSON\n print(json.dumps({\"transplants\": transplants_solution}))\nelse:\n print(\"No solution found\")"}
|
| 94 |
+
{"id": "aplai_course__1_magic_square", "model": "from cpmpy import *\nimport json\n\n# Define the size of the magic square\nn = 4\n\n# Define the sum of integers in each row, column and diagonal\ntarget_sum = n * (n**2 + 1) // 2\n\n# Define the variables\nx = intvar(1, n**2, shape=(n, n), name=\"x\")\n\n# Define the model\nmodel = Model()\n\n# Each cell contains a different integer\nmodel += AllDifferent(x.flatten())\n\n# The sum of integers in each row is equal to the target sum\nfor i in range(n):\n model += sum(x[i, :]) == target_sum\n\n# The sum of integers in each column is equal to the target sum\nfor j in range(n):\n model += sum(x[:, j]) == target_sum\n\n# The sum of integers in the main diagonal is equal to the target sum\nmodel += sum([x[i, i] for i in range(n)]) == target_sum\n\n# The sum of integers in the anti-diagonal is equal to the target sum\nmodel += sum([x[i, n-i-1] for i in range(n)]) == target_sum\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = x.value()\n print(json.dumps({\"square\": [list(row) for row in solution]}))\nelse:\n print(\"No solution found\")"}
|
| 95 |
+
{"id": "aplai_course__2_maximal_independent_sets", "model": "from cpmpy import *\nimport json\n\n# Data\nn = 8\nadjacency_list = [\n [2, 3, 7],\n [1, 4, 8],\n [1, 4, 5],\n [2, 3, 6],\n [3, 6, 7],\n [4, 5, 8],\n [1, 5, 8],\n [2, 6, 7]\n]\n\n# Model\nmodel = Model()\n\n# Variables\nx = boolvar(shape=n, name=\"x\")\n\n# Constraints\nfor i in range(n):\n for j in adjacency_list[i]:\n model += ~x[i] | ~x[j-1] # subtract 1 because adjacency list is 1-indexed\n\n# Objective: maximize the number of nodes in the independent set\nmodel.maximize(sum(x))\n\n# Solve\nif model.solve():\n # Get the solution\n solution = {f\"node_{i+1}\": bool(x[i].value()) for i in range(n)}\n print(json.dumps({\"nodes\": solution}))\nelse:\n print(\"No solution found\")"}
|
| 96 |
+
{"id": "aplai_course__1_money_change", "model": "from cpmpy import *\nimport json\n\n# Define the available coins and their counts\ncoin_values = [1, 2, 5, 10, 25, 50]\ncoin_counts_available = [20, 10, 15, 8, 4, 2]\n\n# Define the target change\nchange = 199\n\n# Define the model\nmodel = Model()\n\n# Define the decision variables\ncoin_counts = intvar(0, max(coin_counts_available), shape=len(coin_values), name=\"coin_counts\")\n\n# Define the constraints\nmodel += (sum([coin_values[i] * coin_counts[i] for i in range(len(coin_values))]) == change)\nfor i in range(len(coin_values)):\n model += (coin_counts[i] <= coin_counts_available[i])\n\n# Define the objective\nmodel.minimize(sum(coin_counts))\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {\"coin_counts\": [int(x) for x in coin_counts.value()]}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 97 |
+
{"id": "aplai_course__2_movie_scheduling", "model": "from cpmpy import *\nimport json\n\n# Input data\nmovies = [\n [\"Tarjan of the Jungle\", 4, 13],\n [\"The Four Volume Problem\", 17, 27],\n [\"The President's Algorist\", 1, 10],\n [\"Steiner's Tree\", 12, 18],\n [\"Process Terminated\", 23, 30],\n [\"Halting State\", 9, 16],\n [\"Programming Challenges\", 19, 25],\n [\"Discrete Mathematics\", 2, 7],\n [\"Calculated Bets\", 26, 31]\n]\n\n# Sort movies by end date\nmovies.sort(key=lambda x: x[2])\n\n# Model\nmodel = Model()\n\n# Decision variables\nselected_movies = [model.bool_var() for _ in range(len(movies))]\n\n# Constraints\nfor i in range(len(movies)):\n for j in range(i+1, len(movies)):\n model.add(selected_movies[i] + selected_movies[j] <= (movies[j][1] > movies[i][2]))\n\n# Objective\nmodel.maximize(sum(selected_movies))\n\n# Solve\nif model.solve():\n num_selected_movies = sum([int(var.value()) for var in selected_movies])\n solution = {\n \"selected_movies\": [int(var.value()) for var in selected_movies],\n \"num_selected_movies\": num_selected_movies\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 98 |
+
{"id": "aplai_course__3_people_in_a_room", "model": "from cpmpy import *\nimport json\n\n# Model\nnum_people = 13\nnum_males = 4\nnum_females = num_people - num_males\n\n# Decision variable: sequence of people entering the room (0 = male, 1 = female)\nsequence = intvar(0, 1, shape=num_people, name=\"sequence\")\n\n# Constraints\nmodel = Model(\n # Exactly num_males males and num_females females\n sum(sequence) == num_females,\n # Ratio of females to males at any time is no greater than 7/3\n [sum(sequence[:i+1]) / (i + 1 - sum(sequence[:i+1])) <= 7/3 for i in range(num_people)]\n)\n\n# Solve\nif model.solve():\n # Print solution as JSON\n solution = {\"sequence\": [int(x) for x in sequence.value()]}\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 99 |
+
{"id": "aplai_course__2_subsets_100", "model": "from cpmpy import *\nimport json\n\n# Define the set of integers\nA = [81, 21, 79, 4, 29, 70, 28, 20, 14, 7]\n\n# Define the model\nmodel = Model()\n\n# Define the decision variables\nin_S = boolvar(shape=len(A), name=\"in_S\")\nin_T = boolvar(shape=len(A), name=\"in_T\")\n\n# Define the constraints\nmodel += [sum([A[i] * in_S[i] for i in range(len(A))]) == sum([A[i] * in_T[i] for i in range(len(A))])]\nmodel += [(in_S[i] + in_T[i]) <= 1 for i in range(len(A))]\nmodel += [sum(in_S) > 0]\nmodel += [sum(in_T) > 0]\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {\n \"in_S\": [int(in_S[i].value()) for i in range(len(A))],\n \"in_T\": [int(in_T[i].value()) for i in range(len(A))]\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|
| 100 |
+
{"id": "aplai_course__2_subset_sum", "model": "from cpmpy import *\nimport json\n\n# Define the possible number of coins in each bag\ncoin_values = [16, 17, 23, 24, 39, 40]\n\n# Define the number of coins lost\ntotal_coins_lost = 100\n\n# Define the decision variables\nbags = intvar(0, 100, shape=len(coin_values), name=\"bags\")\n\n# Define the model\nmodel = Model(\n sum([coin_values[i] * bags[i] for i in range(len(coin_values))]) == total_coins_lost\n)\n\n# Solve the model\nif model.solve():\n # Print the solution as a JSON object\n solution = {f\"bags_{i}\": bags[i].value() for i in range(len(coin_values))}\n print(json.dumps({\"bags\": solution}))\nelse:\n print(\"No solution found\")"}
|
| 101 |
+
{"id": "aplai_course__1_thick_as_thieves", "model": "from cpmpy import *\nimport json\n\n# Define the variables\nartie = boolvar(name=\"artie\")\nbill = boolvar(name=\"bill\")\ncrackitt = boolvar(name=\"crackitt\")\ndodgy = boolvar(name=\"dodgy\")\nedgy = boolvar(name=\"edgy\")\nfingers = boolvar(name=\"fingers\")\n\n# At least 4 of them are innocent\nmodel = Model(\n 4 <= sum([~artie, ~bill, ~crackitt, ~dodgy, ~edgy, ~fingers])\n)\n\n# ARTIE: \"It wasn't me.\"\nmodel += [artie == ~artie]\n\n# BILL: \"Crackitt was in it up to his neck.\"\nmodel += [bill == crackitt]\n\n# CRACKITT: \"No I wasn't.\"\nmodel += [crackitt == ~crackitt]\n\n# DODGY: \"If Crackitt did it, Bill did it with him.\"\nmodel += [crackitt <= bill]\n\n# EDGY: \"Nobody did it alone.\"\nmodel += [(~edgy) | (sum([artie, bill, crackitt, dodgy, fingers]) >= 2)]\n\n# FINGERS: \"That\u2019s right: it was Artie and Dodgy together.\"\nmodel += [fingers == (artie & dodgy)]\n\n# Solve the model\nif model.solve():\n solution = {\n \"artie\": artie.value(),\n \"bill\": bill.value(),\n \"crackitt\": crackitt.value(),\n \"dodgy\": dodgy.value(),\n \"edgy\": edgy.value(),\n \"fingers\": fingers.value()\n }\n print(json.dumps(solution))\nelse:\n print(\"No solution found\")"}
|