| """ |
| Configuration file for simple RAG evaluation |
| Contains prompt templates and other settings |
| """ |
|
|
| from pathlib import Path |
|
|
| |
| |
| |
|
|
| |
# Prompt template (system + user message pair) instructing an LLM to analyze
# an operations-research question, write a full mathematical model, and emit
# gurobipy code.  The "user" message is a format string with a {question}
# placeholder filled in at call time.
GUROBI_PROMPT = {
    "system": """You are a helpful Assistant with expertise in mathematical modeling and the Gurobi solver. When the User provides an OR question, you will analyze it, build a detailed mathematical model, and provide the Gurobi code to solve it.

Your response should follow these steps:
1. Carefully analyze the problem to identify decision variables, objective, and constraints.

2. Develop a complete mathematical model, explicitly defining:
- Sets
- Parameters
- Decision Variables (and their types)
- Objective Function
- Constraints
3. Provide the corresponding Gurobi Python code to implement the model.

Implementation guardrails:
- Use `gurobipy` exclusively (avoid cvxpy/pulp/copty imports).
- When indexing tupledict variables across periods, introduce an explicit sentinel index (e.g., period 0) for initial conditions instead of accessing undefined keys like `x[-1]`.
- Define any Big-M constants explicitly using bounds derived from the data before they appear in constraints.
- Keep the model linear/integer; if a relationship seems non-linear, introduce auxiliary variables and linearization rather than exponentiation or log constraints.
- Always ensure every symbol referenced in constraints/objective (such as `M`, helper dictionaries, etc.) is declared in the code block.
""",
    "user": """Problem:
{question}

Provide a complete solution with mathematical model and Gurobi code.
"""
}
|
|
| |
| |
| |
|
|
| |
# Generation settings per model name.  Every model currently shares the same
# near-deterministic sampling configuration; each entry is a distinct dict so
# a caller mutating one model's settings cannot affect the others.
MODEL_CONFIGS = {
    model: {"temperature": 0.01, "max_tokens": 8192}
    for model in (
        "gpt-4o",
        "gpt-4o-mini",
        "deepseek-chat",
        "gemini-2.0-flash-exp",
        "gemini-2.5-pro",
    )
}
|
|
| |
| |
| |
|
|
# Knobs for the evaluation harness: execution limits, numeric-comparison
# tolerances, and artifact/output switches.
EVAL_CONFIG = {
    # Execution limits — timeout presumably in seconds (TODO confirm against
    # the consumer of this config) and a retry budget for failed attempts.
    "timeout": 60,
    "max_retries": 3,

    # Objective-value comparison: 5% relative tolerance by default, with an
    # absolute tolerance presumably used as a floor near zero — verify in the
    # comparison code.
    "tolerance": 0.05,
    "use_relative_tolerance": True,
    "absolute_tolerance": 1e-3,

    # Artifact options: keep generated code, drop raw output, quiet logging.
    "save_code": True,
    "save_output": False,
    "verbose": False,
}
|
|
| |
| |
| |
|
|
| |
# Canonical OPEN benchmark dataset names.  Legacy/historical names are mapped
# onto these by normalize_dataset_name() via DATASET_ALIASES.
DATASETS = [
    "ComplexLP",
    "EasyLP",
    "IndustryOR",
    "NL4OPT",
    "NLP4LP",
    "ReSocratic",
    "ComplexOR",
    "OPT-Principled",
]
|
|
# Historical dataset file stems -> canonical benchmark name.  Keys are stored
# lowercase; normalize_dataset_name() casefolds its input before the lookup,
# so matching is case-insensitive.
DATASET_ALIASES = {
    "complexlp_clean": "ComplexLP",
    "easylp_clean": "EasyLP",
    "industryor_clean": "IndustryOR",
    "industryor_v2": "IndustryOR",
    "industryor_fixedv2": "IndustryOR",
    "industryor_fixedv2_clean": "IndustryOR",
    "nl4opt": "NL4OPT",
    "nl4opt_clean": "NL4OPT",
    "nlp4lp_clean": "NLP4LP",
    "complexor_clean": "ComplexOR",
    "resocratic_clean": "ReSocratic",
    "combined": "OPT-Principled",
    "combined_dataset": "OPT-Principled",
    "opt-principled_clean": "OPT-Principled",
}
|
|
| |
# Per-dataset evaluation overrides.  Datasets absent from this map fall back
# to the {"tolerance": 0.05} default applied in get_dataset_config().
DATASET_CONFIG = {
    "ComplexLP": {"tolerance": 0.05},
    "EasyLP": {"tolerance": 0.01},
    "IndustryOR": {"tolerance": 0.05},
    "OPT-Principled": {"tolerance": 0.05},
}
|
|
| |
| |
| |
|
|
def get_prompt_template(template_name="default"):
    """Look up a prompt template by name; unknown names fall back to GUROBI_PROMPT."""
    registry = {
        "default": GUROBI_PROMPT,
    }
    if template_name in registry:
        return registry[template_name]
    return GUROBI_PROMPT
|
|
|
|
def get_model_config(model_name):
    """Return generation settings for *model_name*, or the shared defaults."""
    fallback = {"temperature": 0.01, "max_tokens": 8192}
    return MODEL_CONFIGS.get(model_name, fallback)
|
|
|
|
def get_dataset_config(dataset_name):
    """Return evaluation settings for a dataset, defaulting to 5% tolerance."""
    canonical = normalize_dataset_name(dataset_name)
    return DATASET_CONFIG.get(canonical, {"tolerance": 0.05})
|
|
|
|
def normalize_dataset_name(dataset_name: str) -> str:
    """Map historical dataset names to the canonical OPEN benchmark names.

    Resolution order:
      1. trim whitespace and strip an optional ``.jsonl`` extension,
      2. case-insensitive lookup in DATASET_ALIASES,
      3. case-insensitive match against the canonical DATASETS entries,
      4. the same canonical match after stripping a trailing ``_clean``.

    Falsy input (empty string / None) is returned unchanged; an unrecognized
    name is returned as-is after step 1.
    """
    if not dataset_name:
        return dataset_name

    # str.removesuffix replaces the brittle name[:-6] slicing that was tied
    # to the suffix length.
    name = dataset_name.strip().removesuffix(".jsonl")

    alias = DATASET_ALIASES.get(name.casefold())
    if alias:
        return alias

    # Build the casefold -> canonical lookup once instead of scanning
    # DATASETS twice with separate loops.
    canonical_by_folded = {canonical.casefold(): canonical for canonical in DATASETS}
    hit = canonical_by_folded.get(name.casefold())
    if hit is None:
        # A "_clean"-suffixed variant of a canonical name (no-op strip when
        # the suffix is absent, in which case the lookup above already failed).
        hit = canonical_by_folded.get(name.removesuffix("_clean").casefold())
    if hit:
        return hit

    return name
|
|
|
|
def get_benchmark_dirs(project_root: Path) -> list[Path]:
    """Return benchmark directories in priority order for the migrated OPEN layout."""
    repo_root = project_root.parent.parent
    legacy_local = project_root / "clean_benchmarks"
    legacy_parent = project_root.parent / "clean_benchmarks"
    return [repo_root / "data" / "benchmarks", legacy_local, legacy_parent]
|
|
|
|
def find_benchmark_path(project_root: Path, dataset_name: str) -> Path:
    """Locate the benchmark ``.jsonl`` file for a dataset, accepting legacy names.

    Tries the normalized (canonical) name first, then the raw input name, in
    each benchmark directory returned by get_benchmark_dirs() in priority
    order.

    Raises:
        FileNotFoundError: if no candidate file exists in any directory.
    """
    normalized_name = normalize_dataset_name(dataset_name)
    candidate_names = [normalized_name]
    # str.removesuffix replaces the brittle [:-6] slicing tied to len(".jsonl").
    raw_name = dataset_name.removesuffix(".jsonl")
    if raw_name not in candidate_names:
        candidate_names.append(raw_name)

    # Compute the search path once; it is also reported in the error below
    # (the original recomputed it inside the exception message).
    search_dirs = get_benchmark_dirs(project_root)
    for directory in search_dirs:
        for name in candidate_names:
            candidate = directory / f"{name}.jsonl"
            if candidate.exists():
                return candidate

    raise FileNotFoundError(
        f"Dataset '{dataset_name}' not found. Checked directories: "
        f"{[str(path) for path in search_dirs]}"
    )
|
|