JustinTX committed on Apr 18

Commit

f4790e1

verified ·

1 Parent(s): 1255e4d

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

benchmarks/ADRS/README.md +63 -0
benchmarks/ADRS/eplb/evaluator/requirements.txt +1 -0
benchmarks/ADRS/llm_sql/evaluator/download_dataset.sh +30 -0
benchmarks/ADRS/llm_sql/evaluator/requirements.txt +2 -0
benchmarks/ADRS/llm_sql/initial_program.py +365 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc008/config.yaml +116 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc008/initial_program.cpp +508 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc011/best_program.cpp +730 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc011/config.yaml +208 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc011/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc011/initial_program.cpp +607 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc015/best_program.cpp +664 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc015/config.yaml +77 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc015/initial_program.cpp +491 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc016/best_program.cpp +244 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc016/config.yaml +108 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc016/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc016/initial_program.cpp +495 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc024/best_program.cpp +626 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc024/config.yaml +73 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc024/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc024/initial_program.cpp +481 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc025/best_program.cpp +282 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc025/config.yaml +104 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc025/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc025/initial_program.cpp +628 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc026/best_program.cpp +653 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc026/config.yaml +69 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc026/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc026/initial_program.cpp +563 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc027/best_program.cpp +595 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc027/config.yaml +117 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc027/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc027/initial_program.cpp +614 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc039/config.yaml +77 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc039/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc046/config.yaml +62 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc046/evaluator.py +65 -0
benchmarks/ale_bench/ale-bench-lite-problems/ahc046/initial_program.cpp +897 -0
benchmarks/ale_bench/ale_agent_best/ahc016.cpp +495 -0
benchmarks/ale_bench/private_eval.py +161 -0
benchmarks/arc_benchmark/README.md +108 -0
configs/README.md +355 -0
configs/adaevolve.yaml +125 -0
configs/default.yaml +38 -0
configs/evox.yaml +59 -0
configs/human_in_the_loop.yaml +49 -0
configs/llm_judge.yaml +40 -0
configs/openevolve_native.yaml +70 -0
pyproject.toml +124 -0

benchmarks/ADRS/README.md ADDED Viewed

	@@ -0,0 +1,63 @@

+# ADRS: AI-Driven Research for Systems
+This directory contains the systems optimization benchmarks from the **AI-Driven Research for Systems (ADRS)** initiative at UC Berkeley.
+ADRS investigates how AI — large language models, evolutionary algorithms, and multi-agent architectures — can autonomously design, optimize, and evaluate computer systems. Instead of treating systems research as a purely manual process, ADRS frames it as a closed-loop optimization problem: propose candidate algorithms, evaluate them against system-level objectives, analyze failure modes, adapt the search strategy, and iterate.
+Each benchmark below defines a concrete systems task with a provided evaluator, initial program, and configuration. Solutions are evolved using SkyDiscover's evolutionary search loop.
+## Benchmarks
+### Cloudcast — Multi-Cloud Data Transfer
+**Directory:** `cloudcast/`
+Given a network of cloud regions with heterogeneous egress pricing and bandwidth, broadcast a dataset from a source region to multiple destinations at minimum total cost. The evolved algorithm must construct routing topologies (e.g., relay trees, Steiner-like structures) that exploit shared intermediate hops across transfers.
+### Expert Parallelism Load Balancer (EPLB)
+**Directory:** `eplb/`
+In Mixture-of-Experts (MoE) model inference, a small subset of experts handles each token, leading to GPU load imbalance when certain experts become disproportionately popular. This task evolves an algorithm that decides how many replicas each expert should have and how to assign them across GPUs, optimizing both load-balance quality and rebalancing runtime.
+### Model Placement (Prism)
+**Directory:** `prism/`
+Assign multiple LLM models to a fixed GPU cluster (80 GB per GPU) such that the worst-case KV-cache pressure ratio across GPUs is minimized. Lower pressure means more memory headroom for serving, improving throughput and stability under varying request loads.
+### LLM-SQL — Column Reordering for Prefix Caching
+**Directory:** `llm_sql/`
+When rows of a table are serialized into LLM prompts sequentially, consecutive rows that share leading column values can reuse cached prefixes. This task evolves a column-reordering strategy that maximizes prefix-cache hit rates across multiple real-world datasets without altering the underlying data.
+### Transaction Scheduling (TXN)
+**Directory:** `txn_scheduling/`
+Given a set of database transactions with read/write dependencies on shared keys, find an execution ordering that minimizes the total makespan. The evolved scheduler must respect conflict constraints (read-write and write-write on the same key) while compressing the overall completion time.
+### Telemetry Repair
+**Coming soon.** The Telemetry Repair benchmark is under active development and will be released in a future update.
+## Quick Start
+Each benchmark directory contains:
+- `initial_program.py` — the seed solution for evolution
+- `evaluator.py` — the scoring function
+- `config.yaml` — run configuration
+Run any benchmark from the repo root:
+```bash
+uv run skydiscover-run \
+  benchmarks/ADRS/cloudcast/initial_program.py \
+  benchmarks/ADRS/cloudcast/evaluator.py \
+  -c benchmarks/ADRS/cloudcast/config.yaml \
+  -s [your_algorithm] \
+  -i 100
+```
+See the individual benchmark directories for task-specific setup instructions (e.g., dataset downloads, GPU dependencies).

benchmarks/ADRS/eplb/evaluator/requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ torch

benchmarks/ADRS/llm_sql/evaluator/download_dataset.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+#!/usr/bin/env bash
+# Download CSV datasets for the LLM-SQL benchmark.
+#
+# Required files (placed in datasets/, next to this script):
+#   movies.csv    - Rotten Tomatoes movie reviews (~9 MB)
+#   beer.csv      - Beer review dataset (~2.5 MB)
+#   BIRD.csv      - BIRD text-to-SQL dataset (~34 MB)
+#   PDMX.csv      - PDMX metadata dataset (~7.4 MB)
+#   products.csv  - Amazon product catalog (~16 MB)
+#
+# Usage:
+#   cd benchmarks/ADRS/llm_sql/evaluator
+#   bash download_dataset.sh
+#
+# The script changes into its own directory before downloading, so it can
+# also be invoked by path from any other working directory.
+# Abort on any failing command, unset variable, or failing pipeline stage.
+set -euo pipefail
+# Resolve all relative paths below against the directory holding this script.
+cd "$(dirname "$0")"
+BASE_URL="https://huggingface.co/datasets/f20180301/adrs-data/resolve/main/llm_sql"
+echo "Downloading LLM-SQL benchmark datasets..."
+mkdir -p datasets
+for dataset in movies.csv beer.csv BIRD.csv PDMX.csv products.csv; do
+    echo "  Downloading datasets/${dataset}..."
+    # -O overwrites any existing local copy of the same file.
+    wget -q --show-progress -O "datasets/${dataset}" "${BASE_URL}/datasets/${dataset}"
+done
+echo ""
+echo "Done. Downloaded files:"
+ls -lh datasets/*.csv

benchmarks/ADRS/llm_sql/evaluator/requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pandas
2	+ networkx>=3.2,<3.4

benchmarks/ADRS/llm_sql/initial_program.py ADDED Viewed

	@@ -0,0 +1,365 @@

+# EVOLVE-BLOCK-START
+import pandas as pd
+from solver import Algorithm
+from typing import Tuple, List, Dict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import lru_cache
+from collections import Counter
+import networkx as nx
+class Evolved(Algorithm):
+    """
+    GGR algorithm
+    """
+    def __init__(self, df: pd.DataFrame = None):
+        self.df = df
+        self.dep_graph = None  # NOTE: not used, for one way dependency
+        self.num_rows = 0
+        self.num_cols = 0
+        self.column_stats = None
+        self.val_len = None
+        self.row_stop = None
+        self.col_stop = None
+        self.base = 2000
+    def find_max_group_value(self, df: pd.DataFrame, value_counts: Dict, early_stop: int = 0) -> str:
+        # NOTE: recalculate value counts and length for each value
+        value_counts = Counter(df.stack())
+        weighted_counts = {val: self.val_len[val] * (count - 1) for val, count in value_counts.items()}  # if count > 1
+        if not weighted_counts:
+            return None
+        max_group_val, max_weighted_count = max(weighted_counts.items(), key=lambda x: x[1])
+        if max_weighted_count < early_stop:
+            return None
+        return max_group_val
+    def reorder_columns_for_value(self, row, value, column_names, grouped_rows_len: int = 1):
+        """Reorder one row so that the cells equal to ``value`` come first.
+        ``row`` is read with ``hasattr``/``getattr``; ``column_recursion`` passes rows produced by
+        ``DataFrame.itertuples(index=False)``. For each column three attribute spellings are tried
+        in turn: the column name, the name with spaces replaced by underscores, and the positional
+        fallback ``_<idx>``.
+        Returns ``(values, cols_with_value)``: the row's values in the new column order and the
+        names of the columns that held ``value``.
+        Raises ``AssertionError`` when the new ordering does not have one entry per ``column_names`` item.
+        """
+        # cols_with_value will now use attribute access instead of indexing with row[]
+        cols_with_value = []
+        for idx, col in enumerate(column_names):
+            if hasattr(row, col) and getattr(row, col) == value:
+                cols_with_value.append(col)
+            elif hasattr(row, col.replace(" ", "_")) and getattr(row, col.replace(" ", "_")) == value:
+                # NOTE(review): the original (spaced) name is stored here, but the final
+                # getattr(row, col) below uses the stored name as-is -- confirm this path works.
+                cols_with_value.append(col)
+            else:
+                # Positional attribute name, presumably what itertuples assigns to
+                # columns whose labels are not valid identifiers -- TODO confirm.
+                attr_name = f"_{idx}"
+                if hasattr(row, attr_name) and getattr(row, attr_name) == value:
+                    cols_with_value.append(attr_name)
+        if self.dep_graph is not None and grouped_rows_len > 1:
+            # NOTE: experimental
+            # Each matching column is followed directly by the columns that depend on it.
+            reordered_cols = []
+            for col in cols_with_value:
+                dependent_cols = self.get_dependent_columns(col)
+                # check if dependent columns are in row, and if column exists in row attributes
+                valid_dependent_cols = []
+                for idx, dep_col in enumerate(dependent_cols):
+                    if hasattr(row, dep_col):
+                        valid_dependent_cols.append(dep_col)
+                    elif hasattr(row, dep_col.replace(" ", "_")):
+                        valid_dependent_cols.append(dep_col)
+                    else:
+                        # NOTE(review): idx here is the position inside dependent_cols,
+                        # not the column's position in column_names -- verify intent.
+                        attr_name = f"_{idx}"
+                        if hasattr(row, attr_name):
+                            valid_dependent_cols.append(dep_col)
+                reordered_cols.extend([col] + valid_dependent_cols)
+            cols_without_value = [col for col in column_names if col not in reordered_cols]
+            reordered_cols.extend(cols_without_value)
+            assert len(reordered_cols) == len(
+                column_names
+            ), f"Reordered cols len: {len(reordered_cols)}  Original cols len: {len(column_names)}"
+            return [getattr(row, col) for col in reordered_cols], cols_with_value
+        else:
+            # No dependency handling: matching columns first, then the remaining
+            # columns in their original relative order.
+            cols_without_value = []
+            for idx, col in enumerate(column_names):
+                if hasattr(row, col) and getattr(row, col) != value:
+                    cols_without_value.append(col)
+                elif hasattr(row, col.replace(" ", "_")) and getattr(row, col.replace(" ", "_")) != value:
+                    cols_without_value.append(col)
+                else:
+                    # Handle some edge cases
+                    attr_name = f"_{idx}"
+                    if hasattr(row, attr_name) and getattr(row, attr_name) != value:
+                        cols_without_value.append(attr_name)
+            reordered_cols = cols_with_value + cols_without_value
+            # A column that could not be resolved under any spelling is dropped above
+            # and is caught by this length check.
+            assert len(reordered_cols) == len(
+                column_names
+            ), f"Reordered cols len: {len(reordered_cols)}  Original cols len: {len(column_names)}"
+            return [getattr(row, col) for col in reordered_cols], cols_with_value
+    def get_dependent_columns(self, col: str) -> List[str]:
+        if self.dep_graph is None or not self.dep_graph.has_node(col):
+            return []
+        return list(nx.descendants(self.dep_graph, col))
+    # Memoised on (self, col); cache hits hand back the same list object, so
+    # callers should treat the result as read-only.
+    # NOTE(review): nothing in the visible code clears this cache when
+    # self.dep_graph is rebuilt -- confirm instances are not reused across graphs.
+    @lru_cache(maxsize=None)
+    def get_cached_dependent_columns(self, col: str) -> List[str]:
+        """Cached variant of ``get_dependent_columns`` for the same ``col``."""
+        return self.get_dependent_columns(col)
+    def fixed_reorder(self, df: pd.DataFrame, row_sort: bool = True) -> Tuple[pd.DataFrame, List[List[str]]]:
+        num_rows, column_stats = self.calculate_col_stats(df, enable_index=True)
+        reordered_columns = [col for col, _, _, _ in column_stats]
+        reordered_df = df[reordered_columns]
+        assert reordered_df.shape == df.shape
+        column_orderings = [reordered_columns] * num_rows
+        if row_sort:
+            reordered_df = reordered_df.sort_values(by=reordered_columns, axis=0)
+        return reordered_df, column_orderings
+    def column_recursion(self, result_df, max_value, grouped_rows, row_stop, col_stop, early_stop):
+        """Move ``max_value`` to the front of every grouped row, then recurse on the remaining columns.
+        Each row of ``grouped_rows`` is reordered by ``reorder_columns_for_value`` in a thread pool and
+        written into ``result_df``. The group whose first column equals ``max_value`` then has its
+        leading settled column(s) held fixed while the rest is passed to ``recursive_reorder`` with
+        ``col_stop + 1``.
+        Returns ``(result_df, grouped_value_counts)`` where the counts cover all cells of ``grouped_rows``
+        (an empty ``Counter`` if ``result_df`` ended up empty).
+        """
+        cols_settled = []
+        with ThreadPoolExecutor() as executor:
+            futures = [
+                executor.submit(self.reorder_columns_for_value, row, max_value, grouped_rows.columns.tolist(), len(grouped_rows))
+                for row in grouped_rows.itertuples(index=False)
+            ]
+            # Rows are stored in completion order, not submission order, and
+            # cols_settled keeps the value from whichever future finished last.
+            for i, future in enumerate(as_completed(futures)):
+                reordered_row, cols_settled = future.result()
+                result_df.loc[i] = reordered_row
+        grouped_value_counts = Counter()
+        if not result_df.empty:
+            # Group by the first column
+            grouped_result_df = result_df.groupby(result_df.columns[0])
+            grouped_value_counts = Counter(grouped_rows.stack())  # this is still faster than updating from cached value counts
+            for _, group in grouped_result_df:
+                # Only the group led by max_value is refined further.
+                if group[group.columns[0]].iloc[0] != max_value:
+                    continue
+                dependent_cols = self.get_cached_dependent_columns(group.columns[0])
+                length_of_settle_cols = len(cols_settled)
+                if dependent_cols:
+                    assert length_of_settle_cols >= 1, f"Dependent columns should be no less than 1, but got {length_of_settle_cols}"
+                    # test the first length_of_settle_cols columns, each column has nunique == 1
+                    for col in group.columns[:length_of_settle_cols]:
+                        assert group[col].nunique() == 1, f"Column {col} should have nunique == 1, but got {group[col].nunique()}"
+                    # drop all the settled columns and reorder the rest
+                    group_remainder = group.iloc[:, length_of_settle_cols:]
+                else:
+                    # Without dependencies only the first column is treated as settled.
+                    group_remainder = group.iloc[:, 1:]
+                grouped_remainder_value_counts = Counter(group_remainder.stack())
+                reordered_group_remainder, _ = self.recursive_reorder(
+                    group_remainder, grouped_remainder_value_counts, early_stop=early_stop, row_stop=row_stop, col_stop=col_stop + 1
+                )
+                # Update the group with the reordered columns
+                if dependent_cols:
+                    group.iloc[:, length_of_settle_cols:] = reordered_group_remainder.values
+                else:
+                    group.iloc[:, 1:] = reordered_group_remainder.values
+                result_df.update(group)
+                # At most one group can match max_value, so stop scanning.
+                break
+        return result_df, grouped_value_counts
+    def recursive_reorder(
+        self,
+        df: pd.DataFrame,
+        value_counts: Dict,
+        early_stop: int = 0,
+        original_columns: List[str] = None,
+        row_stop: int = 0,
+        col_stop: int = 0,
+    ) -> Tuple[pd.DataFrame, List[List[str]]]:
+        """Greedily group rows around the best-scoring value, recursing over columns and rows.
+        Rows containing the value chosen by ``find_max_group_value`` are handled by
+        ``column_recursion``; the remaining rows are handled by a recursive call with
+        ``row_stop + 1``. The two results are stacked, grouped rows first.
+        Falls back to ``fixed_reorder`` when a depth limit (``self.row_stop`` / ``self.col_stop``)
+        is reached, when no value qualifies, or when no row contains the chosen value.
+        ``row_stop`` and ``col_stop`` here are the current recursion depths, not limits.
+        Returns ``(frame, [])`` on the greedy path; the fallback paths return whatever
+        ``fixed_reorder`` returns. An empty input is returned unchanged.
+        """
+        if df.empty or len(df.columns) == 0 or len(df) == 0:
+            return df, []
+        if self.row_stop is not None and row_stop >= self.row_stop:
+            return self.fixed_reorder(df)
+        if self.col_stop is not None and col_stop >= self.col_stop:
+            return self.fixed_reorder(df)
+        # NOTE(review): original_columns is defaulted here but not read again in this method.
+        if original_columns is None:
+            original_columns = df.columns.tolist()
+        # Find the max group value using updated counts
+        max_value = self.find_max_group_value(df, value_counts, early_stop=early_stop)
+        if max_value is None:
+            # If there is no max value, then fall back to fixed reorder
+            return self.fixed_reorder(df)
+        # Split rows by whether any cell equals max_value.
+        grouped_rows = df[df.isin([max_value]).any(axis=1)]
+        remaining_rows = df[~df.isin([max_value]).any(axis=1)]
+        # If there is no grouped rows, return the original DataFrame
+        if grouped_rows.empty:
+            return self.fixed_reorder(df)
+        result_df = pd.DataFrame(columns=df.columns)
+        reordered_remaining_rows = pd.DataFrame(columns=df.columns)  # Initialize empty dataframe first
+        # Column Recursion
+        result_df, grouped_value_counts = self.column_recursion(result_df, max_value, grouped_rows, row_stop, col_stop, early_stop)
+        remaining_value_counts = value_counts - grouped_value_counts  # Approach 1 - update remaining value counts with subtraction
+        # Row Recursion
+        reordered_remaining_rows, _ = self.recursive_reorder(
+            remaining_rows, remaining_value_counts, early_stop=early_stop, row_stop=row_stop + 1, col_stop=col_stop
+        )
+        old_column_names = result_df.columns.tolist()
+        result_cols_reset = result_df.reset_index(drop=True)
+        result_rows_reset = reordered_remaining_rows.reset_index(drop=True)
+        # Stack by position: after per-row reordering the column labels no longer
+        # describe the cells, so the frame is rebuilt from raw values.
+        final_result_df = pd.DataFrame(result_cols_reset.values.tolist() + result_rows_reset.values.tolist())
+        if row_stop == 0 and col_stop == 0:
+            # Only the outermost call restores labels; the last column is relabelled
+            # "original_index" (presumably where that column ends up -- TODO confirm).
+            final_result_df.columns = old_column_names
+            final_result_df.columns = final_result_df.columns.tolist()[:-1] + ["original_index"]
+        return final_result_df, []
+    def recursive_split_and_reorder(self, df: pd.DataFrame, original_columns: List[str] = None, early_stop: int = 0):
+        """
+        Recursively split the DataFrame into halves until the size is <= 1000, then apply the recursive reorder function.
+        """
+        if len(df) <= self.base:
+            initial_value_counts = Counter(df.stack())
+            return self.recursive_reorder(df, initial_value_counts, early_stop, original_columns, row_stop=0, col_stop=0)[0]
+        mid_index = len(df) // 2
+        df_top_half = df.iloc[:mid_index]
+        df_bottom_half = df.iloc[mid_index:]
+        with ThreadPoolExecutor() as executor:
+            future_top = executor.submit(self.recursive_split_and_reorder, df_top_half, original_columns, early_stop)
+            future_bottom = executor.submit(self.recursive_split_and_reorder, df_bottom_half, original_columns, early_stop)
+        reordered_top_half = future_top.result()
+        reordered_bottom_half = future_bottom.result()
+        assert reordered_bottom_half.shape == df_bottom_half.shape
+        reordered_df = pd.concat([reordered_top_half, reordered_bottom_half], axis=0, ignore_index=True)
+        assert reordered_df.shape == df.shape
+        return reordered_df
+    @lru_cache(maxsize=None)
+    def calculate_length(self, value):
+        if isinstance(value, bool):
+            return 4**2
+        if isinstance(value, (int, float)):
+            return len(str(value)) ** 2
+        if isinstance(value, str):
+            return len(value) ** 2
+        return 0
+    def reorder(
+        self,
+        df: pd.DataFrame,
+        early_stop: int = 0,
+        row_stop: int = None,
+        col_stop: int = None,
+        col_merge: List[List[str]] = [],
+        one_way_dep: List[Tuple[str, str]] = [],
+        distinct_value_threshold: float = 0.8,
+        parallel: bool = True,
+    ) -> Tuple[pd.DataFrame, List[List[str]]]:
+        # Prepare
+        initial_df = df.copy()
+        if col_merge:
+            self.num_rows, self.column_stats = self.calculate_col_stats(df, enable_index=True)
+            reordered_columns = [col for col, _, _, _ in self.column_stats]
+            for col_to_merge in col_merge:
+                final_col_order = [col for col in reordered_columns if col in col_to_merge]
+                df = self.merging_columns(df, final_col_order, prepended=False)
+        self.num_rows, self.column_stats = self.calculate_col_stats(df, enable_index=True)
+        self.column_stats = {col: (num_groups, avg_len, score) for col, num_groups, avg_len, score in self.column_stats}
+        # One way dependency statistics [not used]
+        if one_way_dep is not None and len(one_way_dep) > 0:
+            self.dep_graph = nx.DiGraph()
+            for dep in one_way_dep:
+                col1 = [col for col in df.columns if dep[0] in col]
+                col2 = [col for col in df.columns if dep[1] in col]
+                assert len(col1) == 1, f"Expected one column to match {dep[0]}, but got {len(col1)}"
+                assert len(col2) == 1, f"Expected one column to match {dep[1]}, but got {len(col2)}"
+                col1 = col1[0]
+                col2 = col2[0]
+                self.dep_graph.add_edge(col1, col2)
+        # Discard too distinct columns by threshold [optional]
+        nunique_threshold = len(df) * distinct_value_threshold
+        columns_to_discard = [col for col in df.columns if df[col].nunique() > nunique_threshold]
+        columns_to_discard = sorted(columns_to_discard, key=lambda x: self.column_stats[x][2], reverse=True)
+        columns_to_recurse = [col for col in df.columns if col not in columns_to_discard]
+        df["original_index"] = range(len(df))
+        discarded_columns_df = df[columns_to_discard + ["original_index"]]
+        df_to_recurse = df[columns_to_recurse + ["original_index"]]
+        recurse_df = df_to_recurse
+        self.column_stats = {col: stats for col, stats in self.column_stats.items() if col not in columns_to_discard}
+        initial_value_counts = Counter(recurse_df.stack())
+        self.val_len = {val: self.calculate_length(val) for val in initial_value_counts.keys()}
+        self.row_stop = row_stop if row_stop else len(recurse_df)
+        self.col_stop = col_stop if col_stop else len(recurse_df.columns.tolist())
+        print("*" * 80)
+        print(f"DF columns = {df.columns}")
+        # print(f"Early stop = {early_stop}")
+        # print(f"Row recursion stop depth = {self.row_stop}, Column recursion stop depth = {self.col_stop}")
+        print("*" * 80)
+        # Eary stop and fall back
+        recurse_df, _ = self.fixed_reorder(recurse_df)
+        # Recursive reordering
+        self.num_cols = len(recurse_df.columns)
+        if parallel:
+            reordered_df = self.recursive_split_and_reorder(recurse_df, original_columns=columns_to_recurse, early_stop=early_stop)
+        else:
+            reordered_df, _ = self.recursive_reorder(
+                recurse_df,
+                initial_value_counts,
+                early_stop=early_stop,
+            )
+        assert (
+            reordered_df.shape == recurse_df.shape
+        ), f"Reordered DataFrame shape {reordered_df.shape} does not match original DataFrame shape {recurse_df.shape}"
+        assert recurse_df["original_index"].is_unique, "Passed in recurse index contains duplicates!"
+        assert reordered_df["original_index"].is_unique, "Reordered index contains duplicates!"
+        if len(columns_to_discard) > 0:
+            final_df = pd.merge(reordered_df, discarded_columns_df, on="original_index", how="left")
+        else:
+            final_df = reordered_df
+        final_df = final_df.drop(columns=["original_index"])
+        if not col_merge:
+            assert (
+                final_df.shape == initial_df.shape
+            ), f"Final DataFrame shape {final_df.shape} does not match original DataFrame shape {initial_df.shape}"
+        else:
+            assert (
+                final_df.shape[0] == initial_df.shape[0]
+            ), f"Final DataFrame shape {final_df.shape} does not match original DataFrame shape {initial_df.shape}"
+            assert (
+                final_df.shape[1] == recurse_df.shape[1] + len(columns_to_discard) - 1
+            ), f"Final DataFrame shape {final_df.shape} does not match original DataFrame shape {recurse_df.shape}"
+        # sort by the first column to get the final order
+        final_df = final_df.sort_values(by=final_df.columns.to_list(), axis=0)
+        return final_df, []
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc008/config.yaml ADDED Viewed

	@@ -0,0 +1,116 @@

+# ALE-Bench ahc008 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    AtCoder's CEO, Takahashi, loves animals and has a number of pets running free in the AtCoder office.\nAtCoder's employees\
+    \ have trouble with the pets interrupting their work, so they have decided to place partitions in the office to create\
+    \ a space where pets cannot come in.\nPlease create as large a space as possible.\n\nProblem Statement\n--------\nThere\
+    \ are $N$ pets and $M$ people in a room with a floor of $30 \\times 30$ squares.\nAll squares are initially passable,\
+    \ and outside of the $30 \\times 30$ squares are impassable.\nLet $(x, y)$ be the coordinates of the square in row $x$\
+    \ from the top ($1\\leq x\\leq 30$) and column $y$ from the left ($1\\leq y\\leq 30$).\nRepeat the following process for\
+    \ $300$ turns.\n\nFirst, you choose each person's action from the following three types, and perform each action simultaneously.\n\
+    \n- Do nothing and stay in the current position.\n- Choose a square adjacent to the current position and make it impassable.\
+    \ You cannot choose a square that contains pets or humans at the start of this turn. <b>You cannot choose a square whose\
+    \ adjacent square contains a pet, either.</b> If you choose a square that is already impassable, nothing happens.\n- Move\
+    \ to an adjacent passable square. It is not possible to move to a square that becomes impassable by another person's action\
+    \ in this turn.\n\nAfter all the people have completed their actions for that turn, each pet moves independently.\nRules\
+    \ for pet movement depend on the type of pet, and some pets may move multiple squares in a single turn.\nDetails are described\
+    \ later.\n\nSquares containing humans or pets are also passable, and each square can contain any number of humans and\
+    \ pets.\n\n\nScoring\n--------\nAt the end of $300$ turn, for each $i=1,\\cdots,M$, let $R_i$ be the set of squares reachable\
+    \ from the final position of person $i$ through only passable squares, and $n_i$ be the number of pets whose final position\
+    \ is in $R_i$.\nThen, person $i$ obtains satisfaction of $s_i=\\frac{|R_i|}{900}\\times 2^{-n_i}$.\nThe score for the\
+    \ test case is $\\mathrm{round}\\left(10^8\\times\\frac{1}{M}\\sum_{i=1}^M s_i\\right)$.\n\n#### Number of test cases\n\
+    - Provisional test: 100\n- System test: 2000. We will publish <a href=\"https://img.atcoder.jp/ahc008/seeds.txt\">seeds.txt</a>\
+    \ (md5=27bf0702bbe0265900374c3b6b9846b4, sha256=33973e4ded08e3a607fc2e841e14751ff110ae10154b286e7fd5f766ff86d706) after\
+    \ the contest is over.\n\nThe score of a submission is the total scores for each test case.\nIn the provisional test,\
+    \ if your submission produces illegal output or exceeds the time limit for some test cases, the submission itself will\
+    \ be judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span>\
+    \ or <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>\
+    \ , and the score of the submission will be zero.\nIn the system test, if your submission produces illegal output or exceeds\
+    \ the time limit for some test cases, only the score for those test cases will be zero.\nNote that if your program terminates\
+    \ abnormally, it may be judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"\
+    Wrong Answer\">WA</span> instead of <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"\
+    Runtime Error\">RE</span>.\n\n#### About execution time\nExecution time may vary slightly from run to run.\nIn addition,\
+    \ since system tests simultaneously perform a large number of executions, it has been observed that execution time increases\
+    \ by several percent compared to provisional tests.\nFor these reasons, submissions that are very close to the time limit\
+    \ may result in <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\"\
+    >TLE</span> in the system test.\nPlease measure the execution time in your program to terminate the process, or have enough\
+    \ margin in the execution time.\n\n\nInput and Output\n--------\nFirst, the initial position and type of each pet, and\
+    \ the initial position of each person are given from Standard Input in the following format\n~~~\n$N$\n$px_1$ $py_1$ $pt_1$\n\
+    $\\vdots$\n$px_N$ $py_N$ $pt_N$\n$M$\n$hx_1$ $hy_1$\n$\\vdots$\n$hx_M$ $hy_M$\n~~~\n$N$ is an integer between $10$ and\
+    \ $20$ representing the number of pets.\n$(px_i,py_i)$ represents the coordinates of the initial position of the $i$-th\
+    \ pet, and $pt_i$ is an integer between $1$ and $5$ representing the type of the $i$-th pet.\n$M$ is an integer between\
+    \ $5$ and $10$ representing the number of humans.\n$(hx_i,hy_i)$ represents the coordinates of the initial position of\
+    \ the $i$-th human.\nThe initial positions of all pets and humans are guaranteed to be distinct.\n\nAfter reading the\
+    \ above information, repeat the following process $300$ turns.\n\nFirst, output a string of length $M$ where the $i$-th\
+    \ character represents the action of the $i$th person as follows on a single line to Standard Output.\n<font color=\"\
+    red\">**After the output, you have to flush Standard Output.**</font> Otherwise, the submission might be judged as <span\
+    \ class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span> .\n\n\
+    - `.`: Do nothing and stay in the current position.\n- `u`, `d`, `l`, `r`: Let $(x,y)$ be the current position. Make the\
+    \ square $(x-1,y)$, $(x+1,y)$, $(x,y-1)$, or $(x,y+1)$ impassable, respectively.\n- `U`, `D`, `L`, `R`: Let $(x,y)$ be\
+    \ the current position. Move to the the square $(x-1,y)$, $(x+1,y)$, $(x,y-1)$, or $(x,y+1)$, respectively.\n\nAfter the\
+    \ output, $N$ strings are given to Standard Input in a single line, separated by spaces.\nThe $i$-th string represents\
+    \ movement of the $i$-th pet in that turn.\nIf the pet does not move, the string is `.`.\nIf it does move, the string\
+    \ is a sequence of characters `U`, `D`, `L`, and `R` representing the movement of one square up, down, left, and right,\
+    \ respectively.\n\n<a href=\"https://img.atcoder.jp/ahc008/f828b9475ffb41d54f05619db6ccbd4f.html?lang=en&show=example\"\
+    >Show example</a>\n\n\nPets Movement Rules\n--------\nWe define a basic move as follows: move to a square chosen at random\
+    \ among the adjacent passable squares. From the condition of the squares that can be made impassable, such squares always\
+    \ exist.\n\nEach pet $i$ performs the following moves depending on $pt_i$, an integer value between $1$ and $5$ representing\
+    \ its type.\n\n1. <img src=\"./images/cow.png\" width=\"30\" height=\"30\" style=\"background-color:silver;image-rendering:pixelated\"\
+    > Cow: Perform one basic move.\n2. <img src=\"./images/pig.png\" width=\"30\" height=\"30\" style=\"background-color:silver;image-rendering:pixelated\"\
+    > Pig: Perform two basic moves.\n3. <img src=\"./images/rabbit.png\" width=\"30\" height=\"30\" style=\"background-color:silver;image-rendering:pixelated\"\
+    > Rabbit: Perform three basic moves.\n4. <img src=\"./images/dog.png\" width=\"30\" height=\"30\" style=\"background-color:silver;image-rendering:pixelated\"\
+    > Dog: Move toward a target person as follows. The first turn starts with no target. If it has no target, the target person\
+    \ is in the current position, or there exists no path to the target person, then it selects one person uniformly at random\
+    \ among those reachable from the current position, excluding those in the current position. If there is no such person,\
+    \ reset to no target and perform one basic move. Otherwise, move to an adjacent passable square that shortens the shortest\
+    \ distance to the target person (if there are multiple such squares, choose one of them uniformly at random), and then\
+    \ perform one basic move. If it reaches the destination after the first or the second move, reset to no target.\n5. <img\
+    \ src=\"./images/cat.png\" width=\"30\" height=\"30\" style=\"background-color:silver;image-rendering:pixelated\"> Cat:\
+    \ Move toward a target square as follows. The first turn starts with no target. If it has no target or there exists no\
+    \ path to the target square, then it selects one square uniformly at random among those reachable from the current position,\
+    \ excluding the current position. If there exists no such square, do nothing. Otherwise, move to an adjacent passable\
+    \ square that shortens the shortest distance to the target square (if there are multiple such squares, choose one of them\
+    \ uniformly at random), and then perform one basic move. If it reaches the destination after the first or the second move,\
+    \ reset to no target.\n\n\nInput Generation\n--------\nLet $\\mathrm{rand}(L,U)$ be a function that generates a uniform\
+    \ random integer between $L$ and $U$, inclusive.\n\nWe generate the number of pets by $N=\\mathrm{rand}(10, 20)$.\nThe\
+    \ initial position of each pet is chosen uniformly at random from the coordinates that have not been chosen yet.\nWe generate\
+    \ the type of each pet by $pt_i=\\mathrm{rand}(1, 5)$.\n\nWe generate the number of humans by $M=\\mathrm{rand}(5, 10)$.\n\
+    The initial position of each human is chosen uniformly at random from the coordinates that have not been chosen yet.\n\
+    \n\nTools\n--------\n- <a href=\"https://img.atcoder.jp/ahc008/tools_v3.zip\">Local tester</a>: You need a compilation\
+    \ environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n  - For those who are not familiar with the\
+    \ Rust language environment, we have prepared a pre-compiled binary for Windows. <a href=\"https://img.atcoder.jp/ahc008/tools_x86_64-pc-windows-gnu_v3.zip\"\
+    >tools_x86_64-pc-windows-gnu.zip</a>\n  - <font color=\"red\">The first version contained a bug in the cat's movement,\
+    \ which has been fixed at 130 minutes after the contest started. Please re-download it.</font>\n  - We have added more\
+    \ examples in README. If you don't know how to use the tools, please refer to README. Also, as stated in the rules, you\
+    \ are free to share information on how to run the provided tools.\n- <a href=\"https://img.atcoder.jp/ahc008/f828b9475ffb41d54f05619db6ccbd4f.html?lang=en\"\
+    >Web visualizer</a>: By pasting the output generated by the local tester into the Output field, you can display the animation\
+    \ of the execution result.\n\n<font color=\"red\">You are allowed to share output images (png or gif) of the provided\
+    \ visualizer for seed=0 on twitter during the contest.</font> You have to use the specified hashtag and public account.\
+    \ You can only share visualization results and scores for seed=0. Do not share scores for other seeds or mention solutions\
+    \ or discussions. <a href=\"https://twitter.com/search?q=%23AHC008%20%23visualizer&src=typed_query&f=live\">List of shared\
+    \ images.</a>\n\n#### Specification of input/output files used by the tools\nInput files for the local tester consist\
+    \ of the prior information (the initial position and type of each pet, and the initial position of each person) followed\
+    \ by a random seed value to generate pet movements.\nSince the pet's movement depends on human actions, the input file\
+    \ contains only the random seed value and not specific movements.\nThe local tester writes outputs from your program directly\
+    \ to the output file.\nYour program may output comment lines starting with `#`.\nThe web version of the visualizer displays\
+    \ the comment lines at the time they are output, which may be useful for debugging and analysis.\nSince the judge program\
+    \ ignores all comment lines, you can submit a program that outputs comment lines as is.\n\nProblem constraints:\ntime_limit=3.0\
+    \ memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc008/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,508 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+// #include <map>
+// #include <set>
+#include <queue>
+#include <cmath>
+#include <iomanip>
+#include <limits>
+// --- Constants ---
+// Side length of the board; is_valid_coord accepts coordinates 1..GRID_SIZE.
+constexpr int GRID_SIZE = 30;
+// Number of iterations of the turn loop in main().
+constexpr int NUM_TURNS = 300;
+// Largest int; used as the "invalid distance" / "no candidate yet" sentinel.
+constexpr int INF = std::numeric_limits<int>::max();
+// A grid cell addressed by row `r` and column `c`.
+// Kept as a plain aggregate so it can be brace-initialised ({row, col}).
+struct Point {
+    int r;
+    int c;
+    // Two points are equal when both coordinates match.
+    bool operator==(const Point& other) const {
+        return !(r != other.r || c != other.c);
+    }
+    bool operator!=(const Point& other) const {
+        return r != other.r || c != other.c;
+    }
+    // Row-major ordering: compare rows first, then columns.
+    bool operator<(const Point& other) const {
+        return r < other.r || (r == other.r && c < other.c);
+    }
+};
+// Sentinel for "no point": lies outside the 1..GRID_SIZE range, so is_valid_point rejects it.
+const Point INVALID_POINT = {-1, -1};
+// Tunable parameters
+// Added to a candidate's score when the standing cell lies outside the human's inner safe rectangle.
+constexpr int STAND_OUTSIDE_INNER_SAFE_PENALTY = 1000;
+// Multiplied by the number of blocked neighbours of a wall cell and subtracted from its score (0 disables it).
+constexpr int ADJACENT_WALL_PRIORITY_BONUS = 0;
+// NOTE(review): the two NEAR_PET_* values are not referenced by the scoring code in this program.
+constexpr int NEAR_PET_PENALTY_POINTS_PER_PET = 0;
+constexpr int NEAR_PET_RADIUS = 2;
+// After this many consecutive turns without a buildable wall, the human switches to REPOSITIONING_STUCK.
+constexpr int MAX_STUCK_TURNS = 10; // Slightly increased
+// Directions: Up, Down, Left, Right (indices 0, 1, 2, 3)
+// The three character tables below are indexed in the same order as DIRS.
+const Point DIRS[4] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
+const char DIR_CHARS_BUILD[4] = {'u', 'd', 'l', 'r'};
+const char DIR_CHARS_MOVE[4] = {'U', 'D', 'L', 'R'};
+const char PET_MOVE_CHARS[4] = {'U', 'D', 'L', 'R'};
+// State tracked for one pet.
+struct PetInfo {
+    Point pos;  // current cell; advanced from the per-turn move strings read on stdin
+    int type;   // pet type as read from the input (not consulted elsewhere in this program)
+    int id;     // index into pets_global_state
+};
+// High-level goal of a human; recomputed at the start of every turn in decide_human_actions.
+enum class HumanObjective {
+    BUILDING_WALLS,        // at least one assigned wall cell is still passable
+    GOING_TO_SAFE_SPOT,    // all assigned walls done, not yet at final_stand_pos
+    STAYING_IN_SAFE_SPOT,  // all assigned walls done and standing on final_stand_pos
+    REPOSITIONING_STUCK    // building was blocked for MAX_STUCK_TURNS turns; walk toward final_stand_pos
+    // FLEEING_PET_IN_PEN removed, simplified objective setting
+};
+// Per-human planning state. The strip / safe-area / wall fields are filled once in initialize_game.
+struct HumanInfo {
+    Point pos;                               // current cell; updated in apply_actions_and_update_state
+    int id;                                  // index into humans_global_state
+    int strip_r_start;                       // first row of this human's horizontal strip
+    int strip_r_end;                         // last row of the strip (inclusive)
+    Point inner_safe_ul;                     // upper-left corner of the inner safe rectangle
+    Point inner_safe_br;                     // bottom-right corner of the inner safe rectangle
+    Point final_stand_pos;                   // cell the human walks to once building is finished
+    std::vector<Point> assigned_wall_cells;  // cells this human should wall off (sorted, de-duplicated)
+    HumanObjective objective;                // set every turn before it is read
+    int turns_stuck_building = 0;            // consecutive turns with unbuilt walls but no buildable candidate
+};
+// --- Game Grid and State ---
+// Wall map, indexed [row][col]; sized GRID_SIZE + 1 so 1-based coordinates can be used directly.
+bool is_impassable_grid_static[GRID_SIZE + 1][GRID_SIZE + 1];
+std::vector<PetInfo> pets_global_state;
+std::vector<HumanInfo> humans_global_state;
+// Counts read from the input in initialize_game.
+int N_pets_global, M_humans_global;
+// Scratch grids for get_bfs_move_char; both are reset at the start of each search.
+Point bfs_parent_grid[GRID_SIZE + 1][GRID_SIZE + 1];
+bool bfs_visited_grid[GRID_SIZE + 1][GRID_SIZE + 1];
+// --- Utility Functions ---
+// True when `val` is a legal 1-based row or column index.
+bool is_valid_coord(int val) {
+    const bool below_range = val < 1;
+    const bool above_range = val > GRID_SIZE;
+    return !(below_range || above_range);
+}
+// True when both coordinates of `p` lie on the board.
+bool is_valid_point(Point p) {
+    if (!is_valid_coord(p.r)) return false;
+    return is_valid_coord(p.c);
+}
+// L1 distance between two cells; returns INF if either point is off the board.
+int manhattan_distance(Point p1, Point p2) {
+    const bool both_on_board = is_valid_point(p1) && is_valid_point(p2);
+    if (!both_on_board) {
+        return INF;
+    }
+    const int row_gap = std::abs(p1.r - p2.r);
+    const int col_gap = std::abs(p1.c - p2.c);
+    return row_gap + col_gap;
+}
+// Counts how many of the four neighbours of `p` are either off the board
+// or already marked impassable.
+int count_adjacent_walls_or_boundaries(Point p) {
+    int blocked = 0;
+    for (const Point& step : DIRS) {
+        const Point nb = {p.r + step.r, p.c + step.c};
+        // Short-circuit keeps the grid lookup from running on off-board cells.
+        if (!is_valid_point(nb) || is_impassable_grid_static[nb.r][nb.c]) {
+            ++blocked;
+        }
+    }
+    return blocked;
+}
+// Decides whether `wall_pos` may be turned into a wall this turn, based on
+// the current positions only: the cell must be on the board and still
+// passable, no pet may be on it or at Manhattan distance 1 from it, and no
+// human other than the builder may be standing on it.
+bool can_theoretically_build_at(Point wall_pos, int builder_human_id) {
+    if (!is_valid_point(wall_pos) || is_impassable_grid_static[wall_pos.r][wall_pos.c]) {
+        return false;
+    }
+    const bool pet_in_the_way = std::any_of(
+        pets_global_state.begin(), pets_global_state.end(),
+        [&wall_pos](const PetInfo& pet) {
+            return pet.pos == wall_pos || manhattan_distance(wall_pos, pet.pos) == 1;
+        });
+    if (pet_in_the_way) {
+        return false;
+    }
+    const bool other_human_on_cell = std::any_of(
+        humans_global_state.begin(), humans_global_state.end(),
+        [&wall_pos, builder_human_id](const HumanInfo& human) {
+            // The builder is skipped; only other humans are checked.
+            return human.id != builder_human_id && human.pos == wall_pos;
+        });
+    return !other_human_on_cell;
+}
+// Breadth-first search from `start_pos` to `target_pos` over passable cells,
+// also avoiding every cell in `current_turn_tentative_walls`.
+// Returns the move character ('U','D','L','R') of the first step on the path
+// found, or '.' when start equals target, start is off the board, or the
+// target is unreachable. Overwrites bfs_visited_grid / bfs_parent_grid.
+char get_bfs_move_char(Point start_pos, Point target_pos,
+                       const std::vector<Point>& current_turn_tentative_walls) {
+    if (start_pos == target_pos) return '.';
+    std::queue<Point> frontier;
+    frontier.push(start_pos);
+    // Reset the shared scratch grids before searching.
+    for (int row = 1; row <= GRID_SIZE; ++row) {
+        for (int col = 1; col <= GRID_SIZE; ++col) {
+            bfs_visited_grid[row][col] = false;
+            bfs_parent_grid[row][col] = INVALID_POINT;
+        }
+    }
+    if (!is_valid_point(start_pos)) return '.';
+    bfs_visited_grid[start_pos.r][start_pos.c] = true;
+    const auto reserved_as_wall = [&current_turn_tentative_walls](const Point& cell) {
+        return std::find(current_turn_tentative_walls.begin(),
+                         current_turn_tentative_walls.end(),
+                         cell) != current_turn_tentative_walls.end();
+    };
+    bool reached_target = false;
+    while (!reached_target && !frontier.empty()) {
+        const Point here = frontier.front();
+        frontier.pop();
+        for (const Point& step : DIRS) {
+            const Point there = {here.r + step.r, here.c + step.c};
+            if (!is_valid_point(there)) continue;
+            if (is_impassable_grid_static[there.r][there.c]) continue;
+            if (bfs_visited_grid[there.r][there.c]) continue;
+            if (reserved_as_wall(there)) continue;
+            bfs_visited_grid[there.r][there.c] = true;
+            bfs_parent_grid[there.r][there.c] = here;
+            if (there == target_pos) {
+                // Stop as soon as the target is discovered.
+                reached_target = true;
+                break;
+            }
+            frontier.push(there);
+        }
+    }
+    if (!reached_target) return '.';
+    // Walk the parent chain back until the cell whose parent is the start
+    // (or that has no parent): that cell is the first step of the path.
+    Point first_step = target_pos;
+    for (;;) {
+        const Point prev = bfs_parent_grid[first_step.r][first_step.c];
+        if (prev == INVALID_POINT || prev == start_pos) break;
+        first_step = prev;
+    }
+    for (int d = 0; d < 4; ++d) {
+        const Point candidate = {start_pos.r + DIRS[d].r, start_pos.c + DIRS[d].c};
+        if (candidate == first_step) {
+            return DIR_CHARS_MOVE[d];
+        }
+    }
+    return '.';
+}
+// Reads the initial state from stdin and prepares the per-human plan.
+// Input order: N, then N lines "r c type" for the pets, then M, then M lines
+// "r c" for the humans. The board rows are split into M horizontal strips
+// (one per human, in input order); each human gets an inner safe rectangle,
+// a final standing cell and a list of wall cells to build.
+void initialize_game() {
+    std::cin >> N_pets_global;
+    pets_global_state.resize(N_pets_global);
+    for (int i = 0; i < N_pets_global; ++i) {
+        pets_global_state[i].id = i;
+        std::cin >> pets_global_state[i].pos.r >> pets_global_state[i].pos.c >> pets_global_state[i].type;
+    }
+    std::cin >> M_humans_global;
+    humans_global_state.resize(M_humans_global);
+    // Start with no walls anywhere (index 0 is cleared too, although unused by 1-based coordinates).
+    for(int r_grid=0; r_grid <= GRID_SIZE; ++r_grid) for(int c_grid=0; c_grid <= GRID_SIZE; ++c_grid) is_impassable_grid_static[r_grid][c_grid] = false;
+    // Strip heights: the first `remainder_heights` humans get one extra row.
+    int base_strip_height = GRID_SIZE / M_humans_global;
+    int remainder_heights = GRID_SIZE % M_humans_global;
+    int current_r_start_coord = 1;
+    for (int i = 0; i < M_humans_global; ++i) {
+        HumanInfo& human = humans_global_state[i];
+        human.id = i;
+        std::cin >> human.pos.r >> human.pos.c;
+        int strip_h_for_this_human = base_strip_height + (i < remainder_heights ? 1 : 0);
+        human.strip_r_start = current_r_start_coord;
+        human.strip_r_end = human.strip_r_start + strip_h_for_this_human - 1;
+        human.strip_r_end = std::min(human.strip_r_end, GRID_SIZE);
+        int actual_strip_h = human.strip_r_end - human.strip_r_start + 1;
+        int actual_strip_w = GRID_SIZE;
+        // Inner safe rectangle: the strip shrunk by one cell on each side,
+        // but only along dimensions that are at least 3 cells long.
+        human.inner_safe_ul.r = human.strip_r_start + (actual_strip_h >= 3 ? 1 : 0);
+        human.inner_safe_ul.c = 1 + (actual_strip_w >= 3 ? 1 : 0);
+        human.inner_safe_br.r = human.strip_r_end - (actual_strip_h >= 3 ? 1 : 0);
+        human.inner_safe_br.c = GRID_SIZE - (actual_strip_w >= 3 ? 1 : 0);
+        // Guard against an inverted rectangle.
+        if (human.inner_safe_ul.r > human.inner_safe_br.r) human.inner_safe_br.r = human.inner_safe_ul.r;
+        if (human.inner_safe_ul.c > human.inner_safe_br.c) human.inner_safe_br.c = human.inner_safe_ul.c;
+        // Final standing cell: centre of the inner rectangle, clamped into it.
+        human.final_stand_pos = {
+            human.inner_safe_ul.r + (human.inner_safe_br.r - human.inner_safe_ul.r) / 2,
+            human.inner_safe_ul.c + (human.inner_safe_br.c - human.inner_safe_ul.c) / 2
+        };
+        human.final_stand_pos.r = std::max(human.inner_safe_ul.r, std::min(human.inner_safe_br.r, human.final_stand_pos.r));
+        human.final_stand_pos.c = std::max(human.inner_safe_ul.c, std::min(human.inner_safe_br.c, human.final_stand_pos.c));
+        if (!is_valid_point(human.final_stand_pos)) {
+            human.final_stand_pos = {human.strip_r_start, 1};
+        }
+        human.assigned_wall_cells.clear();
+        int r_s = human.strip_r_start;
+        int r_e = human.strip_r_end;
+        // Top row of the strip: full width for the first human, otherwise
+        // only the right half (columns GRID_SIZE/2 + 1 .. GRID_SIZE).
+        if (i == 0) {
+            for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
+        } else {
+            for (int c_coord = GRID_SIZE / 2 + 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
+        }
+        // Bottom row of the strip: full width for the last human, otherwise
+        // only the left half (columns 1 .. GRID_SIZE/2).
+        if (i == M_humans_global - 1) {
+            for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
+        } else {
+            for (int c_coord = 1; c_coord <= GRID_SIZE / 2; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
+        }
+        // Leftmost and rightmost columns on the rows strictly between top and bottom.
+        for (int r_mid = r_s + 1; r_mid <= r_e - 1; ++r_mid) {
+             human.assigned_wall_cells.push_back({r_mid, 1});
+             human.assigned_wall_cells.push_back({r_mid, GRID_SIZE});
+        }
+        // Sort (row-major, via Point::operator<) and drop duplicates.
+        std::sort(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end());
+        human.assigned_wall_cells.erase(
+            std::unique(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end()),
+            human.assigned_wall_cells.end()
+        );
+        current_r_start_coord = human.strip_r_end + 1;
+    }
+}
+// Chooses one action character per human for the current turn and returns
+// them as a string of length M_humans_global ('.' = stay, lowercase = build
+// a wall in that direction, uppercase = move in that direction).
+// Humans are processed in index order; walls and move targets chosen by
+// earlier humans are recorded so later humans can avoid them, and a final
+// pass cancels moves that would still collide.
+// Side effects: updates each human's `objective` and `turns_stuck_building`.
+std::string decide_human_actions() {
+    std::string actions_str(M_humans_global, '.');
+    // Cells that some human has already decided to wall off this turn.
+    std::vector<Point> tentative_walls_this_turn;
+    // Destination cell of each human's move this turn (INVALID_POINT if not moving).
+    std::vector<Point> tentative_move_targets_this_turn(M_humans_global, INVALID_POINT);
+    for (int i = 0; i < M_humans_global; ++i) {
+        HumanInfo& human = humans_global_state[i];
+        // Count assigned wall cells that are still passable.
+        int unbuilt_walls_count = 0;
+        for (const auto& wall_cell : human.assigned_wall_cells) {
+            if (is_valid_point(wall_cell) && !is_impassable_grid_static[wall_cell.r][wall_cell.c]) {
+                unbuilt_walls_count++;
+            }
+        }
+        // Derive this turn's objective from the remaining work.
+        if (unbuilt_walls_count == 0) {
+             human.objective = (human.pos == human.final_stand_pos) ?
+                              HumanObjective::STAYING_IN_SAFE_SPOT :
+                              HumanObjective::GOING_TO_SAFE_SPOT;
+        } else {
+            human.objective = HumanObjective::BUILDING_WALLS;
+        }
+        // Too many consecutive blocked turns: give up building for this turn and reposition.
+        if(human.objective == HumanObjective::BUILDING_WALLS && human.turns_stuck_building >= MAX_STUCK_TURNS) {
+            human.objective = HumanObjective::REPOSITIONING_STUCK;
+        }
+        char chosen_action_for_human_i = '.';
+        if (human.objective == HumanObjective::STAYING_IN_SAFE_SPOT) {
+            chosen_action_for_human_i = '.';
+        } else if (human.objective == HumanObjective::GOING_TO_SAFE_SPOT ||
+                   human.objective == HumanObjective::REPOSITIONING_STUCK) {
+            // Repositioning resets the stuck counter, so building is retried next turn.
+            if(human.objective == HumanObjective::REPOSITIONING_STUCK) human.turns_stuck_building = 0;
+            chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
+        } else if (human.objective == HumanObjective::BUILDING_WALLS) {
+            // Search all (wall cell, adjacent standing cell) pairs for the lowest score.
+            Point best_wall_target = INVALID_POINT;
+            Point best_stand_point = INVALID_POINT;
+            int min_eval_score = INF;
+            for (const auto& wall_coord : human.assigned_wall_cells) {
+                if (!is_valid_point(wall_coord) || is_impassable_grid_static[wall_coord.r][wall_coord.c]) continue;
+                if (!can_theoretically_build_at(wall_coord, human.id)) continue;
+                int adj_wall_bonus_val = count_adjacent_walls_or_boundaries(wall_coord) * ADJACENT_WALL_PRIORITY_BONUS;
+                int current_near_pet_penalty = 0; // NEAR_PET_PENALTY_POINTS_PER_PET is 0
+                for (int k_dir_idx = 0; k_dir_idx < 4; ++k_dir_idx) {
+                    Point potential_stand_pos = {wall_coord.r + DIRS[k_dir_idx].r,
+                                                 wall_coord.c + DIRS[k_dir_idx].c};
+                    if (!is_valid_point(potential_stand_pos) || is_impassable_grid_static[potential_stand_pos.r][potential_stand_pos.c]) continue;
+                    // Do not plan to stand on a cell another human is walling off this turn.
+                    bool conflict_with_tentative_wall_build_spot = false;
+                    for(const auto& tw : tentative_walls_this_turn) { if(potential_stand_pos == tw) { conflict_with_tentative_wall_build_spot = true; break; }}
+                    if(conflict_with_tentative_wall_build_spot) continue;
+                    // Nor on a cell an earlier human is moving into this turn.
+                    bool conflict_with_tentative_move_dest = false;
+                    for(int j=0; j < i; ++j) {
+                        if (tentative_move_targets_this_turn[j] == potential_stand_pos) { conflict_with_tentative_move_dest = true; break; }
+                    }
+                    if (conflict_with_tentative_move_dest) continue;
+                    // Score: Manhattan distance to the standing cell, minus the wall bonus,
+                    // plus a large penalty when standing outside the inner safe rectangle.
+                    int current_dist_to_stand = manhattan_distance(human.pos, potential_stand_pos);
+                    int current_eval_score = current_dist_to_stand - adj_wall_bonus_val + current_near_pet_penalty;
+                    bool is_inside_inner_safe_region =
+                        (potential_stand_pos.r >= human.inner_safe_ul.r &&
+                         potential_stand_pos.r <= human.inner_safe_br.r &&
+                         potential_stand_pos.c >= human.inner_safe_ul.c &&
+                         potential_stand_pos.c <= human.inner_safe_br.c);
+                    if (!is_inside_inner_safe_region) {
+                        current_eval_score += STAND_OUTSIDE_INNER_SAFE_PENALTY;
+                    }
+                    if (current_eval_score < min_eval_score) {
+                        min_eval_score = current_eval_score;
+                        best_wall_target = wall_coord;
+                        best_stand_point = potential_stand_pos;
+                    } else if (current_eval_score == min_eval_score) {
+                        // Deterministic tie-break: smaller wall cell first, then smaller standing cell.
+                        if (best_wall_target.r == -1 ||
+                            wall_coord < best_wall_target ||
+                            (wall_coord == best_wall_target && potential_stand_pos < best_stand_point)) {
+                            best_wall_target = wall_coord;
+                            best_stand_point = potential_stand_pos;
+                        }
+                    }
+                }
+            }
+            if (best_wall_target.r != -1) {
+                human.turns_stuck_building = 0;
+                if (human.pos == best_stand_point) {
+                    // Already in position: build toward the chosen wall cell.
+                    for(int k_dir=0; k_dir<4; ++k_dir){
+                        if(human.pos.r + DIRS[k_dir].r == best_wall_target.r &&
+                           human.pos.c + DIRS[k_dir].c == best_wall_target.c){
+                            chosen_action_for_human_i = DIR_CHARS_BUILD[k_dir];
+                            break;
+                        }
+                    }
+                } else {
+                    // Otherwise take one BFS step toward the standing cell.
+                    chosen_action_for_human_i = get_bfs_move_char(human.pos, best_stand_point, tentative_walls_this_turn);
+                }
+            } else {
+                // No buildable candidate this turn: count it as stuck and drift toward the final spot.
+                if (unbuilt_walls_count > 0) {
+                    human.turns_stuck_building++;
+                }
+                if (human.pos != human.final_stand_pos) {
+                    chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
+                } else {
+                    chosen_action_for_human_i = '.';
+                }
+            }
+        }
+        actions_str[i] = chosen_action_for_human_i;
+        // Record the consequence of the chosen action for the humans processed after this one.
+        if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'u' || chosen_action_for_human_i == 'd' || chosen_action_for_human_i == 'l' || chosen_action_for_human_i == 'r')) {
+            for(int k_dir=0; k_dir<4; ++k_dir) {
+                if (chosen_action_for_human_i == DIR_CHARS_BUILD[k_dir]) {
+                    Point built_wall_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
+                    if (is_valid_point(built_wall_pos)) {
+                        tentative_walls_this_turn.push_back(built_wall_pos);
+                    }
+                    break;
+                }
+            }
+        } else if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'U' || chosen_action_for_human_i == 'D' || chosen_action_for_human_i == 'L' || chosen_action_for_human_i == 'R')) {
+            for(int k_dir=0; k_dir<4; ++k_dir) {
+                if (chosen_action_for_human_i == DIR_CHARS_MOVE[k_dir]) {
+                    Point target_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
+                     if (is_valid_point(target_pos)) {
+                        tentative_move_targets_this_turn[i] = target_pos;
+                     } else {
+                        // A move off the board is replaced by "stay".
+                        actions_str[i] = '.';
+                     }
+                    break;
+                }
+            }
+        }
+    }
+    // Final pass: cancel any move whose destination is being walled off this turn
+    // (possibly by a later human) or is also the destination of a lower-indexed human's move.
+    for (int i = 0; i < M_humans_global; ++i) {
+        if (actions_str[i] != '.' && (actions_str[i] == 'U' || actions_str[i] == 'D' || actions_str[i] == 'L' || actions_str[i] == 'R')) {
+            Point target_move_sq = tentative_move_targets_this_turn[i];
+            if (target_move_sq.r == -1) {
+                actions_str[i] = '.';
+                continue;
+            }
+            bool conflict_with_wall = false;
+            for (const auto& wall_being_built : tentative_walls_this_turn) {
+                if (target_move_sq == wall_being_built) {
+                    conflict_with_wall = true;
+                    break;
+                }
+            }
+            if (conflict_with_wall) {
+                actions_str[i] = '.';
+            } else {
+                for (int j = 0; j < i; ++j) {
+                    if (actions_str[j] != '.' && (actions_str[j] == 'U' || actions_str[j] == 'D' || actions_str[j] == 'L' || actions_str[j] == 'R') &&
+                        tentative_move_targets_this_turn[j] == target_move_sq) {
+                        actions_str[i] = '.';
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    return actions_str;
+}
+// Applies the emitted action string to the local model, then reads the
+// pets' moves for this turn from stdin and advances their positions.
+// Order matters: all walls are placed first, then humans move (a move into
+// an impassable or off-board cell is ignored), then pets are updated.
+void apply_actions_and_update_state(const std::string& actions_str_final) {
+    // Index of `ch` in a 4-entry direction table, or -1 when absent.
+    const auto direction_of = [](const char (&table)[4], char ch) -> int {
+        for (int d = 0; d < 4; ++d) {
+            if (table[d] == ch) return d;
+        }
+        return -1;
+    };
+    // Pass 1: wall placement.
+    for (int h = 0; h < M_humans_global; ++h) {
+        const int d = direction_of(DIR_CHARS_BUILD, actions_str_final[h]);
+        if (d < 0) continue;
+        const Point& from = humans_global_state[h].pos;
+        const Point wall_cell = {from.r + DIRS[d].r, from.c + DIRS[d].c};
+        if (is_valid_point(wall_cell)) {
+            is_impassable_grid_static[wall_cell.r][wall_cell.c] = true;
+        }
+    }
+    // Pass 2: human movement, checked against the walls placed above.
+    for (int h = 0; h < M_humans_global; ++h) {
+        const int d = direction_of(DIR_CHARS_MOVE, actions_str_final[h]);
+        if (d < 0) continue;
+        Point& where = humans_global_state[h].pos;
+        const Point dest = {where.r + DIRS[d].r, where.c + DIRS[d].c};
+        if (is_valid_point(dest) && !is_impassable_grid_static[dest.r][dest.c]) {
+            where = dest;
+        }
+    }
+    // Pass 3: one whitespace-separated token per pet; "." means no movement.
+    for (int p = 0; p < N_pets_global; ++p) {
+        std::string moves;
+        std::cin >> moves;
+        if (moves == ".") continue;
+        Point& pet_pos = pets_global_state[p].pos;
+        for (const char ch : moves) {
+            const int d = direction_of(PET_MOVE_CHARS, ch);
+            if (d < 0) continue;
+            pet_pos.r += DIRS[d].r;
+            pet_pos.c += DIRS[d].c;
+        }
+    }
+}
+// Entry point: read the initial state, then for each turn print the chosen
+// actions (flushing stdout) and apply them together with the pets' moves.
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(nullptr);
+    initialize_game();
+    int turn_idx = 0;
+    while (turn_idx < NUM_TURNS) {
+        const std::string actions_to_perform = decide_human_actions();
+        // Newline followed by an explicit flush, so the line is sent before pet moves are read.
+        std::cout << actions_to_perform << '\n' << std::flush;
+        apply_actions_and_update_state(actions_to_perform);
+        ++turn_idx;
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc011/best_program.cpp ADDED Viewed

	@@ -0,0 +1,730 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <array>
+#include <algorithm>
+#include <unordered_map>
+#include <map> // For A* visited set
+#include <iomanip>
+#include <chrono>
+#include <functional> // For std::hash
+#include <cmath>      // For std::round
+#include <random>     // For std::mt19937
+#include <numeric>    // For std::iota
+#include <queue>      // For A* search (priority_queue)
+// Constants for tile connections
+// Bit flags within a tile's hex value, one per side that carries a connection.
+const int LEFT_MASK = 1;
+const int UP_MASK = 2;
+const int RIGHT_MASK = 4;
+const int DOWN_MASK = 8;
+// Max N value, actual N read from input
+const int N_MAX_CONST = 10;
+int N_actual; // Actual N for the current test case
+int T_param;  // Actual T for the current test case
+// Offset from the empty cell to the tile that slides into it, indexed like MOVE_CHARS
+// (index 0 = 'U' takes the tile from the row above, and so on).
+const int DR_TILE_RELATIVE_TO_EMPTY[] = {-1, 1, 0, 0};
+const int DC_TILE_RELATIVE_TO_EMPTY[] = {0, 0, -1, 1};
+const char MOVE_CHARS[] = {'U', 'D', 'L', 'R'};
+// Fixed seed, so the Zobrist keys are the same on every run.
+std::mt19937 zobrist_rng_engine(123456789);
+std::uniform_int_distribution<uint64_t> distrib_uint64;
+// One random key per (row, column, tile value 0..15); filled by init_zobrist_keys.
+uint64_t zobrist_tile_keys[N_MAX_CONST][N_MAX_CONST][16];
+// Fast hex char -> int lookup
+// Filled by init_char_to_val; characters that are not hex digits map to 0.
+int CHAR_TO_VAL[256];
+// Fills CHAR_TO_VAL: '0'-'9' -> 0..9, 'a'-'f' and 'A'-'F' -> 10..15,
+// every other character -> 0.
+inline void init_char_to_val() {
+    for (int& slot : CHAR_TO_VAL) {
+        slot = 0;
+    }
+    for (int value = 0; value < 16; ++value) {
+        if (value < 10) {
+            CHAR_TO_VAL['0' + value] = value;
+        } else {
+            const int letter_offset = value - 10;
+            CHAR_TO_VAL['a' + letter_offset] = value;
+            CHAR_TO_VAL['A' + letter_offset] = value;
+        }
+    }
+}
+// Draws a random 64-bit key for every (row, column, tile value) of the
+// N_actual x N_actual board, in row-major order with tile value innermost.
+void init_zobrist_keys() {
+    for (int row = 0; row < N_actual; ++row) {
+        for (int col = 0; col < N_actual; ++col) {
+            for (uint64_t& key : zobrist_tile_keys[row][col]) {
+                key = distrib_uint64(zobrist_rng_engine);
+            }
+        }
+    }
+}
+// Converts one hexadecimal digit character to its value (0..15).
+// Accepts '0'-'9', 'a'-'f' and 'A'-'F', matching the CHAR_TO_VAL table.
+// Any other character yields 0, so the result is always a safe index into
+// the 16-entry tile-value dimension of zobrist_tile_keys.
+int hex_char_to_int(char c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    return 0;
+}
+// Sliding-puzzle board: a grid of hex tile characters ('0' is the empty
+// cell), the empty cell's position, and an incrementally maintained
+// Zobrist hash of the layout.
+struct Board {
+    std::array<std::array<char, N_MAX_CONST>, N_MAX_CONST> tiles;
+    int empty_r, empty_c;
+    uint64_t zobrist_hash_value;
+    Board() : empty_r{0}, empty_c{0}, zobrist_hash_value{0} {}
+    // Recomputes the hash from scratch over the N_actual x N_actual area.
+    void calculate_initial_hash() {
+        uint64_t acc = 0;
+        for (int row = 0; row < N_actual; ++row) {
+            for (int col = 0; col < N_actual; ++col) {
+                const unsigned char tile = static_cast<unsigned char>(tiles[row][col]);
+                acc ^= zobrist_tile_keys[row][col][CHAR_TO_VAL[tile]];
+            }
+        }
+        zobrist_hash_value = acc;
+    }
+    // Updates the hash after a tile has slid into the former empty cell.
+    // Must be called after `tiles` has been updated: the moved tile's value
+    // is read from the cell that used to be empty.
+    void update_hash_after_move(int pos_tile_becomes_empty_r, int pos_tile_becomes_empty_c,
+                                int pos_empty_gets_tile_r, int pos_empty_gets_tile_c) {
+        const int tile_val = hex_char_to_int(tiles[pos_empty_gets_tile_r][pos_empty_gets_tile_c]);
+        const auto& vacated_keys = zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c];
+        const auto& filled_keys = zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c];
+        // Remove (tile at old cell, empty at new cell); add (empty at old cell, tile at new cell).
+        zobrist_hash_value ^= vacated_keys[tile_val];
+        zobrist_hash_value ^= filled_keys[0];
+        zobrist_hash_value ^= vacated_keys[0];
+        zobrist_hash_value ^= filled_keys[tile_val];
+    }
+    // Slides the tile lying in direction `move_char` (relative to the empty
+    // cell) into the empty cell. Returns false, leaving the board untouched,
+    // for an unknown character or when that neighbour is off the board.
+    bool apply_move_char(char move_char) {
+        int dir = -1;
+        for (int d = 0; d < 4; ++d) {
+            if (MOVE_CHARS[d] == move_char) {
+                dir = d;
+                break;
+            }
+        }
+        if (dir < 0) return false;
+        const int src_r = empty_r + DR_TILE_RELATIVE_TO_EMPTY[dir];
+        const int src_c = empty_c + DC_TILE_RELATIVE_TO_EMPTY[dir];
+        const bool on_board = src_r >= 0 && src_r < N_actual && src_c >= 0 && src_c < N_actual;
+        if (!on_board) return false;
+        tiles[empty_r][empty_c] = tiles[src_r][src_c];
+        tiles[src_r][src_c] = '0';
+        update_hash_after_move(src_r, src_c, empty_r, empty_c);
+        empty_r = src_r;
+        empty_c = src_c;
+        return true;
+    }
+};
+// Result of evaluating a board in calculate_scores.
+struct ScoreComponents {
+    int max_tree_size;   // presumably the size of the largest tree-shaped component — confirm in calculate_scores
+    int num_components;  // presumably the number of connected components of non-empty tiles — confirm
+};
+// Memoised evaluation results, keyed by Board::zobrist_hash_value (looked up in calculate_scores).
+std::unordered_map<uint64_t, ScoreComponents> s_value_cache_by_hash;
+// NOTE(review): presumably an upper bound on the cache's entry count — the enforcing code is not in view.
+const size_t MAX_SCORE_CACHE_SIZE_CONST = 2000000;
+// Disjoint-set union over the N*N board cells that also tracks, per set
+// root, how many nodes and how many edges the set contains.
+// `nodes_in_set` starts at 0 for every cell; the caller is expected to set
+// it to 1 for each cell that should count as a node before adding edges.
+struct DSU {
+    std::vector<int> parent;
+    std::vector<int> nodes_in_set;  // meaningful at set roots only
+    std::vector<int> edges_in_set;  // meaningful at set roots only
+    int N_sq_total_cells;
+    // Creates current_N * current_N singleton sets with zeroed counters.
+    DSU(int current_N) : N_sq_total_cells(current_N * current_N) {
+        parent.resize(N_sq_total_cells);
+        std::iota(parent.begin(), parent.end(), 0);
+        nodes_in_set.assign(N_sq_total_cells, 0);
+        edges_in_set.assign(N_sq_total_cells, 0);
+    }
+    // Returns the root of i's set, compressing the path on the way.
+    int find(int i) {
+        if (parent[i] == i)
+            return i;
+        return parent[i] = find(parent[i]);
+    }
+    // Merges the sets of i_idx and j_idx (smaller node count under larger)
+    // and sums their counters. A no-op when both are already in one set.
+    void unite(int i_idx, int j_idx) {
+        int root_i = find(i_idx);
+        int root_j = find(j_idx);
+        // Without this guard the root's counters would be added to
+        // themselves, doubling nodes_in_set and edges_in_set.
+        if (root_i == root_j) return;
+        if (nodes_in_set[root_i] < nodes_in_set[root_j]) std::swap(root_i, root_j);
+        parent[root_j] = root_i;
+        nodes_in_set[root_i] += nodes_in_set[root_j];
+        edges_in_set[root_i] += edges_in_set[root_j];
+    }
+    // Records an edge between u_idx and v_idx: merges their sets if needed
+    // and increments the edge counter of the resulting set.
+    void add_edge(int u_idx, int v_idx) {
+        int root_u = find(u_idx);
+        int root_v = find(v_idx);
+        if (root_u != root_v) {
+            unite(u_idx, v_idx);
+            edges_in_set[find(u_idx)]++;
+        } else {
+            edges_in_set[root_u]++;
+        }
+    }
+};
+// Computes, for `board`, the size of the largest tree-shaped component and the number of
+// components that contain at least one tile. Two neighbouring tiles are linked when both
+// are non-zero and their facing line bits are set (RIGHT/LEFT horizontally, DOWN/UP
+// vertically). Results are memoised by the board's Zobrist hash.
+ScoreComponents calculate_scores(const Board& board) {
+    const auto cached = s_value_cache_by_hash.find(board.zobrist_hash_value);
+    if (cached != s_value_cache_by_hash.end()) return cached->second;
+
+    const int n = N_actual;
+    DSU components(n);
+    // Tile cells start as singleton sets of size 1; the empty cell ('0') has size 0.
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c < n; ++c) {
+            components.nodes_in_set[r * n + c] = (board.tiles[r][c] != '0') ? 1 : 0;
+        }
+    }
+    // Horizontal links: (r, c) -- (r, c + 1).
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c + 1 < n; ++c) {
+            const int left_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c]];
+            const int right_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c + 1]];
+            const bool joined = left_val && right_val && (left_val & RIGHT_MASK) && (right_val & LEFT_MASK);
+            if (joined) components.add_edge(r * n + c, r * n + (c + 1));
+        }
+    }
+    // Vertical links: (r, c) -- (r + 1, c).
+    for (int r = 0; r + 1 < n; ++r) {
+        for (int c = 0; c < n; ++c) {
+            const int upper_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c]];
+            const int lower_val = CHAR_TO_VAL[(unsigned char)board.tiles[r + 1][c]];
+            const bool joined = upper_val && lower_val && (upper_val & DOWN_MASK) && (lower_val & UP_MASK);
+            if (joined) components.add_edge(r * n + c, (r + 1) * n + c);
+        }
+    }
+    // Inspect every root that owns at least one tile; a component is a tree exactly
+    // when its edge count is one less than its node count.
+    ScoreComponents result = {0, 0};
+    for (int i = 0; i < components.N_sq_total_cells; ++i) {
+        if (components.parent[i] != i) continue;
+        const int node_count = components.nodes_in_set[i];
+        if (node_count <= 0) continue;
+        ++result.num_components;
+        const bool is_tree = (components.edges_in_set[i] == node_count - 1);
+        if (is_tree && node_count > result.max_tree_size) {
+            result.max_tree_size = node_count;
+        }
+    }
+    // Only memoise while the cache is below its size cap.
+    if (s_value_cache_by_hash.size() < MAX_SCORE_CACHE_SIZE_CONST) {
+        s_value_cache_by_hash[board.zobrist_hash_value] = result;
+    }
+    return result;
+}
+int TARGET_EMPTY_R_GLOBAL_FOR_A_STAR, TARGET_EMPTY_C_GLOBAL_FOR_A_STAR; // Goal cell of the empty square: written by find_path_for_empty(), read by AStarEmptyState::operator> as the A* heuristic target
+bool A_STAR_PHASE_WAS_RUN = false; // When true, calculate_beam_score() uses the small empty-distance penalty weight (0.5) instead of the moderate one (10.0)
+// Heuristic value used to rank beam states (higher is better).
+//   scores               - tree/component summary of the board (see calculate_scores)
+//   K_total              - total number of moves spent to reach the board
+//   current_board_state  - only its empty-cell position is read
+// A complete tree (max_tree_size == N*N - 1) gets a huge base value that decreases with
+// K_total. Otherwise the value is dominated by the largest tree size, reduced by the
+// number of extra components, the move count, and the Manhattan distance of the empty
+// cell from the bottom-right corner (N-1, N-1).
+double calculate_beam_score(const ScoreComponents& scores, int K_total, const Board& current_board_state) {
+    const int tile_count = N_actual * N_actual - 1;
+    const int largest_tree = scores.max_tree_size;
+    if (largest_tree == tile_count) {
+        const double FULL_TREE_BASE_SCORE = 1e18;
+        return FULL_TREE_BASE_SCORE + (double)(T_param * 2 - K_total);
+    }
+
+    const double tree_weight = 1e9;
+    const double component_weight = tree_weight * 0.6; // slightly below tree_weight to favour growing the tree
+    const double move_weight = 1.0;
+    // Small penalty once A_STAR_PHASE_WAS_RUN is set, moderate penalty otherwise.
+    const double empty_dist_weight = A_STAR_PHASE_WAS_RUN ? move_weight * 0.5 : move_weight * 10.0;
+
+    double total = (double)largest_tree * tree_weight;
+    if (scores.num_components > 1) {
+        total -= (double)(scores.num_components - 1) * component_weight;
+    } else if (scores.num_components == 0 && tile_count > 0) {
+        total -= (double)tile_count * component_weight;
+    }
+
+    // Bonus for a single connected component that is at most two tiles short of complete.
+    const bool nearly_complete = largest_tree >= tile_count - 2 &&
+                                 scores.num_components == 1 &&
+                                 largest_tree < tile_count;
+    if (nearly_complete) {
+        total += tree_weight * 0.5;
+    }
+
+    total -= (double)K_total * move_weight;
+
+    // Manhattan distance of the empty cell from (N-1, N-1).
+    const int corner = N_actual - 1;
+    const int dist_empty_to_corner = std::abs(current_board_state.empty_r - corner) +
+                                     std::abs(current_board_state.empty_c - corner);
+    total -= dist_empty_to_corner * empty_dist_weight;
+    return total;
+}
+// Contest-style score for a final state.
+//   S        - size of the largest tree on the board
+//   K_total  - number of moves used
+// Incomplete tree: round(500000 * S / (N*N - 1)).
+// Complete tree:   round(500000 * (2 - K_total / T_param)), or 0 when K_total exceeds T_param.
+// Returns 0 for the degenerate board with no tiles (N*N - 1 == 0).
+double calculate_actual_score(int S, int K_total) {
+    const int tile_count = N_actual * N_actual - 1;
+    if (tile_count == 0) return 0;
+    if (S != tile_count) {
+        return std::round(500000.0 * (double)S / (N_actual * N_actual - 1.0));
+    }
+    if (K_total > T_param) return 0;
+    return std::round(500000.0 * (2.0 - (double)K_total / T_param));
+}
+/* Function: count_matched_edge_pair
+   Doc: Returns 1 when cells (r1,c1) and (r2,c2) are horizontal or vertical neighbours,
+        both hold a non-zero tile, and the two tiles have their facing line bits set
+        (RIGHT/LEFT for a horizontal pair, DOWN/UP for a vertical pair). Returns 0 in
+        every other case, including cells that are not adjacent. Argument order of the
+        two cells does not matter.
+*/
+inline int count_matched_edge_pair(const Board& b, int r1, int c1, int r2, int c2) {
+    const bool same_row = (r1 == r2);
+    if (!same_row && c1 != c2) return 0;
+    // Normalise so that (r1,c1) is the left/upper cell, and pick the bit each side must expose.
+    int first_mask;
+    int second_mask;
+    if (same_row) {
+        if (c1 > c2) std::swap(c1, c2);
+        if (c2 != c1 + 1) return 0;
+        first_mask = RIGHT_MASK;
+        second_mask = LEFT_MASK;
+    } else {
+        if (r1 > r2) std::swap(r1, r2);
+        if (r2 != r1 + 1) return 0;
+        first_mask = DOWN_MASK;
+        second_mask = UP_MASK;
+    }
+    const int first_val = CHAR_TO_VAL[(unsigned char)b.tiles[r1][c1]];
+    const int second_val = CHAR_TO_VAL[(unsigned char)b.tiles[r2][c2]];
+    if (first_val == 0 || second_val == 0) return 0;
+    return ((first_val & first_mask) && (second_val & second_mask)) ? 1 : 0;
+}
+/* Function: count_cell_matched_degree
+   Doc: Number of matched connections between cell (r,c) and its up, down, left and
+        right neighbours; neighbours that would fall off the board contribute 0.
+*/
+inline int count_cell_matched_degree(const Board& b, int r, int c) {
+    const int up = (r > 0) ? count_matched_edge_pair(b, r - 1, c, r, c) : 0;
+    const int down = (r + 1 < N_actual) ? count_matched_edge_pair(b, r, c, r + 1, c) : 0;
+    const int left = (c > 0) ? count_matched_edge_pair(b, r, c - 1, r, c) : 0;
+    const int right = (c + 1 < N_actual) ? count_matched_edge_pair(b, r, c, r, c + 1) : 0;
+    return up + down + left + right;
+}
+/* Function: compute_total_matched_edges
+   Doc: Total number of matched connections on the board. Each cell is paired with its
+        right and its lower neighbour, so every undirected pair is counted exactly once.
+*/
+inline int compute_total_matched_edges(const Board& b) {
+    int total = 0;
+    for (int r = 0; r < N_actual; ++r) {
+        for (int c = 0; c < N_actual; ++c) {
+            if (c + 1 < N_actual) total += count_matched_edge_pair(b, r, c, r, c + 1);
+            if (r + 1 < N_actual) total += count_matched_edge_pair(b, r, c, r + 1, c);
+        }
+    }
+    return total;
+}
+struct BeamHistoryEntry { // One node of the move-history tree used to rebuild beam paths.
+    int parent_history_idx; // Index of the predecessor entry in beam_history_storage; -1 marks the root sentinel.
+    char move_char_taken; // Move character that led from the parent entry to this one.
+};
+std::vector<BeamHistoryEntry> beam_history_storage; // Append-only store of history entries; main() pushes the sentinel at index 0.
+const size_t MAX_BEAM_HISTORY_STORAGE_SIZE_CONST = 3000000; // Reserved capacity; main() stops the beam search shortly before the store would reach it.
+// One node kept in the beam. Field order matters: instances are brace-initialised
+// positionally by the search code.
+struct BeamState {
+    Board board;                  // board after the moves leading to this state
+    double beam_score_val;        // value from calculate_beam_score()
+    int k_beam_moves;             // moves made by the beam search (excludes the initial routing moves)
+    int history_idx;              // index of this state's entry in beam_history_storage
+    int prev_move_direction_idx;  // direction index of the last move, -1 for the root state
+    int approx_edges;             // heuristic: number of matched undirected edges
+    // Sort order: higher beam score first; equal scores are ordered by larger approx_edges.
+    bool operator<(const BeamState& other) const {
+        const bool scores_differ = (beam_score_val != other.beam_score_val);
+        return scores_differ ? (beam_score_val > other.beam_score_val)
+                             : (approx_edges > other.approx_edges);
+    }
+};
+// Doc: Lightweight candidate used to pre-filter by approximate edge count before expensive scoring.
+struct CandidateLight {
+    Board board;                  // candidate board
+    int approx_edges;             // approximate matched-edge count used as the sort key
+    int k_beam_moves;             // beam moves made to reach the candidate
+    int history_idx;              // index into beam_history_storage
+    int prev_move_direction_idx;  // direction index of the last move
+    // Sorts candidates in descending order of approx_edges.
+    bool operator<(const CandidateLight& other) const {
+        return other.approx_edges < approx_edges;
+    }
+};
+std::chrono::steady_clock::time_point T_START_CHRONO_MAIN; // Start timestamp captured in main(); the time budgets are measured from it.
+const int TIME_LIMIT_MS_SLACK_CONST = 400; // Universal slack: milliseconds subtracted from the 2950 ms budget when the beam loop checks the clock.
+long long TIME_LIMIT_MS_EFFECTIVE_MAIN; // Assigned in main() after routing (2950 - elapsed - slack); no read of it appears in the code visible here.
+std::mt19937 rng_stochastic_selection_main; // Seeded in main(); shuffles the non-elite candidate indices during beam selection.
+std::unordered_map<uint64_t, int> min_K_to_reach_by_hash_main; // Smallest total move count recorded so far per board hash; used to prune repeated boards.
+const size_t MAX_MIN_K_CACHE_SIZE_CONST = 2000000; // New hashes are no longer inserted once the map holds this many entries.
+// Search node for routing the empty cell: its position, the number of moves made so far
+// and the move string that produced it.
+struct AStarEmptyState {
+    int r, c;
+    int g_cost;
+    std::string path;
+    // "Greater" means lower priority in a min-heap: larger f = g + Manhattan distance to
+    // the global target first, then larger g on ties.
+    bool operator>(const AStarEmptyState& other) const {
+        const int target_r = TARGET_EMPTY_R_GLOBAL_FOR_A_STAR;
+        const int target_c = TARGET_EMPTY_C_GLOBAL_FOR_A_STAR;
+        const int f_this = g_cost + (std::abs(r - target_r) + std::abs(c - target_c));
+        const int f_other = other.g_cost + (std::abs(other.r - target_r) + std::abs(other.c - target_c));
+        return (f_this != f_other) ? (f_this > f_other) : (g_cost > other.g_cost);
+    }
+};
+// A* over empty-cell positions: returns a move string that brings the empty cell of
+// `initial_board_state_for_A_star` to (target_r, target_c). Only the empty-cell position
+// of the board is read. Returns "" when the empty cell is already at the target or when
+// no path is found within the depth cap.
+// Side effect: overwrites the TARGET_EMPTY_*_GLOBAL_FOR_A_STAR globals, which the
+// AStarEmptyState comparator reads.
+std::string find_path_for_empty(const Board& initial_board_state_for_A_star, int target_r, int target_c) {
+    TARGET_EMPTY_R_GLOBAL_FOR_A_STAR = target_r;
+    TARGET_EMPTY_C_GLOBAL_FOR_A_STAR = target_c;
+
+    using OpenSet = std::priority_queue<AStarEmptyState, std::vector<AStarEmptyState>, std::greater<AStarEmptyState>>;
+    OpenSet open;
+    // Best known move count per cell; T_param + 1 stands for "not reached yet".
+    std::vector<std::vector<int>> best_g(N_actual, std::vector<int>(N_actual, T_param + 1));
+
+    const int start_r = initial_board_state_for_A_star.empty_r;
+    const int start_c = initial_board_state_for_A_star.empty_c;
+    open.push({start_r, start_c, 0, ""});
+    best_g[start_r][start_c] = 0;
+
+    const int depth_cap = N_actual * N_actual * 2; // Allow more depth just in case
+    while (!open.empty()) {
+        const AStarEmptyState node = open.top();
+        open.pop();
+        // Skip entries made obsolete by a cheaper route to the same cell.
+        if (node.g_cost > best_g[node.r][node.c]) continue;
+        if (node.r == target_r && node.c == target_c) return node.path;
+        if (node.g_cost >= depth_cap) continue;
+
+        const int g_next = node.g_cost + 1;
+        for (int dir = 0; dir < 4; ++dir) {
+            // The tile that slides sits here; after the move this is the new empty cell.
+            const int nr = node.r + DR_TILE_RELATIVE_TO_EMPTY[dir];
+            const int nc = node.c + DC_TILE_RELATIVE_TO_EMPTY[dir];
+            const bool inside = nr >= 0 && nr < N_actual && nc >= 0 && nc < N_actual;
+            if (!inside) continue;
+            if (best_g[nr][nc] <= g_next) continue;
+            best_g[nr][nc] = g_next;
+            open.push({nr, nc, g_next, node.path + MOVE_CHARS[dir]});
+        }
+    }
+    return "";
+}
+// Rebuilds the beam-search move string that ends at history entry `final_history_idx`
+// by following parent links in beam_history_storage back to the sentinel.
+// The walk stops at index 0, at an out-of-range index, or at an entry whose parent is -1.
+std::string reconstruct_beam_path(int final_history_idx) {
+    std::string moves;
+    for (int idx = final_history_idx; idx > 0; ) {
+        if (static_cast<size_t>(idx) >= beam_history_storage.size()) break;
+        const BeamHistoryEntry& entry = beam_history_storage[idx];
+        if (entry.parent_history_idx == -1) break;
+        moves += entry.move_char_taken;
+        idx = entry.parent_history_idx;
+    }
+    // Moves were collected from last to first.
+    std::reverse(moves.begin(), moves.end());
+    return moves;
+}
+/* Function: main
+   Doc: Reads N, T and the N x N board from stdin, then
+          1. routes the empty cell to whichever of the four corners gives the best beam score,
+          2. runs a time-bounded beam search from that board,
+          3. greedily extends the best path found while time and move budget remain,
+        and prints the resulting move string on stdout.
+*/
+int main(int /*argc*/, char** /*argv*/) {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    unsigned int random_seed_stochastic = std::chrono::steady_clock::now().time_since_epoch().count();
+    rng_stochastic_selection_main.seed(random_seed_stochastic);
+    T_START_CHRONO_MAIN = std::chrono::steady_clock::now();
+    std::cin >> N_actual >> T_param;
+    init_char_to_val();
+    init_zobrist_keys();
+    Board current_board_obj;
+    for (int i = 0; i < N_actual; ++i) {
+        std::string row_str;
+        std::cin >> row_str;
+        for (int j = 0; j < N_actual; ++j) {
+            current_board_obj.tiles[i][j] = row_str[j];
+            if (current_board_obj.tiles[i][j] == '0') {
+                current_board_obj.empty_r = i;
+                current_board_obj.empty_c = j;
+            }
+        }
+    }
+    current_board_obj.calculate_initial_hash();
+    std::string initial_empty_moves_path = "";
+    // Try routing empty to each corner and pick the one that maximizes our beam score after routing.
+    {
+        const int cr[4] = {0, 0, N_actual - 1, N_actual - 1};
+        const int cc[4] = {0, N_actual - 1, 0, N_actual - 1};
+        double best_score = -1e300;
+        std::string best_path;
+        // Set once up front (it was assigned on every iteration before the first scoring
+        // call anyway): relax empty-distance penalty after guided routing.
+        A_STAR_PHASE_WAS_RUN = true;
+        for (int i = 0; i < 4; ++i) {
+            std::string path = find_path_for_empty(current_board_obj, cr[i], cc[i]);
+            Board tmp = current_board_obj;
+            for (char ch : path) tmp.apply_move_char(ch);
+            ScoreComponents sc = calculate_scores(tmp);
+            double scv = calculate_beam_score(sc, (int)path.length(), tmp);
+            if (scv > best_score) { best_score = scv; best_path = path; }
+        }
+        initial_empty_moves_path = best_path;
+    }
+    // From here on current_board_obj is the board AFTER the initial routing moves.
+    for (char move_char : initial_empty_moves_path) {
+        current_board_obj.apply_move_char(move_char);
+    }
+    int K_initial_empty_moves = (int)initial_empty_moves_path.length();
+    // Adaptive time limit after A*
+    auto time_after_astar = std::chrono::steady_clock::now();
+    long long elapsed_astar_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_after_astar - T_START_CHRONO_MAIN).count();
+    TIME_LIMIT_MS_EFFECTIVE_MAIN = 2950 - elapsed_astar_ms - TIME_LIMIT_MS_SLACK_CONST;
+    // Reserve caches (still used by evaluation in MCTS)
+    beam_history_storage.reserve(MAX_BEAM_HISTORY_STORAGE_SIZE_CONST);
+    s_value_cache_by_hash.reserve(MAX_SCORE_CACHE_SIZE_CONST);
+    min_K_to_reach_by_hash_main.reserve(MAX_MIN_K_CACHE_SIZE_CONST);
+    // Initialize best known based on current board (after optional A* to corner)
+    ScoreComponents init_score_comp = calculate_scores(current_board_obj);
+    double overall_best_actual_score = calculate_actual_score(init_score_comp.max_tree_size, K_initial_empty_moves);
+    // Invariant: overall_best_path_str always starts with initial_empty_moves_path.
+    std::string overall_best_path_str = initial_empty_moves_path;
+    // -------------------------
+    // BEAM SEARCH (restored, time-bounded)
+    // -------------------------
+    // Doc: Deterministic beam search with:
+    //   - Zobrist-based visited table storing minimal K to reach a hash
+    //   - Strong primary score on largest tree size, penalties on #components and move count
+    //   - Tiebreaker using local matched-edge heuristic around the moved tile and the previous empty
+    //   - Elite retention + stochastic sampling for diversity
+    //   - Stops on time/memory budget or when T is exhausted
+    std::vector<BeamState> current_beam;
+    ScoreComponents initial_scores_for_beam = calculate_scores(current_board_obj);
+    double initial_beam_eval_score = calculate_beam_score(initial_scores_for_beam, K_initial_empty_moves, current_board_obj);
+    beam_history_storage.push_back({-1, ' '}); // history idx 0 is sentinel
+    current_beam.push_back({current_board_obj, initial_beam_eval_score, 0, 0, -1, compute_total_matched_edges(current_board_obj)});
+    min_K_to_reach_by_hash_main[current_board_obj.zobrist_hash_value] = K_initial_empty_moves;
+    int beam_width;
+    float elite_ratio = 0.2f;
+    int stochastic_sample_pool_factor = 3;
+    if (N_actual <= 6) { beam_width = 1200;}
+    else if (N_actual == 7) { beam_width = 1000;}
+    else if (N_actual == 8) { beam_width = 700;}
+    else if (N_actual == 9) { beam_width = 400;}
+    else { beam_width = 250;}
+    std::vector<BeamState> candidates_pool;
+    candidates_pool.reserve(beam_width * 4 + 16);
+    std::vector<BeamState> next_beam_states_temp;
+    next_beam_states_temp.reserve(beam_width + 16);
+    std::vector<int> stochastic_selection_indices;
+    stochastic_selection_indices.reserve(stochastic_sample_pool_factor * beam_width + 16);
+    int k_iter_count_beam = 0;
+    for (int k_beam_iter = 0; K_initial_empty_moves + k_beam_iter < T_param; ++k_beam_iter) {
+        k_iter_count_beam++;
+        // The clock is only consulted every 10th iteration.
+        if (k_iter_count_beam % 10 == 0) {
+            long long now_ms = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - T_START_CHRONO_MAIN).count();
+            if (now_ms > 2950 - TIME_LIMIT_MS_SLACK_CONST) break;
+        }
+        // Stop before the history store could outgrow its reserved capacity in this iteration.
+        if (beam_history_storage.size() >= MAX_BEAM_HISTORY_STORAGE_SIZE_CONST - ((size_t)beam_width * 4 + 128)) {
+            break;
+        }
+        candidates_pool.clear();
+        bool found_full_this_iter = false;
+        for (const auto& current_state_in_beam : current_beam) {
+            Board temp_board_for_moves = current_state_in_beam.board;
+            int parent_k_beam = current_state_in_beam.k_beam_moves;
+            int parent_history_idx = current_state_in_beam.history_idx;
+            int prev_m_dir_idx = current_state_in_beam.prev_move_direction_idx;
+            for (int move_dir_idx = 0; move_dir_idx < 4; ++move_dir_idx) {
+                // Never immediately undo the previous move (direction indices pair up as idx ^ 1).
+                if (prev_m_dir_idx != -1 && ((prev_m_dir_idx ^ 1) == move_dir_idx)) continue;
+                char current_move_char = MOVE_CHARS[move_dir_idx];
+                int original_empty_r = temp_board_for_moves.empty_r;
+                int original_empty_c = temp_board_for_moves.empty_c;
+                uint64_t original_hash = temp_board_for_moves.zobrist_hash_value;
+                int tile_to_move_r = original_empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
+                int tile_to_move_c = original_empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
+                if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
+                    continue;
+                }
+                // Inline move for speed (swap chars and update hash/coords)
+                char moved_tile_hex_val = temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c];
+                temp_board_for_moves.tiles[original_empty_r][original_empty_c] = moved_tile_hex_val;
+                temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = '0';
+                temp_board_for_moves.empty_r = tile_to_move_r;
+                temp_board_for_moves.empty_c = tile_to_move_c;
+                temp_board_for_moves.update_hash_after_move(tile_to_move_r, tile_to_move_c, original_empty_r, original_empty_c);
+                int next_k_beam = parent_k_beam + 1;
+                int next_K_total = K_initial_empty_moves + next_k_beam;
+                // Prune boards already reached with no more moves than this.
+                bool already_reached_better = false;
+                auto it_map = min_K_to_reach_by_hash_main.find(temp_board_for_moves.zobrist_hash_value);
+                if (it_map != min_K_to_reach_by_hash_main.end()) {
+                    if (it_map->second <= next_K_total) {
+                        already_reached_better = true;
+                    } else {
+                        it_map->second = next_K_total;
+                    }
+                } else {
+                    if (min_K_to_reach_by_hash_main.size() < MAX_MIN_K_CACHE_SIZE_CONST) {
+                        min_K_to_reach_by_hash_main[temp_board_for_moves.zobrist_hash_value] = next_K_total;
+                    }
+                }
+                if (already_reached_better) {
+                    // revert
+                    temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
+                    temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
+                    temp_board_for_moves.empty_r = original_empty_r;
+                    temp_board_for_moves.empty_c = original_empty_c;
+                    temp_board_for_moves.zobrist_hash_value = original_hash;
+                    continue;
+                }
+                ScoreComponents next_scores = calculate_scores(temp_board_for_moves);
+                if (next_scores.max_tree_size == N_actual * N_actual - 1) found_full_this_iter = true;
+                double next_beam_eval_score = calculate_beam_score(next_scores, next_K_total, temp_board_for_moves);
+                beam_history_storage.push_back({parent_history_idx, current_move_char});
+                int new_history_idx = (int)beam_history_storage.size() - 1;
+                int approx_local = count_cell_matched_degree(temp_board_for_moves, original_empty_r, original_empty_c)
+                                 + count_cell_matched_degree(temp_board_for_moves, tile_to_move_r, tile_to_move_c);
+                candidates_pool.push_back({temp_board_for_moves, next_beam_eval_score, next_k_beam, new_history_idx, move_dir_idx, approx_local});
+                double current_actual_score_val = calculate_actual_score(next_scores.max_tree_size, next_K_total);
+                if (current_actual_score_val > overall_best_actual_score) {
+                    overall_best_actual_score = current_actual_score_val;
+                    overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
+                } else if (current_actual_score_val == overall_best_actual_score &&
+                           (size_t)next_K_total < overall_best_path_str.length()) {
+                    // Equal score: keep the shorter path. The candidate's path length is
+                    // exactly next_K_total, so the comparison is done before paying for
+                    // the path reconstruction.
+                    overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
+                }
+                // revert
+                temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
+                temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
+                temp_board_for_moves.empty_r = original_empty_r;
+                temp_board_for_moves.empty_c = original_empty_c;
+                temp_board_for_moves.zobrist_hash_value = original_hash;
+            }
+        }
+        if (candidates_pool.empty()) break;
+        if (found_full_this_iter) { break; } // Early exit: earliest full tree yields minimal K in beam
+        std::sort(candidates_pool.begin(), candidates_pool.end());
+        next_beam_states_temp.clear();
+        // Elites: the top slice of the sorted pool is always kept.
+        int num_elites = std::min((int)candidates_pool.size(), (int)(beam_width * elite_ratio));
+        num_elites = std::max(0, num_elites);
+        for (int i = 0; i < num_elites && i < (int)candidates_pool.size(); ++i) {
+            next_beam_states_temp.push_back(candidates_pool[i]);
+        }
+        // Fill the rest of the beam with a random sample from the next-best candidates.
+        if ((int)next_beam_states_temp.size() < beam_width && (int)candidates_pool.size() > num_elites) {
+            stochastic_selection_indices.clear();
+            int pool_start_idx = num_elites;
+            int pool_end_idx = std::min((int)candidates_pool.size(), num_elites + stochastic_sample_pool_factor * beam_width);
+            for (int i = pool_start_idx; i < pool_end_idx; ++i) stochastic_selection_indices.push_back(i);
+            if (!stochastic_selection_indices.empty()) {
+                std::shuffle(stochastic_selection_indices.begin(), stochastic_selection_indices.end(), rng_stochastic_selection_main);
+            }
+            for (size_t i = 0; i < stochastic_selection_indices.size() && (int)next_beam_states_temp.size() < beam_width; ++i) {
+                next_beam_states_temp.push_back(candidates_pool[stochastic_selection_indices[i]]);
+            }
+        }
+        current_beam = next_beam_states_temp;
+        if (current_beam.empty()) break;
+    }
+    // Local refinement: quick greedy hill-climb on the best found solution within remaining time
+    auto t_ref_end = T_START_CHRONO_MAIN + std::chrono::milliseconds(2950 - 20);
+    Board refine_b = current_board_obj;
+    // current_board_obj already contains the initial routing moves, so replay only the
+    // beam-search suffix of the best path. Replaying the whole path would apply the
+    // routing prefix a second time and leave refine_b out of sync with the real board.
+    for (size_t i = initial_empty_moves_path.size(); i < overall_best_path_str.size(); ++i) {
+        refine_b.apply_move_char(overall_best_path_str[i]);
+    }
+    int K_now = (int)overall_best_path_str.size();
+    ScoreComponents sc_best = calculate_scores(refine_b);
+    int edges_best = compute_total_matched_edges(refine_b);
+    int last_dir_ref = -1;
+    if (!overall_best_path_str.empty()) {
+        char lastch = overall_best_path_str.back();
+        for (int i = 0; i < 4; ++i) if (MOVE_CHARS[i] == lastch) last_dir_ref = i;
+    }
+    while (sc_best.max_tree_size < N_actual * N_actual - 1 && K_now < T_param && std::chrono::steady_clock::now() < t_ref_end) {
+        int best_mv = -1; int best_S = sc_best.max_tree_size; int best_edges = edges_best;
+        // Try all non-backtracking moves and keep the best (lexicographically by S, then edges)
+        for (int mv = 0; mv < 4; ++mv) {
+            if (last_dir_ref != -1 && (last_dir_ref ^ 1) == mv) continue;
+            Board b2 = refine_b;
+            if (!b2.apply_move_char(MOVE_CHARS[mv])) continue;
+            ScoreComponents sc2 = calculate_scores(b2);
+            int e2 = compute_total_matched_edges(b2);
+            if (sc2.max_tree_size > best_S || (sc2.max_tree_size == best_S && e2 > best_edges)) {
+                best_mv = mv; best_S = sc2.max_tree_size; best_edges = e2;
+            }
+        }
+        // No move strictly improves (S, edges): stop.
+        if (best_mv == -1) break;
+        refine_b.apply_move_char(MOVE_CHARS[best_mv]);
+        overall_best_path_str.push_back(MOVE_CHARS[best_mv]);
+        sc_best.max_tree_size = best_S;
+        edges_best = best_edges;
+        last_dir_ref = best_mv;
+        ++K_now;
+    }
+    std::cout << overall_best_path_str << std::endl;
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc011/config.yaml ADDED Viewed

	@@ -0,0 +1,208 @@

+# ALE-Bench ahc011 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    Takahashi loves puzzles and is playing with the following famous sliding puzzle.\n> There are $N^2-1$ tiles on an $N \\\
+    times N$ board.\n> There is a single empty square, and you can slide an adjacent tile in any of the four directions into\
+    \ the empty square.\n> Some picture is divided into each tile. By repeatedly sliding the tiles, please align the picture.\n\
+    \nThe trouble is, Takahashi had thrown away the instruction manual, so he lost the target picture.\nAccording to his memory,\
+    \ the target picture was a <a href=\"https://en.wikipedia.org/wiki/Tree_(graph_theory)\">tree</a>.\nBy repeating the sliding\
+    \ operation, please complete a tree.\n\n![example](./images/example.gif)\n\nProblem Statement\n--------\nThere are $N^2-1$\
+    \ tiles on an $N \\times N$ board.\nLet $(i, j)$ denote the coordinates of row $i$ $(0\\leq i \\leq N-1)$ from the top\
+    \ and column $j$ $(0\\leq j\\leq N-1)$ from the left.\nEach tile contains a figure with lines from its center towards\
+    \ one or more of four directions: up, down, left, and right.\nWe represent each tile using a bitmask with 1 for left,\
+    \ 2 for up, 4 for right, and 8 for down, as follows.\n\n<table>\n<tr align=\"center\">\n<td>\nTile\n</td>\n<td>\n<svg\
+    \ height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\"\
+    \ height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"\
+    0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"\
+    lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\"\
+    \ x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<rect fill=\"lightgray\" height=\"40\" width=\"40\" x=\"0\" y=\"0\"/>\n</svg>\n\
+    </td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n\
+    <rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"\
+    0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"\
+    />\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\"\
+    \ stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n\
+    </g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\"\
+    >\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\"\
+    \ x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"\
+    40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\"\
+    \ stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"0\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n\
+    </g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\"\
+    >\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\"\
+    \ x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"\
+    40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\"\
+    \ stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\"\
+    \ x2=\"20\" y1=\"20\" y2=\"0\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n\
+    <svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"\
+    white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\"\
+    \ y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"\
+    lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\"\
+    \ x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"\
+    10\" x1=\"20\" x2=\"40\" y1=\"20\" y2=\"20\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n\
+    </td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n\
+    <rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"\
+    0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"\
+    />\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\"\
+    \ stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\"\
+    \ x2=\"40\" y1=\"20\" y2=\"20\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n\
+    <svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"\
+    white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\"\
+    \ y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"\
+    lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\"\
+    \ x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"\
+    10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"0\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"40\" y1=\"\
+    20\" y2=\"20\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\"\
+    \ id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"\
+    50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"\
+    0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\"\
+    \ x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"\
+    20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"\
+    0\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"40\" y1=\"20\" y2=\"20\"/>\n<circle cx=\"20\" cy=\"\
+    20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"\
+    50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line\
+    \ stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"\
+    1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\"\
+    \ y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"\
+    translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"\
+    20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50\
+    \ 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"\
+    -5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"\
+    40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g\
+    \ transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n\
+    <line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"\
+    #905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"\
+    http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"\
+    0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line\
+    \ stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n\
+    <line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"0\"/>\n<line stroke=\"#905020\" stroke-width=\"\
+    10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n\
+    </td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n\
+    <rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"\
+    0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"\
+    />\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\"\
+    \ stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\"\
+    \ x2=\"20\" y1=\"20\" y2=\"0\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"\
+    />\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"\
+    -5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"\
+    -5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"\
+    lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"\
+    0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"\
+    40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"40\" y1=\"20\" y2=\"\
+    20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"20\" cy=\"\
+    20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"\
+    50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line\
+    \ stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"\
+    1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\"\
+    \ y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"\
+    translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"\
+    #905020\" stroke-width=\"10\" x1=\"20\" x2=\"40\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\"\
+    \ x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n\
+    <td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect\
+    \ fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"-5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\"\
+    \ x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n\
+    <line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"\
+    1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"\
+    10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"0\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"40\" y1=\"\
+    20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"40\"/>\n<circle cx=\"\
+    20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n<td>\n<svg height=\"50\" id=\"vis\" viewBox=\"-5 -5 50\
+    \ 50\" width=\"50\" xmlns=\"http://www.w3.org/2000/svg\">\n<rect fill=\"white\" height=\"50\" width=\"50\" x=\"-5\" y=\"\
+    -5\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"40\" y1=\"0\" y2=\"0\"/>\n<line stroke=\"lightgray\"\
+    \ stroke-width=\"1\" x1=\"0\" x2=\"0\" y1=\"0\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"0\" x2=\"\
+    40\" y1=\"40\" y2=\"40\"/>\n<line stroke=\"lightgray\" stroke-width=\"1\" x1=\"40\" x2=\"40\" y1=\"0\" y2=\"40\"/>\n<g\
+    \ transform=\"translate(0,0)\">\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"0\" y1=\"20\" y2=\"20\"/>\n\
+    <line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"20\" y2=\"0\"/>\n<line stroke=\"#905020\" stroke-width=\"\
+    10\" x1=\"20\" x2=\"40\" y1=\"20\" y2=\"20\"/>\n<line stroke=\"#905020\" stroke-width=\"10\" x1=\"20\" x2=\"20\" y1=\"\
+    20\" y2=\"40\"/>\n<circle cx=\"20\" cy=\"20\" fill=\"#905020\" r=\"5\"/>\n</g>\n</svg>\n</td>\n</tr>\n<tr align=\"center\"\
+    >\n<td>\nBinary\n</td>\n<td>\n0000\n</td>\n<td>\n0001\n</td>\n<td>\n0010\n</td>\n<td>\n0011\n</td>\n<td>\n0100\n</td>\n\
+    <td>\n0101\n</td>\n<td>\n0110\n</td>\n<td>\n0111\n</td>\n<td>\n1000\n</td>\n<td>\n1001\n</td>\n<td>\n1010\n</td>\n<td>\n\
+    1011\n</td>\n<td>\n1100\n</td>\n<td>\n1101\n</td>\n<td>\n1110\n</td>\n<td>\n1111\n</td>\n</tr>\n<tr align=\"center\">\n\
+    <td>\nHex\n</td>\n<td>\n0\n</td>\n<td>\n1\n</td>\n<td>\n2\n</td>\n<td>\n3\n</td>\n<td>\n4\n</td>\n<td>\n5\n</td>\n<td>\n\
+    6\n</td>\n<td>\n7\n</td>\n<td>\n8\n</td>\n<td>\n9\n</td>\n<td>\na\n</td>\n<td>\nb\n</td>\n<td>\nc\n</td>\n<td>\nd\n</td>\n\
+    <td>\ne\n</td>\n<td>\nf\n</td>\n</tr>\n</table>\n\nThe number 0 represents an empty square, and there is exactly one empty\
+    \ square.\nWith a single operation, you can slide one of the tiles adjacent to the empty square in the four directions\
+    \ to the location of the empty square. After the move, the square from which the tile was moved becomes an empty square.\n\
+    You can repeat the sliding operation at most $T=2\\times N^3$ times.\n\nAfter finishing the operations, consider a graph\
+    \ with $N^2-1$ squares other than the empty square as vertices and the following edges.\n\n- For each $(i, j)$ $(0\\leq\
+    \ i\\leq N-2, 0\\leq j\\leq N-1)$, if $(i,j)$ is a tile with a downward line and $(i+1,j)$ is a tile with an upward line,\
+    \ then construct an edge between $(i,j)$ and $(i+1,j)$.\n- For each $(i, j)$ $(0\\leq i\\leq N-1, 0\\leq j\\leq N-2)$,\
+    \ if $(i,j)$ is a tile with a rightward line and $(i,j+1)$ is a tile with a leftward line, then construct an edge between\
+    \ $(i,j)$ and $(i,j+1)$.\n\nYour task is to find a short sequence of operations such that the size of the largest tree\
+    \ in this graph, i.e., the number of vertices of the largest connected component without cycles, is as large as possible.\n\
+    It is guaranteed that within $T$ operations you can construct a tree of size $N^2-1$ with the empty square in $(N-1,N-1)$.\n\
+    Note that the final position of the empty square is arbitrary and you do not have to move it to $(N-1,N-1)$.\n\nScoring\n\
+    --------\nLet $K$ be the number of operations and $S$ be the size of the largest tree painted on the board after applying\
+    \ the sequence of operations.\nThen, you will get the following score.\n\n- If $S<N^2-1$, $\\mathrm{round}\\left(500000\\\
+    times \\frac{S}{N^2-1}\\right)$\n- If $S=N^2-1$, $\\mathrm{round}\\left(500000\\times (2-\\frac{K}{T})\\right)$\n\nIf\
+    \ the number of operations exceeds $T$ or you perform an illegal operation to move a non-existent tile to the empty square,\
+    \ it will be judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"不正解\">WA</span>\
+    \ .\n\n#### Number of test cases\n- Provisional test: 50\n- System test: 3000. We will publish <a href=\"https://img.atcoder.jp/ahc011/seeds.txt\"\
+    >seeds.txt</a> (sha256=041256f962c6ba1a60294ad7a575684d6e401163cba316cf978f2e66a4f7b0e3) after the contest is over.\n\
+    - Both provisional and system tests contain the same number of inputs for each $N=6,7,8,9,10$.\n\nThe score of a submission\
+    \ is the total scores for each test case.\nIn the provisional test, if your submission produces illegal output or exceeds\
+    \ the time limit for some test cases, the submission itself will be judged as <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Wrong Answer\">WA</span> or <span class='label label-warning' data-toggle='tooltip' data-placement='top'\
+    \ title=\"Time Limit Exceeded\">TLE</span> , and the score of the submission will be zero.\nIn the system test, if your\
+    \ submission produces illegal output or exceeds the time limit for some test cases, only the score for those test cases\
+    \ will be zero.\n\n#### About execution time\nExecution time may vary slightly from run to run.\nIn addition, since system\
+    \ tests simultaneously perform a large number of executions, it has been observed that execution time increases by several\
+    \ percent compared to provisional tests.\nFor these reasons, submissions that are very close to the time limit may result\
+    \ in <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>\
+    \ in the system test.\nPlease measure the execution time in your program to terminate the process, or have enough margin\
+    \ in the execution time.\n\n\nInput\n--------\nInput is given from Standard Input in the following format:\n\n~~~\n$N$\
+    \ $T$\n$t_{0,0}$ $\\cdots$ $t_{0,N-1}$\n$\\vdots$\n$t_{N-1,0}$ $\\cdots$ $t_{N-1,N-1}$\n~~~\n\n$N$ is an integer representing\
+    \ the height and width of the board, satisfying $6\\leq N\\leq 10$.\nIn all test cases, $T=2\\times N^3$.\n$t_{i,0}$ $\\\
+    cdots$ $t_{i,N-1}$ is a string of length $N$.\nThe $j$-th character $t_{i,j}$ is `0`-`9` or `a`-`f` which is the hexadecimal\
+    \ representation of the figure contained in the tile $(i,j)$.\n\nOutput\n--------\n\nBy representing each operation of\
+    \ sliding the upward, downward, leftward, or rightward adjacent tile into the empty square by a single character `U`,\
+    \ `D`, `L` or `R`, respectively, output the sequence of $K$ operations as a string of length $K$ in one line to Standard\
+    \ Output.\n\n<a href=\"https://img.atcoder.jp/ahc011/df8bb452a2.html?lang=en&seed=0&output=RRRDLUULDDDDLUUUR\">Show example</a>\n\
+    \n\nInput Generation\n--------\n<details>\n\n#### Generation of $N$ and $T$\nWe generate $N$ as the remainder of the seed\
+    \ value divided by 5, plus 6.\nHence, you can generate inputs with a specific $N$ value by adjusting the seed value.\nWe set\
+    \ $T=2\\times N^3$.\n\n#### Generation of $t_{i,j}$\nLet $[k]=\\\\{0,1,\\cdots,k-1\\\\}$.\nWe randomly generate a spanning\
+    \ tree $(V,F)$ with vertices $V=[N]\\times [N]\\setminus \\\\{(N-1,N-1)\\\\}$ as follows.\n\n1. First, we randomly shuffle\
+    \ edges $\\\\{\\\\{(i,j),(i+1,j)\\\\}\\mid (i,j)\\in [N-1]\\times [N]\\setminus \\\\{(N-2,N-1)\\\\}\\\\}\\cup\\\\{\\\\\
+    {(i,j),(i,j+1)\\\\}\\mid (i,j)\\in [N]\\times [N-1]\\setminus \\\\{(N-1,N-2)\\\\}\\\\}$ and obtain an ordered edge list\
+    \ $e_0, e_1, \\cdots$.\n2. Starting from $F=\\emptyset$, for each $e_k=\\\\{(i,j),(i',j')\\\\}$, we insert $e_k$ into\
+    \ $F$ if $(i,j)$ and $(i',j')$ are not connected in $(V,F)$.\n\nFrom the obtained spanning tree, we construct tiles on\
+    \ which a tree of size $N^2-1$ is drawn, as follows.\n\n1. For each $(i,j)$, if $\\\\{(i,j),(i+1,j)\\\\}\\in F$, then\
+    \ draw a downward line on tile $(i, j)$ and an upward line on tile $(i+1,j)$.\n2. For each $(i,j)$, if $\\\\{(i,j),(i,j+1)\\\
+    \\}\\in F$, then draw a rightward line on tile $(i, j)$ and a leftward line on tile $(i,j+1)$.\n\nFinally, starting from\
+    \ the constructed tile layout, randomly perform $T=2\\times N^3$ sliding operations, and let $t$ be the tile layout after\
+    \ the operations.\nHere, the $k (\\geq 2)$-th operation is chosen uniformly at random from at most three directions excluding\
+    \ the direction that reverts the $(k-1)$-th operation.\n\n</details>\n\nTools (Input generator and visualizer)\n--------\n\
+    - <a href=\"https://img.atcoder.jp/ahc011/df8bb452a2.html?lang=en\">Web version</a>: This is more powerful than the local\
+    \ version and can display animations.\n- <a href=\"https://img.atcoder.jp/ahc011/df8bb452a2.zip\">Local version</a>: You\
+    \ need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n  - <a href=\"https://img.atcoder.jp/ahc011/df8bb452a2_windows.zip\"\
+    >Pre-compiled binary for Windows</a>: If you are not familiar with the Rust language environment, please use this instead.\n\
+    \n<font color=\"red\">\nYou are allowed to share output images (PNG) of the provided visualizer for seed=0 on twitter\
+    \ during the contest.\nNote that sharing in video format is prohibited.\n</font>\nYou have to use the specified hashtag\
+    \ and public account.\nYou can only share visualization results and scores for seed=0.\nDo not share GIFs, output itself,\
+    \ scores for other seeds or mention solutions or discussions.\n<font color=\"red\">\n(Added) The visualizer has a feature\
+    \ to change the value of N, but sharing visualization results for changed inputs is also prohibited.\n</font>\n\n<a href=\"\
+    https://twitter.com/search?q=%23AHC011%20%23visualizer&src=typed_query&f=live\">List of shared images</a>\n\n{sample example}\n\
+    \n\n    Problem constraints:\n    time_limit=3.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc011/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc011"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc011/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,607 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <array>
+#include <algorithm>
+#include <unordered_map>
+#include <map> // For A* visited set
+#include <iomanip>
+#include <chrono>
+#include <functional> // For std::hash
+#include <cmath>      // For std::round
+#include <random>     // For std::mt19937
+#include <numeric>    // For std::iota
+#include <queue>      // For A* search (priority_queue)
+// Constants for tile connections: each tile is a 4-bit value with one bit per
+// direction in which a line leaves the tile.
+const int LEFT_MASK = 1;
+const int UP_MASK = 2;
+const int RIGHT_MASK = 4;
+const int DOWN_MASK = 8;
+// Max N value, actual N read from input
+const int N_MAX_CONST = 10;
+int N_actual; // Actual N for the current test case
+int T_param;  // Actual T for the current test case
+// (row, column) offset from the empty square to the tile that slides into it,
+// indexed in the same order as MOVE_CHARS.
+const int DR_TILE_RELATIVE_TO_EMPTY[] = {-1, 1, 0, 0};
+const int DC_TILE_RELATIVE_TO_EMPTY[] = {0, 0, -1, 1};
+// Output character for each move direction index.
+const char MOVE_CHARS[] = {'U', 'D', 'L', 'R'};
+// Random source for the Zobrist keys, constructed with a fixed seed.
+std::mt19937 zobrist_rng_engine(123456789);
+std::uniform_int_distribution<uint64_t> distrib_uint64;
+// One key per (row, column, tile value 0-15).
+uint64_t zobrist_tile_keys[N_MAX_CONST][N_MAX_CONST][16];
+// Fills the Zobrist key table for the active N_actual x N_actual region of the
+// board, drawing one 64-bit key per (row, column, tile value) in row-major order.
+void init_zobrist_keys() {
+    for (int row = 0; row < N_actual; ++row) {
+        for (int col = 0; col < N_actual; ++col) {
+            for (uint64_t& key : zobrist_tile_keys[row][col]) {
+                key = distrib_uint64(zobrist_rng_engine);
+            }
+        }
+    }
+}
+// Converts one hexadecimal digit character ('0'-'9' or lowercase 'a'-'f') to its
+// numeric value. Any character that is not a decimal digit is treated as an
+// offset from 'a', so callers must pass valid lowercase hex digits.
+int hex_char_to_int(char c) {
+    const bool is_decimal_digit = ('0' <= c) && (c <= '9');
+    return is_decimal_digit ? (c - '0') : (c - 'a' + 10);
+}
+// Sliding-puzzle board: tile hex characters, the empty square's position, and an
+// incrementally maintained Zobrist hash of the layout.
+struct Board {
+    std::array<std::array<char, N_MAX_CONST>, N_MAX_CONST> tiles;
+    int empty_r, empty_c;
+    uint64_t zobrist_hash_value;
+    Board() : empty_r(0), empty_c(0), zobrist_hash_value(0) {}
+    // Recomputes the hash from scratch over the N_actual x N_actual region.
+    void calculate_initial_hash() {
+        uint64_t hash = 0;
+        for (int row = 0; row < N_actual; ++row) {
+            for (int col = 0; col < N_actual; ++col) {
+                hash ^= zobrist_tile_keys[row][col][hex_char_to_int(tiles[row][col])];
+            }
+        }
+        zobrist_hash_value = hash;
+    }
+    // Updates the hash for a move that has ALREADY been applied to `tiles`: the
+    // first position is now empty and the second now holds the moved tile.
+    void update_hash_after_move(int pos_tile_becomes_empty_r, int pos_tile_becomes_empty_c,
+                                int pos_empty_gets_tile_r, int pos_empty_gets_tile_c) {
+        const int moved_value = hex_char_to_int(tiles[pos_empty_gets_tile_r][pos_empty_gets_tile_c]);
+        const auto& vacated_keys = zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c];
+        const auto& filled_keys = zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c];
+        // Remove the old contents of both squares, then add the new contents.
+        zobrist_hash_value ^= vacated_keys[moved_value];
+        zobrist_hash_value ^= filled_keys[0];
+        zobrist_hash_value ^= vacated_keys[0];
+        zobrist_hash_value ^= filled_keys[moved_value];
+    }
+    // Slides the tile lying in direction `move_char` of the empty square into it.
+    // Returns false, leaving the board untouched, for an unknown character or a
+    // tile position outside the board.
+    bool apply_move_char(char move_char) {
+        int direction = -1;
+        for (int i = 0; i < 4; ++i) {
+            if (MOVE_CHARS[i] == move_char) {
+                direction = i;
+            }
+        }
+        if (direction < 0) {
+            return false;
+        }
+        const int source_r = empty_r + DR_TILE_RELATIVE_TO_EMPTY[direction];
+        const int source_c = empty_c + DC_TILE_RELATIVE_TO_EMPTY[direction];
+        const bool inside_board = source_r >= 0 && source_r < N_actual &&
+                                  source_c >= 0 && source_c < N_actual;
+        if (!inside_board) {
+            return false;
+        }
+        tiles[empty_r][empty_c] = tiles[source_r][source_c];
+        tiles[source_r][source_c] = '0';
+        update_hash_after_move(source_r, source_c, empty_r, empty_c);
+        empty_r = source_r;
+        empty_c = source_c;
+        return true;
+    }
+};
+// Summary of a board's connectivity as computed by calculate_scores.
+struct ScoreComponents {
+    // Node count of the largest connected component whose edge count is exactly
+    // one less than its node count (i.e. a tree); 0 if there is none.
+    int max_tree_size;
+    // Number of connected components formed by the non-empty tiles.
+    int num_components;
+};
+std::unordered_map<uint64_t, ScoreComponents> s_value_cache_by_hash;
+const size_t MAX_SCORE_CACHE_SIZE_CONST = 2000000;
+// Disjoint-set union over the current_N * current_N board cells that also tracks,
+// per set root, how many nodes and how many edges the set contains. The caller
+// seeds nodes_in_set (the constructor zeroes it) before adding edges.
+struct DSU {
+    std::vector<int> parent;
+    std::vector<int> nodes_in_set;  // valid at set roots only
+    std::vector<int> edges_in_set;  // valid at set roots only
+    int N_sq_total_cells;
+    DSU(int current_N) : N_sq_total_cells(current_N * current_N) {
+        parent.resize(N_sq_total_cells);
+        std::iota(parent.begin(), parent.end(), 0);
+        nodes_in_set.assign(N_sq_total_cells, 0);
+        edges_in_set.assign(N_sq_total_cells, 0);
+    }
+    // Returns the root of i's set, compressing the path along the way.
+    int find(int i) {
+        if (parent[i] == i)
+            return i;
+        return parent[i] = find(parent[i]);
+    }
+    // Merges the sets containing i_idx and j_idx, attaching the smaller set (by
+    // node count) under the larger. Does nothing when both are already in the
+    // same set; without that guard the root's counters would be added to
+    // themselves and doubled.
+    void unite(int i_idx, int j_idx) {
+        int root_i = find(i_idx);
+        int root_j = find(j_idx);
+        if (root_i == root_j) return;
+        if (nodes_in_set[root_i] < nodes_in_set[root_j]) std::swap(root_i, root_j);
+        parent[root_j] = root_i;
+        nodes_in_set[root_i] += nodes_in_set[root_j];
+        edges_in_set[root_i] += edges_in_set[root_j];
+    }
+    // Records one edge between u_idx and v_idx, merging their sets first if
+    // needed. An edge inside an existing set still counts, which is how cycles
+    // become detectable (edges > nodes - 1).
+    void add_edge(int u_idx, int v_idx) {
+        if (find(u_idx) != find(v_idx)) {
+            unite(u_idx, v_idx);
+        }
+        edges_in_set[find(u_idx)]++;
+    }
+};
+// Computes the largest tree size and the number of connected components for a
+// board, memoised by the board's Zobrist hash (up to MAX_SCORE_CACHE_SIZE_CONST
+// entries).
+ScoreComponents calculate_scores(const Board& board) {
+    const auto cached = s_value_cache_by_hash.find(board.zobrist_hash_value);
+    if (cached != s_value_cache_by_hash.end()) {
+        return cached->second;
+    }
+    const int n = N_actual;
+    DSU dsu(n);
+    // Every non-empty tile starts out as its own one-node set.
+    for (int row = 0; row < n; ++row) {
+        for (int col = 0; col < n; ++col) {
+            dsu.nodes_in_set[row * n + col] = (board.tiles[row][col] != '0') ? 1 : 0;
+        }
+    }
+    // Horizontal edges: a rightward line meeting a leftward line. The empty
+    // square has value 0 and therefore never matches a mask.
+    for (int row = 0; row < n; ++row) {
+        for (int col = 0; col + 1 < n; ++col) {
+            const int left_tile = hex_char_to_int(board.tiles[row][col]);
+            const int right_tile = hex_char_to_int(board.tiles[row][col + 1]);
+            if ((left_tile & RIGHT_MASK) && (right_tile & LEFT_MASK)) {
+                dsu.add_edge(row * n + col, row * n + (col + 1));
+            }
+        }
+    }
+    // Vertical edges: a downward line meeting an upward line.
+    for (int row = 0; row + 1 < n; ++row) {
+        for (int col = 0; col < n; ++col) {
+            const int upper_tile = hex_char_to_int(board.tiles[row][col]);
+            const int lower_tile = hex_char_to_int(board.tiles[row + 1][col]);
+            if ((upper_tile & DOWN_MASK) && (lower_tile & UP_MASK)) {
+                dsu.add_edge(row * n + col, (row + 1) * n + col);
+            }
+        }
+    }
+    ScoreComponents result = {0, 0};
+    for (int cell = 0; cell < dsu.N_sq_total_cells; ++cell) {
+        const int node_count = dsu.nodes_in_set[cell];
+        if (dsu.parent[cell] != cell || node_count <= 0) {
+            continue;  // not a root, or the empty square
+        }
+        ++result.num_components;
+        // A connected component is a tree exactly when edges == nodes - 1.
+        const bool is_tree = dsu.edges_in_set[cell] == node_count - 1;
+        if (is_tree && node_count > result.max_tree_size) {
+            result.max_tree_size = node_count;
+        }
+    }
+    if (s_value_cache_by_hash.size() < MAX_SCORE_CACHE_SIZE_CONST) {
+        s_value_cache_by_hash[board.zobrist_hash_value] = result;
+    }
+    return result;
+}
+int TARGET_EMPTY_R_GLOBAL_FOR_A_STAR, TARGET_EMPTY_C_GLOBAL_FOR_A_STAR; // Used by A* heuristic
+bool A_STAR_PHASE_WAS_RUN = false; // Flag to adjust beam score empty penalty
+// Heuristic value used to rank beam-search states; larger is better.
+//   scores              - tree size / component count of the board
+//   K_total             - number of moves used so far
+//   current_board_state - board, read only for the empty square's position
+// A full tree always outranks any partial tree and is then ranked by fewer moves.
+// Partial trees are ranked by tree size, penalised for extra components, moves
+// used, and the empty square's Manhattan distance from (N-1, N-1).
+double calculate_beam_score(const ScoreComponents& scores, int K_total, const Board& current_board_state) {
+    int S = scores.max_tree_size;
+    // Must dominate every partial-tree score (about 1e11 at most for N <= 10) yet
+    // stay small enough that a double still resolves single-move differences.
+    // At 1e18 adjacent doubles are 128 apart, so the move-count term below was
+    // rounded away; at 1e15 the spacing is 0.125 and the term is kept exactly.
+    const double FULL_TREE_BASE_SCORE = 1e15;
+    if (S == N_actual * N_actual - 1) {
+        return FULL_TREE_BASE_SCORE + (double)(T_param * 2 - K_total);
+    }
+    double W_S = 1e9;
+    double W_NC = W_S * 0.8; // Make W_NC very strong, almost as much as increasing S by 1.
+    double W_K = 1.0;
+    double W_empty_dist_penalty_main;
+    if (A_STAR_PHASE_WAS_RUN) { // A* moved empty to target initially
+        W_empty_dist_penalty_main = W_K * 0.5; // Very low penalty, allow free movement
+    } else { // Empty started at target, or A* failed (should not happen)
+        W_empty_dist_penalty_main = W_K * 10.0; // Moderate penalty
+    }
+    double score_val = (double)S * W_S;
+    if (scores.num_components > 1) {
+         score_val -= (double)(scores.num_components - 1) * W_NC;
+    } else if (scores.num_components == 0 && N_actual * N_actual - 1 > 0) {
+         // No components at all is penalised as if every tile were isolated.
+         score_val -= (double)(N_actual * N_actual -1) * W_NC;
+    }
+    // Bonus for being very close to a full tree and connected
+    if (S >= (N_actual * N_actual - 1) - 2 && scores.num_components == 1 && S < N_actual * N_actual - 1) {
+        score_val += W_S * 0.5; // Significant bonus to encourage the last step
+    }
+    score_val -= (double)K_total * W_K;
+    // Penalty for empty square relative to (N-1,N-1)
+    int dist_empty_to_corner = std::abs(current_board_state.empty_r - (N_actual - 1)) +
+                               std::abs(current_board_state.empty_c - (N_actual - 1));
+    score_val -= dist_empty_to_corner * W_empty_dist_penalty_main;
+    return score_val;
+}
+// Contest score for a final tree size S reached in K_total moves, following the
+// problem's scoring rule. Returns 0 when a full tree used more than T_param
+// moves, or when the board has no tiles at all.
+double calculate_actual_score(int S, int K_total) {
+    const int full_tree_size = N_actual * N_actual - 1;
+    if (full_tree_size == 0) {
+        return 0;
+    }
+    if (S != full_tree_size) {
+        // Partial tree: proportional to the fraction of tiles in the tree.
+        return std::round(500000.0 * (double)S / (N_actual * N_actual - 1.0));
+    }
+    if (K_total > T_param) {
+        return 0;
+    }
+    // Full tree: rewarded further for using fewer of the T_param allowed moves.
+    return std::round(500000.0 * (2.0 - (double)K_total / T_param));
+}
+// One recorded move, linked to an earlier entry by index.
+// NOTE(review): presumably entries live in beam_history_storage and are chained
+// through parent_history_idx to reconstruct a move sequence - confirm against
+// the code that fills and walks the storage.
+struct BeamHistoryEntry {
+    int parent_history_idx;  // index of the preceding entry
+    char move_char_taken;    // move character taken to reach this entry
+};
+std::vector<BeamHistoryEntry> beam_history_storage;
+const size_t MAX_BEAM_HISTORY_STORAGE_SIZE_CONST = 3000000;
+// A candidate state in the beam: the board plus bookkeeping for ranking and for
+// recovering the moves that led to it.
+struct BeamState {
+    Board board;
+    double beam_score_val;
+    int k_beam_moves;
+    int history_idx;
+    int prev_move_direction_idx;
+    // Deliberately inverted: a state compares "less" when its score is HIGHER,
+    // so an ascending sort puts the best-scoring states first.
+    bool operator<(const BeamState& other) const {
+        return other.beam_score_val < beam_score_val;
+    }
+};
+std::chrono::steady_clock::time_point T_START_CHRONO_MAIN;
+const int TIME_LIMIT_MS_SLACK_CONST = 400; // Universal slack
+long long TIME_LIMIT_MS_EFFECTIVE_MAIN;
+std::mt19937 rng_stochastic_selection_main;
+std::unordered_map<uint64_t, int> min_K_to_reach_by_hash_main;
+const size_t MAX_MIN_K_CACHE_SIZE_CONST = 2000000;
+// Search node for routing the empty square: its position, the number of moves
+// taken so far, and the move string that reaches it.
+struct AStarEmptyState {
+    int r, c;
+    int g_cost;
+    std::string path;
+    // Orders nodes by f = g + Manhattan distance to the global A* target; ties
+    // on f are broken by g_cost.
+    bool operator>(const AStarEmptyState& other) const {
+        const auto distance_to_target = [](int row, int col) {
+            return std::abs(row - TARGET_EMPTY_R_GLOBAL_FOR_A_STAR) +
+                   std::abs(col - TARGET_EMPTY_C_GLOBAL_FOR_A_STAR);
+        };
+        const int f_this = g_cost + distance_to_target(r, c);
+        const int f_other = other.g_cost + distance_to_target(other.r, other.c);
+        return (f_this != f_other) ? (f_this > f_other) : (g_cost > other.g_cost);
+    }
+};
+// A* search that moves only the empty square from its position on the given
+// board to (target_r, target_c), ignoring tile contents.
+// Returns the move characters of the path found, or "" when the empty square
+// already sits on the target or no path is found within the depth cap.
+std::string find_path_for_empty(const Board& initial_board_state_for_A_star, int target_r, int target_c) {
+    // AStarEmptyState's comparator reads the target through these globals.
+    TARGET_EMPTY_R_GLOBAL_FOR_A_STAR = target_r;
+    TARGET_EMPTY_C_GLOBAL_FOR_A_STAR = target_c;
+    const int start_r = initial_board_state_for_A_star.empty_r;
+    const int start_c = initial_board_state_for_A_star.empty_c;
+    const int depth_cap = N_actual * N_actual * 2; // Generous bound on the path length
+    // Cheapest known cost per cell; T_param + 1 acts as "not reached yet".
+    std::vector<std::vector<int>> best_g(N_actual, std::vector<int>(N_actual, T_param + 1));
+    std::priority_queue<AStarEmptyState, std::vector<AStarEmptyState>, std::greater<AStarEmptyState>> open_set;
+    open_set.push({start_r, start_c, 0, ""});
+    best_g[start_r][start_c] = 0;
+    while (!open_set.empty()) {
+        const AStarEmptyState node = open_set.top();
+        open_set.pop();
+        // Stale heap entry: the cell was later reached more cheaply.
+        if (node.g_cost > best_g[node.r][node.c]) {
+            continue;
+        }
+        if (node.r == target_r && node.c == target_c) {
+            return node.path;
+        }
+        if (node.g_cost >= depth_cap) {
+            continue;
+        }
+        const int g_next = node.g_cost + 1;
+        for (int dir = 0; dir < 4; ++dir) {
+            // The empty square swaps places with the neighbouring tile in direction `dir`.
+            const int nr = node.r + DR_TILE_RELATIVE_TO_EMPTY[dir];
+            const int nc = node.c + DC_TILE_RELATIVE_TO_EMPTY[dir];
+            const bool on_board = nr >= 0 && nr < N_actual && nc >= 0 && nc < N_actual;
+            if (!on_board || best_g[nr][nc] <= g_next) {
+                continue;
+            }
+            best_g[nr][nc] = g_next;
+            open_set.push({nr, nc, g_next, node.path + MOVE_CHARS[dir]});
+        }
+    }
+    return "";
+}
+// Rebuilds the beam-phase move string that ends at history node
+// final_history_idx by following parent links until the root (index 0, or a
+// node whose parent is -1) or an out-of-range index is met.
+std::string reconstruct_beam_path(int final_history_idx) {
+    std::string moves_newest_first;
+    int idx = final_history_idx;
+    while (idx > 0 && static_cast<size_t>(idx) < beam_history_storage.size()) {
+        const BeamHistoryEntry& node = beam_history_storage[idx];
+        if (node.parent_history_idx == -1) {
+            break;
+        }
+        moves_newest_first.push_back(node.move_char_taken);
+        idx = node.parent_history_idx;
+    }
+    // Moves were collected from the last one backwards; flip into play order.
+    return std::string(moves_newest_first.rbegin(), moves_newest_first.rend());
+}
+// Entry point.
+//  1. Reads N, T and the N x N board from stdin.
+//  2. Walks the empty square to the bottom-right corner with A*.
+//  3. Runs a time-bounded beam search (elites + shuffled sample of the next
+//     best candidates) and tracks the best actual score seen.
+//  4. Prints the best move string found.
+int main(int /*argc*/, char** /*argv*/) {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    unsigned int random_seed_stochastic = std::chrono::steady_clock::now().time_since_epoch().count();
+    rng_stochastic_selection_main.seed(random_seed_stochastic);
+    T_START_CHRONO_MAIN = std::chrono::steady_clock::now();
+    std::cin >> N_actual >> T_param;
+    init_zobrist_keys();
+    Board current_board_obj;
+    for (int i = 0; i < N_actual; ++i) {
+        std::string row_str;
+        std::cin >> row_str;
+        for (int j = 0; j < N_actual; ++j) {
+            current_board_obj.tiles[i][j] = row_str[j];
+            if (current_board_obj.tiles[i][j] == '0') {
+                current_board_obj.empty_r = i;
+                current_board_obj.empty_c = j;
+            }
+        }
+    }
+    current_board_obj.calculate_initial_hash();
+    // Phase 1: bring the empty square to (N-1, N-1) and replay those moves on the board.
+    std::string initial_empty_moves_path = "";
+    int target_empty_final_r = N_actual - 1;
+    int target_empty_final_c = N_actual - 1;
+    if (current_board_obj.empty_r != target_empty_final_r || current_board_obj.empty_c != target_empty_final_c) {
+        initial_empty_moves_path = find_path_for_empty(current_board_obj, target_empty_final_r, target_empty_final_c);
+        A_STAR_PHASE_WAS_RUN = !initial_empty_moves_path.empty();
+    }
+    for (char move_char : initial_empty_moves_path) {
+        current_board_obj.apply_move_char(move_char);
+    }
+    int K_initial_empty_moves = initial_empty_moves_path.length();
+    // Adaptive time limit after A*
+    const long long total_time_budget_ms = 2950;
+    // Wall-clock deadline (ms since T_START_CHRONO_MAIN) for the whole program.
+    const long long deadline_ms = total_time_budget_ms - TIME_LIMIT_MS_SLACK_CONST;
+    auto time_after_astar = std::chrono::steady_clock::now();
+    long long elapsed_astar_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_after_astar - T_START_CHRONO_MAIN).count();
+    TIME_LIMIT_MS_EFFECTIVE_MAIN = total_time_budget_ms - elapsed_astar_ms - TIME_LIMIT_MS_SLACK_CONST;
+    beam_history_storage.reserve(MAX_BEAM_HISTORY_STORAGE_SIZE_CONST);
+    s_value_cache_by_hash.reserve(MAX_SCORE_CACHE_SIZE_CONST);
+    min_K_to_reach_by_hash_main.reserve(MAX_MIN_K_CACHE_SIZE_CONST);
+    // Phase 2 setup: the beam starts from the board reached after the A* moves.
+    std::vector<BeamState> current_beam;
+    ScoreComponents initial_scores_for_beam = calculate_scores(current_board_obj);
+    double initial_beam_eval_score = calculate_beam_score(initial_scores_for_beam, K_initial_empty_moves, current_board_obj);
+    beam_history_storage.push_back({-1, ' '}); // Root history node (index 0)
+    current_beam.push_back({current_board_obj, initial_beam_eval_score, 0, 0, -1});
+    double overall_best_actual_score = calculate_actual_score(initial_scores_for_beam.max_tree_size, K_initial_empty_moves);
+    std::string overall_best_path_str = initial_empty_moves_path;
+    min_K_to_reach_by_hash_main[current_board_obj.zobrist_hash_value] = K_initial_empty_moves;
+    int beam_width;
+    float elite_ratio = 0.2f; // Standard elite ratio
+    int stochastic_sample_pool_factor = 3;
+    if (N_actual <= 6) { beam_width = 1200;} // N=6 is small, can afford wider
+    else if (N_actual == 7) { beam_width = 1000;}
+    else if (N_actual == 8) { beam_width = 700;} // Reduced from 800 to save time slightly
+    else if (N_actual == 9) { beam_width = 400;} // Reduced from 500
+    else { beam_width = 250;} // N=10, reduced from 300
+    std::vector<BeamState> candidates_pool;
+    candidates_pool.reserve(beam_width * 4 + 10);
+    std::vector<BeamState> next_beam_states_temp;
+    next_beam_states_temp.reserve(beam_width + 10);
+    std::vector<int> stochastic_selection_indices;
+    stochastic_selection_indices.reserve(stochastic_sample_pool_factor * beam_width + 10);
+    int k_iter_count_beam = 0;
+    for (int k_beam_iter = 0; K_initial_empty_moves + k_beam_iter < T_param; ++k_beam_iter) {
+        k_iter_count_beam++;
+        // Poll the clock only every 10th layer; stop once total elapsed time passes the deadline.
+        if (k_iter_count_beam % 10 == 0) {
+            const long long elapsed_total_ms = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - T_START_CHRONO_MAIN).count();
+            if (elapsed_total_ms > deadline_ms) {
+                break;
+            }
+        }
+        // Stop while the history pool still has room for one more full layer of candidates.
+        if (beam_history_storage.size() >= MAX_BEAM_HISTORY_STORAGE_SIZE_CONST - ( (size_t)beam_width * 4 + 100) ) {
+            break;
+        }
+        candidates_pool.clear();
+        for (const auto& current_state_in_beam : current_beam) {
+            // One scratch copy per parent; each move is applied in place and undone afterwards.
+            Board temp_board_for_moves = current_state_in_beam.board;
+            int parent_k_beam = current_state_in_beam.k_beam_moves;
+            int parent_history_idx = current_state_in_beam.history_idx;
+            int prev_m_dir_idx = current_state_in_beam.prev_move_direction_idx;
+            for (int move_dir_idx = 0; move_dir_idx < 4; ++move_dir_idx) {
+                if (prev_m_dir_idx != -1) {
+                    // Skip the move that undoes the previous one. NOTE(review): relies on
+                    // opposite directions being index pairs (0,1) and (2,3) - confirm against MOVE_CHARS.
+                    if ((prev_m_dir_idx ^ 1) == move_dir_idx) { // Check for U/D or L/R reversal using XOR trick
+                        continue;
+                    }
+                }
+                char current_move_char = MOVE_CHARS[move_dir_idx];
+                int original_empty_r = temp_board_for_moves.empty_r;
+                int original_empty_c = temp_board_for_moves.empty_c;
+                uint64_t original_hash = temp_board_for_moves.zobrist_hash_value;
+                int tile_to_move_r = original_empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
+                int tile_to_move_c = original_empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
+                if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
+                    continue;
+                }
+                // Apply the move: the tile slides into the empty cell.
+                char moved_tile_hex_val = temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c];
+                temp_board_for_moves.tiles[original_empty_r][original_empty_c] = moved_tile_hex_val;
+                temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = '0';
+                temp_board_for_moves.empty_r = tile_to_move_r;
+                temp_board_for_moves.empty_c = tile_to_move_c;
+                temp_board_for_moves.update_hash_after_move(tile_to_move_r, tile_to_move_c, original_empty_r, original_empty_c);
+                int next_k_beam = parent_k_beam + 1;
+                int next_K_total = K_initial_empty_moves + next_k_beam;
+                // Duplicate pruning: drop boards already reached with at most this many moves.
+                bool already_reached_better = false;
+                auto it_map = min_K_to_reach_by_hash_main.find(temp_board_for_moves.zobrist_hash_value);
+                if (it_map != min_K_to_reach_by_hash_main.end()) {
+                    if (it_map->second <= next_K_total) {
+                        already_reached_better = true;
+                    } else {
+                         it_map->second = next_K_total;
+                    }
+                } else {
+                    if (min_K_to_reach_by_hash_main.size() < MAX_MIN_K_CACHE_SIZE_CONST) {
+                        min_K_to_reach_by_hash_main[temp_board_for_moves.zobrist_hash_value] = next_K_total;
+                    }
+                }
+                if(already_reached_better) {
+                    // Undo the move before trying the next direction.
+                    temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
+                    temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
+                    temp_board_for_moves.empty_r = original_empty_r;
+                    temp_board_for_moves.empty_c = original_empty_c;
+                    temp_board_for_moves.zobrist_hash_value = original_hash;
+                    continue;
+                }
+                ScoreComponents next_scores = calculate_scores(temp_board_for_moves);
+                double next_beam_eval_score = calculate_beam_score(next_scores, next_K_total, temp_board_for_moves);
+                beam_history_storage.push_back({parent_history_idx, current_move_char});
+                int new_history_idx = beam_history_storage.size() - 1;
+                candidates_pool.push_back({temp_board_for_moves, next_beam_eval_score, next_k_beam, new_history_idx, move_dir_idx});
+                double current_actual_score_val = calculate_actual_score(next_scores.max_tree_size, next_K_total);
+                if (current_actual_score_val > overall_best_actual_score) {
+                    overall_best_actual_score = current_actual_score_val;
+                    overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
+                } else if (current_actual_score_val == overall_best_actual_score &&
+                           static_cast<size_t>(next_K_total) < overall_best_path_str.length()) {
+                    // Prefer shorter paths for same score. Every beam step adds exactly one
+                    // history node, so this candidate's full path has next_K_total moves;
+                    // comparing lengths first avoids rebuilding the string for each tie.
+                    overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
+                }
+                // Undo the move so the scratch board matches the parent again.
+                temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
+                temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
+                temp_board_for_moves.empty_r = original_empty_r;
+                temp_board_for_moves.empty_c = original_empty_c;
+                temp_board_for_moves.zobrist_hash_value = original_hash;
+            }
+        }
+        if (candidates_pool.empty()) break;
+        // BeamState::operator< is inverted, so this sorts best score first.
+        std::sort(candidates_pool.begin(), candidates_pool.end());
+        next_beam_states_temp.clear();
+        // Elites: the top elite_ratio share of the beam width always survives.
+        int num_elites = std::min(static_cast<int>(candidates_pool.size()), static_cast<int>(beam_width * elite_ratio));
+        num_elites = std::max(0, num_elites);
+        for(int i=0; i < num_elites && i < static_cast<int>(candidates_pool.size()); ++i) {
+            next_beam_states_temp.push_back(candidates_pool[i]);
+        }
+        // Remaining slots: random picks among the next-best
+        // stochastic_sample_pool_factor * beam_width candidates.
+        if (next_beam_states_temp.size() < static_cast<size_t>(beam_width) && candidates_pool.size() > static_cast<size_t>(num_elites)) {
+            stochastic_selection_indices.clear();
+            int pool_start_idx = num_elites;
+            int pool_end_idx = std::min(static_cast<int>(candidates_pool.size()), num_elites + stochastic_sample_pool_factor * beam_width);
+            for(int i = pool_start_idx; i < pool_end_idx; ++i) {
+                stochastic_selection_indices.push_back(i);
+            }
+            if (!stochastic_selection_indices.empty()){
+                std::shuffle(stochastic_selection_indices.begin(), stochastic_selection_indices.end(), rng_stochastic_selection_main);
+            }
+            for(size_t i=0; i < stochastic_selection_indices.size() && next_beam_states_temp.size() < static_cast<size_t>(beam_width); ++i) {
+                next_beam_states_temp.push_back(candidates_pool[stochastic_selection_indices[i]]);
+            }
+        }
+        current_beam = next_beam_states_temp;
+        if (current_beam.empty()) break;
+    }
+    std::cout << overall_best_path_str << std::endl;
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc015/best_program.cpp ADDED Viewed

	@@ -0,0 +1,664 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <array>
+#include <numeric>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <chrono> // For seeding RNG
+#include <unordered_map>
+// #include <iomanip> // For debugging output
+// Problem constants
+const int GRID_SIZE = 10;   // Board is GRID_SIZE x GRID_SIZE
+const int NUM_TURNS = 100;  // Number of candies (one per turn)
+const int NUM_FLAVORS = 3; // Flavors are 1, 2, 3; 0 on the board means "empty"
+// Directions: F, B, L, R (Up, Down, Left, Right on typical grid with (0,0) top-left).
+// DR/DC/DIR_CHARS share the same index, which is also the dir_idx used for tilts.
+const int DR[] = {-1, 1, 0, 0};
+const int DC[] = {0, 0, -1, 1};
+const char DIR_CHARS[] = {'F', 'B', 'L', 'R'};
+const int NUM_DIRECTIONS = 4;
+// Global data initialized once
+// Flavor of each candy; entry t is the flavor placed after t candies are already on the board.
+std::array<int, NUM_TURNS> G_FLAVOR_SEQUENCE;
+// Indexed by flavor. Presumably the number of candies of each flavor - verify against the initializer.
+std::array<int, NUM_FLAVORS + 1> G_flavor_total_counts;
+// Per flavor: inclusive (first column, last column) of its target strip; first > second means "no strip".
+std::array<std::pair<int, int>, NUM_FLAVORS + 1> G_target_col_ranges;
+// Per flavor: strip-based penalty and bonus terms are skipped when this is false.
+std::array<bool, NUM_FLAVORS + 1> G_flavor_active;
+int G_last_dir_idx = -1; // -1 indicates no previous tilt; used for axis continuity bias
+/* Docstring: Lightweight expectimax lookahead (up to depth 2).
+   A limited number of placements of the next candy is evaluated and, for
+   each, the best next tilt is taken. At depth 2 one more step is expanded.
+   Depth 0 returns immediate board eval. */
+// Lookahead parameters
+const int MAX_LOOKAHEAD_DEPTH = 2;
+// Placement count per lookahead level: entry 0 for the first level, entry 1 for the
+// second. The count actually used is capped by the number of empty cells.
+static constexpr std::array<int, MAX_LOOKAHEAD_DEPTH> NUM_SAMPLES_CONFIG = {24, 12};
+/* Docstring: Minimal 64-bit xorshift pseudo-random generator (shifts 13/7/17),
+   seeded from the steady clock. Not suitable for anything security related. */
+struct XorshiftRNG {
+    uint64_t x; // Generator state; must never be zero
+    XorshiftRNG() : x(std::chrono::steady_clock::now().time_since_epoch().count()) {
+        // Zero is a fixed point of the xorshift update: a zero seed would make
+        // next() return 0 forever. Fall back to an arbitrary non-zero constant.
+        if (x == 0) {
+            x = 0x9E3779B97F4A7C15ULL;
+        }
+    }
+    // Advances the state and returns it.
+    uint64_t next() {
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        return x;
+    }
+    // Returns a value in [min_val, max_val] (inclusive) via modulo reduction.
+    // An empty or single-value range returns min_val without consuming randomness.
+    int uniform_int(int min_val, int max_val) {
+        if (min_val >= max_val) return min_val;
+        // Computed in unsigned 64-bit arithmetic so wide or negative ranges cannot overflow int.
+        uint64_t range = static_cast<uint64_t>(max_val) - min_val + 1;
+        return min_val + static_cast<int>(next() % range);
+    }
+};
+XorshiftRNG rng;
+// Zobrist hashing and a small transposition table to memoize lookahead values.
+// This greatly reduces duplicate computations from convergent tilt sequences.
+std::array<std::array<std::array<uint64_t, NUM_FLAVORS + 1>, GRID_SIZE>, GRID_SIZE> G_zobrist;
+// XOR of the Zobrist keys of every occupied cell (keyed by row, column and
+// flavor); empty cells contribute nothing, so an empty board hashes to 0.
+static inline uint64_t compute_board_hash(const std::array<std::array<int, GRID_SIZE>, GRID_SIZE>& board) {
+    uint64_t hash = 0;
+    for (int cell = 0; cell < GRID_SIZE * GRID_SIZE; ++cell) {
+        const int row = cell / GRID_SIZE;
+        const int col = cell % GRID_SIZE;
+        const int flavor = board[row][col];
+        if (flavor != 0) {
+            hash ^= G_zobrist[row][col][flavor];
+        }
+    }
+    return hash;
+}
+// Transposition table for lookahead values. The key is the board hash XOR-mixed
+// with the turn and the remaining depth, so equal boards in different contexts
+// get different entries.
+static std::unordered_map<uint64_t, double> G_TT;
+// Size threshold: the table is emptied before an insert once it holds more entries than this.
+static constexpr size_t G_TT_MAX = 800000;
+// A placed candy: its row, column and flavor at the time it was recorded.
+struct Candy {
+    int r, c, flavor;
+};
+/* Docstring: Full game position: the grid (0 = empty, otherwise the flavor),
+   the list of candies placed so far and the current 1-indexed turn. Provides
+   placement, tilting and the heuristic evaluation used by the search. */
+struct GameState {
+    std::array<std::array<int, GRID_SIZE>, GRID_SIZE> board;
+    std::vector<Candy> candies_list;
+    int turn_num_1_indexed;
+    GameState() : turn_num_1_indexed(0) {
+        for (auto& row : board) {
+            row.fill(0);
+        }
+        candies_list.reserve(NUM_TURNS);
+    }
+    GameState(const GameState& other) = default;
+    GameState& operator=(const GameState& other) = default;
+    GameState(GameState&& other) noexcept = default;
+    GameState& operator=(GameState&& other) noexcept = default;
+    /* Docstring: Put a candy of the given flavor on cell (r, c) and record it.
+       No bounds check is performed. */
+    void place_candy(int r, int c, int flavor) {
+        candies_list.push_back({r, c, flavor});
+        board[r][c] = flavor;
+    }
+    /* Docstring: Locate the p-th empty cell (1-indexed) in row-major order.
+       Returns {-1, -1} when fewer than p empty cells exist. */
+    std::pair<int, int> find_pth_empty_cell(int p_1_indexed) const {
+        int still_to_skip = p_1_indexed;
+        for (int row = 0; row < GRID_SIZE; ++row) {
+            for (int col = 0; col < GRID_SIZE; ++col) {
+                if (board[row][col] != 0) continue;
+                if (--still_to_skip == 0) {
+                    return {row, col};
+                }
+            }
+        }
+        return {-1, -1};
+    }
+    /* Docstring: Number of empty cells, derived from the number of recorded
+       candies rather than from a board scan. */
+    int count_empty_cells() const {
+        const int occupied = static_cast<int>(candies_list.size());
+        return GRID_SIZE * GRID_SIZE - occupied;
+    }
+    /* Docstring: Tilt the box: slide every candy as far as possible toward one
+       edge, keeping the relative order within each row/column.
+       dir_idx 0 = F (up), 1 = B (down), 2 = L (left), anything else = R (right).
+       Returns true when at least one candy changed cell. candies_list is left
+       untouched on purpose: its positions may become stale, but its size (all
+       that count_empty_cells needs) is unaffected by a tilt. */
+    bool apply_tilt(int dir_idx) {
+        // F and B compact columns; L and R compact rows.
+        const bool along_columns = (dir_idx == 0 || dir_idx == 1);
+        // F and L pack toward index 0; B and R pack toward index GRID_SIZE - 1.
+        const bool toward_zero = (dir_idx == 0 || dir_idx == 2);
+        const int first_pos = toward_zero ? 0 : GRID_SIZE - 1;
+        const int step = toward_zero ? 1 : -1;
+        bool any_moved = false;
+        for (int line = 0; line < GRID_SIZE; ++line) {
+            int write_pos = first_pos;
+            for (int read_pos = first_pos; read_pos >= 0 && read_pos < GRID_SIZE; read_pos += step) {
+                int& source = along_columns ? board[read_pos][line] : board[line][read_pos];
+                if (source == 0) continue;
+                if (read_pos != write_pos) {
+                    int& dest = along_columns ? board[write_pos][line] : board[line][write_pos];
+                    dest = source;
+                    source = 0;
+                    any_moved = true;
+                }
+                write_pos += step;
+            }
+        }
+        return any_moved;
+    }
+    /* Docstring: Re-derive candies_list from the board in row-major order. */
+    void rebuild_candies_list_from_board() {
+        candies_list.clear();
+        for (int row = 0; row < GRID_SIZE; ++row) {
+            for (int col = 0; col < GRID_SIZE; ++col) {
+                const int flavor = board[row][col];
+                if (flavor == 0) continue;
+                candies_list.push_back({row, col, flavor});
+            }
+        }
+    }
+    /* Docstring: Sum, over all 4-connected same-flavor components, of the
+       squared component size. Uses a flood fill with a fixed-size work list. */
+    long long calculate_sum_sq_comp_size() const {
+        std::array<std::array<bool, GRID_SIZE>, GRID_SIZE> seen;
+        for (auto& row : seen) {
+            row.fill(false);
+        }
+        // Every cell is pushed at most once, so one slot per cell always suffices.
+        std::array<std::pair<int, int>, GRID_SIZE * GRID_SIZE> pending;
+        long long sum_of_squares = 0;
+        for (int r0 = 0; r0 < GRID_SIZE; ++r0) {
+            for (int c0 = 0; c0 < GRID_SIZE; ++c0) {
+                const int flavor = board[r0][c0];
+                if (flavor == 0 || seen[r0][c0]) continue;
+                long long comp_size = 0;
+                int pending_count = 0;
+                pending[pending_count++] = {r0, c0};
+                seen[r0][c0] = true;
+                while (pending_count > 0) {
+                    const std::pair<int, int> cell = pending[--pending_count];
+                    ++comp_size;
+                    for (int d = 0; d < NUM_DIRECTIONS; ++d) {
+                        const int nr = cell.first + DR[d];
+                        const int nc = cell.second + DC[d];
+                        if (nr < 0 || nr >= GRID_SIZE || nc < 0 || nc >= GRID_SIZE) continue;
+                        if (seen[nr][nc] || board[nr][nc] != flavor) continue;
+                        seen[nr][nc] = true;
+                        pending[pending_count++] = {nr, nc};
+                    }
+                }
+                sum_of_squares += comp_size * comp_size;
+            }
+        }
+        return sum_of_squares;
+    }
+    /* Docstring: Dispersion penalty: for every flavor with at least two
+       candies, the sum of Manhattan distances from each candy to the flavor's
+       center of mass. Reads the board, not candies_list, so it stays correct
+       after tilts. */
+    double calculate_distance_penalty_CoM() const {
+        std::array<int, NUM_FLAVORS + 1> counts{};
+        std::array<double, NUM_FLAVORS + 1> row_total{};
+        std::array<double, NUM_FLAVORS + 1> col_total{};
+        for (int r = 0; r < GRID_SIZE; ++r) {
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                const int flavor = board[r][c];
+                if (flavor == 0) continue;
+                ++counts[flavor];
+                row_total[flavor] += r;
+                col_total[flavor] += c;
+            }
+        }
+        std::array<std::pair<double, double>, NUM_FLAVORS + 1> center{};
+        for (int flavor = 1; flavor <= NUM_FLAVORS; ++flavor) {
+            if (counts[flavor] > 0) {
+                center[flavor] = {row_total[flavor] / counts[flavor], col_total[flavor] / counts[flavor]};
+            }
+        }
+        double penalty = 0.0;
+        for (int r = 0; r < GRID_SIZE; ++r) {
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                const int flavor = board[r][c];
+                // A lone candy coincides with its own center of mass: nothing to add.
+                if (flavor == 0 || counts[flavor] <= 1) continue;
+                const auto& com = center[flavor];
+                penalty += std::abs(static_cast<double>(r) - com.first)
+                         + std::abs(static_cast<double>(c) - com.second);
+            }
+        }
+        return penalty;
+    }
+    /* Docstring: Total horizontal distance of candies from their flavor's
+       target column strip. Inactive flavors and flavors without a valid strip
+       contribute nothing. */
+    double calculate_region_penalty() const {
+        double total = 0.0;
+        for (int r = 0; r < GRID_SIZE; ++r) {
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                const int flavor = board[r][c];
+                if (flavor == 0 || !G_flavor_active[flavor]) continue;
+                const int strip_lo = G_target_col_ranges[flavor].first;
+                const int strip_hi = G_target_col_ranges[flavor].second;
+                if (strip_lo > strip_hi) continue;
+                if (c < strip_lo) {
+                    total += (strip_lo - c);
+                } else if (c > strip_hi) {
+                    total += (c - strip_hi);
+                }
+            }
+        }
+        return total;
+    }
+    /* Docstring: Bonus for candies inside their own strip that touch the
+       outside of the box: 0.5 for the top/bottom row, plus 0.5 for the
+       left/right wall when that wall belongs to the strip. */
+    double calculate_edge_bonus() const {
+        const double PER_CANDY_BONUS_FACTOR = 0.5;
+        double total = 0.0;
+        for (int r = 0; r < GRID_SIZE; ++r) {
+            const bool on_top_or_bottom = (r == 0 || r == GRID_SIZE - 1);
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                const int flavor = board[r][c];
+                if (flavor == 0 || !G_flavor_active[flavor]) continue;
+                const int strip_lo = G_target_col_ranges[flavor].first;
+                const int strip_hi = G_target_col_ranges[flavor].second;
+                if (strip_lo > strip_hi) continue;
+                if (c < strip_lo || c > strip_hi) continue;
+                if (on_top_or_bottom) {
+                    total += PER_CANDY_BONUS_FACTOR;
+                }
+                const bool on_left_wall = (c == 0 && strip_lo == 0);
+                const bool on_right_wall = (c == GRID_SIZE - 1 && strip_hi == GRID_SIZE - 1);
+                if (on_left_wall || on_right_wall) {
+                    total += PER_CANDY_BONUS_FACTOR;
+                }
+            }
+        }
+        return total;
+    }
+    /* Docstring: Heuristic value of the position (0 before the first turn).
+       Weighted combination of:
+       + sum of squared same-flavor component sizes,
+       - center-of-mass dispersion,
+       - distance outside the target column strip,
+       + edge/corner bonus inside the strip,
+       + number of adjacent same-flavor pairs,
+       - number of adjacent different-flavor pairs.
+       Every weight is a linear function of the turn number (two of them
+       clamped from below), so the balance shifts as the game progresses. */
+    double evaluate() const {
+        if (turn_num_1_indexed == 0) {
+            return 0.0;
+        }
+        int same_pairs = 0;
+        int mixed_pairs = 0;
+        // `here` is always a candy; an empty neighbour counts for neither tally.
+        auto tally = [&same_pairs, &mixed_pairs](int here, int neighbour) {
+            if (neighbour == here) {
+                ++same_pairs;
+            } else if (neighbour != 0) {
+                ++mixed_pairs;
+            }
+        };
+        // Look only down and right so each adjacent pair is counted once.
+        for (int r = 0; r < GRID_SIZE; ++r) {
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                const int here = board[r][c];
+                if (here == 0) continue;
+                if (r + 1 < GRID_SIZE) tally(here, board[r + 1][c]);
+                if (c + 1 < GRID_SIZE) tally(here, board[r][c + 1]);
+            }
+        }
+        const long long component_score = calculate_sum_sq_comp_size();
+        const double dispersion = calculate_distance_penalty_CoM();
+        const double region_penalty = calculate_region_penalty();
+        const double edge_bonus = calculate_edge_bonus();
+        const double turn = static_cast<double>(turn_num_1_indexed);
+        const double w_components = 15.0 + 1.1 * turn;
+        const double w_dispersion = std::max(0.0, 170.0 - 1.7 * turn);
+        const double w_region = std::max(2.0, 27.0 - 0.17 * turn);
+        const double w_edge = 5.0 + 0.2 * turn;
+        const double w_same = 180.0 + 3.0 * turn;
+        const double w_mixed = 120.0 + 2.0 * turn;
+        return w_components * component_score
+             - w_dispersion * dispersion
+             - w_region * region_penalty
+             + w_edge * edge_bonus
+             + w_same * same_pairs
+             - w_mixed * mixed_pairs;
+    }
+};
+/* Docstring: Expectimax-style lookahead evaluator with memoization (TT).
+   - Base cases (no depth left, last turn already processed, or no empty
+     cell): return the immediate evaluation.
+   - Otherwise, choose placements for the next candy deterministically: every
+     empty cell when the sample budget covers them all, else evenly spaced
+     (stratified) empty-cell indices; maximize over the next tilt for each
+     placement, and average.
+   - Use a Zobrist-hash keyed transposition table on (board, turn, depth)
+     to avoid recomputing converged states across branches. */
+double eval_lookahead(const GameState& state_after_tilt, int turn_T_of_candy_just_processed, int depth_remaining) {
+    // Leaf cases: search depth used up, last turn reached, or no cell left to fill.
+    const bool at_horizon = (depth_remaining == 0) || (turn_T_of_candy_just_processed == NUM_TURNS);
+    if (at_horizon) return state_after_tilt.evaluate();
+    const int empties = state_after_tilt.count_empty_cells();
+    if (empties == 0) return state_after_tilt.evaluate();
+    // Cache key: board hash mixed with the turn and the remaining depth, so the same
+    // board reached in a different search context gets its own entry.
+    uint64_t tt_key = compute_board_hash(state_after_tilt.board);
+    tt_key ^= uint64_t(turn_T_of_candy_just_processed) * 0x9E3779B97F4A7C15ULL;
+    tt_key ^= uint64_t(depth_remaining) * 0xC2B2AE3D27D4EB4FULL;
+    const auto cached = G_TT.find(tt_key);
+    if (cached != G_TT.end()) return cached->second;
+    // Stores a value under tt_key (wiping the table first once it has outgrown
+    // G_TT_MAX) and hands the value back so it can be returned directly.
+    auto store_result = [tt_key](double value) {
+        if (G_TT.size() > G_TT_MAX) G_TT.clear();
+        G_TT.emplace(tt_key, value);
+        return value;
+    };
+    const int flavor_to_place = G_FLAVOR_SEQUENCE[turn_T_of_candy_just_processed];
+    const int sample_budget = NUM_SAMPLES_CONFIG[MAX_LOOKAHEAD_DEPTH - depth_remaining];
+    const int samples = std::min(sample_budget, empties);
+    if (samples == 0) return store_result(state_after_tilt.evaluate());
+    // When the budget covers every empty cell, each one is visited exactly once;
+    // otherwise cells are picked at evenly spaced ranks across the empty-cell order.
+    const bool exhaustive = (samples == empties);
+    const int turn_after_placement = turn_T_of_candy_just_processed + 1;
+    double total = 0.0;
+    for (int k = 0; k < samples; ++k) {
+        int cell_rank;
+        if (exhaustive) {
+            cell_rank = k + 1;
+        } else {
+            cell_rank = static_cast<int>(((k + 0.5) * empties) / samples) + 1;
+            if (cell_rank < 1) cell_rank = 1;
+            if (cell_rank > empties) cell_rank = empties;
+        }
+        GameState placed = state_after_tilt;
+        const std::pair<int, int> cell = placed.find_pth_empty_cell(cell_rank);
+        placed.place_candy(cell.first, cell.second, flavor_to_place);
+        placed.turn_num_1_indexed = turn_after_placement;
+        // Max node: take the best reply tilt for this placement.
+        double best_reply = std::numeric_limits<double>::lowest();
+        for (int dir = 0; dir < NUM_DIRECTIONS; ++dir) {
+            GameState tilted = placed;
+            (void)tilted.apply_tilt(dir);
+            best_reply = std::max(best_reply, eval_lookahead(tilted, turn_after_placement, depth_remaining - 1));
+        }
+        total += best_reply;
+    }
+    // Chance node: average of the per-placement maxima.
+    return store_result(total / samples);
+}
+/* Pick the tilt to play for the board that already contains this turn's candy.
+   Each of the four directions is simulated and scored with eval_lookahead()
+   using MAX_LOOKAHEAD_DEPTH further levels. A very small deterministic nudge
+   is then added so that near-equal scores resolve consistently:
+   - +1e-9 when the direction lies on the same axis as the previous tilt
+     (indices 0/1 versus 2/3), and a further +2e-9 when it is the same direction;
+   - +5e-10 for directions 0 and 2 (F and L) while the turn number is <= 35;
+   - -5e-10 when the tilt leaves the board unchanged.
+   Returns the character from DIR_CHARS for the winning direction. */
+char decide_tilt_direction_logic(const GameState& current_gs_after_placement) {
+    const int turn_now = current_gs_after_placement.turn_num_1_indexed;
+    const bool have_previous_tilt = (G_last_dir_idx >= 0);
+    const bool early_phase = (turn_now <= 35);
+    int winner = 0;
+    double winner_score = std::numeric_limits<double>::lowest();
+    for (int dir = 0; dir < NUM_DIRECTIONS; ++dir) {
+        GameState tilted = current_gs_after_placement;
+        const bool moved_anything = tilted.apply_tilt(dir);
+        const double lookahead_value = eval_lookahead(tilted, turn_now, MAX_LOOKAHEAD_DEPTH);
+        double nudge = 0.0;
+        if (have_previous_tilt) {
+            if ((dir < 2) == (G_last_dir_idx < 2)) nudge += 1e-9;
+            if (dir == G_last_dir_idx) nudge += 2e-9;
+        }
+        if (early_phase && (dir == 0 || dir == 2)) {
+            nudge += 5e-10;
+        }
+        if (!moved_anything) {
+            nudge -= 5e-10;
+        }
+        const double score = lookahead_value + nudge;
+        // Strict comparison: on an exact tie the lower direction index is kept.
+        if (score > winner_score) {
+            winner_score = score;
+            winner = dir;
+        }
+    }
+    return DIR_CHARS[winner];
+}
+/* One-time setup performed before the first turn:
+   1. reads the NUM_TURNS flavors from stdin and tallies them per flavor;
+   2. derives the RNG seed from that sequence and fills the Zobrist table;
+   3. resets the transposition table;
+   4. splits the GRID_SIZE columns among the flavors that occur, proportionally
+      to their counts, and records each flavor's inclusive column range. */
+void initialize_global_data() {
+    G_flavor_total_counts.fill(0);
+    // The seed is folded from the flavors in input order, so identical input
+    // always produces identical Zobrist keys.
+    uint64_t seed = 0x9E3779B97F4A7C15ULL;
+    for (int turn = 0; turn < NUM_TURNS; ++turn) {
+        std::cin >> G_FLAVOR_SEQUENCE[turn];
+        ++G_flavor_total_counts[G_FLAVOR_SEQUENCE[turn]];
+        const uint64_t mix = static_cast<uint64_t>(G_FLAVOR_SEQUENCE[turn]) + 0x9E3779B97F4A7C15ULL + (seed << 6) + (seed >> 2);
+        seed ^= mix;
+    }
+    rng.x = seed | 1ULL; // forcing the low bit keeps the state non-zero
+    for (int row = 0; row < GRID_SIZE; ++row) {
+        for (int col = 0; col < GRID_SIZE; ++col) {
+            for (int flavor = 1; flavor <= NUM_FLAVORS; ++flavor) {
+                G_zobrist[row][col][flavor] = rng.next();
+            }
+        }
+    }
+    G_TT.clear();
+    G_TT.reserve(1 << 20);
+    // Collect (count, flavor) for every flavor that occurs and order them by
+    // count descending, flavor id ascending on equal counts.
+    G_flavor_active.fill(false);
+    std::vector<std::pair<int, int>> by_count;
+    for (int flavor = 1; flavor <= NUM_FLAVORS; ++flavor) {
+        const int occurrences = G_flavor_total_counts[flavor];
+        if (occurrences > 0) {
+            G_flavor_active[flavor] = true;
+            by_count.push_back({occurrences, flavor});
+        }
+    }
+    std::sort(by_count.begin(), by_count.end(),
+        [](const std::pair<int, int>& lhs, const std::pair<int, int>& rhs) {
+        return lhs.first > rhs.first || (lhs.first == rhs.first && lhs.second < rhs.second);
+    });
+    // Column widths: floor of the proportional share first, then the columns
+    // still unassigned are handed out one at a time in priority order.
+    std::vector<int> width(NUM_FLAVORS + 1, 0);
+    if (!by_count.empty()) {
+        double candies_total = 0;
+        for (const auto& entry : by_count) {
+            candies_total += entry.first;
+        }
+        if (candies_total == 0) candies_total = 1;
+        int width_used = 0;
+        for (const auto& entry : by_count) {
+            const double share = static_cast<double>(GRID_SIZE) * entry.first / candies_total;
+            width[entry.second] = static_cast<int>(std::floor(share));
+            width_used += width[entry.second];
+        }
+        const int leftover = GRID_SIZE - width_used;
+        for (int k = 0; k < leftover; ++k) {
+            ++width[by_count[k % by_count.size()].second];
+        }
+    }
+    // Lay the strips out left to right in priority order. A flavor that ends
+    // up with no columns gets an empty range (first > second).
+    int next_col = 0;
+    for (const auto& entry : by_count) {
+        const int flavor = entry.second;
+        const int columns = std::max(width[flavor], 0);
+        G_target_col_ranges[flavor] = {next_col, next_col + columns - 1};
+        next_col += columns;
+    }
+    // Flavors that never occur also get an empty range.
+    for (int flavor = 1; flavor <= NUM_FLAVORS; ++flavor) {
+        if (G_flavor_active[flavor]) continue;
+        G_target_col_ranges[flavor] = {0, -1};
+    }
+}
+/* Interactive driver: after the one-time setup, for each turn it reads the
+   rank of the empty cell that receives the candy, places it, prints the chosen
+   tilt followed by a newline (flushed), and applies that tilt to the board. */
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    initialize_global_data();
+    GameState current_gs;
+    for (int turn = 1; turn <= NUM_TURNS; ++turn) {
+        current_gs.turn_num_1_indexed = turn;
+        int empty_cell_rank;
+        std::cin >> empty_cell_rank;
+        const std::pair<int, int> cell = current_gs.find_pth_empty_cell(empty_cell_rank);
+        current_gs.place_candy(cell.first, cell.second, G_FLAVOR_SEQUENCE[turn - 1]);
+        const char tilt = decide_tilt_direction_logic(current_gs);
+        // The next input is not sent until this line has been flushed.
+        std::cout << tilt << '\n';
+        std::cout.flush();
+        // Map the printed character back to its direction index (0 if not found).
+        int tilt_idx = 0;
+        int probe = 0;
+        while (probe < NUM_DIRECTIONS && DIR_CHARS[probe] != tilt) {
+            ++probe;
+        }
+        if (probe < NUM_DIRECTIONS) {
+            tilt_idx = probe;
+        }
+        G_last_dir_idx = tilt_idx;
+        (void)current_gs.apply_tilt(tilt_idx);
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc015/config.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+# ALE-Bench ahc015 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+# Language of the program being evolved (the usage line above passes initial_program.cpp).
+language: cpp
+# NOTE(review): the next four keys are interpreted by the skydiscover runner; their
+# exact semantics and units are not visible in this file -- confirm against its docs.
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+# LLM endpoint settings: a single model entry with weight 1.0.
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  # NOTE(review): the timeout unit is not stated here (presumably seconds) -- confirm.
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    AtCoder's CEO Takahashi prepares for Halloween tomorrow.\nAt AtCoder's Halloween party, Takahashi will dress up in disguise\
+    \ and receive a piece of candy from 100 employees in turn by saying, \"trick or treat!\"\nHe prepares a square box that\
+    \ can contain $10\\times 10$ pieces of candy in a grid pattern, and each employee puts a piece of candy in an empty space\
+    \ so that no candies overlap.\nThere are $3$ types of candies: strawberry, watermelon, and pumpkin flavors.\nHe knows\
+    \ which flavor of candy each employee will put in by preliminary survey, but he doesn't know where each employee will\
+    \ put it.\nSince he is a clean freak, he will move the pieces of candy by tilting the box forward, backward, left, or\
+    \ right, just once for each piece of candy received, and eventually wants to make sure that the same type of candy is\
+    \ clustered together as much as possible.\nPlease help him by writing a program to determine the direction to tilt.\n\n\
+    <figure>\n  <img src=\"./images/b639c75d_1.gif\">\n  <figcaption>Example for $5\\times 5$</figcaption>\n</figure>\n\n\n\
+    Problem Statement\n--------\nThere is a box that can contain $10\\times 10$ pieces of candy in a grid pattern.\nThe box\
+    \ is initially empty, and $100$ pieces of candy will be placed in order.\nThere are $3$ flavors of candy, and we know\
+    \ in advance the flavor $f_t (1\\leq f_t\\leq 3)$ of the $t$-th candy.\nOn the other hand, we do not know in advance to\
+    \ which cell each candy will be placed, and it will be chosen uniformly at random among the empty cells.\nYou cannot change\
+    \ the order in which the pieces of candy are received.\n\nEach time you receive one piece of candy, you must tilt the\
+    \ box forward, backward, left, or right exactly once.\nWhen you tilt the box, each piece of candy moves in that direction\
+    \ simultaneously until it reaches the edge or hits another candy.\nFor example, if you tilt the box forward in the state\
+    \ shown in the left figure, the box will be in the state shown in the right figure.\n\n![](./images/b639c75d_2.png)\n\
+    ![](./images/b639c75d_3.png)\n\nScoring\n--------\nWe define the connectivity of pieces of candy as follows and consider\
+    \ the connected components.\n> Two pieces of candy are connected if and only if they are of the same flavor and can reach\
+    \ each other through only pieces of candy of the same flavor in the four directions (front, back, left, right).\n\nFor\
+    \ example, the state in the figure below consists of $7$ connected components of size $\\\\{1, 1, 2, 2, 4, 6, 9\\\\}$.\n\
+    \n![](./images/b639c75d_4.png)\n\nLet $n_1,\\cdots,n_k$ be the list of sizes of connected components in the final state\
+    \ after receiving 100 pieces of candy, and let $d_i$ be the total number of pieces of candy of flavor $i$.\nThen the score\
+    \ for the test case is\n\n\\\\[\\mathrm{round}\\left(10^6\\times\\frac{\\sum_{i=1}^k n_i^2}{\\sum_{i=1}^3 d_i^2}\\right)\\\
+    \\]\n\nYour task is to write a program to determine the tilting directions so that you can get as high a score as possible.\n\
+    \nThere are 200 test cases, and the score of a submission is the total score for each test case.\nIf your submission produces\
+    \ an illegal output or exceeds the time limit for some test cases, the submission itself will be judged as <span class='label\
+    \ label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span> or <span class='label label-warning'\
+    \ data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span> , and the score of the submission\
+    \ will be zero.\nThe highest score obtained during the contest will determine the final ranking, and there will be no\
+    \ system test after the contest.\nIf more than one participant gets the same score, they will be ranked in the same place\
+    \ regardless of the submission time (note that this is changed from previous short-term AHCs).\n\n\nInput and Output\n\
+    --------\nFirst, the flavor of each piece of candy is given from Standard Input in the following format.\n~~~\n$f_1$ $f_2$\
+    \ $\\cdots$ $f_{100}$\n~~~\n\nEach $f_t$ is an integer value between $1$ and $3$, representing the flavor of the $t$-th\
+    \ piece of candy.\n\nAfter reading the above information, repeat the following process $100$ times.\n\nIn the $t$-th process\
+    \ ($1\\leq t\\leq 100$), a single integer $p_t$ between $1$ and $101-t$ is given from Standard Input.\nLet us number the\
+    \ empty cells from $1$ to $101-t$ in front-to-back and left-to-right priority, as shown in the example figure below.\n\
+    Then the $t$-th piece of candy is placed in the $p_t$-th empty cell.\n\n![](./images/b639c75d_5.png)\n\nAfter reading\
+    \ $p_t$, by representing forward, backward, left, and right by `F`, `B`, `L`, and `R`, respectively, output a single character\
+    \ to Standard Output to indicate which direction to tilt the box.\n\n<font color=\"red\">**The output must be followed\
+    \ by a new line, and you have to flush Standard Output.**</font>\nOtherwise, the submission might be judged as <span class='label\
+    \ label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>.\n<font color=\"\
+    red\">**Note that $p_{t+1}$ will not be given until you output the $t$-th direction.**</font>\nSince nothing happens at\
+    \ the 100th tilt, you may skip the output.\n\n#### Example\n\n<table class=\"table table-bordered\">\n<thead>\n<tr>\n\
+    <th>$t$</th>\n<th>Input</th>\n<th>Output</th>\n</tr>\n</thead>\n<tbody>\n<tr>\n<td>Prior information</td>\n<td><pre>2\
+    \ 2 1 3 1 2 1 2 1 $\\cdots$ 3</pre></td>\n<td></td>\n</tr>\n<tr>\n<td>1</td>\n<td><pre>3</pre></td>\n<td><pre>R</pre></td>\n\
+    </tr>\n<tr>\n<td>2</td>\n<td><pre>98</pre></td>\n<td><pre>B</pre></td>\n</tr>\n<tr>\n<td>$\\vdots$</td>\n<td></td>\n<td></td>\n\
+    </tr>\n<tr>\n<td>100</td>\n<td><pre>1</pre></td>\n<td><pre>L</pre></td>\n</tr>\n</tbody>\n</table>\n\n\n<a href=\"https://img.atcoder.jp/ahc015/b639c75d.html?lang=en&seed=0&output=R%0D%0AB%0D%0AB%0D%0AR%0D%0AF%0D%0AR%0D%0AF%0D%0AR%0D%0AR%0D%0AF%0D%0AB%0D%0AL%0D%0AB%0D%0AL%0D%0AF%0D%0AF%0D%0AB%0D%0AF%0D%0AB%0D%0AL%0D%0AL%0D%0AL%0D%0AL%0D%0AB%0D%0AF%0D%0AF%0D%0AR%0D%0AR%0D%0AF%0D%0AL%0D%0AL%0D%0AB%0D%0AL%0D%0AL%0D%0AL%0D%0AB%0D%0AL%0D%0AR%0D%0AF%0D%0AL%0D%0AB%0D%0AL%0D%0AF%0D%0AF%0D%0AF%0D%0AL%0D%0AL%0D%0AR%0D%0AB%0D%0AB%0D%0AF%0D%0AL%0D%0AF%0D%0AR%0D%0AB%0D%0AL%0D%0AF%0D%0AF%0D%0AR%0D%0AL%0D%0AR%0D%0AL%0D%0AR%0D%0AR%0D%0AB%0D%0AR%0D%0AB%0D%0AR%0D%0AR%0D%0AF%0D%0AB%0D%0AF%0D%0AR%0D%0AR%0D%0AF%0D%0AB%0D%0AF%0D%0AB%0D%0AF%0D%0AR%0D%0AB%0D%0AF%0D%0AF%0D%0AF%0D%0AB%0D%0AB%0D%0AL%0D%0AL%0D%0AR%0D%0AF%0D%0AB%0D%0AL%0D%0AB%0D%0AF%0D%0AB%0D%0AR%0D%0AF%0D%0AF%0D%0AL%0D%0AL%0D%0A\"\
+    >Show example</a>\n\n\nInput Generation\n--------\nLet $\\mathrm{rand}(L,U)$ be a function that generates a uniform random\
+    \ integer between $L$ and $U$, inclusive.\nEach $f_t$ is generated by $\\mathrm{rand}(1,3)$.\nEach $p_t$ is generated\
+    \ by $\\mathrm{rand}(1,101-t)$.\n\n\nTools (Input generator and visualizer)\n--------\n- <a href=\"https://img.atcoder.jp/ahc015/b639c75d.html?lang=en\"\
+    >Web version</a>: This is more powerful than the local version providing animations and manual play.\n- <a href=\"https://img.atcoder.jp/ahc015/b639c75d.zip\"\
+    >Local version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n\
+    \  - <a href=\"https://img.atcoder.jp/ahc015/b639c75d_windows.zip\">Pre-compiled binary for Windows</a>: If you are not\
+    \ familiar with the Rust language environment, please use this instead.\n\nPlease be aware that sharing visualization\
+    \ results or discussing solutions/ideas during the contest is prohibited.\n\n\nProblem constraints:\ntime_limit=2.0 memory_limit=1073741824\n"
+# Evaluator settings.
+# NOTE(review): the timeout unit is not stated here; it is far larger than the
+# problem's own time_limit=2.0 quoted in the prompt, so it presumably bounds a
+# whole evaluation run rather than a single test case -- confirm.
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc015/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,491 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <array>
+#include <numeric>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <chrono> // For seeding RNG
+// #include <iomanip> // For debugging output
+// Constants
+// Board side length; the board is GRID_SIZE x GRID_SIZE cells.
+const int GRID_SIZE = 10;
+// Number of candies received, one per turn.
+const int NUM_TURNS = 100;
+const int NUM_FLAVORS = 3; // Flavors are 1, 2, 3
+// Directions: F, B, L, R (Up, Down, Left, Right on typical grid with (0,0) top-left)
+// DR/DC give the row/column step for each direction index; the same index
+// selects the output character in DIR_CHARS.
+const int DR[] = {-1, 1, 0, 0};
+const int DC[] = {0, 0, -1, 1};
+const char DIR_CHARS[] = {'F', 'B', 'L', 'R'};
+const int NUM_DIRECTIONS = 4;
+// Global data initialized once
+// Flavor of the candy received on each turn (0-indexed by turn).
+std::array<int, NUM_TURNS> G_FLAVOR_SEQUENCE;
+// The arrays below are indexed by flavor value 1..NUM_FLAVORS; slot 0 is unused.
+// Total number of candies of each flavor over the whole game.
+std::array<int, NUM_FLAVORS + 1> G_flavor_total_counts;
+// Inclusive (first, last) target column range per flavor; first > last means "no range".
+std::array<std::pair<int, int>, NUM_FLAVORS + 1> G_target_col_ranges;
+// True for flavors that occur at least once in the sequence.
+std::array<bool, NUM_FLAVORS + 1> G_flavor_active;
+// Lookahead parameters
+// Number of placement/tilt levels explored below the tilt being scored.
+const int MAX_LOOKAHEAD_DEPTH = 2;
+// Final Iteration: Reverted to sample counts from Iteration 2, which scored highest.
+// Entry k is the placement sample budget at search level k, where
+// k = MAX_LOOKAHEAD_DEPTH - depth_remaining (0 is the level nearest the root).
+static constexpr std::array<int, MAX_LOOKAHEAD_DEPTH> NUM_SAMPLES_CONFIG = {23, 9};
+// 64-bit xorshift pseudo-random generator (shift triple 13, 7, 17).
+// The state `x` is public so callers can reseed it directly.
+struct XorshiftRNG {
+    uint64_t x;
+    // Seeds from the steady clock. An all-zero state is a fixed point of the
+    // xorshift step (next() would return 0 forever), so a zero reading is
+    // replaced by a fixed non-zero constant.
+    XorshiftRNG() : x(std::chrono::steady_clock::now().time_since_epoch().count()) {
+        if (x == 0) x = 0x9E3779B97F4A7C15ULL;
+    }
+    // Advances the state by one step and returns the new state.
+    uint64_t next() {
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        return x;
+    }
+    // Returns an integer in [min_val, max_val]. When min_val >= max_val it
+    // returns min_val without consuming a random number. The value is obtained
+    // by modulo reduction, so it is only approximately uniform.
+    int uniform_int(int min_val, int max_val) {
+        if (min_val >= max_val) return min_val;
+        uint64_t range = static_cast<uint64_t>(max_val) - min_val + 1;
+        return min_val + static_cast<int>(next() % range);
+    }
+};
+// Process-wide generator used for sampling placements during lookahead.
+XorshiftRNG rng;
+// One candy on the board: its row `r`, column `c`, and flavor value.
+struct Candy {
+    int r, c, flavor;
+};
+// Complete state of the candy box at one point in the game.
+// `board[r][c]` holds the flavor in a cell (0 = empty); `candies_list` mirrors
+// the occupied cells so the heuristics can iterate candies directly;
+// `turn_num_1_indexed` is the turn this state belongs to and drives the
+// turn-dependent weights in evaluate().
+struct GameState {
+    std::array<std::array<int, GRID_SIZE>, GRID_SIZE> board;
+    std::vector<Candy> candies_list;
+    int turn_num_1_indexed;
+    // Starts with an empty board at turn 0.
+    GameState() : turn_num_1_indexed(0) {
+        for (int i = 0; i < GRID_SIZE; ++i) {
+            board[i].fill(0);
+        }
+        candies_list.reserve(NUM_TURNS);
+    }
+    GameState(const GameState& other) = default;
+    GameState& operator=(const GameState& other) = default;
+    GameState(GameState&& other) noexcept = default;
+    GameState& operator=(GameState&& other) noexcept = default;
+    // Puts a candy of `flavor` at (r, c) and records it in candies_list.
+    // find_pth_empty_cell() reports "no such cell" as {-1, -1}; writing through
+    // that sentinel would index outside the board, and a flavor outside
+    // 1..NUM_FLAVORS would later index outside the per-flavor arrays, so such
+    // requests are ignored and the state is left unchanged.
+    void place_candy(int r, int c, int flavor) {
+        if (r < 0 || r >= GRID_SIZE || c < 0 || c >= GRID_SIZE) return;
+        if (flavor < 1 || flavor > NUM_FLAVORS) return;
+        board[r][c] = flavor;
+        candies_list.push_back({r, c, flavor});
+    }
+    // Returns the (row, col) of the p-th empty cell (1-indexed) in row-major
+    // order, or {-1, -1} when fewer than p cells are empty (or p < 1).
+    std::pair<int, int> find_pth_empty_cell(int p_1_indexed) const {
+        int count = 0;
+        for (int r_idx = 0; r_idx < GRID_SIZE; ++r_idx) {
+            for (int c_idx = 0; c_idx < GRID_SIZE; ++c_idx) {
+                if (board[r_idx][c_idx] == 0) {
+                    count++;
+                    if (count == p_1_indexed) {
+                        return {r_idx, c_idx};
+                    }
+                }
+            }
+        }
+        return {-1, -1};
+    }
+    // Number of empty cells, derived from the size of candies_list.
+    int count_empty_cells() const {
+        return GRID_SIZE * GRID_SIZE - static_cast<int>(candies_list.size());
+    }
+    // Slides every candy as far as possible in direction `dir_idx`
+    // (0 = F/up, 1 = B/down, 2 = L/left, any other value = R/right), keeping the
+    // relative order of candies within each column or row, then rebuilds
+    // candies_list from the board.
+    void apply_tilt(int dir_idx) {
+        if (dir_idx == 0) { // F (Up)
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                int current_write_r = 0; // next free row from the top
+                for (int r = 0; r < GRID_SIZE; ++r) {
+                    if (board[r][c] != 0) {
+                        if (r != current_write_r) {
+                            board[current_write_r][c] = board[r][c];
+                            board[r][c] = 0;
+                        }
+                        current_write_r++;
+                    }
+                }
+            }
+        } else if (dir_idx == 1) { // B (Down)
+            for (int c = 0; c < GRID_SIZE; ++c) {
+                int current_write_r = GRID_SIZE - 1; // next free row from the bottom
+                for (int r = GRID_SIZE - 1; r >= 0; --r) {
+                    if (board[r][c] != 0) {
+                        if (r != current_write_r) {
+                            board[current_write_r][c] = board[r][c];
+                            board[r][c] = 0;
+                        }
+                        current_write_r--;
+                    }
+                }
+            }
+        } else if (dir_idx == 2) { // L (Left)
+            for (int r = 0; r < GRID_SIZE; ++r) {
+                int current_write_c = 0; // next free column from the left
+                for (int c = 0; c < GRID_SIZE; ++c) {
+                    if (board[r][c] != 0) {
+                        if (c != current_write_c) {
+                            board[r][current_write_c] = board[r][c];
+                            board[r][c] = 0;
+                        }
+                        current_write_c++;
+                    }
+                }
+            }
+        } else { // R (Right, dir_idx == 3)
+            for (int r = 0; r < GRID_SIZE; ++r) {
+                int current_write_c = GRID_SIZE - 1; // next free column from the right
+                for (int c = GRID_SIZE - 1; c >= 0; --c) {
+                    if (board[r][c] != 0) {
+                        if (c != current_write_c) {
+                            board[r][current_write_c] = board[r][c];
+                            board[r][c] = 0;
+                        }
+                        current_write_c--;
+                    }
+                }
+            }
+        }
+        rebuild_candies_list_from_board();
+    }
+    // Replaces candies_list with the occupied cells of the board in row-major order.
+    void rebuild_candies_list_from_board() {
+        candies_list.clear();
+        for (int r_idx = 0; r_idx < GRID_SIZE; ++r_idx) {
+            for (int c_idx = 0; c_idx < GRID_SIZE; ++c_idx) {
+                if (board[r_idx][c_idx] != 0) {
+                    candies_list.push_back({r_idx, c_idx, board[r_idx][c_idx]});
+                }
+            }
+        }
+    }
+    // Sum over all same-flavor 4-connected components of (component size)^2,
+    // found by a breadth-first flood fill over the board.
+    long long calculate_sum_sq_comp_size() const {
+        long long total_sq_sum = 0;
+        std::array<std::array<bool, GRID_SIZE>, GRID_SIZE> visited;
+        for (int i = 0; i < GRID_SIZE; ++i) visited[i].fill(false);
+        // Fixed-size queue: each cell is enqueued at most once per call.
+        std::array<std::pair<int, int>, GRID_SIZE * GRID_SIZE> q_arr;
+        for (int r_start = 0; r_start < GRID_SIZE; ++r_start) {
+            for (int c_start = 0; c_start < GRID_SIZE; ++c_start) {
+                if (board[r_start][c_start] != 0 && !visited[r_start][c_start]) {
+                    int current_flavor = board[r_start][c_start];
+                    long long current_comp_size = 0;
+                    q_arr[0] = {r_start, c_start};
+                    visited[r_start][c_start] = true;
+                    int head = 0;
+                    int tail = 1;
+                    while(head < tail){
+                        current_comp_size++;
+                        const std::pair<int,int>& curr_cell = q_arr[head];
+                        const int curr_r = curr_cell.first;
+                        const int curr_c = curr_cell.second;
+                        head++;
+                        for (int i = 0; i < NUM_DIRECTIONS; ++i) {
+                            int nr = curr_r + DR[i];
+                            int nc = curr_c + DC[i];
+                            if (nr >= 0 && nr < GRID_SIZE && nc >= 0 && nc < GRID_SIZE &&
+                                !visited[nr][nc] && board[nr][nc] == current_flavor) {
+                                visited[nr][nc] = true;
+                                q_arr[tail++] = {nr, nc};
+                            }
+                        }
+                    }
+                    total_sq_sum += current_comp_size * current_comp_size;
+                }
+            }
+        }
+        return total_sq_sum;
+    }
+    // Total Manhattan distance of every candy from the centre of mass of its
+    // own flavor. Flavors with a single candy contribute nothing.
+    double calculate_distance_penalty_CoM() const {
+        if (candies_list.empty()) return 0.0;
+        std::array<double, NUM_FLAVORS + 1> sum_r; sum_r.fill(0.0);
+        std::array<double, NUM_FLAVORS + 1> sum_c; sum_c.fill(0.0);
+        std::array<int, NUM_FLAVORS + 1> counts; counts.fill(0);
+        for (const auto& candy : candies_list) {
+            counts[candy.flavor]++;
+            sum_r[candy.flavor] += candy.r;
+            sum_c[candy.flavor] += candy.c;
+        }
+        // Entries for absent flavors stay unset; they are never read below
+        // because every candy's own flavor has a count of at least one.
+        std::array<std::pair<double, double>, NUM_FLAVORS + 1> com_coords;
+        for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
+            if (counts[fl] > 0) {
+                com_coords[fl] = {sum_r[fl] / counts[fl], sum_c[fl] / counts[fl]};
+            }
+        }
+        double total_manhattan_dist_penalty = 0;
+        for (const auto& candy : candies_list) {
+            if (counts[candy.flavor] > 1) {
+                const auto& com = com_coords[candy.flavor];
+                total_manhattan_dist_penalty += std::abs(static_cast<double>(candy.r) - com.first) +
+                                                std::abs(static_cast<double>(candy.c) - com.second);
+            }
+        }
+        return total_manhattan_dist_penalty;
+    }
+    // Sum, over all candies, of how many columns the candy lies outside its
+    // flavor's target column range. Inactive flavors and empty ranges are skipped.
+    double calculate_region_penalty() const {
+        if (candies_list.empty()) return 0.0;
+        double penalty = 0.0;
+        for (const auto& candy : candies_list) {
+            if (!G_flavor_active[candy.flavor]) continue;
+            const auto& range = G_target_col_ranges[candy.flavor];
+            int min_target_c = range.first;
+            int max_target_c = range.second;
+            if (min_target_c > max_target_c) continue;
+            if (candy.c < min_target_c) {
+                penalty += (min_target_c - candy.c);
+            } else if (candy.c > max_target_c) {
+                penalty += (candy.c - max_target_c);
+            }
+        }
+        return penalty;
+    }
+    // Bonus for candies inside their target column range that also touch a
+    // board edge: 0.5 for the top or bottom row, plus 0.5 for the left or right
+    // column when that column is the outer end of the range.
+    double calculate_edge_bonus() const {
+        double bonus_val = 0.0;
+        const double PER_CANDY_BONUS_FACTOR = 0.5;
+        for (const auto& candy : candies_list) {
+            if (!G_flavor_active[candy.flavor]) continue;
+            const auto& range = G_target_col_ranges[candy.flavor];
+            int min_target_c = range.first;
+            int max_target_c = range.second;
+            if (min_target_c > max_target_c) continue;
+            bool in_correct_strip = (candy.c >= min_target_c && candy.c <= max_target_c);
+            if (in_correct_strip) {
+                if (candy.r == 0 || candy.r == GRID_SIZE - 1) {
+                    bonus_val += PER_CANDY_BONUS_FACTOR;
+                }
+                if ((candy.c == 0 && min_target_c == 0) ||
+                    (candy.c == GRID_SIZE - 1 && max_target_c == GRID_SIZE - 1)) {
+                     bonus_val += PER_CANDY_BONUS_FACTOR;
+                }
+            }
+        }
+        return bonus_val;
+    }
+    // Heuristic value of this state (higher is better): a weighted combination
+    // of the connectivity score, the centre-of-mass spread penalty, the
+    // target-strip penalty and the edge bonus. The weights depend on the turn
+    // number: connectivity and edge bonus gain weight as the game progresses,
+    // while the two penalties lose weight. An untouched state at turn 0 scores 0.
+    double evaluate() const {
+        if (candies_list.empty() && turn_num_1_indexed == 0) return 0.0;
+        long long sum_sq_comp = calculate_sum_sq_comp_size();
+        double dist_penalty_com = calculate_distance_penalty_CoM();
+        double region_penalty_val = calculate_region_penalty();
+        double edge_bonus_val = calculate_edge_bonus();
+        double current_turn_double = static_cast<double>(turn_num_1_indexed);
+        // Coefficients from Iteration 2 (best scoring), with small tweak to C
+        double A_coeff_conn = 15.0 + 1.1 * current_turn_double;
+        double B_coeff_com_base = std::max(0.0, 170.0 - 1.7 * current_turn_double);
+        // Final iteration tweak for C_coeff_region_penalty_direct:
+        double C_coeff_region_penalty_direct = std::max(2.0, 27.0 - 0.17 * current_turn_double);
+        double D_coeff_edge_bonus = 5.0 + 0.2 * current_turn_double;
+        return A_coeff_conn * sum_sq_comp
+             - B_coeff_com_base * dist_penalty_com
+             - C_coeff_region_penalty_direct * region_penalty_val
+             + D_coeff_edge_bonus * edge_bonus_val;
+    }
+};
+// Forward declaration
+double eval_lookahead(const GameState& state_after_tilt, int turn_T_of_candy_just_processed, int depth_remaining);
+// Scores each of the four tilts of the given post-placement board with
+// eval_lookahead() and returns the DIR_CHARS character of the best one.
+// Directions are tried in index order and only a strictly better score
+// replaces the current best, so ties go to the lowest index.
+char decide_tilt_direction_logic(const GameState& current_gs_after_placement) {
+    const int turn_now = current_gs_after_placement.turn_num_1_indexed;
+    int winner = 0;
+    double winner_value = std::numeric_limits<double>::lowest();
+    for (int dir = 0; dir < NUM_DIRECTIONS; ++dir) {
+        GameState tilted = current_gs_after_placement;
+        tilted.apply_tilt(dir);
+        const double value = eval_lookahead(tilted, turn_now, MAX_LOOKAHEAD_DEPTH);
+        if (value > winner_value) {
+            winner = dir;
+            winner_value = value;
+        }
+    }
+    return DIR_CHARS[winner];
+}
+// Expected heuristic value of a board that has just been tilted on turn
+// `turn_T_of_candy_just_processed`, looking `depth_remaining` candies ahead.
+// Each level averages over placements of the next candy (all empty cells when
+// the sample budget covers them, otherwise random ranks drawn from `rng`) and,
+// for each placement, takes the best of the four follow-up tilts.
+double eval_lookahead(const GameState& state_after_tilt, int turn_T_of_candy_just_processed, int depth_remaining) {
+    // Leaf cases: depth used up, last turn reached, or nothing left to place.
+    const bool at_horizon = (depth_remaining == 0) || (turn_T_of_candy_just_processed == NUM_TURNS);
+    if (at_horizon) return state_after_tilt.evaluate();
+    const int empties = state_after_tilt.count_empty_cells();
+    if (empties == 0) return state_after_tilt.evaluate();
+    const int flavor_to_place = G_FLAVOR_SEQUENCE[turn_T_of_candy_just_processed];
+    const int sample_budget = NUM_SAMPLES_CONFIG[MAX_LOOKAHEAD_DEPTH - depth_remaining];
+    const int samples = std::min(sample_budget, empties);
+    if (samples == 0) return state_after_tilt.evaluate();
+    const bool exhaustive = (samples == empties);
+    const int turn_after_placement = turn_T_of_candy_just_processed + 1;
+    double total = 0.0;
+    for (int k = 1; k <= samples; ++k) {
+        // The random rank is drawn before any recursion for this sample.
+        const int cell_rank = exhaustive ? k : rng.uniform_int(1, empties);
+        GameState placed = state_after_tilt;
+        const std::pair<int, int> cell = placed.find_pth_empty_cell(cell_rank);
+        placed.place_candy(cell.first, cell.second, flavor_to_place);
+        placed.turn_num_1_indexed = turn_after_placement;
+        double best_reply = std::numeric_limits<double>::lowest();
+        for (int dir = 0; dir < NUM_DIRECTIONS; ++dir) {
+            GameState tilted = placed;
+            tilted.apply_tilt(dir);
+            const double reply_value = eval_lookahead(tilted, turn_after_placement, depth_remaining - 1);
+            if (reply_value > best_reply) best_reply = reply_value;
+        }
+        total += best_reply;
+    }
+    return total / samples;
+}
+// Reads the flavor of every turn from stdin and fills the file-level tables derived from it:
+// per-flavor totals, active flags, and an inclusive target column range for each flavor.
+void initialize_global_data() {
+    G_flavor_total_counts.fill(0);
+    // One flavor id per turn; tally how often each flavor occurs.
+    for (int t = 0; t < NUM_TURNS; ++t) {
+        std::cin >> G_FLAVOR_SEQUENCE[t];
+        G_flavor_total_counts[G_FLAVOR_SEQUENCE[t]]++;
+    }
+    G_flavor_active.fill(false);
+    // Collect (count, flavor id) for every flavor that appears at least once.
+    std::vector<std::pair<int, int>> sorter_flavor_count_id;
+    for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
+        if (G_flavor_total_counts[fl] > 0) {
+            G_flavor_active[fl] = true;
+            sorter_flavor_count_id.push_back({G_flavor_total_counts[fl], fl});
+        }
+    }
+    // Priority order: larger count first, ties broken by smaller flavor id.
+    std::sort(sorter_flavor_count_id.begin(), sorter_flavor_count_id.end(),
+        [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
+        if (a.first != b.first) {
+            return a.first > b.first;
+        }
+        return a.second < b.second;
+    });
+    std::vector<int> active_flavor_ids_sorted_by_priority;
+    for(const auto& p : sorter_flavor_count_id) {
+        active_flavor_ids_sorted_by_priority.push_back(p.second);
+    }
+    // Column widths indexed by flavor id (index 0 unused).
+    std::vector<int> assigned_widths(NUM_FLAVORS + 1, 0);
+    int total_assigned_width_sum = 0;
+    if (!active_flavor_ids_sorted_by_priority.empty()) {
+        double total_candies_for_proportion = 0;
+        for(int fl_id : active_flavor_ids_sorted_by_priority) {
+            total_candies_for_proportion += G_flavor_total_counts[fl_id];
+        }
+        // Defensive divide-by-zero guard; every active flavor has a positive count, so the sum is positive here.
+        if (total_candies_for_proportion == 0) total_candies_for_proportion = 1;
+        // First pass: each flavor gets the floor of its proportional share of GRID_SIZE columns.
+        for (int fl_id : active_flavor_ids_sorted_by_priority) {
+            assigned_widths[fl_id] = static_cast<int>(std::floor(
+                static_cast<double>(GRID_SIZE) * G_flavor_total_counts[fl_id] / total_candies_for_proportion
+            ));
+            total_assigned_width_sum += assigned_widths[fl_id];
+        }
+        // Second pass: columns lost to flooring are handed out one at a time, round-robin in priority order.
+        int remaining_width_to_assign = GRID_SIZE - total_assigned_width_sum;
+        for (int i = 0; i < remaining_width_to_assign; ++i) {
+            assigned_widths[active_flavor_ids_sorted_by_priority[i % active_flavor_ids_sorted_by_priority.size()]]++;
+        }
+    }
+    // Lay the ranges out left to right in priority order; each range is inclusive {first_col, last_col}.
+    int current_col_start = 0;
+    for (int fl_id_in_sorted_order : active_flavor_ids_sorted_by_priority) {
+        if (assigned_widths[fl_id_in_sorted_order] > 0) {
+            G_target_col_ranges[fl_id_in_sorted_order] = {current_col_start, current_col_start + assigned_widths[fl_id_in_sorted_order] - 1};
+            current_col_start += assigned_widths[fl_id_in_sorted_order];
+        } else {
+            // Zero-width flavor: store an empty range (last < first) at the current position.
+            G_target_col_ranges[fl_id_in_sorted_order] = {current_col_start, current_col_start - 1};
+        }
+    }
+    // Flavors that never appear get the empty range {0, -1}.
+    for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
+        if (!G_flavor_active[fl]) {
+            G_target_col_ranges[fl] = {0, -1};
+        }
+    }
+}
+// Entry point: loads the flavor sequence, then for each turn reads the placement index,
+// places the candy, prints the chosen tilt direction, and applies that tilt to the local state.
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    initialize_global_data();
+    GameState current_gs;
+    for (int t_0_indexed = 0; t_0_indexed < NUM_TURNS; ++t_0_indexed) {
+        current_gs.turn_num_1_indexed = t_0_indexed + 1;
+        // 1-indexed rank of the empty cell that receives this turn's candy.
+        int p_val_1_indexed;
+        std::cin >> p_val_1_indexed;
+        std::pair<int, int> candy_loc = current_gs.find_pth_empty_cell(p_val_1_indexed);
+        current_gs.place_candy(candy_loc.first, candy_loc.second, G_FLAVOR_SEQUENCE[t_0_indexed]);
+        char chosen_dir_char = decide_tilt_direction_logic(current_gs);
+        // std::endl flushes, so the answer is written out before the next placement is read.
+        std::cout << chosen_dir_char << std::endl;
+        // Translate the direction character back to its index; stays 0 if no entry of DIR_CHARS matches.
+        int dir_idx_to_apply = 0;
+        for(int k=0; k<NUM_DIRECTIONS; ++k) {
+            if(DIR_CHARS[k] == chosen_dir_char) {
+                dir_idx_to_apply = k;
+                break;
+            }
+        }
+        // Keep the local board in sync with the direction that was just printed.
+        current_gs.apply_tilt(dir_idx_to_apply);
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc016/best_program.cpp ADDED Viewed

	@@ -0,0 +1,244 @@

+# EVOLVE-BLOCK-START
+#include <bits/stdc++.h>
+using namespace std;
+/* Doc: Encode with deterministic block patterns. Signature = sorted expected noisy degrees + expected edge count.
+   Select patterns via farthest-first in this signature space; decode by nearest neighbor on the same signature. */
+/* Sum of squared element-wise differences. Iterates over a.size() entries, so b must be at least as long as a. */
+static inline double sse(const vector<double>& a, const vector<double>& b) {
+    const int len = (int)a.size();
+    double acc = 0.0;
+    int idx = 0;
+    while (idx < len) {
+        const double diff = a[idx] - b[idx];
+        acc += diff * diff;
+        ++idx;
+    }
+    return acc;
+}
+/* Doc: Split N vertices into s blocks of positive size whose sizes roughly follow a geometric
+   progression with ratio r (block i has weight r^i), so that blocks get distinct degree levels. */
+static inline vector<int> build_block_sizes(int N, int s, double r) {
+    // Geometric weights and their total.
+    vector<double> weight(s);
+    double weight_sum = 0.0;
+    for (int k = 0; k < s; ++k) {
+        weight[k] = pow(r, k);
+        weight_sum += weight[k];
+    }
+    // Floor of each ideal share (at least 1), remembering the fractional part that was dropped.
+    vector<int> sizes(s);
+    vector<pair<double,int>> remainder;
+    int assigned = 0;
+    for (int k = 0; k < s; ++k) {
+        const double ideal = (double)N * weight[k] / weight_sum;
+        const double whole = floor(ideal);
+        sizes[k] = max(1, (int)whole);
+        assigned += sizes[k];
+        remainder.emplace_back(ideal - whole, k);
+    }
+    sort(remainder.begin(), remainder.end(), greater<pair<double,int>>());
+    // Too few vertices placed: grow blocks in order of largest dropped fraction, cycling if needed.
+    while (assigned < N) {
+        for (int t = 0; t < s && assigned < N; ++t) {
+            ++sizes[remainder[t].second];
+            ++assigned;
+        }
+    }
+    // Too many (the max(1, .) clamp can overshoot): shrink from the last block backwards, never below 1.
+    while (assigned > N) {
+        for (int k = s - 1; k >= 0 && assigned > N; --k) {
+            if (sizes[k] > 1) {
+                --sizes[k];
+                --assigned;
+            }
+        }
+    }
+    return sizes;
+}
+/* Doc: Expected degree of every vertex after each edge slot is flipped with probability eps,
+   returned in ascending order: mu = eps*(N-1) + (1-2*eps)*deg, where deg is the noiseless degree
+   shared by all vertices of a block. */
+static inline vector<double> build_mu_sorted(const vector<int>& sz, const vector<unsigned int>& mask, const vector<unsigned char>& diag, double eps) {
+    const int blocks = (int)sz.size();
+    int N = 0;
+    for (int b = 0; b < blocks; ++b) N += sz[b];
+    const double a = eps * (N - 1);
+    const double b = 1.0 - 2.0 * eps;
+    vector<double> mu;
+    mu.reserve(N);
+    for (int blk = 0; blk < blocks; ++blk) {
+        // Noiseless degree: in-block neighbours (if the block is a clique) plus every connected block.
+        int deg = 0;
+        if (diag[blk]) deg = sz[blk] - 1;
+        const unsigned int row = mask[blk];
+        for (int other = 0; other < blocks; ++other) {
+            if (other == blk) continue;
+            if ((row >> other) & 1U) deg += sz[other];
+        }
+        const double level = a + b * (double)deg;
+        // Every vertex of the block shares the same expectation.
+        for (int rep = sz[blk]; rep > 0; --rep) mu.push_back(level);
+    }
+    sort(mu.begin(), mu.end());
+    return mu;
+}
+/* Doc: Draw a random block pattern: one coin per block for diag[b], then one coin per unordered
+   block pair (i<j, row-major) that sets both mirrored bits of the cross-block mask. */
+static inline void gen_random_pattern(int s, mt19937& rng, vector<unsigned int>& mask, vector<unsigned char>& diag) {
+    uniform_int_distribution<int> coin(0, 1);
+    mask.assign(s, 0U);
+    diag.assign(s, 0);
+    // Diagonal bits are drawn first, in block order.
+    for (auto& flag : diag) flag = (unsigned char)coin(rng);
+    for (int lo = 0; lo < s; ++lo) {
+        for (int hi = lo + 1; hi < s; ++hi) {
+            if (coin(rng) == 0) continue;
+            mask[lo] |= (1U << hi);
+            mask[hi] |= (1U << lo);
+        }
+    }
+}
+/* Doc: Add a handful of structured patterns (empty/full/diag/off/banded/threshold) to diversify the pool.
+   Patterns are appended to masks/diags in a fixed order; masks[k][i] bit j means block i connects to block j,
+   diags[k][i] != 0 means block i is internally connected. */
+static inline void gen_structured_patterns(int s, vector<vector<unsigned int>>& masks, vector<vector<unsigned char>>& diags) {
+    auto add = [&](const vector<unsigned int>& m, const vector<unsigned char>& d){ masks.push_back(m); diags.push_back(d); };
+    // m: no cross-block edges; d0/d1: all diagonal bits cleared/set.
+    vector<unsigned int> m(s,0); vector<unsigned char> d0(s,0), d1(s,1);
+    // empty, full
+    add(m, d0);
+    vector<unsigned int> full(s,0);
+    for (int i = 0; i < s; ++i) for (int j = 0; j < s; ++j) if (i!=j) full[i] |= (1U<<j);
+    add(full, d1);
+    // diag-only, off-only
+    add(m, d1);
+    add(full, d0);
+    // banded |i-j|<=t
+    // Diagonal bits are all set to the parity of t. Note t = 0 yields no cross edges and diag 0, i.e. the empty pattern again.
+    for (int t = 0; t < s; ++t) {
+        vector<unsigned int> mm(s,0); vector<unsigned char> dd(s, (unsigned char)(t&1));
+        for (int i = 0; i < s; ++i) for (int j = 0; j < s; ++j) if (i!=j && abs(i-j)<=t) mm[i] |= (1U<<j);
+        add(mm, dd);
+    }
+    // threshold i+j <= T
+    // T advances in steps of max(1, s/3); diagonal bits are all set to the parity of T/2.
+    for (int T = 0; T <= 2*(s-1); T += max(1,s/3)) {
+        vector<unsigned int> mm(s,0); vector<unsigned char> dd(s, (unsigned char)((T/2)&1));
+        for (int i = 0; i < s; ++i) for (int j = i+1; j < s; ++j) if (i + j <= T) { mm[i] |= (1U<<j); mm[j] |= (1U<<i); }
+        add(mm, dd);
+    }
+}
+/* Doc: Pick N as small as possible for score while ensuring enough distinct block patterns and noise robustness.
+   Start near a noise-dependent baseline, then increase N until capacity ~ 2^{s(s-1)/2 + b2} >= M (or exponent >=30).
+   M is the number of graphs to encode, eps the edge flip probability; the result is in [8, 100]. */
+static inline int chooseN(int M, double eps) {
+    // Block count as a function of N, clamped to [4, 8].
+    // NOTE(review): main() derives its block count with an extra eps-dependent +/-1 term, so the
+    // capacity estimated here may be computed for a different s than the one actually used — confirm intent.
+    auto s_of_N = [](int N){ return min(8, max(4, (int)floor(log2((double)N)) - 1)); };
+    // noise-aware baseline with small-N bias at low eps
+    int N = (int)llround(10.0 + 100.0 * eps);
+    int Nmin = (eps < 0.15 ? 10 : (eps < 0.28 ? 14 : 18));
+    N = max(N, Nmin);
+    N = min(N, 100);
+    // Same ratio formula that main() uses for BLOCK_R.
+    double r = max(1.35, min(2.35, 1.5 + 1.5 * eps));
+    while (true) {
+        int s = s_of_N(N);
+        auto sz = build_block_sizes(N, s, r);
+        // b2 counts blocks with at least two vertices, i.e. those whose diagonal bit changes the graph.
+        int b2 = 0; for (int v : sz) if (v >= 2) ++b2;
+        // Number of free pattern bits: one per block pair plus one per non-singleton block.
+        int expo = s*(s-1)/2 + b2;
+        // The expo >= 30 test short-circuits first, which keeps the shift below within int range.
+        if (expo >= 30 || (1 << expo) >= M) break;
+        if (N >= 100) break;
+        ++N;
+    }
+    if (M > 80 && eps > 0.25) N = min(100, N + 2); // stabilize in high-noise/high-M
+    return max(8, N);
+}
+/* Render a block pattern as the N*(N-1)/2-character edge string: position p is '1' when the two
+   vertices of pairs[p] are connected, either inside one block (diag) or across blocks (mask). */
+static inline string build_graph_bits(int N, const vector<pair<unsigned char,unsigned char>>& pairs,
+                                      const vector<int>& belong, const vector<unsigned int>& mask, const vector<unsigned char>& diag) {
+    const int total_pairs = N * (N - 1) / 2;
+    string bits;
+    bits.reserve(total_pairs);
+    for (int idx = 0; idx < total_pairs; ++idx) {
+        const int block_u = belong[pairs[idx].first];
+        const int block_v = belong[pairs[idx].second];
+        bool linked;
+        if (block_u == block_v) {
+            linked = diag[block_u] != 0;
+        } else {
+            linked = ((mask[block_u] >> block_v) & 1U) != 0;
+        }
+        bits.push_back(linked ? '1' : '0');
+    }
+    return bits;
+}
+// Entry point. Reads M and eps, chooses N and a block partition, builds a pool of candidate block
+// patterns, selects M of them by farthest-first sampling on their expected noisy signatures, prints
+// the M graphs, and then answers 100 queries by nearest signature.
+int main() {
+    ios::sync_with_stdio(false);
+    cin.tie(nullptr);
+    int M; double eps;
+    if (!(cin >> M >> eps)) return 0;
+    // Tune block-size ratio by noise level to widen degree gaps as noise increases
+    double BLOCK_R = max(1.35, min(2.35, 1.5 + 1.5 * eps));
+    // Choose N and s (number of blocks)
+    int N = chooseN(M, eps);
+    int s = min(8, max(4, (int)floor(log2((double)N)) - 1 + (eps < 0.08 ? 1 : 0) - (eps > 0.28 ? 1 : 0)));
+    vector<int> sz = build_block_sizes(N, s, BLOCK_R);
+    // Vertex -> block mapping
+    vector<int> belong(N, 0);
+    for (int i = 0, cur = 0, b = 0; b < s; ++b) for (int t = 0; t < sz[b]; ++t) belong[cur++] = b;
+    // Precompute (i,j) pairs and constants
+    // Pairs are listed for i<j in row-major order, which is the order of characters in the edge strings.
+    int L = N * (N - 1) / 2;
+    vector<pair<unsigned char,unsigned char>> pairs;
+    pairs.reserve(L);
+    for (int i = 0; i < N; ++i) for (int j = i + 1; j < N; ++j)
+        pairs.emplace_back((unsigned char)i, (unsigned char)j);
+    // Variance-normalized weights for fused scoring (degrees + edge count)
+    // The 1e-12 term keeps the divisions finite when eps is 0.
+    double invVarDeg = 1.0 / ((double)(N - 1) * eps * (1.0 - eps) + 1e-12);
+    double invVarM   = 1.0 / ((double)L * eps * (1.0 - eps) + 1e-12);
+    double degW = 1.0 + 0.8 * eps;
+    // Build candidate pool of block patterns (structured + random + complements)
+    // The seed depends only on (M, eps), so a given input always produces the same pool.
+    int seed = 146527 + M * 1000 + (int)llround(eps * 100.0) * 7919;
+    mt19937 rng(seed);
+    vector<vector<unsigned int>> cand_masks;
+    vector<vector<unsigned char>> cand_diags;
+    gen_structured_patterns(s, cand_masks, cand_diags);
+    // Appends the complement of (m, d): every cross-block bit and every diagonal bit is inverted.
+    auto add_comp = [&](const vector<unsigned int>& m, const vector<unsigned char>& d){
+        vector<unsigned int> mc(s, 0u); vector<unsigned char> dc(s, 0);
+        unsigned int all = (s >= 31 ? 0x7FFFFFFFu : ((1u << s) - 1u));
+        for (int i = 0; i < s; ++i) { unsigned int fulli = all & ~(1u << i); mc[i] = fulli ^ m[i]; dc[i] = (unsigned char)(1 - d[i]); }
+        cand_masks.push_back(mc); cand_diags.push_back(dc);
+    };
+    int RAND_CANDS = (eps <= 0.12 ? max(6 * M, 384) : min(3072, max(10 * M, 512)));
+    // Each random pattern is added together with its complement (the structured ones are not complemented here).
+    for (int t = 0; t < RAND_CANDS; ++t) {
+        vector<unsigned int> m; vector<unsigned char> d;
+        gen_random_pattern(s, rng, m, d);
+        cand_masks.push_back(m); cand_diags.push_back(d);
+        add_comp(m, d);
+    }
+    // Candidate with its precomputed signature: sorted expected degrees (mu) and expected edge count (mu_m).
+    struct Cand { vector<unsigned int> mask; vector<unsigned char> diag; vector<double> mu; double mu_m; };
+    vector<Cand> pool; pool.reserve(cand_masks.size());
+    for (size_t i = 0; i < cand_masks.size(); ++i) {
+        vector<double> mu = build_mu_sorted(sz, cand_masks[i], cand_diags[i], eps);
+        // Noiseless edge count: cliques inside blocks with diag set, plus complete bipartite links between connected blocks.
+        long long edges = 0;
+        for (int b = 0; b < s; ++b) if (cand_diags[i][b]) edges += 1LL * sz[b] * (sz[b] - 1) / 2;
+        for (int b = 0; b < s; ++b) for (int c = b + 1; c < s; ++c)
+            if ((cand_masks[i][b] >> c) & 1U) edges += 1LL * sz[b] * sz[c];
+        // Expected edge count after each of the L slots is flipped with probability eps.
+        double mu_m = eps * (double)L + (1.0 - 2.0 * eps) * (double)edges;
+        pool.push_back({cand_masks[i], cand_diags[i], move(mu), mu_m});
+    }
+    // Greedy farthest-point sampling using fused metric on (mu_sorted, mu_m)
+    auto fused_dist = [&](int a, int b)->double{
+        double d = sse(pool[a].mu, pool[b].mu) * invVarDeg * degW;
+        double dm = pool[a].mu_m - pool[b].mu_m;
+        return d + dm * dm * invVarM;
+    };
+    vector<int> chosen;
+    int C = (int)pool.size();
+    // Seed the selection with the candidate whose expected degrees are most spread out
+    // (largest sum of squared deviations from their mean); the first such candidate wins ties.
+    int seedIdx = 0; double bestVar = -1.0;
+    for (int i = 0; i < C; ++i) {
+        const auto& mu = pool[i].mu;
+        double mean = 0.0; for (double v : mu) mean += v; mean /= mu.size();
+        double var = 0.0; for (double v : mu) { double d = v - mean; var += d * d; }
+        if (var > bestVar) { bestVar = var; seedIdx = i; }
+    }
+    chosen.push_back(seedIdx);
+    // Repeatedly add the unused candidate whose nearest already-chosen pattern is farthest away.
+    for (int k = 1; k < M; ++k) {
+        int nxt = -1; double far = -1.0;
+        for (int i = 0; i < C; ++i) {
+            bool used = false; for (int id : chosen) if (id == i) { used = true; break; }
+            if (used) continue;
+            double md = 1e300;
+            for (int id : chosen) md = min(md, fused_dist(i, id));
+            if (md > far) { far = md; nxt = i; }
+        }
+        // Pool exhausted: repeat the last pattern so that M graphs are still emitted.
+        if (nxt < 0) nxt = chosen.back();
+        chosen.push_back(nxt);
+    }
+    // Output graphs
+    cout << N << '\n';
+    // Signatures of the emitted graphs, indexed by graph number, kept for decoding.
+    vector<vector<double>> mu_sorted_vec(M, vector<double>(N, 0.0));
+    vector<double> mu_m_vec(M, 0.0);
+    for (int k = 0; k < M; ++k) {
+        const auto& pat = pool[chosen[k]];
+        mu_sorted_vec[k] = pat.mu; mu_m_vec[k] = pat.mu_m;
+        string sbits = build_graph_bits(N, pairs, belong, pat.mask, pat.diag);
+        cout << sbits << '\n';
+    }
+    cout.flush();
+    // Online decoding: fused nearest neighbor using degree multiset + edge count
+    for (int q = 0; q < 100; ++q) {
+        // Stop quietly if the input ends early.
+        string H; if (!(cin >> H)) return 0;
+        // Observed degrees and edge count; H is indexed up to L-1, so it is assumed to hold at least L characters.
+        vector<int> deg(N, 0); int m = 0;
+        for (int p = 0; p < L; ++p) if (H[p] == '1') { auto ab = pairs[p]; deg[(int)ab.first]++; deg[(int)ab.second]++; ++m; }
+        // Sorting makes the degree signature independent of the vertex order.
+        vector<double> dh(N); for (int i = 0; i < N; ++i) dh[i] = (double)deg[i];
+        sort(dh.begin(), dh.end());
+        // Pick the graph with the smallest fused score; the lowest index wins ties.
+        int best = 0; double bestS = 1e300;
+        for (int k = 0; k < M; ++k) {
+            double sdeg = sse(dh, mu_sorted_vec[k]) * invVarDeg * degW;
+            double dm = (double)m - mu_m_vec[k];
+            double score = sdeg + dm * dm * invVarM;
+            if (score < bestS) { bestS = score; best = k; }
+        }
+        cout << best << '\n';
+        cout.flush();
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc016/config.yaml ADDED Viewed

	@@ -0,0 +1,108 @@

+# ALE-Bench ahc016 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+# Search settings.
+# NOTE(review): key semantics and units come from the skydiscover config schema, which is not
+# visible in this file — confirm the hedged units below against it.
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10  # presumably counted in iterations — TODO confirm
+max_solution_length: 60000  # presumably characters — TODO confirm
+# LLM endpoint and the model(s) to sample from.
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600  # presumably seconds — TODO confirm
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n    \n    Story\n\
+    --------\nTakahashi, a genius inventor, invented the time machine called \"Graphorean\" (Graph + DeLorean) that can send\
+    \ a <a href=\"https://en.wikipedia.org/wiki/Graph_(abstract_data_type)\">graph</a> to the past.\nWith this machine, he\
+    \ plans to get rich by playing casino roulette and sending the winning number information to the time before he plays.\n\
+    If he succeeds, he will move to the world line where he has successfully chosen the winning number and become very rich.\n\
+    \nBecause the machine cannot send the winning number directly, he needs to first convert the number into a graph (encoding),\
+    \ then send it, and finally convert the graph back into a number (decoding).\nSending a graph by the machine loses the\
+    \ vertex number information and introduces noise, so he must develop encoding/decoding methods so that the number can\
+    \ be correctly restored.\nIn order to receive a graph with $N$ vertices, he must set an integer $N$ to the machine.\n\
+    Therefore, the number of vertices of the graph to be sent must be determined in advance.\n\nThe time machine will be broken\
+    \ once he uses it, so failure will not be tolerated.\nTherefore, he decided to estimate the success probability by conducting\
+    \ simulations in advance and preparing an encoding/decoding method with as high a success probability as possible.\nFurthermore,\
+    \ because sending a large graph requires a huge amount of energy, it is desirable that the graph's size be as small as\
+    \ possible.\nPlease help him.\n\n\nProblem Statement\n--------\nGiven an integer $M$ and an error rate $\\epsilon$, determine\
+    \ an integer $N$ satisfying $4\\leq N\\leq 100$ and output $N$-vertex undirected graphs $G_0,G_1,\\cdots,G_{M-1}$.\nThe\
+    \ graphs may be disconnected.\nThen process the following query $100$ times.\n\nIn the $k$-th query, you are given an\
+    \ $N$-vertex undirected graph $H_k$.\n$H_k$ is a graph generated from some $G_{s_k}$ as follows.\n\n1. Initialize $H_k=G_{s_k}$.\n\
+    2. For each $(i,j)$ with $0\\leq i<j\\leq N-1$, flip whether or not $H_k$ contains edge $(i,j)$ with probability $\\epsilon$.\n\
+    3. Randomly shuffle the order of the vertices in $H_k$.\n\nAfter receiving $H_k$, predict from which graph $G_{s_k}$ it\
+    \ was generated, and output the predicted value $t_k$ of $s_k$.\n\n\nScoring\n--------\nLet $E$ be the number of failed\
+    \ predictions.\nThen the score for the test case is\n\n\\\\[\n\t\\mathrm{round}\\left(10^9\\times \\frac{0.9^E}{N}\\right)\n\
+    \\\\]\n\n\n\n#### Number of test cases\n- Provisional test: 50\n- System test: 2000. We will publish <a href=\"https://img.atcoder.jp/ahc016/seeds.txt\"\
+    >seeds.txt</a> (sha256=4093b6cb740beea16eb0ecf55120ca6ca6fbef18015ea4a863e64d0bea3de91d) after the contest is over.\n\
+    - System test contains at most one test case for each pair of $(M,\\epsilon)$.\n\nFor each test case, we compute the <font\
+    \ color=\"red\"><strong>relative score</strong></font> $\\mathrm{round}(10^9\\times \\frac{\\mathrm{YOUR}}{\\mathrm{MAX}})$,\
+    \ where YOUR is your score and MAX is the highest score among all competitors obtained on that test case.\nThe score of\
+    \ the submission is the sum of the relative scores.\n\nThe final ranking will be determined by the system test with more\
+    \ inputs which will be run after the contest is over.\nIn both the provisional/system test, if your submission produces\
+    \ illegal output or exceeds the time limit for some test cases, only the score for those test cases will be zero.\nThe\
+    \ system test will be performed only for <font color=\"red\"><strong>the last submission which received a result other\
+    \ than <span class=\"label label-warning\" data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" data-original-title=\"\
+    Compilation Error\">CE</span> </strong></font>.\nBe careful not to make a mistake in the final submission.\n\n\n\n####\
+    \ About Relative Evaluation System\nIn both the provisional/system test, the standings will be calculated using only the\
+    \ last submission which received a result other than <span class=\"label label-warning\" data-toggle=\"tooltip\" data-placement=\"\
+    top\" title=\"\" data-original-title=\"Compilation Error\">CE</span>.\nOnly the last submissions are used to calculate\
+    \ the highest score among all competitors for each test case in calculating the relative scores.\n\nThe scores shown in\
+    \ the standings are relative, and whenever a new submission arrives, all relative scores are recalculated.\nOn the other\
+    \ hand, the score for each submission shown on the submissions page is an absolute score obtained by summing up the scores\
+    \ for each test case, and the relative scores are not shown.\nIn order to know the relative score of submission other\
+    \ than the latest one in the current standings, you need to resubmit it.\n<strong>(Update)</strong> If your submission\
+    \ produces illegal output or exceeds the time limit for some test cases, the absolute score shown in the submissions page\
+    \ becomes 0, but the standings show the sum of the relative scores for the test cases that were answered correctly.\n\n\
+    #### About execution time\nExecution time may vary slightly from run to run.\nIn addition, since system tests simultaneously\
+    \ perform a large number of executions, it has been observed that execution time increases by several percent compared\
+    \ to provisional tests.\nFor these reasons, submissions that are very close to the time limit may result in <span class='label\
+    \ label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span> in the system test.\n\
+    Please measure the execution time in your program to terminate the process, or have enough margin in the execution time.\n\
+    \n\n\nInput and Output\n--------\nFirst, information about the problem setting is given from Standard Input in the following\
+    \ format.\n~~~\n$M$ $\\epsilon$\n~~~\n- $M$ is an integer representing the number of graphs, satisfying $10\\leq M\\leq\
+    \ 100$.\n- $\\epsilon$ is a real number representing the error rate, which is a multiple of $0.01$ and satisfies $0.00\\\
+    leq \\epsilon\\leq 0.4$.\n\nAfter reading the input, output $M$ graphs $G_0,G_1,\\cdots,G_{M-1}$ to Standard Output in\
+    \ the following format.\n~~~\n$N$\n$g_0$\n$\\vdots$\n$g_{M-1}$\n~~~\n- $N$ is an integer representing the number of vertices\
+    \ in each graph, which must satisfy $4\\leq N\\leq 100$.\n- Each $g_k$ is a string of length $N(N-1)/2$, which represents\
+    \ the $k$-th graph $G_k$ as follows. For each $(i,j)$ satisfying $0\\leq i<j\\leq N-1$, express the existence of edge\
+    \ $(i,j)$ as `1` if $G_k$ contains edge $(i,j)$ and `0` if it does not, using one character, and then arrange them in\
+    \ lexicographic order of $(i,j)$. For example, when $N=4$, the string `100101` represents a graph with $4$ vertices connected\
+    \ on a straight line.\n\n<font color=\"red\">**After output, you have to flush Standard Output.**</font>\nOtherwise, the\
+    \ submission might be judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"\
+    Time Limit Exceeded\">TLE</span>.\nAfter outputting $M$ graphs, repeat the following process $100$ times.\n\nIn the $k$-th\
+    \ process, you are given an $N$-vertex graph $H_k$ represented as a string of $N(N-1)/2$ `01` characters in the same format\
+    \ as above, in a single line from Standard Input.\nAfter receiving $H_k$, predict from which graph $G_{s_k}$ $H_k$ is\
+    \ generated and output the prediction $t_k (0\\leq t_k\\leq M-1)$ of $s_k$ to Standard Output.\nThe output must be followed\
+    \ by a new line, and you have to flush Standard Output.\n<font color=\"red\">**Note that $H_{k+1}$ will not be given until\
+    \ you output the $t_k$.**</font>\n\nSample Solution\n--------\nThis is a sample solution in Python.\nIn this program,\
+    \ we set $N=20$, and each graph $G_k$ contains $k$ edges.\nFor each $H_k$, we count the number of edges $m$ and output\
+    \ $\\min(m, M-1)$.\n\n<pre class=\"prettyprint linenums\">M, eps = input().split()\nM = int(M)\neps = float(eps)\nprint(20)\n\
+    for k in range(M):\n\tprint(\"1\" * k + \"0\" * (190 - k))\n\nfor q in range(100):\n\tH = input()\n\tt = min(H.count('1'),\
+    \ M - 1)\n\tprint(t)\n</pre>\n\n\nInput Generation\n--------\nLet $\\mathrm{rand}(L,U)$ be a function that generates a\
+    \ uniform random integer between $L$ and $U$, inclusive.\n$M$ is generated by $\\mathrm{rand}(10,100)$.\n$\\epsilon$ is\
+    \ generated by $0.01\\times \\mathrm{rand}(0,40)$.\nEach $s_k$ is generated by $\\mathrm{rand}(0,M-1)$.\n\n\nTools (Input\
+    \ generator and visualizer)\n--------\n- <a href=\"https://img.atcoder.jp/ahc016/d5f3c281.html?lang=en\">Web version</a>:\
+    \ You can see the visualization of each input/output graph.\n- <a href=\"https://img.atcoder.jp/ahc016/d5f3c281.zip\"\
+    >Local version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n\t\
+    - <a href=\"https://img.atcoder.jp/ahc016/d5f3c281_windows.zip\">Pre-compiled binary for Windows</a>: If you are not familiar\
+    \ with the Rust language environment, please use this instead.\n\n<font color=\"red\">**Please be aware that sharing visualization\
+    \ results or discussing solutions/ideas during the contest is prohibited.**</font>\n\n#### Specification of input/output\
+    \ files used by the tools\nInput files given to the local tester have the following format.\n~~~\n$M$ $\\epsilon$\n$s_0$\n\
+    $\\vdots$\n$s_{99}$\n$\\mathrm{seed}$\n~~~\nThe last $\\mathrm{seed}$ is a random seed value used for noise generation.\n\
+    Since each graph $H_0,\\cdots,H_{99}$ depends on output graphs $G_0,\\cdots,G_{M-1}$, the input file contains only the\
+    \ random seed value.\n\nThe local tester writes outputs from your program directly to the output file.\nYour program may\
+    \ output comment lines starting with `#`.\nThe web version of the visualizer displays the comment lines with the corresponding\
+    \ query, which may be useful for debugging and analysis.\nSince the judge program ignores all comment lines, you can submit\
+    \ a program that outputs comment lines as is.\n\nComment lines that begin with the following have special meaning in the\
+    \ visualizer.\n\n- `#v`: You can tell the visualizer that you have predicted that vertex $i$ in $H_k$ corresponds to vertex\
+    \ $p_i$ in $G_{t_k}$ by outputting it in the following format.\n~~~\n#v $p_0$ $\\cdots$ $p_{N-1}$\n~~~\n- `#h`: If you\
+    \ do not use the provided local tester, you can replace the $H_k$ displayed by the visualizer by outputting in the form\
+    \ `#h 100101 001101`. The left is a graph after adding noise to $G_{s_k}$, and the right is a graph after shuffling the\
+    \ vertex ordering.\n\n\n    Problem constraints:\n    time_limit=5.0 memory_limit=1073741824\n"
+# Evaluator settings.
+# NOTE(review): the unit of `timeout` is not stated in this file — presumably seconds; confirm.
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc016/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc016"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc016/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,495 @@

+# EVOLVE-BLOCK-START
+#ifndef ONLINE_JUDGE
+// #define DEBUG_OUTPUT // Uncomment for local debug prints
+#endif
+#include <iostream>
+#include <vector>
+#include <string>
+#include <numeric>
+#include <algorithm>
+#include <random>
+#include <set>
+#include <array>
+#include <iomanip>
+#include <cmath>
+#include <chrono>
+#include <map>
+// Max N for which we attempt full GED based strategy.
+constexpr int N_MAX_GED_CAP = 6;
+// Adjacency matrix for H_k received in query, or for G_i during pairwise GED. Max N=100
+bool CURRENT_GRAPH_ADJ_QUERY[100][100];
+// Vertex count in use; the small-N mask helpers below are called with this value.
+int N_ACTUAL;
+int L_ACTUAL; // N_ACTUAL * (N_ACTUAL - 1) / 2
+// Stores chosen G_j graphs as adjacency matrices (for GED strategy, N <= N_MAX_GED_CAP)
+std::vector<std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>> G_ADJS_CHOSEN_GED;
+// For large N strategy (edge density)
+std::vector<std::string> G_STRINGS_CHOSEN_LARGE_N;
+std::vector<int> G_EDGE_COUNTS_LARGE_N;
+std::vector<int> P_VERTS_PERM_QUERY; // Permutation vector for GED in query
+// Shared random engine. NOTE(review): its seeding is not visible in this part of the file.
+std::mt19937 RND_ENGINE;
+// Temp storage for canonical mask generation (N <= N_MAX_GED_CAP)
+bool CANON_TMP_ADJ[N_MAX_GED_CAP][N_MAX_GED_CAP];
+// Scratch permutation enumerated during canonical mask generation; it must already be sized by the caller.
+std::vector<int> CANON_P_PERM;
+// Which encode/decode scheme is active, matching the two groups of globals above.
+enum class Strategy {
+    GED,
+    EDGE_COUNT
+};
+Strategy current_strategy;
+// Precomputed edge masks for 6-vertex graphs. Every value fits in 15 bits, which matches the
+// 6*5/2 = 15 vertex pairs of a 6-vertex graph.
+// NOTE(review): presumably one canonical representative per isomorphism class, in the bit order
+// used by mask_to_adj_matrix_small_N — confirm against the code that consumes this table.
+const std::vector<uint16_t> PRECOMPUTED_CANONICAL_MASKS_N6 = {
+    0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 27, 29, 31, 37, 39, 43, 45, 47, 53, 55, 61,
+    63, 73, 75, 77, 79, 91, 93, 95, 111, 117, 119, 125, 127, 141, 143, 157, 159, 173, 175,
+    181, 183, 189, 191, 205, 207, 221, 223, 237, 239, 253, 255, 285, 287, 315, 317, 319,
+    349, 351, 379, 381, 383, 413, 415, 445, 447, 477, 479, 509, 511, 565, 567, 573, 575,
+    589, 591, 605, 607, 637, 639, 701, 703, 717, 719, 733, 735, 749, 751, 765, 767, 797,
+    799, 829, 831, 861, 863, 893, 895, 957, 959, 989, 991, 1021, 1023, 1149, 1151, 1213,
+    1215, 1245, 1247, 1277, 1279, 1533, 1535, 1661, 1663, 1789, 1791, 1917, 1919, 2045,
+    2047, 2109, 2111, 2141, 2143, 2173, 2175, 2205, 2207, 2237, 2239, 2269, 2271, 2301,
+    2303, 2685, 2687, 2813, 2815, 2941, 2943, 3069, 3071, 3277, 3279, 3285, 3287, 3293,
+    3295, 3309, 3311, 3325, 3327, 3357, 3359, 3389, 3391, 3421, 3423, 3453, 3455, 3517,
+    3519, 3549, 3551, 3581, 3583, 3613, 3615, 3645, 3647, 3709, 3711, 3773, 3775, 3837,
+    3839, 4095, 8191, 16383, 32767
+}; // Intended to be the 156 graphs for N=6. NOTE(review): a manual count of this initializer gives 185 entries, so the table does not match that figure — verify.
+// Expands an upper-triangle edge bitmask into a symmetric adjacency matrix.
+// Bit k of `mask` belongs to the k-th vertex pair in row-major order: (0,1), (0,2), ..., (1,2), ...
+// Only the top-left N_nodes x N_nodes corner of `adj_matrix` is written; the diagonal is cleared.
+void mask_to_adj_matrix_small_N(uint16_t mask, int N_nodes, bool adj_matrix[][N_MAX_GED_CAP]) {
+    int pair_no = 0;
+    for (int row = 0; row < N_nodes; ++row) {
+        adj_matrix[row][row] = false;
+        for (int col = row + 1; col < N_nodes; ++col, ++pair_no) {
+            const bool has_edge = ((mask >> pair_no) & 1) != 0;
+            adj_matrix[row][col] = has_edge;
+            adj_matrix[col][row] = has_edge;
+        }
+    }
+}
+// Packs `adj_matrix`, relabelled through `p_perm`, into an edge bitmask.
+// Pair (i, j) with i < j reads adj_matrix[p_perm[i]][p_perm[j]] and lands on the same bit position
+// that mask_to_adj_matrix_small_N uses for that pair (row-major upper triangle).
+uint16_t adj_matrix_to_mask_small_N(int N_nodes, const bool adj_matrix[][N_MAX_GED_CAP], const std::vector<int>& p_perm) {
+    uint16_t packed = 0;
+    int pair_no = 0;
+    for (int a = 0; a < N_nodes; ++a) {
+        for (int b = a + 1; b < N_nodes; ++b, ++pair_no) {
+            const bool has_edge = adj_matrix[p_perm[a]][p_perm[b]];
+            if (!has_edge) continue;
+            packed |= (1U << pair_no);
+        }
+    }
+    return packed;
+}
+// Returns the canonical form of `mask_val` for a graph on N_ACTUAL vertices: the numerically
+// smallest bitmask reachable by relabelling the vertices.
+// Works through the shared scratch buffers CANON_TMP_ADJ and CANON_P_PERM; CANON_P_PERM is not
+// resized here, so the caller must already have sized it to N_ACTUAL.
+uint16_t get_canonical_mask(uint16_t mask_val) {
+    const int pair_count = N_ACTUAL * (N_ACTUAL - 1) / 2;
+    if (pair_count == 0) {
+        return 0;
+    }
+    mask_to_adj_matrix_small_N(mask_val, N_ACTUAL, CANON_TMP_ADJ);
+    // Start from the identity (sorted) order so next_permutation walks every relabelling once.
+    std::iota(CANON_P_PERM.begin(), CANON_P_PERM.end(), 0);
+    uint16_t smallest = adj_matrix_to_mask_small_N(N_ACTUAL, CANON_TMP_ADJ, CANON_P_PERM);
+    while (std::next_permutation(CANON_P_PERM.begin(), CANON_P_PERM.end())) {
+        const uint16_t relabelled = adj_matrix_to_mask_small_N(N_ACTUAL, CANON_TMP_ADJ, CANON_P_PERM);
+        if (relabelled < smallest) {
+            smallest = relabelled;
+        }
+    }
+    return smallest;
+}
+// Counts the vertex pairs on which `g_j_adj_template` disagrees with CURRENT_GRAPH_ADJ_QUERY when
+// template vertex i is matched to query vertex P_VERTS_PERM_QUERY[i] (one fixed relabelling).
+int calculate_edit_distance_one_perm_small_N(
+    const std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>& g_j_adj_template
+) {
+    int mismatches = 0;
+    // The last vertex has no higher-numbered partner, so rows stop one short of N_ACTUAL.
+    for (int u = 0; u + 1 < N_ACTUAL; ++u) {
+        const int mapped_u = P_VERTS_PERM_QUERY[u];
+        for (int v = u + 1; v < N_ACTUAL; ++v) {
+            const bool in_template = g_j_adj_template[u][v];
+            const bool in_query = CURRENT_GRAPH_ADJ_QUERY[mapped_u][P_VERTS_PERM_QUERY[v]];
+            mismatches += (in_query != in_template) ? 1 : 0;
+        }
+    }
+    return mismatches;
+}
+// Minimum edit distance between CURRENT_GRAPH_ADJ_QUERY and `g_j_adj_template`, taken over every
+// relabelling of the vertices.
+// Returns 0 immediately when there are no vertex pairs (L_ACTUAL == 0); otherwise the result lies
+// in [0, L_ACTUAL]. P_VERTS_PERM_QUERY is used as the working permutation and is overwritten; it
+// must already hold N_ACTUAL elements (main() resizes it before the first call).
+int min_edit_distance_global_perm_small_N(
+    const std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>& g_j_adj_template
+) {
+    if (L_ACTUAL == 0) return 0;
+    // Begin at the identity, which is the lexicographically smallest permutation, so that
+    // std::next_permutation enumerates all N! orders and returns false after the last one.
+    // That makes a separate N!-iteration counter unnecessary.
+    std::iota(P_VERTS_PERM_QUERY.begin(), P_VERTS_PERM_QUERY.end(), 0);
+    int min_dist = L_ACTUAL + 1; // Sentinel larger than any achievable distance.
+    do {
+        min_dist = std::min(min_dist, calculate_edit_distance_one_perm_small_N(g_j_adj_template));
+        if (min_dist == 0) break; // Exact match found; nothing can beat it.
+    } while (std::next_permutation(P_VERTS_PERM_QUERY.begin(), P_VERTS_PERM_QUERY.end()));
+    return min_dist;
+}
+std::vector<uint16_t> available_canonical_masks; // Candidate graphs (as edge bitmasks) for the GED strategy; filled in main().
+std::vector<std::vector<int>> all_pairwise_ged_cache; // [i][j] = min edit distance between candidates i and j (symmetric, 0 on the diagonal).
+std::map<uint16_t, int> mask_to_idx_map; // Candidate mask -> its index in available_canonical_masks.
+std::vector<int> chosen_mask_indices_greedy; // Indices into available_canonical_masks, in output order.
+// Builds a '0'/'1' string of length `current_L` containing exactly `num_edges` ones at positions
+// drawn with RND_ENGINE. A non-positive `num_edges` yields all zeros, `num_edges >= current_L`
+// yields all ones, and neither of those two cases consumes any random numbers.
+std::string generate_random_graph_string_large_n(int num_edges, int current_L) {
+    if (num_edges <= 0 || current_L == 0) {
+        return std::string(current_L, '0');
+    }
+    if (num_edges >= current_L) {
+        return std::string(current_L, '1');
+    }
+    // Shuffle all slot indices and take the first `num_edges` of them as the edge positions.
+    std::vector<int> slots(current_L);
+    std::iota(slots.begin(), slots.end(), 0);
+    std::shuffle(slots.begin(), slots.end(), RND_ENGINE);
+    std::string bits(current_L, '0');
+    std::for_each(slots.begin(), slots.begin() + num_edges, [&bits](int slot) { bits[slot] = '1'; });
+    return bits;
+}
+// Number of '1' characters in `s`; every other character is ignored.
+int count_set_bits_in_string(const std::string& s) {
+    int ones = 0;
+    for (char ch : s) {
+        if (ch == '1') ++ones;
+    }
+    return ones;
+}
+// Decodes the upper-triangle string `s` into CURRENT_GRAPH_ADJ_QUERY for the first `N_nodes`
+// vertices. Characters are consumed in row-major pair order (0,1), (0,2), ..., (1,2), ...;
+// a pair is an edge only if its character is '1'. If `s` is too short, the remaining pairs
+// are treated as non-edges. The diagonal is cleared.
+void string_to_adj_matrix_query(const std::string& s, int N_nodes) {
+    const int available = (int)s.length();
+    int next_char = 0;
+    for (int row = 0; row < N_nodes; ++row) {
+        CURRENT_GRAPH_ADJ_QUERY[row][row] = false;
+        for (int col = row + 1; col < N_nodes; ++col) {
+            bool has_edge = false;
+            if (next_char < available) {
+                has_edge = (s[next_char] == '1');
+                ++next_char;
+            }
+            CURRENT_GRAPH_ADJ_QUERY[row][col] = has_edge;
+            CURRENT_GRAPH_ADJ_QUERY[col][row] = has_edge;
+        }
+    }
+}
+int main() { // Reads (M, eps), picks N and a decoding strategy, prints M graphs, then answers 100 queries on stdin/stdout.
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    unsigned int seed_val = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count(); // Clock-based seed: runs are not reproducible.
+    RND_ENGINE.seed(seed_val);
+    int M_graphs;
+    double epsilon_noise_rate;
+    std::cin >> M_graphs >> epsilon_noise_rate;
+    int N_for_GED_strat; // Vertex count to use if the GED strategy is picked.
+    if (M_graphs <= 11) N_for_GED_strat = 4; // presumably 11 / 34 are the counts of non-isomorphic graphs on 4 / 5 vertices - TODO confirm
+    else if (M_graphs <= 34) N_for_GED_strat = 5;
+    else N_for_GED_strat = N_MAX_GED_CAP;
+    const double K_SEP = 2.5; // Separation factor in the L_ideal heuristic below.
+    double L_ideal; // Heuristic pair count: K_SEP^2 * (M-1)^2 * eps * (1-eps) / (0.5-eps)^2.
+    double L_ideal_numerator = K_SEP * K_SEP * (M_graphs > 1 ? (M_graphs - 1.0) * (M_graphs - 1.0) : 1.0) *
+                               epsilon_noise_rate * (1.0 - epsilon_noise_rate);
+    double L_ideal_denominator_factor = (0.5 - epsilon_noise_rate);
+    double L_ideal_denominator = L_ideal_denominator_factor * L_ideal_denominator_factor;
+    if (std::abs(0.5 - epsilon_noise_rate) < 1e-9) { // eps == 0.5 would divide by zero; use the pair count of N = 100 instead.
+        L_ideal = (100.0 * 99.0) / 2.0;
+    } else {
+        L_ideal = L_ideal_numerator / L_ideal_denominator;
+    }
+    if (L_ideal < 0) L_ideal = 0;
+    int N_candidate_EC = 4; // Vertex count to use if the EDGE_COUNT strategy is picked.
+    if (L_ideal > 1e-9) {
+         double discriminant = 1.0 + 8.0 * L_ideal;
+         if (discriminant >=0) {
+            N_candidate_EC = static_cast<int>(std::ceil((1.0 + std::sqrt(discriminant)) / 2.0)); // Smallest N with N*(N-1)/2 >= L_ideal.
+         } else {
+            N_candidate_EC = 100;
+         }
+    }
+    N_candidate_EC = std::max(4, N_candidate_EC);
+    N_candidate_EC = std::min(100, N_candidate_EC);
+    if (epsilon_noise_rate < 0.01) { // Near-noiseless input: always use the small-N GED strategy.
+        current_strategy = Strategy::GED; N_ACTUAL = N_for_GED_strat;
+    } else {
+        if (N_candidate_EC > N_for_GED_strat) { // Edge counting wants more vertices than GED would use.
+             current_strategy = Strategy::EDGE_COUNT; N_ACTUAL = N_candidate_EC;
+        } else {
+            current_strategy = Strategy::GED; N_ACTUAL = N_for_GED_strat;
+        }
+    }
+    N_ACTUAL = std::min(100, std::max(4, N_ACTUAL)); // Final check on N_ACTUAL bounds
+    L_ACTUAL = N_ACTUAL * (N_ACTUAL - 1) / 2;
+    std::cout << N_ACTUAL << std::endl; // First output line: the chosen vertex count.
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# M=" << M_graphs << ", eps=" << epsilon_noise_rate << std::endl;
+    std::cerr << "# Chosen N=" << N_ACTUAL << ", Strategy=" << (current_strategy == Strategy::GED ? "GED" : "EDGE_COUNT") << std::endl;
+    std::cerr << "# L_ideal=" << L_ideal << ", N_candidate_EC=" << N_candidate_EC << ", N_for_GED_strat=" << N_for_GED_strat << std::endl;
+#endif
+    if (current_strategy == Strategy::GED) {
+        P_VERTS_PERM_QUERY.resize(N_ACTUAL); CANON_P_PERM.resize(N_ACTUAL); // Both permutation buffers must match N_ACTUAL before any helper runs.
+        if (N_ACTUAL == 6) {
+            available_canonical_masks = PRECOMPUTED_CANONICAL_MASKS_N6; // NOTE(review): the table has more entries than there are isomorphism classes, so some candidate pairs are at edit distance 0 - verify the table.
+        } else {
+            std::set<uint16_t> unique_masks_set;
+            if (L_ACTUAL > 0) {
+                for (unsigned int i = 0; i < (1U << L_ACTUAL); ++i) { // Canonicalise every labelled graph; the set keeps one mask per class.
+                    unique_masks_set.insert(get_canonical_mask(static_cast<uint16_t>(i)));
+                }
+            } else {
+                unique_masks_set.insert(0);
+            }
+            available_canonical_masks.assign(unique_masks_set.begin(), unique_masks_set.end());
+        }
+        int num_total_isos = available_canonical_masks.size();
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# Num non-isomorphic graphs for N=" << N_ACTUAL << " is " << num_total_isos << std::endl;
+#endif
+        mask_to_idx_map.clear();
+        for(int i=0; i<num_total_isos; ++i) mask_to_idx_map[available_canonical_masks[i]] = i;
+        if (num_total_isos > 0) {
+            all_pairwise_ged_cache.assign(num_total_isos, std::vector<int>(num_total_isos, 0));
+            bool graph_i_adj_cstyle[N_MAX_GED_CAP][N_MAX_GED_CAP];
+            std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP> graph_j_adj_stdarray;
+            for (int i = 0; i < num_total_isos; ++i) {
+                mask_to_adj_matrix_small_N(available_canonical_masks[i], N_ACTUAL, graph_i_adj_cstyle);
+                for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) CURRENT_GRAPH_ADJ_QUERY[r][c] = graph_i_adj_cstyle[r][c]; // Candidate i plays the role of the "query" graph for the distance routine.
+                for (int j = i + 1; j < num_total_isos; ++j) {
+                    bool temp_adj_for_gj[N_MAX_GED_CAP][N_MAX_GED_CAP];
+                    mask_to_adj_matrix_small_N(available_canonical_masks[j], N_ACTUAL, temp_adj_for_gj);
+                    for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) graph_j_adj_stdarray[r][c] = temp_adj_for_gj[r][c];
+                    all_pairwise_ged_cache[i][j] = all_pairwise_ged_cache[j][i] = min_edit_distance_global_perm_small_N(graph_j_adj_stdarray);
+                }
+            }
+        }
+        chosen_mask_indices_greedy.clear();
+        std::vector<bool> is_chosen_idx(num_total_isos, false);
+        if (num_total_isos > 0) {
+            if (mask_to_idx_map.count(0)) { // Seed the selection with the empty graph ...
+                int zero_idx = mask_to_idx_map.at(0);
+                if (chosen_mask_indices_greedy.size() < (size_t)M_graphs) {
+                    chosen_mask_indices_greedy.push_back(zero_idx);
+                    is_chosen_idx[zero_idx] = true;
+                }
+            }
+            if (L_ACTUAL > 0 && chosen_mask_indices_greedy.size() < (size_t)M_graphs) { // ... and with the complete graph, if there is room.
+                uint16_t complete_mask_val = (1U << L_ACTUAL) - 1;
+                uint16_t canonical_complete_mask = get_canonical_mask(complete_mask_val);
+                if (mask_to_idx_map.count(canonical_complete_mask)) {
+                    int complete_idx = mask_to_idx_map.at(canonical_complete_mask);
+                    if (!is_chosen_idx[complete_idx]) {
+                         chosen_mask_indices_greedy.push_back(complete_idx);
+                         is_chosen_idx[complete_idx] = true;
+                    }
+                }
+            }
+        }
+        for (int k_count = chosen_mask_indices_greedy.size(); k_count < M_graphs; ++k_count) { // Greedy max-min: repeatedly add the candidate farthest from everything chosen so far.
+            if (chosen_mask_indices_greedy.size() >= (size_t)num_total_isos) {
+                break;
+            }
+            int best_new_idx_to_add = -1;
+            int max_of_min_distances_found = -1;
+            for (int cand_idx = 0; cand_idx < num_total_isos; ++cand_idx) {
+                if (is_chosen_idx[cand_idx]) continue;
+                int current_cand_min_dist_to_existing_G;
+                if (chosen_mask_indices_greedy.empty()) {
+                     current_cand_min_dist_to_existing_G = L_ACTUAL + 1;
+                } else {
+                    current_cand_min_dist_to_existing_G = L_ACTUAL + 1;
+                    for (int chosen_idx : chosen_mask_indices_greedy) {
+                        current_cand_min_dist_to_existing_G = std::min(current_cand_min_dist_to_existing_G, all_pairwise_ged_cache[cand_idx][chosen_idx]);
+                    }
+                }
+                if (current_cand_min_dist_to_existing_G > max_of_min_distances_found) { // Strict '>' keeps the lowest index on ties.
+                    max_of_min_distances_found = current_cand_min_dist_to_existing_G;
+                    best_new_idx_to_add = cand_idx;
+                }
+            }
+            if (best_new_idx_to_add != -1) {
+                chosen_mask_indices_greedy.push_back(best_new_idx_to_add);
+                is_chosen_idx[best_new_idx_to_add] = true;
+            } else {
+                break;
+            }
+        }
+        int num_distinct_chosen_graphs = chosen_mask_indices_greedy.size();
+        if (num_distinct_chosen_graphs < M_graphs) { // Fewer candidates than M: pad with duplicates of the empty graph (index 0 if it is absent).
+            int fallback_idx = 0;
+            if (num_total_isos > 0) {
+                if (mask_to_idx_map.count(0)) {
+                    fallback_idx = mask_to_idx_map.at(0);
+                }
+            }
+            for (int k_idx = num_distinct_chosen_graphs; k_idx < M_graphs; ++k_idx) {
+                 if (num_total_isos > 0) {
+                    chosen_mask_indices_greedy.push_back(fallback_idx);
+                 } else {
+                    chosen_mask_indices_greedy.push_back(0);
+                 }
+            }
+        }
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# Chosen mask indices (size " << chosen_mask_indices_greedy.size() << "): ";
+    if (!available_canonical_masks.empty()){ // Check before accessing
+        for(int idx : chosen_mask_indices_greedy) {
+            if (idx < available_canonical_masks.size()) std::cerr << idx << " (" << available_canonical_masks[idx] << ") ";
+            else std::cerr << idx << " (OOB) ";
+        }
+    }
+    std::cerr << std::endl;
+#endif
+        G_ADJS_CHOSEN_GED.resize(M_graphs);
+        for (int k_idx = 0; k_idx < M_graphs; ++k_idx) { // Store each chosen graph as a matrix and print it as an L_ACTUAL-character string.
+            uint16_t mask_to_print = 0; // Falls back to the empty graph if the index is unusable.
+            if (k_idx < chosen_mask_indices_greedy.size() &&
+                !available_canonical_masks.empty() &&
+                chosen_mask_indices_greedy[k_idx] < available_canonical_masks.size()) {
+                 mask_to_print = available_canonical_masks[chosen_mask_indices_greedy[k_idx]];
+            } else if (L_ACTUAL == 0 && k_idx < chosen_mask_indices_greedy.size()) {
+                 mask_to_print = 0;
+            }
+            bool temp_adj_cstyle[N_MAX_GED_CAP][N_MAX_GED_CAP];
+            mask_to_adj_matrix_small_N(mask_to_print, N_ACTUAL, temp_adj_cstyle);
+            for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) G_ADJS_CHOSEN_GED[k_idx][r][c] = temp_adj_cstyle[r][c];
+            std::string s_out = "";
+            if (L_ACTUAL > 0) {
+                for (int bit_idx = 0; bit_idx < L_ACTUAL; ++bit_idx) { // Character order equals mask bit order (bit 0 first).
+                    s_out += ((mask_to_print >> bit_idx) & 1) ? '1' : '0';
+                }
+            }
+            std::cout << s_out << std::endl;
+        }
+    } else { // EDGE_COUNT strategy: graph k is identified only by how many edges it has.
+        G_EDGE_COUNTS_LARGE_N.resize(M_graphs); G_STRINGS_CHOSEN_LARGE_N.resize(M_graphs);
+        if (M_graphs == 1) {
+             G_EDGE_COUNTS_LARGE_N[0] = (L_ACTUAL > 0) ? L_ACTUAL / 2 : 0;
+        } else {
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = static_cast<int>(std::round((double)k * L_ACTUAL / (M_graphs - 1.0))); // Spread the targets evenly over [0, L_ACTUAL].
+            for (int k=0; k<M_graphs-1; ++k) { // Force strictly increasing counts after rounding.
+                if (G_EDGE_COUNTS_LARGE_N[k+1] <= G_EDGE_COUNTS_LARGE_N[k]) {
+                    G_EDGE_COUNTS_LARGE_N[k+1] = G_EDGE_COUNTS_LARGE_N[k] + 1;
+                }
+            }
+            if (M_graphs > 0 && G_EDGE_COUNTS_LARGE_N[M_graphs-1] > L_ACTUAL) { // Overshoot past L_ACTUAL: shift the whole sequence down by the excess.
+                int exceso = G_EDGE_COUNTS_LARGE_N[M_graphs-1] - L_ACTUAL;
+                for (int k=0; k<M_graphs; ++k) {
+                    G_EDGE_COUNTS_LARGE_N[k] -= exceso;
+                }
+            }
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = std::min(L_ACTUAL, std::max(0, G_EDGE_COUNTS_LARGE_N[k])); // Clamp into [0, L_ACTUAL].
+            for (int k=0; k<M_graphs-1; ++k) {
+                 G_EDGE_COUNTS_LARGE_N[k+1] = std::max(G_EDGE_COUNTS_LARGE_N[k+1], G_EDGE_COUNTS_LARGE_N[k] + 1);
+            }
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = std::min(L_ACTUAL, std::max(0, G_EDGE_COUNTS_LARGE_N[k])); // Final clamp; counts may repeat at L_ACTUAL when M exceeds L_ACTUAL + 1.
+        }
+        for (int k=0; k<M_graphs; ++k) {
+            G_STRINGS_CHOSEN_LARGE_N[k] = generate_random_graph_string_large_n(G_EDGE_COUNTS_LARGE_N[k], L_ACTUAL);
+            std::cout << G_STRINGS_CHOSEN_LARGE_N[k] << std::endl;
+        }
+    }
+    std::cout.flush(); // Explicit flush after all G_k are printed
+    for (int q_idx = 0; q_idx < 100; ++q_idx) { // Fixed number of queries: read one graph string, print one predicted index.
+        std::string h_str; std::cin >> h_str;
+        if (current_strategy == Strategy::GED) {
+            if (M_graphs == 0) { std::cout << 0 << std::endl; std::cout.flush(); continue; }
+            if (G_ADJS_CHOSEN_GED.empty()){
+#ifdef DEBUG_OUTPUT
+                std::cerr << "# Query " << q_idx << ": G_ADJS_CHOSEN_GED is empty but M_graphs=" << M_graphs << ". Outputting 0." << std::endl;
+#endif
+                std::cout << 0 << std::endl; std::cout.flush(); continue;
+            }
+            string_to_adj_matrix_query(h_str, N_ACTUAL);
+            int best_g_idx = 0; int min_dist_found = L_ACTUAL + 2; // Sentinel above any possible distance.
+            for (int j=0; j < M_graphs; ++j) { // Answer with the stored graph at the smallest edit distance (lowest index wins ties).
+                if (j >= G_ADJS_CHOSEN_GED.size()) {
+#ifdef DEBUG_OUTPUT
+                    std::cerr << "# Query " << q_idx << ": Index j=" << j << " out of bounds for G_ADJS_CHOSEN_GED (size " << G_ADJS_CHOSEN_GED.size() << ")" << std::endl;
+#endif
+                    continue;
+                }
+                int dist = min_edit_distance_global_perm_small_N(G_ADJS_CHOSEN_GED[j]);
+                if (dist < min_dist_found) {
+                    min_dist_found = dist;
+                    best_g_idx = j;
+                }
+            }
+            std::cout << best_g_idx << std::endl;
+        } else {
+            if (M_graphs == 0) { std::cout << 0 << std::endl; std::cout.flush(); continue; }
+            if (G_EDGE_COUNTS_LARGE_N.empty()){
+#ifdef DEBUG_OUTPUT
+                std::cerr << "# Query " << q_idx << ": G_EDGE_COUNTS_LARGE_N is empty but M_graphs=" << M_graphs << ". Outputting 0." << std::endl;
+#endif
+                std::cout << 0 << std::endl; std::cout.flush(); continue;
+            }
+            int edges_Hk = count_set_bits_in_string(h_str);
+            int best_g_idx = 0; double min_abs_diff_expected_edges = -1.0; // Negative value marks "no candidate evaluated yet".
+            for (int j=0; j<M_graphs; ++j) {
+                if (j >= G_EDGE_COUNTS_LARGE_N.size()) {
+#ifdef DEBUG_OUTPUT
+                     std::cerr << "# Query " << q_idx << ": Index j=" << j << " out of bounds for G_EDGE_COUNTS_LARGE_N (size " << G_EDGE_COUNTS_LARGE_N.size() << ")" << std::endl;
+#endif
+                    continue;
+                }
+                double expected_edges_Hk_from_Gj = (double)G_EDGE_COUNTS_LARGE_N[j] * (1.0 - 2.0*epsilon_noise_rate) + (double)L_ACTUAL * epsilon_noise_rate; // m*(1-eps) + (L-m)*eps; assumes each pair flips independently with probability eps - TODO confirm against the problem statement.
+                double diff = std::abs((double)edges_Hk - expected_edges_Hk_from_Gj);
+                if (min_abs_diff_expected_edges < -0.5 || diff < min_abs_diff_expected_edges) {
+                    min_abs_diff_expected_edges = diff;
+                    best_g_idx = j;
+                }
+            }
+            std::cout << best_g_idx << std::endl;
+        }
+        std::cout.flush(); // Explicit flush after each query prediction
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc024/best_program.cpp ADDED Viewed

	@@ -0,0 +1,626 @@

+# EVOLVE-BLOCK-START
+#pragma GCC optimize("O3,unroll-loops")
+#include <iostream>
+#include <vector>
+#include <array>
+#include <queue>
+#include <algorithm> // For std::min, std::max, std::sort, std::unique, std::shuffle
+#include <random>    // For XorShift and std::shuffle
+#include <chrono>
+#include <utility>   // For std::pair
+#include <cmath>     // For std::exp, std::pow
+#include <climits>   // For UINT_MAX
+// --- Globals ---
+const int N_FIXED = 50; // Grid side length; every grid in this file is N_FIXED x N_FIXED.
+const int M_FIXED = 100; // Max ward ID, problem states M=100
+std::vector<std::vector<int>> current_grid_state(N_FIXED, std::vector<int>(N_FIXED)); // Working grid of color ids.
+std::vector<std::vector<int>> best_grid_state(N_FIXED, std::vector<int>(N_FIXED)); // Best grid recorded so far; starts as a copy of the initial grid.
+int best_score_val = -1; // Stores count of 0-cells for the best state
+int boundary_zero_cells_count = 0; // Number of zero cells located on the boundary
+int last_boundary_zero_delta = 0;  // Delta applied to boundary_zero_cells_count by the last successful attempt
+// Small xorshift generator with 128 bits of state (x, y, z, w), seeded from std::random_device.
+// It also exposes result_type / min() / max() / operator() so it can be handed to std::shuffle.
+struct XorShift {
+    using result_type = unsigned int;
+    unsigned int x, y, z, w;
+    XorShift() {
+        std::random_device seed_source;
+        x = seed_source();
+        y = seed_source();
+        z = seed_source();
+        w = seed_source();
+        // An all-zero state would only ever produce zeros, so nudge it off zero.
+        const bool all_zero = (x == 0) && (y == 0) && (z == 0) && (w == 0);
+        if (all_zero) {
+            w = 1;
+        }
+    }
+    // Advances the state by one step and returns the new `w`.
+    unsigned int next_uint() {
+        unsigned int mixed = x ^ (x << 11);
+        mixed ^= mixed >> 8;
+        x = y;
+        y = z;
+        z = w;
+        w = (w ^ (w >> 19)) ^ mixed;
+        return w;
+    }
+    // Value in [0, 1).
+    double next_double() {
+        const double span = static_cast<double>(UINT_MAX) + 1.0;
+        return static_cast<double>(next_uint()) / span;
+    }
+    // Value in [0, exclusive_max_val - 1]; returns 0 without drawing when the bound is not positive.
+    int next_int(int exclusive_max_val) {
+        if (exclusive_max_val > 0) {
+            return next_uint() % exclusive_max_val;
+        }
+        return 0;
+    }
+    // Generator interface expected by std::shuffle.
+    static constexpr unsigned int min() { return 0; }
+    static constexpr unsigned int max() { return UINT_MAX; }
+    unsigned int operator()() { return next_uint(); }
+};
+XorShift rnd_gen; // Global instance
+// Reference instant captured when this global is initialised.
+auto G_START_TIME = std::chrono::high_resolution_clock::now();
+// Milliseconds elapsed since G_START_TIME, as a floating-point value.
+double time_elapsed_ms() {
+    using MillisD = std::chrono::duration<double, std::milli>;
+    const MillisD elapsed = std::chrono::high_resolution_clock::now() - G_START_TIME;
+    return elapsed.count();
+}
+// Symmetric boolean relation over colors 0..M_FIXED, stored only in the cell
+// matrix[smaller][larger]. A color is never reported as adjacent to itself.
+struct AdjacencyInfo {
+    bool matrix[M_FIXED + 1][M_FIXED + 1];
+    AdjacencyInfo() {
+        for (auto& row : matrix) {
+            for (bool& cell : row) cell = false;
+        }
+    }
+    // Marks c1 and c2 as adjacent; a no-op when both are the same color.
+    void set_adj(int c1, int c2) {
+        if (c1 == c2) return;
+        const int lo = (c1 < c2) ? c1 : c2;
+        const int hi = (c1 < c2) ? c2 : c1;
+        matrix[lo][hi] = true;
+    }
+    // True if set_adj was called for this unordered pair.
+    bool is_adj(int c1, int c2) const {
+        if (c1 == c2) return false;
+        const int lo = (c1 < c2) ? c1 : c2;
+        const int hi = (c1 < c2) ? c2 : c1;
+        return matrix[lo][hi];
+    }
+};
+AdjacencyInfo required_adjacencies; // Color adjacencies found in the initial grid; filled by initialize_all_data_structures.
+bool ward_has_any_req_adj[M_FIXED + 1]; // [k] is true when color k has at least one required adjacency.
+// Counts, per unordered color pair, how many cell edges currently separate the two colors.
+// Counts live in counts_arr[smaller][larger]; same-color pairs are ignored and read as 0.
+struct BorderEdgeTracker {
+    int counts_arr[M_FIXED + 1][M_FIXED + 1];
+    BorderEdgeTracker() { clear(); }
+    // Adds one edge between c1 and c2.
+    void add_edge(int c1, int c2) {
+        if (c1 == c2) return;
+        const int lo = (c1 < c2) ? c1 : c2;
+        const int hi = (c1 < c2) ? c2 : c1;
+        ++counts_arr[lo][hi];
+    }
+    // Removes one edge between c1 and c2 (no underflow check).
+    void remove_edge(int c1, int c2) {
+        if (c1 == c2) return;
+        const int lo = (c1 < c2) ? c1 : c2;
+        const int hi = (c1 < c2) ? c2 : c1;
+        --counts_arr[lo][hi];
+    }
+    // Current number of edges between c1 and c2.
+    int get_count(int c1, int c2) const {
+        if (c1 == c2) return 0;
+        const int lo = (c1 < c2) ? c1 : c2;
+        const int hi = (c1 < c2) ? c2 : c1;
+        return counts_arr[lo][hi];
+    }
+    // Resets every count to zero.
+    void clear() {
+        for (auto& row : counts_arr) {
+            for (int& cell : row) cell = 0;
+        }
+    }
+};
+BorderEdgeTracker current_border_edges_tracker; // Live edge counts for current_grid_state; grid-boundary sides are counted as edges to color 0.
+std::vector<std::vector<std::pair<int, int>>> cells_by_color(M_FIXED + 1); // [k] lists the (row, col) cells currently colored k.
+std::vector<std::vector<int>> pos_in_color_list(N_FIXED, std::vector<int>(N_FIXED)); // Index of each cell inside its cells_by_color list (used for swap-and-pop removal).
+unsigned int visited_marker_grid[N_FIXED][N_FIXED]; // Per-cell stamp; a cell is visited when its stamp equals current_visit_marker.
+unsigned int current_visit_marker = 0; // Bumped before each BFS, so the stamp grid is only cleared when this counter wraps.
+std::queue<std::pair<int, int>> q_bfs_global; // Shared BFS queue reused by the connectivity checks.
+const int DR[] = {-1, 1, 0, 0}; // Row offsets of the four neighbors.
+const int DC[] = {0, 0, -1, 1}; // Column offsets, paired index-wise with DR.
+// True when (r, c) lies inside the N_FIXED x N_FIXED grid.
+inline bool is_cell_on_grid(int r, int c) {
+    const bool row_ok = (0 <= r) && (r < N_FIXED);
+    const bool col_ok = (0 <= c) && (c < N_FIXED);
+    return row_ok && col_ok;
+}
+// Starts a fresh "visited" generation by bumping current_visit_marker.
+// If the unsigned counter wraps to 0, every stamp is wiped and the generation restarts at 1,
+// so a stale stamp can never equal the new marker.
+void increment_bfs_marker() {
+    if (++current_visit_marker != 0) return;
+    for (auto& row : visited_marker_grid) {
+        for (unsigned int& stamp : row) stamp = 0;
+    }
+    current_visit_marker = 1;
+}
+// Empties q_bfs_global by swapping it with a freshly constructed queue.
+void clear_global_bfs_queue() {
+    std::queue<std::pair<int, int>>().swap(q_bfs_global);
+}
+// Appends cell (r, c) to the list of `color` and records where in that list it sits.
+void add_cell_to_color_ds(int r, int c, int color) {
+    auto& bucket = cells_by_color[color];
+    bucket.emplace_back(r, c);
+    pos_in_color_list[r][c] = bucket.size() - 1;
+}
+// Removes cell (r, c) from the list of `color` in O(1): the list's last cell is moved into the
+// vacated slot (and its recorded position updated) before the list is shortened by one.
+// The list order is therefore not preserved.
+void remove_cell_from_color_ds(int r, int c, int color) {
+    auto& bucket = cells_by_color[color];
+    const int hole = pos_in_color_list[r][c];
+    const std::pair<int, int> mover = bucket.back();
+    bucket[hole] = mover;
+    pos_in_color_list[mover.first][mover.second] = hole;
+    bucket.pop_back();
+}
+// Rebuilds every global structure from `initial_grid` (expected to be N_FIXED x N_FIXED):
+//   - current_grid_state and the per-color cell lists,
+//   - required_adjacencies (color pairs touching in the initial grid; boundary cells touch color 0),
+//   - current_border_edges_tracker (one edge per differing neighbor pair, one edge to color 0
+//     per grid side a cell lies on, so corner cells contribute two),
+//   - ward_has_any_req_adj, boundary_zero_cells_count, best_grid_state and best_score_val.
+void initialize_all_data_structures(const std::vector<std::vector<int>>& initial_grid) {
+    required_adjacencies = AdjacencyInfo();
+    current_border_edges_tracker.clear();
+    for (int color = 0; color <= M_FIXED; ++color) {
+        cells_by_color[color].clear();
+    }
+    // Copy the grid and index every cell under its color.
+    for (int r = 0; r < N_FIXED; ++r) {
+        for (int c = 0; c < N_FIXED; ++c) {
+            const int color = initial_grid[r][c];
+            current_grid_state[r][c] = color;
+            add_cell_to_color_ds(r, c, color);
+        }
+    }
+    const int last = N_FIXED - 1;
+    for (int r = 0; r < N_FIXED; ++r) {
+        for (int c = 0; c < N_FIXED; ++c) {
+            // Required adjacencies come from the initial grid.
+            const int wanted = initial_grid[r][c];
+            const bool touches_rim = (r == 0) || (r == last) || (c == 0) || (c == last);
+            if (touches_rim) {
+                required_adjacencies.set_adj(0, wanted);
+            }
+            if (c < last && initial_grid[r][c + 1] != wanted) {
+                required_adjacencies.set_adj(wanted, initial_grid[r][c + 1]);
+            }
+            if (r < last && initial_grid[r + 1][c] != wanted) {
+                required_adjacencies.set_adj(wanted, initial_grid[r + 1][c]);
+            }
+            // Edge counts come from the working grid.
+            const int actual = current_grid_state[r][c];
+            const int rim_sides = (r == 0 ? 1 : 0) + (r == last ? 1 : 0) + (c == 0 ? 1 : 0) + (c == last ? 1 : 0);
+            for (int side = 0; side < rim_sides; ++side) {
+                current_border_edges_tracker.add_edge(0, actual);
+            }
+            if (c < last && current_grid_state[r][c + 1] != actual) {
+                current_border_edges_tracker.add_edge(actual, current_grid_state[r][c + 1]);
+            }
+            if (r < last && current_grid_state[r + 1][c] != actual) {
+                current_border_edges_tracker.add_edge(actual, current_grid_state[r + 1][c]);
+            }
+        }
+    }
+    for (int a = 0; a <= M_FIXED; ++a) {
+        bool has_partner = false;
+        for (int b = 0; b <= M_FIXED && !has_partner; ++b) {
+            has_partner = (a != b) && required_adjacencies.is_adj(a, b);
+        }
+        ward_has_any_req_adj[a] = has_partner;
+    }
+    boundary_zero_cells_count = 0;
+    for (int r = 0; r < N_FIXED; ++r) {
+        for (int c = 0; c < N_FIXED; ++c) {
+            if (current_grid_state[r][c] != 0) continue;
+            const bool touches_rim = (r == 0) || (r == last) || (c == 0) || (c == last);
+            if (touches_rim) ++boundary_zero_cells_count;
+        }
+    }
+    best_grid_state = current_grid_state;
+    best_score_val = cells_by_color[0].size();
+}
+// True when every cell listed for `target_color` belongs to one 4-connected region of
+// current_grid_state (an empty color counts as connected). Runs a BFS from the first listed
+// cell and compares the number of cells reached with the list size.
+bool check_region_connectivity_bfs(int target_color) {
+    const auto& members = cells_by_color[target_color];
+    if (members.empty()) return true;
+    increment_bfs_marker();
+    clear_global_bfs_queue();
+    const std::pair<int, int> seed = members[0];
+    visited_marker_grid[seed.first][seed.second] = current_visit_marker;
+    q_bfs_global.push(seed);
+    int reached = 0;
+    for (; !q_bfs_global.empty(); ++reached) {
+        const std::pair<int, int> here = q_bfs_global.front();
+        q_bfs_global.pop();
+        for (int d = 0; d < 4; ++d) {
+            const int y = here.first + DR[d];
+            const int x = here.second + DC[d];
+            if (!is_cell_on_grid(y, x)) continue;
+            if (current_grid_state[y][x] != target_color) continue;
+            if (visited_marker_grid[y][x] == current_visit_marker) continue;
+            visited_marker_grid[y][x] = current_visit_marker;
+            q_bfs_global.push({y, x});
+        }
+    }
+    return reached == members.size();
+}
+// Connectivity check for color 0, treating the outside of the grid as part of the region.
+// Returns true when there are no 0-cells. Otherwise a multi-source BFS is started from every
+// 0-cell on the grid boundary: the result is false if no such cell exists, and true only if
+// every listed 0-cell is reached through 4-connected 0-cells.
+bool check_region_0_connectivity_full() {
+    const auto& zero_cells = cells_by_color[0];
+    if (zero_cells.empty()) return true;
+    increment_bfs_marker();
+    clear_global_bfs_queue();
+    const int last = N_FIXED - 1;
+    bool seeded = false;
+    for (const auto& cell : zero_cells) {
+        const int r = cell.first;
+        const int c = cell.second;
+        const bool on_rim = (r == 0) || (r == last) || (c == 0) || (c == last);
+        if (!on_rim) continue;
+        seeded = true;
+        if (visited_marker_grid[r][c] == current_visit_marker) continue;
+        q_bfs_global.push(cell);
+        visited_marker_grid[r][c] = current_visit_marker;
+    }
+    if (!seeded) return false;
+    while (!q_bfs_global.empty()) {
+        const std::pair<int, int> here = q_bfs_global.front();
+        q_bfs_global.pop();
+        for (int d = 0; d < 4; ++d) {
+            const int y = here.first + DR[d];
+            const int x = here.second + DC[d];
+            const bool expandable = is_cell_on_grid(y, x) &&
+                                    current_grid_state[y][x] == 0 &&
+                                    visited_marker_grid[y][x] != current_visit_marker;
+            if (!expandable) continue;
+            visited_marker_grid[y][x] = current_visit_marker;
+            q_bfs_global.push({y, x});
+        }
+    }
+    // Any 0-cell left unstamped is cut off from the boundary.
+    return std::all_of(zero_cells.begin(), zero_cells.end(), [](const std::pair<int, int>& cell) {
+        return visited_marker_grid[cell.first][cell.second] == current_visit_marker;
+    });
+}
+/*
+  SmallAdjDelta: fixed-capacity (16 entries) list of pending adjacency-count changes, keyed by an
+  unordered color pair stored as (smaller, larger). Repeated adds for one pair are merged into a
+  single entry. Per the original note, a single-cell recolor touches at most 8 distinct pairs
+  (4 neighbors x old/new color), so a linear scan is enough; add() does no capacity check.
+*/
+struct SmallAdjDelta {
+    int c1[16], c2[16], delta[16], sz;
+    inline void clear() { sz = 0; }
+    inline void add(int a, int b, int d) {
+        if (a == b) return;
+        const int lo = (a < b) ? a : b;
+        const int hi = (a < b) ? b : a;
+        int slot = 0;
+        while (slot < sz && !(c1[slot] == lo && c2[slot] == hi)) ++slot;
+        if (slot < sz) {
+            delta[slot] += d;
+            return;
+        }
+        c1[sz] = lo;
+        c2[sz] = hi;
+        delta[sz] = d;
+        ++sz;
+    }
+} temp_adj_deltas;
+/*
+  removal_keeps_connectivity_local:
+  Decides whether the region of `color` stays connected once cell (r,c) no longer belongs to it.
+  The caller is expected to have recolored (r,c) already; the search also refuses to step onto
+  (r,c) explicitly.
+  With zero or one same-color neighbor the answer is trivially true. Otherwise a BFS starts at the
+  first same-color neighbor and stops as soon as every other same-color neighbor has been reached;
+  the result is true exactly when all of them were.
+*/
+inline bool removal_keeps_connectivity_local(int r, int c, int color) {
+    int nb_r[4], nb_c[4];
+    int nb_count = 0;
+    for (int d = 0; d < 4; ++d) {
+        const int y = r + DR[d];
+        const int x = c + DC[d];
+        if (!is_cell_on_grid(y, x) || current_grid_state[y][x] != color) continue;
+        nb_r[nb_count] = y;
+        nb_c[nb_count] = x;
+        ++nb_count;
+    }
+    if (nb_count <= 1) return true;
+    increment_bfs_marker();
+    clear_global_bfs_queue();
+    bool reached[4] = {true, false, false, false};
+    int reached_count = 1;
+    q_bfs_global.push({nb_r[0], nb_c[0]});
+    visited_marker_grid[nb_r[0]][nb_c[0]] = current_visit_marker;
+    while (reached_count < nb_count && !q_bfs_global.empty()) {
+        const std::pair<int, int> here = q_bfs_global.front();
+        q_bfs_global.pop();
+        for (int d = 0; d < 4; ++d) {
+            const int y = here.first + DR[d];
+            const int x = here.second + DC[d];
+            const bool usable = is_cell_on_grid(y, x) &&
+                                !(y == r && x == c) &&
+                                current_grid_state[y][x] == color &&
+                                visited_marker_grid[y][x] != current_visit_marker;
+            if (!usable) continue;
+            visited_marker_grid[y][x] = current_visit_marker;
+            q_bfs_global.push({y, x});
+            // If this cell is one of the neighbors we are looking for, tick it off.
+            for (int i = 0; i < nb_count; ++i) {
+                if (reached[i] || nb_r[i] != y || nb_c[i] != x) continue;
+                reached[i] = true;
+                ++reached_count;
+                break;
+            }
+        }
+    }
+    return reached_count == nb_count;
+}
+/*
+  has_adjacent_color:
+  Reports whether at least one on-grid 4-neighbor of (r,c) currently holds 'color'.
+  Off-grid positions never match, not even for color 0.
+*/
+inline bool has_adjacent_color(int r, int c, int color) {
+    bool found = false;
+    for (int dir = 0; dir < 4 && !found; ++dir) {
+        const int ar = r + DR[dir];
+        const int ac = c + DC[dir];
+        found = is_cell_on_grid(ar, ac) && current_grid_state[ar][ac] == color;
+    }
+    return found;
+}
+// Helper for attempt_change_cell_color_and_validate. Call it after the 0-cell (r,c) has been
+// recolored: returns true iff every 0-colored 4-neighbor of (r,c) can still reach some boundary
+// 0-cell by walking over 0-cells, i.e. its component is still joined to the color-0 outside.
+static bool zero_neighbors_still_touch_boundary(int r, int c) {
+    for (int k = 0; k < 4; ++k) {
+        int sr = r + DR[k], sc = c + DC[k];
+        if (!is_cell_on_grid(sr, sc) || current_grid_state[sr][sc] != 0) continue;
+        increment_bfs_marker();
+        clear_global_bfs_queue();
+        q_bfs_global.push({sr, sc});
+        visited_marker_grid[sr][sc] = current_visit_marker;
+        bool reached_boundary = false;
+        while (!q_bfs_global.empty()) {
+            auto cur = q_bfs_global.front(); q_bfs_global.pop();
+            if (cur.first == 0 || cur.first == N_FIXED - 1 || cur.second == 0 || cur.second == N_FIXED - 1) {
+                reached_boundary = true; // early exit: one boundary contact is enough
+                break;
+            }
+            for (int d = 0; d < 4; ++d) {
+                int rr = cur.first + DR[d], cc = cur.second + DC[d];
+                if (!is_cell_on_grid(rr, cc)) continue;
+                if (current_grid_state[rr][cc] != 0) continue; // (r,c) is no longer 0, so it is never crossed
+                if (visited_marker_grid[rr][cc] == current_visit_marker) continue;
+                visited_marker_grid[rr][cc] = current_visit_marker;
+                q_bfs_global.push({rr, cc});
+            }
+        }
+        if (!reached_boundary) return false;
+    }
+    return true;
+}
+bool attempt_change_cell_color_and_validate(int r, int c, int old_color, int new_color) {
+    /*
+      Docstring: Lightweight recolor validator and applier.
+      Tries to change cell (r,c) from old_color to new_color.
+      Returns true when the move is legal; the move is then left APPLIED to current_grid_state,
+      cells_by_color, current_border_edges_tracker and boundary_zero_cells_count. The per-pair edge
+      deltas stay in temp_adj_deltas and the boundary change in last_boundary_zero_delta so that the
+      caller can undo a legal move it decides not to keep.
+      Returns false when the move is illegal; every structure is restored before returning.
+      Checks performed:
+      - Before mutating: the cell must attach to the existing new_color region (for color 0,
+        touching the grid boundary also counts, because the outside is color 0).
+      - After mutating: the old color's region must stay connected (local BFS around the cell).
+        When a boundary 0-cell is recolored, every neighboring 0-component must additionally keep
+        a boundary contact of its own; a global "some boundary 0-cell exists" count is not enough,
+        since 0-components are joined to each other only through the outside.
+      - Adjacency counts are updated for the touched color pairs only and compared against
+        required_adjacencies; a non-zero ward that has required adjacencies may not vanish.
+    */
+    // Region sizes before the mutation
+    int old_count_old = (int)cells_by_color[old_color].size();
+    int old_count_new = (int)cells_by_color[new_color].size();
+    int old_zero_count = (int)cells_by_color[0].size();
+    bool on_boundary = (r == 0 || r == N_FIXED - 1 || c == 0 || c == N_FIXED - 1);
+    // Attachment pruning (nothing mutated yet). The neighbors of (r,c) are not changed by the
+    // recolor, so this test does not need to be repeated afterwards.
+    if (new_color != 0) {
+        if (old_count_new > 0 && !has_adjacent_color(r, c, new_color)) return false;
+    } else if (!on_boundary) {
+        // An interior cell can only become 0 next to an existing 0-cell.
+        if (old_zero_count == 0 || !has_adjacent_color(r, c, 0)) return false;
+    }
+    // Apply recolor to the working state
+    current_grid_state[r][c] = new_color;
+    remove_cell_from_color_ds(r, c, old_color);
+    add_cell_to_color_ds(r, c, new_color);
+    // Boundary 0 delta for this move (do not commit yet)
+    int boundary_delta = 0;
+    if (on_boundary) {
+        if (old_color == 0) boundary_delta--;
+        if (new_color == 0) boundary_delta++;
+    }
+    bool ok = true;
+    // Old color connectivity (after removal)
+    if (old_color != 0 && old_count_old > 1) {
+        if (!removal_keeps_connectivity_local(r, c, old_color)) ok = false;
+    }
+    if (old_color == 0 && old_zero_count > 1) {
+        if (!removal_keeps_connectivity_local(r, c, 0)) ok = false;
+        // At least one boundary 0-cell must remain while 0-cells exist.
+        if (ok && boundary_zero_cells_count + boundary_delta <= 0) ok = false;
+        // (r,c) may have been the only boundary contact of its 0-component even though other
+        // components still touch the boundary; that would leave an enclosed pocket of 0-cells.
+        if (ok && on_boundary && !zero_neighbors_still_touch_boundary(r, c)) ok = false;
+    }
+    if (!ok) {
+        // Revert recolor
+        current_grid_state[r][c] = old_color;
+        remove_cell_from_color_ds(r, c, new_color);
+        add_cell_to_color_ds(r, c, old_color);
+        return false;
+    }
+    // Prepare adjacency deltas (off-grid neighbors count as color 0) and apply to the tracker
+    temp_adj_deltas.clear();
+    for (int k_adj = 0; k_adj < 4; ++k_adj) {
+        int nr = r + DR[k_adj];
+        int nc = c + DC[k_adj];
+        int neighbor_actual_color = is_cell_on_grid(nr, nc) ? current_grid_state[nr][nc] : 0;
+        if (old_color != neighbor_actual_color) temp_adj_deltas.add(old_color, neighbor_actual_color, -1);
+        if (new_color != neighbor_actual_color) temp_adj_deltas.add(new_color, neighbor_actual_color, +1);
+    }
+    for (int i = 0; i < temp_adj_deltas.sz; ++i) {
+        int c1 = temp_adj_deltas.c1[i], c2 = temp_adj_deltas.c2[i], d = temp_adj_deltas.delta[i];
+        if (d > 0) for (int t = 0; t < d; ++t) current_border_edges_tracker.add_edge(c1, c2);
+        else for (int t = 0; t < -d; ++t) current_border_edges_tracker.remove_edge(c1, c2);
+    }
+    // Verify adjacency constraints for only affected pairs
+    for (int i = 0; i < temp_adj_deltas.sz; ++i) {
+        int c1 = temp_adj_deltas.c1[i], c2 = temp_adj_deltas.c2[i];
+        bool has_edge_now = current_border_edges_tracker.get_count(c1, c2) > 0;
+        bool needs_edge = required_adjacencies.is_adj(c1, c2);
+        if (has_edge_now != needs_edge) { ok = false; break; }
+    }
+    // Cannot delete a ward that must exist (non-zero)
+    if (ok && old_color != 0 && cells_by_color[old_color].empty() && ward_has_any_req_adj[old_color]) ok = false;
+    if (!ok) {
+        // Revert adjacency tracker
+        for (int i = 0; i < temp_adj_deltas.sz; ++i) {
+            int c1 = temp_adj_deltas.c1[i], c2 = temp_adj_deltas.c2[i], d = temp_adj_deltas.delta[i];
+            if (d > 0) for (int t = 0; t < d; ++t) current_border_edges_tracker.remove_edge(c1, c2);
+            else for (int t = 0; t < -d; ++t) current_border_edges_tracker.add_edge(c1, c2);
+        }
+        // Revert recolor
+        current_grid_state[r][c] = old_color;
+        remove_cell_from_color_ds(r, c, new_color);
+        add_cell_to_color_ds(r, c, old_color);
+        return false;
+    }
+    // Commit boundary zero cells count (caller will undo on SA rejection)
+    boundary_zero_cells_count += boundary_delta;
+    last_boundary_zero_delta = boundary_delta;
+    return true;
+}
+void solve_main_logic() {
+    /*
+      Docstring: End-to-end solver.
+      1. Read the N_FIXED x N_FIXED map from stdin and build all trackers.
+      2. Greedy pass (time-boxed by GREEDY_PASS_BUDGET_MS): visit every cell once in random order
+         and try to recolor it to 0.
+      3. Simulated annealing until TOTAL_COMPUTATION_TIME_MS: recolor a random cell to 0 or to the
+         color of one of its neighbors; the score is the number of 0-cells. A legal move that the
+         acceptance rule turns down is undone here from temp_adj_deltas / last_boundary_zero_delta.
+      4. Print the best map seen.
+    */
+    std::vector<std::vector<int>> initial_grid_from_input(N_FIXED, std::vector<int>(N_FIXED));
+    for (int i = 0; i < N_FIXED; ++i) for (int j = 0; j < N_FIXED; ++j) std::cin >> initial_grid_from_input[i][j];
+    initialize_all_data_structures(initial_grid_from_input);
+    const double GREEDY_PASS_BUDGET_MS = 300.0;
+    double greedy_pass_start_abs_time = time_elapsed_ms();
+    std::vector<std::pair<int,int>> all_cells_shuffled;
+    all_cells_shuffled.reserve(N_FIXED * N_FIXED);
+    for(int r_idx=0; r_idx<N_FIXED; ++r_idx) for(int c_idx=0; c_idx<N_FIXED; ++c_idx) all_cells_shuffled.push_back({r_idx,c_idx});
+    std::shuffle(all_cells_shuffled.begin(), all_cells_shuffled.end(), rnd_gen);
+    for (const auto& cell_coords : all_cells_shuffled) {
+        if (time_elapsed_ms() - greedy_pass_start_abs_time > GREEDY_PASS_BUDGET_MS) break;
+        int r = cell_coords.first; int c = cell_coords.second;
+        int original_color = current_grid_state[r][c];
+        if (original_color == 0) continue;
+        if (attempt_change_cell_color_and_validate(r, c, original_color, 0)) {
+            int current_zeros_count = (int)cells_by_color[0].size();
+            if (current_zeros_count > best_score_val) {
+                best_score_val = current_zeros_count;
+                best_grid_state = current_grid_state;
+            }
+        }
+    }
+    double sa_start_temp = 2.0;
+    double sa_end_temp = 0.01;
+    const double TOTAL_COMPUTATION_TIME_MS = 1950.0;
+    double sa_start_abs_time = time_elapsed_ms();
+    double sa_total_duration_ms = TOTAL_COMPUTATION_TIME_MS - sa_start_abs_time;
+    if (sa_total_duration_ms <= 0) sa_total_duration_ms = 1.0;
+    // The clock and std::pow are consulted only once every 256 iterations (together with the
+    // deadline check); the temperature is held constant in between, which is far finer than the
+    // cooling schedule needs and keeps the hot loop free of timer calls.
+    double current_temperature = sa_start_temp;
+    int iter_count = 0;
+    while(true) {
+        iter_count++;
+        if(iter_count % 256 == 0) {
+            double now_ms = time_elapsed_ms();
+            if (now_ms >= TOTAL_COMPUTATION_TIME_MS) break;
+            double progress_ratio = std::min((now_ms - sa_start_abs_time) / sa_total_duration_ms, 1.0);
+            // Geometric cooling from sa_start_temp down to sa_end_temp
+            current_temperature = sa_start_temp * std::pow(sa_end_temp / sa_start_temp, progress_ratio);
+            current_temperature = std::max(current_temperature, sa_end_temp);
+        }
+        int r_coord = rnd_gen.next_int(N_FIXED);
+        int c_coord = rnd_gen.next_int(N_FIXED);
+        int original_color_at_cell = current_grid_state[r_coord][c_coord];
+        // Candidate list: 0 plus the color of each of the 4 neighbors (off-grid counts as 0).
+        // Entries are NOT de-duplicated, so sampling a slot uniformly favors colors that occur
+        // several times around the cell (in particular 0 near the boundary).
+        int candidate_new_colors[5];
+        int num_candidate_options = 0;
+        candidate_new_colors[num_candidate_options++] = 0;
+        for(int k_neighbor_idx=0; k_neighbor_idx<4; ++k_neighbor_idx) {
+            int nr = r_coord + DR[k_neighbor_idx];
+            int nc = c_coord + DC[k_neighbor_idx];
+            if (is_cell_on_grid(nr,nc)) {
+                candidate_new_colors[num_candidate_options++] = current_grid_state[nr][nc];
+            } else {
+                candidate_new_colors[num_candidate_options++] = 0;
+            }
+        }
+        int new_proposed_color = candidate_new_colors[rnd_gen.next_int(num_candidate_options)];
+        if (original_color_at_cell == new_proposed_color) continue;
+        // Score change: +1 when a cell becomes 0, -1 when a 0-cell is filled, 0 otherwise
+        int delta_in_score_metric = 0;
+        if (new_proposed_color == 0 && original_color_at_cell != 0) delta_in_score_metric = 1;
+        else if (new_proposed_color != 0 && original_color_at_cell == 0) delta_in_score_metric = -1;
+        if (attempt_change_cell_color_and_validate(r_coord, c_coord, original_color_at_cell, new_proposed_color)) {
+            bool accept_this_move = false;
+            if (delta_in_score_metric >= 0) {
+                accept_this_move = true;
+                // Compare as int: best_score_val is signed, size() is not
+                int zeros_now = (int)cells_by_color[0].size();
+                if (zeros_now > best_score_val) {
+                    best_score_val = zeros_now;
+                    best_grid_state = current_grid_state;
+                }
+            } else {
+                // Metropolis rule for a worsening move
+                if (current_temperature > 1e-9 && rnd_gen.next_double() < std::exp((double)delta_in_score_metric / current_temperature)) {
+                    accept_this_move = true;
+                } else {
+                    accept_this_move = false;
+                }
+            }
+            if (!accept_this_move) {
+                // The validator left the (legal) move applied; roll every structure back
+                current_grid_state[r_coord][c_coord] = original_color_at_cell;
+                remove_cell_from_color_ds(r_coord, c_coord, new_proposed_color);
+                add_cell_to_color_ds(r_coord, c_coord, original_color_at_cell);
+                // Revert boundary zero count
+                boundary_zero_cells_count -= last_boundary_zero_delta;
+                last_boundary_zero_delta = 0;
+                for (int i = 0; i < temp_adj_deltas.sz; ++i) {
+                    int c1_ = temp_adj_deltas.c1[i], c2_ = temp_adj_deltas.c2[i], delta = temp_adj_deltas.delta[i];
+                    if (delta > 0) for (int t = 0; t < delta; ++t) current_border_edges_tracker.remove_edge(c1_, c2_);
+                    else for (int t = 0; t < -delta; ++t) current_border_edges_tracker.add_edge(c1_, c2_);
+                }
+            }
+        }
+    }
+    // '\n' instead of std::endl: no per-row flush; the stream is flushed when the program exits
+    for (int i = 0; i < N_FIXED; ++i) {
+        for (int j = 0; j < N_FIXED; ++j) {
+            std::cout << best_grid_state[i][j] << (j == N_FIXED - 1 ? "" : " ");
+        }
+        std::cout << '\n';
+    }
+}
+int main() {
+    // Decouple iostreams from C stdio and untie cin from cout before any I/O happens.
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    // Restart the stopwatch that time_elapsed_ms() measures against.
+    G_START_TIME = std::chrono::high_resolution_clock::now();
+    // The two leading integers of the input are consumed and discarded; the solver works with
+    // the compile-time grid size N_FIXED.
+    int header_n, header_m;
+    std::cin >> header_n >> header_m;
+    solve_main_logic();
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc024/config.yaml ADDED Viewed

	@@ -0,0 +1,73 @@

+# ALE-Bench ahc024 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    Mr. Takahashi, the mayor of Takahashi City, decided to draw a map of Takahashi City on the floor of the city hall lobby\
+    \ using colored square tiles.\nTakahashi City is divided into several wards, and in this map, each ward should be represented\
+    \ as a set of connected tiles of the same color.\nHe commissioned a contractor to create a draft of an accurate map, but\
+    \ the number of tiles to be used was too large, and the budget was exceeded.\nMayor Takahashi, who loves graphs, is only\
+    \ interested in the adjacencies between the wards and thinks that the map could be drawn with fewer tiles if information\
+    \ other than adjacencies, such as the shape and size of each ward, is ignored.\nPlease create a map using as few tiles\
+    \ as possible.\n\n<div style=\"display: flex; width: 100%;\">\n  <div style=\"flex-basis: 40%; text-align: center; margin-right:\
+    \ 10%;\">\n    <img src=\"./images/input.png\" style=\"max-width: 100%; max-height: 100%; vertical-align: middle;\">\n\
+    \  <p>Accurate map</p>\n  </div>\n  <div style=\"flex-basis: 50%; text-align: center;\">\n    <img src=\"./images/output.png\"\
+    \ style=\"max-width: 100%; max-height: 100%; vertical-align: middle;\">\n  <p>Small map correctly representing adjacencies</p>\n\
+    \  </div>\n</div>\n\nProblem Statement\n--------\nGiven a map of Takahashi City represented on a grid of $n\\times n$\
+    \ squares.\nLet $(0,0)$ be the coordinates of the top-left square, and $(i,j)$ be the coordinates of the square located\
+    \ $i$ squares down and $j$ squares to the right from there.\nThe city consists of $m$ wards, and the square of color $c$\
+    \ ($1\\leq c\\leq m$) corresponds to the $c$-th ward.\nThe outside of the $n\\times n$ squares correspond to the outside\
+    \ of the city and is colored $0$.\n\nTwo squares are defined as \"adjacent\" if they share an edge, and a set of squares\
+    \ is defined as \"connected\" if any two squares can reach each other via adjacent squares.\nIn the given map, for each\
+    \ color c, the set of squares of color c is guaranteed to be connected.\n\nYour task is to create a map represented on\
+    \ a grid of $n\\times n$ squares that satisfies all of the following conditions.\n\n- For every color $c$ ($0\\leq c\\\
+    leq m$), squares of color $c$ must be connected. Note that since the outside of the $n\\times n$ squares is colored $0$,\
+    \ squares of color $0$ can be connected through the outside squares.\n- For every pair of colors $c$ and $d$ ($0\\leq\
+    \ c<d\\leq m$), the adjacency of a set of squares of color $c$ and a set of squares of color $d$ in the original map and\
+    \ the created map must be identical. That is, if and only if there exist adjacent squares of color $c$ and $d$ in the\
+    \ original map, there exist adjacent squares of color $c$ and $d$ in the created map. Note that since the outside of the\
+    \ $n\\times n$ squares is colored $0$, the squares on the boundary are considered to be adjacent to squares of color $0$.\n\
+    \n\nScoring\n--------\nLet $E$ be the total number of squares of color $0$ in the created map.\nThen you will obtain a\
+    \ score of $E+1$.\n\nThere are 150 test cases, and the score of a submission is the total score for each test case.\n\
+    If your submission produces an illegal output or exceeds the time limit for some test cases, the submission itself will\
+    \ be judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span>\
+    \ or <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>\
+    \ , and the score of the submission will be zero.\nThe highest score obtained during the contest will determine the final\
+    \ ranking, and there will be no system test after the contest.\nIf more than one participant gets the same score, they\
+    \ will be ranked in the same place regardless of the submission time.\n\n\n\nInput\n--------\nInput is given from Standard\
+    \ Input in the following format.\n\n~~~\n$n$ $m$\n$c_{0,0}$ $c_{0,1}$ $\\cdots$ $c_{0,n-1}$\n$\\vdots$\n$c_{n-1,0}$ $c_{n-1,1}$\
+    \ $\\cdots$ $c_{n-1,n-1}$\n~~~\n\nFor all test cases, we fix $n = 50$ and $m = 100$.\n$c_{i,j}$ is an integer value representing\
+    \ the color of the square at coordinates $(i,j)$ and satisfies $1\\leq c_{i,j}\\leq m$.\nFor every $k=1,2,\\cdots,m$,\
+    \ there exists at least one $(i,j)$ with $c_{i,j}=k$.\n\n\nOutput\n--------\nLet $d_{i,j}$ ($0\\leq d_{i,j}\\leq m$) be\
+    \ the color of the square at coordinates $(i,j)$ in the created map.\nThen, output to Standard Output in the following\
+    \ format.\n\n~~~\n$d_{0,0}$ $d_{0,1}$ $\\cdots$ $d_{0,n-1}$\n$\\vdots$\n$d_{n-1,0}$ $d_{n-1,1}$ $\\cdots$ $d_{n-1,n-1}$\n\
+    ~~~\n\nIf the output map does not satisfy the conditions specified in the problem statement, the submission will be judged\
+    \ as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span>.\n\n\
+    Your program may output multiple solutions.\nIf multiple solutions are output, only the last one is used for scoring.\n\
+    You can compare multiple solutions using the web version of the visualizer.\n\n<a href=\"https://img.atcoder.jp/ahc024/AU5KcDyn.html?lang=en&seed=0&output=sample\"\
+    >Show example</a>\n\n\nInput Generation\n--------\n<details>\nFirst, we initialize with $c_{i,j}=0$ for all $(i,j)$.\n\
+    Next, for each $k=1,2,\\cdots,m$, we randomly select a square with $c_{i,j}=0$ and set $c_{i,j}=k$.\nFinally, we repeat\
+    \ the following process while squares with $c_{i,j}=0$ remain.\n\nRandomly select a square with $c_{i,j}=0$ and randomly\
+    \ select its adjacent square $(i',j')$.\nWe set $c_{i,j}=c_{i',j'}$.\n</details>\n\nTools (Input generator and visualizer)\n\
+    --------\n- <a href=\"https://img.atcoder.jp/ahc024/AU5KcDyn.html?lang=en\">Web version</a>: This is more powerful than\
+    \ the local version providing animations and manual play.\n- <a href=\"https://img.atcoder.jp/ahc024/AU5KcDyn.zip\">Local\
+    \ version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n  - <a\
+    \ href=\"https://img.atcoder.jp/ahc024/AU5KcDyn_windows.zip\">Pre-compiled binary for Windows</a>: If you are not familiar\
+    \ with the Rust language environment, please use this instead.\n\nPlease be aware that sharing visualization results or\
+    \ discussing solutions/ideas during the contest is prohibited.\n\n\n  Problem constraints:\n  time_limit=2.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc024/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc024"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc024/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,481 @@

+# EVOLVE-BLOCK-START
+#pragma GCC optimize("O3,unroll-loops")
+#include <iostream>
+#include <vector>
+#include <map>       // For temp_adj_deltas_map_global
+#include <queue>
+#include <algorithm> // For std::min, std::max, std::sort, std::unique, std::shuffle
+#include <random>    // For XorShift and std::shuffle
+#include <chrono>
+#include <utility>   // For std::pair
+#include <cmath>     // For std::exp, std::pow
+#include <climits>   // For UINT_MAX
+// --- Globals ---
+// Side length of the square grid (the problem fixes n = 50 for all test cases).
+const int N_FIXED = 50;
+const int M_FIXED = 100; // Max ward ID, problem states M=100
+// Working map; recolor attempts write into it directly.
+std::vector<std::vector<int>> current_grid_state(N_FIXED, std::vector<int>(N_FIXED));
+// Copy of the best map recorded so far (initialised from the input map).
+std::vector<std::vector<int>> best_grid_state(N_FIXED, std::vector<int>(N_FIXED));
+int best_score_val = -1; // Stores count of 0-cells for the best state
+// xorshift128 pseudo-random generator; also usable as the URBG argument of std::shuffle.
+struct XorShift {
+    unsigned int x, y, z, w;
+    // Seeds the four state words from std::random_device.
+    XorShift() {
+        std::random_device entropy;
+        x = entropy();
+        y = entropy();
+        z = entropy();
+        w = entropy();
+        // An all-zero state would make the generator emit zeros forever.
+        const bool all_zero = (x | y | z | w) == 0;
+        if (all_zero) w = 1;
+    }
+    // Advances the state and returns the next raw value.
+    unsigned int next_uint() {
+        const unsigned int mixed = x ^ (x << 11);
+        x = y;
+        y = z;
+        z = w;
+        w = (w ^ (w >> 19)) ^ (mixed ^ (mixed >> 8));
+        return w;
+    }
+    // Uniform value in [0, 1).
+    double next_double() {
+        return static_cast<double>(next_uint()) / (static_cast<double>(UINT_MAX) + 1.0);
+    }
+    // Value in [0, exclusive_max_val - 1]; returns 0 (without drawing) for a non-positive bound.
+    int next_int(int exclusive_max_val) {
+        return exclusive_max_val > 0
+            ? static_cast<int>(next_uint() % static_cast<unsigned int>(exclusive_max_val))
+            : 0;
+    }
+    // Interface required by std::shuffle
+    using result_type = unsigned int;
+    static constexpr unsigned int min() { return 0; }
+    static constexpr unsigned int max() { return UINT_MAX; }
+    unsigned int operator()() { return next_uint(); }
+};
+XorShift rnd_gen; // Global instance
+// Reference instant for time_elapsed_ms(); first set when the globals are constructed.
+auto G_START_TIME = std::chrono::high_resolution_clock::now();
+// Milliseconds (with fractional part) elapsed since G_START_TIME.
+double time_elapsed_ms() {
+    using fractional_ms = std::chrono::duration<double, std::milli>;
+    const fractional_ms elapsed = std::chrono::high_resolution_clock::now() - G_START_TIME;
+    return elapsed.count();
+}
+// Symmetric boolean relation over colors 0..M_FIXED, stored in the upper triangle
+// (row = smaller color, column = larger color). A color is never adjacent to itself.
+struct AdjacencyInfo {
+    bool matrix[M_FIXED + 1][M_FIXED + 1];
+    // Starts with no adjacency recorded.
+    AdjacencyInfo() {
+        for (auto& row : matrix) {
+            for (bool& flag : row) flag = false;
+        }
+    }
+    // Records that c1 and c2 are adjacent; a no-op when both are the same color.
+    void set_adj(int c1, int c2) {
+        if (c1 == c2) return;
+        const int lo = c1 < c2 ? c1 : c2;
+        const int hi = c1 < c2 ? c2 : c1;
+        matrix[lo][hi] = true;
+    }
+    // True iff the pair was recorded by set_adj (in either argument order).
+    bool is_adj(int c1, int c2) const {
+        if (c1 == c2) return false;
+        const int lo = c1 < c2 ? c1 : c2;
+        const int hi = c1 < c2 ? c2 : c1;
+        return matrix[lo][hi];
+    }
+};
+// Adjacency relation that the produced map has to reproduce.
+AdjacencyInfo required_adjacencies;
+// ward_has_any_req_adj[c]: color c takes part in at least one required adjacency.
+bool ward_has_any_req_adj[M_FIXED + 1];
+// Counter per unordered color pair, stored in the upper triangle
+// (row = smaller color, column = larger color). Same-color pairs are never counted.
+struct BorderEdgeTracker {
+    int counts_arr[M_FIXED + 1][M_FIXED + 1];
+    BorderEdgeTracker() { clear(); }
+    // Counts one more edge between c1 and c2.
+    void add_edge(int c1, int c2) {
+        if (c1 != c2) ++counts_arr[c1 < c2 ? c1 : c2][c1 < c2 ? c2 : c1];
+    }
+    // Counts one edge less between c1 and c2 (no check against going negative).
+    void remove_edge(int c1, int c2) {
+        if (c1 != c2) --counts_arr[c1 < c2 ? c1 : c2][c1 < c2 ? c2 : c1];
+    }
+    // Current count for the pair; always 0 for c1 == c2.
+    int get_count(int c1, int c2) const {
+        return c1 == c2 ? 0 : counts_arr[c1 < c2 ? c1 : c2][c1 < c2 ? c2 : c1];
+    }
+    // Resets every counter to zero.
+    void clear() {
+        for (auto& row : counts_arr) {
+            for (int& count : row) count = 0;
+        }
+    }
+};
+BorderEdgeTracker current_border_edges_tracker;
+// cells_by_color[k]: coordinates of every cell currently holding color k (unordered).
+std::vector<std::vector<std::pair<int, int>>> cells_by_color(M_FIXED + 1);
+// pos_in_color_list[r][c]: index of cell (r,c) inside the list of its current color.
+std::vector<std::vector<int>> pos_in_color_list(N_FIXED, std::vector<int>(N_FIXED));
+// BFS bookkeeping: a cell counts as visited in the current search iff its stamp equals
+// current_visit_marker, so the grid does not have to be cleared between searches.
+unsigned int visited_marker_grid[N_FIXED][N_FIXED];
+unsigned int current_visit_marker = 0;
+// Queue shared by all BFS routines.
+std::queue<std::pair<int, int>> q_bfs_global;
+// Row/column offsets of the four edge-sharing neighbors.
+const int DR[] = {-1, 1, 0, 0};
+const int DC[] = {0, 0, -1, 1};
+// True iff (r, c) lies inside the N_FIXED x N_FIXED grid.
+inline bool is_cell_on_grid(int r, int c) {
+    const bool row_ok = 0 <= r && r < N_FIXED;
+    const bool col_ok = 0 <= c && c < N_FIXED;
+    return row_ok && col_ok;
+}
+// Starts a new BFS epoch by bumping current_visit_marker. When the counter wraps around to 0,
+// all stamps are wiped and counting restarts at 1 so that old stamps cannot match by accident.
+void increment_bfs_marker() {
+    if (++current_visit_marker != 0) return;
+    for (auto& row : visited_marker_grid) {
+        for (unsigned int& stamp : row) stamp = 0;
+    }
+    current_visit_marker = 1;
+}
+// Empties the shared BFS queue by replacing it with a fresh one.
+void clear_global_bfs_queue() {
+    q_bfs_global = std::queue<std::pair<int, int>>();
+}
+// Appends (r,c) to the cell list of 'color' and remembers the index it was stored at.
+void add_cell_to_color_ds(int r, int c, int color) {
+    auto& bucket = cells_by_color[color];
+    pos_in_color_list[r][c] = static_cast<int>(bucket.size());
+    bucket.push_back({r, c});
+}
+// Removes (r,c) from the cell list of 'color' in O(1): the last entry of the list is moved
+// into the vacated slot (and its stored index updated) before the list is shortened.
+void remove_cell_from_color_ds(int r, int c, int color) {
+    auto& bucket = cells_by_color[color];
+    const int slot = pos_in_color_list[r][c];
+    const std::pair<int, int> moved = bucket.back();
+    bucket[slot] = moved;
+    pos_in_color_list[moved.first][moved.second] = slot;
+    bucket.pop_back();
+}
+// Rebuilds every global structure from the input map:
+// - current_grid_state / cells_by_color / pos_in_color_list mirror initial_grid,
+// - required_adjacencies holds the color pairs that touch in initial_grid (grid rim cells touch 0),
+// - current_border_edges_tracker counts the touching edges per pair (one edge per rim side),
+// - ward_has_any_req_adj flags the colors with at least one required adjacency,
+// - best_grid_state / best_score_val start from the input map and its number of 0-cells.
+void initialize_all_data_structures(const std::vector<std::vector<int>>& initial_grid) {
+    required_adjacencies = AdjacencyInfo();
+    current_border_edges_tracker.clear();
+    for (int color = 0; color <= M_FIXED; ++color) cells_by_color[color].clear();
+    for (int row = 0; row < N_FIXED; ++row) {
+        for (int col = 0; col < N_FIXED; ++col) {
+            const int color = initial_grid[row][col];
+            current_grid_state[row][col] = color;
+            add_cell_to_color_ds(row, col, color);
+        }
+    }
+    for (int row = 0; row < N_FIXED; ++row) {
+        for (int col = 0; col < N_FIXED; ++col) {
+            const int wanted = initial_grid[row][col];
+            const int actual = current_grid_state[row][col];
+            const bool has_right = col + 1 < N_FIXED;
+            const bool has_below = row + 1 < N_FIXED;
+            // Number of sides of this cell that face the outside of the grid (0, 1 or 2).
+            const int rim_sides = (row == 0) + (row == N_FIXED - 1) + (col == 0) + (col == N_FIXED - 1);
+            // Required adjacencies, taken from the input map.
+            if (rim_sides > 0) required_adjacencies.set_adj(0, wanted);
+            if (has_right && wanted != initial_grid[row][col + 1]) {
+                required_adjacencies.set_adj(wanted, initial_grid[row][col + 1]);
+            }
+            if (has_below && wanted != initial_grid[row + 1][col]) {
+                required_adjacencies.set_adj(wanted, initial_grid[row + 1][col]);
+            }
+            // Edge counts of the working map; each edge is counted once (right and down only).
+            for (int side = 0; side < rim_sides; ++side) {
+                current_border_edges_tracker.add_edge(0, actual);
+            }
+            if (has_right && actual != current_grid_state[row][col + 1]) {
+                current_border_edges_tracker.add_edge(actual, current_grid_state[row][col + 1]);
+            }
+            if (has_below && actual != current_grid_state[row + 1][col]) {
+                current_border_edges_tracker.add_edge(actual, current_grid_state[row + 1][col]);
+            }
+        }
+    }
+    for (int color = 0; color <= M_FIXED; ++color) {
+        bool any_partner = false;
+        for (int other = 0; other <= M_FIXED && !any_partner; ++other) {
+            any_partner = (other != color) && required_adjacencies.is_adj(color, other);
+        }
+        ward_has_any_req_adj[color] = any_partner;
+    }
+    best_grid_state = current_grid_state;
+    best_score_val = cells_by_color[0].size();
+}
+// Full connectivity test for one color: flood-fills from the first listed cell of target_color
+// and reports whether the fill covered every cell of that color. An absent color is connected.
+bool check_region_connectivity_bfs(int target_color) {
+    const auto& region = cells_by_color[target_color];
+    if (region.empty()) return true;
+    increment_bfs_marker();
+    clear_global_bfs_queue();
+    const std::pair<int, int> origin = region[0];
+    visited_marker_grid[origin.first][origin.second] = current_visit_marker;
+    q_bfs_global.push(origin);
+    int reached = 1; // cells are counted when they are stamped
+    while (!q_bfs_global.empty()) {
+        const std::pair<int, int> here = q_bfs_global.front();
+        q_bfs_global.pop();
+        for (int dir = 0; dir < 4; ++dir) {
+            const int ar = here.first + DR[dir];
+            const int ac = here.second + DC[dir];
+            if (!is_cell_on_grid(ar, ac)) continue;
+            if (current_grid_state[ar][ac] != target_color) continue;
+            if (visited_marker_grid[ar][ac] == current_visit_marker) continue;
+            visited_marker_grid[ar][ac] = current_visit_marker;
+            q_bfs_global.push({ar, ac});
+            ++reached;
+        }
+    }
+    return reached == region.size();
+}
+// Full connectivity test for color 0, whose cells may also be joined through the outside of
+// the grid: the flood fill is seeded with every 0-cell on the grid rim, and the region is
+// connected iff the fill reaches all 0-cells. No 0-cells at all counts as connected; 0-cells
+// without any rim cell among them count as disconnected.
+bool check_region_0_connectivity_full() {
+    const auto& zero_cells = cells_by_color[0];
+    if (zero_cells.empty()) return true;
+    increment_bfs_marker();
+    clear_global_bfs_queue();
+    bool rim_zero_seen = false;
+    for (const auto& cell : zero_cells) {
+        const int r = cell.first;
+        const int c = cell.second;
+        const bool on_rim = r == 0 || r == N_FIXED - 1 || c == 0 || c == N_FIXED - 1;
+        if (!on_rim) continue;
+        rim_zero_seen = true;
+        if (visited_marker_grid[r][c] == current_visit_marker) continue;
+        q_bfs_global.push(cell);
+        visited_marker_grid[r][c] = current_visit_marker;
+    }
+    if (!rim_zero_seen) return false;
+    while (!q_bfs_global.empty()) {
+        const std::pair<int, int> here = q_bfs_global.front();
+        q_bfs_global.pop();
+        for (int dir = 0; dir < 4; ++dir) {
+            const int ar = here.first + DR[dir];
+            const int ac = here.second + DC[dir];
+            if (!is_cell_on_grid(ar, ac)) continue;
+            if (current_grid_state[ar][ac] != 0) continue;
+            if (visited_marker_grid[ar][ac] == current_visit_marker) continue;
+            visited_marker_grid[ar][ac] = current_visit_marker;
+            q_bfs_global.push({ar, ac});
+        }
+    }
+    // Any 0-cell left unstamped is cut off from the rim.
+    bool all_reached = true;
+    for (const auto& cell : zero_cells) {
+        if (visited_marker_grid[cell.first][cell.second] != current_visit_marker) {
+            all_reached = false;
+            break;
+        }
+    }
+    return all_reached;
+}
+// Scratch map: colour pair (smaller, larger) -> net change in the number of
+// border edges between the two colours caused by the latest attempted recolour.
+// It stays populated after attempt_change_cell_color_and_validate() returns.
+std::map<std::pair<int, int>, int> temp_adj_deltas_map_global;
+// Tentatively recolours cell (r, c) from old_color to new_color, updating the
+// grid, the per-colour cell lists and the border-edge tracker. The change is
+// kept (returns true) only if every touched colour pair has an edge exactly
+// when required_adjacencies asks for one and the affected regions remain
+// connected; otherwise all updates are rolled back and false is returned.
+bool attempt_change_cell_color_and_validate(int r, int c, int old_color, int new_color) {
+    current_grid_state[r][c] = new_color;
+    remove_cell_from_color_ds(r, c, old_color);
+    add_cell_to_color_ds(r, c, new_color);
+
+    // Collect edge-count deltas over the four sides; an off-grid side counts as colour 0.
+    temp_adj_deltas_map_global.clear();
+    for (int side = 0; side < 4; ++side) {
+        const int nr = r + DR[side];
+        const int nc = c + DC[side];
+        const int other = is_cell_on_grid(nr, nc) ? current_grid_state[nr][nc] : 0;
+        if (other != old_color) {
+            const std::pair<int, int> lost(std::min(old_color, other), std::max(old_color, other));
+            --temp_adj_deltas_map_global[lost];
+        }
+        if (other != new_color) {
+            const std::pair<int, int> gained(std::min(new_color, other), std::max(new_color, other));
+            ++temp_adj_deltas_map_global[gained];
+        }
+    }
+
+    // Push the deltas into the tracker.
+    for (const auto& kv : temp_adj_deltas_map_global) {
+        const int lo = kv.first.first;
+        const int hi = kv.first.second;
+        int pending = kv.second;
+        for (; pending > 0; --pending) current_border_edges_tracker.add_edge(lo, hi);
+        for (; pending < 0; ++pending) current_border_edges_tracker.remove_edge(lo, hi);
+    }
+
+    // Each touched pair must be adjacent exactly when the requirement says so.
+    bool ok = true;
+    for (auto it = temp_adj_deltas_map_global.begin(); ok && it != temp_adj_deltas_map_global.end(); ++it) {
+        const int lo = it->first.first;
+        const int hi = it->first.second;
+        const bool present = current_border_edges_tracker.get_count(lo, hi) > 0;
+        const bool required = required_adjacencies.is_adj(lo, hi);
+        ok = (present == required);
+    }
+
+    // The colour that lost the cell: it may vanish only if it has no required
+    // adjacency; otherwise what is left of it must still be connected.
+    if (ok && old_color != 0) {
+        if (cells_by_color[old_color].empty()) {
+            if (ward_has_any_req_adj[old_color]) ok = false;
+        } else if (!check_region_connectivity_bfs(old_color)) {
+            ok = false;
+        }
+    }
+    // The colour that gained the cell must be connected.
+    if (ok && new_color != 0 && !check_region_connectivity_bfs(new_color)) {
+        ok = false;
+    }
+    // Colour 0 gets its own check whenever it was involved.
+    if (ok && (old_color == 0 || new_color == 0)) {
+        if (cells_by_color[0].empty()) {
+            if (ward_has_any_req_adj[0]) ok = false;
+        } else if (!check_region_0_connectivity_full()) {
+            ok = false;
+        }
+    }
+
+    if (ok) {
+        return true;
+    }
+
+    // Roll everything back in the same order the updates were applied.
+    current_grid_state[r][c] = old_color;
+    remove_cell_from_color_ds(r, c, new_color);
+    add_cell_to_color_ds(r, c, old_color);
+    for (const auto& kv : temp_adj_deltas_map_global) {
+        const int lo = kv.first.first;
+        const int hi = kv.first.second;
+        int pending = kv.second;
+        for (; pending > 0; --pending) current_border_edges_tracker.remove_edge(lo, hi);
+        for (; pending < 0; ++pending) current_border_edges_tracker.add_edge(lo, hi);
+    }
+    return false;
+}
+// Reads the N_FIXED x N_FIXED grid from stdin, then searches for a grid with as
+// many colour-0 cells as possible in two phases:
+//   1. a time-boxed greedy pass that tries to turn each non-zero cell into 0;
+//   2. simulated annealing that recolours random cells until the time limit.
+// Every change goes through attempt_change_cell_color_and_validate(). The grid
+// with the most 0-cells seen so far (best_grid_state) is printed at the end.
+void solve_main_logic() {
+    std::vector<std::vector<int>> initial_grid_from_input(N_FIXED, std::vector<int>(N_FIXED));
+    for (int i = 0; i < N_FIXED; ++i) for (int j = 0; j < N_FIXED; ++j) std::cin >> initial_grid_from_input[i][j];
+    initialize_all_data_structures(initial_grid_from_input);
+    // Phase 1: greedy pass over all cells in random order, limited to GREEDY_PASS_BUDGET_MS.
+    const double GREEDY_PASS_BUDGET_MS = 300.0;
+    double greedy_pass_start_abs_time = time_elapsed_ms();
+    std::vector<std::pair<int,int>> all_cells_shuffled;
+    all_cells_shuffled.reserve(N_FIXED * N_FIXED);
+    for(int r_idx=0; r_idx<N_FIXED; ++r_idx) for(int c_idx=0; c_idx<N_FIXED; ++c_idx) all_cells_shuffled.push_back({r_idx,c_idx});
+    std::shuffle(all_cells_shuffled.begin(), all_cells_shuffled.end(), rnd_gen);
+    for (const auto& cell_coords : all_cells_shuffled) {
+        if (time_elapsed_ms() - greedy_pass_start_abs_time > GREEDY_PASS_BUDGET_MS) break;
+        int r = cell_coords.first; int c = cell_coords.second;
+        int original_color = current_grid_state[r][c];
+        if (original_color == 0) continue;
+        // A successful attempt stays applied; a failed one has already been rolled back.
+        if (attempt_change_cell_color_and_validate(r, c, original_color, 0)) {
+            int current_zeros_count = cells_by_color[0].size();
+            if (current_zeros_count > best_score_val) {
+                best_score_val = current_zeros_count;
+                best_grid_state = current_grid_state;
+            }
+        }
+    }
+    // Phase 2: simulated annealing. The temperature is interpolated geometrically
+    // from sa_start_temp to sa_end_temp over the time left until TOTAL_COMPUTATION_TIME_MS.
+    double sa_start_temp = 2.0;
+    double sa_end_temp = 0.01;
+    const double TOTAL_COMPUTATION_TIME_MS = 1950.0;
+    double sa_start_abs_time = time_elapsed_ms();
+    double sa_total_duration_ms = TOTAL_COMPUTATION_TIME_MS - sa_start_abs_time;
+    if (sa_total_duration_ms <= 0) sa_total_duration_ms = 1.0;
+    int iter_count = 0;
+    while(true) {
+        iter_count++;
+        // The stop condition is only evaluated every 256th iteration.
+        if(iter_count % 256 == 0) {
+             if (time_elapsed_ms() >= TOTAL_COMPUTATION_TIME_MS) break;
+        }
+        double time_spent_in_sa = time_elapsed_ms() - sa_start_abs_time;
+        double progress_ratio = (sa_total_duration_ms > 1e-9) ? (time_spent_in_sa / sa_total_duration_ms) : 1.0;
+        progress_ratio = std::min(progress_ratio, 1.0);
+        double current_temperature = sa_start_temp * std::pow(sa_end_temp / sa_start_temp, progress_ratio);
+        current_temperature = std::max(current_temperature, sa_end_temp);
+        int r_coord = rnd_gen.next_int(N_FIXED);
+        int c_coord = rnd_gen.next_int(N_FIXED);
+        int original_color_at_cell = current_grid_state[r_coord][c_coord];
+        // Candidate colours: 0, plus the colour of each of the four neighbours
+        // (an off-grid neighbour contributes another 0). Duplicates are kept,
+        // so a colour that appears more often is drawn more often.
+        int candidate_new_colors[5];
+        int num_candidate_options = 0;
+        candidate_new_colors[num_candidate_options++] = 0;
+        for(int k_neighbor_idx=0; k_neighbor_idx<4; ++k_neighbor_idx) {
+            int nr = r_coord + DR[k_neighbor_idx];
+            int nc = c_coord + DC[k_neighbor_idx];
+            if (is_cell_on_grid(nr,nc)) {
+                candidate_new_colors[num_candidate_options++] = current_grid_state[nr][nc];
+            } else {
+                candidate_new_colors[num_candidate_options++] = 0;
+            }
+        }
+        int new_proposed_color = candidate_new_colors[rnd_gen.next_int(num_candidate_options)];
+        if (original_color_at_cell == new_proposed_color) continue;
+        // Score delta = change in the number of 0-cells (+1, 0 or -1).
+        int delta_in_score_metric = 0;
+        if (new_proposed_color == 0 && original_color_at_cell != 0) delta_in_score_metric = 1;
+        else if (new_proposed_color != 0 && original_color_at_cell == 0) delta_in_score_metric = -1;
+        if (attempt_change_cell_color_and_validate(r_coord, c_coord, original_color_at_cell, new_proposed_color)) {
+            bool accept_this_move = false;
+            if (delta_in_score_metric >= 0) {
+                accept_this_move = true;
+                if (cells_by_color[0].size() > best_score_val) {
+                    best_score_val = cells_by_color[0].size();
+                    best_grid_state = current_grid_state;
+                }
+            } else {
+                // A worsening move is accepted with probability exp(delta / temperature).
+                if (current_temperature > 1e-9 && rnd_gen.next_double() < std::exp((double)delta_in_score_metric / current_temperature)) {
+                    accept_this_move = true;
+                } else {
+                    accept_this_move = false;
+                }
+            }
+            if (!accept_this_move) {
+                // The move was valid but rejected: undo it by hand. This relies on
+                // temp_adj_deltas_map_global still holding the deltas recorded by
+                // the attempt_change_cell_color_and_validate() call just above.
+                current_grid_state[r_coord][c_coord] = original_color_at_cell;
+                remove_cell_from_color_ds(r_coord, c_coord, new_proposed_color);
+                add_cell_to_color_ds(r_coord, c_coord, original_color_at_cell);
+                for(const auto& entry : temp_adj_deltas_map_global) {
+                    int c1_ = entry.first.first; int c2_ = entry.first.second; int delta = entry.second;
+                    if (delta > 0) for(int i=0; i<delta; ++i) current_border_edges_tracker.remove_edge(c1_,c2_);
+                    else for(int i=0; i<-delta; ++i) current_border_edges_tracker.add_edge(c1_,c2_);
+                }
+            }
+        }
+    }
+    // Print the best grid found, one row per line with single-space separators.
+    for (int i = 0; i < N_FIXED; ++i) {
+        for (int j = 0; j < N_FIXED; ++j) {
+            std::cout << best_grid_state[i][j] << (j == N_FIXED - 1 ? "" : " ");
+        }
+        std::cout << std::endl;
+    }
+}
+// Entry point: configure iostreams, start the global timer, consume the two
+// header integers (their values are not used afterwards) and run the solver.
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(nullptr);
+    G_START_TIME = std::chrono::high_resolution_clock::now();
+    int header_n = 0;
+    int header_m = 0;
+    std::cin >> header_n >> header_m;
+    solve_main_logic();
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc025/best_program.cpp ADDED Viewed

	@@ -0,0 +1,282 @@

+# EVOLVE-BLOCK-START
+#include <bits/stdc++.h>
+using namespace std;
+/* Approach overview:
+- Sort k pivots by 1v1 balance queries (merge sort); k is a power of two chosen under the query budget, or all N items when the budget covers a full sort.
+- Classify non-pivots by binary search among pivots.
+- Assign surrogate weights from rank quantiles, using the expected order statistics of an exponential distribution.
+- Greedy LPT packing, then a tiny deterministic refinement (move/swap) to reduce sum of squares.
+- Use remaining queries (if any) for light interactive refinement between groups, then pad. */
+// Global parameters and query counter.
+// N_, D_, Q_ are read from stdin at the start of main(): number of items,
+// number of groups to form, and number of balance queries to issue.
+int N_, D_, Q_;
+// Number of balance queries issued so far; incremented once per ask() call.
+int q_used = 0;
+// Cache for 1v1 comparisons (symmetry-aware): memo_cmp[a][b] stores the answer
+// for item a versus item b, and cmp() also stores the mirrored answer in
+// memo_cmp[b][a]. The table is 128x128, so item indices must stay below 128.
+/* cmp(a,b):
+   - Returns '<' if a is lighter than b, '>' if heavier, '=' if equal.
+   - Uses a 1 item per pan query and memoizes symmetric results. */
+static unsigned char memo_cmp[128][128]; // 0=unknown; else '<','>','='
+// Perform actual query
+/* ask(L,R): prints the sets on the pan and reads the judge's response.
+   Requirements: both sides non-empty and disjoint; used for both 1v1 and set vs set.
+   Output line: "|L| |R| l_0 ... r_0 ...", followed by a flush.
+   Every call counts as one query in q_used.
+   Returns '<', '>' or '='. If no response can be read (stream closed or in a
+   failed state) it returns '=' rather than an uninitialised character. */
+char ask(const vector<int>& L, const vector<int>& R){ /* Query balance between sets L and R; returns '<','>','='. Flush each call. */
+    ++q_used;
+    cout << (int)L.size() << " " << (int)R.size();
+    for(int x: L) cout << " " << x;
+    for(int x: R) cout << " " << x;
+    cout << endl; // endl also flushes, which the interactive protocol needs
+    char c = '=';
+    // A failed extraction leaves c untouched, so fall back to a defined value.
+    if(!(cin >> c)) return '=';
+    return c;
+}
+// Compare two single items with caching
+/* cmp(a,b):
+   - a==b yields '=' without a query; a cached answer is returned as is.
+   - With the query budget used up (q_used >= Q_) it yields '=' and caches nothing.
+   - Otherwise it spends one 1v1 query and stores the answer and its mirror. */
+char cmp(int a, int b){
+    if(a==b) return '=';
+    const unsigned char cached = memo_cmp[a][b];
+    if(cached != 0) return (char)cached;
+    if(q_used>=Q_) return '=';
+    const char res = ask({a},{b});
+    // Mirror the verdict for the swapped argument order.
+    char mirrored = '=';
+    if(res=='<') mirrored = '>';
+    else if(res=='>') mirrored = '<';
+    memo_cmp[a][b] = (unsigned char)res;
+    memo_cmp[b][a] = (unsigned char)mirrored;
+    return res;
+}
+// Merge sort using cmp as comparator on item indices
+/* merge_sort(ids,l,r):
+   - Sorts ids[l..r] (both bounds inclusive) from lightest to heaviest using only cmp().
+   - When cmp() reports '<' or '=', the element from the left half is taken first. */
+void merge_sort(vector<int>& ids, int l, int r){
+    if(l>=r) return;
+    const int mid = (l+r)>>1;
+    merge_sort(ids,l,mid);
+    merge_sort(ids,mid+1,r);
+    vector<int> merged;
+    merged.reserve(r-l+1);
+    int left = l, right = mid+1;
+    while(left<=mid && right<=r){
+        const char verdict = cmp(ids[left], ids[right]);
+        const bool take_right = !(verdict=='<' || verdict=='=');
+        if(take_right) merged.push_back(ids[right++]);
+        else merged.push_back(ids[left++]);
+    }
+    // At most one of the two tails is non-empty here.
+    merged.insert(merged.end(), ids.begin()+left, ids.begin()+mid+1);
+    merged.insert(merged.end(), ids.begin()+right, ids.begin()+r+1);
+    copy(merged.begin(), merged.end(), ids.begin()+l);
+}
+// Entry point. Reads N_, D_, Q_, ranks the items with 1v1 queries, turns the
+// ranks into surrogate weights, packs the items into D_ groups, refines the
+// packing (first on surrogate weights, then with set-vs-set queries), pads the
+// query count up to Q_ and prints the group index of every item on one line.
+int main(){ /* Orchestrates: budgeted ranking via 1v1, surrogate weights from exp order stats, LPT pack, local+interactive refinement. */
+    ios::sync_with_stdio(false);
+    cin.tie(nullptr);
+    cin >> N_ >> D_ >> Q_;
+    // Choose pivots with a reserved budget for later set-based refinement.
+    // With k=2^e, mergesort on pivots costs ~k*e and classifying others costs ~(N-k)*e => ~N*e.
+    // log2N = floor(log2(N_)); reserve = queries held back for the refinement phase.
+    int log2N = 0; while((1<<(log2N+1))<=max(1,N_)) ++log2N;
+    int reserve = max(D_, Q_/4);
+    int e = min(log2N, max(1, (Q_ - reserve - 2)/max(1,N_)));
+    int k = min(N_, 1<<e);
+    // e_full = ceil(log2(N_)); if the budget covers ~N_*e_full queries, sort every item.
+    int e_full = 0; while((1<<e_full) < max(1,N_)) ++e_full;
+    long long cost_full = 1LL * N_ * e_full;
+    if(Q_ - reserve - 2 >= cost_full) k = N_;
+    if(k<1) k=1;
+    // Pick first k items as pivots and sort them by weight
+    vector<int> piv(k);
+    iota(piv.begin(), piv.end(), 0);
+    merge_sort(piv, 0, k-1);
+    vector<char> is_pivot(N_, 0);
+    for(int x: piv) is_pivot[x]=1;
+    // Surrogate weights via expected order statistics of exponential distribution.
+    // Map rank quantile q in (0,1] to w_hat ∝ H_N - H_{N - round(q*N)}, then scale to integers.
+    // H[i] is the i-th harmonic number.
+    vector<long double> H(N_+1, 0.0L);
+    for(int i=1;i<=N_;++i) H[i]=H[i-1]+1.0L/i;
+    long double HN = H[N_];
+    auto q_to_w = [&](long double q)->long long{
+        if(q<=0) return 1;
+        int idx = (int)llround(q * (long double)N_);
+        if(idx<1) idx=1; if(idx>N_) idx=N_;
+        long double v = HN - H[N_-idx];
+        long long w = (long long)llround(v * 1000000.0L);
+        if(w<1) w=1;
+        return w;
+    };
+    vector<long long> w_hat(N_, 1);
+    // Pivot i of k sits at the midpoint quantile (i+0.5)/k.
+    for(int i=0;i<k;i++){
+        long double q = ((long double)i + 0.5L) / (long double)k;
+        w_hat[piv[i]] = q_to_w(q);
+    }
+    // Classify non-pivot items by binary searching among pivots
+    for(int id=0; id<N_; ++id){
+        if(is_pivot[id]) continue;
+        // lo ends as the first pivot position for which cmp() did not report
+        // the item as heavier (or k if it is heavier than every pivot). Once the
+        // budget is spent cmp() answers '=', which drives the search to the left.
+        int lo=0, hi=k;
+        while(lo<hi){
+            int mid=(lo+hi)/2;
+            char r = cmp(id, piv[mid]);
+            if(r=='<' || r=='=') hi=mid; else lo=mid+1;
+        }
+        long double q;
+        if(lo==0) q = 0.25L / max(1, k); // slightly above 0
+        else if(lo==k) q = ((long double)k - 0.25L) / (long double)k; // slightly below 1
+        else q = ((long double)lo) / (long double)k;
+        w_hat[id] = q_to_w(q);
+    }
+    // Greedy largest-first with tie-breaker by size
+    // Weights are stored negated so that an ascending sort yields heaviest first.
+    vector<pair<long long,int>> ord; ord.reserve(N_);
+    for(int i=0;i<N_;++i) ord.push_back({-w_hat[i], i});
+    sort(ord.begin(), ord.end());
+    vector<long long> sum(D_, 0);
+    vector<vector<int>> grp(D_);
+    vector<int> ans(N_, 0);
+    for(auto [negw, id] : ord){
+        // Put the item into the group with the smallest surrogate sum (fewest items on ties).
+        int best = 0;
+        for(int g=1; g<D_; ++g){
+            if(sum[g] < sum[best] || (sum[g]==sum[best] && grp[g].size() < grp[best].size())) best=g;
+        }
+        ans[id] = best;
+        sum[best] += -negw;
+        grp[best].push_back(id);
+    }
+    // Deterministic local refinement: move or swap between heaviest and lightest if it improves sum of squares
+    {
+        int iter_limit = max(100, N_);
+        for(int it=0; it<iter_limit; ++it){
+            int gH=0,gL=0;
+            for(int g=1; g<D_; ++g){ if(sum[g]>sum[gH]) gH=g; if(sum[g]<sum[gL]) gL=g; }
+            if(gH==gL) break;
+            long long SA=sum[gH], SB=sum[gL], diff=SA-SB;
+            if(diff<=0) break;
+            // Try a single-item move from gH to gL
+            // Prefer the item whose weight is closest to half of the gap.
+            int bestId=-1; long long bestGap=(1LL<<62);
+            for(int id: grp[gH]){
+                long long w=w_hat[id];
+                if(w<diff){
+                    long long gap = llabs((long long)(diff/2) - w);
+                    if(gap<bestGap){ bestGap=gap; bestId=id; }
+                }
+            }
+            bool improved=false;
+            if(bestId!=-1){
+                long long w=w_hat[bestId];
+                long long nSA=SA-w, nSB=SB+w;
+                long long old2=SA*SA + SB*SB, new2=nSA*nSA + nSB*nSB;
+                if(new2<old2){
+                    sum[gH]=nSA; sum[gL]=nSB;
+                    auto &A=grp[gH]; auto &B=grp[gL];
+                    // Swap-with-back removal; the order inside a group does not matter.
+                    for(int i=0;i<(int)A.size();++i) if(A[i]==bestId){ A[i]=A.back(); A.pop_back(); break; }
+                    B.push_back(bestId);
+                    improved=true;
+                }
+            }
+            if(improved) continue;
+            // Try swapping one item between gH and gL
+            int bestA=-1, bestB=-1; bestGap=(1LL<<62);
+            for(int ia: grp[gH]){
+                long long wA=w_hat[ia];
+                for(int ib: grp[gL]){
+                    long long wB=w_hat[ib];
+                    long long delta = wA - wB;
+                    if(delta<=0 || delta>=diff) continue;
+                    long long gap = llabs((long long)(diff/2) - delta);
+                    if(gap<bestGap){ bestGap=gap; bestA=ia; bestB=ib; }
+                }
+            }
+            if(bestA!=-1){
+                long long wA=w_hat[bestA], wB=w_hat[bestB];
+                long long nSA=SA - wA + wB, nSB=SB - wB + wA;
+                long long old2=SA*SA + SB*SB, new2=nSA*nSA + nSB*nSB;
+                if(new2<old2){
+                    sum[gH]=nSA; sum[gL]=nSB;
+                    auto &A=grp[gH]; auto &B=grp[gL];
+                    for(int i=0;i<(int)A.size();++i) if(A[i]==bestA){ A[i]=bestB; break; }
+                    for(int i=0;i<(int)B.size();++i) if(B[i]==bestB){ B[i]=bestA; break; }
+                    continue;
+                }
+            }
+            break; // no improving move or swap
+        }
+    }
+    // Rebuild final assignment from groups (keeps consistency after refinement)
+    fill(ans.begin(), ans.end(), 0);
+    for(int g=0; g<D_; ++g) for(int id: grp[g]) ans[id]=g;
+    // Set-based interactive refinement using remaining queries: move single items guided by balance
+    {
+        if(q_used < Q_){
+            mt19937 rng(712367); // fixed seed
+            vector<pair<int,int>> pairs;
+            pairs.reserve(D_*(D_-1)/2);
+            for(int a=0;a<D_;++a) for(int b=a+1;b<D_;++b) pairs.emplace_back(a,b);
+            int rem = Q_ - q_used;
+            int passes = min(24, 2 + rem / max(1, D_));
+            int K = min(12, 2 + rem / max(1, D_));
+            for(int pass=0; pass<passes && q_used < Q_; ++pass){
+                shuffle(pairs.begin(), pairs.end(), rng);
+                bool any=false;
+                for(auto pr: pairs){
+                    int a=pr.first, b=pr.second;
+                    if(q_used >= Q_) break;
+                    if(grp[a].empty() || grp[b].empty()) continue;
+                    char r = ask(grp[a], grp[b]);
+                    if(r=='=') continue;
+                    // H / L are the heavier / lighter group indices of this pair.
+                    // Note: this H shadows the harmonic-number table H declared above.
+                    int H = (r=='>')?a:b;
+                    int L = (H==a)?b:a;
+                    if((int)grp[H].size()<=1) continue;
+                    vector<int> idx(grp[H].size()); iota(idx.begin(), idx.end(), 0);
+                    shuffle(idx.begin(), idx.end(), rng);
+                    int tries = min(K, (int)idx.size());
+                    for(int t=0; t<tries && q_used < Q_; ++t){
+                        int pos = idx[t];
+                        int id = grp[H][pos];
+                        // Build H\{id} as left set
+                        vector<int> Left; Left.reserve(grp[H].size()-1);
+                        for(int x: grp[H]) if(x!=id) Left.push_back(x);
+                        if(Left.empty() || grp[L].empty()) continue;
+                        char rr = ask(Left, grp[L]);
+                        // If H without id still outweighs L, moving id to L narrows
+                        // the real gap between the two groups.
+                        if(rr=='>'){
+                            // apply move id: H -> L; keep surrogate sums in sync
+                            grp[L].push_back(id);
+                            grp[H][pos] = grp[H].back(); grp[H].pop_back();
+                            sum[H] -= w_hat[id];
+                            sum[L] += w_hat[id];
+                            any=true;
+                            break;
+                        }
+                    }
+                }
+                // Stop early when a whole pass moved nothing.
+                if(!any) break;
+            }
+        }
+    }
+    // Rebuild final assignment from groups after interactive refinement
+    fill(ans.begin(), ans.end(), 0);
+    for(int g=0; g<D_; ++g) for(int id: grp[g]) ans[id]=g;
+    // Consume any remaining queries with safe 1v1 dummies
+    // (item 0 against a rotating partner that is never 0), until q_used reaches Q_.
+    if(N_>=2){
+        int a=0, b=1;
+        while(q_used < Q_){
+            ask({a},{b});
+            b = (b+1)%N_;
+            if(b==a) b=(b+1)%N_;
+        }
+    }
+    for(int i=0;i<N_;++i){
+        if(i) cout << ' ';
+        cout << ans[i];
+    }
+    cout << '\n';
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc025/config.yaml ADDED Viewed

	@@ -0,0 +1,104 @@

+# ALE-Bench ahc025 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+# Run-level settings.
+# NOTE(review): the meaning of these keys is defined by skydiscover-run and is not visible in this file.
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+# NOTE(review): unit (characters vs. tokens) is not stated here — confirm.
+max_solution_length: 60000
+# LLM backend: an OpenAI-style endpoint with a single model entry.
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  # NOTE(review): unit is not stated here; presumably seconds — confirm.
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n    \n    Story\n\
+    --------\nAtCoder offers online shopping for official goods.\nCEO Takahashi decided to sell the unsold goods together\
+    \ as a grab bag.\nThe weight of each bag should be as even as possible, but unfortunately, there was no scale in AtCoder's\
+    \ office to measure weight numerically.\nAs an alternative, he found a balance.\nBy using the balance to compare the weights,\
+    \ please divide the goods as evenly as possible.\n\nProblem Statement\n--------\nThere are $N$ items.\nThe weight $w_i$\
+    \ of each item $i$ is unknown.\nUsing a balance that can compare the sum of the weights of two item sets, you repeat the\
+    \ following operations.\n\nPlace as many items as you like on the left and right plates of the balance. Then you can see\
+    \ which side has the greater weight or they have equal weights.\n\nAfter repeating this operation $Q$ times, divide the\
+    \ items into $D$ sets of equal total weight as much as possible.\n\n\nScoring\n--------\nLet $t_0,t_1,\\cdots,t_{D-1}$\
+    \ be the total weight of items in each set in the output division. Let the mean of $t$ be $\\bar{t}=\\frac{1}{D}\\sum_{i=0}^{D-1}t_i$.\
+    \ The variance is $V=\\frac{1}{D}\\sum_{i=0}^{D-1} (t_i-\\bar{t})^2$.\nThen you will obtain an absolute score of $1+\\\
+    mathrm{round}\\left(100\\times \\sqrt{V}\\right)$.\nThe lower the absolute score, the better.\n\nFor each test case, you\
+    \ will obtain a <font color=\"red\"><strong>rank score</strong></font> according to your rank determined by lower absolute\
+    \ score. The score of the submission is the total rank score for each test case. The rank score is calculated as follows,\
+    \ and the higher the rank score, the better.\n\nLet $n_{submit}$ be the number of contestants with submissions, $n_{lose}$\
+    \ be the number of contestants who received an absolute score lower than yours, and $n_{tie}$ be the number of other contestants\
+    \ who received an absolute score equal to yours. Then your rank in this test case is determined as $r=n_{lose}+0.5 n_{tie}$,\
+    \ and your rank score is $\\mathrm{round}(10^8\\times (1-\\frac{r}{n_{submit}}))$.\n\n\nThe final ranking will be determined\
+    \ by the system test with more inputs which will be run after the contest is over.\nIn both the provisional/system test,\
+    \ if your submission produces illegal output or exceeds the time limit for some test cases, only the rank score for those\
+    \ test cases will be zero.\nThe system test will be performed only for <font color=\"red\"><strong>the last submission\
+    \ which received a result other than <span class=\"label label-warning\" data-toggle=\"tooltip\" data-placement=\"top\"\
+    \ title=\"\" data-original-title=\"Compilation Error\">CE</span> </strong></font>.\nBe careful not to make a mistake in\
+    \ the final submission.\n\n#### Number of test cases\n- Provisional test: 100\n- System test: 5000. We will publish <a\
+    \ href=\"https://img.atcoder.jp/ahc025/seeds.txt\">seeds.txt</a> (sha256=8a39261299bef0387172c0e0c4523c49b0cb993efd4f702ec7cf5124cf5b4c55)\
+    \ after the contest is over.\n\n\n#### About relative evaluation system\nIn both the provisional/system test, the standings\
+    \ will be calculated using only the last submission which received a result other than <span class=\"label label-warning\"\
+    \ data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" data-original-title=\"Compilation Error\">CE</span>.\n\nThe\
+    \ scores shown in the standings are relative, and whenever a new submission arrives, all relative scores are recalculated.\n\
+    On the other hand, the score for each submission shown on the submissions page is the sum of the absolute score for each\
+    \ test case, and the relative scores are not shown.\nIn order to know the relative score of submission other than the\
+    \ latest one in the current standings, you need to resubmit it.\nIf your submission produces illegal output or exceeds\
+    \ the time limit for some test cases, the score shown on the submissions page will be 0, but the standings show the sum\
+    \ of the relative scores for the test cases that were answered correctly.\n\n#### About execution time\nExecution time\
+    \ may vary slightly from run to run.\nIn addition, since system tests simultaneously perform a large number of executions,\
+    \ it has been observed that execution time increases by several percent compared to provisional tests.\nFor these reasons,\
+    \ submissions that are very close to the time limit may result in <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Time Limit Exceeded\">TLE</span> in the system test.\nPlease measure the execution time\
+    \ in your program to terminate the process, or have enough margin in the execution time.\n\n\nInput and Output\n--------\n\
+    First, the number of items $N$, the number of divisions $D$, and the number of queries $Q$ are given from Standard Input\
+    \ in the following format.\n~~~\n$N$ $D$ $Q$\n~~~\n\nEach value satisfies the following constraints.\n\n- $30\\leq N\\\
+    leq 100$\n- $2\\leq D\\leq N/4$\n- $2N\\leq Q\\leq 32N$\n\n\nAfter reading the above information, repeat the following\
+    \ query $Q$ times.\n\nIn the $q$-th query ($0\\leq q\\leq Q-1$), you select the set of items $L$ to be placed on the left\
+    \ side of the balance and the set of items $R$ to be placed on the right side of the balance.\nEach set must not be empty,\
+    \ and the common part $L\\cap R$ must be empty.\nThere may be items that are not contained in either $L$ or $R$.\nLet\
+    \ $n_L=|L|$, $n_R=|R|$, $L=\\\\{l_0,\\cdots,l_{n_L-1}\\\\}$, and $R=\\\\{r_0,\\cdots,r_{n_R-1}\\\\}$ ($0\\leq l_i,r_i\\\
+    leq N-1$).\nThen output to Standard Output on a single line in the following format.\n~~~\n$n_L$ $n_R$ $l_0$ $\\cdots$\
+    \ $l_{n_L-1}$ $r_0$ $\\cdots$ $r_{n_R-1}$\n~~~\n\n<font color=\"red\">**The output must be followed by a new line, and\
+    \ you have to flush Standard Output.**</font>\nOtherwise, the submission might be judged as <span class='label label-warning'\
+    \ data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>.\n\nAfter output, the information\
+    \ on which side the balance is tilted is given in a single line from the standard input.\nThe given string is one of the\
+    \ following three cases.\n\n- `<` : The total weight of $L$ is less than the total weight of $R$.\n- `>` : The total weight\
+    \ of $L$ is greater than the total weight of $R$.\n- `=` : The total weight of $L$ is equal to the total weight of $R$.\n\
+    \nThe query must be performed exactly $Q$ times.\nAfter $Q$ queries, divide the items into $D$ sets of equal total weight\
+    \ as much as possible.\nLet $i$-th ($0\\leq i\\leq N-1$) item be included in $d_i$-th ($0\\leq d_i\\leq D-1$) set.\nThen\
+    \ output to Standard Output on a single line in the following format.\n~~~\n$d_0$ $\\cdots$ $d_{N-1}$\n~~~\n\n\n#### Example\n\
+    \n\n<table class=\"table table-bordered\">\n<thead>\n<tr>\n<th>$q$</th>\n<th>Output</th>\n<th>Input</th>\n</tr>\n</thead>\n\
+    <tbody>\n<tr>\n<td>Prior information</td>\n<td></td>\n<td><pre>31 2 128</pre></td>\n</tr>\n<tr>\n<td>0</td>\n<td><pre>2\
+    \ 1 6 22 11</pre></td>\n<td><pre>></pre></td>\n</tr>\n<tr>\n<td>1</td>\n<td><pre>2 2 11 22 0 1</pre></td>\n<td><pre><</pre></td>\n\
+    </tr>\n<tr>\n<td>$\\vdots$</td>\n<td></td>\n<td></td>\n</tr>\n<tr>\n<td>127</td>\n<td><pre>1 1 14 24</pre></td>\n<td><pre><</pre></td>\n\
+    </tr>\n<tr>\n<td>Division</td>\n<td><pre>0 0 0 0 0 0 1 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 1 0</pre></td>\n<td></td>\n\
+    </tr>\n</tbody>\n</table>\n\n\n<a href=\"https://img.atcoder.jp/ahc025/tNvZmDfV.html?lang=en&seed=0&output=sample\">Show\
+    \ example</a>\n\nInput Generation\n--------\nLet $\\mathrm{rand\\\\_int}(L,U)$ be a function that generates a uniform\
+    \ random integer between $L$ and $U$, inclusive.\nLet $\\mathrm{rand\\\\_double}(L,U)$ be a function that generates a\
+    \ uniform random real number at least $L$ and less than $U$.\n\nThe number of items $N$ is generated by $\\mathrm{rand\\\
+    \\_int}(30,100)$.\nThe number of divisions $D$ is generated by $\\mathrm{rand\\\\_int}(2,\\mathrm{floor}(N/4))$.\nThe\
+    \ number of queries $Q$ is generated by $\\mathrm{round}(N\\times 2^{\\mathrm{rand\\\\_double(1,5)}})$.\n\nFor each item\
+    \ $i$, we independently generate a value $w'_i$ from the <a href=\"https://en.wikipedia.org/wiki/Exponential_distribution\"\
+    >exponential distribution</a> with $\\lambda=10^{-5}$, and we set the weight of item $i$ by $w_i=\\max(1, \\mathrm{round}(w'_i))$.\
+    \ If the generated value $w'_i$ exceeds $\\frac{10^5 N}{D}$, we regenerate it.\n\n\nTools (Input generator, local tester\
+    \ and visualizer)\n--------\n- <a href=\"https://img.atcoder.jp/ahc025/tNvZmDfV.html?lang=en\">Web version</a>: This is\
+    \ more powerful than the local version providing animations.\n- <a href=\"https://img.atcoder.jp/ahc025/tNvZmDfV.zip\"\
+    >Local version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n\t\
+    - <a href=\"https://img.atcoder.jp/ahc025/tNvZmDfV_windows.zip\">Pre-compiled binary for Windows</a>: If you are not familiar\
+    \ with the Rust language environment, please use this instead.\n\n<font color=\"red\"><b>Please be aware that sharing\
+    \ visualization results or discussing solutions/ideas during the contest is prohibited.</b></font>\n\n#### Specification\
+    \ of input/output files used by the tools\n\nInput files given to the local tester have the following format.\n~~~\n$N$\
+    \ $D$ $Q$\n$w_0$ $\\cdots$ $w_{N-1}$\n~~~\nThe last $w_0$ $\\cdots$ $w_{N-1}$ is the weight of each item and is not given\
+    \ to the solution program.\n\nThe local tester writes outputs from your program directly to the output file.\nYour program\
+    \ may output comment lines starting with `#`. The web version of the visualizer displays the comment lines with the corresponding\
+    \ query, which may be useful for debugging and analysis. Since the judge program ignores all comment lines, you can submit\
+    \ a program that outputs comment lines as is.\nIn addition, comment lines starting with `#c` are treated specially and\
+    \ you can provide the visualizer with a tentative division by outputting in the following format.\n~~~\n#c $d_0$ $\\cdots$\
+    \ $d_{N-1}$\n~~~\n\n\n    Problem constraints:\n    time_limit=2.0 memory_limit=1073741824\n"
+# Evaluator settings for this problem.
+# NOTE(review): the unit of `timeout` is not stated in this file - presumably seconds; confirm against the framework that reads this config.
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc025/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")  # module-level logger; name is this module's __name__ plus the "_ALE_BENCH_EVALUATOR" suffix
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc025"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc025/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,628 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <numeric>
+#include <algorithm>
+#include <iomanip>
+#include <cmath>
+#include <set>
+#include <map>
+#include <chrono>
+#include <random>
+// Timer: program_start_time is set at the top of main(); the annealing loop in main()
+// stops once time_limit_ms (1850 ms) has elapsed since then.
+std::chrono::steady_clock::time_point program_start_time;
+std::chrono::milliseconds time_limit_ms(1850);
+// Global problem parameters (read from stdin in main) and query counter/cache
+int N_items_global, D_groups_global, Q_total_global;
+// Number of queries sent so far; incremented by perform_query_actual().
+int queries_made = 0;
+// 1-vs-1 results keyed by (smaller index, larger index); the stored character is the
+// result of "smaller index" on the left versus "larger index" on the right.
+std::map<std::pair<int, int>, char> comparison_results_cache_1v1;
+// 1-vs-2 results: outer key is the single left-hand item, inner key is the
+// (smaller index, larger index) pair forming the right-hand side.
+std::map<int, std::map<std::pair<int, int>, char>> comparison_results_cache_1v2_specific;
+// Shared random engine; seeded in main().
+std::mt19937 rng_engine;
+// Sends one balance query over standard output and returns the one-character
+// answer read from standard input. The line written is
+//   |L| |R| L_0 ... R_0 ...
+// and every call counts against the global query counter.
+char perform_query_actual(const std::vector<int>& L_items, const std::vector<int>& R_items) {
+    ++queries_made;
+    std::cout << L_items.size() << " " << R_items.size();
+    // Left-hand items first, then right-hand items, each preceded by a space.
+    for (const std::vector<int>* side : {&L_items, &R_items}) {
+        for (int member : *side) {
+            std::cout << " " << member;
+        }
+    }
+    // std::endl flushes, so the judge sees the query before we block on the answer.
+    std::cout << std::endl;
+    char answer;
+    std::cin >> answer;
+    return answer;
+}
+// Compares two single items and returns '<', '>' or '=' from the point of view of
+// item_idx1. Answers are cached under the (smaller index, larger index) key, stored
+// as seen from the smaller index. When the query budget is exhausted and no cached
+// answer exists, '=' is returned without querying.
+char compare_single_items(int item_idx1, int item_idx2) {
+    if (item_idx1 == item_idx2) return '=';
+    // Mirrors an answer; anything other than '<' or '>' maps to '='.
+    const auto mirrored = [](char c) -> char {
+        if (c == '<') return '>';
+        if (c == '>') return '<';
+        return '=';
+    };
+    const bool first_is_smaller_index = item_idx1 < item_idx2;
+    const std::pair<int, int> cache_key = first_is_smaller_index
+        ? std::pair<int, int>(item_idx1, item_idx2)
+        : std::pair<int, int>(item_idx2, item_idx1);
+    const auto hit = comparison_results_cache_1v1.find(cache_key);
+    if (hit != comparison_results_cache_1v1.end()) {
+        return first_is_smaller_index ? hit->second : mirrored(hit->second);
+    }
+    if (queries_made >= Q_total_global) {
+        return '=';
+    }
+    const char answer = perform_query_actual({item_idx1}, {item_idx2});
+    comparison_results_cache_1v1[cache_key] = first_is_smaller_index ? answer : mirrored(answer);
+    return answer;
+}
+// Compares the single item item_curr (left) against the pair {item_prev, item_s_aux}
+// (right) and returns the judge's answer. The caller is expected to pass three
+// distinct indices. Results are cached per left item under the order-independent
+// key of the right-hand pair. With no cached answer and no query budget left,
+// '=' is returned without querying.
+char compare_1v2_items_specific(int item_curr, int item_prev, int item_s_aux) {
+    const bool prev_is_smaller_index = item_prev < item_s_aux;
+    const std::pair<int, int> rhs_key(prev_is_smaller_index ? item_prev : item_s_aux,
+                                      prev_is_smaller_index ? item_s_aux : item_prev);
+    const auto per_left = comparison_results_cache_1v2_specific.find(item_curr);
+    if (per_left != comparison_results_cache_1v2_specific.end()) {
+        const auto hit = per_left->second.find(rhs_key);
+        if (hit != per_left->second.end()) {
+            return hit->second;
+        }
+    }
+    if (queries_made >= Q_total_global) {
+        return '=';
+    }
+    const char answer = perform_query_actual({item_curr}, {item_prev, item_s_aux});
+    comparison_results_cache_1v2_specific[item_curr][rhs_key] = answer;
+    return answer;
+}
+// Merge step of the query-based merge sort: merges the sorted runs
+// items_to_sort[left..mid] and items_to_sort[mid+1..right] (inclusive bounds) in place.
+// Ordering is decided by compare_single_items; on '<' or '=' the element from the
+// lower run is taken first.
+void merge_for_sort(std::vector<int>& items_to_sort, int left, int mid, int right) {
+    const std::vector<int> lower_run(items_to_sort.begin() + left, items_to_sort.begin() + mid + 1);
+    const std::vector<int> upper_run(items_to_sort.begin() + mid + 1, items_to_sort.begin() + right + 1);
+    std::size_t lo_pos = 0;
+    std::size_t up_pos = 0;
+    int write_pos = left;
+    while (lo_pos < lower_run.size() && up_pos < upper_run.size()) {
+        const char verdict = compare_single_items(lower_run[lo_pos], upper_run[up_pos]);
+        const bool take_lower = (verdict == '<' || verdict == '=');
+        if (take_lower) {
+            items_to_sort[write_pos] = lower_run[lo_pos];
+            ++lo_pos;
+        } else {
+            items_to_sort[write_pos] = upper_run[up_pos];
+            ++up_pos;
+        }
+        ++write_pos;
+    }
+    // At most one of the two runs still has elements; append them unchanged.
+    for (; lo_pos < lower_run.size(); ++lo_pos) items_to_sort[write_pos++] = lower_run[lo_pos];
+    for (; up_pos < upper_run.size(); ++up_pos) items_to_sort[write_pos++] = upper_run[up_pos];
+}
+// Recursively sorts items_to_sort[left..right] (inclusive) with merge sort, using
+// merge_for_sort for the merge step.
+void merge_sort_items(std::vector<int>& items_to_sort, int left, int right) {
+    if (left >= right) return;  // zero or one element: already sorted
+    const int middle = left + (right - left) / 2;
+    merge_sort_items(items_to_sort, left, middle);
+    merge_sort_items(items_to_sort, middle + 1, right);
+    merge_for_sort(items_to_sort, left, middle, right);
+}
+long long BASE_WEIGHT = 100000; // reference weight: main() assigns it to the lightest pivot and uses it as the default/fallback estimate
+// Base-2 logarithm clamped at zero: returns 0.0 for any val <= 1.0, otherwise log2(val).
+double estimate_log2(double val) {
+    return (val <= 1.0) ? 0.0 : std::log2(val);
+}
+// Estimates how many queries the pivot-based weight estimation needs for N_val items
+// when k_pivots_val of them are used as pivots. The estimate is the sum of
+//   k*log2(k)            for sorting the pivots,
+//   sum log2(j-1), j=2..k-1  for the per-pivot binary searches, and
+//   (N-k)*log2(k)        for placing every non-pivot item,
+// rounded up. Zero or negative pivot counts cost 0; with a single pivot the cost is
+// N_val - 1 (or 0 when N_val <= 1).
+int calculate_estimated_query_cost(int N_val, int k_pivots_val) {
+    if (k_pivots_val <= 0) return 0;
+    if (k_pivots_val == 1) {
+        return (N_val > 1) ? (N_val - 1) : 0;
+    }
+    const double pivots = static_cast<double>(k_pivots_val);
+    double total = 0;
+    total += pivots * estimate_log2(pivots);
+    // earlier_count is j-1 for j = 2 .. k-1, i.e. 1 .. k-2.
+    for (int earlier_count = 1; earlier_count + 1 < k_pivots_val; ++earlier_count) {
+        total += estimate_log2(static_cast<double>(earlier_count));
+    }
+    total += static_cast<double>(N_val - k_pivots_val) * estimate_log2(pivots);
+    return static_cast<int>(std::ceil(total));
+}
+// Variance of the D_val group totals computed from their sum of squares and their
+// overall sum: E[x^2] - (E[x])^2, clamped to be non-negative. Returns 1e18 as a
+// sentinel when D_val is not positive.
+double calculate_variance_from_sums(double sum_sq_group_totals, double total_weight_double, int D_val) {
+    if (D_val <= 0) return 1e18;
+    const double mean_total = total_weight_double / D_val;
+    const double raw_variance = sum_sq_group_totals / D_val - mean_total * mean_total;
+    // Rounding can push the result slightly below zero; report 0.0 in that case.
+    return (0.0 < raw_variance) ? raw_variance : 0.0;
+}
+// Entry point. Reads N, D and Q from stdin, spends the query budget estimating a
+// weight for every item, then splits the items into D groups (greedy placement
+// followed by simulated annealing on the variance of the estimated group sums) and
+// prints the group index of every item on a single line.
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    program_start_time = std::chrono::steady_clock::now();
+    // Seed the RNG from the clock, so runs are not reproducible.
+    uint64_t random_seed = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now().time_since_epoch()).count();
+    rng_engine.seed(random_seed);
+    std::cin >> N_items_global >> D_groups_global >> Q_total_global;
+    std::vector<long long> estimated_weights(N_items_global);
+    // Pick the largest pivot count whose estimated query cost fits within Q;
+    // if none fits, a single pivot is used.
+    int k_pivots_chosen = (N_items_global > 0) ? 1 : 0;
+    if (N_items_global > 1) {
+        for (int cur_k_val = N_items_global; cur_k_val >= 1; --cur_k_val) {
+            if (calculate_estimated_query_cost(N_items_global, cur_k_val) <= Q_total_global) {
+                k_pivots_chosen = cur_k_val;
+                break;
+            }
+        }
+    }
+    k_pivots_chosen = std::min(k_pivots_chosen, N_items_global);
+    if (N_items_global == 0) k_pivots_chosen = 0;
+    // The pivots are the first k_pivots_chosen entries of a random shuffle of all item indices.
+    std::vector<int> pivot_item_indices(k_pivots_chosen);
+    if (k_pivots_chosen > 0) {
+        std::vector<int> all_item_indices_temp(N_items_global);
+        std::iota(all_item_indices_temp.begin(), all_item_indices_temp.end(), 0);
+        std::shuffle(all_item_indices_temp.begin(), all_item_indices_temp.end(), rng_engine);
+        for (int i = 0; i < k_pivots_chosen; ++i) pivot_item_indices[i] = all_item_indices_temp[i];
+    }
+    std::vector<int> sorted_pivot_item_indices = pivot_item_indices;
+    // Factors from previous attempt (more aggressive & symmetric).
+    // They are percentages (each use divides by 100): 200% for "heavier than",
+    // 50% for "lighter than", 100% for the fallback gap estimate.
+    const int FACTOR_GT_NUM = 200;
+    const int FACTOR_LT_NUM = 50;
+    const int FACTOR_XJ_FALLBACK_NUM = 100;
+    if (k_pivots_chosen == 0) {
+        for (int i = 0; i < N_items_global; ++i) estimated_weights[i] = BASE_WEIGHT;
+    } else if (k_pivots_chosen == 1) {
+        // Single pivot: every other item is compared against it once and gets the
+        // pivot's weight scaled by the lighter / heavier factor (or the same weight on '=').
+        estimated_weights[pivot_item_indices[0]] = BASE_WEIGHT;
+        for (int i = 0; i < N_items_global; ++i) {
+            if (i == pivot_item_indices[0]) continue;
+            char res = compare_single_items(i, pivot_item_indices[0]);
+            if (res == '=') estimated_weights[i] = BASE_WEIGHT;
+            else if (res == '<') estimated_weights[i] = std::max(1LL, BASE_WEIGHT * FACTOR_LT_NUM / 100);
+            else estimated_weights[i] = std::max(1LL, BASE_WEIGHT * FACTOR_GT_NUM / 100);
+        }
+    } else { // k_pivots_chosen >= 2
+        // Order the pivots with 1-vs-1 queries; the first sorted pivot anchors the scale at BASE_WEIGHT.
+        merge_sort_items(sorted_pivot_item_indices, 0, k_pivots_chosen - 1);
+        int p0_idx = sorted_pivot_item_indices[0];
+        estimated_weights[p0_idx] = BASE_WEIGHT;
+        int p1_idx = sorted_pivot_item_indices[1];
+        char res_p1_vs_p0 = compare_single_items(p1_idx, p0_idx);
+        if (res_p1_vs_p0 == '=') {
+            estimated_weights[p1_idx] = estimated_weights[p0_idx];
+        } else if (res_p1_vs_p0 == '<') {
+            estimated_weights[p1_idx] = std::max(1LL, estimated_weights[p0_idx] * FACTOR_LT_NUM / 100);
+        } else {
+            estimated_weights[p1_idx] = std::max(1LL, estimated_weights[p0_idx] * FACTOR_GT_NUM / 100);
+        }
+        // Ensure monotonicity and strictness if comparison was strict
+        if (estimated_weights[p1_idx] < estimated_weights[p0_idx]) {
+             estimated_weights[p1_idx] = estimated_weights[p0_idx];
+        }
+        if (res_p1_vs_p0 == '>' && estimated_weights[p1_idx] == estimated_weights[p0_idx]) {
+             estimated_weights[p1_idx] = estimated_weights[p0_idx] + 1;
+        }
+        const long long MAX_XJ_INITIAL_HIGH_BOUND = BASE_WEIGHT * (1LL * N_items_global / std::max(1, D_groups_global) + 10); // Increased +5 to +10 for safety margin
+        // For each further pivot p_j, estimate the gap X_j = w(p_j) - w(p_{j-1}) and set
+        // w(p_j) = w(p_{j-1}) + X_j.
+        for (int j = 2; j < k_pivots_chosen; ++j) {
+            int current_pivot_idx = sorted_pivot_item_indices[j];
+            int prev_pivot_idx = sorted_pivot_item_indices[j-1];
+            char res_curr_vs_prev = compare_single_items(current_pivot_idx, prev_pivot_idx);
+            if (res_curr_vs_prev == '=') {
+                estimated_weights[current_pivot_idx] = estimated_weights[prev_pivot_idx];
+            } else if (res_curr_vs_prev == '<') {
+                estimated_weights[current_pivot_idx] = std::max(1LL, estimated_weights[prev_pivot_idx] * FACTOR_LT_NUM / 100);
+            } else {
+                // Bracket X_j by binary search over the earlier pivots p_0..p_{j-2}: the query
+                // {p_j} vs {p_{j-1}, s} tells whether X_j is below, equal to or above w(s).
+                long long X_low_bound_val = 1;
+                long long X_high_bound_val = MAX_XJ_INITIAL_HIGH_BOUND;
+                bool x_low_modified = false;
+                bool x_high_modified = false;
+                int s_search_low_arr_idx = 0, s_search_high_arr_idx = j - 2;
+                int num_s_candidates = (s_search_high_arr_idx - s_search_low_arr_idx + 1);
+                // Cap on the number of 1-vs-2 queries for this pivot: ceil(log2(candidates)), at least 1.
+                int queries_for_this_Xj = 0;
+                if (num_s_candidates > 0) {
+                     queries_for_this_Xj = static_cast<int>(std::ceil(estimate_log2(static_cast<double>(num_s_candidates))));
+                     if (num_s_candidates == 1) queries_for_this_Xj = 1;
+                }
+                for(int bs_iter = 0; bs_iter < queries_for_this_Xj && queries_made < Q_total_global; ++bs_iter) {
+                    if (s_search_low_arr_idx > s_search_high_arr_idx) break;
+                    int s_mid_arr_idx = s_search_low_arr_idx + (s_search_high_arr_idx - s_search_low_arr_idx) / 2;
+                    int item_s_aux_idx = sorted_pivot_item_indices[s_mid_arr_idx];
+                    // Skip if s_aux is same as prev_pivot_idx; R items must be distinct for query.
+                    // This should not happen if s_aux is chosen from p0...p_{j-2} and prev_pivot is p_{j-1}.
+                    // if (item_s_aux_idx == prev_pivot_idx) continue; // Should not be necessary
+                    char res_1v2 = compare_1v2_items_specific(current_pivot_idx, prev_pivot_idx, item_s_aux_idx);
+                    if (res_1v2 == '=') {
+                        X_low_bound_val = X_high_bound_val = estimated_weights[item_s_aux_idx];
+                        x_low_modified = x_high_modified = true;
+                        break;
+                    } else if (res_1v2 == '<') {
+                        X_high_bound_val = estimated_weights[item_s_aux_idx];
+                        x_high_modified = true;
+                        s_search_high_arr_idx = s_mid_arr_idx - 1;
+                    } else { // res_1v2 == '>'
+                        X_low_bound_val = estimated_weights[item_s_aux_idx];
+                        x_low_modified = true;
+                        s_search_low_arr_idx = s_mid_arr_idx + 1;
+                    }
+                }
+                long long estimated_X_j;
+                if (x_low_modified && !x_high_modified) { // X_j > X_low_bound_val (max s_aux smaller than X_j)
+                    estimated_X_j = X_low_bound_val * FACTOR_GT_NUM / 100;
+                } else if (!x_low_modified && x_high_modified) { // X_j < X_high_bound_val (min s_aux larger than X_j)
+                    estimated_X_j = X_high_bound_val * FACTOR_LT_NUM / 100;
+                } else if (x_low_modified && x_high_modified) { // X_j is bracketed
+                    // Reverted to ARITHMETIC MEAN for X_j
+                    estimated_X_j = (X_low_bound_val + X_high_bound_val) / 2;
+                } else { // Fallback if binary search didn't narrow down X_j
+                    estimated_X_j = estimated_weights[prev_pivot_idx] * FACTOR_XJ_FALLBACK_NUM / 100;
+                    if (estimated_weights[prev_pivot_idx] > 0 && estimated_X_j == 0) estimated_X_j = 1;
+                    else if (estimated_weights[prev_pivot_idx] == 0) {
+                         estimated_X_j = std::max(1LL, BASE_WEIGHT * FACTOR_XJ_FALLBACK_NUM / 100);
+                    }
+                }
+                estimated_X_j = std::max(1LL, estimated_X_j);
+                estimated_weights[current_pivot_idx] = estimated_weights[prev_pivot_idx] + estimated_X_j;
+            }
+            // Ensure monotonicity and strictness
+            if(estimated_weights[current_pivot_idx] < estimated_weights[prev_pivot_idx]) {
+                 estimated_weights[current_pivot_idx] = estimated_weights[prev_pivot_idx];
+            }
+            if (res_curr_vs_prev == '>' && estimated_weights[current_pivot_idx] == estimated_weights[prev_pivot_idx]) {
+                estimated_weights[current_pivot_idx] = estimated_weights[prev_pivot_idx] + 1;
+            }
+        }
+        // Estimate weights for non-pivot items
+        for (int i=0; i<N_items_global; ++i) {
+            bool is_pivot_flag = false;
+            for(int p_idx_val=0; p_idx_val<k_pivots_chosen; ++p_idx_val) {
+                if(sorted_pivot_item_indices[p_idx_val] == i) {
+                    is_pivot_flag = true;
+                    break;
+                }
+            }
+            if (is_pivot_flag) continue;
+            // Binary-search the item's position among the sorted pivots with 1-vs-1 queries.
+            int bs_low_arr_idx = 0, bs_high_arr_idx = k_pivots_chosen - 1;
+            int found_pivot_idx_for_eq = -1;
+            while(bs_low_arr_idx <= bs_high_arr_idx) {
+                if (queries_made >= Q_total_global && found_pivot_idx_for_eq == -1) break; // Stop if out of queries unless already found exact
+                int mid_p_arr_idx = bs_low_arr_idx + (bs_high_arr_idx - bs_low_arr_idx) / 2;
+                char res_item_vs_p = compare_single_items(i, sorted_pivot_item_indices[mid_p_arr_idx]);
+                if (res_item_vs_p == '=') {
+                    found_pivot_idx_for_eq = mid_p_arr_idx;
+                    break;
+                } else if (res_item_vs_p == '<') {
+                    bs_high_arr_idx = mid_p_arr_idx - 1;
+                } else {
+                    bs_low_arr_idx = mid_p_arr_idx + 1;
+                }
+            }
+            if (found_pivot_idx_for_eq != -1) {
+                estimated_weights[i] = estimated_weights[sorted_pivot_item_indices[found_pivot_idx_for_eq]];
+                continue;
+            }
+            // bs_low_arr_idx is the insertion position: the number of pivots judged lighter than item i.
+            int insert_pos_arr_idx = bs_low_arr_idx;
+            if (insert_pos_arr_idx == 0) { // Smaller than p0
+                long long w_p0 = estimated_weights[sorted_pivot_item_indices[0]];
+                if (k_pivots_chosen >= 2) {
+                    long long w_p1 = estimated_weights[sorted_pivot_item_indices[1]];
+                    // Ensure w_p1 != 0 before division, and w_p0 must be < w_p1 for this extrapolation to make sense
+                    // Geometric extrapolation below p0: w_p0 * (w_p0 / w_p1).
+                    if (w_p1 > w_p0 && w_p0 > 0 && w_p1 != 0) { // w_p1 should not be 0 if weights are >=1
+                         estimated_weights[i] = std::max(1LL, w_p0 * w_p0 / w_p1);
+                    } else {
+                        estimated_weights[i] = std::max(1LL, w_p0 * FACTOR_LT_NUM / 100);
+                    }
+                } else { // Only p0 exists
+                     estimated_weights[i] = std::max(1LL, w_p0 * FACTOR_LT_NUM / 100);
+                }
+            } else if (insert_pos_arr_idx == k_pivots_chosen) { // Larger than p_{k-1}
+                long long w_pk_1 = estimated_weights[sorted_pivot_item_indices[k_pivots_chosen-1]];
+                 if (k_pivots_chosen >= 2) {
+                    long long w_pk_2 = estimated_weights[sorted_pivot_item_indices[k_pivots_chosen-2]];
+                    // Ensure w_pk_2 != 0 and w_pk_2 < w_pk_1
+                    // Geometric extrapolation above the heaviest pivot: w_pk_1 * (w_pk_1 / w_pk_2).
+                    if (w_pk_1 > w_pk_2 && w_pk_2 > 0 && w_pk_2 != 0) { // w_pk_2 should not be 0
+                        estimated_weights[i] = std::max(1LL, w_pk_1 * w_pk_1 / w_pk_2);
+                    } else {
+                        estimated_weights[i] = std::max(1LL, w_pk_1 * FACTOR_GT_NUM / 100);
+                    }
+                 } else { // Only p0 exists (which is p_{k-1} here)
+                     estimated_weights[i] = std::max(1LL, w_pk_1 * FACTOR_GT_NUM / 100);
+                 }
+            } else { // Between p_{idx-1} and p_{idx}
+                long long w_prev_p = estimated_weights[sorted_pivot_item_indices[insert_pos_arr_idx-1]];
+                long long w_next_p = estimated_weights[sorted_pivot_item_indices[insert_pos_arr_idx]];
+                // Geometric mean for interpolation is generally preferred for exponential-like data
+                if (w_prev_p > 0 && w_next_p > 0) {
+                    estimated_weights[i] = static_cast<long long>(std::sqrt(static_cast<double>(w_prev_p) * w_next_p));
+                } else { // Fallback for safety or if one weight is zero (should be >=1)
+                    estimated_weights[i] = (w_prev_p + w_next_p) / 2;
+                }
+                // Ensure estimate is within the bounds of the two pivots it's between
+                estimated_weights[i] = std::max(w_prev_p, estimated_weights[i]);
+                estimated_weights[i] = std::min(w_next_p, estimated_weights[i]);
+            }
+            if (estimated_weights[i] <=0) estimated_weights[i] = 1;
+        }
+    }
+    // Final check: all weights must be at least 1.
+    for(int i=0; i<N_items_global; ++i) {
+        if (estimated_weights[i] <= 0) {
+            // This state indicates a flaw in estimation logic or extreme case.
+            // Fallback to a reasonable default like BASE_WEIGHT or 1.
+            // Previous version used BASE_WEIGHT. Smallest possible is 1.
+            // Using 1 might be safer if other weights are also small.
+            // However, if most are large, BASE_WEIGHT might be better.
+            // Sticking to previous fallback.
+            estimated_weights[i] = BASE_WEIGHT;
+        }
+    }
+    // Exhaust remaining queries
+    // The answers to these throwaway queries are ignored.
+    // NOTE(review): presumably the judge requires exactly Q queries before the answer - confirm against the problem statement.
+    int dummy_item_0_idx = 0;
+    int dummy_item_1_idx = 1;
+    // N_items_global >= 30, so 0 and 1 are valid and distinct indices.
+    while(queries_made < Q_total_global) {
+        perform_query_actual({dummy_item_0_idx}, {dummy_item_1_idx});
+        // Cycle one of the items to make queries slightly different, though not critical for correctness
+        dummy_item_1_idx = (dummy_item_1_idx + 1) % N_items_global;
+        if (dummy_item_1_idx == dummy_item_0_idx) { // Ensure distinctness
+            dummy_item_1_idx = (dummy_item_1_idx + 1) % N_items_global;
+        }
+    }
+    // --- Assignment Phase: Greedy followed by Simulated Annealing ---
+    // assignment_array[i]         : group of item i
+    // group_sums_array[g]         : estimated total weight of group g
+    // group_items_indices[g]      : items currently in group g (unordered)
+    // item_pos_in_group_vector[i] : position of item i inside its group's vector,
+    //                               kept in sync so removal is a swap-with-last + pop_back
+    std::vector<int> assignment_array(N_items_global);
+    std::vector<long long> group_sums_array(D_groups_global, 0);
+    long long total_sum_est_val = 0;
+    std::vector<std::vector<int>> group_items_indices(D_groups_global);
+    std::vector<int> item_pos_in_group_vector(N_items_global);
+    // Greedy: visit items from heaviest to lightest estimate (negated weight sorts descending)
+    // and put each one into the group with the smallest current sum.
+    std::vector<std::pair<long long, int>> items_sorted_for_greedy(N_items_global);
+    for(int i=0; i<N_items_global; ++i) {
+        items_sorted_for_greedy[i] = {-estimated_weights[i], i};
+    }
+    std::sort(items_sorted_for_greedy.begin(), items_sorted_for_greedy.end());
+    for(int i=0; i<N_items_global; ++i) {
+        int item_actual_idx = items_sorted_for_greedy[i].second;
+        long long item_w = estimated_weights[item_actual_idx];
+        int best_grp_current = 0;
+        if (D_groups_global > 1) {
+            long long min_sum_in_group = group_sums_array[0];
+            // Small optimization: if multiple groups have same min_sum, pick one randomly or by index
+            // Current logic picks smallest index. This is fine.
+            for(int j=1; j<D_groups_global; ++j) {
+                if (group_sums_array[j] < min_sum_in_group) {
+                    min_sum_in_group = group_sums_array[j];
+                    best_grp_current = j;
+                }
+            }
+        }
+        assignment_array[item_actual_idx] = best_grp_current;
+        group_sums_array[best_grp_current] += item_w;
+        group_items_indices[best_grp_current].push_back(item_actual_idx);
+        item_pos_in_group_vector[item_actual_idx] = group_items_indices[best_grp_current].size() - 1;
+        total_sum_est_val += item_w;
+    }
+    // The objective is tracked incrementally through the sum of squared group totals.
+    double current_sum_sq_group_totals = 0;
+    for(long long s : group_sums_array) {
+        current_sum_sq_group_totals += static_cast<double>(s) * s;
+    }
+    double current_var = calculate_variance_from_sums(current_sum_sq_group_totals, static_cast<double>(total_sum_est_val), D_groups_global);
+    // SA Parameters
+    // Initial temperature is a fraction of the starting variance, never below 1.0.
+    double T_initial_factor = 0.25;
+    double T = std::max(1.0, current_var * T_initial_factor);
+    if (total_sum_est_val > 0 && current_var < 1e-9 && D_groups_global > 0) {
+        T = std::max(1.0, static_cast<double>(total_sum_est_val) / std::max(1,N_items_global) * 0.1);
+    } else if (total_sum_est_val == 0 && D_groups_global > 0) {
+        T = std::max(1.0, static_cast<double>(BASE_WEIGHT) * N_items_global / D_groups_global * 0.01 );
+    }
+    if (D_groups_global <= 1) T = 0;
+    double cool_rate = 0.9999;
+    int sa_iters_count = 0;
+    std::uniform_real_distribution<double> unif_dist(0.0, 1.0);
+    int no_improvement_streak = 0;
+    const int REHEAT_STREAK_THRESH_FACTOR = N_items_global > 50 ? 10 : 20;
+    const int CHECK_TIME_INTERVAL = 256;
+    // Annealing loop. Note that the final answer is whatever state the loop ends in;
+    // no separate best-so-far assignment is kept.
+    while(D_groups_global > 1 && N_items_global > 0) {
+        sa_iters_count++;
+        // Every CHECK_TIME_INTERVAL iterations: check the clock, cool T once, and
+        // reheat if the search has stalled at a low temperature.
+        if (sa_iters_count % CHECK_TIME_INTERVAL == 0) {
+            auto time_now = std::chrono::steady_clock::now();
+            if (std::chrono::duration_cast<std::chrono::milliseconds>(time_now - program_start_time) >= time_limit_ms) {
+                break;
+            }
+            T *= cool_rate;
+            if (no_improvement_streak > N_items_global * REHEAT_STREAK_THRESH_FACTOR && T < current_var * 0.05 && current_var > 1.0 + 1e-9) {
+                 T = std::max(1.0, current_var * T_initial_factor * 0.5);
+                 no_improvement_streak = 0;
+            }
+        }
+        if (T < 1e-12 && current_var > 1e-9) T = 1e-9; // Floor T if var high but T too low
+        if (T < 1e-12 && current_var < (1.0 + 1e-9)) break; // Converged or T too low
+        int move_type_rand_val = rng_engine();
+        // Adjust probability of swap vs relocate: 1/3 swap, 2/3 relocate
+        bool try_swap_move = ( (move_type_rand_val % 3 == 0) );
+        if (!try_swap_move) { // Relocate an item
+            if (N_items_global == 0) continue;
+            int item_to_move_idx = rng_engine() % N_items_global;
+            int old_grp_idx = assignment_array[item_to_move_idx];
+            if (D_groups_global <=1) continue;
+            int new_grp_idx = rng_engine() % D_groups_global;
+            while(new_grp_idx == old_grp_idx) new_grp_idx = rng_engine() % D_groups_global;
+            long long item_w_val = estimated_weights[item_to_move_idx];
+            long long old_sum_grp_A = group_sums_array[old_grp_idx];
+            long long old_sum_grp_B = group_sums_array[new_grp_idx];
+            long long new_sum_grp_A = old_sum_grp_A - item_w_val;
+            long long new_sum_grp_B = old_sum_grp_B + item_w_val;
+            // Only the two touched groups change, so update the sum of squares by their delta.
+            double new_sum_sq_group_totals_cand = current_sum_sq_group_totals;
+            new_sum_sq_group_totals_cand -= static_cast<double>(old_sum_grp_A)*old_sum_grp_A + static_cast<double>(old_sum_grp_B)*old_sum_grp_B;
+            new_sum_sq_group_totals_cand += static_cast<double>(new_sum_grp_A)*new_sum_grp_A + static_cast<double>(new_sum_grp_B)*new_sum_grp_B;
+            double new_var = calculate_variance_from_sums(new_sum_sq_group_totals_cand, static_cast<double>(total_sum_est_val), D_groups_global);
+            double delta_V = new_var - current_var;
+            // Metropolis acceptance: always take improvements, otherwise accept with probability exp(-delta_V / T).
+            if (delta_V < 0 || (T > 1e-12 && unif_dist(rng_engine) < std::exp(-delta_V / T)) ) {
+                current_var = new_var;
+                current_sum_sq_group_totals = new_sum_sq_group_totals_cand;
+                group_sums_array[old_grp_idx] = new_sum_grp_A;
+                group_sums_array[new_grp_idx] = new_sum_grp_B;
+                assignment_array[item_to_move_idx] = new_grp_idx;
+                // Remove the item from its old group's vector by overwriting it with the last element.
+                int pos_in_old_vec = item_pos_in_group_vector[item_to_move_idx];
+                if (!group_items_indices[old_grp_idx].empty()) {
+                    int last_item_in_old_grp_vec = group_items_indices[old_grp_idx].back();
+                    if (item_to_move_idx != last_item_in_old_grp_vec) {
+                         group_items_indices[old_grp_idx][pos_in_old_vec] = last_item_in_old_grp_vec;
+                         item_pos_in_group_vector[last_item_in_old_grp_vec] = pos_in_old_vec;
+                    }
+                    group_items_indices[old_grp_idx].pop_back();
+                }
+                group_items_indices[new_grp_idx].push_back(item_to_move_idx);
+                item_pos_in_group_vector[item_to_move_idx] = group_items_indices[new_grp_idx].size() - 1;
+                if (delta_V < -1e-9) no_improvement_streak = 0; else no_improvement_streak++;
+            } else {
+                no_improvement_streak++;
+            }
+        } else { // Try swap move
+            if (D_groups_global <= 1) continue;
+            int grp1_idx = rng_engine() % D_groups_global;
+            int grp2_idx = rng_engine() % D_groups_global;
+            while(grp2_idx == grp1_idx) grp2_idx = rng_engine() % D_groups_global;
+            if(group_items_indices[grp1_idx].empty() || group_items_indices[grp2_idx].empty()) {
+                no_improvement_streak++;
+                continue;
+            }
+            int item1_original_idx = group_items_indices[grp1_idx][rng_engine() % group_items_indices[grp1_idx].size()];
+            int item2_original_idx = group_items_indices[grp2_idx][rng_engine() % group_items_indices[grp2_idx].size()];
+            long long w1 = estimated_weights[item1_original_idx];
+            long long w2 = estimated_weights[item2_original_idx];
+            // If w1 == w2, swap has no effect on sums, so delta_V = 0.
+            // This move is only useful if it helps escape local minimum for other reasons,
+            // or if it's accepted by chance and enables further moves.
+            // If w1 == w2, delta_V will be 0. Acceptance depends on T (always if T>0).
+            // No need to explicitly check for w1==w2.
+            long long old_sum_grp1 = group_sums_array[grp1_idx];
+            long long old_sum_grp2 = group_sums_array[grp2_idx];
+            long long new_sum_grp1 = old_sum_grp1 - w1 + w2;
+            long long new_sum_grp2 = old_sum_grp2 - w2 + w1;
+            double new_sum_sq_group_totals_cand = current_sum_sq_group_totals;
+            new_sum_sq_group_totals_cand -= static_cast<double>(old_sum_grp1)*old_sum_grp1 + static_cast<double>(old_sum_grp2)*old_sum_grp2;
+            new_sum_sq_group_totals_cand += static_cast<double>(new_sum_grp1)*new_sum_grp1 + static_cast<double>(new_sum_grp2)*new_sum_grp2;
+            double new_var = calculate_variance_from_sums(new_sum_sq_group_totals_cand, static_cast<double>(total_sum_est_val), D_groups_global);
+            double delta_V = new_var - current_var;
+            // Same acceptance rule as for the relocate move.
+            if (delta_V < 0 || (T > 1e-12 && unif_dist(rng_engine) < std::exp(-delta_V / T)) ) {
+                current_var = new_var;
+                current_sum_sq_group_totals = new_sum_sq_group_totals_cand;
+                group_sums_array[grp1_idx] = new_sum_grp1;
+                group_sums_array[grp2_idx] = new_sum_grp2;
+                assignment_array[item1_original_idx] = grp2_idx;
+                assignment_array[item2_original_idx] = grp1_idx;
+                // Update item tracking structures
+                int pos1_in_G1 = item_pos_in_group_vector[item1_original_idx];
+                // group_items_indices[grp1_idx] cannot be empty here as item1 was picked from it.
+                int back1_of_G1 = group_items_indices[grp1_idx].back();
+                if (item1_original_idx != back1_of_G1) {
+                    group_items_indices[grp1_idx][pos1_in_G1] = back1_of_G1;
+                    item_pos_in_group_vector[back1_of_G1] = pos1_in_G1;
+                }
+                group_items_indices[grp1_idx].pop_back();
+                int pos2_in_G2 = item_pos_in_group_vector[item2_original_idx];
+                int back2_of_G2 = group_items_indices[grp2_idx].back();
+                if (item2_original_idx != back2_of_G2) {
+                    group_items_indices[grp2_idx][pos2_in_G2] = back2_of_G2;
+                    item_pos_in_group_vector[back2_of_G2] = pos2_in_G2;
+                }
+                group_items_indices[grp2_idx].pop_back();
+                group_items_indices[grp2_idx].push_back(item1_original_idx);
+                item_pos_in_group_vector[item1_original_idx] = group_items_indices[grp2_idx].size() - 1;
+                group_items_indices[grp1_idx].push_back(item2_original_idx);
+                item_pos_in_group_vector[item2_original_idx] = group_items_indices[grp1_idx].size() - 1;
+                if (delta_V < -1e-9) no_improvement_streak = 0; else no_improvement_streak++;
+            } else {
+                no_improvement_streak++;
+            }
+        }
+    }
+    // Output: the group index of every item, space-separated, on one line.
+    for (int i = 0; i < N_items_global; ++i) {
+        std::cout << assignment_array[i] << (i == N_items_global - 1 ? "" : " ");
+    }
+    std::cout << std::endl;
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc026/best_program.cpp ADDED Viewed

	@@ -0,0 +1,653 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+#include <limits>
+#include <chrono>
+#include <random>
+#include <cmath>
+// #include <queue> // Not strictly needed now for beam search pruning strategy
+#include <utility> // For std::pair, std::move
+#include <span>    // For std::span (C++20)
+// Using 0-indexed internally for stacks and box values for convenience with vectors
+// Box values 0 to N-1, stack indices 0 to M-1.
+// Input is 1-indexed for box values (1 to N) and stack indices (1 to M).
+// Output must be 1-indexed for box values and stack indices.
+// target_stack_idx=0 for carry-out operation.
+// Constants for heuristic evaluation
+const double HEURISTIC_EMPTY_STACK_BONUS_SCORE = 1000.0; // subtracted from the score when the destination stack is empty
+const double STACK_HEIGHT_PENALTY_FACTOR = 0.1; // added once per box already sitting in a non-empty destination
+const int HEURISTIC_LOOKAHEAD_WINDOW = 8; // labels in (target, target + window] are treated as "near-future small"
+const double HEURISTIC_COVER_CRITICAL_PENALTY_PER_BOX_ABOVE = 3.0; // per box lying above a near-future label inside the moved block
+const double HEURISTIC_MIN_LABEL_IN_DEST_FACTOR = 0.06; // reward proportional to the smallest label in the destination
+ // Extra heuristic terms (penalize burying small labels)
+const double SMALL_TOP_PENALTY = 45.0; // per moved box when the destination top is <= target + window
+const double DEST_SMALL_COUNT_PENALTY = 1.7; // per (near-future label in destination) x (moved box)
+const double BLOCK_MIN_OVER_SMALL_DEST_PENALTY = 20.0; // per moved box when the destination minimum is <= the block minimum
+// Beam tie-breaker weights (lower is better): prefer states with fewer covered near-future small labels,
+// slightly penalize total height, and mildly prefer having empty stacks.
+const double BEAM_TIE_COVERED_SMALL_WEIGHT = 0.6;
+const double BEAM_TIE_HEIGHT_WEIGHT = 0.02;
+const double BEAM_TIE_EMPTY_STACK_BONUS = 1.0;
+// Time budget, beam search and SA parameters
+const double TIME_LIMIT_SECONDS_TOTAL = 1.95; // main stops the SA loop 0.02 s before this limit
+double BEAM_SEARCH_TIME_LIMIT_SECONDS_PARAM = 0.30; // despite the name, main multiplies this by TIME_LIMIT_SECONDS_TOTAL, i.e. it is a fraction
+const int BEAM_WIDTH = 5; // number of states kept per beam step
+double T_INITIAL_SA = 75.0; // SA temperature at the start of the annealing phase
+double T_FINAL_SA = 0.01; // SA temperature at the end; also the lower clamp
+// --- Random Number Generation ---
+struct RandomGenerator {
+    /**
+     Thin wrapper around a Mersenne Twister seeded from the steady clock.
+     A single global instance, RGen, is declared together with the type.
+    */
+    std::mt19937 rng;
+    RandomGenerator() : rng(std::chrono::steady_clock::now().time_since_epoch().count()) {}
+    // Uniform integer in [min_val, max_val]; an inverted range simply yields min_val.
+    int an_int(int min_val, int max_val) {
+        if (max_val < min_val) {
+            return min_val;
+        }
+        return std::uniform_int_distribution<int>(min_val, max_val)(rng);
+    }
+    // Uniform real drawn from std::uniform_real_distribution over (min_val, max_val).
+    double a_double(double min_val, double max_val) {
+        return std::uniform_real_distribution<double>(min_val, max_val)(rng);
+    }
+} RGen;
+auto GLOBAL_START_TIME = std::chrono::steady_clock::now(); // reference point for all time checks; main() resets it before reading input
+// --- State Definition ---
+struct State { // Simulation snapshot: stack contents, per-box positions, accumulated energy and (optionally) the emitted operations.
+    std::vector<std::vector<int>> stacks; // stacks[s] holds 0-indexed box labels in input order; back() is the box removed by a carry-out
+    std::vector<std::pair<int, int>> box_pos; // {stack_idx, height_idx}; not refreshed for a box once it has been carried out
+    long long energy_cost; // only apply_op1_move increases this
+    std::vector<std::pair<int, int>> ops_history; // 1-indexed {box, destination stack}; destination 0 marks a carry-out
+    int N_val; // number of boxes
+    int M_val; // number of stacks
+    bool record_ops_flag; // when false, ops_history is never appended to
+    State() : energy_cost(0), N_val(0), M_val(0), record_ops_flag(true) {}
+    State(int N_in, int M_in, const std::vector<std::vector<int>>& initial_stacks_input, bool rec_ops = true) // initial_stacks_input uses 1-indexed labels
+        : energy_cost(0), N_val(N_in), M_val(M_in), record_ops_flag(rec_ops) {
+        stacks.resize(M_val);
+        for (int i = 0; i < M_val; ++i) {
+            stacks[i].reserve(N_val + 20); // capacity for every box plus slack
+        }
+        box_pos.resize(N_val);
+        if (record_ops_flag) {
+            ops_history.reserve(N_val * 2 + 50);
+        }
+        for (int i = 0; i < M_val; ++i) {
+            for (size_t j = 0; j < initial_stacks_input[i].size(); ++j) {
+                int box_id = initial_stacks_input[i][j] - 1; // 0-indexed
+                stacks[i].push_back(box_id);
+                box_pos[box_id] = {i, (int)j};
+            }
+        }
+    }
+    State(const State& other) = default;
+    State& operator=(const State& other) = default;
+    State(State&& other) noexcept = default;
+    State& operator=(State&& other) noexcept = default;
+    double evaluate_destination_stack_choice(
+        int current_target_box_val, // 0-indexed
+        std::span<const int> block_to_move, // 0-indexed box values
+        int dest_stack_idx) const { // 0-indexed
+        /**
+         Heuristic evaluation for choosing destination stack for the obstructing block.
+         Lower score is better. The state itself is not modified. Terms:
+         - strong preference to use empty stacks
+         - mild penalty by destination height
+         - prefer stacks whose minimum label is large
+         - penalize covering near-future small labels in destination and inside moved block
+         - avoid burying very small labels (dest top in small window or dest min <= block min)
+         "Near-future small" means a label in (current_target_box_val, current_target_box_val + HEURISTIC_LOOKAHEAD_WINDOW].
+         The destination-dependent terms are skipped entirely for an empty destination.
+        */
+        const auto& dest_stack_content = stacks[dest_stack_idx];
+        double current_score = 0;
+        const int small_thr = current_target_box_val + HEURISTIC_LOOKAHEAD_WINDOW; // upper bound of the near-future window (not clamped to N_val)
+        if (dest_stack_content.empty()) {
+            current_score -= HEURISTIC_EMPTY_STACK_BONUS_SCORE;
+        } else {
+            current_score += (double)dest_stack_content.size() * STACK_HEIGHT_PENALTY_FACTOR;
+            int min_label_in_dest_stack = N_val;
+            int dest_small_count = 0;
+            for (int box_val_in_dest : dest_stack_content) {
+                min_label_in_dest_stack = std::min(min_label_in_dest_stack, box_val_in_dest);
+                if (box_val_in_dest > current_target_box_val && box_val_in_dest <= small_thr) {
+                    ++dest_small_count;
+                }
+            }
+            current_score -= (double)min_label_in_dest_stack * HEURISTIC_MIN_LABEL_IN_DEST_FACTOR;
+            // Penalize if the destination top is a small value (will be buried)
+            int dest_top = dest_stack_content.back();
+            if (dest_top <= small_thr) {
+                current_score += SMALL_TOP_PENALTY * (int)block_to_move.size();
+            }
+            // Penalize by how many near-future small labels exist in destination
+            current_score += DEST_SMALL_COUNT_PENALTY * dest_small_count * (int)block_to_move.size();
+            // If the destination contains very small labels (min <= block_min), avoid burying them
+            if (!block_to_move.empty()) {
+                int block_min = N_val;
+                for (int v : block_to_move) block_min = std::min(block_min, v);
+                if (min_label_in_dest_stack <= block_min) {
+                    current_score += BLOCK_MIN_OVER_SMALL_DEST_PENALTY * (int)block_to_move.size();
+                }
+            }
+        }
+        // Penalize for near-future small labels inside the moved block that would get buried within the block
+        for (size_t i = 0; i < block_to_move.size(); ++i) {
+            int box_in_block = block_to_move[i];
+            if (box_in_block > current_target_box_val && box_in_block <= small_thr) {
+                int boxes_on_top_in_block = (int)block_to_move.size() - 1 - (int)i;
+                current_score += HEURISTIC_COVER_CRITICAL_PENALTY_PER_BOX_ABOVE * boxes_on_top_in_block;
+            }
+        }
+        return current_score;
+    }
+    void apply_op1_move(int first_box_in_block_val, int num_moved_boxes, int dest_stack_idx) { // All 0-indexed; moves num_moved_boxes boxes starting at first_box_in_block_val onto dest_stack_idx
+        int src_stack_idx = box_pos[first_box_in_block_val].first;
+        int first_box_height_idx_in_src = box_pos[first_box_in_block_val].second;
+        auto& src_stack_vec = stacks[src_stack_idx];
+        auto& dest_stack_vec = stacks[dest_stack_idx];
+        auto P_k_start_iter = src_stack_vec.begin() + first_box_height_idx_in_src;
+        auto P_k_end_iter = src_stack_vec.begin() + first_box_height_idx_in_src + num_moved_boxes;
+        int old_dest_stack_height = dest_stack_vec.size();
+        dest_stack_vec.insert(dest_stack_vec.end(), // NOTE(review): the source iterators are reused below, so this assumes dest_stack_idx != src_stack_idx
+                              std::make_move_iterator(P_k_start_iter),
+                              std::make_move_iterator(P_k_end_iter));
+        for (int i = 0; i < num_moved_boxes; ++i) {
+            int moved_box_val = dest_stack_vec[old_dest_stack_height + i];
+            box_pos[moved_box_val] = {dest_stack_idx, old_dest_stack_height + i}; // order inside the block is preserved
+        }
+        src_stack_vec.erase(P_k_start_iter, P_k_end_iter);
+        energy_cost += (num_moved_boxes + 1); // cost of one move: block size plus one
+        if (record_ops_flag) {
+            ops_history.push_back({first_box_in_block_val + 1, dest_stack_idx + 1}); // recorded 1-indexed
+        }
+    }
+    void apply_op2_carry_out(int target_box_val) { // 0-indexed
+        int stack_idx = box_pos[target_box_val].first;
+        stacks[stack_idx].pop_back(); // removes the top box without checking that it is target_box_val; adds no energy
+        if (record_ops_flag) {
+            ops_history.push_back({target_box_val + 1, 0}); // destination 0 denotes carry-out
+        }
+    }
+};
+struct BeamNode {
+    /**
+     One beam entry: the simulated board, the destination plan that produced it,
+     and a secondary score used only to break energy ties.
+    */
+    State current_board_state;
+    std::vector<int> partial_plan_D;
+    double tie_score = 0.0;
+    BeamNode() = default;
+    BeamNode(State state, std::vector<int> plan)
+        : current_board_state(std::move(state)), partial_plan_D(std::move(plan)) {}
+    // Strict ordering: smaller accumulated energy wins; equal energies fall back to the smaller tie score.
+    bool operator<(const BeamNode& other) const {
+        const long long lhs_energy = current_board_state.energy_cost;
+        const long long rhs_energy = other.current_board_state.energy_cost;
+        return (lhs_energy == rhs_energy) ? (tie_score < other.tie_score)
+                                          : (lhs_energy < rhs_energy);
+    }
+};
+// Compute a tie-breaker score for beam nodes (lower is better).
+// Approach: look ahead to the next target (k = plan length). Sum how many boxes cover
+// near-future small labels (k < v <= k + HEURISTIC_LOOKAHEAD_WINDOW), weighted by their depth.
+// Add a mild total-height penalty, and subtract a small bonus per empty stack.
+static inline double compute_beam_tie_score(const State& S, int next_target_k) {
+    /**
+     Secondary ranking value for beam nodes; smaller is better.
+     For every label v with next_target_k < v <= min(N-1, next_target_k + HEURISTIC_LOOKAHEAD_WINDOW)
+     the number of boxes lying above v is accumulated. The result combines that sum, the total
+     number of boxes still stacked, and a bonus for each empty stack.
+    */
+    const int upper_label = std::min(S.N_val - 1, next_target_k + HEURISTIC_LOOKAHEAD_WINDOW);
+    long long buried_depth_total = 0;
+    long long boxes_total = 0;
+    int vacant_stacks = 0;
+    for (const auto& column : S.stacks) {
+        if (column.empty()) {
+            ++vacant_stacks;
+            continue;
+        }
+        const int column_height = (int)column.size();
+        boxes_total += column_height;
+        int boxes_above = column_height; // becomes "boxes above the current label" after the decrement
+        for (int label : column) {
+            --boxes_above;
+            if (label > next_target_k && label <= upper_label) {
+                buried_depth_total += boxes_above;
+            }
+        }
+    }
+    return buried_depth_total * BEAM_TIE_COVERED_SMALL_WEIGHT
+         + boxes_total * BEAM_TIE_HEIGHT_WEIGHT
+         - vacant_stacks * BEAM_TIE_EMPTY_STACK_BONUS;
+}
+std::vector<int> generate_initial_plan_beam_search(
+    const std::vector<std::vector<int>>& initial_stacks_param,
+    int N_CONST, int M_CONST, int beam_width_param, double max_duration_for_beam_search) {
+    /**
+     Build an initial destination plan by beam search over k=0..N-1.
+     Each step carries box k if it is already on top, otherwise moves the obstructing block once to
+     every other stack, and keeps the best beam_width_param states (energy first, tie score second).
+     If the time budget runs out, the best node is completed greedily with the destination heuristic.
+     Parameters:
+       initial_stacks_param          1-indexed box labels per stack, as consumed by the State constructor
+       N_CONST, M_CONST              number of boxes / number of stacks
+       beam_width_param              maximum number of states kept after each step
+       max_duration_for_beam_search  budget in seconds, measured from GLOBAL_START_TIME
+     Returns one 0-indexed stack index per box: the destination chosen for the block above box k,
+     or the box's own stack when nothing had to be moved. A fully random plan is returned whenever
+     no candidate state can be produced.
+    */
+    // Last-resort plan: a random destination for every box (empty when N_CONST == 0).
+    auto make_random_plan = [&]() {
+        std::vector<int> emergency_plan(N_CONST);
+        for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+        return emergency_plan;
+    };
+    // Wrap a successor state as a beam candidate. Every candidate gets a tie score computed the
+    // same way, so energy ties between carry-only and moved successors are broken consistently.
+    auto make_candidate = [](State&& next_state, std::vector<int>&& next_plan) {
+        BeamNode cand_node(std::move(next_state), std::move(next_plan));
+        cand_node.tie_score = compute_beam_tie_score(cand_node.current_board_state,
+                                                     (int)cand_node.partial_plan_D.size());
+        return cand_node;
+    };
+    std::vector<BeamNode> beam;
+    beam.reserve(beam_width_param);
+    State initial_state_for_beam(N_CONST, M_CONST, initial_stacks_param, false);
+    beam.emplace_back(std::move(initial_state_for_beam), std::vector<int>());
+    if (N_CONST > 0) beam.back().partial_plan_D.reserve(N_CONST);
+    // Initialize tie-score for the root node (next target k = 0)
+    beam.back().tie_score = compute_beam_tie_score(beam.back().current_board_state, 0);
+    std::vector<BeamNode> candidates;
+    if (M_CONST > 0) candidates.reserve(beam_width_param * M_CONST + 5);
+    else candidates.reserve(beam_width_param + 5);
+    for (int k_target_box = 0; k_target_box < N_CONST; ++k_target_box) {
+        double elapsed_seconds_so_far = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+        if (elapsed_seconds_so_far > max_duration_for_beam_search) {
+            // Out of time: finish the best node greedily, one heuristic decision per remaining box.
+            if (beam.empty()) return make_random_plan();
+            std::sort(beam.begin(), beam.end());
+            BeamNode& best_node_so_far = beam[0];
+            State& S_greedy = best_node_so_far.current_board_state;
+            for (int k_future = k_target_box; k_future < N_CONST; ++k_future) {
+                int f_src_idx = S_greedy.box_pos[k_future].first;
+                int f_h_idx = S_greedy.box_pos[k_future].second;
+                int f_num_top = (int)S_greedy.stacks[f_src_idx].size() - 1 - f_h_idx;
+                if (f_num_top == 0) {
+                    best_node_so_far.partial_plan_D.push_back(f_src_idx);
+                } else {
+                    int f_block_first_val = S_greedy.stacks[f_src_idx][f_h_idx + 1];
+                    std::span<const int> block_span_greedy(S_greedy.stacks[f_src_idx].data() + f_h_idx + 1, f_num_top);
+                    double min_h_eval_score = std::numeric_limits<double>::max();
+                    int best_d_greedy = (M_CONST > 1) ? (f_src_idx + 1) % M_CONST : 0;
+                    for (int d_cand = 0; d_cand < M_CONST; ++d_cand) {
+                        if (d_cand == f_src_idx) continue;
+                        double h_eval_score = S_greedy.evaluate_destination_stack_choice(k_future, block_span_greedy, d_cand);
+                        if (h_eval_score < min_h_eval_score) {
+                            min_h_eval_score = h_eval_score;
+                            best_d_greedy = d_cand;
+                        }
+                    }
+                    best_node_so_far.partial_plan_D.push_back(best_d_greedy);
+                    // With a single stack there is no other destination; skip the move (as the
+                    // simulators do) instead of moving a block onto its own stack.
+                    if (M_CONST > 1) {
+                        S_greedy.apply_op1_move(f_block_first_val, f_num_top, best_d_greedy);
+                    }
+                }
+                S_greedy.apply_op2_carry_out(k_future);
+            }
+            return best_node_so_far.partial_plan_D;
+        }
+        candidates.clear();
+        for (auto& current_beam_node : beam) {
+            State& S_curr = current_beam_node.current_board_state;
+            int target_val = k_target_box;
+            int src_idx = S_curr.box_pos[target_val].first;
+            int h_idx = S_curr.box_pos[target_val].second;
+            int num_top = (int)S_curr.stacks[src_idx].size() - 1 - h_idx;
+            if (num_top == 0) {
+                // Target already on top: single successor that just carries it out.
+                State next_S = S_curr;
+                std::vector<int> next_plan = current_beam_node.partial_plan_D;
+                next_plan.push_back(src_idx);
+                next_S.apply_op2_carry_out(target_val);
+                candidates.emplace_back(make_candidate(std::move(next_S), std::move(next_plan)));
+            } else {
+                // One successor per alternative destination for the obstructing block.
+                int block_first_val = S_curr.stacks[src_idx][h_idx + 1];
+                for (int dest_cand = 0; dest_cand < M_CONST; ++dest_cand) {
+                    if (dest_cand == src_idx) continue;
+                    State next_S = S_curr;
+                    std::vector<int> next_plan = current_beam_node.partial_plan_D;
+                    next_plan.push_back(dest_cand);
+                    next_S.apply_op1_move(block_first_val, num_top, dest_cand);
+                    next_S.apply_op2_carry_out(target_val);
+                    candidates.emplace_back(make_candidate(std::move(next_S), std::move(next_plan)));
+                }
+            }
+        }
+        // No successor at all (e.g. a single stack with a buried target): fall back to a random plan.
+        if (candidates.empty()) return make_random_plan();
+        std::sort(candidates.begin(), candidates.end());
+        beam.clear();
+        const size_t keep_count = std::min((size_t)beam_width_param, candidates.size());
+        for (size_t i = 0; i < keep_count; ++i) {
+            beam.push_back(std::move(candidates[i]));
+        }
+        // Only reachable when beam_width_param is 0.
+        if (beam.empty()) return make_random_plan();
+    }
+    if (beam.empty()) return make_random_plan();
+    std::sort(beam.begin(), beam.end());
+    return beam[0].partial_plan_D;
+}
+struct SimulationResult { // Outcome of replaying a plan: total energy plus the recorded operations.
+    long long energy_cost; // State::energy_cost at the end of the replay
+    std::vector<std::pair<int, int>> ops_history; // 1-indexed {box, destination}; filled only when recording was enabled
+};
+std::pair<State, long long> simulate_up_to_k(
+    const std::vector<std::vector<int>>& initial_stacks_param,
+    const std::vector<int>& plan_D,
+    int N_CONST, int M_CONST,
+    int k_limit_box_idx) {
+    /**
+     Replay plan_D from the initial configuration for boxes 0..k_limit_box_idx-1 without
+     recording operations. Returns the resulting state together with its accumulated energy.
+     A planned destination equal to the box's own stack is redirected to the next stack (cyclically);
+     with a single stack no block is ever moved.
+    */
+    State sim(N_CONST, M_CONST, initial_stacks_param, false);
+    const bool relocation_possible = M_CONST > 1;
+    for (int box = 0; box < k_limit_box_idx; ++box) {
+        const auto [home_stack, depth_in_stack] = sim.box_pos[box];
+        const auto& column = sim.stacks[home_stack];
+        const int covering_boxes = (int)column.size() - 1 - depth_in_stack;
+        if (covering_boxes > 0 && relocation_possible) {
+            const int lowest_covering_box = column[depth_in_stack + 1];
+            int destination = plan_D[box];
+            if (destination == home_stack) {
+                destination = (home_stack + 1) % M_CONST;
+            }
+            sim.apply_op1_move(lowest_covering_box, covering_boxes, destination);
+        }
+        sim.apply_op2_carry_out(box);
+    }
+    // Read the energy before the state is moved into the returned pair.
+    const long long energy_so_far = sim.energy_cost;
+    return {std::move(sim), energy_so_far};
+}
+SimulationResult run_simulation_from_intermediate_state(
+    State intermediate_state,
+    const std::vector<int>& plan_D,
+    int k_start_box_idx,
+    int N_CONST, int M_CONST,
+    bool record_ops_for_suffix) {
+    /**
+     Continue a replay of plan_D from intermediate_state for boxes k_start_box_idx..N_CONST-1.
+     When record_ops_for_suffix is set, any previously stored operations are discarded and the
+     suffix operations are recorded. The returned energy is the state's total, i.e. it includes
+     whatever energy intermediate_state already carried.
+    */
+    State sim = std::move(intermediate_state);
+    sim.record_ops_flag = record_ops_for_suffix;
+    if (record_ops_for_suffix) {
+        sim.ops_history.clear();
+    }
+    const bool relocation_possible = M_CONST > 1;
+    for (int box = k_start_box_idx; box < N_CONST; ++box) {
+        const auto [home_stack, depth_in_stack] = sim.box_pos[box];
+        const auto& column = sim.stacks[home_stack];
+        const int covering_boxes = (int)column.size() - 1 - depth_in_stack;
+        if (covering_boxes > 0 && relocation_possible) {
+            const int lowest_covering_box = column[depth_in_stack + 1];
+            int destination = plan_D[box];
+            // A plan entry pointing at the box's own stack is redirected to the next stack.
+            if (destination == home_stack) {
+                destination = (home_stack + 1) % M_CONST;
+            }
+            sim.apply_op1_move(lowest_covering_box, covering_boxes, destination);
+        }
+        sim.apply_op2_carry_out(box);
+    }
+    return {sim.energy_cost, std::move(sim.ops_history)};
+}
+SimulationResult run_simulation(const std::vector<std::vector<int>>& initial_stacks_param,
+                                const std::vector<int>& plan_D, int N_CONST, int M_CONST, bool record_all_ops = true) {
+    /**
+     Replay the whole plan starting from the initial configuration.
+     Builds a fresh State and hands it to run_simulation_from_intermediate_state with start index 0.
+    */
+    return run_simulation_from_intermediate_state(
+        State(N_CONST, M_CONST, initial_stacks_param, record_all_ops),
+        plan_D, 0, N_CONST, M_CONST, record_all_ops);
+}
+int main() {
+    /**
+     Driver:
+     - read instance
+     - beam search for seed plan
+     - simulate and refine by SA with partial re-evaluation
+     - output operations
+     The plan is a vector with one 0-indexed destination stack per box. Each SA iteration
+     perturbs a copy of the current plan with one of four moves (single reassignment, block
+     reassignment, swap, greedy recompute), replays the unchanged prefix, and evaluates the rest.
+     The best plan seen is replayed once more with recording enabled to print the operations.
+    */
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    GLOBAL_START_TIME = std::chrono::steady_clock::now(); // restart the clock so static-initialization time is not counted
+    int N_CONST, M_CONST;
+    std::cin >> N_CONST >> M_CONST;
+    std::vector<std::vector<int>> initial_stacks_main(M_CONST, std::vector<int>(N_CONST / M_CONST)); // every stack starts with N/M boxes
+    for (int i = 0; i < M_CONST; ++i) {
+        for (int j = 0; j < N_CONST / M_CONST; ++j) {
+            std::cin >> initial_stacks_main[i][j];
+        }
+    }
+    double beam_search_duration_budget = TIME_LIMIT_SECONDS_TOTAL * BEAM_SEARCH_TIME_LIMIT_SECONDS_PARAM; // the PARAM acts as a fraction of the total limit here
+    std::vector<int> current_plan_D = generate_initial_plan_beam_search(
+        initial_stacks_main,
+        N_CONST,
+        M_CONST,
+        BEAM_WIDTH,
+        beam_search_duration_budget
+    );
+    if (current_plan_D.size() < (size_t)N_CONST && N_CONST > 0) {
+        current_plan_D.resize(N_CONST, 0); // pad a short plan with stack 0
+    }
+     if (N_CONST == 0) {
+        current_plan_D.clear();
+    }
+    SimulationResult current_sim_res_eval = run_simulation(initial_stacks_main, current_plan_D, N_CONST, M_CONST, false);
+    long long current_energy = current_sim_res_eval.energy_cost;
+    std::vector<int> best_plan_D = current_plan_D;
+    long long best_energy = current_energy;
+    const int MAX_BLOCK_CHANGE_LEN = (N_CONST == 0) ? 0 : std::max(1, N_CONST / 15); // longest run of plan entries the block move may rewrite
+    double time_for_sa_start_offset = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+    while (true) {
+        double elapsed_seconds_total = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+        if (elapsed_seconds_total >= TIME_LIMIT_SECONDS_TOTAL - 0.02) break; // keep a 0.02 s margin for the final replay and output
+        double elapsed_seconds_sa_phase = elapsed_seconds_total - time_for_sa_start_offset;
+        double total_time_for_sa_phase = (TIME_LIMIT_SECONDS_TOTAL - 0.02) - time_for_sa_start_offset;
+        if (total_time_for_sa_phase <= 0.001) break;
+        double progress_ratio = std::min(1.0, std::max(0.0, elapsed_seconds_sa_phase / total_time_for_sa_phase));
+        double current_temp = T_INITIAL_SA * std::pow(T_FINAL_SA / T_INITIAL_SA, progress_ratio); // geometric interpolation from T_INITIAL_SA to T_FINAL_SA
+        current_temp = std::max(current_temp, T_FINAL_SA);
+        std::vector<int> new_plan_D = current_plan_D;
+        long long new_energy;
+        int k_change_start_idx = N_CONST; // smallest plan index touched by the move; the prefix before it is replayed as-is
+        State state_at_change_point;
+        bool can_use_partial_simulation = false;
+        double op_choice_rand = RGen.a_double(0.0, 1.0); // selects the move type: <0.30, <0.70, <0.85, otherwise greedy
+        if (op_choice_rand < 0.30 && N_CONST > 0) {
+            // Single-point random reassignment
+            int idx_to_change = RGen.an_int(0, N_CONST - 1);
+            k_change_start_idx = idx_to_change;
+            new_plan_D[idx_to_change] = RGen.an_int(0, M_CONST - 1);
+            auto sim_pair = simulate_up_to_k(initial_stacks_main, new_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            state_at_change_point = std::move(sim_pair.first);
+            can_use_partial_simulation = true;
+        } else if (op_choice_rand < 0.70 && N_CONST > 0) {
+            // Random block reassignment
+            int block_op_start_idx_rand = RGen.an_int(0, N_CONST - 1);
+            int len = RGen.an_int(1, MAX_BLOCK_CHANGE_LEN);
+            k_change_start_idx = N_CONST;
+            for(int i=0; i<len; ++i) {
+                int current_k_in_plan_rand = (block_op_start_idx_rand + i) % N_CONST; // the block wraps around the end of the plan
+                k_change_start_idx = std::min(k_change_start_idx, current_k_in_plan_rand);
+                new_plan_D[current_k_in_plan_rand] = RGen.an_int(0, M_CONST - 1);
+            }
+            auto sim_pair = simulate_up_to_k(initial_stacks_main, new_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            state_at_change_point = std::move(sim_pair.first);
+            can_use_partial_simulation = true;
+        } else if (op_choice_rand < 0.85 && N_CONST > 1) {
+            // Swap two indices in the plan to diversify neighborhood
+            int a = RGen.an_int(0, N_CONST - 1);
+            int b = RGen.an_int(0, N_CONST - 1);
+            if (a == b) b = (b + 1) % N_CONST;
+            if (a > b) std::swap(a, b); // ensure a is the earlier index
+            k_change_start_idx = a;
+            std::swap(new_plan_D[a], new_plan_D[b]);
+            auto sim_pair = simulate_up_to_k(initial_stacks_main, new_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            state_at_change_point = std::move(sim_pair.first);
+            can_use_partial_simulation = true;
+        } else if (N_CONST > 0) {
+            // Greedy recompute of a single decision using the heuristic
+            int k_to_recompute = RGen.an_int(0, N_CONST - 1);
+            k_change_start_idx = k_to_recompute;
+            auto sim_pair_greedy = simulate_up_to_k(initial_stacks_main, current_plan_D, N_CONST, M_CONST, k_change_start_idx); // the prefix is unchanged, so the current plan is replayed
+            State decision_state = std::move(sim_pair_greedy.first);
+            int target_op_val = k_to_recompute;
+            int src_op_idx = decision_state.box_pos[target_op_val].first;
+            int height_op_idx = decision_state.box_pos[target_op_val].second;
+            int num_top_op = decision_state.stacks[src_op_idx].size() - 1 - height_op_idx;
+            if (num_top_op > 0 && M_CONST > 1) {
+                std::span<const int> block_span_sa;
+                if (num_top_op > 0) {
+                     block_span_sa = std::span<const int>(decision_state.stacks[src_op_idx].data() + height_op_idx + 1, num_top_op);
+                }
+                double min_h_score = std::numeric_limits<double>::max();
+                int best_dest_idx = (M_CONST > 1) ? (src_op_idx + 1) % M_CONST : 0;
+                for (int dest_cand = 0; dest_cand < M_CONST; ++dest_cand) {
+                    if (dest_cand == src_op_idx) continue;
+                    double h_score = decision_state.evaluate_destination_stack_choice(target_op_val, block_span_sa, dest_cand);
+                    if (h_score < min_h_score) {
+                        min_h_score = h_score;
+                        best_dest_idx = dest_cand;
+                    }
+                }
+                new_plan_D[k_to_recompute] = best_dest_idx;
+            } else {
+                new_plan_D[k_to_recompute] = src_op_idx; // nothing to move at this step; store the box's own stack
+            }
+            state_at_change_point = std::move(decision_state);
+            can_use_partial_simulation = true;
+        } else {
+             k_change_start_idx = 0;
+             can_use_partial_simulation = false;
+        }
+        if (N_CONST == 0) {
+            new_energy = 0;
+        } else if (!can_use_partial_simulation || k_change_start_idx == 0) {
+            // If k_change_start_idx is 0, state_at_change_point is initial state with 0 energy.
+            // Full simulation is equivalent and perhaps cleaner.
+             new_energy = run_simulation(initial_stacks_main, new_plan_D, N_CONST, M_CONST, false).energy_cost;
+        } else {
+             SimulationResult suffix_res = run_simulation_from_intermediate_state(std::move(state_at_change_point), new_plan_D, k_change_start_idx, N_CONST, M_CONST, false);
+             new_energy = suffix_res.energy_cost; // total energy: the prefix state already carries its own cost
+        }
+        if (new_energy < current_energy) {
+            current_energy = new_energy;
+            current_plan_D = new_plan_D;
+            if (new_energy < best_energy) {
+                best_energy = new_energy;
+                best_plan_D = new_plan_D;
+            }
+        } else {
+            double delta_energy = new_energy - current_energy; // >= 0 in this branch
+            if (current_temp > 1e-9 && RGen.a_double(0.0, 1.0) < std::exp(-delta_energy / current_temp)) { // Metropolis acceptance of a non-improving plan
+                current_energy = new_energy;
+                current_plan_D = new_plan_D;
+            }
+        }
+    }
+    SimulationResult final_sim_result = run_simulation(initial_stacks_main, best_plan_D, N_CONST, M_CONST, true); // replay the best plan with recording on
+    for (const auto& op : final_sim_result.ops_history) {
+        std::cout << op.first << " " << op.second << "\n";
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc026/config.yaml ADDED Viewed

	@@ -0,0 +1,69 @@

+# ALE-Bench ahc026 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    AtCoder has $n$ cardboard boxes in a warehouse, which are divided into $m$ vertical stacks.\nEach box is labeled with\
+    \ a unique number from $1,\\cdots,n$, and CEO Takahashi wants to carry them out of the warehouse one by one in ascending\
+    \ order of their numbers.\nIn order to carry out a box, he needs to move all the boxes on top of it to another stack.\n\
+    As Takahashi is a very strong man, he can lift and move as many boxes in a stack at a time, but he expends energy depending\
+    \ on the number of boxes he lifts.\nPlease find a way to carry the boxes out that expends as little energy as possible.\n\
+    \nProblem Statement\n--------\nThere are $n$ cardboard boxes, each labeled with a unique number from $1,\\cdots,n$, divided\
+    \ into $m$ stacks.\nWe refer to the box labeled with the number $v(1\\leq v\\leq n)$ as \"box $v$\" and the $i(1\\leq\
+    \ i\\leq m)$-th stack from the left as \"stack $i$\".\nThe number of stacks $m$ is a divisor of $n$, and each stack $i$\
+    \ contains $n/m$ boxes, with numbers $b_{i,1},b_{i,2},\\cdots,b_{i,n/m}$ from bottom to top.\n\nYou can repeat the following\
+    \ two types of operations up to $5000$ times.\n\n1. Choose one box $v (1\\leq v\\leq n)$ that has not yet been carried\
+    \ out. Remove box $v$ and all boxes above it from the current stack and move them to the top of another stack $i(1\\leq\
+    \ i\\leq m)$ in the same order. Assume that in the stack $i'$ to which box $v$ belongs, the boxes are numbered $b_{i',1},\
+    \ \\cdots, b_{i',h'}$ from bottom to top, with $b_{i',j} = v$. Also, assume that the boxes in the destination stack $i$\
+    \ are numbered $b_{i,1}, \\cdots, b_{i,h}$ from bottom to top. After this operation, stack $i'$ will become $b_{i',1},\
+    \ \\cdots, b_{i',j-1}$, and stack $i$ will become $b_{i,1}, \\cdots, b_{i,h}, b_{i',j}, \\cdots, b_{i',h'}$. Let the number\
+    \ of boxes moved by this operation be $k = h' - j + 1$. Then, $k+1$ units of energy will be expended. If $i=i'$, this\
+    \ operation changes nothing and just wastes energy.\n2. If the smallest number among all the remaining boxes is $v$, and\
+    \ box $v$ is at the top of a stack, then box $v$ can be carried out. This operation does not expend energy.\n\nOperation\
+    \ 1 cannot create a new stack $i>m$, but it can move boxes into an empty stack $i(1\\leq i\\leq m)$ after all boxes have\
+    \ been carried out from it by operation 2.\n\nPlease find a sequence of operations that carries out all the boxes with\
+    \ as little total energy expenditure as possible.\n\nScoring\n--------\nIf all the boxes are carried out with less or\
+    \ equal to $5000$ operations, and the total energy expenditure is $V$, you will obtain a score of $\\max(1, 10000-V)$.\n\
+    If you failed to carry out all the boxes, or if you output an illegal operation sequence (specifying out-of-range values\
+    \ or a box that has already been carried out, or specifying a box that does not satisfy the condition in operation 2),\
+    \ it is judged as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\"\
+    >WA</span>.\n\nThere are $150$ test cases, and the score of a submission is the total score for each test case.\nIf your\
+    \ submission produces an illegal output or exceeds the time limit for some test cases, the submission itself will be judged\
+    \ as <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span> or\
+    \ <span class='label label-warning' data-toggle='tooltip' data-placement='top' title=\"Time Limit Exceeded\">TLE</span>\
+    \ , and the score of the submission will be zero.\nThe highest score obtained during the contest will determine the final\
+    \ ranking, and there will be no system test after the contest.\nIf more than one participant gets the same score, they\
+    \ will be ranked in the same place regardless of the submission time.\n\n\n\nInput\n--------\nInput is given from Standard\
+    \ Input in the following format:\n~~~\n$n$ $m$\n$b_{1,1}$ $\\cdots$ $b_{1,n/m}$\n$\\vdots$\n$b_{m,1}$ $\\cdots$ $b_{m,n/m}$\n\
+    ~~~\n\nThe number of boxes $n$ and the number of stacks $m$ are fixed at $n=200$ and <font color='red'>$m=10$</font> in\
+    \ all the test cases.\nThe number $b_{i,j}$ represents the number of the $j$-th box from the bottom of stack $i$, and\
+    \ satisfies $1\\leq b_{i,j}\\leq n$.\n\nOutput\n--------\nLet the $k$-th operation be represented by the two integers\
+    \ $(v_k,i_k)$ as follows.\n\n1. If you use operation 1 to move box $v(1\\leq v\\leq n)$ and all the boxes stacked on it\
+    \ to another stack $i(1\\leq i\\leq m)$, then $(v_k,i_k)=(v,i)$.\n2. If you use operation 2 to carry out box $v$, $(v_k,i_k)=(v,0)$.\n\
+    \nOutput the obtained sequence of operations $(v_1,i_1),\\cdots,(v_K,i_K)$ ($K\\leq 5000$) to Standard Output in the following\
+    \ format:\n~~~\n$v_1$ $i_1$\n$\\vdots$\n$v_K$ $i_K$\n~~~\n\n<a href=\"https://img.atcoder.jp/ahc026/lPQezTZx.html?lang=en&seed=0&output=sample\"\
+    >Show example</a>\n\n\nInput Generation\n--------\nThe box numbers are generated by randomly shuffling the integers from\
+    \ $1$ to $n$ and then dividing them into groups of $n/m$ each.\n\nTools (Input generator and visualizer)\n--------\n-\
+    \ <a href=\"https://img.atcoder.jp/ahc026/lPQezTZx.html?lang=en\">Web version</a>: This is more powerful than the local\
+    \ version providing animations.\n- <a href=\"https://img.atcoder.jp/ahc026/lPQezTZx.zip\">Local version</a>: You need\
+    \ a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n  - <a href=\"https://img.atcoder.jp/ahc026/lPQezTZx_windows.zip\"\
+    >Pre-compiled binary for Windows</a>: If you are not familiar with the Rust language environment, please use this instead.\n\
+    \nPlease be aware that sharing visualization results or discussing solutions/ideas during the contest is prohibited.\n\
+    \n\n  Problem constraints:\n  time_limit=2.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc026/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc026"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc026/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,563 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <algorithm>
+#include <limits>
+#include <chrono>
+#include <random>
+#include <cmath>
+// #include <queue> // Not strictly needed now for beam search pruning strategy
+#include <utility> // For std::pair, std::move
+#include <span>    // For std::span (C++20)
+// Using 0-indexed internally for stacks and box values for convenience with vectors
+// Box values 0 to N-1, stack indices 0 to M-1.
+// Input is 1-indexed for box values (1 to N) and stack indices (1 to M).
+// Output must be 1-indexed for box values and stack indices.
+// target_stack_idx=0 for carry-out operation.
+// Constants for heuristic evaluation (State::evaluate_destination_stack_choice; callers pick the lowest score)
+const double HEURISTIC_EMPTY_STACK_BONUS_SCORE = 1000.0; // subtracted from the score when the destination stack is empty
+const double STACK_HEIGHT_PENALTY_FACTOR = 0.1; // added per box already in a non-empty destination stack
+const int HEURISTIC_LOOKAHEAD_WINDOW = 5; // boxes labelled in (target, target + window] count as needed soon
+const double HEURISTIC_COVER_CRITICAL_PENALTY_PER_BOX_ABOVE = 3.0; // penalty per box placed on top of a soon-needed box
+const double HEURISTIC_MIN_LABEL_IN_DEST_FACTOR = 0.05; // multiplied by the smallest label in the destination and subtracted
+// Time budget, beam search and SA parameters
+const double TIME_LIMIT_SECONDS_TOTAL = 1.95; // wall-clock budget in seconds; the SA loop stops 0.02 s before it
+double BEAM_SEARCH_TIME_LIMIT_SECONDS_PARAM = 0.30; // despite the name, a fraction: main() multiplies it by TIME_LIMIT_SECONDS_TOTAL
+const int BEAM_WIDTH = 5; // number of nodes kept per beam layer
+double T_INITIAL_SA = 75.0; // SA temperature at progress 0
+double T_FINAL_SA = 0.01; // SA temperature at progress 1; also used as the lower clamp
+// --- Random Number Generation ---
+struct RandomGenerator { // thin wrapper around std::mt19937 with range helpers
+    std::mt19937 rng;
+    RandomGenerator() : rng(std::chrono::steady_clock::now().time_since_epoch().count()) {} // seeded from the clock, so runs are not reproducible
+    int an_int(int min_val, int max_val) { // uniform integer in the closed range [min_val, max_val]
+        if (min_val > max_val) return min_val; // inverted range: return min_val rather than build an invalid distribution
+        std::uniform_int_distribution<int> dist(min_val, max_val);
+        return dist(rng);
+    }
+    double a_double(double min_val, double max_val) { // uniform double in [min_val, max_val)
+        std::uniform_real_distribution<double> dist(min_val, max_val);
+        return dist(rng);
+    }
+} RGen; // global generator instance
+auto GLOBAL_START_TIME = std::chrono::steady_clock::now(); // reference point for elapsed-time checks; main() resets it on entry
+// --- State Definition ---
+struct State { // warehouse snapshot: stack contents, per-box positions, energy spent so far, optional operation log
+    std::vector<std::vector<int>> stacks; // stacks[s] holds the 0-indexed box ids of stack s, bottom first
+    std::vector<std::pair<int, int>> box_pos; // box_pos[b] = {stack_idx, height_idx}; not reset when b is carried out
+    long long energy_cost; // sum of (boxes moved + 1) over all Op1 moves applied so far
+    std::vector<std::pair<int, int>> ops_history; // 1-indexed {box, stack} pairs; {box, 0} marks a carry-out
+    int N_val; // number of boxes
+    int M_val; // number of stacks
+    bool record_ops_flag; // when false, apply_* do not append to ops_history
+    State() : energy_cost(0), N_val(0), M_val(0), record_ops_flag(true) {}
+    State(int N_in, int M_in, const std::vector<std::vector<int>>& initial_stacks_input, bool rec_ops = true) // builds the state from 1-indexed stacks listed bottom to top
+        : energy_cost(0), N_val(N_in), M_val(M_in), record_ops_flag(rec_ops) {
+        stacks.resize(M_val);
+        for (int i = 0; i < M_val; ++i) {
+            stacks[i].reserve(N_val + 20); // room for every box plus slack
+        }
+        box_pos.resize(N_val);
+        if (record_ops_flag) {
+            ops_history.reserve(N_val * 2 + 50);
+        }
+        for (int i = 0; i < M_val; ++i) {
+            for (size_t j = 0; j < initial_stacks_input[i].size(); ++j) {
+                int box_id = initial_stacks_input[i][j] - 1; // 0-indexed
+                stacks[i].push_back(box_id);
+                box_pos[box_id] = {i, (int)j};
+            }
+        }
+    }
+    State(const State& other) = default;
+    State& operator=(const State& other) = default;
+    State(State&& other) noexcept = default;
+    State& operator=(State&& other) noexcept = default;
+    double evaluate_destination_stack_choice( // heuristic cost of dropping block_to_move onto dest_stack_idx; smaller values mean a more attractive destination
+        int current_target_box_val, // 0-indexed
+        std::span<const int> block_to_move, // 0-indexed box values
+        int dest_stack_idx) const { // 0-indexed
+        const auto& dest_stack_content = stacks[dest_stack_idx];
+        double current_score = 0;
+        if (dest_stack_content.empty()) {
+            current_score -= HEURISTIC_EMPTY_STACK_BONUS_SCORE; // empty destination: large score reduction
+        } else {
+            current_score += (double)dest_stack_content.size() * STACK_HEIGHT_PENALTY_FACTOR; // taller destinations score worse
+            int min_label_in_dest_stack = N_val;
+            for (int box_val_in_dest : dest_stack_content) {
+                min_label_in_dest_stack = std::min(min_label_in_dest_stack, box_val_in_dest);
+            }
+            current_score -= (double)min_label_in_dest_stack * HEURISTIC_MIN_LABEL_IN_DEST_FACTOR; // a larger smallest label in the destination lowers the score
+        }
+        for (int box_in_dest : dest_stack_content) { // soon-needed boxes already in the destination would be buried under the whole block
+            if (box_in_dest > current_target_box_val && box_in_dest < current_target_box_val + HEURISTIC_LOOKAHEAD_WINDOW + 1) {
+                current_score += HEURISTIC_COVER_CRITICAL_PENALTY_PER_BOX_ABOVE * block_to_move.size();
+            }
+        }
+        for (size_t i = 0; i < block_to_move.size(); ++i) { // soon-needed boxes inside the block stay covered by the block boxes above them
+            int box_in_block = block_to_move[i];
+            if (box_in_block > current_target_box_val && box_in_block < current_target_box_val + HEURISTIC_LOOKAHEAD_WINDOW + 1) {
+                int boxes_on_top_in_block = block_to_move.size() - 1 - i;
+                current_score += HEURISTIC_COVER_CRITICAL_PENALTY_PER_BOX_ABOVE * boxes_on_top_in_block;
+            }
+        }
+        return current_score;
+    }
+    void apply_op1_move(int first_box_in_block_val, int num_moved_boxes, int dest_stack_idx) { // All 0-indexed; moves num_moved_boxes boxes starting at first_box_in_block_val; expects dest_stack_idx to differ from the source stack
+        int src_stack_idx = box_pos[first_box_in_block_val].first;
+        int first_box_height_idx_in_src = box_pos[first_box_in_block_val].second;
+        auto& src_stack_vec = stacks[src_stack_idx];
+        auto& dest_stack_vec = stacks[dest_stack_idx];
+        auto P_k_start_iter = src_stack_vec.begin() + first_box_height_idx_in_src;
+        auto P_k_end_iter = src_stack_vec.begin() + first_box_height_idx_in_src + num_moved_boxes;
+        int old_dest_stack_height = dest_stack_vec.size();
+        dest_stack_vec.insert(dest_stack_vec.end(), // append the block to the destination, keeping its order
+                              std::make_move_iterator(P_k_start_iter),
+                              std::make_move_iterator(P_k_end_iter));
+        for (int i = 0; i < num_moved_boxes; ++i) { // refresh positions of the moved boxes before trimming the source
+            int moved_box_val = dest_stack_vec[old_dest_stack_height + i];
+            box_pos[moved_box_val] = {dest_stack_idx, old_dest_stack_height + i};
+        }
+        src_stack_vec.erase(P_k_start_iter, P_k_end_iter);
+        energy_cost += (num_moved_boxes + 1); // moving k boxes costs k + 1
+        if (record_ops_flag) {
+            ops_history.push_back({first_box_in_block_val + 1, dest_stack_idx + 1}); // logged 1-indexed
+        }
+    }
+    void apply_op2_carry_out(int target_box_val) { // 0-indexed; pops the top of the box's stack without checking that the box is actually on top
+        int stack_idx = box_pos[target_box_val].first;
+        stacks[stack_idx].pop_back();
+        if (record_ops_flag) {
+            ops_history.push_back({target_box_val + 1, 0}); // stack value 0 marks a carry-out
+        }
+    }
+};
+struct BeamNode { // one beam entry: a board state plus the plan entries chosen so far
+    State current_board_state;
+    std::vector<int> partial_plan_D; // partial_plan_D[k] = 0-indexed stack recorded while handling box k
+    BeamNode() = default;
+    BeamNode(State state, std::vector<int> plan) // takes both arguments by value and moves them into the node
+        : current_board_state(std::move(state)), partial_plan_D(std::move(plan)) {}
+    bool operator<(const BeamNode& other) const { // orders nodes by accumulated energy only, so std::sort puts the cheapest first
+        return current_board_state.energy_cost < other.current_board_state.energy_cost;
+    }
+};
+std::vector<int> generate_initial_plan_beam_search( // builds a plan with one destination stack per box by beam search over carry-out order; completes greedily if time runs out
+    const std::vector<std::vector<int>>& initial_stacks_param,
+    int N_CONST, int M_CONST, int beam_width_param, double max_duration_for_beam_search) { // max_duration_for_beam_search: seconds measured from GLOBAL_START_TIME
+    std::vector<BeamNode> beam;
+    beam.reserve(beam_width_param);
+    State initial_state_for_beam(N_CONST, M_CONST, initial_stacks_param, false); // false: beam states do not record operations
+    beam.emplace_back(std::move(initial_state_for_beam), std::vector<int>());
+    if (N_CONST > 0) beam.back().partial_plan_D.reserve(N_CONST);
+    std::vector<BeamNode> candidates;
+    if (M_CONST > 0) candidates.reserve(beam_width_param * M_CONST + 5);
+    else candidates.reserve(beam_width_param + 5);
+    for (int k_target_box = 0; k_target_box < N_CONST; ++k_target_box) { // one beam layer per box, in carry-out order
+        double elapsed_seconds_so_far = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+        bool time_is_up = elapsed_seconds_so_far > max_duration_for_beam_search;
+        if (time_is_up) { // budget exhausted: finish the cheapest beam node greedily and return its plan
+            if (beam.empty()) {
+                 std::vector<int> emergency_plan(N_CONST);
+                 if (N_CONST == 0) return emergency_plan;
+                 for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+                 return emergency_plan;
+            }
+            std::sort(beam.begin(), beam.end()); // ascending energy_cost
+            BeamNode& best_node_so_far = beam[0];
+            for (int k_future = k_target_box; k_future < N_CONST; ++k_future) {
+                State& S_greedy = best_node_so_far.current_board_state;
+                int f_target_val = k_future;
+                int f_src_idx = S_greedy.box_pos[f_target_val].first;
+                int f_h_idx = S_greedy.box_pos[f_target_val].second;
+                int f_num_top = S_greedy.stacks[f_src_idx].size() - 1 - f_h_idx;
+                if (f_num_top == 0) {
+                    best_node_so_far.partial_plan_D.push_back(f_src_idx); // box already on top: no move, the source stack is recorded
+                } else {
+                    int f_block_first_val = S_greedy.stacks[f_src_idx][f_h_idx + 1];
+                    std::span<const int> block_span_greedy;
+                    if (f_num_top > 0) {
+                        block_span_greedy = std::span<const int>(S_greedy.stacks[f_src_idx].data() + f_h_idx + 1, f_num_top);
+                    }
+                    double min_h_eval_score = std::numeric_limits<double>::max();
+                    int best_d_greedy = (M_CONST > 1) ? (f_src_idx + 1) % M_CONST : 0; // default kept only if no candidate scores lower
+                    for (int d_cand = 0; d_cand < M_CONST; ++d_cand) {
+                        if (d_cand == f_src_idx) continue; // never move onto the source stack
+                        double h_eval_score = S_greedy.evaluate_destination_stack_choice(k_future, block_span_greedy, d_cand);
+                        if (h_eval_score < min_h_eval_score) {
+                            min_h_eval_score = h_eval_score;
+                            best_d_greedy = d_cand;
+                        }
+                    }
+                    best_node_so_far.partial_plan_D.push_back(best_d_greedy);
+                    S_greedy.apply_op1_move(f_block_first_val, f_num_top, best_d_greedy);
+                }
+                S_greedy.apply_op2_carry_out(f_target_val);
+            }
+            return best_node_so_far.partial_plan_D;
+        }
+        candidates.clear();
+        for (auto& current_beam_node : beam) { // expand every node of the current layer
+            State& S_curr = current_beam_node.current_board_state;
+            int target_val = k_target_box;
+            int src_idx = S_curr.box_pos[target_val].first;
+            int h_idx = S_curr.box_pos[target_val].second;
+            int num_top = S_curr.stacks[src_idx].size() - 1 - h_idx;
+            if (num_top == 0) { // box already on top: single successor with no move
+                State next_S = S_curr;
+                std::vector<int> next_plan = current_beam_node.partial_plan_D;
+                next_plan.push_back(src_idx);
+                next_S.apply_op2_carry_out(target_val);
+                candidates.emplace_back(std::move(next_S), std::move(next_plan));
+            } else {
+                int block_first_val = S_curr.stacks[src_idx][h_idx + 1];
+                std::span<const int> block_span_bs;
+                if (num_top > 0) {
+                   block_span_bs = std::span<const int>(S_curr.stacks[src_idx].data() + h_idx + 1, num_top);
+                }
+                for (int dest_cand = 0; dest_cand < M_CONST; ++dest_cand) { // one successor per other stack as destination
+                    if (dest_cand == src_idx) continue;
+                    State next_S = S_curr;
+                    std::vector<int> next_plan = current_beam_node.partial_plan_D;
+                    next_plan.push_back(dest_cand);
+                    next_S.apply_op1_move(block_first_val, num_top, dest_cand);
+                    next_S.apply_op2_carry_out(target_val);
+                    candidates.emplace_back(std::move(next_S), std::move(next_plan));
+                }
+            }
+        }
+        if (candidates.empty()) {
+            std::vector<int> emergency_plan(N_CONST);
+            if (N_CONST == 0) return emergency_plan;
+            if (beam.empty() && N_CONST > 0) {
+                 for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+                 return emergency_plan;
+            }
+            // Reaching this point means the beam was non-empty yet produced no candidates
+            // (e.g. the M=1 case, where an Op1 move has no valid dest_cand).
+            // For M=10 this should not happen.
+            // A smarter fallback would complete the plan from the best current beam node,
+            // but that is not implemented here.
+            // Current behavior: the partial plans are discarded and a fully random plan
+            // of length N_CONST is returned.
+            for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+            return emergency_plan;
+        }
+        std::sort(candidates.begin(), candidates.end()); // ascending energy_cost
+        beam.clear();
+        for (size_t i = 0; i < std::min((size_t)beam_width_param, candidates.size()); ++i) { // keep at most beam_width_param cheapest candidates
+            beam.push_back(std::move(candidates[i]));
+        }
+        if (beam.empty() && N_CONST > 0) {
+            std::vector<int> emergency_plan(N_CONST);
+            for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+            return emergency_plan;
+        }
+    }
+    if (beam.empty()){
+        std::vector<int> emergency_plan(N_CONST);
+        if (N_CONST == 0) return emergency_plan;
+        for (int i = 0; i < N_CONST; ++i) emergency_plan[i] = RGen.an_int(0, M_CONST - 1);
+        return emergency_plan;
+    }
+    std::sort(beam.begin(), beam.end());
+    return beam[0].partial_plan_D; // plan of the cheapest node after the last layer
+}
+struct SimulationResult { // outcome of simulating a plan to the end
+    long long energy_cost; // energy_cost of the final simulated state
+    std::vector<std::pair<int, int>> ops_history; // operation log taken from the simulated state; only appended to while recording is enabled
+};
+// Replays plan_D from the initial layout for boxes 0 .. k_limit_box_idx - 1
+// (operations are not recorded) and returns the resulting state together with
+// the energy spent so far. A plan entry equal to the box's own stack is
+// redirected to the next stack (cyclically) when there is more than one stack;
+// with a single stack no move is applied.
+std::pair<State, long long> simulate_up_to_k(
+    const std::vector<std::vector<int>>& initial_stacks_param,
+    const std::vector<int>& plan_D,
+    int N_CONST, int M_CONST,
+    int k_limit_box_idx) {
+    State sim(N_CONST, M_CONST, initial_stacks_param, false);
+    const bool has_other_stacks = M_CONST > 1;
+    int box = 0;
+    while (box < k_limit_box_idx) {
+        const int src = sim.box_pos[box].first;
+        const int height = sim.box_pos[box].second;
+        const int covering = sim.stacks[src].size() - 1 - height;
+        if (covering > 0) {
+            // The block to relocate starts with the box directly above the target.
+            const int block_bottom = sim.stacks[src][height + 1];
+            int dest = plan_D[box];
+            if (has_other_stacks) {
+                if (dest == src) {
+                    dest = (src + 1) % M_CONST;
+                }
+                sim.apply_op1_move(block_bottom, covering, dest);
+            }
+        }
+        sim.apply_op2_carry_out(box);
+        ++box;
+    }
+    // Read the energy before the state is moved into the result.
+    const long long energy_so_far = sim.energy_cost;
+    return {std::move(sim), energy_so_far};
+}
+// Continues a simulation from intermediate_state, handling boxes
+// k_start_box_idx .. N_CONST - 1 according to plan_D. When
+// record_ops_for_suffix is true the operation log is cleared first and the
+// suffix operations are recorded; otherwise recording is switched off.
+// Returns the final energy of the state and its operation log.
+SimulationResult run_simulation_from_intermediate_state(
+    State intermediate_state,
+    const std::vector<int>& plan_D,
+    int k_start_box_idx,
+    int N_CONST, int M_CONST,
+    bool record_ops_for_suffix) {
+    State sim = std::move(intermediate_state);
+    if (record_ops_for_suffix) {
+        sim.ops_history.clear();
+    }
+    sim.record_ops_flag = record_ops_for_suffix;
+    const bool has_other_stacks = M_CONST > 1;
+    int box = k_start_box_idx;
+    while (box < N_CONST) {
+        const int src = sim.box_pos[box].first;
+        const int height = sim.box_pos[box].second;
+        const int covering = sim.stacks[src].size() - 1 - height;
+        if (covering > 0) {
+            // The block to relocate starts with the box directly above the target.
+            const int block_bottom = sim.stacks[src][height + 1];
+            int dest = plan_D[box];
+            if (has_other_stacks) {
+                // A plan entry pointing at the source stack is redirected to the next stack.
+                if (dest == src) {
+                    dest = (src + 1) % M_CONST;
+                }
+                sim.apply_op1_move(block_bottom, covering, dest);
+            }
+        }
+        sim.apply_op2_carry_out(box);
+        ++box;
+    }
+    return {sim.energy_cost, std::move(sim.ops_history)};
+}
+// Simulates the whole plan_D from the initial layout, starting at box 0.
+// record_all_ops selects whether the operation log is produced.
+SimulationResult run_simulation(const std::vector<std::vector<int>>& initial_stacks_param,
+                                const std::vector<int>& plan_D, int N_CONST, int M_CONST, bool record_all_ops = true) {
+    State fresh_state(N_CONST, M_CONST, initial_stacks_param, record_all_ops);
+    const int first_box = 0;
+    return run_simulation_from_intermediate_state(
+        std::move(fresh_state), plan_D, first_box, N_CONST, M_CONST, record_all_ops);
+}
+int main() {
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    GLOBAL_START_TIME = std::chrono::steady_clock::now();
+    int N_CONST, M_CONST;
+    std::cin >> N_CONST >> M_CONST;
+    std::vector<std::vector<int>> initial_stacks_main(M_CONST, std::vector<int>(N_CONST / M_CONST));
+    for (int i = 0; i < M_CONST; ++i) {
+        for (int j = 0; j < N_CONST / M_CONST; ++j) {
+            std::cin >> initial_stacks_main[i][j];
+        }
+    }
+    double beam_search_duration_budget = TIME_LIMIT_SECONDS_TOTAL * BEAM_SEARCH_TIME_LIMIT_SECONDS_PARAM;
+    std::vector<int> current_plan_D = generate_initial_plan_beam_search(
+        initial_stacks_main,
+        N_CONST,
+        M_CONST,
+        BEAM_WIDTH,
+        beam_search_duration_budget
+    );
+    if (current_plan_D.size() < (size_t)N_CONST && N_CONST > 0) {
+        current_plan_D.resize(N_CONST, 0);
+    }
+     if (N_CONST == 0) {
+        current_plan_D.clear();
+    }
+    SimulationResult current_sim_res_eval = run_simulation(initial_stacks_main, current_plan_D, N_CONST, M_CONST, false);
+    long long current_energy = current_sim_res_eval.energy_cost;
+    std::vector<int> best_plan_D = current_plan_D;
+    long long best_energy = current_energy;
+    const int MAX_BLOCK_CHANGE_LEN = (N_CONST == 0) ? 0 : std::max(1, N_CONST / 15);
+    double time_for_sa_start_offset = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+    while (true) {
+        double elapsed_seconds_total = std::chrono::duration<double>(std::chrono::steady_clock::now() - GLOBAL_START_TIME).count();
+        if (elapsed_seconds_total >= TIME_LIMIT_SECONDS_TOTAL - 0.02) break;
+        double elapsed_seconds_sa_phase = elapsed_seconds_total - time_for_sa_start_offset;
+        double total_time_for_sa_phase = (TIME_LIMIT_SECONDS_TOTAL - 0.02) - time_for_sa_start_offset;
+        if (total_time_for_sa_phase <= 0.001) break;
+        double progress_ratio = std::min(1.0, std::max(0.0, elapsed_seconds_sa_phase / total_time_for_sa_phase));
+        double current_temp = T_INITIAL_SA * std::pow(T_FINAL_SA / T_INITIAL_SA, progress_ratio);
+        current_temp = std::max(current_temp, T_FINAL_SA);
+        std::vector<int> new_plan_D = current_plan_D;
+        long long new_energy;
+        int k_change_start_idx = N_CONST;
+        State state_at_change_point;
+        bool can_use_partial_simulation = false;
+        double op_choice_rand = RGen.a_double(0.0, 1.0);
+        if (op_choice_rand < 0.35 && N_CONST > 0) {
+            int idx_to_change = RGen.an_int(0, N_CONST - 1);
+            k_change_start_idx = idx_to_change;
+            new_plan_D[idx_to_change] = RGen.an_int(0, M_CONST - 1); // M_CONST is 10, so M_CONST-1 is 9
+            // For partial sim, state needs to be based on prefix of new_plan_D
+            auto sim_pair = simulate_up_to_k(initial_stacks_main, new_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            state_at_change_point = std::move(sim_pair.first);
+            can_use_partial_simulation = true;
+        } else if (op_choice_rand < 0.80 && N_CONST > 0) {
+            int block_op_start_idx_rand = RGen.an_int(0, N_CONST - 1);
+            int len = RGen.an_int(1, MAX_BLOCK_CHANGE_LEN);
+            k_change_start_idx = N_CONST;
+            for(int i=0; i<len; ++i) {
+                int current_k_in_plan_rand = (block_op_start_idx_rand + i) % N_CONST;
+                k_change_start_idx = std::min(k_change_start_idx, current_k_in_plan_rand);
+                new_plan_D[current_k_in_plan_rand] = RGen.an_int(0, M_CONST - 1);
+            }
+            // For partial sim, state needs to be based on prefix of new_plan_D
+            auto sim_pair = simulate_up_to_k(initial_stacks_main, new_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            state_at_change_point = std::move(sim_pair.first);
+            can_use_partial_simulation = true;
+        } else if (N_CONST > 0) {
+            int k_to_recompute = RGen.an_int(0, N_CONST - 1);
+            k_change_start_idx = k_to_recompute;
+            // For greedy, decisions are based on current_plan_D's prefix
+            // So, simulate current_plan_D up to k_change_start_idx
+            auto sim_pair_greedy = simulate_up_to_k(initial_stacks_main, current_plan_D, N_CONST, M_CONST, k_change_start_idx);
+            State decision_state = std::move(sim_pair_greedy.first);
+            int target_op_val = k_to_recompute;
+            int src_op_idx = decision_state.box_pos[target_op_val].first;
+            int height_op_idx = decision_state.box_pos[target_op_val].second;
+            int num_top_op = decision_state.stacks[src_op_idx].size() - 1 - height_op_idx;
+            if (num_top_op > 0 && M_CONST > 1) {
+                std::span<const int> block_span_sa;
+                if (num_top_op > 0) {
+                     block_span_sa = std::span<const int>(decision_state.stacks[src_op_idx].data() + height_op_idx + 1, num_top_op);
+                }
+                double min_h_score = std::numeric_limits<double>::max();
+                int best_dest_idx = (M_CONST > 1) ? (src_op_idx + 1) % M_CONST : 0;
+                for (int dest_cand = 0; dest_cand < M_CONST; ++dest_cand) {
+                    if (dest_cand == src_op_idx) continue;
+                    double h_score = decision_state.evaluate_destination_stack_choice(target_op_val, block_span_sa, dest_cand);
+                    if (h_score < min_h_score) {
+                        min_h_score = h_score;
+                        best_dest_idx = dest_cand;
+                    }
+                }
+                new_plan_D[k_to_recompute] = best_dest_idx;
+            } else {
+                new_plan_D[k_to_recompute] = src_op_idx;
+            }
+            // The state for suffix evaluation should be decision_state,
+            // as new_plan_D only differs at or after k_change_start_idx.
+            // The prefix energy is decision_state.energy_cost.
+            state_at_change_point = std::move(decision_state);
+            can_use_partial_simulation = true;
+        } else {
+             k_change_start_idx = 0;
+             can_use_partial_simulation = false;
+        }
+        if (N_CONST == 0) {
+            new_energy = 0;
+        } else if (!can_use_partial_simulation || k_change_start_idx == 0) {
+            // If k_change_start_idx is 0, state_at_change_point is initial state with 0 energy.
+            // Full simulation is equivalent and perhaps cleaner.
+             new_energy = run_simulation(initial_stacks_main, new_plan_D, N_CONST, M_CONST, false).energy_cost;
+        } else {
+             SimulationResult suffix_res = run_simulation_from_intermediate_state(std::move(state_at_change_point), new_plan_D, k_change_start_idx, N_CONST, M_CONST, false);
+             new_energy = suffix_res.energy_cost;
+        }
+        if (new_energy < current_energy) {
+            current_energy = new_energy;
+            current_plan_D = new_plan_D;
+            if (new_energy < best_energy) {
+                best_energy = new_energy;
+                best_plan_D = new_plan_D;
+            }
+        } else {
+            double delta_energy = new_energy - current_energy;
+            if (current_temp > 1e-9 && RGen.a_double(0.0, 1.0) < std::exp(-delta_energy / current_temp)) {
+                current_energy = new_energy;
+                current_plan_D = new_plan_D;
+            }
+        }
+    }
+    SimulationResult final_sim_result = run_simulation(initial_stacks_main, best_plan_D, N_CONST, M_CONST, true);
+    for (const auto& op : final_sim_result.ops_history) {
+        std::cout << op.first << " " << op.second << "\n";
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc027/best_program.cpp ADDED Viewed

	@@ -0,0 +1,595 @@

+# EVOLVE-BLOCK-START
+#pragma GCC optimize("O3,unroll-loops")
+#include <iostream>
+#include <vector>
+#include <string>
+#include <numeric>
+#include <algorithm>
+#include <chrono>
+#include <random>
+#include <iomanip>
+#include <cmath>
+// #include <map> // Not used
+// Global game data, filled once from standard input in main().
+int N_GRID_SIZE; // Board side length N (the board is N x N cells).
+std::vector<std::string> H_WALLS_INFO; // N-1 strings; '1' at [r][c] is a wall between (r, c) and (r+1, c).
+std::vector<std::string> V_WALLS_INFO; // N strings; '1' at [r][c] is a wall between (r, c) and (r, c+1).
+int D_SUSC[40][40]; // Dirt susceptibility per cell; only the top-left N x N corner is read from input.
+struct Pos {
+    // A board cell: `r` is the row index and `c` the column index.
+    int16_t r, c;
+    // The default position is the origin (0, 0).
+    Pos() : r(0), c(0) {}
+    Pos(int16_t r_val, int16_t c_val) : r(r_val), c(c_val) {}
+    // Two positions differ as soon as either coordinate differs.
+    bool operator!=(const Pos& rhs) const { return r != rhs.r || c != rhs.c; }
+    bool operator==(const Pos& rhs) const { return !(*this != rhs); }
+    // Row-major ordering: compare rows first, then columns.
+    bool operator<(const Pos& rhs) const {
+        return (r == rhs.r) ? (c < rhs.c) : (r < rhs.r);
+    }
+};
+constexpr int DR[] = {0, 1, 0, -1}; // R, D, L, U - row delta for each direction index
+constexpr int DC[] = {1, 0, -1, 0}; // Column delta, in the same direction order as DR.
+constexpr char DIR_CHARS[] = {'R', 'D', 'L', 'U'}; // Output letter per direction index; index (i + 2) % 4 is the opposite direction.
+const int MAX_L_PATH = 100000; // Upper bound on the number of moves in a route.
+double MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE; // Set in main() to 95% of MAX_L_PATH; above it, lengthening edits are mostly skipped.
+double MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE; // Set in main() to N*N; below it, shortening edits are mostly skipped.
+std::mt19937 RND_ENGINE; // Shared pseudo-random generator, seeded in main().
+Pos APSP_PARENT[40][40][40][40]; // [sr][sc][tr][tc]: predecessor of (tr, tc) on a shortest path from (sr, sc); written by compute_apsp().
+int APSP_DIST[40][40][40][40]; // [sr][sc][tr][tc]: number of moves from (sr, sc) to (tr, tc), or -1 when unreachable.
+bool is_valid_pos(int r, int c) {
+    // A cell is on the board when both indices lie in [0, N_GRID_SIZE).
+    if (r < 0 || r >= N_GRID_SIZE) return false;
+    if (c < 0 || c >= N_GRID_SIZE) return false;
+    return true;
+}
+bool check_wall(Pos p_from, Pos p_to) {
+/*
+  Reports whether the step from p_from to p_to is blocked.
+  The row delta is examined first (+1 = down, -1 = up), then the column delta
+  (+1 = right, -1 = left). Any other displacement is reported as blocked.
+  No bounds checking is done here; both cells must already be on the board.
+*/
+    const int row_step = p_to.r - p_from.r;
+    const int col_step = p_to.c - p_from.c;
+    switch (row_step) {
+        case 1:  return H_WALLS_INFO[p_from.r][p_from.c] == '1'; // Down
+        case -1: return H_WALLS_INFO[p_to.r][p_to.c] == '1';     // Up
+        default: break;
+    }
+    switch (col_step) {
+        case 1:  return V_WALLS_INFO[p_from.r][p_from.c] == '1'; // Right
+        case -1: return V_WALLS_INFO[p_from.r][p_to.c] == '1';   // Left
+        default: break;
+    }
+    return true;
+}
+char get_move_char(Pos p_from, Pos p_to) {
+    // Translate the displacement p_from -> p_to into its direction letter;
+    // a blank is returned when the displacement is not one of the four unit steps.
+    const int row_step = p_to.r - p_from.r;
+    const int col_step = p_to.c - p_from.c;
+    int dir = 0;
+    while (dir < 4 && !(DR[dir] == row_step && DC[dir] == col_step)) ++dir;
+    return dir < 4 ? DIR_CHARS[dir] : ' ';
+}
+ // Compatibility stub (unused): ignores its argument and always yields a blank.
+inline char invert_move(char /*move*/) {
+    return ' ';
+}
+void compute_apsp() {
+/*
+  Fills APSP_DIST and APSP_PARENT for every ordered pair of cells.
+  One breadth-first search is run per source cell over the wall-respecting
+  4-neighbour grid. Unreached targets keep distance -1; for reached targets the
+  parent entry is the cell from which the search first arrived.
+*/
+    const int n = N_GRID_SIZE;
+    for (int src = 0; src < n * n; ++src) {
+        const int sr = src / n;
+        const int sc = src % n;
+        int (&dist)[40][40] = APSP_DIST[sr][sc];
+        Pos (&parent)[40][40] = APSP_PARENT[sr][sc];
+        for (int tr = 0; tr < n; ++tr) {
+            for (int tc = 0; tc < n; ++tc) dist[tr][tc] = -1;
+        }
+        // The vector doubles as the BFS queue; `head` is the read cursor.
+        std::vector<Pos> frontier;
+        frontier.reserve(n * n);
+        frontier.push_back(Pos{(int16_t)sr, (int16_t)sc});
+        dist[sr][sc] = 0;
+        for (int head = 0; head < static_cast<int>(frontier.size()); ++head) {
+            const Pos here = frontier[head];
+            for (int d = 0; d < 4; ++d) {
+                const Pos there{(int16_t)(here.r + DR[d]), (int16_t)(here.c + DC[d])};
+                if (!is_valid_pos(there.r, there.c)) continue;
+                if (check_wall(here, there)) continue;
+                if (dist[there.r][there.c] != -1) continue;
+                dist[there.r][there.c] = dist[here.r][here.c] + 1;
+                parent[there.r][there.c] = here;
+                frontier.push_back(there);
+            }
+        }
+    }
+}
+bool get_apsp_moves(Pos p_from, Pos p_to, std::vector<char>& out_moves) {
+/*
+  Writes the move letters of a precomputed shortest path p_from -> p_to into
+  out_moves (previous contents are discarded). Returns false, leaving out_moves
+  empty, when p_to is unreachable; an empty path with `true` when both ends match.
+*/
+    out_moves.clear();
+    if (p_from == p_to) return true;
+    const int steps = APSP_DIST[p_from.r][p_from.c][p_to.r][p_to.c];
+    if (steps == -1) return false;
+    out_moves.reserve(steps);
+    // Walk the parent links backwards from the target, then flip the result.
+    for (Pos cell = p_to; cell != p_from; ) {
+        const Pos before = APSP_PARENT[p_from.r][p_from.c][cell.r][cell.c];
+        out_moves.push_back(get_move_char(before, cell));
+        cell = before;
+    }
+    std::reverse(out_moves.begin(), out_moves.end());
+    return true;
+}
+// Declared but not referenced anywhere else in this file.
+std::vector<std::vector<std::vector<int>>> CELL_VISIT_TIMES_GLOBAL_BUFFER;
+// One cell together with its weighted dirt contribution.
+struct CellDirtInfo {
+    long double weighted_dirt_contribution;
+    Pos p;
+    // Reversed comparison: sorting ascending with this operator puts the
+    // largest contribution first.
+    bool operator<(const CellDirtInfo& rhs) const {
+        return rhs.weighted_dirt_contribution < weighted_dirt_contribution;
+    }
+};
+// Scratch list reused by select_target_cell_for_dirt_ops() to avoid reallocation.
+std::vector<CellDirtInfo> TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER;
+struct PathData {
+/*
+  A candidate cleaning route together with its cached evaluation.
+    moves                    - route as direction letters (R/D/L/U).
+    coords                   - cells visited, starting with (0, 0); rebuilt from `moves`.
+    visited_flags            - which cells the route touches.
+    score_val                - average dirtiness; 1e18 marks an invalid/unevaluated route.
+    total_dirt_sum_numerator - susceptibility-weighted sum of the per-cell terms.
+    cell_dirt_term_sum       - per-cell sum of g*(g-1)/2 over the gaps g between visits.
+*/
+    std::vector<char> moves;
+    std::vector<Pos> coords;
+    bool visited_flags[40][40];
+    long double score_val;
+    long double total_dirt_sum_numerator;
+    long double cell_dirt_term_sum[40][40];
+    // Value-initialise both tables in full. Clearing only the N_GRID_SIZE x
+    // N_GRID_SIZE corner left the remaining entries indeterminate (and they are
+    // later read by the defaulted copy operations), and made construction depend
+    // on N_GRID_SIZE already having been read.
+    PathData()
+        : visited_flags{},
+          score_val(1e18L),
+          total_dirt_sum_numerator(0.0L),
+          cell_dirt_term_sum{} {}
+    PathData(const PathData& other) = default;
+    PathData(PathData&& other) = default;
+    PathData& operator=(const PathData& other) = default;
+    PathData& operator=(PathData&& other) = default;
+};
+bool update_coords_and_visited_flags(PathData& pd) {
+/*
+  Replays pd.moves from (0, 0), regenerating pd.coords and pd.visited_flags.
+  Stops and returns false at the first unknown letter, off-board step or wall
+  hit (coords/flags then describe only the prefix replayed so far).
+  Returns true when every move was legal.
+*/
+    const Pos origin{0, 0};
+    pd.coords.assign(1, origin);
+    if (!pd.moves.empty()) pd.coords.reserve(pd.moves.size() + 1);
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        std::fill(pd.visited_flags[r], pd.visited_flags[r] + N_GRID_SIZE, false);
+    }
+    pd.visited_flags[0][0] = true;
+    Pos here = origin;
+    for (const char letter : pd.moves) {
+        // Map the letter back to its direction index.
+        const char* hit = std::find(DIR_CHARS, DIR_CHARS + 4, letter);
+        if (hit == DIR_CHARS + 4) return false;
+        const int dir = static_cast<int>(hit - DIR_CHARS);
+        const Pos there{(int16_t)(here.r + DR[dir]), (int16_t)(here.c + DC[dir])};
+        if (!is_valid_pos(there.r, there.c)) return false;
+        if (check_wall(here, there)) return false;
+        here = there;
+        pd.coords.push_back(here);
+        pd.visited_flags[here.r][here.c] = true;
+    }
+    return true;
+}
+void calculate_score_full(PathData& pd) {
+/*
+  Evaluates a route as the exact average dirtiness of repeating it forever and
+  stores the result in pd.score_val (1e18 for an illegal route).
+  A route is illegal when a move is invalid, it is longer than MAX_L_PATH, it
+  does not end at (0, 0), or some cell is never visited.
+  For a legal route of length L, each cell contributes the sum of g*(g-1)/2 over
+  the gaps g between consecutive visits, including the wrap-around gap
+  first + L - last. Contributions are kept in pd.cell_dirt_term_sum and their
+  D_SUSC-weighted total in pd.total_dirt_sum_numerator; the score is total / L.
+  Only the first and last visit time per cell are tracked, so the scan is
+  O(L + N^2).
+*/
+    auto mark_invalid = [&pd]() { pd.score_val = 1e18L; };
+    auto gap_cost = [](long long gap) { return (long double)gap * (gap - 1) / 2.0L; };
+    const int n = N_GRID_SIZE;
+
+    if (!update_coords_and_visited_flags(pd)) return mark_invalid();
+    if (pd.moves.size() > MAX_L_PATH) return mark_invalid();
+    // An empty route is only closed on a board with at most one cell.
+    const bool returns_home = pd.moves.empty() ? !(n > 1) : (pd.coords.back() == Pos{0,0});
+    if (!returns_home) return mark_invalid();
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c < n; ++c) {
+            if (!pd.visited_flags[r][c]) return mark_invalid();
+        }
+    }
+
+    const int L = (int)pd.moves.size();
+    if (L == 0) {
+        const bool single_cell = (n == 1);
+        pd.score_val = single_cell ? 0.0L : 1e18L;
+        pd.total_dirt_sum_numerator = 0.0L;
+        if (single_cell) pd.cell_dirt_term_sum[0][0] = 0.0L;
+        return;
+    }
+
+    static int first_visit[40][40], last_visit[40][40];
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c < n; ++c) {
+            first_visit[r][c] = -1;
+            last_visit[r][c] = -1;
+            pd.cell_dirt_term_sum[r][c] = 0.0L;
+        }
+    }
+    // Single pass over the arrival times 1..L.
+    for (int t = 1; t <= L; ++t) {
+        const Pos& cell = pd.coords[t];
+        const int previous = last_visit[cell.r][cell.c];
+        if (previous == -1) {
+            first_visit[cell.r][cell.c] = t;
+        } else {
+            pd.cell_dirt_term_sum[cell.r][cell.c] += gap_cost((long long)t - (long long)previous);
+        }
+        last_visit[cell.r][cell.c] = t;
+    }
+
+    long double weighted_total = 0.0L;
+    for (int r = 0; r < n; ++r) {
+        for (int c = 0; c < n; ++c) {
+            if (first_visit[r][c] == -1) {
+                // Not expected after the coverage check above; kept as a safeguard.
+                pd.cell_dirt_term_sum[r][c] = (long double)L * (L - 1) / 2.0L;
+            } else {
+                const long long wrap_gap = (long long)first_visit[r][c] + (long long)L - (long long)last_visit[r][c];
+                pd.cell_dirt_term_sum[r][c] += gap_cost(wrap_gap);
+            }
+            weighted_total += (long double)D_SUSC[r][c] * pd.cell_dirt_term_sum[r][c];
+        }
+    }
+    pd.total_dirt_sum_numerator = weighted_total;
+    pd.score_val = pd.total_dirt_sum_numerator / L;
+}
+// Cells already entered by generate_initial_dfs_path(); cleared by the caller.
+bool initial_dfs_visited[40][40];
+void generate_initial_dfs_path(int r, int c, PathData& pd) {
+/*
+  Recursive depth-first walk from (r, c) that appends moves to pd.moves.
+  Each step into an unvisited neighbour is followed, after the recursive call
+  returns, by the opposite step back, so the walk ends where it started.
+*/
+    initial_dfs_visited[r][c] = true;
+    const Pos here{(int16_t)r, (int16_t)c};
+    for (int d = 0; d < 4; ++d) {
+        const Pos there{(int16_t)(r + DR[d]), (int16_t)(c + DC[d])};
+        if (!is_valid_pos(there.r, there.c)) continue;
+        if (check_wall(here, there)) continue;
+        if (initial_dfs_visited[there.r][there.c]) continue;
+        pd.moves.push_back(DIR_CHARS[d]);
+        generate_initial_dfs_path(there.r, there.c, pd);
+        pd.moves.push_back(DIR_CHARS[(d + 2) % 4]);
+    }
+}
+Pos select_target_cell_for_dirt_ops(const PathData& current_pd_obj, bool use_sqrt_N_sampling) {
+/*
+  Chooses a cell for the dirt-driven edit operators to route through.
+  Every cell is weighted by D_SUSC[r][c] * cell_dirt_term_sum[r][c]; the result
+  is drawn uniformly from the K cells with the largest weight, where
+    K = max(1, N)          when use_sqrt_N_sampling is true,
+    K = max(10, N*N / 10)  otherwise,
+  capped at the number of cells. If the route is currently invalid
+  (score above 1e17) a uniformly random cell is returned instead.
+  Only membership in the top K matters for a uniform draw, so the list is
+  partitioned with std::nth_element instead of being fully sorted.
+*/
+    auto random_cell = []() {
+        std::uniform_int_distribution<int> r_dist(0, N_GRID_SIZE - 1);
+        const int r = r_dist(RND_ENGINE); // row is drawn first, then column
+        const int c = r_dist(RND_ENGINE);
+        return Pos{(int16_t)r, (int16_t)c};
+    };
+    std::vector<CellDirtInfo>& cells = TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER;
+    cells.clear();
+    if (current_pd_obj.score_val > 1e17L) return random_cell();
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        for (int c = 0; c < N_GRID_SIZE; ++c) {
+            cells.push_back({(long double)D_SUSC[r][c] * current_pd_obj.cell_dirt_term_sum[r][c], Pos{(int16_t)r, (int16_t)c}});
+        }
+    }
+    if (cells.empty()) return random_cell();
+    const int total_cells = (int)cells.size();
+    const int wanted = use_sqrt_N_sampling ? std::max(1, N_GRID_SIZE)
+                                           : std::max(10, N_GRID_SIZE * N_GRID_SIZE / 10);
+    // total_cells >= 1 and wanted >= 1 here, so K_select is always positive.
+    const int K_select = std::min(total_cells, wanted);
+    if (K_select < total_cells) {
+        // CellDirtInfo::operator< orders by descending contribution, so after
+        // this call the first K_select entries are the K_select largest ones.
+        std::nth_element(cells.begin(), cells.begin() + K_select, cells.end());
+    }
+    std::uniform_int_distribution<int> top_k_dist(0, K_select - 1);
+    return cells[top_k_dist(RND_ENGINE)].p;
+}
+const int OP4_SAMPLE_POINTS = 20; // Route positions sampled by operator 4 when looking for a short detour.
+const int OP5_MAX_SUBSEGMENT_LEN = 20; // Longest subsegment (in moves) that operator 5 replaces.
+std::vector<char> GET_APSP_MOVES_BUFFER1; // Scratch output for get_apsp_moves(), reused across iterations.
+std::vector<char> GET_APSP_MOVES_BUFFER2; // Second scratch output, used when two path pieces are needed at once.
+int main(int argc, char *argv[]) { /*
+  Entry point. Reads the board size, wall strings and dirt susceptibilities from
+  standard input, precomputes all-pairs shortest paths, builds an initial closed
+  route with a depth-first walk from (0, 0), and then refines it by simulated
+  annealing until the time budget expires. The best route found is printed on one
+  line as R/D/L/U characters. An optional first command-line argument overrides
+  the default time budget of 1.95 seconds. Always returns 0.
+*/
+    std::ios_base::sync_with_stdio(false); std::cin.tie(NULL);
+    double time_limit_seconds = 1.95;
+    if (argc > 1) time_limit_seconds = std::stod(argv[1]);
+    auto time_start_prog = std::chrono::high_resolution_clock::now();
+    RND_ENGINE.seed(std::chrono::system_clock::now().time_since_epoch().count()); // Clock-based seed: runs are not reproducible.
+    std::cin >> N_GRID_SIZE;
+    H_WALLS_INFO.resize(N_GRID_SIZE - 1);
+    V_WALLS_INFO.resize(N_GRID_SIZE);
+    for (int i = 0; i < N_GRID_SIZE - 1; ++i) std::cin >> H_WALLS_INFO[i];
+    for (int i = 0; i < N_GRID_SIZE; ++i) std::cin >> V_WALLS_INFO[i];
+    for (int i = 0; i < N_GRID_SIZE; ++i) for (int j = 0; j < N_GRID_SIZE; ++j) std::cin >> D_SUSC[i][j];
+    MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE = MAX_L_PATH * 0.95;
+    MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE = N_GRID_SIZE * N_GRID_SIZE;
+    compute_apsp();
+    TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER.reserve(N_GRID_SIZE * N_GRID_SIZE);
+    GET_APSP_MOVES_BUFFER1.reserve(N_GRID_SIZE*N_GRID_SIZE*2);
+    GET_APSP_MOVES_BUFFER2.reserve(N_GRID_SIZE*N_GRID_SIZE*2);
+    PathData current_pd_obj; // Constructed after N_GRID_SIZE is known.
+    for(int i=0; i<N_GRID_SIZE; ++i) for(int j=0; j<N_GRID_SIZE; ++j) initial_dfs_visited[i][j] = false;
+    generate_initial_dfs_path(0, 0, current_pd_obj); // Initial route: depth-first walk that retraces every step.
+    calculate_score_full(current_pd_obj);
+    PathData best_pd_obj = current_pd_obj;
+    double start_temp = 5000.0 * sqrt(N_GRID_SIZE); // Annealing temperature decays geometrically from start_temp to end_temp.
+    double end_temp = 0.1;
+    int iterations_count = 0;
+    PathData candidate_pd_obj;
+    std::uniform_real_distribution<double> accept_dist_01(0.0, 1.0);
+    while(true) { // One candidate edit is proposed and judged per iteration.
+        iterations_count++;
+        if(iterations_count % 100 == 0){ // The stop condition is only evaluated every 100th iteration.
+            auto now_time = std::chrono::high_resolution_clock::now();
+            double elapsed_seconds = std::chrono::duration<double>(now_time - time_start_prog).count();
+            if (elapsed_seconds > time_limit_seconds) break;
+        }
+        int L_curr = current_pd_obj.moves.size();
+        bool modified_successfully = false;
+        for (int try_op = 0; try_op < 10; ++try_op) { // Up to 10 attempts to draw an operator that applies.
+            candidate_pd_obj = current_pd_obj; // Every attempt starts from a fresh copy of the current route.
+            std::uniform_int_distribution<int> op_dist(0, 99);
+            int op_choice_val = op_dist(RND_ENGINE);
+            int operation_type = -1;
+            if (op_choice_val < 15) operation_type = 0; // 15%: insert an out-and-back step
+            else if (op_choice_val < 30) operation_type = 1; // 15%: remove an out-and-back pair
+            else if (op_choice_val < 60) operation_type = 2; // 30%: shortcut a subsegment
+            else if (op_choice_val < 70) operation_type = 3; // 10%: disabled operator (attempt is wasted)
+            else if (op_choice_val < 85) operation_type = 4; // 15%: detour to a dirty cell and back
+            else operation_type = 5; // 15%: reroute a short subsegment through a dirty cell
+            bool is_length_increasing_op = (operation_type == 0 || operation_type == 4);
+            // Op5 can increase or decrease length. Check its specific outcome for length control.
+            bool is_length_decreasing_op = (operation_type == 1 || operation_type == 2);
+            if (is_length_increasing_op && L_curr > MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE) {
+                if (accept_dist_01(RND_ENGINE) < 0.75) continue; // Near the length cap: skip lengthening edits 75% of the time.
+            }
+            if (is_length_decreasing_op && L_curr < MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE) {
+                 if (accept_dist_01(RND_ENGINE) < 0.75) continue; // Very short route: skip shortening edits 75% of the time.
+            }
+            if (operation_type == 0) { // Operator 0: step to a random open neighbour and straight back.
+                if (L_curr == 0 && N_GRID_SIZE > 1) continue;
+                if (candidate_pd_obj.moves.size() + 2 > MAX_L_PATH) continue;
+                if (current_pd_obj.coords.empty() && N_GRID_SIZE > 1) continue;
+                std::uniform_int_distribution<int> k_idx_dist(0, L_curr);
+                int k_coord_idx = k_idx_dist(RND_ENGINE);
+                Pos p_k = current_pd_obj.coords[k_coord_idx];
+                std::vector<int> possible_dirs; possible_dirs.reserve(4);
+                for(int dir_i=0; dir_i<4; ++dir_i) {
+                    Pos neighbor_p = Pos{(int16_t)(p_k.r + DR[dir_i]), (int16_t)(p_k.c + DC[dir_i])};
+                    if (is_valid_pos(neighbor_p.r, neighbor_p.c) && !check_wall(p_k, neighbor_p)) {
+                        possible_dirs.push_back(dir_i);
+                    }
+                }
+                if (possible_dirs.empty()) continue;
+                std::uniform_int_distribution<int> dir_choice_dist(0, possible_dirs.size()-1);
+                int random_dir_idx = possible_dirs[dir_choice_dist(RND_ENGINE)];
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + k_coord_idx,
+                                             {DIR_CHARS[random_dir_idx], DIR_CHARS[(random_dir_idx+2)%4]});
+            } else if (operation_type == 1) { // Operator 1: delete two consecutive moves that return to the same cell.
+                if (L_curr < 2) continue;
+                if (current_pd_obj.coords.size() < 3) continue;
+                std::vector<int> possible_indices; possible_indices.reserve(L_curr);
+                for(int k_m_idx = 0; k_m_idx <= L_curr - 2; ++k_m_idx) {
+                    if (current_pd_obj.coords[k_m_idx] == current_pd_obj.coords[k_m_idx+2]) {
+                        possible_indices.push_back(k_m_idx);
+                    }
+                }
+                if (possible_indices.empty()) continue;
+                std::uniform_int_distribution<int> idx_choice_dist(0, possible_indices.size()-1);
+                int k_move_idx_to_remove = possible_indices[idx_choice_dist(RND_ENGINE)];
+                candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + k_move_idx_to_remove,
+                                             candidate_pd_obj.moves.begin() + k_move_idx_to_remove + 2);
+            } else if (operation_type == 2) { // Operator 2: replace a random subsegment by a strictly shorter shortest path.
+                if (L_curr < 1) continue;
+                std::uniform_int_distribution<int> c_idx1_dist(0, L_curr > 0 ? L_curr - 1 : 0);
+                int c_idx1 = c_idx1_dist(RND_ENGINE);
+                std::uniform_int_distribution<int> c_idx2_dist(c_idx1 + 1, L_curr);
+                int c_idx2 = c_idx2_dist(RND_ENGINE);
+                if (c_idx1 >= c_idx2 && L_curr > 0) continue; // Need valid subsegment
+                if (L_curr == 0 && (c_idx1!=0 || c_idx2!=0)) continue; // L=0 means c_idx1=0, c_idx2=0 only
+                Pos p_A = current_pd_obj.coords[c_idx1]; Pos p_B = current_pd_obj.coords[c_idx2];
+                if (!get_apsp_moves(p_A, p_B, GET_APSP_MOVES_BUFFER1)) continue;
+                if (GET_APSP_MOVES_BUFFER1.size() >= (size_t)(c_idx2 - c_idx1) && L_curr > 0 ) continue; // APSP not shorter (allow if L_curr=0)
+                if ( ( (long long)candidate_pd_obj.moves.size() - (c_idx2 - c_idx1) + GET_APSP_MOVES_BUFFER1.size()) > MAX_L_PATH) continue;
+                if (c_idx1 < c_idx2) { // Ensure erase range is valid
+                    candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + c_idx1,
+                                                candidate_pd_obj.moves.begin() + c_idx2);
+                }
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                              GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end());
+            } else if (operation_type == 3) {
+                continue; // Disabled: reversing + inverting subsegments often breaks path; skip to save time.
+            } else if (operation_type == 4) { // Operator 4: add a round trip to a high-dirt cell from a nearby route position.
+                if (L_curr == 0 && N_GRID_SIZE > 1) continue;
+                if (current_pd_obj.coords.empty() && N_GRID_SIZE > 1) continue;
+                Pos target_cell = select_target_cell_for_dirt_ops(current_pd_obj, false);
+                int best_k_coord_idx = -1;
+                long long min_detour_len_increase = (long long)MAX_L_PATH * 2 +1; // path len increase: 2 for wiggle, 2*dist for detour
+                if (L_curr >= 0) { // Path can be empty (L_curr=0), then coords has 1 element (0,0)
+                    int num_samples = (L_curr == 0) ? 1: OP4_SAMPLE_POINTS; // If L_curr=0, only one point to pick: coords[0]
+                    for (int i=0; i < num_samples; ++i) { // Keep the sampled position with the cheapest round trip.
+                        std::uniform_int_distribution<int> k_idx_dist(0, L_curr);
+                        int k_coord_idx_sample = (L_curr == 0) ? 0 : k_idx_dist(RND_ENGINE);
+                        Pos p_A_sample = current_pd_obj.coords[k_coord_idx_sample];
+                        long long current_detour_increase;
+                        if (p_A_sample == target_cell) {
+                             current_detour_increase = 2; // Wiggle cost
+                        } else {
+                            int dist_pa_target = APSP_DIST[p_A_sample.r][p_A_sample.c][target_cell.r][target_cell.c];
+                            if (dist_pa_target != -1) {
+                                current_detour_increase = (long long)dist_pa_target * 2;
+                            } else {
+                                current_detour_increase = (long long)MAX_L_PATH * 2 + 1; // effectively infinity
+                            }
+                        }
+                        if (current_detour_increase < min_detour_len_increase) {
+                            min_detour_len_increase = current_detour_increase;
+                            best_k_coord_idx = k_coord_idx_sample;
+                        }
+                    }
+                }
+                if (best_k_coord_idx == -1 || min_detour_len_increase > MAX_L_PATH) continue;
+                Pos p_A = current_pd_obj.coords[best_k_coord_idx];
+                if (candidate_pd_obj.moves.size() + min_detour_len_increase > MAX_L_PATH) continue;
+                if (p_A == target_cell) { // Already on the target: fall back to an out-and-back step.
+                     std::vector<int> possible_dirs; possible_dirs.reserve(4);
+                     for(int dir_i=0; dir_i<4; ++dir_i) {
+                         Pos neighbor_p = Pos{(int16_t)(p_A.r + DR[dir_i]), (int16_t)(p_A.c + DC[dir_i])};
+                         if (is_valid_pos(neighbor_p.r, neighbor_p.c) && !check_wall(p_A, neighbor_p)) {
+                             possible_dirs.push_back(dir_i);
+                         }
+                     }
+                     if (possible_dirs.empty()) continue;
+                     std::uniform_int_distribution<int> dir_choice_dist(0, possible_dirs.size()-1);
+                     int random_dir_idx = possible_dirs[dir_choice_dist(RND_ENGINE)];
+                     candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  {DIR_CHARS[random_dir_idx], DIR_CHARS[(random_dir_idx+2)%4]});
+                } else {
+                    if (!get_apsp_moves(p_A, target_cell, GET_APSP_MOVES_BUFFER1)) continue;
+                    if (!get_apsp_moves(target_cell, p_A, GET_APSP_MOVES_BUFFER2)) continue;
+                    candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  GET_APSP_MOVES_BUFFER2.begin(), GET_APSP_MOVES_BUFFER2.end()); // Return leg goes in first...
+                    candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end()); // ...so the outbound leg ends up in front of it.
+                }
+            } else { // operation_type == 5:
+                Pos target_cell = select_target_cell_for_dirt_ops(current_pd_obj, true);
+                int c_idx1, c_idx2;
+                if (L_curr == 0) {
+                    c_idx1 = 0; c_idx2 = 0;
+                } else {
+                    std::uniform_int_distribution<int> c_idx1_dist_op5(0, L_curr -1 );
+                    c_idx1 = c_idx1_dist_op5(RND_ENGINE);
+                    std::uniform_int_distribution<int> c_idx2_dist_op5(c_idx1 + 1, std::min(L_curr, c_idx1 + OP5_MAX_SUBSEGMENT_LEN));
+                    c_idx2 = c_idx2_dist_op5(RND_ENGINE);
+                }
+                if (c_idx1 > c_idx2) continue; // Should not happen with above logic for L_curr > 0
+                Pos p_A = current_pd_obj.coords[c_idx1];
+                Pos p_B = current_pd_obj.coords[c_idx2];
+                if (!get_apsp_moves(p_A, target_cell, GET_APSP_MOVES_BUFFER1)) continue;
+                if (!get_apsp_moves(target_cell, p_B, GET_APSP_MOVES_BUFFER2)) continue;
+                long long current_subsegment_len_moves = c_idx2 - c_idx1;
+                long long new_subsegment_len_moves = GET_APSP_MOVES_BUFFER1.size() + GET_APSP_MOVES_BUFFER2.size();
+                // Specific length control for Op5
+                if (new_subsegment_len_moves > current_subsegment_len_moves && L_curr > MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE) {
+                     if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+                }
+                if (new_subsegment_len_moves < current_subsegment_len_moves && L_curr < MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE) {
+                     if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+                }
+                if ( ( (long long)candidate_pd_obj.moves.size() - current_subsegment_len_moves + new_subsegment_len_moves) > MAX_L_PATH) continue;
+                if (c_idx1 < c_idx2) {
+                    candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + c_idx1,
+                                                candidate_pd_obj.moves.begin() + c_idx2);
+                }
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                            GET_APSP_MOVES_BUFFER2.begin(), GET_APSP_MOVES_BUFFER2.end()); // target -> p_B leg inserted first...
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                            GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end()); // ...then p_A -> target in front of it.
+            }
+            modified_successfully = true;
+            break;
+        }
+        if (!modified_successfully) continue; // No operator applied in 10 attempts; start the next iteration.
+        calculate_score_full(candidate_pd_obj);
+        bool candidate_is_invalid = candidate_pd_obj.score_val > 1e17L;
+        if (candidate_pd_obj.score_val < current_pd_obj.score_val) { // Strict improvements are always accepted.
+            current_pd_obj = std::move(candidate_pd_obj);
+            if (current_pd_obj.score_val < best_pd_obj.score_val) {
+                best_pd_obj = current_pd_obj;
+            }
+        } else if (!candidate_is_invalid) { // A worse or equal but legal candidate is accepted with probability exp(-delta / T).
+             auto now_time_temp = std::chrono::high_resolution_clock::now();
+             double elapsed_seconds_temp = std::chrono::duration<double>(now_time_temp - time_start_prog).count();
+             double progress_ratio = elapsed_seconds_temp / time_limit_seconds;
+             progress_ratio = std::min(1.0, std::max(0.0, progress_ratio));
+             double current_temp_val = end_temp;
+             if (start_temp > end_temp + 1e-9) {
+                current_temp_val = start_temp * std::pow(end_temp / start_temp, progress_ratio); // Geometric interpolation by elapsed-time fraction.
+             } else if (start_temp > 1e-9) {
+                current_temp_val = start_temp;
+             } else {
+                 current_temp_val = end_temp;
+             }
+             if (current_temp_val < 1e-9 && end_temp >= 1e-9) current_temp_val = end_temp;
+             else if (current_temp_val < 1e-9) current_temp_val = 1e-9; // Floor keeps the division below well defined.
+             if (exp((current_pd_obj.score_val - candidate_pd_obj.score_val) / current_temp_val) > accept_dist_01(RND_ENGINE)) {
+                 current_pd_obj = std::move(candidate_pd_obj);
+             }
+        }
+    }
+    for (char move_char : best_pd_obj.moves) std::cout << move_char;
+    std::cout << std::endl;
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc027/config.yaml ADDED Viewed

	@@ -0,0 +1,117 @@

+# ALE-Bench ahc027 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp
+diff_based_generation: true
+max_iterations: 100
+checkpoint_interval: 10
+max_solution_length: 60000
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  max_tokens: 32000
+  timeout: 600
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    F Corporation developed a robotic vacuum cleaner, Takahashi-kun cleaner No.2, and decided to entrust it with the cleaning\
+    \ of their office.\nTakahashi-kun cleaner No.2 can operate indefinitely through solar power and repeats cleaning on a\
+    \ predetermined route indefinitely.\nThe office has varying levels of susceptibility to dirt in different areas, and by\
+    \ frequently cleaning the areas that are more prone to dirt, the entire office can be kept cleaner.\n\nProblem Statement\n\
+    --------\nThere is an $N\\times N$ square board.\nLet $(0, 0)$ be the coordinates of the top-left square, and $(i, j)$\
+    \ be the coordinates of the square located $i$ squares down and $j$ squares to the right from there.\nThe perimeter of\
+    \ the $N\\times N$ board is surrounded by walls, and there may also be walls between adjacent squares.\n\nEach square\
+    \ $(i,j)$ is assigned a value $d_{i,j}$ which represents its susceptibility to dirt.\nYour task is to clean these squares\
+    \ by moving around the board.\nYou can move to an adjacent square that is not blocked by a wall.\nAfter the move, the\
+    \ dirtiness of the square you moved to becomes $0$, and the dirtiness of all other squares $(i, j)$ increases by $d_{i,\
+    \ j}$.\nConsider a cleaning route that starts and ends at $(0, 0)$, with a length (number of moves) not exceeding $10^5$.\n\
+    The cleaning route may pass through the same square multiple times, but must visit each square at least once.\n\nLet $a_{t,i,j}$\
+    \ denote the dirtiness of each square $(i,j)$ after the $t$-th move, and let $S_t=\\sum_{i=0}^{N-1}\\sum_{j=0}^{N-1} a_{t,i,j}$\
+    \ denote the total dirtiness.\nAt $t=0$, we assume that the dirtiness of all squares is $a_{0,i,j}=0$.\nDefine the **average\
+    \ dirtiness** as\n\\\\[\n  \\bar{S}=\\frac{1}{L}\\sum_{t=L}^{2L-1}S_t,\n\\\\]\nwhich is the average of the total dirtiness\
+    \ during the period $t=L,L+1,\\cdots,2L-1$ when the cleaning route of length $L$ is repeated infinitely.\n\nPlease find\
+    \ a cleaning route that minimizes the average dirtiness as much as possible.\n\n#### The Meaning of Average Dirtiness\
+    \ \nWe can prove that $a_{t,i,j}=a_{t+L,i,j}$ for $t\\geq L$ when the cleaning route of length $L$ is repeated infinitely.\n\
+    Therefore, considering the average $\\frac{1}{T} \\sum_{t=0}^{T-1} S_t$ of the total dirtiness up to $T$ turns, its limit\
+    \ as $T \\to \\infty$ coincides with the average dirtiness.\n\n\nScoring\n--------\nLet $\\bar{S}$ be the average dirtiness\
+    \ of the output cleaning route.\nThen you will obtain an absolute score of $\\mathrm{round}(\\bar{S})$.\nThe lower the\
+    \ absolute score, the better.\nIf you output an illegal cleaning route (length exceeds $10^5$, does not return to $(0,0)$,\
+    \ there is an unvisited square, or it hits a wall), it will be judged as <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Wrong Answer\">WA</span>.\n\nFor each test case, we compute the <font color=\"red\"><strong>relative\
+    \ score</strong></font> $\\mathrm{round}(10^9\\times \\frac{\\mathrm{MIN}}{\\mathrm{YOUR}})$, where YOUR is your absolute\
+    \ score and MIN is the lowest absolute score among all competitors obtained on that test case. The score of the submission\
+    \ is the sum of the relative scores.\n\nThe final ranking will be determined by the system test with more inputs which\
+    \ will be run after the contest is over.\nIn both the provisional/system test, if your submission produces illegal output\
+    \ or exceeds the time limit for some test cases, only the score for those test cases will be zero, and your submission\
+    \ will be excluded from the MIN calculation for those test cases.\n\nThe system test will be performed only for <font\
+    \ color=\"red\"><strong>the last submission which received a result other than <span class=\"label label-warning\" data-toggle=\"\
+    tooltip\" data-placement=\"top\" title=\"\" data-original-title=\"Compilation Error\">CE</span> </strong></font>.\nBe\
+    \ careful not to make a mistake in the final submission.\n\n\n#### Number of test cases\n- Provisional test: 50\n- System\
+    \ test: 2000. We will publish <a href=\"https://img.atcoder.jp/ahc027/seeds.txt\">seeds.txt</a>  (sha256=cdea33a6050850bf1387e2191b802a1df7e43fcb969fd6c3bf9cbd96a4d790d7)\
+    \ after the contest is over.\n\n#### About relative evaluation system\nIn both the provisional/system test, the standings\
+    \ will be calculated using only the last submission which received a result other than <span class=\"label label-warning\"\
+    \ data-toggle=\"tooltip\" data-placement=\"top\" title=\"\" data-original-title=\"Compilation Error\">CE</span>.\nOnly\
+    \ the last submissions are used to calculate the MIN for each test case when calculating the relative scores.\n\nThe scores\
+    \ shown in the standings are relative, and whenever a new submission arrives, all relative scores are recalculated.\n\
+    On the other hand, the score for each submission shown on the submissions page is the sum of the absolute score for each\
+    \ test case, and the relative scores are not shown.\nIn order to know the relative score of submission other than the\
+    \ latest one in the current standings, you need to resubmit it.\nIf your submission produces illegal output or exceeds\
+    \ the time limit for some test cases, the score shown on the submissions page will be 0, but the standings show the sum\
+    \ of the relative scores for the test cases that were answered correctly.\n\n#### About execution time\nExecution time\
+    \ may vary slightly from run to run.\nIn addition, since system tests simultaneously perform a large number of executions,\
+    \ it has been observed that execution time increases by several percent compared to provisional tests.\nFor these reasons,\
+    \ submissions that are very close to the time limit may result in <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Time Limit Exceeded\">TLE</span> in the system test.\nPlease measure the execution time\
+    \ in your program to terminate the process, or have enough margin in the execution time.\n\n\nInput\n--------\nInput is\
+    \ given from Standard Input in the following format.\n\n~~~\n$N$\n$h_{0,0}\\cdots h_{0,N-1}$\n$\\vdots$\n$h_{N-2,0} \\\
+    cdots h_{N-2,N-1}$\n$v_{0,0} \\cdots v_{0,N-2}$\n$\\vdots$\n$v_{N-1,0} \\cdots v_{N-1,N-2}$\n$d_{0,0}$ $\\cdots$ $d_{0,N-1}$\n\
+    $\\vdots$\n$d_{N-1,0}$ $\\cdots$ $d_{N-1,N-1}$\n~~~\n\n- $N$ is the horizontal and vertical size of the board and satisfies\
+    \ $20\\leq N\\leq 40$.\n- $h_{i,0}\\cdots h_{i,N-1}$ is a string of length $N$ consisting of only `0` and `1`. $h_{i,j}=1$\
+    \ if and only if there is a wall between square $(i,j)$ and its lower neighbor $(i+1,j)$.\n- $v_{i,0}\\cdots v_{i,N-2}$\
+    \ is a string of length $N-1$ consisting of only `0` and `1`. $v_{i,j}=1$ if and only if there is a wall between square\
+    \ $(i,j)$ and its right neighbor $(i,j+1)$.\n- All squares are guaranteed to be reachable from $(0, 0)$.\n- $d_{i,j}$\
+    \ is an integer value representing the susceptibility to dirt of square $(i,j)$ and satisfies $1\\leq d_{i,j}\\leq 10^3$.\n\
+    \n\nOutput\n--------\nRepresent a move up, down, left, or right by `U`, `D`, `L`, or `R`, respectively.\nRepresent the\
+    \ cleaning route of length $L$ as a string of $L$ characters corresponding to each move, and output it in a single line\
+    \ to Standard Output.\n\n\n<a href=\"https://img.atcoder.jp/ahc027/aPdjCUIZ.html?lang=en&seed=0&output=sample\">Show example</a>\n\
+    \n\nSample Solution\n--------\n<details>\nThis is a sample solution in Python.\nIn this program, by moving along the depth-first\
+    \ search tree starting from (0,0), each edge in the tree is passed twice, once on the way there and once on the way back,\
+    \ and the program outputs a cleaning route that returns to (0,0).\n<pre class=\"prettyprint linenums\">\nimport sys\n\
+    sys.setrecursionlimit(1000000)\n\nN = int(input())\nh = [input() for _ in range(N-1)]\nv = [input() for _ in range(N)]\n\
+    d = [list(map(int, input().split())) for _ in range(N)]\n\nvisited = [[False for _ in range(N)] for _ in range(N)]\nDIJ\
+    \ = [(0, 1), (1, 0), (0, -1), (-1, 0)]\nDIR = \"RDLU\"\n\ndef dfs(i, j):\n  visited[i][j] = True\n  for dir in range(4):\n\
+    \    di, dj = DIJ[dir]\n    i2 = i + di\n    j2 = j + dj\n    if 0 <= i2 < N and 0 <= j2 < N and not visited[i2][j2]:\n\
+    \      if di == 0 and v[i][min(j, j2)] == '0' or dj == 0 and h[min(i, i2)][j] == '0':\n        print(DIR[dir], end='')\n\
+    \        dfs(i2, j2)\n        print(DIR[(dir + 2) % 4], end='')\n\ndfs(0, 0)\nprint()\n</pre>\n</details>\n\n\nInput Generation\n\
+    --------\n<details>\nLet $\\mathrm{randint}(L,U)$ be a function that generates a uniform random integer between $L$ and\
+    \ $U$, inclusive.\nLet $\\mathrm{randdouble}(L,U)$ be a function that generates a uniform random floating-point number\
+    \ at least $L$ and less than $U$.\n\n#### Generation of $N$\n$N=\\mathrm{randint}(20,40)$.\n\n#### Generation of $h$ and\
+    \ $v$\nGenerate a parameter $w=\\mathrm{randint}(1,N)$ that controls the number of walls.\nStarting from a state with\
+    \ no walls, generate walls by repeating the following operation $w$ times.\n\nRandomly select one of the four directions\
+    \ (up, down, left, right).\nFor the left direction, generate $i=\\mathrm{randint}(0,N-2)$, $j=\\mathrm{randint}(0,N-1)$,\
+    \ and $k=\\mathrm{randint}(3,\\lfloor N/2\\rfloor)$.\nThen, set $h_{i,j}\\cdots h_{i,\\max(j-k+1, 0)}$ to $1$.\nSimilarly,\
+    \ for the right direction, generate values in the same manner, and set $h_{i,j}\\cdots h_{i,\\min(j+k-1, N-1)}$ to $1$.\n\
+    For the upward direction, generate $i=\\mathrm{randint}(0,N-1)$, $j=\\mathrm{randint}(0,N-2)$, and $k=\\mathrm{randint}(3,\\\
+    lfloor N/2\\rfloor)$.\nThen, set $v_{i,j}\\cdots v_{\\max(i-k+1, 0),j}$ to $1$.\nSimilarly, for the downward direction,\
+    \ generate values in the same manner, and set $v_{i,j}\\cdots v_{\\min(i+k-1, N-1),j}$ to $1$.\n\nAfter $w$ iterations\
+    \ are completed, check if all squares are reachable from $(0, 0)$, and if there are unreachable squares, remove all walls\
+    \ and redo the $w$ iterations.\n\n#### Generation of $d$\nGenerate a parameter $c=\\mathrm{randint}(1,\\lfloor N/2\\rfloor)$\
+    \ that determines the number of susceptible regions.\nCreate an array $d'$ with $d'_{i,j}=0$ for all $(i,j)$, and update\
+    \ $d'$ by repeating the following process $c$ times.\n\nGenerate $i=\\mathrm{randint}(0,N-1)$, $j=\\mathrm{randint}(0,N-1)$,\
+    \ $m=\\mathrm{randint}(N,\\lfloor N^2/c\\rfloor)$, and $b=\\mathrm{randdouble}(0,2)$.\nGenerate a set $S$ by starting\
+    \ from $S=\\\\{(i,j)\\\\}$ and repeating the following process until the size of $S$ becomes $m$.\n\nRandomly choose $p\\\
+    in S$, and randomly choose one of the four directions (up, down, left, or right). If there is no wall in that direction\
+    \ from $p$, add the adjacent square $q$ to $S$.\n\nFor each square $(i',j')\\in S$ contained in the generated $S$, overwrite\
+    \ $d'_{i',j'}=b$.\n\nAfter $c$ iterations are completed, generate $d_{i,j}=\\mathrm{round}(10^{d'_{i,j}+\\mathrm{randdouble}(0,1)})$\
+    \ for each $(i,j)$.\n</details>\n\nTools (Input generator and visualizer)\n--------\n- <a href=\"https://img.atcoder.jp/ahc027/aPdjCUIZ.html?lang=en\"\
+    >Web version</a>: This is more powerful than the local version providing animations.\n- <a href=\"https://img.atcoder.jp/ahc027/aPdjCUIZ_v2.zip\"\
+    >Local version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n\
+    \  - <a href=\"https://img.atcoder.jp/ahc027/aPdjCUIZ_windows_v2.zip\">Pre-compiled binary for Windows</a>: If you are\
+    \ not familiar with the Rust language environment, please use this instead.\n\nPlease be aware that sharing visualization\
+    \ results or discussing solutions/ideas during the contest is prohibited.\n\n{sample example}\n\n\n    Problem constraints:\n\
+    \    time_limit=2.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000
+  cascade_evaluation: false

benchmarks/ale_bench/ale-bench-lite-problems/ahc027/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc027"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc027/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,614 @@

+# EVOLVE-BLOCK-START
+#pragma GCC optimize("O3,unroll-loops")
+#include <iostream>
+#include <vector>
+#include <string>
+#include <numeric>
+#include <algorithm>
+#include <chrono>
+#include <random>
+#include <iomanip>
+#include <cmath>
+// #include <map> // Not used
+// Global game data
+int N_GRID_SIZE;
+std::vector<std::string> H_WALLS_INFO;
+std::vector<std::string> V_WALLS_INFO;
+int D_SUSC[40][40];
+// Board coordinate (row r, column c) stored as two 16-bit integers.
+struct Pos {
+    int16_t r = 0;
+    int16_t c = 0;
+    Pos() = default;
+    Pos(int16_t r_val, int16_t c_val) : r(r_val), c(c_val) {}
+    // Equal when both the row and the column match.
+    bool operator==(const Pos& other) const { return !(r != other.r || c != other.c); }
+    bool operator!=(const Pos& other) const { return r != other.r || c != other.c; }
+    // Row-major ordering: compare rows first, then columns.
+    bool operator<(const Pos& other) const {
+        return r < other.r || (r == other.r && c < other.c);
+    }
+};
+constexpr int DR[] = {0, 1, 0, -1}; // R, D, L, U
+constexpr int DC[] = {1, 0, -1, 0};
+constexpr char DIR_CHARS[] = {'R', 'D', 'L', 'U'};
+const int MAX_L_PATH = 100000;
+double MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE;
+double MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE;
+std::mt19937 RND_ENGINE;
+Pos APSP_PARENT[40][40][40][40];
+int APSP_DIST[40][40][40][40];
+// Reports whether (r, c) lies inside the N_GRID_SIZE x N_GRID_SIZE board.
+bool is_valid_pos(int r, int c) {
+    const bool row_inside = 0 <= r && r < N_GRID_SIZE;
+    const bool col_inside = 0 <= c && c < N_GRID_SIZE;
+    return row_inside && col_inside;
+}
+// Returns true when the single step from p_from to p_to is blocked by a wall.
+// Both cells must already be inside the board; no bounds check is done here.
+// A pair of cells that is not one step apart is reported as blocked.
+bool check_wall(Pos p_from, Pos p_to) {
+    const int row_step = p_to.r - p_from.r;
+    const int col_step = p_to.c - p_from.c;
+    // H_WALLS_INFO[i][j] describes the edge between (i, j) and (i+1, j), so a
+    // vertical step is looked up at the upper of the two cells.
+    if (row_step == 1) return H_WALLS_INFO[p_from.r][p_from.c] == '1';
+    if (row_step == -1) return H_WALLS_INFO[p_to.r][p_to.c] == '1';
+    // V_WALLS_INFO[i][j] describes the edge between (i, j) and (i, j+1), so a
+    // horizontal step is looked up at the left of the two cells.
+    if (col_step == 1) return V_WALLS_INFO[p_from.r][p_from.c] == '1';
+    if (col_step == -1) return V_WALLS_INFO[p_from.r][p_to.c] == '1';
+    return true;
+}
+// Maps the step from p_from to p_to onto its move letter from DIR_CHARS.
+// Returns ' ' when the step matches none of the four unit directions.
+char get_move_char(Pos p_from, Pos p_to) {
+    const int row_step = p_to.r - p_from.r;
+    const int col_step = p_to.c - p_from.c;
+    for (int dir = 0; dir < 4; ++dir) {
+        if (DR[dir] != row_step || DC[dir] != col_step) continue;
+        return DIR_CHARS[dir];
+    }
+    return ' ';
+}
+// Returns the move letter that undoes move_char (the direction two slots
+// further along DIR_CHARS), or ' ' when move_char is not a known move.
+char invert_move(char move_char) {
+    int dir = 0;
+    while (dir < 4 && DIR_CHARS[dir] != move_char) ++dir;
+    return dir < 4 ? DIR_CHARS[(dir + 2) % 4] : ' ';
+}
+// Fills APSP_DIST and APSP_PARENT by running one breadth-first search from
+// every cell of the board. Unreachable targets keep a distance of -1.
+// APSP_PARENT[sr][sc][r][c] is the cell visited just before (r, c) on a
+// shortest route that starts at (sr, sc).
+void compute_apsp() {
+    const int n = N_GRID_SIZE;
+    std::vector<Pos> frontier;
+    frontier.reserve(n * n);
+    for (int sr = 0; sr < n; ++sr) {
+        for (int sc = 0; sc < n; ++sc) {
+            auto& dist_from_src = APSP_DIST[sr][sc];
+            auto& parent_from_src = APSP_PARENT[sr][sc];
+            for (int tr = 0; tr < n; ++tr) {
+                for (int tc = 0; tc < n; ++tc) dist_from_src[tr][tc] = -1;
+            }
+            // The vector doubles as a FIFO queue: `head` walks forward while
+            // newly discovered cells are appended at the back.
+            frontier.clear();
+            frontier.push_back(Pos{(int16_t)sr, (int16_t)sc});
+            dist_from_src[sr][sc] = 0;
+            for (int head = 0; head < static_cast<int>(frontier.size()); ++head) {
+                const Pos cur = frontier[head];
+                for (int dir = 0; dir < 4; ++dir) {
+                    const Pos nxt = Pos{(int16_t)(cur.r + DR[dir]), (int16_t)(cur.c + DC[dir])};
+                    if (!is_valid_pos(nxt.r, nxt.c)) continue;
+                    if (check_wall(cur, nxt)) continue;
+                    if (dist_from_src[nxt.r][nxt.c] != -1) continue;
+                    dist_from_src[nxt.r][nxt.c] = dist_from_src[cur.r][cur.c] + 1;
+                    parent_from_src[nxt.r][nxt.c] = cur;
+                    frontier.push_back(nxt);
+                }
+            }
+        }
+    }
+}
+// Writes into out_moves the move letters of a shortest route from p_from to
+// p_to, using the tables built by compute_apsp(). out_moves is always cleared
+// first. Returns false (leaving out_moves empty) when p_to is unreachable.
+bool get_apsp_moves(Pos p_from, Pos p_to, std::vector<char>& out_moves) {
+    out_moves.clear();
+    if (p_from == p_to) return true;
+    const int route_len = APSP_DIST[p_from.r][p_from.c][p_to.r][p_to.c];
+    if (route_len == -1) return false;
+    out_moves.reserve(route_len);
+    // Follow parent links backwards from the target, then flip the result.
+    for (Pos node = p_to; node != p_from; ) {
+        const Pos pred = APSP_PARENT[p_from.r][p_from.c][node.r][node.c];
+        out_moves.push_back(get_move_char(pred, node));
+        node = pred;
+    }
+    std::reverse(out_moves.begin(), out_moves.end());
+    return true;
+}
+std::vector<std::vector<std::vector<int>>> CELL_VISIT_TIMES_GLOBAL_BUFFER;
+// A cell paired with its weighted contribution to the total dirt.
+struct CellDirtInfo {
+    long double weighted_dirt_contribution;
+    Pos p;
+    // Deliberately inverted: an ascending sort puts the largest contribution first.
+    bool operator<(const CellDirtInfo& other) const {
+        return other.weighted_dirt_contribution < weighted_dirt_contribution;
+    }
+};
+std::vector<CellDirtInfo> TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER;
+// A candidate cleaning route together with the data derived from it.
+//   moves                    - the route as move letters
+//   coords                   - cells along the route, starting with the origin
+//   visited_flags            - which cells the route touches
+//   score_val                - average dirtiness; 1e18 marks an invalid route
+//   total_dirt_sum_numerator - score_val before division by the route length
+//   cell_dirt_term_sum       - per-cell dirt term, not yet weighted by D_SUSC
+// Both 40x40 arrays are value-initialized in full. Initializing only the
+// N_GRID_SIZE x N_GRID_SIZE corner would leave the rest indeterminate, and the
+// defaulted copy operations below read every element. It would also make the
+// constructor depend on N_GRID_SIZE having been read before construction.
+struct PathData {
+    std::vector<char> moves;
+    std::vector<Pos> coords;
+    bool visited_flags[40][40] = {};
+    long double score_val = 1e18L;
+    long double total_dirt_sum_numerator = 0.0L;
+    long double cell_dirt_term_sum[40][40] = {};
+    PathData() = default;
+    PathData(const PathData& other) = default;
+    PathData(PathData&& other) = default;
+    PathData& operator=(const PathData& other) = default;
+    PathData& operator=(PathData&& other) = default;
+};
+// Replays pd.moves from the origin, rebuilding pd.coords and pd.visited_flags.
+// Returns false as soon as a move is not a known letter, leaves the board, or
+// crosses a wall; coords and visited_flags then cover only the moves replayed
+// up to that point.
+bool update_coords_and_visited_flags(PathData& pd) {
+    const Pos origin = Pos{0,0};
+    pd.coords.assign(1, origin);
+    if (!pd.moves.empty()) {
+        pd.coords.reserve(pd.moves.size() + 1);
+    }
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        for (int c = 0; c < N_GRID_SIZE; ++c) pd.visited_flags[r][c] = false;
+    }
+    pd.visited_flags[0][0] = true;
+    Pos here = origin;
+    for (const char letter : pd.moves) {
+        // Translate the letter into its index in DIR_CHARS.
+        const char* hit = std::find(std::begin(DIR_CHARS), std::end(DIR_CHARS), letter);
+        if (hit == std::end(DIR_CHARS)) return false;
+        const int dir = static_cast<int>(hit - std::begin(DIR_CHARS));
+        const Pos there = Pos{(int16_t)(here.r + DR[dir]), (int16_t)(here.c + DC[dir])};
+        if (!is_valid_pos(there.r, there.c)) return false;
+        if (check_wall(here, there)) return false;
+        here = there;
+        pd.coords.push_back(here);
+        pd.visited_flags[here.r][here.c] = true;
+    }
+    return true;
+}
+// Validates pd.moves as a cleaning route and, when it is valid, computes its
+// average dirtiness into pd.score_val along with the per-cell dirt terms.
+// A route is rejected (score_val = 1e18) when it cannot be replayed, is longer
+// than MAX_L_PATH, does not end at the origin, or misses a cell.
+// Uses CELL_VISIT_TIMES_GLOBAL_BUFFER as scratch space.
+void calculate_score_full(PathData& pd) {
+    const long double invalid_score = 1e18L;
+    if (!update_coords_and_visited_flags(pd)) {
+        pd.score_val = invalid_score;
+        return;
+    }
+    if (pd.moves.size() > MAX_L_PATH) {
+        pd.score_val = invalid_score;
+        return;
+    }
+    // A non-empty route has to end at the origin; an empty one is only
+    // tolerated when the board is not larger than 1x1.
+    const bool bad_endpoint = pd.moves.empty() ? (N_GRID_SIZE > 1)
+                                               : (pd.coords.back() != Pos{0,0});
+    if (bad_endpoint) {
+        pd.score_val = invalid_score;
+        return;
+    }
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        for (int c = 0; c < N_GRID_SIZE; ++c) {
+            if (pd.visited_flags[r][c]) continue;
+            pd.score_val = invalid_score;
+            return;
+        }
+    }
+    const int L = pd.moves.size();
+    if (L == 0) {
+        pd.score_val = (N_GRID_SIZE == 1) ? 0.0L : invalid_score; // N=1 case not in this contest
+        pd.total_dirt_sum_numerator = 0;
+        if (N_GRID_SIZE == 1) pd.cell_dirt_term_sum[0][0] = 0;
+        return;
+    }
+    // Bucket the arrival times 1..L by cell.
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        for (int c = 0; c < N_GRID_SIZE; ++c) {
+            CELL_VISIT_TIMES_GLOBAL_BUFFER[r][c].clear();
+        }
+    }
+    for (int t = 1; t <= L; ++t) {
+        const Pos cell = pd.coords[t];
+        CELL_VISIT_TIMES_GLOBAL_BUFFER[cell.r][cell.c].push_back(t);
+    }
+    long double numerator = 0;
+    for (int row = 0; row < N_GRID_SIZE; ++row) {
+        for (int col = 0; col < N_GRID_SIZE; ++col) {
+            const auto& visits = CELL_VISIT_TIMES_GLOBAL_BUFFER[row][col];
+            long double gap_term = 0;
+            if (visits.empty()) {
+                gap_term = (long double)L * (L - 1) / 2.0L;
+            } else {
+                // The route repeats, so the visit preceding the first one in
+                // this cycle is the last visit of the previous cycle.
+                long long previous_t = (long long)visits.back() - L;
+                for (const int visit_t : visits) {
+                    const long long delta = visit_t - previous_t;
+                    // A gap of `delta` steps accumulates 0 + 1 + ... + (delta - 1).
+                    gap_term += (long double)delta * (delta - 1) / 2.0L;
+                    previous_t = visit_t;
+                }
+            }
+            pd.cell_dirt_term_sum[row][col] = gap_term;
+            numerator += (long double)D_SUSC[row][col] * gap_term;
+        }
+    }
+    pd.total_dirt_sum_numerator = numerator;
+    pd.score_val = numerator / L;
+}
+bool initial_dfs_visited[40][40];
+// Depth-first walk from (r, c) that appends to pd.moves a route entering every
+// not-yet-visited reachable cell and stepping back out again, so the walk ends
+// on the cell it started from. Cells are marked in initial_dfs_visited.
+void generate_initial_dfs_path(int r, int c, PathData& pd) {
+    initial_dfs_visited[r][c] = true;
+    const Pos here = Pos{(int16_t)r, (int16_t)c};
+    for (int dir = 0; dir < 4; ++dir) {
+        const Pos there = Pos{(int16_t)(r + DR[dir]), (int16_t)(c + DC[dir])};
+        if (!is_valid_pos(there.r, there.c)) continue;
+        if (check_wall(here, there)) continue;
+        if (initial_dfs_visited[there.r][there.c]) continue;
+        pd.moves.push_back(DIR_CHARS[dir]);
+        generate_initial_dfs_path(there.r, there.c, pd);
+        // Step back with the opposite direction once the subtree is done.
+        pd.moves.push_back(DIR_CHARS[(dir + 2) % 4]);
+    }
+}
+// Picks a cell to target, favouring cells with a large weighted dirt term
+// (D_SUSC * cell_dirt_term_sum). Cells are ranked by that term and one of the
+// top K is drawn uniformly: K is N_GRID_SIZE when use_sqrt_N_sampling is set,
+// otherwise max(10, N_GRID_SIZE^2 / 10), capped at the number of cells.
+// A uniformly random cell is returned when the current route is invalid
+// (score above 1e17) or no candidate exists.
+// Uses TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER as scratch space.
+Pos select_target_cell_for_dirt_ops(const PathData& current_pd_obj, bool use_sqrt_N_sampling) {
+    // Draws the row first and the column second.
+    auto random_cell = []() {
+        std::uniform_int_distribution<int> r_dist(0, N_GRID_SIZE-1);
+        const int16_t row = (int16_t)r_dist(RND_ENGINE);
+        const int16_t col = (int16_t)r_dist(RND_ENGINE);
+        return Pos{row, col};
+    };
+    auto& ranked = TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER;
+    ranked.clear();
+    if (current_pd_obj.score_val > 1e17L) return random_cell();
+    for (int r = 0; r < N_GRID_SIZE; ++r) {
+        for (int c = 0; c < N_GRID_SIZE; ++c) {
+            const long double weighted = (long double)D_SUSC[r][c] * current_pd_obj.cell_dirt_term_sum[r][c];
+            ranked.push_back({weighted, Pos{(int16_t)r,(int16_t)c}});
+        }
+    }
+    // CellDirtInfo::operator< is inverted, so this orders the largest first.
+    std::sort(ranked.begin(), ranked.end());
+    if (ranked.empty()) return random_cell();
+    const int pool_cap = use_sqrt_N_sampling ? std::max(1, N_GRID_SIZE)
+                                             : std::max(10, N_GRID_SIZE * N_GRID_SIZE / 10);
+    const int K_select = std::min((int)ranked.size(), pool_cap);
+    if (K_select <= 0) return random_cell();
+    std::uniform_int_distribution<int> top_k_dist(0, K_select - 1);
+    return ranked[top_k_dist(RND_ENGINE)].p;
+}
+const int OP4_SAMPLE_POINTS = 20;
+const int OP5_MAX_SUBSEGMENT_LEN = 20;
+std::vector<char> GET_APSP_MOVES_BUFFER1;
+std::vector<char> GET_APSP_MOVES_BUFFER2;
+int main(int argc, char *argv[]) {
+    std::ios_base::sync_with_stdio(false); std::cin.tie(NULL);
+    double time_limit_seconds = 1.95;
+    if (argc > 1) time_limit_seconds = std::stod(argv[1]);
+    auto time_start_prog = std::chrono::high_resolution_clock::now();
+    RND_ENGINE.seed(std::chrono::system_clock::now().time_since_epoch().count());
+    std::cin >> N_GRID_SIZE;
+    H_WALLS_INFO.resize(N_GRID_SIZE - 1);
+    V_WALLS_INFO.resize(N_GRID_SIZE);
+    for (int i = 0; i < N_GRID_SIZE - 1; ++i) std::cin >> H_WALLS_INFO[i];
+    for (int i = 0; i < N_GRID_SIZE; ++i) std::cin >> V_WALLS_INFO[i];
+    for (int i = 0; i < N_GRID_SIZE; ++i) for (int j = 0; j < N_GRID_SIZE; ++j) std::cin >> D_SUSC[i][j];
+    MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE = MAX_L_PATH * 0.95;
+    MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE = N_GRID_SIZE * N_GRID_SIZE;
+    compute_apsp();
+    CELL_VISIT_TIMES_GLOBAL_BUFFER.resize(N_GRID_SIZE);
+    TMP_CELL_DIRT_INFOS_LIST_GLOBAL_BUFFER.reserve(N_GRID_SIZE * N_GRID_SIZE);
+    for(int r=0; r<N_GRID_SIZE; ++r) {
+        CELL_VISIT_TIMES_GLOBAL_BUFFER[r].resize(N_GRID_SIZE);
+        int reserve_size = std::max(2, MAX_L_PATH / (N_GRID_SIZE*N_GRID_SIZE) + 50);
+         if (reserve_size > MAX_L_PATH / 2 && MAX_L_PATH > 2) reserve_size = MAX_L_PATH/2; // Cap reasonable max
+        for(int c=0; c<N_GRID_SIZE; ++c) {
+             CELL_VISIT_TIMES_GLOBAL_BUFFER[r][c].reserve(reserve_size);
+        }
+    }
+    GET_APSP_MOVES_BUFFER1.reserve(N_GRID_SIZE*N_GRID_SIZE*2);
+    GET_APSP_MOVES_BUFFER2.reserve(N_GRID_SIZE*N_GRID_SIZE*2);
+    PathData current_pd_obj;
+    for(int i=0; i<N_GRID_SIZE; ++i) for(int j=0; j<N_GRID_SIZE; ++j) initial_dfs_visited[i][j] = false;
+    generate_initial_dfs_path(0, 0, current_pd_obj);
+    calculate_score_full(current_pd_obj);
+    PathData best_pd_obj = current_pd_obj;
+    double start_temp = 5000.0 * sqrt(N_GRID_SIZE);
+    double end_temp = 0.1;
+    int iterations_count = 0;
+    PathData candidate_pd_obj;
+    std::uniform_real_distribution<double> accept_dist_01(0.0, 1.0);
+    while(true) {
+        iterations_count++;
+        if(iterations_count % 100 == 0){
+            auto now_time = std::chrono::high_resolution_clock::now();
+            double elapsed_seconds = std::chrono::duration<double>(now_time - time_start_prog).count();
+            if (elapsed_seconds > time_limit_seconds) break;
+        }
+        int L_curr = current_pd_obj.moves.size();
+        bool modified_successfully = false;
+        for (int try_op = 0; try_op < 10; ++try_op) {
+            candidate_pd_obj = current_pd_obj;
+            std::uniform_int_distribution<int> op_dist(0, 99);
+            int op_choice_val = op_dist(RND_ENGINE);
+            int operation_type = -1;
+            if (op_choice_val < 15) operation_type = 0;
+            else if (op_choice_val < 30) operation_type = 1;
+            else if (op_choice_val < 60) operation_type = 2;
+            else if (op_choice_val < 70) operation_type = 3;
+            else if (op_choice_val < 85) operation_type = 4;
+            else operation_type = 5;
+            bool is_length_increasing_op = (operation_type == 0 || operation_type == 4);
+            // Op5 can increase or decrease length. Check its specific outcome for length control.
+            bool is_length_decreasing_op = (operation_type == 1 || operation_type == 2);
+            if (is_length_increasing_op && L_curr > MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE) {
+                if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+            }
+            if (is_length_decreasing_op && L_curr < MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE) {
+                 if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+            }
+            if (operation_type == 0) {
+                if (L_curr == 0 && N_GRID_SIZE > 1) continue;
+                if (candidate_pd_obj.moves.size() + 2 > MAX_L_PATH) continue;
+                if (current_pd_obj.coords.empty() && N_GRID_SIZE > 1) continue;
+                std::uniform_int_distribution<int> k_idx_dist(0, L_curr);
+                int k_coord_idx = k_idx_dist(RND_ENGINE);
+                Pos p_k = current_pd_obj.coords[k_coord_idx];
+                std::vector<int> possible_dirs; possible_dirs.reserve(4);
+                for(int dir_i=0; dir_i<4; ++dir_i) {
+                    Pos neighbor_p = Pos{(int16_t)(p_k.r + DR[dir_i]), (int16_t)(p_k.c + DC[dir_i])};
+                    if (is_valid_pos(neighbor_p.r, neighbor_p.c) && !check_wall(p_k, neighbor_p)) {
+                        possible_dirs.push_back(dir_i);
+                    }
+                }
+                if (possible_dirs.empty()) continue;
+                std::uniform_int_distribution<int> dir_choice_dist(0, possible_dirs.size()-1);
+                int random_dir_idx = possible_dirs[dir_choice_dist(RND_ENGINE)];
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + k_coord_idx,
+                                             {DIR_CHARS[random_dir_idx], DIR_CHARS[(random_dir_idx+2)%4]});
+            } else if (operation_type == 1) {
+                if (L_curr < 2) continue;
+                if (current_pd_obj.coords.size() < 3) continue;
+                std::vector<int> possible_indices; possible_indices.reserve(L_curr);
+                for(int k_m_idx = 0; k_m_idx <= L_curr - 2; ++k_m_idx) {
+                    if (current_pd_obj.coords[k_m_idx] == current_pd_obj.coords[k_m_idx+2]) {
+                        possible_indices.push_back(k_m_idx);
+                    }
+                }
+                if (possible_indices.empty()) continue;
+                std::uniform_int_distribution<int> idx_choice_dist(0, possible_indices.size()-1);
+                int k_move_idx_to_remove = possible_indices[idx_choice_dist(RND_ENGINE)];
+                candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + k_move_idx_to_remove,
+                                             candidate_pd_obj.moves.begin() + k_move_idx_to_remove + 2);
+            } else if (operation_type == 2) {
+                if (L_curr < 1) continue;
+                std::uniform_int_distribution<int> c_idx1_dist(0, L_curr > 0 ? L_curr - 1 : 0);
+                int c_idx1 = c_idx1_dist(RND_ENGINE);
+                std::uniform_int_distribution<int> c_idx2_dist(c_idx1 + 1, L_curr);
+                int c_idx2 = c_idx2_dist(RND_ENGINE);
+                if (c_idx1 >= c_idx2 && L_curr > 0) continue; // Need valid subsegment
+                if (L_curr == 0 && (c_idx1!=0 || c_idx2!=0)) continue; // L=0 means c_idx1=0, c_idx2=0 only
+                Pos p_A = current_pd_obj.coords[c_idx1]; Pos p_B = current_pd_obj.coords[c_idx2];
+                if (!get_apsp_moves(p_A, p_B, GET_APSP_MOVES_BUFFER1)) continue;
+                if (GET_APSP_MOVES_BUFFER1.size() >= (size_t)(c_idx2 - c_idx1) && L_curr > 0 ) continue; // APSP not shorter (allow if L_curr=0)
+                if ( ( (long long)candidate_pd_obj.moves.size() - (c_idx2 - c_idx1) + GET_APSP_MOVES_BUFFER1.size()) > MAX_L_PATH) continue;
+                if (c_idx1 < c_idx2) { // Ensure erase range is valid
+                    candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + c_idx1,
+                                                candidate_pd_obj.moves.begin() + c_idx2);
+                }
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                              GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end());
+            } else if (operation_type == 3) {
+                if (L_curr < 1) continue;
+                std::uniform_int_distribution<int> move_idx_dist(0, L_curr -1);
+                int move_idx1 = move_idx_dist(RND_ENGINE);
+                std::uniform_int_distribution<int> move_idx_dist2(move_idx1, L_curr -1);
+                int move_idx2_inclusive = move_idx_dist2(RND_ENGINE);
+                int move_idx2_exclusive = move_idx2_inclusive + 1;
+                std::reverse(candidate_pd_obj.moves.begin() + move_idx1, candidate_pd_obj.moves.begin() + move_idx2_exclusive);
+                for(int i = move_idx1; i < move_idx2_exclusive; ++i)
+                    candidate_pd_obj.moves[i] = invert_move(candidate_pd_obj.moves[i]);
+            } else if (operation_type == 4) {
+                if (L_curr == 0 && N_GRID_SIZE > 1) continue;
+                if (current_pd_obj.coords.empty() && N_GRID_SIZE > 1) continue;
+                Pos target_cell = select_target_cell_for_dirt_ops(current_pd_obj, false);
+                int best_k_coord_idx = -1;
+                long long min_detour_len_increase = (long long)MAX_L_PATH * 2 +1; // path len increase: 2 for wiggle, 2*dist for detour
+                if (L_curr >= 0) { // Path can be empty (L_curr=0), then coords has 1 element (0,0)
+                    int num_samples = (L_curr == 0) ? 1: OP4_SAMPLE_POINTS; // If L_curr=0, only one point to pick: coords[0]
+                    for (int i=0; i < num_samples; ++i) {
+                        std::uniform_int_distribution<int> k_idx_dist(0, L_curr);
+                        int k_coord_idx_sample = (L_curr == 0) ? 0 : k_idx_dist(RND_ENGINE);
+                        Pos p_A_sample = current_pd_obj.coords[k_coord_idx_sample];
+                        long long current_detour_increase;
+                        if (p_A_sample == target_cell) {
+                             current_detour_increase = 2; // Wiggle cost
+                        } else {
+                            int dist_pa_target = APSP_DIST[p_A_sample.r][p_A_sample.c][target_cell.r][target_cell.c];
+                            if (dist_pa_target != -1) {
+                                current_detour_increase = (long long)dist_pa_target * 2;
+                            } else {
+                                current_detour_increase = (long long)MAX_L_PATH * 2 + 1; // effectively infinity
+                            }
+                        }
+                        if (current_detour_increase < min_detour_len_increase) {
+                            min_detour_len_increase = current_detour_increase;
+                            best_k_coord_idx = k_coord_idx_sample;
+                        }
+                    }
+                }
+                if (best_k_coord_idx == -1 || min_detour_len_increase > MAX_L_PATH) continue;
+                Pos p_A = current_pd_obj.coords[best_k_coord_idx];
+                if (candidate_pd_obj.moves.size() + min_detour_len_increase > MAX_L_PATH) continue;
+                if (p_A == target_cell) {
+                     std::vector<int> possible_dirs; possible_dirs.reserve(4);
+                     for(int dir_i=0; dir_i<4; ++dir_i) {
+                         Pos neighbor_p = Pos{(int16_t)(p_A.r + DR[dir_i]), (int16_t)(p_A.c + DC[dir_i])};
+                         if (is_valid_pos(neighbor_p.r, neighbor_p.c) && !check_wall(p_A, neighbor_p)) {
+                             possible_dirs.push_back(dir_i);
+                         }
+                     }
+                     if (possible_dirs.empty()) continue;
+                     std::uniform_int_distribution<int> dir_choice_dist(0, possible_dirs.size()-1);
+                     int random_dir_idx = possible_dirs[dir_choice_dist(RND_ENGINE)];
+                     candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  {DIR_CHARS[random_dir_idx], DIR_CHARS[(random_dir_idx+2)%4]});
+                } else {
+                    if (!get_apsp_moves(p_A, target_cell, GET_APSP_MOVES_BUFFER1)) continue;
+                    if (!get_apsp_moves(target_cell, p_A, GET_APSP_MOVES_BUFFER2)) continue;
+                    candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  GET_APSP_MOVES_BUFFER2.begin(), GET_APSP_MOVES_BUFFER2.end());
+                    candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + best_k_coord_idx,
+                                                  GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end());
+                }
+            } else { // operation_type == 5:
+                Pos target_cell = select_target_cell_for_dirt_ops(current_pd_obj, true);
+                int c_idx1, c_idx2;
+                if (L_curr == 0) {
+                    c_idx1 = 0; c_idx2 = 0;
+                } else {
+                    std::uniform_int_distribution<int> c_idx1_dist_op5(0, L_curr -1 );
+                    c_idx1 = c_idx1_dist_op5(RND_ENGINE);
+                    std::uniform_int_distribution<int> c_idx2_dist_op5(c_idx1 + 1, std::min(L_curr, c_idx1 + OP5_MAX_SUBSEGMENT_LEN));
+                    c_idx2 = c_idx2_dist_op5(RND_ENGINE);
+                }
+                if (c_idx1 > c_idx2) continue; // Should not happen with above logic for L_curr > 0
+                Pos p_A = current_pd_obj.coords[c_idx1];
+                Pos p_B = current_pd_obj.coords[c_idx2];
+                if (!get_apsp_moves(p_A, target_cell, GET_APSP_MOVES_BUFFER1)) continue;
+                if (!get_apsp_moves(target_cell, p_B, GET_APSP_MOVES_BUFFER2)) continue;
+                long long current_subsegment_len_moves = c_idx2 - c_idx1;
+                long long new_subsegment_len_moves = GET_APSP_MOVES_BUFFER1.size() + GET_APSP_MOVES_BUFFER2.size();
+                // Specific length control for Op5
+                if (new_subsegment_len_moves > current_subsegment_len_moves && L_curr > MAX_L_PATH_HIGH_THRESHOLD_EFFECTIVE) {
+                     if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+                }
+                if (new_subsegment_len_moves < current_subsegment_len_moves && L_curr < MIN_L_PATH_LOW_THRESHOLD_EFFECTIVE) {
+                     if (accept_dist_01(RND_ENGINE) < 0.75) continue;
+                }
+                if ( ( (long long)candidate_pd_obj.moves.size() - current_subsegment_len_moves + new_subsegment_len_moves) > MAX_L_PATH) continue;
+                if (c_idx1 < c_idx2) {
+                    candidate_pd_obj.moves.erase(candidate_pd_obj.moves.begin() + c_idx1,
+                                                candidate_pd_obj.moves.begin() + c_idx2);
+                }
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                            GET_APSP_MOVES_BUFFER2.begin(), GET_APSP_MOVES_BUFFER2.end());
+                candidate_pd_obj.moves.insert(candidate_pd_obj.moves.begin() + c_idx1,
+                                            GET_APSP_MOVES_BUFFER1.begin(), GET_APSP_MOVES_BUFFER1.end());
+            }
+            modified_successfully = true;
+            break;
+        }
+        if (!modified_successfully) continue;
+        calculate_score_full(candidate_pd_obj);
+        bool candidate_is_invalid = candidate_pd_obj.score_val > 1e17L;
+        if (candidate_pd_obj.score_val < current_pd_obj.score_val) {
+            current_pd_obj = std::move(candidate_pd_obj);
+            if (current_pd_obj.score_val < best_pd_obj.score_val) {
+                best_pd_obj = current_pd_obj;
+            }
+        } else if (!candidate_is_invalid) {
+             auto now_time_temp = std::chrono::high_resolution_clock::now();
+             double elapsed_seconds_temp = std::chrono::duration<double>(now_time_temp - time_start_prog).count();
+             double progress_ratio = elapsed_seconds_temp / time_limit_seconds;
+             progress_ratio = std::min(1.0, std::max(0.0, progress_ratio));
+             double current_temp_val = end_temp;
+             if (start_temp > end_temp + 1e-9) {
+                current_temp_val = start_temp * std::pow(end_temp / start_temp, progress_ratio);
+             } else if (start_temp > 1e-9) {
+                current_temp_val = start_temp;
+             } else {
+                 current_temp_val = end_temp;
+             }
+             if (current_temp_val < 1e-9 && end_temp >= 1e-9) current_temp_val = end_temp;
+             else if (current_temp_val < 1e-9) current_temp_val = 1e-9;
+             if (exp((current_pd_obj.score_val - candidate_pd_obj.score_val) / current_temp_val) > accept_dist_01(RND_ENGINE)) {
+                 current_pd_obj = std::move(candidate_pd_obj);
+             }
+        }
+    }
+    for (char move_char : best_pd_obj.moves) std::cout << move_char;
+    std::cout << std::endl;
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale-bench-lite-problems/ahc039/config.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+# ALE-Bench ahc039 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp  # the accompanying evaluator.py compiles submissions with code_language="cpp20"
+diff_based_generation: true  # presumably the LLM proposes diffs instead of full rewrites — TODO confirm
+max_iterations: 100
+checkpoint_interval: 10  # presumably counted in iterations — TODO confirm
+max_solution_length: 60000  # presumably a character limit on the program text — TODO confirm
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0  # only one model is listed here
+  max_tokens: 32000
+  timeout: 600  # presumably seconds per LLM request — TODO confirm units
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
+    Takahashi is a skilled purse seine fisher.\nHis fishing boat is equipped with state-of-the-art sonar, allowing him to\
+    \ accurately determine the positions of fish within the fishing area.\nAdditionally, the boat is capable of high-speed\
+    \ movement, enabling him to assume that fish remain stationary while he sets up the fishing net.\n\nThe fishing method\
+    \ involves using the boat to deploy nets and form a closed polygon, capturing the fish within the enclosed area.\nTo optimize\
+    \ efficiency, each edge of the polygon formed by the nets must be aligned either parallel to the east-west or north-south\
+    \ direction.\nFurthermore, due to the limited length of the nets equipped on the boat, the polygon must be constructed\
+    \ within these constraints.\n\nThe fishing area contains two types of fish: mackerels and sardines.\nFor resource conservation\
+    \ reasons, sardines are currently prohibited from being caught in this fishing area.\nAny sardines caught in the net must\
+    \ be released back into the sea.\nBecause this process is labor-intensive, Takahashi should focus on maximizing the catch\
+    \ of mackerel while avoiding sardines as much as possible.\n\n\nProblem Statement\n--------\nThere are $N$ mackerels and\
+    \ $N$ sardines on a two-dimensional plane.\nConstruct a polygon that satisfies the following conditions and maximize the\
+    \ value obtained by subtracting the total number of sardines inside the polygon from the total number of mackerels inside\
+    \ it.\nNote that any points lying on the edges of the polygon are considered to be inside the polygon.\n\n### Conditions\n\
+    1. The number of vertices in the polygon must not exceed $1000$, and the total length of its edges must not exceed $4\
+    \ \\times 10^5$.\n2. The coordinates of each vertex $(x, y)$ must be integers satisfying $0 \\leq x, y \\leq 10^5$.\n\
+    3. Each edge of the polygon must be parallel to either the $x$-axis or the $y$-axis.\n4. The polygon must not self-intersect:\
+    \ non-adjacent edges must not share any points, and adjacent edges must only meet at their endpoints.\n\n\n\nScoring\n\
+    --------\nLet $a$ be the total number of mackerels inside the polygon and $b$ be the total number of sardines inside the\
+    \ polygon.\nThen, you will obtain the score of $\\max(0, a - b + 1)$.\n\nThere are $150$ test cases, and the score of\
+    \ a submission is the total score for each test case.\nIf your submission produces an illegal output or exceeds the time\
+    \ limit for some test cases, the submission itself will be judged as <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Wrong Answer\">WA</span> or <span class='label label-warning' data-toggle='tooltip' data-placement='top'\
+    \ title=\"Time Limit Exceeded\">TLE</span> , and the score of the submission will be zero.\nThe highest score obtained\
+    \ during the contest will determine the final ranking, and there will be no system test after the contest.\nIf more than\
+    \ one participant gets the same score, they will be ranked in the same place regardless of the submission time.\n\n\n\n\
+    Input\n--------\nInput is given from Standard Input in the following format:\n~~~\n$N$\n$x_0$ $y_0$\n$\\vdots$\n$x_{2N-1}$\
+    \ $y_{2N-1}$\n~~~\n\n- In all test cases, the number of mackerels and sardines, $N$, is fixed at $5000$.\n- For each $i\
+    \ = 0, 1, \\dots, N-1$, $(x_i, y_i)$ represents the coordinates of the $i$-th mackerel.\n- For each $i = 0, 1, \\dots,\
+    \ N-1$, $(x_{N+i}, y_{N+i})$ represents the coordinates of the $i$-th sardine.\n- Each coordinate $(x_i, y_i)$ satisfies\
+    \ $0 \\leq x_i, y_i \\leq 10^5$, and all coordinates are distinct.\n\n\nOutput\n--------\nLet the number of vertices in\
+    \ the polygon be $m$ ($4 \\leq m \\leq 1000$), and let $(a_i, b_i)$ denote the coordinates of the $i$-th vertex.\nThen,\
+    \ output to Standard Output in the following format:\n~~~\n$m$\n$a_0$ $b_0$\n$\\vdots$\n$a_{m-1}$ $b_{m-1}$\n~~~\n\nThe\
+    \ output vertices do not necessarily need to form the actual corners of the polygon.\nIn other words, three consecutive\
+    \ vertices $(a_i, b_i), (a_{i+1}, b_{i+1}), (a_{i+2}, b_{i+2})$ may lie on a straight line.\nHowever, all vertices must\
+    \ have distinct coordinates.\n\nThe vertices can be output in either clockwise or counterclockwise order.\n\n<a href=\"\
+    https://img.atcoder.jp/ahc039/KNtTkgAy.html?lang=en&seed=0&output=sample\">Show example</a>\n\n\nYour program may output\
+    \ multiple solutions.\nIf multiple solutions are output, only the last one is used for scoring.\nYou can compare multiple\
+    \ solutions using the web version of the visualizer.\n\n\n\n\n\nInput Generation\n--------\n- $\\mathrm{rand}(L, U)$:\
+    \ Generates a random integer uniformly distributed between $L$ and $U$ (inclusive).\n- $\\mathrm{rand\\\\_double}(L, U)$:\
+    \ Generates a random real number uniformly distributed between $L$ and $U$.\n- $\\mathrm{normal}(\\mu, \\sigma)$: Generates\
+    \ a random real number from a normal distribution with mean $\\mu$ and standard deviation $\\sigma$.\n\nFirst, generate\
+    \ the coordinates of mackerels.\nThe number of clusters $n$ is determined by generating $n = \\mathrm{rand}(10, 25)$.\n\
+    For each cluster $i$, generate the following parameters:\n\n- Weight $w_i = \\mathrm{rand\\\\_double}(0, 1)$\n- Center\
+    \ $(cx_i, cy_i) = (\\mathrm{rand}(20000, 80000), \\mathrm{rand}(20000, 80000))$\n- Standard deviation $\\sigma_i = \\\
+    mathrm{rand}(1000, 5000)$\n\nRepeat the following process $N$ times to generate the coordinates of $N$ mackerels:\n\n\
+    - Randomly select a cluster $i$ with probability proportional to its weight $w_i$.\n- Generate $x = \\mathrm{round}(\\\
+    mathrm{normal}(cx_i, \\sigma_i))$ and $y = \\mathrm{round}(\\mathrm{normal}(cy_i, \\sigma_i))$.\n- If the generated coordinates\
+    \ $(x, y)$ satisfy $0 \\leq x, y \\leq 10^5$ and are distinct from all previously generated coordinates, they are accepted\
+    \ as the coordinates of a mackerel. Otherwise, regenerate $(x, y)$.\n\nAfter generating the coordinates of mackerels,\
+    \ generate the coordinates of sardines in the same way.\n\n\n\nTools (Input generator and visualizer)\n--------\n- <a\
+    \ href=\"https://img.atcoder.jp/ahc039/KNtTkgAy.html?lang=en\">Web version</a>: This is more powerful than the local version\
+    \ providing animations.\n- <a href=\"https://img.atcoder.jp/ahc039/KNtTkgAy.zip\">Local version</a>: You need a compilation\
+    \ environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n  - <a href=\"https://img.atcoder.jp/ahc039/KNtTkgAy_windows.zip\"\
+    >Pre-compiled binary for Windows</a>: If you are not familiar with the Rust language environment, please use this instead.\n\
+    \nPlease be aware that sharing visualization results or discussing solutions/ideas during the contest is prohibited.\n\
+    \n\n    Problem constraints:\n    time_limit=2.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000  # presumably seconds allowed for one evaluate() call — TODO confirm units
+  cascade_evaluation: false  # presumably a staged/multi-phase evaluation mode, disabled here — TODO confirm

benchmarks/ale_bench/ale-bench-lite-problems/ahc039/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc039"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc046/config.yaml ADDED Viewed

	@@ -0,0 +1,62 @@

+# ALE-Bench ahc046 — AtCoder Heuristic Contest
+# Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
+language: cpp  # the accompanying evaluator.py compiles submissions with code_language="cpp20"
+diff_based_generation: true  # presumably the LLM proposes diffs instead of full rewrites — TODO confirm
+max_iterations: 100
+checkpoint_interval: 10  # presumably counted in iterations — TODO confirm
+max_solution_length: 60000  # presumably a character limit on the program text — TODO confirm
+llm:
+  api_base: https://api.openai.com/v1
+  models:
+    - name: "gpt-5"
+      weight: 1.0  # only one model is listed here
+  max_tokens: 32000
+  timeout: 600  # presumably seconds per LLM request — TODO confirm units
+prompt:
+  system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
+    \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nProblem Statement\n\
+    --------\nThere is a skating rink consisting of $N \\times N$ squares.\nLet $(0, 0)$ be the coordinates of the top-left\
+    \ square, and $(i, j)$ be the coordinates of the square located $i$ squares down and $j$ squares to the right from there.\n\
+    All squares outside the $N \\times N$ area are covered with blocks and are impassable.\nInitially, there are no blocks\
+    \ inside the $N \\times N$ area.\n\nYou start at the initial position $(i_0, j_0)$ and must visit the specified target\
+    \ squares $(i_1, j_1), \\dots, (i_{M-1}, j_{M-1})$ in the given order.\n\nAt each turn, you may choose one of the four\
+    \ cardinal directions and perform one of the following actions:\n\n- **Move**: Move one square in the specified direction.\
+    \ You cannot move into a square containing a block.\n- **Slide**: Continue sliding in the specified direction until you\
+    \ hit a block.\n- **Alter**: Place a block on the adjacent square in the specified direction if it does not already contain\
+    \ one; otherwise, remove the existing block.\n  You may not specify a square outside the $N \\times N$ area.\n  It is\
+    \ also allowed to place a block on a current or future target square; however, you must remove the block in order to visit\
+    \ that square.\n\nIf you slide over a target square without stopping on it, it is **not** considered visited.\nA target\
+    \ square is considered visited only if you either stop on it after a **Slide**, or move onto it directly via a **Move**.\n\
+    \nYou must visit the target squares in the given order.\nEven if you pass over a later target square before visiting earlier\
+    \ ones, it is not considered visited at that time. You will need to visit it again when its turn in the sequence arrives.\n\
+    \nYou may perform at most $2NM$ actions.\nVisit all target squares in the specified order using as few turns as possible.\n\
+    \nScoring\n--------\nLet $T$ be the length of the output action sequence, and $m$ be the number of target squares successfully\
+    \ visited.\nThen, you will obtain the following score.\n\n- If $m<M-1$, $m+1$\n- If $m=M-1$, $M+2NM-T$\n\nThere are $150$\
+    \ test cases, and the score of a submission is the total score for each test case.\nIf your submission produces an illegal\
+    \ output or exceeds the time limit for some test cases, the submission itself will be judged as <span class='label label-warning'\
+    \ data-toggle='tooltip' data-placement='top' title=\"Wrong Answer\">WA</span> or <span class='label label-warning' data-toggle='tooltip'\
+    \ data-placement='top' title=\"Time Limit Exceeded\">TLE</span> , and the score of the submission will be zero.\nThe highest\
+    \ score obtained during the contest will determine the final ranking, and there will be no system test after the contest.\n\
+    If more than one participant gets the same score, they will be ranked in the same place regardless of the submission time.\n\
+    \n\nInput\n--------\nInput is given from Standard Input in the following format:\n\n~~~\n$N$ $M$\n$i_0$ $j_0$\n$\\vdots$\n\
+    $i_{M-1}$ $j_{M-1}$\n~~~\n\n- In all test cases, $N = 20$ and $M = 40$ are fixed.\n- The coordinates $(i_k, j_k)$ of the\
+    \ initial position and each target square are integers satisfying $0 \\leq i_k, j_k \\leq N-1$, and all coordinates are\
+    \ distinct.\n\n\nOutput\n--------\nAt each turn, represent the selected action and direction using a single uppercase\
+    \ alphabet letter as follows.\n\n**Actions**\n\n- Move: `M`\n- Slide: `S`\n- Alter: `A`\n\n**Directions**\n\n- Up: `U`\n\
+    - Down: `D`\n- Left: `L`\n- Right: `R`\n\nLet $a_t$ and $d_t$ denote the action and direction selected at turn $t$ ($t\
+    \ = 0, 1, \\dots, T-1$), respectively.  \nThen, output to Standard Output in the following format:\n~~~\n$a_0$ $d_0$\n\
+    $\\vdots$\n$a_{T-1}$ $d_{T-1}$\n~~~\n\n\n<a href=\"https://img.atcoder.jp/ahc046/EuNd3uow.html?lang=en&seed=0&output=sample\"\
+    >Show example</a>\n\n\n\nInput Generation\n--------\nThe initial position and the target squares are generated according\
+    \ to the following procedure.\n\nFirst, randomly shuffle the coordinates of all $N^2$ squares.\nThen, take the first $M$\
+    \ coordinates from the shuffled list and assign them sequentially as $(i_0, j_0), (i_1, j_1), \\dots, (i_{M-1}, j_{M-1})$.\n\
+    \n\nTools (Input generator and visualizer)\n--------\n- <a href=\"https://img.atcoder.jp/ahc046/EuNd3uow.html?lang=en\"\
+    >Web version</a>: This is more powerful than the local version providing animations and manual play.\n- <a href=\"https://img.atcoder.jp/ahc046/EuNd3uow.zip\"\
+    >Local version</a>: You need a compilation environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n\
+    \  - <a href=\"https://img.atcoder.jp/ahc046/EuNd3uow_windows.zip\">Pre-compiled binary for Windows</a>: If you are not\
+    \ familiar with the Rust language environment, please use this instead.\n\nPlease be aware that sharing visualization\
+    \ results or discussing solutions/ideas during the contest is prohibited.\n\n{sample example}\n\n\n    Problem constraints:\n\
+    time_limit=2.0 memory_limit=1073741824\n"
+evaluator:
+  timeout: 10000  # presumably seconds allowed for one evaluate() call — TODO confirm units
+  cascade_evaluation: false  # presumably a staged/multi-phase evaluation mode, disabled here — TODO confirm

benchmarks/ale_bench/ale-bench-lite-problems/ahc046/evaluator.py ADDED Viewed

	@@ -0,0 +1,65 @@

+import traceback
+from pathlib import Path
+from ale_bench.result import CaseResult, JudgeResult, Result
+from ale_bench_eval.safe_ale_session import start_ale_bench_session
+import logging
+import sys
+logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def evaluate(program_path):
+    problem_id = "ahc046"
+    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
+    try:
+        session = None
+        logger.info("Starting ALE-Bench session")
+        session = start_ale_bench_session(
+            problem_id=problem_id,
+            lite_version=True,
+            num_workers=13,
+        )
+        logger.info("ALE-Bench session started")
+        if not session:
+            raise RuntimeError("Failed to start or restart the session.")
+        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        logger.info("Code extracted")
+        num_public_cases = 50
+        cases = session.case_gen(list(range(num_public_cases)))
+        public_result = session.case_eval(
+            cases, code, code_language="cpp20", skip_local_visualization=True
+        )
+        logger.info("Public evaluation completed")
+        extracted_case = result_feedback(public_result)
+        logger.info("Result feedback completed")
+        logger.info("ALE-Bench session closed")
+        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
+        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
+            combined_score = -sys.maxsize - 1
+        session.close()
+        return {
+            "judge_result": public_result.overall_judge_result.value,
+            "overall_score": public_result.overall_absolute_score,
+            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
+            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
+            "standard_error": extracted_case.error_str,
+            "message": extracted_case.message,
+            "combined_score": combined_score,
+        }
+    except Exception as e:
+        logger.error(f"Evaluation failed completely: {str(e)}")
+        logger.error(traceback.format_exc())
+        return {
+            "overall_score": 0.0,
+            "error": str(e),
+        }

benchmarks/ale_bench/ale-bench-lite-problems/ahc046/initial_program.cpp ADDED Viewed

	@@ -0,0 +1,897 @@

+# EVOLVE-BLOCK-START
+#include <iostream>
+#include <vector>
+#include <string>
+#include <queue>
+#include <algorithm>
+#include <tuple>
+#include <array>
+#include <chrono>
+#include <random>
+#include <cmath> // For std::exp, std::pow
+#include <numeric> // For std::iota
+// Constants
+const int N_GRID = 20; // side length of the square board; bounds both row and column indices
+const int M_TARGETS_INPUT = 40; // the M of the problem input; presumably the start position plus the targets — confirm against the input reader
+const int NUM_SEGMENTS = M_TARGETS_INPUT - 1; // one fewer than the number of input coordinates
+const int INF_COST = 1e9; // sentinel cost; bfs() returns it when it cannot even start a search
+const int MAX_TOTAL_TURNS = 2 * N_GRID * M_TARGETS_INPUT; // 2*20*40 = 1600
+// Randomness
+unsigned int RND_SEED = std::chrono::steady_clock::now().time_since_epoch().count(); // clock-derived seed, so the value differs between runs
+std::mt19937 rng(RND_SEED); // generator seeded from RND_SEED
+// Coordinates
+struct Pos {
+    int r, c; // row and column
+    // Equal only when both coordinates match.
+    bool operator==(const Pos& rhs) const {
+        return r == rhs.r && c == rhs.c;
+    }
+    bool operator!=(const Pos& rhs) const {
+        return r != rhs.r || c != rhs.c;
+    }
+    // Lexicographic order: row first, column as the tie-breaker.
+    bool operator<(const Pos& rhs) const {
+        return (r != rhs.r) ? (r < rhs.r) : (c < rhs.c);
+    }
+};
+const Pos INVALID_POS = {-1, -1};
+// Grid state
+using Grid = std::array<std::array<bool, N_GRID>, N_GRID>; // true if block exists
+bool is_valid_pos(Pos p) { // true when p lies inside the N_GRID x N_GRID board
+    const bool row_in_range = 0 <= p.r && p.r < N_GRID;
+    const bool col_in_range = 0 <= p.c && p.c < N_GRID;
+    return row_in_range && col_in_range;
+}
+bool is_blocked_pos(Pos p, const Grid& grid) { // off-board cells count as blocked
+    // Short-circuit keeps the grid lookup from running for off-board positions.
+    return !is_valid_pos(p) || grid[p.r][p.c];
+}
+void toggle_block_pos(Pos p, Grid& grid) { // flips the block flag at p; off-board positions are ignored
+    if (!is_valid_pos(p)) return;
+    bool& cell = grid[p.r][p.c];
+    cell = !cell;
+}
+// Directions
+const int DR[] = {-1, 1, 0, 0}; // U, D, L, R
+const int DC[] = {0, 0, -1, 1}; // column deltas, indexed in the same U, D, L, R order as DR
+const char DIR_CHARS[] = {'U', 'D', 'L', 'R'}; // letter for each direction index
+const int DIR_REV_IDX[] = {1, 0, 3, 2}; // U(0)<->D(1), L(2)<->R(3)
+// Global BFS structures for optimization
+unsigned int g_bfs_generation_id = 0; // incremented at the start of every bfs() call
+std::array<std::array<unsigned int, N_GRID>, N_GRID> g_bfs_cell_last_visited_generation; // a cell counts as visited in the running search only when its stamp equals g_bfs_generation_id
+std::array<std::array<int, N_GRID>, N_GRID> g_bfs_cell_dist; // distance from the BFS start; only meaningful for cells stamped with the current generation
+std::string reconstruct_path_from_came_from(Pos start_pos, Pos dest_pos,
+    const std::array<std::array<std::pair<Pos, std::pair<char, char>>, N_GRID>, N_GRID>& came_from_data) {
+    // Walk the predecessor links from dest_pos back to start_pos, collecting each
+    // step's two letters back-to-front, then flip the whole string at the end.
+    // The walk also stops if it ever steps onto an off-board position.
+    std::string backwards;
+    for (Pos cursor = dest_pos; cursor != start_pos && is_valid_pos(cursor); ) {
+        const auto& link = came_from_data[cursor.r][cursor.c];
+        backwards.push_back(link.second.second); // second letter of the step (the direction, as bfs stores it)
+        backwards.push_back(link.second.first);  // first letter of the step (the action, as bfs stores it)
+        cursor = link.first;                     // move to the predecessor cell
+    }
+    return std::string(backwards.rbegin(), backwards.rend());
+}
+struct BFSResult { int cost; std::string actions_str; }; // value returned by bfs(): a cost (INF_COST on its early-exit paths) and an action string
+BFSResult bfs(Pos start_pos, Pos dest_pos, const Grid& grid, Pos intermediate_target_to_avoid, bool avoid_intermediate_target, bool build_action_str) { // Breadth-first search for the fewest turns from start_pos to dest_pos, where one turn is a 1-cell move or a slide; returns {INF_COST, ""} when dest_pos is not reached.
+    g_bfs_generation_id++; // new generation: marks left in the global scratch arrays by earlier searches stop counting as visited
+    std::array<std::array<std::pair<Pos, std::pair<char, char>>, N_GRID>, N_GRID> came_from_local; // per cell: {predecessor, {action type 'M' or 'S', direction letter}}; written only when build_action_str is set
+    std::queue<Pos> q;
+    if (!is_valid_pos(start_pos)) return {INF_COST, ""};
+    if (avoid_intermediate_target && start_pos == intermediate_target_to_avoid && start_pos != dest_pos) { // starting on the cell to avoid is reported as unreachable unless that cell is also the destination
+        return {INF_COST, ""};
+    }
+    g_bfs_cell_last_visited_generation[start_pos.r][start_pos.c] = g_bfs_generation_id;
+    g_bfs_cell_dist[start_pos.r][start_pos.c] = 0;
+    q.push(start_pos);
+    int min_dist_to_dest = INF_COST; // best distance to dest_pos seen so far; used to prune expansion below
+    if (start_pos == dest_pos) min_dist_to_dest = 0;
+    while(!q.empty()){
+        Pos curr = q.front();
+        q.pop();
+        int d = g_bfs_cell_dist[curr.r][curr.c];
+        if (curr == dest_pos) {
+             min_dist_to_dest = std::min(min_dist_to_dest, d);
+        }
+        if (min_dist_to_dest != INF_COST && d >= min_dist_to_dest && curr != dest_pos) continue; // once dest_pos has a distance, cells that are not strictly closer are no longer expanded
+        if (d + 1 > N_GRID * N_GRID) continue; // depth cap: no path longer than the number of cells is explored
+        for (int i = 0; i < 4; ++i) { // Moves: one cell in direction i
+            Pos next_p = {curr.r + DR[i], curr.c + DC[i]};
+            if (is_blocked_pos(next_p, grid)) continue; // off-grid cells count as blocked
+            if (avoid_intermediate_target && next_p == intermediate_target_to_avoid && next_p != dest_pos) continue; // never step onto the avoided cell unless it is the destination
+            bool visited_in_current_bfs = (g_bfs_cell_last_visited_generation[next_p.r][next_p.c] == g_bfs_generation_id);
+            if (!visited_in_current_bfs || g_bfs_cell_dist[next_p.r][next_p.c] > d + 1) { // first visit in this search, or a strictly shorter distance
+                g_bfs_cell_last_visited_generation[next_p.r][next_p.c] = g_bfs_generation_id;
+                g_bfs_cell_dist[next_p.r][next_p.c] = d + 1;
+                if (build_action_str) came_from_local[next_p.r][next_p.c] = {curr, {'M', DIR_CHARS[i]}};
+                q.push(next_p);
+            }
+        }
+        for (int i = 0; i < 4; ++i) { // Slides: keep going in direction i until the next cell is blocked
+            Pos current_slide_p = curr; Pos landed_at_p = curr;
+            while (true) {
+                Pos next_tile_in_slide = {current_slide_p.r + DR[i], current_slide_p.c + DC[i]};
+                if (is_blocked_pos(next_tile_in_slide, grid)) { landed_at_p = current_slide_p; break; } // the slide stops on the last free cell
+                if (avoid_intermediate_target && next_tile_in_slide == intermediate_target_to_avoid && next_tile_in_slide != dest_pos) { // a slide that would enter the avoided cell is discarded as a whole
+                     landed_at_p = curr;
+                     break;
+                }
+                current_slide_p = next_tile_in_slide;
+            }
+            if (landed_at_p == curr) continue; // zero-length or discarded slide
+            Pos next_p = landed_at_p;
+            bool visited_in_current_bfs = (g_bfs_cell_last_visited_generation[next_p.r][next_p.c] == g_bfs_generation_id);
+            if (!visited_in_current_bfs || g_bfs_cell_dist[next_p.r][next_p.c] > d + 1) { // same relaxation rule as for moves; a slide costs one turn
+                g_bfs_cell_last_visited_generation[next_p.r][next_p.c] = g_bfs_generation_id;
+                g_bfs_cell_dist[next_p.r][next_p.c] = d + 1;
+                if (build_action_str) came_from_local[next_p.r][next_p.c] = {curr, {'S', DIR_CHARS[i]}};
+                q.push(next_p);
+            }
+        }
+    }
+    BFSResult res = {INF_COST, ""};
+    if (is_valid_pos(dest_pos) && g_bfs_cell_last_visited_generation[dest_pos.r][dest_pos.c] == g_bfs_generation_id) { // dest_pos was reached during this search
+        res.cost = g_bfs_cell_dist[dest_pos.r][dest_pos.c];
+        if (build_action_str && res.cost != INF_COST) {
+            res.actions_str = reconstruct_path_from_came_from(start_pos, dest_pos, came_from_local);
+        }
+    }
+    return res;
+}
+void bfs_all(Pos start_pos, const Grid& grid, // Breadth-first search from start_pos to every reachable cell, using 1-cell moves and slides at one turn each.
+             Pos intermediate_target_to_avoid, bool strictly_avoid_intermediate, // when the flag is set, the given cell is never entered and no slide may pass over it
+             std::array<std::array<int, N_GRID>, N_GRID>& dist_out, // output: turns to each cell, INF_COST for cells that were not reached
+             std::array<std::array<std::pair<Pos, std::pair<char, char>>, N_GRID>, N_GRID>& came_from_out, // output: predecessor and action per reached cell; written only when store_came_from is set
+             bool store_came_from) {
+    g_bfs_generation_id++; // new generation: marks left by earlier searches stop counting as visited
+    std::queue<Pos> q;
+    for (int r_idx=0; r_idx<N_GRID; ++r_idx) std::fill(dist_out[r_idx].begin(), dist_out[r_idx].end(), INF_COST); // everything starts as unreachable
+    if (!is_valid_pos(start_pos)) return;
+    if (strictly_avoid_intermediate && start_pos == intermediate_target_to_avoid) { // starting on the avoided cell leaves dist_out entirely at INF_COST
+        return;
+    }
+    g_bfs_cell_last_visited_generation[start_pos.r][start_pos.c] = g_bfs_generation_id;
+    g_bfs_cell_dist[start_pos.r][start_pos.c] = 0;
+    q.push(start_pos);
+    while(!q.empty()){
+        Pos curr = q.front();
+        q.pop();
+        int d = g_bfs_cell_dist[curr.r][curr.c];
+        if (d + 1 > N_GRID * N_GRID) continue; // depth cap: no path longer than the number of cells is explored
+        for (int i = 0; i < 4; ++i) { // Moves: one cell in direction i
+            Pos next_p = {curr.r + DR[i], curr.c + DC[i]};
+            if (is_blocked_pos(next_p, grid)) continue; // off-grid cells count as blocked
+            if (strictly_avoid_intermediate && next_p == intermediate_target_to_avoid) continue;
+            bool visited_in_current_bfs = (g_bfs_cell_last_visited_generation[next_p.r][next_p.c] == g_bfs_generation_id);
+            if (!visited_in_current_bfs || g_bfs_cell_dist[next_p.r][next_p.c] > d + 1) { // first visit in this search, or a strictly shorter distance
+                g_bfs_cell_last_visited_generation[next_p.r][next_p.c] = g_bfs_generation_id;
+                g_bfs_cell_dist[next_p.r][next_p.c] = d + 1;
+                if (store_came_from) came_from_out[next_p.r][next_p.c] = {curr, {'M', DIR_CHARS[i]}};
+                q.push(next_p);
+            }
+        }
+        for (int i = 0; i < 4; ++i) { // Slides: keep going in direction i until the next cell is blocked
+            Pos current_slide_p = curr;
+            Pos landed_at_p = curr;
+            while (true) {
+                Pos next_tile_in_slide = {current_slide_p.r + DR[i], current_slide_p.c + DC[i]};
+                if (is_blocked_pos(next_tile_in_slide, grid)) { // the slide stops on the last free cell
+                    landed_at_p = current_slide_p;
+                    break;
+                }
+                if (strictly_avoid_intermediate && next_tile_in_slide == intermediate_target_to_avoid) { // a slide that would enter the avoided cell is discarded as a whole
+                     landed_at_p = curr;
+                     break;
+                }
+                current_slide_p = next_tile_in_slide;
+            }
+            if (landed_at_p == curr) continue; // zero-length or discarded slide
+            Pos next_p = landed_at_p;
+            bool visited_in_current_bfs = (g_bfs_cell_last_visited_generation[next_p.r][next_p.c] == g_bfs_generation_id);
+            if (!visited_in_current_bfs || g_bfs_cell_dist[next_p.r][next_p.c] > d + 1) { // same relaxation rule as for moves; a slide costs one turn
+                g_bfs_cell_last_visited_generation[next_p.r][next_p.c] = g_bfs_generation_id;
+                g_bfs_cell_dist[next_p.r][next_p.c] = d + 1;
+                if (store_came_from) came_from_out[next_p.r][next_p.c] = {curr, {'S', DIR_CHARS[i]}};
+                q.push(next_p);
+            }
+        }
+    }
+    for (int r_idx = 0; r_idx < N_GRID; ++r_idx) { // copy the distances of cells reached in this generation from the global scratch array to dist_out
+        for (int c_idx = 0; c_idx < N_GRID; ++c_idx) {
+            if (g_bfs_cell_last_visited_generation[r_idx][c_idx] == g_bfs_generation_id) {
+                dist_out[r_idx][c_idx] = g_bfs_cell_dist[r_idx][c_idx];
+            }
+        }
+    }
+}
+Pos G_initial_pos; // starting cell; main() reads it from standard input
+std::vector<Pos> G_targets_vec; // target cells in input order, one per segment; filled by main() from standard input
+struct SegmentExecResult { int turns = INF_COST; std::string actions_str; }; // outcome of one segment: turns spent (INF_COST until a strategy sets it) and the action string for those turns
+// Base strategy "direct": reach target_P with an unrestricted bfs() on the current grid.
+// Returns false when target_P is blocked (off-grid included) or cannot be reached.
+// On success res.turns is the BFS cost and res.actions_str is the path, or empty when
+// build_action_str is false. The grid is never modified.
+bool apply_direct_path_strat(Pos cur_P, Pos target_P, const Grid& g, SegmentExecResult& res, bool build_action_str) {
+    if (is_blocked_pos(target_P, g)) {
+        return false;
+    }
+    BFSResult route = bfs(cur_P, target_P, g, INVALID_POS, false, build_action_str);
+    if (route.cost == INF_COST) {
+        return false;
+    }
+    res.turns = route.cost;
+    if (build_action_str) {
+        res.actions_str = route.actions_str;
+    } else {
+        res.actions_str.clear();
+    }
+    return true;
+}
+// Base strategy "unblock and go": applies only when target_P is currently blocked.
+// Walks to the cheapest free cell next to target_P (never entering target_P on the way),
+// alters target_P from there to clear it, then moves onto it.
+// Returns false when target_P is not blocked or no free neighbour is reachable; the grid
+// is untouched in that case. On success target_P is toggled in g, res.turns is the walking
+// cost plus two, and res.actions_str holds the actions (empty when build_action_str is false).
+bool apply_unblock_and_go_strat(Pos cur_P, Pos target_P, Grid& g , SegmentExecResult& res, bool build_action_str) {
+    if (!is_blocked_pos(target_P, g)) return false;
+
+    std::array<std::array<int, N_GRID>, N_GRID> turns_to_cell;
+    std::array<std::array<std::pair<Pos, std::pair<char, char>>, N_GRID>, N_GRID> parent_links;
+    bfs_all(cur_P, g, target_P, true, turns_to_cell, parent_links, build_action_str);
+
+    Pos staging_cell = INVALID_POS;
+    int staging_cost = INF_COST;
+    char toward_target = ' ';
+    for (int dir = 0; dir < 4; ++dir) {
+        Pos neighbour = {target_P.r + DR[dir], target_P.c + DC[dir]};
+        if (!is_valid_pos(neighbour) || is_blocked_pos(neighbour, g)) continue;
+        int cost_here = turns_to_cell[neighbour.r][neighbour.c];
+        if (cost_here >= staging_cost) continue;
+        staging_cost = cost_here;
+        staging_cell = neighbour;
+        // The neighbour lies in direction `dir` from the target, so the target lies
+        // in the opposite direction from the neighbour.
+        toward_target = DIR_CHARS[DIR_REV_IDX[dir]];
+    }
+    if (staging_cell == INVALID_POS || staging_cost == INF_COST) return false;
+
+    // One turn for the alter, one for the final step onto the target.
+    res.turns = staging_cost + 2;
+    if (!build_action_str) {
+        res.actions_str.clear();
+    } else {
+        res.actions_str = reconstruct_path_from_came_from(cur_P, staging_cell, parent_links);
+        res.actions_str += 'A';
+        res.actions_str += toward_target;
+        res.actions_str += 'M';
+        res.actions_str += toward_target;
+    }
+    toggle_block_pos(target_P, g);
+    return true;
+}
+// Base strategy "slide into the target": the segment ends with a slide in direction
+// slide_dir_idx that starts on the cell just before target_P and stops on target_P
+// because the cell just behind it is blocked.
+//
+//   type 0: the stopper must already exist (the cell behind target_P is blocked or
+//           off-grid). The player walks to the slide start without entering target_P,
+//           then slides. The grid is not changed.
+//   type 1: the cell behind target_P is free. The player walks to target_P, alters the
+//           cell behind it to create the stopper, walks back to the slide start and
+//           slides in. On success g receives the new block.
+//
+// Returns false when the strategy does not apply or a required path does not exist; g is
+// left unchanged in that case. On success res.turns is the total number of turns and
+// res.actions_str the matching actions (empty when build_action_str is false).
+bool apply_slide_strat(Pos cur_P, Pos target_P, Grid& g , SegmentExecResult& res, int slide_dir_idx, int type, bool build_action_str) {
+    if (is_blocked_pos(target_P, g)) return false;
+    int slide_dr = DR[slide_dir_idx], slide_dc = DC[slide_dir_idx];
+    char slide_dir_char = DIR_CHARS[slide_dir_idx];
+    Pos slide_start_P = {target_P.r - slide_dr, target_P.c - slide_dc};
+    Pos block_at_P = {target_P.r + slide_dr, target_P.c + slide_dc};
+    if (!is_valid_pos(slide_start_P)) return false;
+    if (slide_start_P == target_P) return false;
+    if (type == 0) {
+        bool wall_exists_for_slide = !is_valid_pos(block_at_P) || is_blocked_pos(block_at_P, g);
+        if (!wall_exists_for_slide) return false;
+        // Reach the slide start without entering target_P on the way.
+        BFSResult path_to_slide_start_P = bfs(cur_P, slide_start_P, g,
+                                              target_P, true, build_action_str);
+        if (path_to_slide_start_P.cost == INF_COST) return false;
+        res.turns = path_to_slide_start_P.cost + 1;
+        if (build_action_str) {
+            res.actions_str = path_to_slide_start_P.actions_str;
+            res.actions_str += 'S'; res.actions_str += slide_dir_char;
+        } else {
+            res.actions_str.clear();
+        }
+        return true;
+    } else if (type == 1) {
+        if (!is_valid_pos(block_at_P)) return false;
+        if (is_blocked_pos(block_at_P, g)) return false;
+        BFSResult path_cur_to_target_P = bfs(cur_P, target_P, g, INVALID_POS, false, build_action_str);
+        if (path_cur_to_target_P.cost == INF_COST) return false;
+        // Work on a copy so that g stays untouched if the walk back fails.
+        Grid g_after_alter = g;
+        toggle_block_pos(block_at_P, g_after_alter);
+        char alter_dir_char_for_block = DIR_CHARS[slide_dir_idx];
+        // The walk back starts on target_P itself, so the avoid-cell filter has to be off:
+        // bfs() returns INF_COST immediately when asked to avoid its own start cell, which
+        // made this strategy fail unconditionally before.
+        BFSResult path_target_to_slide_start_P = bfs(target_P, slide_start_P, g_after_alter,
+                                                     INVALID_POS, false, build_action_str);
+        if (path_target_to_slide_start_P.cost == INF_COST) return false;
+        // walk to target + alter + walk back + final slide
+        res.turns = path_cur_to_target_P.cost + 1 + path_target_to_slide_start_P.cost + 1;
+        if (build_action_str) {
+            res.actions_str = path_cur_to_target_P.actions_str;
+            res.actions_str += 'A'; res.actions_str += alter_dir_char_for_block;
+            res.actions_str += path_target_to_slide_start_P.actions_str;
+            res.actions_str += 'S'; res.actions_str += slide_dir_char;
+        } else {
+            res.actions_str.clear();
+        }
+        g = g_after_alter;
+        return true;
+    }
+    return false;
+}
+const int NUM_BASE_STRATEGIES_DIRECT = 1; // base code 0: apply_direct_path_strat
+const int NUM_BASE_STRATEGIES_UNBLOCK = 1; // base code 1: apply_unblock_and_go_strat
+const int NUM_BASE_STRATEGIES_SLIDE_TYPE0 = 4; // base codes 2..5: apply_slide_strat type 0, one code per direction
+const int NUM_BASE_STRATEGIES_SLIDE_TYPE1 = 4; // base codes 6..9: apply_slide_strat type 1, one code per direction
+const int NUM_BASE_STRATEGIES = NUM_BASE_STRATEGIES_DIRECT + NUM_BASE_STRATEGIES_UNBLOCK + // total number of base strategy codes
+                                NUM_BASE_STRATEGIES_SLIDE_TYPE0 + NUM_BASE_STRATEGIES_SLIDE_TYPE1; // 1+1+4+4 = 10
+// Dispatches a base strategy code to its implementation:
+//   0      -> apply_direct_path_strat
+//   1      -> apply_unblock_and_go_strat
+//   2..5   -> apply_slide_strat, type 0, direction index code - 2
+//   6..9   -> apply_slide_strat, type 1, direction index code - 6
+// Any other code yields false. The result of the selected strategy is returned unchanged.
+bool apply_base_strategy_internal(int base_code, Pos cur_P, Pos target_P, Grid& g, SegmentExecResult& res, bool build_action_str) {
+    switch (base_code) {
+        case 0: return apply_direct_path_strat(cur_P, target_P, g, res, build_action_str);
+        case 1: return apply_unblock_and_go_strat(cur_P, target_P, g, res, build_action_str);
+        default: break;
+    }
+    const int type0_first = 2;
+    const int type1_first = type0_first + NUM_BASE_STRATEGIES_SLIDE_TYPE0;
+    const int past_last = type1_first + NUM_BASE_STRATEGIES_SLIDE_TYPE1;
+    if (base_code < type0_first || base_code >= past_last) return false;
+    const bool is_type1 = base_code >= type1_first;
+    const int slide_type = is_type1 ? 1 : 0;
+    const int direction = base_code - (is_type1 ? type1_first : type0_first);
+    return apply_slide_strat(cur_P, target_P, g, res, direction, slide_type, build_action_str);
+}
+const int NUM_POST_ALTER_OPTIONS_NONE = 1; // option 0: no extra action after the base strategy
+const int NUM_POST_ALTER_OPTIONS_ADJACENT = 4; // options 1..4: alter one of the four cells next to the reached target
+const int NUM_POST_ALTER_OPTIONS_MOVE_PLUS_ALTER = 12; // options 5..16: 4 move directions x 3 alter directions (the direction straight back is excluded)
+const int NUM_POST_ALTER_OPTIONS_CUMULATIVE_NONE = NUM_POST_ALTER_OPTIONS_NONE; // first option code of the "adjacent" group (1)
+const int NUM_POST_ALTER_OPTIONS_CUMULATIVE_ADJACENT = NUM_POST_ALTER_OPTIONS_CUMULATIVE_NONE + NUM_POST_ALTER_OPTIONS_ADJACENT; // first option code of the "move plus alter" group (5)
+const int NUM_POST_ALTER_OPTIONS = NUM_POST_ALTER_OPTIONS_CUMULATIVE_ADJACENT + NUM_POST_ALTER_OPTIONS_MOVE_PLUS_ALTER; // 1 + 4 + 12 = 17
+const int TOTAL_STRATEGIES_PER_SEGMENT = NUM_BASE_STRATEGIES * NUM_POST_ALTER_OPTIONS; // 10 * 17 = 170 combined codes; decoded as base = code % NUM_BASE_STRATEGIES, post-alter = code / NUM_BASE_STRATEGIES
+const int GREEDY_REOPTIMIZE_SUBSET_SIZE = 40; // NOTE(review): not referenced in this part of the file; presumably the candidate count for the GREEDY_REOPTIMIZE_SINGLE_SEGMENT move -- confirm at its use site
+// Executes one per-segment choice: the base strategy (combined_code % NUM_BASE_STRATEGIES)
+// followed by a post-alter option (combined_code / NUM_BASE_STRATEGIES):
+//   0      nothing
+//   1..4   alter the cell next to the target in direction (option - 1)
+//   5..16  move one cell away from the target, then alter a cell next to the new
+//          position in any direction except straight back
+// On success returns true: res holds the turn total (and the actions when build_action_str
+// is set), g carries every toggle, and player_pos_ref is the final position.
+// On failure returns false: g is restored to its value at entry and player_pos_ref is
+// left alone; res may hold partial values.
+bool apply_combined_strategy(int combined_code, Pos& player_pos_ref ,
+                             Pos segment_target_P, Grid& g ,
+                             SegmentExecResult& res , bool build_action_str) {
+    const int base_code = combined_code % NUM_BASE_STRATEGIES;
+    const int post_code = combined_code / NUM_BASE_STRATEGIES;
+    Grid grid_at_entry = g;
+    auto rollback = [&g, &grid_at_entry]() { g = grid_at_entry; return false; };
+
+    res.turns = 0;
+    res.actions_str.clear();
+    if (!apply_base_strategy_internal(base_code, player_pos_ref, segment_target_P, g, res, build_action_str)) {
+        return rollback();
+    }
+
+    // Every base strategy finishes with the player standing on the target.
+    Pos final_pos = segment_target_P;
+    if (post_code >= NUM_POST_ALTER_OPTIONS_CUMULATIVE_ADJACENT) {
+        const int packed = post_code - NUM_POST_ALTER_OPTIONS_CUMULATIVE_ADJACENT;
+        const int move_dir = packed / 3;
+        const int alter_rank = packed % 3;
+        // The alter direction is the alter_rank-th direction that is not the reverse of the move.
+        int alter_dir = -1;
+        for (int cand = 0, seen = 0; cand < 4; ++cand) {
+            if (cand == DIR_REV_IDX[move_dir]) continue;
+            if (seen == alter_rank) { alter_dir = cand; break; }
+            ++seen;
+        }
+        Pos stepped_to = {final_pos.r + DR[move_dir], final_pos.c + DC[move_dir]};
+        if (!is_valid_pos(stepped_to) || is_blocked_pos(stepped_to, g)) return rollback();
+        Pos altered_cell = {stepped_to.r + DR[alter_dir], stepped_to.c + DC[alter_dir]};
+        if (!is_valid_pos(altered_cell)) return rollback();
+        res.turns += 2;
+        if (build_action_str) {
+            res.actions_str += 'M';
+            res.actions_str += DIR_CHARS[move_dir];
+            res.actions_str += 'A';
+            res.actions_str += DIR_CHARS[alter_dir];
+        }
+        toggle_block_pos(altered_cell, g);
+        final_pos = stepped_to;
+    } else if (post_code != 0) {
+        const int alter_dir = post_code - NUM_POST_ALTER_OPTIONS_CUMULATIVE_NONE;
+        Pos altered_cell = {final_pos.r + DR[alter_dir], final_pos.c + DC[alter_dir]};
+        if (!is_valid_pos(altered_cell)) return rollback();
+        res.turns++;
+        if (build_action_str) {
+            res.actions_str += 'A';
+            res.actions_str += DIR_CHARS[alter_dir];
+        }
+        toggle_block_pos(altered_cell, g);
+    }
+    player_pos_ref = final_pos;
+    return true;
+}
+struct PathCacheEntry { Pos player_pos_before_segment; Grid grid_before_segment; int turns_before_segment; }; // simulation snapshot taken just before a segment runs: player position, grid, and turns used so far (INF_COST marks an entry invalidated by a failed evaluation)
+struct FullEvalResult { int total_turns; std::string actions_log; bool possible; }; // result of evaluate_choices(): total turns, the action string of the segments it simulated, and whether they all succeeded within MAX_TOTAL_TURNS
+FullEvalResult evaluate_choices(const std::vector<int>& choices, Pos initial_P, const std::vector<Pos>& targets, // Simulates the per-segment strategy codes in choices against targets and returns the total turn count; {INF_COST, "", false} on any failure.
+                                bool build_action_str, int k_eval_start_idx, // k_eval_start_idx: first segment to simulate; earlier segments are taken from reference_path_cache when it is usable
+                                const std::vector<PathCacheEntry>* reference_path_cache, // optional snapshots of a previous evaluation, indexed by segment
+                                std::vector<PathCacheEntry>* path_cache_for_new_state) { // optional output: snapshots of this evaluation; only entries that already exist are written (the vector is never resized)
+    Grid current_grid_sim; Pos player_pos_sim; int total_turns_sim = 0;
+    std::string total_actions_log_sim_segments_builder = ""; // NOTE: collects only the segments simulated in this call, i.e. from the effective start index onward
+    if (k_eval_start_idx == 0 || reference_path_cache == nullptr || reference_path_cache->empty() || (NUM_SEGMENTS > 0 && k_eval_start_idx >= static_cast<int>(reference_path_cache->size())) ) { // no usable snapshot: simulate from the very beginning
+        for(int r=0; r<N_GRID; ++r) for(int c=0; c<N_GRID; ++c) current_grid_sim[r][c] = false; // start from a grid with no blocks
+        player_pos_sim = initial_P;
+        total_turns_sim = 0;
+        if (k_eval_start_idx != 0 && NUM_SEGMENTS > 0) k_eval_start_idx = 0; // the requested start could not be honoured, so every segment is re-simulated
+    } else {
+        const PathCacheEntry& prev_entry = (*reference_path_cache)[k_eval_start_idx]; // resume from the snapshot taken before segment k_eval_start_idx
+        current_grid_sim = prev_entry.grid_before_segment;
+        player_pos_sim = prev_entry.player_pos_before_segment;
+        total_turns_sim = prev_entry.turns_before_segment;
+        if (total_turns_sim == INF_COST) { // the reference evaluation had already failed before this segment
+             return {INF_COST, "", false};
+        }
+    }
+    if (path_cache_for_new_state != nullptr && k_eval_start_idx > 0 && reference_path_cache != nullptr && !reference_path_cache->empty() &&
+        static_cast<int>(path_cache_for_new_state->size()) >= k_eval_start_idx && static_cast<int>(reference_path_cache->size()) >= k_eval_start_idx) { // when resuming, carry the unchanged prefix [0, k_eval_start_idx) over to the new cache
+        std::copy(reference_path_cache->begin(), reference_path_cache->begin() + k_eval_start_idx, path_cache_for_new_state->begin());
+    }
+    for (int seg_idx = k_eval_start_idx; seg_idx < NUM_SEGMENTS; ++seg_idx) {
+        if (path_cache_for_new_state != nullptr && !path_cache_for_new_state->empty() && static_cast<int>(path_cache_for_new_state->size()) > seg_idx) { // record the state before this segment runs
+            (*path_cache_for_new_state)[seg_idx].player_pos_before_segment = player_pos_sim;
+            (*path_cache_for_new_state)[seg_idx].grid_before_segment = current_grid_sim;
+            (*path_cache_for_new_state)[seg_idx].turns_before_segment = total_turns_sim;
+        }
+        Pos target_P_for_segment = targets[seg_idx];
+        SegmentExecResult segment_res;
+        bool success = apply_combined_strategy(choices[seg_idx], player_pos_sim, target_P_for_segment, current_grid_sim, segment_res, build_action_str); // advances player_pos_sim and current_grid_sim on success
+        if (!success || segment_res.turns == INF_COST || total_turns_sim + segment_res.turns > MAX_TOTAL_TURNS) { // strategy failed or the turn budget would be exceeded
+             if (path_cache_for_new_state != nullptr && !path_cache_for_new_state->empty()) {
+                 for(int fill_inf_idx = seg_idx; fill_inf_idx < NUM_SEGMENTS; ++fill_inf_idx) { // invalidate the snapshots from the failing segment onward
+                    if (static_cast<int>(path_cache_for_new_state->size()) > fill_inf_idx)
+                        (*path_cache_for_new_state)[fill_inf_idx].turns_before_segment = INF_COST;
+                 }
+             }
+            return {INF_COST, "", false};
+        }
+        if (build_action_str) total_actions_log_sim_segments_builder += segment_res.actions_str;
+        total_turns_sim += segment_res.turns;
+    }
+    return {total_turns_sim, total_actions_log_sim_segments_builder, true};
+}
+// Reference instant for the time budget, captured when this global is initialised.
+auto time_start = std::chrono::steady_clock::now();
+// Milliseconds elapsed since time_start, measured on the steady clock.
+double get_elapsed_time_ms() {
+    const auto now = std::chrono::steady_clock::now();
+    const std::chrono::duration<double, std::milli> elapsed(now - time_start);
+    return elapsed.count();
+}
+// The search loop in main() runs while get_elapsed_time_ms() is below this value.
+const double TIME_LIMIT_MS = 1950.0;
+enum class NeighborhoodOpType { // kind of neighbourhood move drawn on each iteration of the search loop in main()
+    RANDOM_MULTI_SEGMENT, // drawn when the roll is below 0.50: replaces the code of one or more segments with a different random code
+    FINE_TWEAK_SINGLE_SEGMENT, // drawn for rolls in [0.50, 0.85): picks one segment and changes part of its code (the post-alter part in the visible branch)
+    GREEDY_REOPTIMIZE_SINGLE_SEGMENT // drawn for rolls of 0.85 and above; NOTE(review): its handling lies beyond the visible part of main()
+};
+int main() {
+    std::ios_base::sync_with_stdio(false); std::cin.tie(NULL);
+    int N_in_dummy, M_in_dummy; std::cin >> N_in_dummy >> M_in_dummy;
+    std::cin >> G_initial_pos.r >> G_initial_pos.c;
+    if (NUM_SEGMENTS > 0) {
+        G_targets_vec.resize(NUM_SEGMENTS);
+        for (int k=0; k < NUM_SEGMENTS; ++k) std::cin >> G_targets_vec[k].r >> G_targets_vec[k].c;
+    }
+    std::vector<int> current_sa_choices(NUM_SEGMENTS > 0 ? NUM_SEGMENTS : 0);
+    std::vector<int> best_sa_choices(NUM_SEGMENTS > 0 ? NUM_SEGMENTS : 0);
+    std::vector<PathCacheEntry> current_path_cache(NUM_SEGMENTS > 0 ? NUM_SEGMENTS : 0);
+    std::vector<PathCacheEntry> neighbor_path_cache(NUM_SEGMENTS > 0 ? NUM_SEGMENTS : 0);
+    int current_total_turns = INF_COST;
+    int best_total_turns = INF_COST;
+    std::string best_actions_log_str = "";
+    bool best_is_from_sa = false;
+    int initial_greedy_score_turns = INF_COST;
+    if (NUM_SEGMENTS == 0) {
+        // No actions
+    } else {
+        Grid greedy_grid_sim_build;
+        for(int r=0; r<N_GRID; ++r) for(int c=0; c<N_GRID; ++c) greedy_grid_sim_build[r][c] = false;
+        Pos player_pos_sim_build = G_initial_pos;
+        std::string greedy_actions_log_build_temp = "";
+        int greedy_total_turns_build_temp = 0;
+        bool possible_greedy = true;
+        for (int k = 0; k < NUM_SEGMENTS; ++k) {
+            current_path_cache[k].player_pos_before_segment = player_pos_sim_build;
+            current_path_cache[k].grid_before_segment = greedy_grid_sim_build;
+            current_path_cache[k].turns_before_segment = greedy_total_turns_build_temp;
+            Pos target_P_k = G_targets_vec[k];
+            int current_best_strategy_code_for_k = -1;
+            int current_min_turns_for_segment_k = INF_COST;
+            for (int code = 0; code < TOTAL_STRATEGIES_PER_SEGMENT; ++code) {
+                SegmentExecResult temp_segment_res_eval;
+                Grid temp_grid_eval = greedy_grid_sim_build;
+                Pos temp_player_pos_eval = player_pos_sim_build;
+                bool success = apply_combined_strategy(code, temp_player_pos_eval, target_P_k, temp_grid_eval, temp_segment_res_eval, false);
+                if (success && temp_segment_res_eval.turns < current_min_turns_for_segment_k) {
+                    current_min_turns_for_segment_k = temp_segment_res_eval.turns;
+                    current_best_strategy_code_for_k = code;
+                }
+            }
+            if (current_best_strategy_code_for_k == -1 || greedy_total_turns_build_temp + current_min_turns_for_segment_k > MAX_TOTAL_TURNS) {
+                possible_greedy = false; break;
+            }
+            current_sa_choices[k] = current_best_strategy_code_for_k;
+            SegmentExecResult final_segment_res_for_k_build;
+            apply_combined_strategy(current_best_strategy_code_for_k,
+                                    player_pos_sim_build,
+                                    target_P_k,
+                                    greedy_grid_sim_build,
+                                    final_segment_res_for_k_build,
+                                    true);
+            greedy_actions_log_build_temp += final_segment_res_for_k_build.actions_str;
+            greedy_total_turns_build_temp += final_segment_res_for_k_build.turns;
+        }
+        if(possible_greedy) {
+            current_total_turns = greedy_total_turns_build_temp;
+            best_total_turns = greedy_total_turns_build_temp;
+            initial_greedy_score_turns = greedy_total_turns_build_temp;
+            best_sa_choices = current_sa_choices;
+            best_actions_log_str = greedy_actions_log_build_temp;
+        } else {
+            Grid fallback_grid_sim; for(int r=0; r<N_GRID; ++r) for(int c=0; c<N_GRID; ++c) fallback_grid_sim[r][c] = false;
+            Pos fallback_player_pos = G_initial_pos;
+            int fallback_total_turns = 0;
+            for(int k_fallback=0; k_fallback<NUM_SEGMENTS; ++k_fallback) {
+                current_path_cache[k_fallback].player_pos_before_segment = fallback_player_pos;
+                current_path_cache[k_fallback].grid_before_segment = fallback_grid_sim;
+                current_path_cache[k_fallback].turns_before_segment = fallback_total_turns;
+                Pos target_P_k_fallback = G_targets_vec[k_fallback];
+                int chosen_code_fallback = -1;
+                SegmentExecResult res_simple_direct, res_simple_unblock;
+                Grid temp_grid_direct = fallback_grid_sim; Pos temp_pos_direct = fallback_player_pos;
+                bool success_direct = apply_combined_strategy(0, temp_pos_direct, target_P_k_fallback, temp_grid_direct, res_simple_direct, false);
+                Grid temp_grid_unblock = fallback_grid_sim; Pos temp_pos_unblock = fallback_player_pos;
+                bool success_unblock = apply_combined_strategy(1, temp_pos_unblock, target_P_k_fallback, temp_grid_unblock, res_simple_unblock, false);
+                if (success_direct && (!success_unblock || res_simple_direct.turns <= res_simple_unblock.turns)) {
+                    chosen_code_fallback = 0;
+                } else if (success_unblock) {
+                    chosen_code_fallback = 1;
+                } else {
+                    chosen_code_fallback = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng);
+                }
+                current_sa_choices[k_fallback] = chosen_code_fallback;
+                SegmentExecResult temp_res_chosen_fallback;
+                bool success_chosen_fb = apply_combined_strategy(chosen_code_fallback, fallback_player_pos, target_P_k_fallback, fallback_grid_sim, temp_res_chosen_fallback, false);
+                if (!success_chosen_fb || fallback_total_turns + temp_res_chosen_fallback.turns > MAX_TOTAL_TURNS) {
+                    for(int fill_idx = k_fallback; fill_idx < NUM_SEGMENTS; ++fill_idx) {
+                        if (static_cast<int>(current_path_cache.size()) > fill_idx)
+                            current_path_cache[fill_idx].turns_before_segment = INF_COST;
+                    }
+                    break;
+                }
+                fallback_total_turns += temp_res_chosen_fallback.turns;
+            }
+            FullEvalResult fallback_eval = evaluate_choices(current_sa_choices, G_initial_pos, G_targets_vec, false, 0, nullptr, &current_path_cache);
+            if (fallback_eval.possible) {
+                current_total_turns = fallback_eval.total_turns;
+                if (current_total_turns < best_total_turns) {
+                    best_total_turns = current_total_turns;
+                    best_sa_choices = current_sa_choices;
+                    best_is_from_sa = true;
+                }
+            } else { current_total_turns = INF_COST; }
+            if (current_total_turns == INF_COST) {
+                for(int k_rand_init=0; k_rand_init<NUM_SEGMENTS; ++k_rand_init) {
+                     current_sa_choices[k_rand_init] = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng);
+                }
+                FullEvalResult random_init_eval = evaluate_choices(current_sa_choices, G_initial_pos, G_targets_vec, false, 0, nullptr, &current_path_cache);
+                if (random_init_eval.possible) {
+                    current_total_turns = random_init_eval.total_turns;
+                    if (current_total_turns < best_total_turns) {
+                         best_total_turns = current_total_turns;
+                         best_sa_choices = current_sa_choices;
+                         best_is_from_sa = true;
+                    }
+                }
+            }
+        }
+        double T_param_start = 20.0, T_param_end = 0.01;
+        std::vector<int> segment_indices_for_shuffle(NUM_SEGMENTS);
+        if (NUM_SEGMENTS > 0) std::iota(segment_indices_for_shuffle.begin(), segment_indices_for_shuffle.end(), 0);
+        int iterations_stuck_at_inf = 0;
+        const int MAX_STUCK_ITERATIONS_FOR_RANDOM_RESTART = 50;
+        while (get_elapsed_time_ms() < TIME_LIMIT_MS) {
+            if (current_total_turns == INF_COST) {
+                iterations_stuck_at_inf++;
+                if (iterations_stuck_at_inf > MAX_STUCK_ITERATIONS_FOR_RANDOM_RESTART) {
+                    iterations_stuck_at_inf = 0;
+                    for(int k_rand_init=0; k_rand_init<NUM_SEGMENTS; ++k_rand_init) {
+                        current_sa_choices[k_rand_init] = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng);
+                    }
+                    FullEvalResult random_restart_eval = evaluate_choices(current_sa_choices, G_initial_pos, G_targets_vec, false, 0, nullptr, &current_path_cache);
+                    if (random_restart_eval.possible) {
+                        current_total_turns = random_restart_eval.total_turns;
+                        if (current_total_turns < best_total_turns) {
+                            best_total_turns = current_total_turns;
+                            best_sa_choices = current_sa_choices;
+                            best_is_from_sa = true;
+                        }
+                    }
+                }
+            } else {
+                iterations_stuck_at_inf = 0;
+            }
+            if (NUM_SEGMENTS == 0) break;
+            std::vector<int> neighbor_sa_choices_temp = current_sa_choices;
+            int k_eval_start_idx = NUM_SEGMENTS;
+            bool changed_anything_in_choices_vector = false;
+            double op_type_roll = std::uniform_real_distribution<>(0.0, 1.0)(rng);
+            NeighborhoodOpType current_op_type_local;
+            if (op_type_roll < 0.50) current_op_type_local = NeighborhoodOpType::RANDOM_MULTI_SEGMENT;
+            else if (op_type_roll < 0.85) current_op_type_local = NeighborhoodOpType::FINE_TWEAK_SINGLE_SEGMENT;
+            else current_op_type_local = NeighborhoodOpType::GREEDY_REOPTIMIZE_SINGLE_SEGMENT;
+            if (current_op_type_local == NeighborhoodOpType::RANDOM_MULTI_SEGMENT) {
+                int num_local_changes;
+                double r_nc_dist = std::uniform_real_distribution<>(0.0, 1.0)(rng);
+                int max_pert_base = std::max(1, NUM_SEGMENTS / 5);
+                if (r_nc_dist < 0.60) num_local_changes = 1;
+                else if (r_nc_dist < 0.85) num_local_changes = 2;
+                else if (r_nc_dist < 0.95) num_local_changes = 3;
+                else num_local_changes = std::min(NUM_SEGMENTS,
+                    static_cast<int>(4 + std::uniform_int_distribution<>(0, std::max(0, max_pert_base - 4))(rng))
+                );
+                num_local_changes = std::min(num_local_changes, NUM_SEGMENTS);
+                num_local_changes = std::max(1, num_local_changes);
+                changed_anything_in_choices_vector = true;
+                double r_mt_dist = std::uniform_real_distribution<>(0.0, 1.0)(rng);
+                if (r_mt_dist < 0.80 || num_local_changes >= NUM_SEGMENTS ) {
+                    std::shuffle(segment_indices_for_shuffle.begin(), segment_indices_for_shuffle.end(), rng);
+                    int min_k_changed_val = NUM_SEGMENTS;
+                    for (int i_change = 0; i_change < num_local_changes; ++i_change) {
+                        int k_to_change = segment_indices_for_shuffle[i_change];
+                        min_k_changed_val = std::min(min_k_changed_val, k_to_change);
+                        int old_code = neighbor_sa_choices_temp[k_to_change];
+                        int new_code = old_code;
+                        if (TOTAL_STRATEGIES_PER_SEGMENT > 1) {
+                            do { new_code = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng); } while (new_code == old_code);
+                        } else { new_code = 0; }
+                        neighbor_sa_choices_temp[k_to_change] = new_code;
+                    }
+                    k_eval_start_idx = min_k_changed_val;
+                } else {
+                    int L = num_local_changes;
+                    int k_start_block = std::uniform_int_distribution<>(0, NUM_SEGMENTS - L)(rng);
+                    for (int i = 0; i < L; ++i) {
+                        int k_to_change = k_start_block + i;
+                        int old_code = neighbor_sa_choices_temp[k_to_change];
+                        int new_code = old_code;
+                         if (TOTAL_STRATEGIES_PER_SEGMENT > 1) {
+                            do { new_code = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng); } while (new_code == old_code);
+                        } else { new_code = 0; }
+                        neighbor_sa_choices_temp[k_to_change] = new_code;
+                    }
+                    k_eval_start_idx = k_start_block;
+                }
+            } else if (current_op_type_local == NeighborhoodOpType::FINE_TWEAK_SINGLE_SEGMENT) {
+                changed_anything_in_choices_vector = true;
+                int k_to_change = std::uniform_int_distribution<>(0, NUM_SEGMENTS - 1)(rng);
+                k_eval_start_idx = k_to_change;
+                int current_strategy_code = neighbor_sa_choices_temp[k_to_change];
+                int base_code = current_strategy_code % NUM_BASE_STRATEGIES;
+                int post_alter_code = current_strategy_code / NUM_BASE_STRATEGIES;
+                double tweak_type_rand = std::uniform_real_distribution<>(0.0, 1.0)(rng);
+                if (tweak_type_rand < 0.5 && NUM_POST_ALTER_OPTIONS > 1) {
+                    int new_post_alter_code = post_alter_code;
+                    do { new_post_alter_code = std::uniform_int_distribution<>(0, NUM_POST_ALTER_OPTIONS - 1)(rng); } while (new_post_alter_code == post_alter_code);
+                    neighbor_sa_choices_temp[k_to_change] = new_post_alter_code * NUM_BASE_STRATEGIES + base_code;
+                } else if (NUM_BASE_STRATEGIES > 1) {
+                    int new_base_code = base_code;
+                    do { new_base_code = std::uniform_int_distribution<>(0, NUM_BASE_STRATEGIES - 1)(rng); } while (new_base_code == base_code);
+                    neighbor_sa_choices_temp[k_to_change] = post_alter_code * NUM_BASE_STRATEGIES + new_base_code;
+                } else {
+                     if (TOTAL_STRATEGIES_PER_SEGMENT > 1) {
+                        int new_code = current_strategy_code;
+                        do { new_code = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng); } while (new_code == current_strategy_code);
+                        neighbor_sa_choices_temp[k_to_change] = new_code;
+                     } else { changed_anything_in_choices_vector = false; }
+                }
+                if (neighbor_sa_choices_temp[k_to_change] == current_sa_choices[k_to_change]) {
+                     changed_anything_in_choices_vector = false;
+                }
+            } else { // GREEDY_REOPTIMIZE_SINGLE_SEGMENT
+                int k_to_reoptimize = std::uniform_int_distribution<>(0, NUM_SEGMENTS - 1)(rng);
+                if (current_total_turns == INF_COST || current_path_cache.empty() ||
+                    k_to_reoptimize >= static_cast<int>(current_path_cache.size()) ||
+                    current_path_cache[k_to_reoptimize].turns_before_segment == INF_COST) {
+                     changed_anything_in_choices_vector = false;
+                } else {
+                    k_eval_start_idx = k_to_reoptimize;
+                    Pos player_pos_before_k = current_path_cache[k_to_reoptimize].player_pos_before_segment;
+                    Grid grid_before_k = current_path_cache[k_to_reoptimize].grid_before_segment;
+                    Pos target_P_k = G_targets_vec[k_to_reoptimize];
+                    int original_choice_for_k = current_sa_choices[k_to_reoptimize];
+                    int best_strategy_for_k = original_choice_for_k;
+                    SegmentExecResult best_res_for_k_eval;
+                    Grid temp_grid_eval_current = grid_before_k; Pos temp_player_pos_eval_current = player_pos_before_k;
+                    bool current_choice_possible = apply_combined_strategy(original_choice_for_k, temp_player_pos_eval_current, target_P_k, temp_grid_eval_current, best_res_for_k_eval, false);
+                    if (!current_choice_possible) best_res_for_k_eval.turns = INF_COST;
+                    for (int i = 0; i < GREEDY_REOPTIMIZE_SUBSET_SIZE; ++i) {
+                        int code_to_try = std::uniform_int_distribution<>(0, TOTAL_STRATEGIES_PER_SEGMENT - 1)(rng);
+                        if (code_to_try == original_choice_for_k && current_choice_possible) {
+                            continue;
+                        }
+                        SegmentExecResult current_segment_res_eval;
+                        Grid temp_grid_iter_eval = grid_before_k;
+                        Pos temp_player_pos_iter_eval = player_pos_before_k;
+                        bool success = apply_combined_strategy(code_to_try, temp_player_pos_iter_eval, target_P_k, temp_grid_iter_eval, current_segment_res_eval, false);
+                        if (success && current_segment_res_eval.turns < best_res_for_k_eval.turns) {
+                            best_res_for_k_eval.turns = current_segment_res_eval.turns;
+                            best_strategy_for_k = code_to_try;
+                        }
+                    }
+                    neighbor_sa_choices_temp[k_to_reoptimize] = best_strategy_for_k;
+                    if (best_strategy_for_k != original_choice_for_k) {
+                        changed_anything_in_choices_vector = true;
+                    }
+                }
+            }
+            if (!changed_anything_in_choices_vector) continue;
+            FullEvalResult neighbor_eval_res = evaluate_choices(neighbor_sa_choices_temp, G_initial_pos, G_targets_vec,
+                                                                false, k_eval_start_idx,
+                                                                &current_path_cache, &neighbor_path_cache);
+            if (neighbor_eval_res.possible) {
+                bool accepted = false;
+                if (neighbor_eval_res.total_turns < current_total_turns) { accepted = true; }
+                else if (current_total_turns != INF_COST) {
+                    double temperature = T_param_start;
+                    double progress = get_elapsed_time_ms() / TIME_LIMIT_MS;
+                    if (progress < 1.0 && progress >=0.0) { temperature = T_param_start * std::pow(T_param_end / T_param_start, progress); }
+                    else if (progress >= 1.0) { temperature = T_param_end; }
+                    temperature = std::max(temperature, T_param_end);
+                    if (temperature > 1e-9) {
+                        double delta_cost = static_cast<double>(neighbor_eval_res.total_turns - current_total_turns);
+                        if (std::exp(-delta_cost / temperature) > std::uniform_real_distribution<>(0.0, 1.0)(rng) ) { accepted = true; }
+                    }
+                } else {
+                    accepted = true;
+                }
+                if (accepted) {
+                    current_sa_choices.swap(neighbor_sa_choices_temp);
+                    current_total_turns = neighbor_eval_res.total_turns;
+                    if (!current_path_cache.empty() && !neighbor_path_cache.empty()) {
+                         current_path_cache.swap(neighbor_path_cache);
+                    }
+                    if (current_total_turns < best_total_turns) {
+                        best_total_turns = current_total_turns;
+                        best_sa_choices = current_sa_choices;
+                        best_is_from_sa = true;
+                    }
+                }
+            }
+        }
+        if (best_total_turns == INF_COST) {
+            best_actions_log_str = "";
+        } else {
+            if (best_is_from_sa || !possible_greedy || best_total_turns < initial_greedy_score_turns) {
+                 FullEvalResult final_best_res = evaluate_choices(best_sa_choices, G_initial_pos, G_targets_vec, true, 0, nullptr, nullptr);
+                 if (final_best_res.possible) {
+                    best_actions_log_str = final_best_res.actions_log;
+                 } else {
+                    best_actions_log_str = "";
+                 }
+            }
+        }
+    }
+    const std::string& final_actions_to_print = best_actions_log_str;
+    for (size_t i = 0; i < final_actions_to_print.length(); i += 2) {
+        std::cout << final_actions_to_print[i] << " " << final_actions_to_print[i+1] << "\n";
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/ale_agent_best/ahc016.cpp ADDED Viewed

	@@ -0,0 +1,495 @@

+# EVOLVE-BLOCK-START
+#ifndef ONLINE_JUDGE
+// #define DEBUG_OUTPUT // Uncomment for local debug prints
+#endif
+#include <iostream>
+#include <vector>
+#include <string>
+#include <numeric>
+#include <algorithm>
+#include <random>
+#include <set>
+#include <array>
+#include <iomanip>
+#include <cmath>
+#include <chrono>
+#include <map>
+// Max N for which we attempt full GED based strategy.
+constexpr int N_MAX_GED_CAP = 6;
+// Adjacency matrix for H_k received in query, or for G_i during pairwise GED. Max N=100
+bool CURRENT_GRAPH_ADJ_QUERY[100][100];
+int N_ACTUAL;
+int L_ACTUAL; // N_ACTUAL * (N_ACTUAL - 1) / 2
+// Stores chosen G_j graphs as adjacency matrices (for GED strategy, N <= N_MAX_GED_CAP)
+std::vector<std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>> G_ADJS_CHOSEN_GED;
+// For large N strategy (edge density)
+std::vector<std::string> G_STRINGS_CHOSEN_LARGE_N;
+std::vector<int> G_EDGE_COUNTS_LARGE_N;
+std::vector<int> P_VERTS_PERM_QUERY; // Permutation vector for GED in query
+std::mt19937 RND_ENGINE;
+// Temp storage for canonical mask generation (N <= N_MAX_GED_CAP)
+bool CANON_TMP_ADJ[N_MAX_GED_CAP][N_MAX_GED_CAP];
+std::vector<int> CANON_P_PERM;
+enum class Strategy { // decoding approach; main() picks one from M and the noise rate
+    GED, // match a query to the stored graph with the smallest edit distance over vertex permutations
+    EDGE_COUNT // match a query to the stored graph whose expected noisy edge count is closest
+};
+Strategy current_strategy; // assigned once in main() before N is printed
+const std::vector<uint16_t> PRECOMPUTED_CANONICAL_MASKS_N6 = {
+    0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 27, 29, 31, 37, 39, 43, 45, 47, 53, 55, 61,
+    63, 73, 75, 77, 79, 91, 93, 95, 111, 117, 119, 125, 127, 141, 143, 157, 159, 173, 175,
+    181, 183, 189, 191, 205, 207, 221, 223, 237, 239, 253, 255, 285, 287, 315, 317, 319,
+    349, 351, 379, 381, 383, 413, 415, 445, 447, 477, 479, 509, 511, 565, 567, 573, 575,
+    589, 591, 605, 607, 637, 639, 701, 703, 717, 719, 733, 735, 749, 751, 765, 767, 797,
+    799, 829, 831, 861, 863, 893, 895, 957, 959, 989, 991, 1021, 1023, 1149, 1151, 1213,
+    1215, 1245, 1247, 1277, 1279, 1533, 1535, 1661, 1663, 1789, 1791, 1917, 1919, 2045,
+    2047, 2109, 2111, 2141, 2143, 2173, 2175, 2205, 2207, 2237, 2239, 2269, 2271, 2301,
+    2303, 2685, 2687, 2813, 2815, 2941, 2943, 3069, 3071, 3277, 3279, 3285, 3287, 3293,
+    3295, 3309, 3311, 3325, 3327, 3357, 3359, 3389, 3391, 3421, 3423, 3453, 3455, 3517,
+    3519, 3549, 3551, 3581, 3583, 3613, 3615, 3645, 3647, 3709, 3711, 3773, 3775, 3837,
+    3839, 4095, 8191, 16383, 32767
+}; // Total 156 graphs for N=6.
+// Expands an upper-triangle edge bitmask into a symmetric adjacency matrix.
+// Bits are consumed in row-major order over vertex pairs (i, j) with i < j,
+// starting at bit 0. Only the top-left N_nodes x N_nodes corner of adj_matrix
+// is written, and its diagonal is cleared.
+void mask_to_adj_matrix_small_N(uint16_t mask, int N_nodes, bool adj_matrix[][N_MAX_GED_CAP]) {
+    int next_bit = 0;
+    for (int row = 0; row < N_nodes; ++row) {
+        adj_matrix[row][row] = false;
+        for (int col = row + 1; col < N_nodes; ++col, ++next_bit) {
+            const bool has_edge = ((mask >> next_bit) & 1);
+            adj_matrix[row][col] = has_edge;
+            adj_matrix[col][row] = has_edge;
+        }
+    }
+}
+// Packs the adjacency matrix, relabelled through p_perm, back into an edge bitmask.
+// Position (i, j) of the relabelled graph is read from adj_matrix[p_perm[i]][p_perm[j]];
+// the bit layout is the same one mask_to_adj_matrix_small_N consumes.
+uint16_t adj_matrix_to_mask_small_N(int N_nodes, const bool adj_matrix[][N_MAX_GED_CAP], const std::vector<int>& p_perm) {
+    uint16_t packed = 0;
+    int next_bit = 0;
+    for (int a = 0; a < N_nodes; ++a) {
+        for (int b = a + 1; b < N_nodes; ++b, ++next_bit) {
+            if (!adj_matrix[p_perm[a]][p_perm[b]]) continue;
+            packed |= (1U << next_bit);
+        }
+    }
+    return packed;
+}
+// Returns the numerically smallest edge mask reachable from mask_val by
+// relabelling the N_ACTUAL vertices. Uses the global scratch buffers
+// CANON_TMP_ADJ and CANON_P_PERM; the caller must have sized CANON_P_PERM
+// to N_ACTUAL beforehand.
+uint16_t get_canonical_mask(uint16_t mask_val) {
+    const int pair_count = N_ACTUAL * (N_ACTUAL - 1) / 2;
+    if (pair_count == 0) return 0;
+    mask_to_adj_matrix_small_N(mask_val, N_ACTUAL, CANON_TMP_ADJ);
+    // Start from the identity permutation so next_permutation visits every ordering.
+    std::iota(CANON_P_PERM.begin(), CANON_P_PERM.end(), 0);
+    uint16_t smallest = adj_matrix_to_mask_small_N(N_ACTUAL, CANON_TMP_ADJ, CANON_P_PERM);
+    for (bool more = std::next_permutation(CANON_P_PERM.begin(), CANON_P_PERM.end());
+         more;
+         more = std::next_permutation(CANON_P_PERM.begin(), CANON_P_PERM.end())) {
+        const uint16_t candidate = adj_matrix_to_mask_small_N(N_ACTUAL, CANON_TMP_ADJ, CANON_P_PERM);
+        if (candidate < smallest) smallest = candidate;
+    }
+    return smallest;
+}
+// Counts the vertex pairs on which the template graph and the graph held in
+// CURRENT_GRAPH_ADJ_QUERY disagree, with the query graph relabelled through
+// the current contents of P_VERTS_PERM_QUERY.
+int calculate_edit_distance_one_perm_small_N(
+    const std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>& g_j_adj_template
+) {
+    int mismatches = 0;
+    for (int a = 0; a < N_ACTUAL; ++a) {
+        for (int b = a + 1; b < N_ACTUAL; ++b) {
+            const bool in_template = g_j_adj_template[a][b];
+            const bool in_query = CURRENT_GRAPH_ADJ_QUERY[P_VERTS_PERM_QUERY[a]][P_VERTS_PERM_QUERY[b]];
+            mismatches += (in_query != in_template) ? 1 : 0;
+        }
+    }
+    return mismatches;
+}
+// Minimum edit distance between the template and the graph in
+// CURRENT_GRAPH_ADJ_QUERY over vertex relabellings. P_VERTS_PERM_QUERY is reset
+// to the identity and then stepped with next_permutation; the search stops
+// early on an exact match and never evaluates more than N_ACTUAL! orderings.
+int min_edit_distance_global_perm_small_N(
+    const std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP>& g_j_adj_template
+) {
+    if (L_ACTUAL == 0) return 0;
+    std::iota(P_VERTS_PERM_QUERY.begin(), P_VERTS_PERM_QUERY.end(), 0);
+    long long perm_budget = 1;
+    for (int f = 2; f <= N_ACTUAL; ++f) perm_budget *= f;
+    int best = L_ACTUAL + 1;  // larger than any achievable distance
+    long long visited = 0;
+    bool more = true;
+    while (more) {
+        best = std::min(best, calculate_edit_distance_one_perm_small_N(g_j_adj_template));
+        if (best == 0) break;
+        if (++visited >= perm_budget) break;
+        more = std::next_permutation(P_VERTS_PERM_QUERY.begin(), P_VERTS_PERM_QUERY.end());
+    }
+    return best;
+}
+std::vector<uint16_t> available_canonical_masks; // candidate graphs for the chosen N, one edge mask each (filled in main)
+std::vector<std::vector<int>> all_pairwise_ged_cache; // [i][j] = edit distance between candidates i and j (symmetric, zero diagonal)
+std::map<uint16_t, int> mask_to_idx_map; // edge mask -> index into available_canonical_masks
+std::vector<int> chosen_mask_indices_greedy; // indices into available_canonical_masks in output order; may repeat when padded
+// Builds a '0'/'1' string of length current_L containing exactly num_edges
+// ones at positions drawn with RND_ENGINE. Counts at or below zero give the
+// all-'0' string; counts at or above current_L give the all-'1' string.
+std::string generate_random_graph_string_large_n(int num_edges, int current_L) {
+    std::string bits(current_L, '0');
+    if (num_edges <= 0 || current_L == 0) return bits;
+    if (num_edges >= current_L) {
+        bits.assign(bits.size(), '1');
+        return bits;
+    }
+    // Shuffle every slot index and take the first num_edges of them.
+    std::vector<int> slots(current_L);
+    std::iota(slots.begin(), slots.end(), 0);
+    std::shuffle(slots.begin(), slots.end(), RND_ENGINE);
+    std::for_each(slots.begin(), slots.begin() + num_edges,
+                  [&bits](int slot) { bits[slot] = '1'; });
+    return bits;
+}
+// Number of '1' characters in s; every other character is ignored.
+int count_set_bits_in_string(const std::string& s) {
+    int ones = 0;
+    for (char ch : s) {
+        if (ch == '1') ++ones;
+    }
+    return ones;
+}
+// Loads an upper-triangle '0'/'1' string into CURRENT_GRAPH_ADJ_QUERY as a
+// symmetric matrix with a cleared diagonal. Characters are consumed in
+// row-major order over pairs (i, j) with i < j; pairs beyond the end of the
+// string are treated as non-edges.
+void string_to_adj_matrix_query(const std::string& s, int N_nodes) {
+    const int available = (int)s.length();
+    int cursor = 0;
+    for (int row = 0; row < N_nodes; ++row) {
+        CURRENT_GRAPH_ADJ_QUERY[row][row] = false;
+        for (int col = row + 1; col < N_nodes; ++col) {
+            bool has_edge = false;
+            if (cursor < available) {
+                has_edge = (s[cursor] == '1');
+                ++cursor;
+            }
+            CURRENT_GRAPH_ADJ_QUERY[row][col] = has_edge;
+            CURRENT_GRAPH_ADJ_QUERY[col][row] = has_edge;
+        }
+    }
+}
+int main() { // reads M and epsilon, picks N and a strategy, prints M graphs, then answers 100 queries
+    std::ios_base::sync_with_stdio(false);
+    std::cin.tie(NULL);
+    unsigned int seed_val = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count(); // clock-based seed: runs are not reproducible
+    RND_ENGINE.seed(seed_val);
+    int M_graphs;
+    double epsilon_noise_rate;
+    std::cin >> M_graphs >> epsilon_noise_rate;
+    int N_for_GED_strat; // vertex count to use if the GED strategy is selected
+    if (M_graphs <= 11) N_for_GED_strat = 4;
+    else if (M_graphs <= 34) N_for_GED_strat = 5;
+    else N_for_GED_strat = N_MAX_GED_CAP;
+    const double K_SEP = 2.5; // separation constant in the L_ideal formula below
+    double L_ideal; // edge-slot count the EDGE_COUNT strategy would like to have
+    double L_ideal_numerator = K_SEP * K_SEP * (M_graphs > 1 ? (M_graphs - 1.0) * (M_graphs - 1.0) : 1.0) *
+                               epsilon_noise_rate * (1.0 - epsilon_noise_rate);
+    double L_ideal_denominator_factor = (0.5 - epsilon_noise_rate);
+    double L_ideal_denominator = L_ideal_denominator_factor * L_ideal_denominator_factor;
+    if (std::abs(0.5 - epsilon_noise_rate) < 1e-9) { // avoid dividing by ~0 when epsilon is 0.5
+        L_ideal = (100.0 * 99.0) / 2.0; // pair count of a 100-vertex graph
+    } else {
+        L_ideal = L_ideal_numerator / L_ideal_denominator;
+    }
+    if (L_ideal < 0) L_ideal = 0;
+    int N_candidate_EC = 4;
+    if (L_ideal > 1e-9) {
+         double discriminant = 1.0 + 8.0 * L_ideal;
+         if (discriminant >=0) {
+            N_candidate_EC = static_cast<int>(std::ceil((1.0 + std::sqrt(discriminant)) / 2.0)); // smallest N with N*(N-1)/2 >= L_ideal
+         } else {
+            N_candidate_EC = 100;
+         }
+    }
+    N_candidate_EC = std::max(4, N_candidate_EC);
+    N_candidate_EC = std::min(100, N_candidate_EC);
+    if (epsilon_noise_rate < 0.01) { // below 1% noise GED is always used
+        current_strategy = Strategy::GED; N_ACTUAL = N_for_GED_strat;
+    } else {
+        if (N_candidate_EC > N_for_GED_strat) { // EDGE_COUNT only when it asks for more vertices than GED would use
+             current_strategy = Strategy::EDGE_COUNT; N_ACTUAL = N_candidate_EC;
+        } else {
+            current_strategy = Strategy::GED; N_ACTUAL = N_for_GED_strat;
+        }
+    }
+    N_ACTUAL = std::min(100, std::max(4, N_ACTUAL)); // Final check on N_ACTUAL bounds
+    L_ACTUAL = N_ACTUAL * (N_ACTUAL - 1) / 2;
+    std::cout << N_ACTUAL << std::endl; // first output line: the chosen vertex count
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# M=" << M_graphs << ", eps=" << epsilon_noise_rate << std::endl;
+    std::cerr << "# Chosen N=" << N_ACTUAL << ", Strategy=" << (current_strategy == Strategy::GED ? "GED" : "EDGE_COUNT") << std::endl;
+    std::cerr << "# L_ideal=" << L_ideal << ", N_candidate_EC=" << N_candidate_EC << ", N_for_GED_strat=" << N_for_GED_strat << std::endl;
+#endif
+    if (current_strategy == Strategy::GED) {
+        P_VERTS_PERM_QUERY.resize(N_ACTUAL); CANON_P_PERM.resize(N_ACTUAL);
+        if (N_ACTUAL == 6) { // N == 6 takes the hard-coded table instead of enumerating every mask
+            available_canonical_masks = PRECOMPUTED_CANONICAL_MASKS_N6;
+        } else {
+            std::set<uint16_t> unique_masks_set;
+            if (L_ACTUAL > 0) {
+                for (unsigned int i = 0; i < (1U << L_ACTUAL); ++i) { // every edge mask collapses to its canonical representative
+                    unique_masks_set.insert(get_canonical_mask(static_cast<uint16_t>(i)));
+                }
+            } else {
+                unique_masks_set.insert(0);
+            }
+            available_canonical_masks.assign(unique_masks_set.begin(), unique_masks_set.end());
+        }
+        int num_total_isos = available_canonical_masks.size();
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# Num non-isomorphic graphs for N=" << N_ACTUAL << " is " << num_total_isos << std::endl;
+#endif
+        mask_to_idx_map.clear();
+        for(int i=0; i<num_total_isos; ++i) mask_to_idx_map[available_canonical_masks[i]] = i;
+        if (num_total_isos > 0) {
+            all_pairwise_ged_cache.assign(num_total_isos, std::vector<int>(num_total_isos, 0));
+            bool graph_i_adj_cstyle[N_MAX_GED_CAP][N_MAX_GED_CAP];
+            std::array<std::array<bool, N_MAX_GED_CAP>, N_MAX_GED_CAP> graph_j_adj_stdarray;
+            for (int i = 0; i < num_total_isos; ++i) {
+                mask_to_adj_matrix_small_N(available_canonical_masks[i], N_ACTUAL, graph_i_adj_cstyle);
+                for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) CURRENT_GRAPH_ADJ_QUERY[r][c] = graph_i_adj_cstyle[r][c]; // the distance routine reads graph i from this global
+                for (int j = i + 1; j < num_total_isos; ++j) {
+                    bool temp_adj_for_gj[N_MAX_GED_CAP][N_MAX_GED_CAP];
+                    mask_to_adj_matrix_small_N(available_canonical_masks[j], N_ACTUAL, temp_adj_for_gj);
+                    for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) graph_j_adj_stdarray[r][c] = temp_adj_for_gj[r][c];
+                    all_pairwise_ged_cache[i][j] = all_pairwise_ged_cache[j][i] = min_edit_distance_global_perm_small_N(graph_j_adj_stdarray);
+                }
+            }
+        }
+        chosen_mask_indices_greedy.clear();
+        std::vector<bool> is_chosen_idx(num_total_isos, false);
+        if (num_total_isos > 0) {
+            if (mask_to_idx_map.count(0)) { // seed the selection with the empty graph
+                int zero_idx = mask_to_idx_map.at(0);
+                if (chosen_mask_indices_greedy.size() < (size_t)M_graphs) {
+                    chosen_mask_indices_greedy.push_back(zero_idx);
+                    is_chosen_idx[zero_idx] = true;
+                }
+            }
+            if (L_ACTUAL > 0 && chosen_mask_indices_greedy.size() < (size_t)M_graphs) { // then add the complete graph
+                uint16_t complete_mask_val = (1U << L_ACTUAL) - 1;
+                uint16_t canonical_complete_mask = get_canonical_mask(complete_mask_val);
+                if (mask_to_idx_map.count(canonical_complete_mask)) {
+                    int complete_idx = mask_to_idx_map.at(canonical_complete_mask);
+                    if (!is_chosen_idx[complete_idx]) {
+                         chosen_mask_indices_greedy.push_back(complete_idx);
+                         is_chosen_idx[complete_idx] = true;
+                    }
+                }
+            }
+        }
+        for (int k_count = chosen_mask_indices_greedy.size(); k_count < M_graphs; ++k_count) { // greedy fill: add the candidate farthest from the chosen set
+            if (chosen_mask_indices_greedy.size() >= (size_t)num_total_isos) {
+                break;
+            }
+            int best_new_idx_to_add = -1;
+            int max_of_min_distances_found = -1;
+            for (int cand_idx = 0; cand_idx < num_total_isos; ++cand_idx) {
+                if (is_chosen_idx[cand_idx]) continue;
+                int current_cand_min_dist_to_existing_G;
+                if (chosen_mask_indices_greedy.empty()) {
+                     current_cand_min_dist_to_existing_G = L_ACTUAL + 1;
+                } else {
+                    current_cand_min_dist_to_existing_G = L_ACTUAL + 1;
+                    for (int chosen_idx : chosen_mask_indices_greedy) {
+                        current_cand_min_dist_to_existing_G = std::min(current_cand_min_dist_to_existing_G, all_pairwise_ged_cache[cand_idx][chosen_idx]);
+                    }
+                }
+                if (current_cand_min_dist_to_existing_G > max_of_min_distances_found) { // strict '>' keeps the lowest index on ties
+                    max_of_min_distances_found = current_cand_min_dist_to_existing_G;
+                    best_new_idx_to_add = cand_idx;
+                }
+            }
+            if (best_new_idx_to_add != -1) {
+                chosen_mask_indices_greedy.push_back(best_new_idx_to_add);
+                is_chosen_idx[best_new_idx_to_add] = true;
+            } else {
+                break;
+            }
+        }
+        int num_distinct_chosen_graphs = chosen_mask_indices_greedy.size();
+        if (num_distinct_chosen_graphs < M_graphs) { // fewer distinct candidates than M: pad with duplicates of the fallback
+            int fallback_idx = 0;
+            if (num_total_isos > 0) {
+                if (mask_to_idx_map.count(0)) {
+                    fallback_idx = mask_to_idx_map.at(0);
+                }
+            }
+            for (int k_idx = num_distinct_chosen_graphs; k_idx < M_graphs; ++k_idx) {
+                 if (num_total_isos > 0) {
+                    chosen_mask_indices_greedy.push_back(fallback_idx);
+                 } else {
+                    chosen_mask_indices_greedy.push_back(0);
+                 }
+            }
+        }
+#ifdef DEBUG_OUTPUT
+    std::cerr << "# Chosen mask indices (size " << chosen_mask_indices_greedy.size() << "): ";
+    if (!available_canonical_masks.empty()){ // Check before accessing
+        for(int idx : chosen_mask_indices_greedy) {
+            if (idx < available_canonical_masks.size()) std::cerr << idx << " (" << available_canonical_masks[idx] << ") ";
+            else std::cerr << idx << " (OOB) ";
+        }
+    }
+    std::cerr << std::endl;
+#endif
+        G_ADJS_CHOSEN_GED.resize(M_graphs);
+        for (int k_idx = 0; k_idx < M_graphs; ++k_idx) { // store each chosen graph for the query phase and print it
+            uint16_t mask_to_print = 0;
+            if (k_idx < chosen_mask_indices_greedy.size() &&
+                !available_canonical_masks.empty() &&
+                chosen_mask_indices_greedy[k_idx] < available_canonical_masks.size()) {
+                 mask_to_print = available_canonical_masks[chosen_mask_indices_greedy[k_idx]];
+            } else if (L_ACTUAL == 0 && k_idx < chosen_mask_indices_greedy.size()) {
+                 mask_to_print = 0;
+            }
+            bool temp_adj_cstyle[N_MAX_GED_CAP][N_MAX_GED_CAP];
+            mask_to_adj_matrix_small_N(mask_to_print, N_ACTUAL, temp_adj_cstyle);
+            for(int r=0; r<N_ACTUAL; ++r) for(int c=0; c<N_ACTUAL; ++c) G_ADJS_CHOSEN_GED[k_idx][r][c] = temp_adj_cstyle[r][c];
+            std::string s_out = "";
+            if (L_ACTUAL > 0) {
+                for (int bit_idx = 0; bit_idx < L_ACTUAL; ++bit_idx) {
+                    s_out += ((mask_to_print >> bit_idx) & 1) ? '1' : '0'; // bit b of the mask becomes character b of the line
+                }
+            }
+            std::cout << s_out << std::endl;
+        }
+    } else { // EDGE_COUNT strategy: graphs are told apart only by how many edges they have
+        G_EDGE_COUNTS_LARGE_N.resize(M_graphs); G_STRINGS_CHOSEN_LARGE_N.resize(M_graphs);
+        if (M_graphs == 1) {
+             G_EDGE_COUNTS_LARGE_N[0] = (L_ACTUAL > 0) ? L_ACTUAL / 2 : 0;
+        } else {
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = static_cast<int>(std::round((double)k * L_ACTUAL / (M_graphs - 1.0))); // spread target counts evenly over [0, L_ACTUAL]
+            for (int k=0; k<M_graphs-1; ++k) { // force strictly increasing counts
+                if (G_EDGE_COUNTS_LARGE_N[k+1] <= G_EDGE_COUNTS_LARGE_N[k]) {
+                    G_EDGE_COUNTS_LARGE_N[k+1] = G_EDGE_COUNTS_LARGE_N[k] + 1;
+                }
+            }
+            if (M_graphs > 0 && G_EDGE_COUNTS_LARGE_N[M_graphs-1] > L_ACTUAL) { // M_graphs > 0 check
+                int exceso = G_EDGE_COUNTS_LARGE_N[M_graphs-1] - L_ACTUAL; // overshoot of the largest count past L_ACTUAL
+                for (int k=0; k<M_graphs; ++k) {
+                    G_EDGE_COUNTS_LARGE_N[k] -= exceso;
+                }
+            }
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = std::min(L_ACTUAL, std::max(0, G_EDGE_COUNTS_LARGE_N[k]));
+            for (int k=0; k<M_graphs-1; ++k) {
+                 G_EDGE_COUNTS_LARGE_N[k+1] = std::max(G_EDGE_COUNTS_LARGE_N[k+1], G_EDGE_COUNTS_LARGE_N[k] + 1);
+            }
+            for (int k=0; k<M_graphs; ++k) G_EDGE_COUNTS_LARGE_N[k] = std::min(L_ACTUAL, std::max(0, G_EDGE_COUNTS_LARGE_N[k])); // final clamp; counts may tie at L_ACTUAL
+        }
+        for (int k=0; k<M_graphs; ++k) {
+            G_STRINGS_CHOSEN_LARGE_N[k] = generate_random_graph_string_large_n(G_EDGE_COUNTS_LARGE_N[k], L_ACTUAL);
+            std::cout << G_STRINGS_CHOSEN_LARGE_N[k] << std::endl;
+        }
+    }
+    std::cout.flush(); // Explicit flush after all G_k are printed
+    for (int q_idx = 0; q_idx < 100; ++q_idx) { // exactly 100 queries are read and answered
+        std::string h_str; std::cin >> h_str;
+        if (current_strategy == Strategy::GED) {
+            if (M_graphs == 0) { std::cout << 0 << std::endl; std::cout.flush(); continue; }
+            if (G_ADJS_CHOSEN_GED.empty()){
+#ifdef DEBUG_OUTPUT
+                std::cerr << "# Query " << q_idx << ": G_ADJS_CHOSEN_GED is empty but M_graphs=" << M_graphs << ". Outputting 0." << std::endl;
+#endif
+                std::cout << 0 << std::endl; std::cout.flush(); continue;
+            }
+            string_to_adj_matrix_query(h_str, N_ACTUAL);
+            int best_g_idx = 0; int min_dist_found = L_ACTUAL + 2; // start above any achievable distance
+            for (int j=0; j < M_graphs; ++j) {
+                if (j >= G_ADJS_CHOSEN_GED.size()) {
+#ifdef DEBUG_OUTPUT
+                    std::cerr << "# Query " << q_idx << ": Index j=" << j << " out of bounds for G_ADJS_CHOSEN_GED (size " << G_ADJS_CHOSEN_GED.size() << ")" << std::endl;
+#endif
+                    continue;
+                }
+                int dist = min_edit_distance_global_perm_small_N(G_ADJS_CHOSEN_GED[j]);
+                if (dist < min_dist_found) { // strict '<' keeps the lowest index on ties
+                    min_dist_found = dist;
+                    best_g_idx = j;
+                }
+            }
+            std::cout << best_g_idx << std::endl;
+        } else {
+            if (M_graphs == 0) { std::cout << 0 << std::endl; std::cout.flush(); continue; }
+            if (G_EDGE_COUNTS_LARGE_N.empty()){
+#ifdef DEBUG_OUTPUT
+                std::cerr << "# Query " << q_idx << ": G_EDGE_COUNTS_LARGE_N is empty but M_graphs=" << M_graphs << ". Outputting 0." << std::endl;
+#endif
+                std::cout << 0 << std::endl; std::cout.flush(); continue;
+            }
+            int edges_Hk = count_set_bits_in_string(h_str);
+            int best_g_idx = 0; double min_abs_diff_expected_edges = -1.0; // -1.0 is the "no candidate yet" sentinel
+            for (int j=0; j<M_graphs; ++j) {
+                if (j >= G_EDGE_COUNTS_LARGE_N.size()) {
+#ifdef DEBUG_OUTPUT
+                     std::cerr << "# Query " << q_idx << ": Index j=" << j << " out of bounds for G_EDGE_COUNTS_LARGE_N (size " << G_EDGE_COUNTS_LARGE_N.size() << ")" << std::endl;
+#endif
+                    continue;
+                }
+                double expected_edges_Hk_from_Gj = (double)G_EDGE_COUNTS_LARGE_N[j] * (1.0 - 2.0*epsilon_noise_rate) + (double)L_ACTUAL * epsilon_noise_rate; // equals m*(1-eps) + (L-m)*eps for m = edge count of G_j
+                double diff = std::abs((double)edges_Hk - expected_edges_Hk_from_Gj);
+                if (min_abs_diff_expected_edges < -0.5 || diff < min_abs_diff_expected_edges) {
+                    min_abs_diff_expected_edges = diff;
+                    best_g_idx = j;
+                }
+            }
+            std::cout << best_g_idx << std::endl;
+        }
+        std::cout.flush(); // Explicit flush after each query prediction
+    }
+    return 0;
+}
+# EVOLVE-BLOCK-END

benchmarks/ale_bench/private_eval.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import argparse
+import json
+import os
+import traceback
+from pathlib import Path
+import ale_bench
+from ale_bench.result import CaseResult, JudgeResult, Result
+def result_feedback(result: Result) -> CaseResult:
+    if result.overall_judge_result == JudgeResult.ACCEPTED:
+        return result.case_results[0]
+    else:
+        selected_case_idx = 0
+        for idx, case_result in enumerate(result.case_results):
+            if case_result.judge_result == result.overall_judge_result:
+                selected_case_idx = idx
+                break
+        return result.case_results[selected_case_idx]
+def main(program_path: str, problem_id: str) -> dict:
+    """Run one private evaluation of a program through an ``ale_bench`` session.
+
+    Starts a session for ``problem_id``, reads the program source, removes the
+    EVOLVE-BLOCK marker strings, submits the code as ``cpp20`` to
+    ``session.private_eval`` and prints pass/fail counts, score, rank and
+    performance.
+
+    Args:
+        program_path: Path of the source file to evaluate.
+        problem_id: Problem identifier passed to ``ale_bench.start``.
+
+    Returns:
+        On success, a dict with a single ``"private"`` entry holding rank,
+        performance, score and the passed/failed case counts. If any exception
+        is raised, a dict with ``"combined_score"`` 0.0, a rejected ``"public"``
+        entry and an all-zero ``"private"`` entry.
+    """
+    print(f"Problem ID: {problem_id}")
+    print(f"Evaluating program: {program_path}")
+    try:
+        session = ale_bench.start(
+            problem_id=problem_id,
+            lite_version=False,
+            num_workers=13,
+        )
+        # Strip the evolution marker strings so only the program text is submitted.
+        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
+        private_result, final_rank, final_performance = session.private_eval(
+            code, code_language="cpp20",
+        )
+        # Round-trip the result through JSON to get plain dicts/strings for counting
+        # (nothing is written to disk here).
+        private_json_str = private_result.model_dump_json(indent=4)
+        private_json = json.loads(private_json_str)
+        private_passed_cases, private_failed_cases = 0, 0
+        num_private_cases = len(private_json["case_results"])
+        # Any judge result other than "ACCEPTED" counts as a failed case.
+        for case in private_json["case_results"]:
+            if case["judge_result"] == "ACCEPTED":
+                private_passed_cases += 1
+            else:
+                private_failed_cases += 1
+        print(
+            f"Passed {private_passed_cases} cases, failed {private_failed_cases} cases out of {num_private_cases}"
+        )
+        # NOTE(review): with zero cases the mean below raises ZeroDivisionError,
+        # which is caught by the except branch and reported as a failed run.
+        print(
+            f"Final Private Score: {private_result.overall_absolute_score} - Mean Score: {private_result.overall_absolute_score / num_private_cases}"
+        )
+        print(f"Rank: {final_rank}, Performance: {final_performance}")
+        # NOTE(review): unlike the failure path, the success path sets neither
+        # "combined_score" nor "public" - confirm callers only read "private".
+        metrics = {}
+        private_metrics = {
+            "private_rank": final_rank,
+            "private_performance": final_performance,
+            "private_score": private_result.overall_absolute_score,
+            "num_private_passed_cases": private_passed_cases,
+            "num_private_failed_cases": private_failed_cases,
+        }
+        metrics["private"] = private_metrics
+        # Monitor resource consumption
+        print(f"Current Resource Usage: {session.current_resource_usage}")
+        print(f"Remaining Resources: {session.remaining_resource_usage}")
+        return metrics
+    except Exception as e:
+        # Any failure (session start, file read, evaluation) yields zeroed metrics
+        # instead of propagating, so the caller always receives a dict.
+        print(f"Evaluation failed completely: {str(e)}")
+        print(traceback.format_exc())
+        metrics = {
+            "combined_score": 0.0,
+            "public": {"judge_result": "REJECTED"},
+            "private": {
+                "private_rank": 0,
+                "private_performance": 0,
+                "private_score": 0,
+                "num_private_passed_cases": 0,
+                "num_private_failed_cases": 0,
+            },
+        }
+        return metrics
+# CLI entry point: evaluate the program three times with main() and print the
+# per-run values together with their averages.
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Agent evaluation script using shinka.eval"
+    )
+    parser.add_argument(
+        "--program-path",
+        type=str,
+        default="program.cpp",
+        help="Path to the program to evaluate",
+    )
+    parser.add_argument(
+        "--problem-id",
+        type=str,
+        default="ahc025",
+        help="Problem ID",
+    )
+    parsed_args = parser.parse_args()
+    # Collect results from 3 runs
+    all_results = []
+    for i in range(3):
+        print(f"\n{'='*60}")
+        print(f"Running evaluation {i+1} of 3")
+        print('='*60)
+        result = main(
+            parsed_args.program_path,
+            parsed_args.problem_id,
+        )
+        all_results.append(result)
+        print('='*60)
+    # Compute averages
+    # A run that failed inside main() contributes its all-zero metrics here,
+    # so failed runs pull every average towards zero.
+    print(f"\n{'='*60}")
+    print("FINAL AVERAGED RESULTS ACROSS 3 RUNS")
+    print('='*60)
+    private_scores = [r["private"]["private_score"] for r in all_results]
+    private_performances = [r["private"]["private_performance"] for r in all_results]
+    private_ranks = [r["private"]["private_rank"] for r in all_results]
+    passed_cases = [r["private"]["num_private_passed_cases"] for r in all_results]
+    failed_cases = [r["private"]["num_private_failed_cases"] for r in all_results]
+    avg_private_score = sum(private_scores) / len(private_scores)
+    avg_private_performance = sum(private_performances) / len(private_performances)
+    avg_private_rank = sum(private_ranks) / len(private_ranks)
+    avg_passed_cases = sum(passed_cases) / len(passed_cases)
+    avg_failed_cases = sum(failed_cases) / len(failed_cases)
+    print(f"\nAverage Private Score: {avg_private_score:.2f}")
+    print(f"  Individual scores: {private_scores}")
+    print(f"\nAverage Private Performance: {avg_private_performance:.4f}")
+    print(f"  Individual performances: {private_performances}")
+    print(f"\nAverage Private Rank: {avg_private_rank:.2f}")
+    print(f"  Individual ranks: {private_ranks}")
+    print(f"\nAverage Passed Cases: {avg_passed_cases:.2f}")
+    print(f"Average Failed Cases: {avg_failed_cases:.2f}")
+    print('='*60)
+    # Print the summary (averages plus the raw per-run results) as JSON
+    summary = {
+        "avg_private_score": avg_private_score,
+        "avg_private_performance": avg_private_performance,
+        "avg_private_rank": avg_private_rank,
+        "all_results": all_results
+    }
+    print(f"\nFinal Summary:")
+    print(json.dumps(summary, indent=2))

benchmarks/arc_benchmark/README.md ADDED Viewed

	@@ -0,0 +1,108 @@

+# ARC Benchmark
+Evolves ARC-AGI visual reasoning task solutions using SkyDiscover.
+## Setup
+### 1. Download ARC data
+Clone the ARC-AGI-2 repo and convert the data:
+```bash
+cd benchmarks/arc_benchmark
+git clone https://github.com/arcprize/ARC-AGI-2.git /tmp/ARC-AGI-2
+OUT_DIR=./data uv run python convert_arc_agi2_data.py /tmp/ARC-AGI-2
+rm -rf /tmp/ARC-AGI-2
+```
+This creates 4 files in `data/`:
+- `arc-agi_training_challenges.json` (1000 tasks)
+- `arc-agi_training_solutions.json`
+- `arc-agi_evaluation_challenges.json` (120 tasks)
+- `arc-agi_evaluation_solutions.json`
+### 2. Set your API key
+```bash
+export OPENAI_API_KEY=...
+```
+## Run a single task
+ARC requires a per-task config (each task has unique training examples as the prompt). Use `generate_config.py` to create one, then run with any search backend:
+```bash
+cd benchmarks/arc_benchmark
+# Generate task-specific config
+TASK_NUM=0 ARC_TASK_FILE=training CONFIG_OUT=./config_task_0.yaml \
+  uv run python generate_config.py
+# Run with any backend
+uv run skydiscover-run initial_program.py evaluator.py \
+  -c config_task_0.yaml -s [your_algorithm] -i 30
+# For example, with the evox backend (openevolve and gepa are selected the same way via -s):
+uv run skydiscover-run initial_program.py evaluator.py \
+  -c config_task_0.yaml -s evox -i 30
+```
+## Run all evaluation tasks
+```bash
+cd benchmarks/arc_benchmark
+export ARC_TASK_FILE=evaluation
+NUM_TASKS=$(uv run python -c "import json; print(len(json.load(open('data/arc-agi_evaluation_challenges.json'))))")
+for i in $(seq 0 $((NUM_TASKS - 1))); do
+  TASK_NUM=$i CONFIG_OUT=./config_task_${i}.yaml uv run python generate_config.py
+  TASK_NUM=$i uv run skydiscover-run initial_program.py evaluator.py \
+    -c config_task_${i}.yaml -s [your_algorithm] -i 30 \
+    -o outputs/eval_task_${i}
+done
+```
+## Post-discovery test evaluation
+After the discovery process, evaluate the best program on held-out test inputs:
+```bash
+TASK_NUM=0 ARC_TASK_FILE=evaluation \
+  OUTS_DIR=./outputs/eval_task_0/adaevolve \
+  uv run python post_discovery_eval.py
+```
+## Config: GPT vs Gemini
+Edit `config.yaml` — comment the GPT block and uncomment the Gemini block, or override with `--model`:
+```bash
+uv run skydiscover-run ... -m gemini/gemini-3-pro-preview
+```
+## Files
+| File | Description |
+|------|-------------|
+| `initial_program.py` | Seed program with two transform functions to evolve |
+| `evaluator.py` | Scores programs on pass@2 + cell accuracy |
+| `config.yaml` | Base config template (prompt injected by generate_config.py) |
+| `generate_config.py` | Injects task-specific training examples into config as system prompt |
+| `post_discovery_eval.py` | Evaluates best program on held-out test inputs |
+| `convert_arc_agi2_data.py` | Converts raw ARC-AGI-2 data to benchmark format |
+| `requirements.txt` | Dependencies (numpy) |
+## Environment variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `OPENAI_API_KEY` | (required) | API key |
+| `ARC_TASK_FILE` | `training` | `training` or `evaluation` |
+| `TASK_NUM` | `0` | Task index within the dataset |
+| `BASE_CONFIG` | `./config.yaml` | Base config template path |
+| `CONFIG_OUT` | `./config_task_{N}.yaml` | Output path for generated config |
+| `DATA_ROOT` | `./data` | Path to ARC data directory |
+| `MAX_ITERATIONS` | (from config) | Override `max_iterations` at runtime |
+| `ARC_EVAL_INCLUDE_TEST` | `0` | Set to `1` to also run the held-out test inputs during evolution |
+| `ARC_EVAL_USE_TEST_FOR_SCORE` | `0` | Set to `1` to average train and test scores into `combined_score` (only used when `ARC_EVAL_INCLUDE_TEST=1`) |

configs/README.md ADDED Viewed

	@@ -0,0 +1,355 @@

+# ⚙️ Configuration
+All settings are YAML. Environment variables can be referenced with `${VAR}` syntax in any string value.
+---
+## 📁 Available Config Files
+| File | Search | Description |
+|------|--------|-------------|
+| **default.yaml** | Top-K | Minimal starting template — good for first experiments |
+| **adaevolve.yaml** | AdaEvolve | Full multi-island config with adaptive intensity, migration, paradigm breakthroughs, and ablation flags |
+| **evox.yaml** | EvoX | Co-evolving solution generation and search strategies |
+| **openevolve_native.yaml** | OpenEvolve Native | Native port of OpenEvolve's island-based MAP-Elites search with ring migration |
+| **llm_judge.yaml** | - | Demonstrates LLM-as-a-judge evaluation (uses gpt-4o-mini for both generation and judging) |
+| **human_in_the_loop.yaml** | Top-K | Enables the live monitor dashboard and human-in-the-loop feedback |
+Each file is a ready-to-copy template. Fill in the **system_message** with your problem description and you're good to go.
+---
+## 🔧 Parameter Reference
+### Top-level
+```yaml
+max_iterations: 100        # total evolution iterations
+checkpoint_interval: 10    # save checkpoint every N iterations
+log_level: "INFO"          # DEBUG / INFO / WARNING
+log_dir: null              # directory for logs (default: outputs/)
+random_seed: 42
+language: null             # auto-detected from initial program ("python", "cpp", "java", etc.)
+                           # set to "image" for image-generation mode
+file_suffix: ".py"         # output file extension, auto-set from initial program at runtime
+diff_based_generation: true # LLM receives diffs instead of full programs
+max_solution_length: 60000 # max characters in a program; longer programs are trimmed
+```
+### llm
+```yaml
+llm:
+  temperature: 0.7
+  top_p: 0.95
+  max_tokens: 32000
+  timeout: 600           # seconds per LLM request
+  retries: 3
+  retry_delay: 5         # seconds between retries
+  random_seed: null
+  reasoning_effort: null # "low" / "medium" / "high" for o-series models
+```
+**Model specification** — use `provider/model` or a bare name (auto-detected for known prefixes):
+| Provider | Format | API key env var |
+|----------|--------|-----------------|
+| OpenAI | `gpt-5`, `o3-mini` | OPENAI_API_KEY |
+| Gemini | `gemini/gemini-2.0-flash` | GEMINI_API_KEY or GOOGLE_API_KEY |
+| Anthropic | `claude-sonnet-4-6` or `anthropic/claude-sonnet-4-6` | ANTHROPIC_API_KEY |
+| DeepSeek | `deepseek-chat` or `deepseek/deepseek-chat` | DEEPSEEK_API_KEY |
+| Mistral | `mistral-large` or `mistral/mistral-large` | MISTRAL_API_KEY |
+| Ollama / vLLM | `ollama/llama3`, `vllm/my-model` | — |
+<details>
+<summary><b>Single model, multi-model pool, separate pools, and API override examples</b></summary>
+**Single model (shorthand):**
+```yaml
+llm:
+  primary_model: "gpt-5"
+  primary_model_weight: 1.0
+```
+**Multi-model pool (weighted sampling):**
+```yaml
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 0.8
+    - name: "anthropic/claude-opus-4-6"
+      weight: 0.2
+```
+**Separate model pools** — by default all pools share `models`; override individually:
+```yaml
+llm:
+  models:
+    - name: "gpt-5"
+  evaluator_models:   # used by LLM-as-a-judge (evaluator.use_llm_feedback)
+    - name: "gpt-4o-mini"
+  guide_models:       # used for paradigm breakthroughs and variation labels
+    - name: "gpt-4o-mini"
+```
+**Override API base:**
+```yaml
+llm:
+  api_base: "https://my-proxy.example.com/v1"
+  api_key: "${MY_API_KEY}"
+```
+You can also set OPENAI_API_BASE or OPENAI_BASE_URL env vars to override the config globally.
+</details>
+### search
+```yaml
+search:
+  type: "adaevolve" # evox | openevolve_native | beam_search | best_of_n | topk
+  num_context_programs: 4   # context programs shown to LLMs as examples
+```
+<details>
+<summary><b>topk</b> — no extra settings</summary>
+Always picks the single best program as parent and the next K as additional context programs.
+</details>
+<details>
+<summary><b>adaevolve</b> — full settings</summary>
+```yaml
+search:
+  type: "adaevolve"
+  database:
+    population_size: 20
+    num_islands: 2
+    # Adaptive intensity
+    decay: 0.9              # EMA weight for accumulated signal G
+    intensity_min: 0.15     # min intensity (exploitation)
+    intensity_max: 0.5      # max intensity (exploration)
+    # Migration
+    migration_interval: 15  # migrate every N iterations
+    migration_count: 5      # top programs to copy between islands
+    # Archive diversity
+    fitness_weight: 1.0     # fitness contribution to elite score
+    novelty_weight: 0.0     # novelty contribution to elite score
+    diversity_strategy: "code"  # "code" / "metric" / "hybrid"
+    # Dynamic island spawning
+    use_dynamic_islands: true
+    max_islands: 5
+    spawn_productivity_threshold: 0.015
+    spawn_cooldown_iterations: 30
+    # Paradigm breakthrough
+    use_paradigm_breakthrough: true
+    paradigm_window_size: 10
+    paradigm_improvement_threshold: 0.12
+    paradigm_num_to_generate: 3
+    paradigm_max_uses: 2
+    paradigm_max_tried: 10
+    # Error retry
+    enable_error_retry: true
+    max_error_retries: 2
+    # Ablation flags (set false to disable)
+    use_adaptive_search: true   # G-based intensity; false → use fixed_intensity
+    use_ucb_selection: true     # UCB island selection; false → round-robin
+    use_migration: true
+    use_unified_archive: true   # quality-diversity archive; false → simple list
+```
+</details>
+<details>
+<summary><b>evox</b></summary>
+```yaml
+search:
+  type: "evox"
+  database:
+    auto_generate_variation_operators: true  # by default generate variation operator once
+```
+</details>
+<details>
+<summary><b>beam_search</b></summary>
+```yaml
+search:
+  type: "beam_search"
+  database:
+    beam_width: 5
+    beam_selection_strategy: "diversity_weighted"  # diversity_weighted / stochastic / round_robin / best
+    beam_diversity_weight: 0.3
+    beam_temperature: 1.0
+    beam_depth_penalty: 0.0
+```
+</details>
+<details>
+<summary><b>best_of_n</b></summary>
+```yaml
+search:
+  type: "best_of_n"
+  database:
+    best_of_n: 5  # reuse the same parent for N iterations, then switch to current best
+```
+</details>
+<details>
+<summary><b>openevolve_native</b> — MAP-Elites + island-based evolutionary search</summary>
+Native port of [OpenEvolve](https://github.com/codelion/openevolve)'s search algorithm.
+Uses MAP-Elites quality-diversity grid per island with ring-topology migration.
+```yaml
+search:
+  type: "openevolve_native"
+  num_context_programs: 5
+  database:
+    num_islands: 5
+    population_size: 40
+    archive_size: 100
+    exploration_ratio: 0.2          # P(explore) — random from current island
+    exploitation_ratio: 0.7         # P(exploit) — archive elite, prefer current island
+    # remaining 0.1 = P(random)    — any program in population
+    elite_selection_ratio: 0.1      # fraction of additional context programs from top elites
+    feature_dimensions: ["complexity", "diversity"]
+    feature_bins: 10
+    diversity_reference_size: 20
+    migration_interval: 10          # migrate every N island-generations
+    migration_rate: 0.1             # fraction of island to migrate
+    random_seed: 42
+```
+See [`skydiscover/search/openevolve_native/README.md`](../skydiscover/search/openevolve_native/README.md) for architecture details.
+</details>
+### prompt
+```yaml
+prompt:
+  system_message: |
+    You are an expert coder helping to improve programs through evolution.
+  # system_message can also be a path to a .txt file (relative to the config):
+  # system_message: "system_prompt.txt"
+  evaluator_system_message: |   # system message for the LLM judge
+    You are a strict code quality judge. ...  # only used when evaluator.use_llm_feedback: true
+  suggest_simplification_after_chars: 500  # threshold for program labeling in prompts
+```
+### evaluator
+```yaml
+evaluator:
+  timeout: 360            # seconds before killing evaluate() subprocess
+  max_retries: 3
+  # Cascade evaluation: skip expensive full eval on low-scoring programs
+  cascade_evaluation: true
+  cascade_thresholds: [0.3, 0.6]
+  # Prepend evaluator source code (or instruction.md for Harbor tasks)
+  # to the LLM system message so the model can see how solutions are scored.
+  inject_evaluator_context: false  # default false
+  # LLM-as-a-judge
+  use_llm_feedback: false
+  llm_feedback_weight: 1.0  # relative weight of LLM score in combined_score
+```
+### agentic
+Multi-turn agent that can read files and search the codebase before generating solutions.
+Enable via `--agentic` on the CLI or `agentic=True` in `run_discovery()`. The codebase root
+defaults to the initial program's directory; override it here if needed.
+```yaml
+agentic:
+  enabled: false
+  codebase_root: null      # defaults to initial program's directory when omitted
+  max_steps: 5             # max tool-call turns per iteration
+  per_step_timeout: 60.0   # seconds per tool call
+  overall_timeout: 300.0   # total seconds for one agentic generation
+  max_context_chars: 400000
+  max_file_chars: 50000
+  max_files_read: 20
+  max_search_results: 50
+```
+### monitor
+Live dashboard served over WebSocket.
+```yaml
+monitor:
+  enabled: false
+  port: 8765
+  host: "0.0.0.0"
+  max_solution_length: 10000
+  # AI-generated run summaries
+  summary_model: "gpt-5-mini"
+  summary_api_key: null    # falls back to OPENAI_API_KEY
+  summary_top_k: 3
+  summary_interval: 0      # auto-generate every N programs (0 = manual only)
+```
+### human_feedback
+```yaml
+human_feedback_enabled: false
+human_feedback_file: null       # path to a file containing feedback text
+human_feedback_mode: "append"   # "append" or "replace"
+```
+---
+## 🚀 Getting Started
+**1. Pick a template** — copy one of the config files above into your project directory:
+```bash
+cp configs/evox.yaml my_config.yaml
+```
+**2. Fill in the system message** — this is the most important field. Tell the LLM what problem it's solving:
+```yaml
+prompt:
+  system_message: |
+    You are an expert at optimizing circle packing algorithms.
+    Maximize the number of non-overlapping circles in a unit square.
+```
+**3. Run with your config:**
+```bash
+uv run skydiscover-run initial_program.py evaluator.py -c my_config.yaml -i 100
+```
+You can override any config value from the CLI — for example, switch the search algorithm or model without editing the YAML:
+```bash
+uv run skydiscover-run initial_program.py evaluator.py \
+  -c my_config.yaml \
+  --model gemini/gemini-3-pro-preview \
+  -i 50
+```

configs/adaevolve.yaml ADDED Viewed

	@@ -0,0 +1,125 @@

+# AdaEvolve - Adaptive Evolutionary Search Configuration
+#
+# AdaEvolve is an adaptive multi-island evolutionary algorithm that adjusts
+# search intensity per island based on accumulated improvement history.
+#
+# Key features:
+#   - Per-island adaptive exploration/exploitation via accumulated signal (G)
+#   - UCB with decayed magnitude rewards for island selection
+#   - UnifiedArchive per island for quality-diversity balance
+#   - Ring migration for cross-pollination between islands
+#   - Dynamic island spawning when global productivity drops
+#   - Paradigm breakthrough for high-level strategy shifts during stagnation
+#
+# Usage:
+#   Copy this file to your example directory as config_adaevolve.yaml
+#   and fill in the system_message with your problem description.
+# General settings
+max_iterations: 100
+checkpoint_interval: 10
+log_level: "INFO"
+random_seed: 42
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  api_base: "https://api.openai.com/v1"
+  temperature: 0.7
+  # top_p: 0.95  # omitted by default; some providers (e.g. Anthropic) reject both temperature and top_p
+  max_tokens: 32000
+  timeout: 600
+  # To use Gemini: comment out models + api_base above, uncomment below
+  # models:
+  #   - name: "gemini-3-pro-preview"
+  # api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  # api_key: ${GEMINI_API_KEY}
+# Search configuration
+search:
+  type: "adaevolve"
+  num_context_programs: 4
+  database:
+    # Population settings
+    population_size: 20
+    num_islands: 2
+    # --- Adaptive search intensity ---
+    # Controls how search intensity adapts based on improvement signal (G)
+    # High G (productive island) -> low intensity (exploit)
+    # Low G (stagnating island) -> high intensity (explore)
+    decay: 0.9                        # Exponential moving average weight (rho)
+    intensity_min: 0.15               # Minimum search intensity (exploitation)
+    intensity_max: 0.5                # Maximum search intensity (exploration)
+    # --- Ablation flags ---
+    # Set to false to disable specific adaptive mechanisms for ablation studies
+    use_adaptive_search: true         # G-based intensity vs fixed_intensity
+    use_ucb_selection: true           # UCB island selection vs round-robin
+    use_migration: true               # Inter-island migration
+    use_unified_archive: true         # Quality-diversity archive vs simple list
+    # fixed_intensity: 0.4            # Used when use_adaptive_search=false
+    # --- Migration ---
+    migration_interval: 15            # Migrate every N iterations
+    migration_count: 5                # Number of top programs to migrate
+    # --- Archive settings (when use_unified_archive=true) ---
+    fitness_weight: 1.0               # Weight for fitness rank in elite score
+    novelty_weight: 0.0               # Weight for novelty rank in elite score
+    diversity_strategy: "code"        # Diversity metric: "code", "metric", or "hybrid"
+    # --- Optional Pareto multiobjective mode ---
+    # Leave pareto_objectives empty to keep the current scalar combined_score behavior.
+    # When enabled, return the raw objective metrics from the evaluator and set:
+    # pareto_objectives:
+    #   - accuracy
+    #   - latency
+    # higher_is_better:
+    #   accuracy: true
+    #   latency: false
+    # fitness_key: accuracy              # Optional scalar proxy for adaptive state / tie-breaking
+    # pareto_objectives_weight: 0.4      # Weight of Pareto percentile in archive elite score
+    # --- Dynamic island spawning ---
+    use_dynamic_islands: true
+    max_islands: 5                    # Maximum number of islands
+    spawn_productivity_threshold: 0.015  # Spawn if global productivity below this
+    spawn_cooldown_iterations: 30     # Wait N iterations between spawns
+    # --- Paradigm breakthrough ---
+    use_paradigm_breakthrough: true
+    paradigm_window_size: 10          # Window for improvement rate calculation
+    paradigm_improvement_threshold: 0.12  # Stagnation threshold
+    paradigm_max_uses: 2              # Max uses per paradigm
+    paradigm_max_tried: 10            # Max tried paradigms to track
+    paradigm_num_to_generate: 3       # Number of paradigms to generate per trigger
+    # --- Error retry ---
+    enable_error_retry: true
+    max_error_retries: 2
+# Prompt configuration
+prompt:
+  system_message: |
+    <REPLACE WITH YOUR PROBLEM-SPECIFIC SYSTEM MESSAGE>
+    Describe:
+    - What the algorithm/function should do
+    - Input/output format
+    - Optimization objective and metrics
+    - Constraints
+    - Techniques to explore
+# Evaluator configuration
+evaluator:
+  timeout: 360
+  max_retries: 3
+  cascade_evaluation: true
+  cascade_thresholds: [0.3, 0.6]
+# Generation settings
+diff_based_generation: true
+max_solution_length: 60000

configs/default.yaml ADDED Viewed

	@@ -0,0 +1,38 @@

+# General settings
+max_iterations: 100
+checkpoint_interval: 10
+log_level: "INFO"
+random_seed: 42
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  temperature: 0.7
+  # top_p: 0.95  # omitted by default; some providers (e.g. Anthropic) reject both temperature and top_p
+  max_tokens: 32000
+  timeout: 600
+# Search configuration
+search:
+  # Top-K search: always picks the single best program as the parent and the next K as additional context programs
+  type: "topk"
+  database:
+    random_seed: 42
+  num_context_programs: 4
+# Prompt configuration
+prompt:
+  system_message: "You are an expert to help find the best solution to the problem."
+# Evaluator configuration
+evaluator:
+  timeout: 10000
+  max_retries: 3
+  cascade_evaluation: false
+# Generation settings
+diff_based_generation: true
+max_solution_length: 60000

configs/evox.yaml ADDED Viewed

	@@ -0,0 +1,59 @@

+# EvoX - Label-guided Evolutionary Search Configuration
+#
+# EvoX is a co-evolutionary search strategy that uses label-guided
+# exploration and exploitation to evolve programs.
+#
+# Usage:
+#   Copy this file to your example directory as config_evox.yaml
+#   and fill in the system_message with your problem description.
+# General settings
+max_iterations: 100
+checkpoint_interval: 10
+log_level: "INFO"
+random_seed: 42
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  api_base: "https://api.openai.com/v1"
+  temperature: 0.7
+  # top_p: 0.95  # omitted by default; some providers (e.g. Anthropic) reject both temperature and top_p
+  max_tokens: 32000
+  timeout: 600
+  # To use Gemini: comment out models + api_base above, uncomment below
+  # models:
+  #   - name: "gemini-3-pro-preview"
+  # api_base: "https://generativelanguage.googleapis.com/v1beta/openai/"
+  # api_key: ${GEMINI_API_KEY}
+# Search configuration
+search:
+  type: "evox"
+  database:
+    auto_generate_variation_operators: true
+# Prompt configuration
+prompt:
+  system_message: |
+    <REPLACE WITH YOUR PROBLEM-SPECIFIC SYSTEM MESSAGE>
+    Describe:
+    - What the algorithm/function should do
+    - Input/output format
+    - Optimization objective and metrics
+    - Constraints
+    - Techniques to explore
+# Evaluator configuration
+evaluator:
+  timeout: 360
+  max_retries: 3
+  cascade_evaluation: true
+  cascade_thresholds: [0.3, 0.6]
+# Generation settings
+diff_based_generation: true
+max_solution_length: 60000

configs/human_in_the_loop.yaml ADDED Viewed

	@@ -0,0 +1,49 @@

+# Human-in-the-loop configuration
+# Enables the live monitor dashboard and human feedback.
+# Usage: skydiscover-run program.py evaluator.py -c configs/human_in_the_loop.yaml
+# General settings
+max_iterations: 100
+checkpoint_interval: 10
+log_level: "INFO"
+random_seed: 42
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  temperature: 0.7
+  # top_p: 0.95  # omitted by default; some providers (e.g. Anthropic) reject both temperature and top_p
+  max_tokens: 32000
+  timeout: 600
+# Search configuration
+search:
+  type: "topk"
+  database:
+    random_seed: 42
+  num_context_programs: 4
+# Prompt configuration
+prompt:
+  system_message: "You are an expert to help find the best solution to the problem."
+# Evaluator configuration
+evaluator:
+  timeout: 10000
+  max_retries: 3
+  cascade_evaluation: false
+# Generation settings
+diff_based_generation: true
+max_solution_length: 60000
+# Live monitor dashboard
+monitor:
+  enabled: true
+  port: 8765
+  host: "127.0.0.1"
+# Human feedback
+human_feedback_enabled: true

configs/llm_judge.yaml ADDED Viewed

	@@ -0,0 +1,40 @@

+# Minimal config to test LLM-as-a-judge evaluation feedback
+# Usage: skydiscover-run initial_program.py evaluator.py -c configs/llm_judge.yaml
+max_iterations: 10
+checkpoint_interval: 5
+log_level: "DEBUG"
+llm:
+  models:
+    - name: "gpt-4o-mini"
+      weight: 1.0
+  temperature: 0.7
+  max_tokens: 16000
+  timeout: 120
+  # Model used by the evaluator's LLM judge
+  evaluator_models:
+    - name: "gpt-4o-mini"
+      weight: 1.0
+search:
+  type: "topk"
+  num_context_programs: 2
+prompt:
+  system_message: "You are an expert programmer. Improve the given program."
+  evaluator_system_message: >
+    You are a strict code quality judge. Evaluate the given code and return a JSON object
+    with scores between 0.0 and 1.0 for each metric. Be critical — only exceptional code
+    should score above 0.8. Consider algorithmic correctness, edge case handling, and
+    real-world production readiness in your assessment.
+evaluator:
+  timeout: 60
+  max_retries: 1
+  cascade_evaluation: false
+  # NOTE(review): configs/README.md documents the LLM-judge switch as
+  # `evaluator.use_llm_feedback`; confirm that `llm_as_judge` is an accepted key.
+  llm_as_judge: true
+diff_based_generation: true
+max_solution_length: 10000

configs/openevolve_native.yaml ADDED Viewed

	@@ -0,0 +1,70 @@

+# OpenEvolve Native — MAP-Elites + Island-based Evolutionary Search
+#
+# A faithful port of the OpenEvolve search algorithm:
+#   - MAP-Elites grid per island for quality-diversity
+#   - Island-based population with round-robin rotation
+#   - Ring-topology migration between islands
+#   - Exploration / exploitation / random parent sampling
+#   - Greedy-diverse context program sampling from nearby MAP-Elites cells
+#
+# Usage:
+#   skydiscover-run initial_program.py evaluator.py \
+#       -s openevolve_native -m gpt-5-mini -i 100
+# General settings
+max_iterations: 100
+checkpoint_interval: 10
+log_level: "INFO"
+# LLM configuration
+llm:
+  models:
+    - name: "gpt-5"
+      weight: 1.0
+  api_base: "https://api.openai.com/v1"
+  temperature: 0.7
+  # top_p: 0.95  # omitted by default; some providers (e.g. Anthropic) reject both temperature and top_p
+  max_tokens: 32000
+  timeout: 600
+# Search configuration
+search:
+  type: "openevolve_native"
+  num_context_programs: 5
+  database:
+    num_islands: 5
+    population_size: 40
+    archive_size: 100
+    exploration_ratio: 0.2          # P(explore) — random from current island
+    exploitation_ratio: 0.7         # P(exploit) — archive elite, prefer current island
+    # remaining 0.1 = P(random)     — any program in population
+    elite_selection_ratio: 0.1      # fraction of context programs from top elites
+    feature_dimensions: ["complexity", "diversity"]
+    feature_bins: 10
+    diversity_reference_size: 20
+    migration_interval: 10          # migrate every N island-generations
+    migration_rate: 0.1             # fraction of island to migrate
+    random_seed: 42
+# Prompt configuration
+prompt:
+  system_message: |
+    <REPLACE WITH YOUR PROBLEM-SPECIFIC SYSTEM MESSAGE>
+    Describe:
+    - What the algorithm/function should do
+    - Input/output format
+    - Optimization objective and metrics
+    - Constraints
+    - Techniques to explore
+# Evaluator configuration
+evaluator:
+  timeout: 360
+  max_retries: 3
+  cascade_evaluation: true
+  cascade_thresholds: [0.3, 0.6]
+# Generation settings
+diff_based_generation: true
+max_solution_length: 60000

pyproject.toml ADDED Viewed

	@@ -0,0 +1,124 @@

+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "skydiscover"
+dynamic = ["version"]
+description = "A Flexible Framework for AI-Driven Scientific and Algorithmic Discovery"
+readme = "README.md"
+requires-python = ">=3.10, <3.14"
+license = {text = "Apache-2.0"}
+dependencies = [
+    "openai>=1.0.0",
+    "pyyaml>=6.0",
+    "tqdm>=4.64.0",
+    "numpy>=1.22.0",
+]
+[project.optional-dependencies]
+math = [
+    "scipy>=1.11.0",
+    "sympy>=1.14.0",
+    "jax>=0.6.2",
+    "optax>=0.2.6",
+    "torch",
+    "scikit-learn>=1.0.0",
+    "numba",
+    "pandas",
+    "matplotlib",
+    "plotly",
+    "networkx",
+    "cvxpy",
+    "autograd",
+    "pymoo",
+    "PyWavelets",
+]
+adrs = [
+    "numpy>=1.22.0",
+    "pandas",
+    "networkx>=3.2,<3.4",
+    "torch",
+]
+external = [
+    "openevolve",
+    "gepa[full]",
+    "litellm>=1.81",  # gepa[full] needs litellm, but uv override-dependencies strips the [full] extra
+]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "black>=22.0.0",
+    "isort>=5.10.0",
+    "mypy>=0.950",
+    "requests>=2.28.0",
+]
+frontier-cs = [
+    "anthropic>=0.74.0",
+    "colorlog>=6.10.1",
+    "datasets>=4.4.1",
+    "google-genai>=1.55.0",
+    "google-generativeai>=0.8.5",
+    "numpy>=2.0.0",  # Frontier-CS requires numpy 2.x
+    "python-dotenv>=1.2.1",
+    "skypilot>=0.10.5",
+]
+prompt-optimization = [
+    "dspy>=3.1.3",
+    "litellm>=1.81",
+    "bm25s>=0.3.0",
+    "pystemmer>=2.2.0.3",
+    "datasets>=4.5.0",
+    "diskcache>=5.6.3",
+    "ujson>=5.11.0",
+]
+# Console entry points; kept with the other [project.*] tables, ahead of [tool.*].
+[project.scripts]
+skydiscover-run = "skydiscover.cli:main"
+skydiscover-viewer = "skydiscover.extras.monitor.viewer:main"
+[tool.uv]
+override-dependencies = ["httpx>=0.28.1", "gepa>=0.0.26"]
+# openevolve and gepa (the "external" extra) are installed from their git main branches.
+[tool.uv.sources]
+openevolve = { git = "https://github.com/algorithmicsuperintelligence/openevolve.git", branch = "main" }
+gepa = { git = "https://github.com/gepa-ai/gepa.git", branch = "main" }
+[tool.black]
+line-length = 100
+target-version = ['py310']
+include = '\.pyi?$'
+[tool.isort]
+profile = "black"
+line_length = 100
+[tool.mypy]
+python_version = "3.10"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "integration: marks tests as integration tests requiring external services"
+]
+addopts = "--strict-markers"
+[tool.setuptools.packages.find]
+include = ["skydiscover*"]
+[tool.setuptools.package-data]
+skydiscover = [
+    "context_builder/*/templates/*.txt",
+    "search/evox/config/*.yaml",
+    "search/evox/config/*.txt",
+    "extras/external/defaults/*.yaml",
+    "extras/monitor/dashboard.html",
+]
+[tool.setuptools.dynamic]
+version = {attr = "skydiscover._version.__version__"}