JustinTX committed on
Commit
517cbd2
·
verified ·
1 Parent(s): 725b3aa

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +60 -0
  2. .gitmodules +3 -0
  3. LICENSE +201 -0
  4. README.md +388 -0
  5. benchmarks/ADRS/cloudcast/README.md +50 -0
  6. benchmarks/ADRS/cloudcast/evaluator/Dockerfile +13 -0
  7. benchmarks/ADRS/cloudcast/evaluator/broadcast.py +44 -0
  8. benchmarks/ADRS/cloudcast/evaluator/download_dataset.sh +37 -0
  9. benchmarks/ADRS/cloudcast/evaluator/evaluate.py +224 -0
  10. benchmarks/ADRS/cloudcast/evaluator/evaluate.sh +7 -0
  11. benchmarks/ADRS/cloudcast/evaluator/evaluator.py +297 -0
  12. benchmarks/ADRS/cloudcast/evaluator/requirements.txt +2 -0
  13. benchmarks/ADRS/cloudcast/evaluator/simulator.py +196 -0
  14. benchmarks/ADRS/cloudcast/evaluator/utils.py +109 -0
  15. benchmarks/ADRS/cloudcast/evaluator/wrapper.py +98 -0
  16. benchmarks/ADRS/cloudcast/initial_program.py +118 -0
  17. benchmarks/ADRS/eplb/README.md +63 -0
  18. benchmarks/ADRS/eplb/evaluator/evaluate.sh +7 -0
  19. benchmarks/ADRS/eplb/evaluator/evaluate_best_program.py +66 -0
  20. benchmarks/ADRS/eplb/evaluator/wrapper.py +98 -0
  21. benchmarks/ADRS/eplb/initial_program.py +238 -0
  22. benchmarks/ADRS/llm_sql/config.yaml +81 -0
  23. benchmarks/ADRS/llm_sql/evaluator/Dockerfile +13 -0
  24. benchmarks/ADRS/llm_sql/evaluator/download_dataset.sh +30 -0
  25. benchmarks/ADRS/llm_sql/evaluator/evaluate.sh +7 -0
  26. benchmarks/ADRS/llm_sql/evaluator/evaluator.py +227 -0
  27. benchmarks/ADRS/llm_sql/evaluator/requirements.txt +2 -0
  28. benchmarks/ADRS/llm_sql/evaluator/solver.py +161 -0
  29. benchmarks/ADRS/llm_sql/evaluator/utils.py +81 -0
  30. benchmarks/ADRS/prism/evaluator/Dockerfile +13 -0
  31. benchmarks/ADRS/prism/evaluator/wrapper.py +98 -0
  32. benchmarks/ADRS/txn_scheduling/evaluator/Dockerfile +13 -0
  33. benchmarks/ADRS/txn_scheduling/evaluator/evaluate.sh +7 -0
  34. benchmarks/ADRS/txn_scheduling/evaluator/evaluator.py +258 -0
  35. benchmarks/ADRS/txn_scheduling/evaluator/requirements.txt +1 -0
  36. benchmarks/ADRS/txn_scheduling/evaluator/txn_simulator.py +229 -0
  37. benchmarks/ADRS/txn_scheduling/evaluator/workloads.py +12 -0
  38. benchmarks/ADRS/txn_scheduling/evaluator/wrapper.py +98 -0
  39. benchmarks/ADRS/txn_scheduling/initial_program.py +106 -0
  40. benchmarks/ale_bench/README.md +84 -0
  41. benchmarks/ale_bench/ale-bench-lite-problems/ahc008/initial_program.cpp +508 -0
  42. benchmarks/ale_bench/ale-bench-lite-problems/ahc011/best_program.cpp +730 -0
  43. benchmarks/ale_bench/ale-bench-lite-problems/ahc015/evaluator.py +65 -0
  44. benchmarks/ale_bench/ale-bench-lite-problems/ahc016/evaluator.py +65 -0
  45. benchmarks/ale_bench/ale-bench-lite-problems/ahc039/best_program.cpp +1003 -0
  46. benchmarks/ale_bench/ale-bench-lite-problems/ahc039/config.yaml +77 -0
  47. benchmarks/ale_bench/ale-bench-lite-problems/ahc039/evaluator.py +65 -0
  48. benchmarks/ale_bench/ale_agent_best/ahc008.cpp +508 -0
  49. benchmarks/ale_bench/ale_agent_best/ahc011.cpp +607 -0
  50. benchmarks/ale_bench/ale_agent_best/ahc015.cpp +491 -0
.gitignore ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ .eggs/
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ env/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+ .claude/
23
+
24
+ # OS
25
+ .DS_Store
26
+
27
+ # Testing
28
+ .pytest_cache/
29
+ .coverage
30
+ htmlcov/
31
+
32
+ # Secrets
33
+ .env
34
+ secrets.yaml
35
+
36
+ # Logs & outputs
37
+ *.log
38
+ *.jsonl
39
+ output*/
40
+ outputs*/
41
+ outputs_*/
42
+
43
+
44
+ # Benchmark generated data
45
+ benchmarks/image_gen/sky_festival/sky_festival_output/
46
+ benchmarks/image_gen/sky_festival/sky_festival_paradigm_output_*/
47
+ benchmarks/frontier-cs-eval/Frontier-CS
48
+ benchmarks/ADRS/eplb/expert-load.json
49
+ benchmarks/ADRS/cloudcast/profiles/
50
+ benchmarks/ADRS/cloudcast/examples/
51
+ benchmarks/ADRS/llm_sql/datasets/
52
+
53
+ # Generated test outputs (re-generate with test_all_benchmarks.sh)
54
+ tests/**/test_outputs_*/
55
+
56
+ # Evaluation run outputs
57
+ eval_runs/
58
+
59
+ # Local documentation
60
+ tasks/
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "benchmarks/ale_bench/ALE-Bench"]
2
+ path = benchmarks/ale_bench/ALE-Bench
3
+ url = https://github.com/SakanaAI/ALE-Bench.git
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 SkyDiscover Team
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,388 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align="center">
2
+   <img src="assets/logo_vector.png" height="80" alt="SkyDiscover logo" style="vertical-align: middle;">&nbsp;
3
+
4
+   <b>SkyDiscover</b>
5
+ </h1>
6
+
7
+
8
+ <p align="center"> A Flexible Framework for AI-Driven Scientific and Algorithmic Discovery</p>
9
+ <p align="center">
10
+   <a href="https://skydiscover-ai.github.io/blog.html"><img src="https://img.shields.io/badge/blog-SkyDiscover-orange?style=flat-square" alt="Blog" /></a>
11
+ <a href="https://arxiv.org/abs/2602.20133"><img src="https://img.shields.io/badge/paper-AdaEvolve-red?style=flat-square" alt="AdaEvolve Paper" /></a>
12
+ <a href="https://arxiv.org/abs/2602.23413"><img src="https://img.shields.io/badge/paper-EvoX-lightblue?style=flat-square" alt="EvoX Paper" /></a>
13
+   <a href="LICENSE"><img src="https://img.shields.io/badge/license-Apache--2.0-green?style=flat-square" alt="License" /></a>
14
+ </p>
15
+
16
+
17
+
18
+ <p align="center">
19
+ <img src="assets/architecture.png" width="720" alt="SkyDiscover architecture"><br>
20
+ </p>
21
+
22
+
23
+ **SkyDiscover** is a modular framework for AI-driven scientific and algorithmic discovery, providing a unified interface for implementing, running, and fairly comparing discovery algorithms across 200+ optimization tasks.
24
+
25
+ SkyDiscover introduces two new adaptive optimization algorithms:
26
+
27
+ - **[AdaEvolve](https://arxiv.org/abs/2602.20133)**, which dynamically adjusts its optimization behavior based on observed progress.
28
+ - **[EvoX](https://arxiv.org/abs/2602.23413)**, which dynamically evolves the optimization (evolution) strategy itself using LLMs on the fly.
29
+
30
+ SkyDiscover can also run OpenEvolve, ShinkaEvolve, and GEPA from their own source code, making it quick to benchmark these algorithms. It additionally hosts native versions of OpenEvolve and GEPA, exposed as the `openevolve_native` and `gepa_native` algorithms built on the modular interface.
31
+
32
+ SkyDiscover natively supports [Harbor](https://harborframework.com/)-format benchmarks, so you can run external benchmark suites out of the box, including [AlgoTune](https://github.com/oripress/AlgoTune), [EvoEval](https://github.com/evo-eval/evoeval), [HumanEvalFix](https://github.com/bigcode-project/octopack), [BigCodeBench](https://github.com/bigcode-project/bigcodebench), [LiveCodeBench](https://livecodebench.github.io/), [USACO](https://usaco.org/), [CRUSTBench](https://github.com/AInfinity/CRUSTBench), and [CodePDE](https://github.com/).
33
+ > 🚧 This project is under active development.
34
+
35
+ ---
36
+
37
+ ## 🏆 Benchmark Performance
38
+
39
+ Across ~200 optimization benchmarks, AdaEvolve and EvoX achieve the strongest open-source results: matching or exceeding AlphaEvolve and human SOTA, and outperforming OpenEvolve, GEPA, and ShinkaEvolve under identical generation budgets.
40
+
41
+ - **Frontier-CS (172 problems)**: ~34% median score improvement over OpenEvolve, GEPA, and ShinkaEvolve
42
+ - **Math + Systems Optimization (14 tasks evaluated)**: Matches or exceeds AlphaEvolve and human-designed SOTA on 6/6 systems and 6/8 math tasks
43
+ - **Real-world systems impact**: 41% lower cross-cloud transfer cost, 14% better GPU load balance for MoE serving, and 29% lower KV-cache pressure via GPU model placement
44
+
45
+ <p align="center">
46
+ <img src="assets/benchmarks.png" width="900" alt="SkyDiscover benchmarks">
47
+ </p>
48
+
49
+ <details>
50
+ <summary><b>📊 Complete results of AdaEvolve and EvoX (100 iterations)</b></summary>
51
+
52
+ > AdaEvolve and EvoX are **complementary**: AdaEvolve adapts search *parameters* for fast early gains; EvoX evolves the search *strategy itself* for stronger long-horizon gains. Both are built on SkyDiscover.
53
+
54
+ <p align="center">
55
+ <img src="assets/comparison.png" width="900" alt="Main results for systems and math problems">
56
+ </p>
57
+
58
+ </details>
59
+
60
+ <details>
61
+ <summary><b>📈 Scaling behavior of AdaEvolve and EvoX</b></summary>
62
+
63
+ The scaling behavior of AdaEvolve and EvoX shows a **complementary crossover**. AdaEvolve's per-iteration parameter adaptation yields fast early gains in low-budget runs (T≤50), while EvoX's demand-driven strategy evolution unlocks step-change improvements in longer runs (T≥50).
64
+
65
+ <p align="center">
66
+ <img src="assets/scaling_comparison.png" width="900" alt="Scaling behavior of AdaEvolve vs EvoX across 500 iterations">
67
+ <br><em>Best-so-far score vs. iteration for Signal Processing, Heilbronn Convex, Prism, and Cloudcast (500 iterations, GPT-5).</em>
68
+ </p>
69
+
70
+ </details>
71
+
72
+ <details>
73
+ <summary><b>🔗 Evolving AdaEvolve's policy with EvoX (coming soon)</b></summary>
74
+
75
+ The two methods are **composable**: EvoX can evolve using AdaEvolve as its starting strategy, achieving the best results on 3 out of 4 benchmarks (100 iterations, GPT-5). This combined mode will be available in SkyDiscover soon.
76
+
77
+ | Benchmark | AdaEvolve | EvoX (Random Init) | EvoX (AdaEvolve Init) |
78
+ |:--|--:|--:|--:|
79
+ | Signal Proc. (↑) | 0.718 | 0.721 | **0.760** |
80
+ | Heilbronn Cvx. (↑) | 0.0290 | 0.0270 | **0.0291** |
81
+ | Cloudcast (↓) | 640.5 | 637.1 | **623.4** |
82
+ | Prism (↑) | 26.37 | **30.52** | 26.27 |
83
+
84
+ </details>
85
+
86
+ <details>
87
+ <summary><b>Task breakdown across math, systems, and programming challenges</b></summary>
88
+
89
+ | | Benchmark | Domain | Tasks | Description |
90
+ |-|-----------|--------|------:|-------------|
91
+ | 🔢 | [math/](benchmarks/math/) | Math | 14 | Circle packing, Erdős problems, geometric optimization |
92
+ | 🖥️ | [ADRS/](benchmarks/ADRS/) | Systems | 5 | Cloud scheduling, load balancing, MoE expert placement |
93
+ | ⚡ | [gpu_mode/](benchmarks/gpu_mode/) | Systems | 4 | GPU kernel optimization |
94
+ | 🔧 | [kernelbench/](benchmarks/kernelbench/) | Systems | 250+ | [KernelBench](https://github.com/ScalingIntelligence/KernelBench) GPU kernel speedup optimization |
95
+ | 🧩 | [frontier-cs-eval/](benchmarks/frontier-cs-eval/) | Algorithms | 172 | [Frontier-CS](https://frontier-cs.org/) competitive programming |
96
+ | 🧠 | [arc_benchmark/](benchmarks/arc_benchmark/) | Reasoning | — | ARC-AGI visual reasoning |
97
+ | 💻 | [ale_bench/](benchmarks/ale_bench/) | Algorithms | 10 | Algorithmic programming contests |
98
+ | 🎨 | [image_gen/](benchmarks/image_gen/) | Creative | 1 | AI image generation evolution |
99
+ | 💬 | [prompt_optimization/](benchmarks/prompt_optimization/) | NLP | 1 | HotPotQA prompt evolution |
100
+
101
+ See [Dependency extras](#dependency-extras) for install commands per benchmark.
102
+
103
+ </details>
104
+
105
+ ## 🚀 Quick Start
106
+
107
+ **Prerequisites:** Python >= 3.10, [uv](https://docs.astral.sh/uv/)
108
+
109
+ ```bash
110
+ # Install
111
+ uv sync
112
+ export OPENAI_API_KEY="<your-key>"
113
+
114
+ # Try the circle packing benchmark
115
+ uv sync --extra math
116
+ uv run skydiscover-run benchmarks/math/circle_packing/initial_program.py \
117
+ benchmarks/math/circle_packing/evaluator.py \
118
+ --config benchmarks/math/circle_packing/config.yaml \
119
+ --search evox \
120
+ --iterations 100
121
+
122
+ uv run skydiscover-run benchmarks/math/circle_packing/initial_program.py \
123
+ benchmarks/math/circle_packing/evaluator.py \
124
+ --config benchmarks/math/circle_packing/config.yaml \
125
+ --search adaevolve \
126
+ --iterations 100
127
+
128
+ # Or run on your own problem
129
+ # algo can be "evox", "adaevolve", "openevolve", "gepa", "shinkaevolve"
130
+ uv run skydiscover-run initial_program.py evaluator.py \
131
+ --search <algo> \
132
+ --model gpt-5 \
133
+ --iterations 100
134
+
135
+ # initial_program is optional — omit it to let the LLM start from scratch
136
+ uv run skydiscover-run evaluator.py \
137
+ --search <algo> \
138
+ --model gpt-5 \
139
+ --iterations 100
140
+
141
+ # Run a Harbor benchmark (e.g. AlgoTune) — no seed program needed
142
+ pip install harbor
143
+ harbor datasets download algotune@1.0 -o /tmp/algotune
144
+ uv run skydiscover-run /tmp/algotune/<id>/algotune-set-cover \
145
+ --model anthropic/claude-sonnet-4-6 \
146
+ --search best_of_n -i 10
147
+ ```
148
+
149
+ Or use the Python API:
150
+
151
+ ```python
152
+ from skydiscover import run_discovery
153
+
154
+ result = run_discovery(
155
+ initial_program="initial_program.py",
156
+ evaluator="evaluator.py",
157
+ search="<algo>", # <algo> can be "adaevolve", "evox", "openevolve", "gepa", "shinkaevolve"
158
+ model="gpt-5",
159
+ iterations=100,
160
+ )
161
+
162
+ print(result.best_score, result.best_solution)
163
+ ```
164
+
165
+
166
+ ## ✏️ What You Write
167
+
168
+ ### Scoring Function (required)
169
+
170
+ SkyDiscover supports three evaluator formats — pick whichever fits your use case:
171
+
172
+ | Format | When to use | What you point `evaluation_file` at |
173
+ |:---|:---|:---|
174
+ | **Python function** | Simple tasks, no system deps | `evaluator.py` |
175
+ | **Containerized** | Custom deps, data files, isolation | `evaluator/` directory (must contain `Dockerfile` + `evaluate.sh`) |
176
+ | **Harbor task** | External benchmark suites (AlgoTune, EvoEval, HumanEvalFix, BigCodeBench, LiveCodeBench, USACO, CRUSTBench, CodePDE, and more) | Task directory (must contain `instruction.md` + `tests/` + `environment/Dockerfile`) |
177
+
178
+ SkyDiscover auto-detects the format. See [`benchmarks/README.md`](benchmarks/README.md#adding-a-benchmark) for full setup instructions.
179
+
180
+ **Python evaluator** — a file with an `evaluate(program_path)` function:
181
+
182
+ ```python
183
+ def evaluate(program_path):
184
+ score = run_and_grade(program_path)
185
+ return {
186
+ "combined_score": score, # primary optimization target (maximized)
187
+ "artifacts": { # optional — stored with the solution for future context
188
+ "feedback": "Off by one in the loop boundary",
189
+ },
190
+ }
191
+ ```
192
+
193
+ **Containerized evaluator** — a directory with a `Dockerfile` and `evaluate.sh` that writes JSON to stdout. Runs in Docker, so it can have arbitrary dependencies.
194
+
195
+ **Harbor task** — a directory following the [Harbor](https://harborframework.com/) format (`instruction.md`, `environment/Dockerfile`, `tests/test.sh`). Works out of the box with 8+ tested benchmark suites (see [benchmarks/README.md](benchmarks/README.md#tested-harbor-datasets) for the full list).
196
+
197
+ - **combined_score** drives evolution. If omitted, SkyDiscover averages all numeric values in the dict.
198
+ - **artifacts** is optional — entries are injected into the next LLM prompt as context.
199
+
200
+ For `search.type: adaevolve`, you can also enable explicit Pareto optimization by configuring `search.database.pareto_objectives` and returning those objective metrics directly from the evaluator. In that mode, `combined_score` becomes optional and is only used as a scalar fallback/proxy when configured.
201
+
202
+ ### Starting Solution (optional)
203
+
204
+ The initial program is **optional**. When omitted, the LLM generates a solution from scratch. If you provide one, mark the region to mutate with `EVOLVE-BLOCK-START` / `EVOLVE-BLOCK-END` markers; everything outside the markers is left untouched.
205
+
206
+ ```python
207
+ # EVOLVE-BLOCK-START
208
+ def solve(input_data):
209
+ return input_data # baseline — SkyDiscover will improve this
210
+ # EVOLVE-BLOCK-END
211
+ ```
212
+
213
+ If no markers are present, the entire file is treated as mutable.
214
+
215
+
216
+ ## 🧬 Pick an Algorithm
217
+
218
+ See [Benchmark Performance](#-benchmark-performance) for a detailed comparison of AdaEvolve and EvoX against other algorithms.
219
+
220
+ | Algorithm | Flag | Description |
221
+ |:---|:---|:---|
222
+ | ⭐&nbsp;**AdaEvolve** | `--search adaevolve` | Multi-island adaptive search with UCB, migration, and paradigm breakthroughs |
223
+ | 🧠&nbsp;**EvoX** | `--search evox` | Self-evolving paradigm that co-adapts solution generation and experience management |
224
+ | 📊&nbsp;**Top-K** | `--search topk` | Selects top-K solutions to refine |
225
+ | 🔍&nbsp;**Beam&nbsp;Search** | `--search beam_search` | Breadth-first expansion of a beam of top solutions |
226
+ | 🎲&nbsp;**Best-of-N** | `--search best_of_n` | Generates N variants per iteration, keeps the best |
227
+ | 🧪&nbsp;**GEPA&nbsp;Native** | `--search gepa_native` | Pareto-efficient search with reflective prompting and LLM-mediated merge |
228
+ | 🗺️&nbsp;**OpenEvolve&nbsp;Native** | `--search openevolve_native` | MAP-Elites + island-based evolutionary search |
229
+
230
+ ### External backends
231
+
232
+ Install with `uv sync --extra external`, then use the corresponding flag:
233
+
234
+ | Backend | Flag | Source |
235
+ |:---|:---|:---|
236
+ | **OpenEvolve** | `--search openevolve` | [codelion/openevolve](https://github.com/codelion/openevolve) |
237
+ | **GEPA** | `--search gepa` | [gepa-ai/gepa](https://github.com/gepa-ai/gepa) |
238
+ | **ShinkaEvolve** | `--search shinkaevolve` | [SakanaAI/ShinkaEvolve](https://github.com/SakanaAI/ShinkaEvolve) (manual install) |
239
+
240
+ <details>
241
+ <summary>ShinkaEvolve manual install</summary>
242
+
243
+ ```bash
244
+ git clone --depth 1 https://github.com/SakanaAI/ShinkaEvolve.git external_repos/ShinkaEvolve
245
+ uv pip install -e external_repos/ShinkaEvolve
246
+ ```
247
+
248
+ </details>
249
+
250
+
251
+ ## ⚙️ Configuration
252
+
253
+ Pass a YAML config with `-c`. See [configs/](configs/) for full annotated templates.
254
+
255
+ ```yaml
256
+ max_iterations: 100
257
+ llm:
258
+ models: [{ name: "gemini/gemini-3-pro-preview", weight: 1.0 }]
259
+ search:
260
+ type: "adaevolve" # or "evox", "topk", "beam_search", "best_of_n"
261
+ prompt:
262
+ system_message: |
263
+ You are an expert at optimizing algorithms.
264
+ ```
265
+
266
+ API keys (OPENAI_API_KEY, GEMINI_API_KEY, etc.) are resolved from environment variables automatically.
267
+
268
+ ### 📊 Live Monitor & Human Feedback
269
+
270
+ Add `monitor: { enabled: true }` to your config. The dashboard URL is printed at run start; the dashboard shows a scatter plot of all programs, code diffs, metrics, and AI summaries. A **Human Feedback** panel lets you steer evolution in real time.
271
+ Replay a completed run:
272
+
273
+ ```bash
274
+ uv run skydiscover-viewer /path/to/checkpoints/checkpoint_100
275
+ ```
276
+
277
+
278
+ ## 📖 Reference
279
+
280
+ <details>
281
+ <summary><b>CLI flags</b></summary>
282
+
283
+ ```
284
+ uv run skydiscover-run [INITIAL_PROGRAM] EVALUATOR [options]
285
+ ```
286
+
287
+ | Flag | Description |
288
+ |:---|:---|
289
+ | `-c, --config FILE` | Config YAML |
290
+ | `-i, --iterations N` | Number of iterations |
291
+ | `-m, --model MODEL` | LLM model (overrides config) |
292
+ | `-s, --search TYPE` | Search algorithm |
293
+ | `-o, --output DIR` | Output directory |
294
+ | `--api-base URL` | Override LLM API endpoint |
295
+ | `--checkpoint DIR` | Resume from checkpoint |
296
+ | `--agentic` | Enable agentic mode (LLM can read your files) |
297
+ | `-l, --log-level LEVEL` | DEBUG, INFO, WARNING, or ERROR |
298
+
299
+ </details>
300
+
301
+ <details>
302
+ <summary><b>Python API — discover_solution() (convenience wrapper)</b></summary>
303
+
304
+ `discover_solution()` is a convenience wrapper around `run_discovery()` (shown in [Quick Start](#-quick-start)) for inline string solutions and callable evaluators:
305
+
306
+ ```python
307
+ from skydiscover import discover_solution
308
+
309
+ result = discover_solution(
310
+ initial_solution="def solve(x): return x", # optional — omit to start from scratch
311
+ evaluator=lambda path: {"combined_score": run_tests(path)},
312
+ iterations=50,
313
+ search="evox",
314
+ )
315
+ ```
316
+
317
+ </details>
318
+
319
+ <details>
320
+ <summary><b>Model providers</b></summary>
321
+
322
+ Any [LiteLLM](https://docs.litellm.ai/)-compatible model works using `provider/model` format:
323
+
324
+ ```bash
325
+ --model gpt-5 # OpenAI (default)
326
+ --model gemini/gemini-3-pro-preview # Gemini
327
+ --model anthropic/claude-sonnet-4-20250514 # Anthropic
328
+ --model ollama/llama3 --api-base http://localhost:11434/v1 # Local (Ollama, vLLM, etc.)
329
+ ```
330
+
331
+ Multi-model pools with weighted sampling are supported in config:
332
+
333
+ ```yaml
334
+ llm:
335
+ models:
336
+ - name: "gpt-5-mini"
337
+ weight: 0.7
338
+ - name: "gemini/gemini-2.0-flash"
339
+ weight: 0.3
340
+ ```
341
+
342
+ </details>
343
+
344
+ <details id="dependency-extras">
345
+ <summary><b>Benchmark dependency extras</b></summary>
346
+
347
+ ```bash
348
+ uv sync # Base install
349
+ uv sync --extra math # Math benchmarks (SciPy, JAX, PyWavelets, …)
350
+ uv sync --extra adrs # ADRS systems benchmarks
351
+ uv sync --extra frontier-cs # Frontier-CS benchmark tooling
352
+ uv sync --extra external # OpenEvolve / GEPA / ShinkaEvolve backends
353
+ uv sync --extra prompt-optimization # HotPotQA prompt optimization
354
+ ```
355
+
356
+ Combine extras as needed: `uv sync --extra external --extra math`
357
+
358
+ If a benchmark ships its own `requirements.txt`, also run: `uv pip install -r path/to/requirements.txt`
359
+
360
+ </details>
361
+
362
+ ---
363
+
364
+ ## 🛠️ Extending SkyDiscover
365
+
366
+ - **New benchmark** → [`benchmarks/README.md`](benchmarks/README.md#adding-a-benchmark)
367
+ - **New search algorithm** → [`skydiscover/search/README.md`](skydiscover/search/README.md)
368
+ - **New context builder** → [`skydiscover/context_builder/README.md`](skydiscover/context_builder/README.md)
369
+
370
+ ---
371
+
372
+ ## 🔗 Related Work
373
+ SkyDiscover is inspired by [AlphaEvolve](https://deepmind.google/discover/blog/alphaevolve-a-gemini-powered-coding-agent-for-designing-advanced-algorithms/) and incorporates useful code components from open-source efforts such as [OpenEvolve](https://github.com/codelion/openevolve). Its interface is compatible with the [optimize_anything](https://gepa-ai.github.io/gepa/blog/2026/02/18/introducing-optimize-anything/) API.
374
+
375
+ ## ✍️ Citation
376
+
377
+ ```bibtex
378
+ @misc{skydiscover2026,
379
+ title = {SkyDiscover: A Flexible Framework for AI-Driven Scientific and Algorithmic Discovery},
380
+ author = {Liu, Shu and Cemri, Mert and Agarwal, Shubham and Krentsel, Alexander and Naren, Ashwin and Mang, Qiuyang and Li, Zhifei and Gupta, Akshat and Maheswaran, Monishwaran and Cheng, Audrey and Pan, Melissa and Boneh, Ethan and Ramchandran, Kannan and Sen, Koushik and Dimakis, Alexandros G. and Zaharia, Matei and Stoica, Ion},
381
+ year = {2026},
382
+ url = {https://skydiscover-ai.github.io/blog.html}
383
+ }
384
+ ```
385
+
386
+ ## 📬 Contact Us
387
+ For questions or feedback, reach out to us:
388
+ [lshu@berkeley.edu](mailto:lshu@berkeley.edu) · [mert_cemri@berkeley.edu](mailto:mert_cemri@berkeley.edu) · [shubham3@berkeley.edu](mailto:shubham3@berkeley.edu)
benchmarks/ADRS/cloudcast/README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Cloudcast — Multi-Cloud Data Transfer Optimization
2
+
3
+ Broadcast a dataset from a source cloud region to multiple destinations at minimum total cost. The evolved `search_algorithm` constructs routing topologies (relay trees, Steiner-like structures) that exploit shared intermediate hops across cloud providers.
4
+
5
+ Based on the Skyplane/Cloudcast system (NSDI'24).
6
+
7
+ ## Setup
8
+
9
+ 1. **Download the dataset** (network profiles and evaluation configs):
10
+
11
+ ```bash
12
+ cd benchmarks/ADRS/cloudcast
13
+ bash evaluator/download_dataset.sh
14
+ ```
15
+
16
+ This downloads:
17
+ - `profiles/cost.csv` — egress cost ($/GB) per region pair
18
+ - `profiles/throughput.csv` — measured throughput (bps) per region pair
19
+ - `examples/config/*.json` — 5 network configurations used for evaluation (intra-AWS, intra-Azure, intra-GCP, inter-cloud)
20
+
21
+ 2. **Set your API key:**
22
+
23
+ ```bash
24
+ export OPENAI_API_KEY=...
25
+ ```
26
+
27
+ ## Run
28
+
29
+ From the repo root:
30
+
31
+ ```bash
32
+ uv run skydiscover-run \
33
+ benchmarks/ADRS/cloudcast/initial_program.py \
34
+ benchmarks/ADRS/cloudcast/evaluator.py \
35
+ -c benchmarks/ADRS/cloudcast/config.yaml \
36
+ -s [your_algorithm] \
37
+ -i 100
38
+ ```
39
+
40
+ ## Files
41
+
42
+ | File | Description |
43
+ |------|-------------|
44
+ | `initial_program.py` | Baseline `search_algorithm` function to evolve |
45
+ | `evaluator.py` | Scores programs on total transfer cost across 5 network configs |
46
+ | `config.yaml` | Task-specific config (LLM, evaluator timeout, system prompt) |
47
+ | `simulator.py` | Broadcast cost simulator |
48
+ | `broadcast.py` | `BroadCastTopology` data structure |
49
+ | `utils.py` | Graph construction from profile CSVs |
50
+ | `download_dataset.sh` | Script to download required data files |
benchmarks/ADRS/cloudcast/evaluator/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+ WORKDIR /benchmark
3
+
4
+ COPY requirements.txt .
5
+ RUN pip install --no-cache-dir -r requirements.txt
6
+
7
+ # wrapper.py provides backwards compatibility for old Python-based evaluators
8
+ # that define evaluate(program_path) -> dict, bridging them to the container
9
+ # JSON protocol. Source of truth: skydiscover/evaluation/wrapper.py
10
+ COPY . .
11
+ RUN chmod +x evaluate.sh
12
+
13
+ ENTRYPOINT ["./evaluate.sh"]
benchmarks/ADRS/cloudcast/evaluator/broadcast.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List
2
+
3
+
4
class SingleDstPath(Dict):
    """Mapping from a partition key to the list of edges that partition follows to one destination."""

    partition: int
    edges: List[List]  # [[src, dst, edge data]]


class BroadCastTopology:
    """Routing plan for broadcasting partitions from one source to several destinations.

    ``paths`` has the shape ``{dst: {partition_key: [[src, dst, edge_data], ...]}}``.
    Partition keys are always strings (``"0"``, ``"1"``, ...), matching what the
    setter methods below write.
    """

    def __init__(self, src: str, dsts: List[str], num_partitions: int = 4, paths: Dict[str, SingleDstPath] = None):
        """
        Args:
            src: Source node name.
            dsts: Destination node names.
            num_partitions: Number of partitions to route to every destination.
            paths: Optional pre-built ``{dst: {partition_key: edges}}`` mapping; when
                omitted, every destination gets one empty (``None``) slot per partition.
        """
        self.src = src  # single str
        self.dsts = dsts  # list of strs
        self.num_partitions = num_partitions

        # dict(dst) --> dict(partition) --> list(nx.edges)
        # example: {dst1: {partition1: [src->node1, node1->dst1], partition 2: [src->dst1]}}
        if paths is not None:
            self.paths = paths
            # This class defines no set_graph(); calling it unconditionally raised
            # AttributeError. Only invoke it when a subclass actually provides one.
            rebuild_graph = getattr(self, "set_graph", None)
            if callable(rebuild_graph):
                rebuild_graph()
        else:
            # Seed the slots with *string* keys. The setters below index with
            # str(partition), so integer keys made append_dst_partition_path raise
            # KeyError and produced duplicate keys once the mapping was dumped to JSON.
            self.paths = {dst: SingleDstPath.fromkeys(str(p) for p in range(num_partitions)) for dst in dsts}

    def get_paths(self):
        """Print and return the full ``{dst: {partition_key: edges}}`` mapping."""
        print(f"now the set path is: {self.paths}")
        return self.paths

    def set_num_partitions(self, num_partitions: int):
        """Overwrite the recorded partition count (does not touch ``paths``)."""
        self.num_partitions = num_partitions

    def set_dst_partition_paths(self, dst: str, partition: int, paths: List[List]):
        """
        Set paths for partition = partition to reach dst
        """
        partition = str(partition)
        self.paths[dst][partition] = paths

    def append_dst_partition_path(self, dst: str, partition: int, path: List):
        """
        Append path for partition = partition to reach dst
        """
        partition = str(partition)
        # .get() treats a missing slot the same as an empty (None) one.
        if self.paths[dst].get(partition) is None:
            self.paths[dst][partition] = []
        self.paths[dst][partition].append(path)
benchmarks/ADRS/cloudcast/evaluator/download_dataset.sh ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # Download dataset and config files for the Cloudcast benchmark.
3
+ #
4
+ # Required files:
5
+ # profiles/cost.csv - Cloud egress cost per region pair ($/GB)
6
+ # profiles/throughput.csv - Measured throughput per region pair (bps)
7
+ # examples/config/*.json - Network configurations for evaluation
8
+ #
9
+ # Usage:
10
+ # cd benchmarks/ADRS/cloudcast
11
+ # bash evaluator/download_dataset.sh
12
+
13
+ set -euo pipefail
14
+ cd "$(dirname "$0")"
15
+
16
+ BASE_URL="https://huggingface.co/datasets/f20180301/adrs-data/resolve/main/cloudcast"
17
+
18
+ echo "Downloading Cloudcast benchmark data..."
19
+
20
+ # Download profiles
21
+ mkdir -p profiles
22
+ echo " Downloading profiles/cost.csv..."
23
+ wget -q -O profiles/cost.csv "${BASE_URL}/profiles/cost.csv"
24
+ echo " Downloading profiles/throughput.csv..."
25
+ wget -q -O profiles/throughput.csv "${BASE_URL}/profiles/throughput.csv"
26
+
27
+ # Download example configs
28
+ mkdir -p examples/config
29
+ for config in intra_aws.json intra_azure.json intra_gcp.json inter_agz.json inter_gaz2.json; do
30
+ echo " Downloading examples/config/${config}..."
31
+ wget -q -O "examples/config/${config}" "${BASE_URL}/examples/config/${config}"
32
+ done
33
+
34
+ echo ""
35
+ echo "Done. Downloaded files:"
36
+ ls -lh profiles/*.csv
37
+ ls -lh examples/config/*.json
benchmarks/ADRS/cloudcast/evaluator/evaluate.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils import *
2
+ from simulator import *
3
+ from broadcast import BroadCastTopology
4
+ from pathlib import Path
5
+ import networkx as nx
6
+ import subprocess
7
+ import argparse
8
+ import json
9
+ import sys
10
+ import os
11
+
12
+
13
def N_dijkstra(src, dsts, G, num_partitions):
    """Route every partition to each destination along its cheapest path.

    Args:
        src: Source node name.
        dsts: Destination node names.
        G: Directed graph whose edges carry a ``"cost"`` attribute.
        num_partitions: Number of partitions to route.

    Returns:
        A BroadCastTopology in which all partitions of a destination share the
        ``"cost"``-weighted Dijkstra path from ``src``.
    """
    h = G.copy()
    # Drop edges into the source and self-loops before searching. The original used
    # the __main__ global `source_node` here, which is a NameError when this function
    # is imported and silently ignores the `src` argument otherwise.
    h.remove_edges_from(list(h.in_edges(src)) + list(nx.selfloop_edges(h)))
    bc_topology = BroadCastTopology(src, dsts, num_partitions)

    for dst in dsts:
        path = nx.dijkstra_path(h, src, dst, weight="cost")
        # Walk consecutive node pairs of the path.
        for s, t in zip(path, path[1:]):
            for j in range(bc_topology.num_partitions):
                bc_topology.append_dst_partition_path(dst, j, [s, t, G[s][t]])

    return bc_topology
26
+
27
+
28
def N_direct(src, dsts, G, num_partitions):
    """Baseline: send every partition over the single direct edge src -> dst.

    Looks up ``G[src][dst]`` for each destination and records a one-hop path
    for every partition of that destination.
    """
    topology = BroadCastTopology(src, dsts, num_partitions)

    for target in dsts:
        direct_edge = G[src][target]
        part = 0
        while part < topology.num_partitions:
            # A fresh one-hop list per partition, so slots never alias each other.
            topology.set_dst_partition_paths(target, part, [[src, target, direct_edge]])
            part += 1

    return topology
37
+
38
+
39
def MULTI_MDST(src, dsts, G, num_partitions):
    """Repeatedly extract MDST graphs from a copy of ``G`` until it has no edges left.

    Each round calls ``MDST(src, dsts, remaining, 1)``, prints and records the
    returned graph, then removes that graph's edges from the working copy.

    NOTE(review): this function has no return statement, so it returns ``None``;
    ``num_partitions`` is unused. Callers that expect a topology object will fail.
    """
    # Construct MDST path based on original graph
    remaining = G.copy()
    collected = []
    while True:
        if len(list(remaining.edges())) <= 0:
            break
        _, tree = MDST(src, dsts, remaining, 1)
        print("MDST graph: ", tree.edges.data())
        collected.append(tree)
        remaining.remove_edges_from(list(tree.edges()))

    print("Number of MDSTs: ", len(collected))
50
+
51
+
52
def Min_Steiner_Tree(src, dsts, G, num_partitions, hop_limit=3000):
    """Build a broadcast topology from the tree reported by an external STP solver.

    Writes the problem (graph without edges into ``src`` and without self-loops)
    to ``test.stp``, runs the solver binary named by the ``SCIPSTP_BIN``
    environment variable (default ``"scipstp"``), and reads the chosen edges back
    from ``test.stplog``.

    Args:
        src: Source (root) node name.
        dsts: Destination (terminal) node names.
        G: Directed graph whose edges carry a ``"cost"`` attribute.
        num_partitions: Partition count passed to BroadCastTopology.
        hop_limit: Value written to the ``HopLimit`` field of the problem file.

    Returns:
        Whatever ``append_src_dst_paths`` returns for the solution graph.
    """
    source_v, dest_v = src, dsts

    h = G.copy()
    h.remove_edges_from(list(h.in_edges(source_v)) + list(nx.selfloop_edges(h)))

    nodes, edges = list(h.nodes), list(h.edges)
    num_nodes, num_edges = len(nodes), len(edges)
    # The problem file uses 1-based integer node ids. Build both directions once
    # instead of calling nodes.index() (a linear scan) for every edge endpoint.
    name_to_id = {name: idx for idx, name in enumerate(nodes, start=1)}
    id_to_name = {idx: name for name, idx in name_to_id.items()}

    config_loc = "write.set"
    write_loc = "test.stplog"
    param_loc = "test.stp"

    scipstp_bin = os.environ.get("SCIPSTP_BIN", "scipstp")
    command = f" {scipstp_bin}"
    command += f" -f {param_loc} -s {config_loc} -l {write_loc}"

    def construct_stp():
        """Write the problem description to ``param_loc``."""
        section_begin = '33D32945 STP File, STP Format Version 1.0\n\nSECTION Comment\nName "Relay: cloud regions"\nCreator "SkyDiscover"\n'
        section_begin += f'Remark "Cloud region problem adapted from relay"\nEND\n\nSECTION Graph\n'
        section_begin += f"Nodes {num_nodes}\nEdges {num_edges}\nHopLimit {hop_limit}\n"

        edge_lines = [
            f"A {name_to_id[u]} {name_to_id[v]} {h[u][v]['cost']}\n"
            for u, v in edges
        ]
        # Always close the Graph section; the original only emitted END after the
        # last edge, so an edge-less graph produced a malformed file.
        edge_lines.append("END\n")

        root_id = name_to_id[source_v]
        terminal_lines = [f"T {name_to_id[d]}\n" for d in dest_v]
        terminal_lines.append("END\n\nEOF")
        section_terminal = f"""\nSECTION Terminals\nRoot {root_id}\nTerminals {len(dest_v)}\n"""

        with open(param_loc, "w") as f:
            f.write(section_begin)
            for line in edge_lines:
                f.write(line.lstrip())
            f.write(section_terminal)
            for line in terminal_lines:
                f.write(line)

    def read_result(loc):
        """Parse three-field lines starting with ``E`` into a DiGraph of chosen edges."""
        di_stree_graph = nx.DiGraph()
        with open(loc, "r") as f:
            for line in f.readlines():
                fields = line.split()
                if line.startswith("E") and len(fields) == 3:
                    src_r, dst_r = id_to_name[int(fields[1])], id_to_name[int(fields[2])]
                    # Edge attributes come from the original graph G.
                    di_stree_graph.add_edge(src_r, dst_r, **G[src_r][dst_r])
        return di_stree_graph

    try:
        with open(config_loc, "w") as f:
            f.write('stp/logfile = "use_probname"')

        construct_stp()  # construct problem to a file
        process = subprocess.Popen(command, shell=True)  # run the steiner tree solver
        process.wait()
        solution_graph = read_result(loc=write_loc)
    finally:
        # Remove scratch files even when the solver or the parser fails, so a
        # failed run cannot leave stale files for the next invocation to read.
        for scratch in (config_loc, write_loc, param_loc):
            if os.path.exists(scratch):
                os.remove(scratch)

    print(
        f"Number of overlays added: {len(solution_graph.nodes) - (1 + len(dsts))}, {[node for node in solution_graph.nodes if node not in [src]+dsts]}"
    )
    bc_topology = BroadCastTopology(src, dsts, num_partitions)

    return append_src_dst_paths(src, dsts, solution_graph, bc_topology)
133
+
134
+
135
if __name__ == "__main__":
    # Command-line driver: generate baseline paths for one config file, then
    # evaluate each generated path file with BCSimulator.
    parser = argparse.ArgumentParser()
    parser.add_argument("jsonfile", help="input json file")
    parser.add_argument("-a", "--algo", type=str, nargs="?", const="")
    parser.add_argument("-n", "--num-vms", type=int, nargs="?", const="")
    args = vars(parser.parse_args())
    print("Args:", args)

    print(f"\n==============> Baseline generation")
    # Name the run after the config *file*, not after the second path component:
    # split("/")[1] turned "examples/config/intra_aws.json" into "config", so every
    # config shared one output directory.
    config_name = os.path.basename(args["jsonfile"]).split(".")[0]
    with open(args["jsonfile"], "r") as f:
        config = json.loads(f.read())

    num_vms = int(args["num_vms"])

    # generate default graph with node and edge info
    # G = make_nx_graph(throughput_path="profiles/aws_throughput_11_8.csv")
    G = make_nx_graph(num_vms=num_vms)

    # src, dst
    source_node = config["source_node"]
    terminal_nodes = config["dest_nodes"]

    print(f"source_v = '{source_node}'")
    print(f"dest_v = {terminal_nodes}")
    # baseline path generations
    if args["algo"] is None:
        algorithms = [
            "Ndirect",
            "MDST",
            # "HST",
        ]
    else:
        algorithms = [args["algo"]]
    print(f"Algorithms: {algorithms}\n")

    directory = f"paths/{config_name}"
    Path(directory).mkdir(parents=True, exist_ok=True)

    num_partitions = config["num_partitions"]
    for algo in algorithms:
        outf = f"{directory}/{algo}.json"
        print(f"Generate {algo} paths into {outf}")
        if algo == "Ndirect":
            bc_t = N_direct(source_node, terminal_nodes, G, num_partitions)
        elif algo == "MDST":
            bc_t, mdgraph = MDST(source_node, terminal_nodes, G, num_partitions)
        elif algo == "MULTI-MDST":
            bc_t = MULTI_MDST(source_node, terminal_nodes, G, num_partitions)
        elif algo == "HST":
            bc_t = Min_Steiner_Tree(source_node, terminal_nodes, G, num_partitions)
        elif algo == "Ndijkstra":
            bc_t = N_dijkstra(source_node, terminal_nodes, G, num_partitions)
        else:
            raise NotImplementedError(algo)

        bc_t.set_num_partitions(config["num_partitions"])  # simple baseline, don't care about partitions, simply set it

        with open(outf, "w") as outfile:
            outfile.write(
                json.dumps(
                    {
                        "algo": algo,
                        "source_node": bc_t.src,
                        "terminal_nodes": bc_t.dsts,
                        "num_partitions": bc_t.num_partitions,
                        "generated_path": bc_t.paths,
                    }
                )
            )

    # put the evaluate logic here
    # Reuse the already-parsed config. The original re-opened sys.argv[1], which is
    # a flag (e.g. "-a") rather than the JSON file whenever options come first.
    input_dir = f"paths/{config_name}"  # input paths
    output_dir = f"evals/{config_name}"  # eval results
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    simulator = BCSimulator(num_vms, output_dir)
    for algo in algorithms:
        path = f"{input_dir}/{algo}.json"
        simulator.evaluate_path(path, config)  # path of algorithm output, basic config to evaluate
221
+
222
+ # nx.draw(mdgraph, with_labels=True)
223
+ # plt.show()
224
+ # h.render(filename="Ndirect")
benchmarks/ADRS/cloudcast/evaluator/evaluate.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Runs the benchmark's Python evaluator on one candidate program.
set -euo pipefail

# Fail with a usage message instead of bash's bare "unbound variable" error
# when the program path is missing.
PROGRAM="${1:?usage: evaluate.sh PROGRAM [MODE]}"
# MODE ($2) accepted but ignored — override this file to use train/test splits.

python /benchmark/evaluator.py "$PROGRAM"
benchmarks/ADRS/cloudcast/evaluator/evaluator.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.util
2
+ import traceback
3
+ import json
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ # Add parent directory to Python path
9
+ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
10
+ sys.path.insert(0, parent_dir)
11
+ from utils import *
12
+ from simulator import *
13
+ from broadcast import *
14
+ import networkx as nx
15
+
16
+
17
def validate_broadcast_topology(bc_t, source_node, terminal_nodes, num_partitions, G):
    """
    Check that a broadcast topology covers every destination and partition with a
    contiguous route of graph edges from the source to the destination.

    Args:
        bc_t: Topology object exposing ``src``, ``dsts`` and ``paths``.
        source_node: Expected source node.
        terminal_nodes: Expected destination nodes.
        num_partitions: Number of partitions each destination must receive.
        G: Graph providing ``has_edge(u, v)``.

    Returns:
        (is_valid, error_message) tuple; error_message is None when valid.
    """
    # Destinations and source must match the request exactly.
    wanted, declared = set(terminal_nodes), set(bc_t.dsts)
    if declared != wanted:
        missing_dsts = wanted - declared
        extra_dsts = declared - wanted
        return False, f"Destination mismatch: missing={missing_dsts}, extra={extra_dsts}"

    if bc_t.src != source_node:
        return False, f"Source mismatch: expected={source_node}, got={bc_t.src}"

    def route_problem(dst, hops):
        """Return why ``hops`` is not a valid source -> dst route, or None if it is."""
        cursor = source_node
        for hop in hops:
            if len(hop) < 3:
                return "edge format invalid"
            hop_from, hop_to = hop[0], hop[1]
            if not G.has_edge(hop_from, hop_to):
                return f"edge {hop_from}->{hop_to} not in graph"
            if cursor != hop_from:
                return f"path discontinuity: expected {cursor}, got {hop_from}"
            cursor = hop_to
        if cursor != dst:
            return f"path does not reach destination: ends at {cursor}, expected {dst}"
        return None

    # Classify every (destination, partition) slot.
    absent, blank, broken = [], [], []
    for dst in terminal_nodes:
        if dst not in bc_t.paths:
            return False, f"Missing destination '{dst}' in paths"

        slots = bc_t.paths[dst]
        for pid in range(num_partitions):
            key = str(pid)
            if key not in slots:
                absent.append((dst, pid))
            elif slots[key] is None or len(slots[key]) == 0:
                blank.append((dst, pid))
            else:
                reason = route_problem(dst, slots[key])
                if reason is not None:
                    broken.append((dst, pid, reason))

    # Report all collected problems in one message.
    report = [
        f"{label}: {found}"
        for label, found in (
            ("Missing partitions", absent),
            ("Empty partitions", blank),
            ("Invalid paths", broken),
        )
        if found
    ]
    if report:
        return False, "Validation failed: " + "; ".join(report)

    # Final tally: the number of populated slots must equal destinations x partitions.
    required = len(terminal_nodes) * num_partitions
    populated = sum(
        1
        for dst in terminal_nodes
        for pid in range(num_partitions)
        if str(pid) in bc_t.paths[dst]
        and bc_t.paths[dst][str(pid)] is not None
        and len(bc_t.paths[dst][str(pid)]) > 0
    )
    if populated != required:
        return False, f"Data loss detected: expected {required} partitions, got {populated}"

    return True, None
119
+
120
+
121
def evaluate(program_path):
    """
    Score an evolved broadcast program on the bundled network configurations.

    The program must define ``search_algorithm(src, dsts, G, num_partitions)``.
    For each config file found next to this evaluator, the returned topology is
    validated, written to ``paths/<config>/search_algorithm.json`` and costed by
    BCSimulator. Any invalid topology or processing failure yields a zero score.

    Args:
        program_path: Path to the evolved program file

    Returns:
        Dictionary with evaluation metrics including required 'combined_score'
        (``1 / (1 + total_cost)`` on success, ``0.0`` with an ``error`` key otherwise).
    """

    def failure(message):
        """Build the zero-score result carrying ``message``."""
        return {
            "combined_score": 0.0,  # Required by SkyDiscover
            "runs_successfully": 0.0,
            "error": message,
        }

    try:
        # Import the candidate program as a module named "program".
        spec = importlib.util.spec_from_file_location("program", program_path)
        program = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(program)

        if not hasattr(program, "search_algorithm"):
            return failure("Missing search_algorithm function")

        # Config files are looked up relative to this evaluator's directory.
        evaluator_dir = os.path.dirname(os.path.abspath(__file__))
        config_files = [
            os.path.join(evaluator_dir, relative)
            for relative in (
                "examples/config/intra_aws.json",
                "examples/config/intra_azure.json",
                "examples/config/intra_gcp.json",
                "examples/config/inter_agz.json",
                "examples/config/inter_gaz2.json",
            )
        ]

        # Only configs present on disk are evaluated.
        existing_configs = list(filter(os.path.exists, config_files))
        if not existing_configs:
            return failure(f"No configuration files found. Checked: {config_files}")

        num_vms = 2
        total_cost = 0.0
        successful_configs = 0
        failed_configs = 0

        for jsonfile in existing_configs:
            file_label = os.path.basename(jsonfile)
            try:
                print(f"Processing config: {file_label}")

                with open(jsonfile, "r") as handle:
                    config_name = os.path.basename(jsonfile).split(".")[0]
                    config = json.loads(handle.read())

                G = make_nx_graph(num_vms=int(num_vms))

                source_node = config["source_node"]
                terminal_nodes = config["dest_nodes"]

                # Output directory for the generated paths.
                directory = f"paths/{config_name}"
                if not os.path.exists(directory):
                    Path(directory).mkdir(parents=True, exist_ok=True)

                # Run the evolved algorithm.
                num_partitions = config["num_partitions"]
                bc_t = program.search_algorithm(source_node, terminal_nodes, G, num_partitions)
                bc_t.set_num_partitions(config["num_partitions"])

                # An invalid topology ends the whole evaluation with a zero score.
                is_valid, validation_error = validate_broadcast_topology(
                    bc_t, source_node, terminal_nodes, num_partitions, G
                )
                if not is_valid:
                    print(f"Validation failed for {config_name}: {validation_error}")
                    return failure(f"Invalid broadcast topology: {validation_error}")

                # Persist the generated paths for the simulator to read back.
                outf = f"{directory}/search_algorithm.json"
                payload = {
                    "algo": "search_algorithm",
                    "source_node": bc_t.src,
                    "terminal_nodes": bc_t.dsts,
                    "num_partitions": bc_t.num_partitions,
                    "generated_path": bc_t.paths,
                }
                with open(outf, "w") as outfile:
                    outfile.write(json.dumps(payload))

                output_dir = f"evals/{config_name}"
                if not os.path.exists(output_dir):
                    Path(output_dir).mkdir(parents=True, exist_ok=True)

                # Simulate and accumulate the cost.
                simulator = BCSimulator(int(num_vms), output_dir)
                _, cost = simulator.evaluate_path(outf, config)

                total_cost += cost
                successful_configs += 1

                print(f"Config {config_name}: cost={cost:.2f}")

            except Exception as e:
                # The first failing config stops the loop.
                print(f"Failed to process {file_label}: {str(e)}")
                failed_configs += 1
                break

        # Any failed config zeroes the score.
        if failed_configs != 0:
            return failure("1 or more configuration files failed to process")

        avg_cost = total_cost / successful_configs
        success_rate = successful_configs / (successful_configs + failed_configs)

        print(f"Summary: {successful_configs} successful, {failed_configs} failed")
        print(f"Total cost: {total_cost:.2f}")

        # Higher is better: lower total cost maps to a score closer to 1.
        cost_score = 1.0 / (1.0 + total_cost)
        combined_score = cost_score

        return {
            "combined_score": combined_score,  # Required by SkyDiscover
            "runs_successfully": success_rate,
            "total_cost": total_cost,
            "avg_cost": avg_cost,
            "successful_configs": successful_configs,
            "failed_configs": failed_configs,
            "cost_score": cost_score,
            "success_rate": success_rate,
        }

    except Exception as e:
        print(f"Evaluation failed: {str(e)}")
        print(traceback.format_exc())
        return failure(str(e))
289
+
290
+
291
+ if __name__ == "__main__":
292
+ # Backwards-compat: bridges old evaluate() -> dict to the container JSON
293
+ # protocol. wrapper.py is auto-injected at build time from
294
+ # skydiscover/evaluation/wrapper.py.
295
+ from wrapper import run
296
+
297
+ run(evaluate)
benchmarks/ADRS/cloudcast/evaluator/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ networkx>=3.2,<3.4
2
+ pandas
benchmarks/ADRS/cloudcast/evaluator/simulator.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ import networkx as nx
3
+ import json
4
+ from broadcast import *
5
+ from utils import *
6
+
7
+ class BCSimulator:
8
+ # Default variables
9
+ data_vol: float = 4.0 # size of data to be sent to multiple dsts
10
+ num_partitions: int = 1
11
+ partition_data_vol: int = data_vol / num_partitions
12
+ default_vms_per_region: int = 1
13
+ cost_per_instance_hr: float = 0.54 # based on m5.8xlarge spot
14
+ src: str
15
+ dsts: List[str]
16
+ algo: str
17
+ g = nx.DiGraph
18
+
19
+ def __init__(self, num_vms, output_dir=None):
20
+ # write output to file
21
+ self.output_dir = output_dir
22
+ self.default_vms_per_region = num_vms
23
+
24
+ def initialization(self, path, config):
25
+ # check if path is dict
26
+ if isinstance(path, str):
27
+ # Read from json
28
+ with open(path, "r") as f:
29
+ data = json.loads(f.read())
30
+ else:
31
+ data = {
32
+ "algo": "none",
33
+ "source_node": path.src,
34
+ "terminal_nodes": path.dsts,
35
+ "num_partitions": path.num_partitions,
36
+ "generated_path": path.paths,
37
+ }
38
+
39
+ self.src = data["source_node"]
40
+ self.dsts = data["terminal_nodes"]
41
+ self.algo = data["algo"]
42
+ self.paths = data["generated_path"]
43
+
44
+ self.num_partitions = config["num_partitions"]
45
+ self.data_vol = config["data_vol"]
46
+ self.partition_data_vol = self.data_vol / self.num_partitions
47
+
48
+ # Default in/egress limit if not set
49
+ providers = ["aws", "gcp", "azure"]
50
+ provider_ingress = [10, 16, 16]
51
+ provider_egress = [5, 7, 16]
52
+ self.ingress_limits = {providers[i]: provider_ingress[i] for i in range(len(providers))}
53
+ self.egress_limits = {providers[i]: provider_egress[i] for i in range(len(providers))}
54
+
55
+ if "ingress_limit" in config:
56
+ for p, limit in config["ingress_limit"].items():
57
+ self.ingress_limits[p] = self.default_vms_per_region * limit
58
+
59
+ if "egress_limit" in config:
60
+ for p, limit in config["egress_limit"].items():
61
+ self.egress_limits[p] = self.default_vms_per_region * limit
62
+ # print("Data vol (Gbit): ", self.data_vol * 8)
63
+ print("Ingress limits: ", self.ingress_limits)
64
+ print("Egress limits: ", self.egress_limits)
65
+
66
def evaluate_path(self, path, config, write_to_file=False):
    """Simulate a broadcast topology and report its transfer time and cost.

    Args:
        path: topology to evaluate; forwarded unchanged to ``initialization``
            (a JSON file path or an in-memory topology object).
        config: evaluation settings; forwarded unchanged to ``initialization``.
        write_to_file: when True, also dump the results as JSON to
            ``<output_dir>/<algo>_eval.json``.

    Returns:
        Tuple ``(max_transfer_time, total_cost)``.
    """
    print(f"\n==============> Evaluation")
    self.initialization(path, config)

    # construct graph
    print(f"\n--------- Algo: {self.algo}")
    self.g = self.__construct_g()
    print("\n=> Data path to dests")
    # The loop variable must not be called `path`: the argument is still
    # needed for the report below and was previously overwritten here.
    for node_path in self.__get_path():
        print("--")
        print(node_path)
        for u, v in zip(node_path, node_path[1:]):
            edge = self.g[u][v]
            print(f"Flow: {edge['flow']}")
            print(f"Actual throughput: {round(edge['throughput'], 4)}")
            print(f"Cost: {edge['cost']}\n")

    # evaluate transfer time and total cost
    max_t, avg_t, last_dst = self.__transfer_time()
    self.cost = self.__total_cost()

    # output to json file
    if write_to_file:
        report = {
            # A file path is recorded as given; an in-memory topology object is
            # not JSON-serialisable, so its generated paths are recorded instead.
            "path": path if isinstance(path, str) else self.paths,
            "max_transfer_time": max_t,
            "avg_transfer_time": avg_t,
            "last_dst": last_dst,
            "tot_cost": self.cost,
        }
        # Context manager so the report is flushed and the handle closed.
        with open(f"{self.output_dir}/{self.algo}_eval.json", "w") as fh:
            fh.write(json.dumps(report))
    return max_t, self.cost
100
+
101
def __construct_g(self):
    """Build the graph of links used by the topology and cap flows at node limits.

    Every edge listed in ``self.paths`` becomes a graph edge carrying
    ``throughput`` and ``cost`` (copied from the edge data), ``flow``
    (initialised to the throughput) and ``partitions`` (the set of partition
    ids routed over it). For each node whose summed incoming/outgoing flow
    exceeds its provider's ingress/egress limit, every incident edge is capped
    at an equal share of that limit.

    Returns:
        The constructed ``nx.DiGraph``.
    """
    g = nx.DiGraph()
    # Debug output, printed once rather than for every destination/partition pair.
    print(self.paths)
    print("Num of partitions: ", self.num_partitions)
    for dst in self.dsts:
        for partition_id in range(self.num_partitions):
            for edge in self.paths[dst][str(partition_id)]:
                # Distinct names: reusing `dst` here would clobber the outer
                # loop variable and corrupt the lookup for later partitions.
                u, v, edge_data = edge[0], edge[1], edge[2]
                if not g.has_edge(u, v):
                    throughput = edge_data["throughput"]  # * self.default_vms_per_region
                    g.add_edge(
                        u,
                        v,
                        throughput=throughput,
                        cost=edge_data["cost"],
                        flow=throughput,
                        partitions=set(),
                    )
                g[u][v]["partitions"].add(partition_id)

    print(f"Default vms: {self.default_vms_per_region}")
    # Share the limit equally if a node exceeds its in/egress limit
    for node in g.nodes:
        provider = node.split(":")[0]

        in_edges, out_edges = list(g.in_edges(node)), list(g.out_edges(node))
        # Both totals are taken before any capping is applied to this node.
        in_flow_sum = sum(g[a][b]["flow"] for a, b in in_edges)
        out_flow_sum = sum(g[a][b]["flow"] for a, b in out_edges)

        if in_flow_sum > self.ingress_limits[provider]:
            # equal share per incoming edge
            flow_proportion = 1 / len(in_edges)
            for a, b in in_edges:
                g[a][b]["flow"] = min(g[a][b]["flow"], self.ingress_limits[provider] * flow_proportion)

        if out_flow_sum > self.egress_limits[provider]:
            # equal share per outgoing edge
            flow_proportion = 1 / len(out_edges)
            for a, b in out_edges:
                print(f"src: {a}, dst: {b}, flow proportion: {flow_proportion}")
                g[a][b]["flow"] = min(g[a][b]["flow"], self.egress_limits[provider] * flow_proportion)

    return g
153
+
154
def __get_path(self):
    """Return every simple path in ``self.g`` from the source to each destination.

    Paths are grouped by destination, in the order of ``self.dsts``.
    """
    found = []
    for node in self.dsts:
        found.extend(nx.all_simple_paths(self.g, self.src, node))
    return found
157
+
158
def __slowest_capacity_link(self):
    """Return the smallest ``throughput`` attribute over all edges of ``self.g``."""
    return min([record[-1]["throughput"] for record in self.g.edges().data()])
161
+
162
def __transfer_time(self, log=True):
    """Compute how long each destination takes to receive all partitions.

    A partition's time is its data volume divided by the smallest ``flow`` on
    the edges of its path (infinite when that flow is not positive); a
    destination's time is the slowest of its partitions.

    Args:
        log: accepted for interface compatibility; not used by this method.

    Returns:
        Tuple ``(max_t, avg_t, last_dst)``: the slowest destination time, the
        mean destination time, and the list of destinations hitting ``max_t``.
    """
    per_dst = {}
    for dst in self.dsts:
        slowest = float("-inf")
        for pid in range(self.num_partitions):
            flows = [self.g[e[0]][e[1]]["flow"] for e in self.paths[dst][str(pid)]]
            bottleneck = min(flows)
            if bottleneck > 0:
                elapsed = self.partition_data_vol / bottleneck
            else:
                elapsed = float("inf")
            slowest = max(slowest, elapsed)
        per_dst[dst] = slowest

    times = per_dst.values()
    max_t = max(times)
    # destinations that finish last
    last_dst = [name for name, t in per_dst.items() if t == max_t]
    avg_t = sum(times) / len(times)
    return max_t, avg_t, last_dst
178
+
179
def __total_cost(self):
    """Return egress cost plus instance cost for the current graph.

    Egress cost sums, per edge, (number of partitions on the edge) x
    (partition data volume) x (edge cost). Instance cost charges every node
    for ``default_vms_per_region`` instances over the maximum transfer time
    (rounded to two decimals), at ``cost_per_instance_hr`` per hour.
    """
    egress_cost = sum(
        len(attrs["partitions"]) * self.partition_data_vol * attrs["cost"]
        for *_, attrs in self.g.edges.data()
    )

    runtime_s = round(self.__transfer_time(log=False)[0], 2)
    per_node_cost = self.default_vms_per_region * (self.cost_per_instance_hr / 3600) * runtime_s
    # Accumulated node by node (not multiplied) to keep the same float result.
    instance_cost = sum(per_node_cost for _ in self.g.nodes())

    return egress_cost + instance_cost
benchmarks/ADRS/cloudcast/evaluator/utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import networkx as nx
2
+ from broadcast import *
3
+ import pandas as pd
4
+ import time
5
+ import functools
6
+ import os
7
+
8
+
9
+ GBIT_PER_GBYTE = 8
10
+
11
+
12
class Timer:
    """Wall-clock stopwatch that can be used as a context manager.

    The clock starts when the object is constructed (not on ``__enter__``) and
    stops when the ``with`` block exits.
    """

    def __init__(self, print_desc=None):
        self.print_desc = print_desc
        self.start = time.time()
        self.end = None

    def __enter__(self):
        return self

    def __exit__(self, exc_typ, exc_val, exc_tb):
        self.end = time.time()

    @property
    def elapsed(self):
        """Seconds since construction, frozen once the ``with`` block has exited."""
        stop = time.time() if self.end is None else self.end
        return stop - self.start
31
+
32
+
33
@functools.lru_cache(maxsize=None)
def get_path_cost(src, dst, src_tier="PREMIUM", dst_tier="PREMIUM"):
    """Return skyplane's transfer cost from ``src`` to ``dst``.

    Results are memoised per argument tuple. Only the "PREMIUM" tier is
    accepted for both endpoints; any other value trips the assertion.
    """
    # Imported inside the function, so skyplane is only needed when this is called.
    from skyplane import compute

    tiers = (src_tier, dst_tier)
    assert tiers == ("PREMIUM", "PREMIUM")
    return compute.CloudProvider.get_transfer_cost(src, dst)
39
+
40
+
41
def make_nx_graph(cost_path=None, throughput_path=None, num_vms=1):
    """
    Default graph with capacity constraints and cost info
    nodes: regions, edges: links
    per edge:
        throughput: max tput achievable (gbps), scaled by num_vms
        cost: $/GB, or None when the pair is missing from the cost table

    cost_path / throughput_path default to profiles/cost.csv and
    profiles/throughput.csv next to this file.
    """

    def _read_profile(explicit_path, bundled_name):
        # Fall back to the CSV shipped alongside this module.
        if explicit_path is not None:
            return pd.read_csv(explicit_path)
        here = os.path.dirname(os.path.abspath(__file__))
        return pd.read_csv(os.path.join(here, bundled_name))

    cost = _read_profile(cost_path, "profiles/cost.csv")
    throughput = _read_profile(throughput_path, "profiles/throughput.csv")

    G = nx.DiGraph()
    for _, row in throughput.iterrows():
        region_a, region_b = row["src_region"], row["dst_region"]
        if region_a != region_b:  # no self-links
            G.add_edge(region_a, region_b, cost=None, throughput=num_vms * row["throughput_sent"] / 1e9)

    for _, row in cost.iterrows():
        origin, target = row["src"], row["dest"]
        if origin in G and target in G[origin]:
            G[origin][target]["cost"] = row["cost"]

    # some pairs not in the cost grid
    no_cost_pairs = [(u, v) for u, v, attrs in G.edges.data() if attrs["cost"] is None]
    print("Unable to get costs for: ", no_cost_pairs)

    return G
83
+
84
+
85
def push_flow_helper(src, g, ingress_limit=10 * 5, egress_limit=10 * 5):
    """
    Push positive flows in the constructed paths (g) under constraints.

    For each successor of src, the flow is the minimum of: the throughput of
    every edge reachable by DFS from that successor, the throughput of the
    src->successor edge, ingress_limit and egress_limit. That value is written
    to the "flow" attribute of all those edges. Returns g (modified in place).
    """
    for child in list(g.successors(src)):
        downstream = list(nx.dfs_edges(g, source=child))
        if downstream:
            dfs_min = min(g[u][v]["throughput"] for u, v in downstream)
        else:
            dfs_min = float("inf")
        min_flow = min(dfs_min, g[src][child]["throughput"], ingress_limit, egress_limit)

        # assign flows
        g[src][child]["flow"] = min_flow
        for u, v in downstream:
            g[u][v]["flow"] = min_flow
    return g
99
+
100
+
101
def append_src_dst_paths(src, dsts, G, bc_topology):
    """Record the edges of every simple src->dst path of G in bc_topology.

    Each edge is appended as [s, t, G[s][t]] for every partition, so all
    partitions of a destination receive the same edge list. Returns bc_topology.
    """
    for dst in dsts:
        for hops in list(nx.all_simple_paths(G, src, dst)):
            for s, t in zip(hops, hops[1:]):
                for partition in range(bc_topology.num_partitions):
                    bc_topology.append_dst_partition_path(dst, partition, [s, t, G[s][t]])
    return bc_topology
benchmarks/ADRS/cloudcast/evaluator/wrapper.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backwards-compat wrapper for old Python-based evaluators.
2
+
3
+ Old-style evaluators define ``evaluate(program_path) -> dict``. This module
4
+ bridges that interface to the container JSON protocol expected by
5
+ ContainerizedEvaluator.
6
+
7
+ Usage — add this to the bottom of your evaluator.py::
8
+
9
+ if __name__ == "__main__":
10
+ from wrapper import run
11
+ run(evaluate)
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ import traceback
17
+
18
+
19
def run(evaluate_fn):
    """Call *evaluate_fn*, format the result as container-protocol JSON on stdout.

    * Reads ``sys.argv[1]`` as the program path.
    * Redirects stdout → stderr while *evaluate_fn* runs so that debug prints
      don't contaminate the JSON output; stdout is restored however the
      evaluator exits (including ``SystemExit``).
    * Separates numeric metrics from non-numeric artifacts. Any real number
      (``numbers.Real``: bool, int, float, ``fractions.Fraction``, ...) is a
      metric; strings, lists and dicts are artifacts; other types are dropped.
    * Guarantees ``combined_score`` is always present in metrics.
    """
    import numbers  # local import: only this function needs it

    def emit_error(artifacts):
        # Single place that knows the shape of an error response.
        print(
            json.dumps(
                {
                    "status": "error",
                    "combined_score": 0.0,
                    "metrics": {"combined_score": 0.0},
                    "artifacts": artifacts,
                }
            )
        )

    if len(sys.argv) < 2:
        print("Usage: evaluator.py <program_path>", file=sys.stderr)
        sys.exit(1)

    program_path = sys.argv[1]

    # Redirect stdout → stderr during evaluation so debug prints from
    # the evaluator don't contaminate the JSON output on stdout.
    real_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        result = evaluate_fn(program_path)
    except Exception as e:
        sys.stdout = real_stdout  # restore first: the error JSON must go to stdout
        emit_error({"error": str(e), "traceback": traceback.format_exc()})
        return
    finally:
        # Also covers BaseException (SystemExit, KeyboardInterrupt).
        sys.stdout = real_stdout

    if not isinstance(result, dict):
        emit_error({"error": f"evaluate() returned {type(result).__name__}, expected dict"})
        return

    # Separate numeric metrics from non-numeric artifacts.
    metrics = {}
    artifacts = {}
    for k, v in result.items():
        if isinstance(v, numbers.Real):
            metrics[k] = float(v)
        elif isinstance(v, str):
            artifacts[k] = v
        elif isinstance(v, (list, dict)):
            # default=str: a non-serialisable element is stringified instead of
            # crashing the wrapper before any protocol JSON is written.
            artifacts[k] = json.dumps(v, default=str)

    if "combined_score" not in metrics:
        metrics["combined_score"] = 0.0

    status = "error" if "error" in artifacts else "success"
    output = {
        "status": status,
        "combined_score": metrics["combined_score"],
        "metrics": metrics,
    }
    if artifacts:
        output["artifacts"] = artifacts

    print(json.dumps(output))
benchmarks/ADRS/cloudcast/initial_program.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ import networkx as nx
3
+ import json
4
+ import os
5
+ import pandas as pd
6
+ from typing import Dict, List
7
+
8
+
9
def search_algorithm(src, dsts, G, num_partitions):
    """Baseline search: send every partition along the cheapest path to each destination.

    Works on a copy of G with self-loops and edges into src removed, runs
    Dijkstra weighted by the "cost" edge attribute, and records each hop as
    [s, t, G[s][t]] for every partition. Returns the filled BroadCastTopology.
    """
    pruned = G.copy()
    pruned.remove_edges_from(list(pruned.in_edges(src)) + list(nx.selfloop_edges(pruned)))
    topology = BroadCastTopology(src, dsts, num_partitions)

    for dst in dsts:
        hops = nx.dijkstra_path(pruned, src, dst, weight="cost")
        for s, t in zip(hops, hops[1:]):
            for partition in range(topology.num_partitions):
                topology.append_dst_partition_path(dst, partition, [s, t, G[s][t]])

    return topology
22
+
23
+
24
class SingleDstPath(Dict):
    """Dict subtype used to annotate the per-destination path structure."""

    # partition id
    partition: int
    # [[src, dst, edge data]]
    edges: List[List]
27
+
28
+
29
class BroadCastTopology:
    """Per-destination, per-partition edge lists describing a broadcast plan.

    ``paths`` maps dst -> partition id (as a string) -> list of edges, each
    edge being ``[src, dst, edge data]``.
    example: {dst1: {partition1: [src->node1, node1->dst1], partition 2: [src->dst1]}}
    """

    def __init__(self, src: str, dsts: List[str], num_partitions: int = 4, paths: "Dict[str, SingleDstPath]" = None):
        """
        Args:
            src: source node
            dsts: destination nodes
            num_partitions: number of data partitions
            paths: existing path mapping to adopt as-is; when omitted, every
                (dst, partition) entry starts out as None.
        """
        self.src = src  # single str
        self.dsts = dsts  # list of strs
        self.num_partitions = num_partitions

        if paths is not None:
            # The mapping is adopted directly. This used to call
            # self.set_graph(), which this class does not define, so passing
            # `paths` always raised AttributeError.
            self.paths = paths
        else:
            self.paths = {dst: {str(i): None for i in range(num_partitions)} for dst in dsts}

    def get_paths(self):
        """Print and return the current path mapping."""
        print(f"now the set path is: {self.paths}")
        return self.paths

    def set_num_partitions(self, num_partitions: int):
        """Update the partition count (existing ``paths`` entries are left untouched)."""
        self.num_partitions = num_partitions

    def set_dst_partition_paths(self, dst: str, partition: int, paths: List[List]):
        """
        Set paths for partition = partition to reach dst
        """
        self.paths[dst][str(partition)] = paths

    def append_dst_partition_path(self, dst: str, partition: int, path: List):
        """
        Append path for partition = partition to reach dst
        """
        key = str(partition)
        if self.paths[dst][key] is None:
            self.paths[dst][key] = []
        self.paths[dst][key].append(path)
65
+
66
def make_nx_graph(cost_path=None, throughput_path=None, num_vms=1):
    """
    Default graph with capacity constraints and cost info
    nodes: regions, edges: links
    per edge:
        throughput: max tput achievable (gbps), scaled by num_vms
        cost: $/GB, or None when the pair is missing from the cost table

    cost_path / throughput_path default to profiles/cost.csv and
    profiles/throughput.csv next to this file.
    """
    # Bundled profiles live next to this file
    current_dir = os.path.dirname(os.path.abspath(__file__))

    cost_csv = os.path.join(current_dir, "profiles/cost.csv") if cost_path is None else cost_path
    cost = pd.read_csv(cost_csv)

    throughput_csv = (
        os.path.join(current_dir, "profiles/throughput.csv") if throughput_path is None else throughput_path
    )
    throughput = pd.read_csv(throughput_csv)

    G = nx.DiGraph()
    for _, row in throughput.iterrows():
        region_a, region_b = row["src_region"], row["dst_region"]
        if region_a != region_b:  # no self-links
            G.add_edge(region_a, region_b, cost=None, throughput=num_vms * row["throughput_sent"] / 1e9)

    for _, row in cost.iterrows():
        origin, target = row["src"], row["dest"]
        if origin in G and target in G[origin]:
            G[origin][target]["cost"] = row["cost"]

    # some pairs not in the cost grid
    no_cost_pairs = [(u, v) for u, v, attrs in G.edges.data() if attrs["cost"] is None]
    print("Unable to get costs for: ", no_cost_pairs)

    return G
107
+
108
+
109
+ # EVOLVE-BLOCK-END
110
+
111
+ # Helper functions that won't be evolved
112
def create_broadcast_topology(src: str, dsts: List[str], num_partitions: int = 4):
    """Build an empty BroadCastTopology for the given source, destinations and partition count."""
    topology = BroadCastTopology(src, dsts, num_partitions)
    return topology
115
+
116
def run_search_algorithm(src: str, dsts: List[str], G, num_partitions: int):
    """Delegate to search_algorithm and hand back the topology it produces."""
    topology = search_algorithm(src, dsts, G, num_partitions)
    return topology
benchmarks/ADRS/eplb/README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Expert Parallelism Load Balancer (EPLB)
2
+
3
+ This benchmark uses SkyDiscover to optimize the Expert Parallelism Load Balancer (EPLB) algorithm for Mixture-of-Experts (MoE) models. The goal is to rearrange and replicate experts across GPUs to balance load, while keeping the rearrangement algorithm itself fast.
4
+
5
+ ## Setup
6
+
7
+ 1. **Install PyTorch** (required by the evaluator):
8
+
9
+ ```bash
10
+ uv pip install torch
11
+ ```
12
+
13
+ 2. **Download the workload file** from [Hugging Face](https://huggingface.co/datasets/abmfy/eplb-openevolve) into this directory:
14
+
15
+ ```bash
16
+ cd benchmarks/ADRS/eplb
17
+ wget https://huggingface.co/datasets/abmfy/eplb-openevolve/resolve/main/expert-load.json
18
+ ```
19
+
20
+ 3. **Set your API key:**
21
+
22
+ ```bash
23
+ export OPENAI_API_KEY=...
24
+ ```
25
+
26
+ ## Run
27
+
28
+ From the repo root:
29
+
30
+ ```bash
31
+ uv run skydiscover-run \
32
+ benchmarks/ADRS/eplb/initial_program.py \
33
+ benchmarks/ADRS/eplb/evaluator.py \
34
+ -c benchmarks/ADRS/eplb/config.yaml \
35
+ -s [your_algorithm] \
36
+ -i 100 \
37
+ -o eplb_output
38
+ ```
39
+
40
+ Or from this directory:
41
+
42
+ ```bash
43
+ uv run skydiscover-run initial_program.py evaluator.py \
44
+ -c config.yaml \
45
+ -s [your_algorithm] \
46
+ -i 100
47
+ ```
48
+
49
+ ## Evaluate a saved program
50
+
51
+ ```bash
52
+ python evaluate_best_program.py <path_to_best_program.py> [num_runs]
53
+ ```
54
+
55
+ ## Files
56
+
57
+ | File | Description |
58
+ |------|-------------|
59
+ | `initial_program.py` | Baseline `rebalance_experts` function to evolve |
60
+ | `evaluator.py` | Scores programs on load-balance quality and execution speed |
61
+ | `config.yaml` | Task-specific config (LLM, evaluator timeout, system prompt) |
62
+ | `evaluate_best_program.py` | Standalone script to evaluate a saved best program |
63
+ | `expert-load.json` | Workload data (must be downloaded — see Setup) |
benchmarks/ADRS/eplb/evaluator/evaluate.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Entry point: score one candidate program with the Python evaluator.
set -euo pipefail

# Fail with a usage message (rather than a bare "unbound variable" error)
# when the program path is missing or empty.
PROGRAM="${1:?usage: evaluate.sh <program_path> [mode]}"
# MODE ($2) accepted but ignored — override this file to use train/test splits.

python /benchmark/evaluator.py "$PROGRAM"
benchmarks/ADRS/eplb/evaluator/evaluate_best_program.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Evaluate a best_program.py file using the eplb evaluator.
4
+ Runs multiple times and averages the results.
5
+ """
6
+ import sys
7
+ import json
8
+ from pathlib import Path
9
+ from evaluator import evaluate
10
+
11
def main():
    """Evaluate a program several times and print the averaged metrics.

    Command line: ``evaluate_best_program.py <path_to_best_program.py> [num_runs]``
    (``num_runs`` defaults to 3 and must be a positive integer). Exits with
    status 1 on bad arguments, a missing file, or the first run whose result
    contains an "error" key.
    """
    if len(sys.argv) < 2:
        print("Usage: evaluate_best_program.py <path_to_best_program.py> [num_runs]")
        sys.exit(1)

    program_path = Path(sys.argv[1])
    if not program_path.exists():
        print(f"Error: File not found: {program_path}")
        sys.exit(1)

    try:
        num_runs = int(sys.argv[2]) if len(sys.argv) > 2 else 3
    except ValueError:
        print(f"Error: num_runs must be an integer, got {sys.argv[2]!r}")
        sys.exit(1)
    if num_runs < 1:
        # With zero runs there is nothing to average (division by zero below).
        print(f"Error: num_runs must be at least 1, got {num_runs}")
        sys.exit(1)

    print(f"Evaluating: {program_path}")
    print(f"Running {num_runs} times and averaging results...")
    print("=" * 60)

    results = []
    for run in range(1, num_runs + 1):
        print(f"\n--- Run {run}/{num_runs} ---")
        result = evaluate(str(program_path))

        if "error" in result:
            print(f"❌ Error in run {run}: {result['error']}")
            sys.exit(1)

        results.append(result)
        print(f"Run {run} - Combined Score: {result.get('combined_score', 0.0):.6f}")

    # Compute averages; a metric missing from a run counts as 0.0.
    metric_names = (
        "balancedness_score_gpu",
        "balancedness_score_expert",
        "times_algorithm",
        "times_inference",
        "speed_score",
        "combined_score",
    )
    avg_result = {
        name: sum(r.get(name, 0.0) for r in results) / len(results)
        for name in metric_names
    }

    print("\n" + "=" * 60)
    print("AVERAGED RESULTS (over {} runs):".format(num_runs))
    print("=" * 60)
    print(json.dumps(avg_result, indent=2))

    print("\n" + "-" * 60)
    print("Summary:")
    print(f"✅ Combined Score: {avg_result['combined_score']:.6f}")
    print(f" Balancedness (GPU): {avg_result['balancedness_score_gpu']:.6f}")
    print(f" Balancedness (Expert): {avg_result['balancedness_score_expert']:.6f}")
    print(f" Speed Score: {avg_result['speed_score']:.6f}")
    print(f" Avg Algorithm Time: {avg_result['times_algorithm']:.6f}s")
    print(f" Avg Inference Time: {avg_result['times_inference']:.6f}s")
    print("-" * 60)
63
+
64
+ if __name__ == "__main__":
65
+ main()
66
+
benchmarks/ADRS/eplb/evaluator/wrapper.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backwards-compat wrapper for old Python-based evaluators.
2
+
3
+ Old-style evaluators define ``evaluate(program_path) -> dict``. This module
4
+ bridges that interface to the container JSON protocol expected by
5
+ ContainerizedEvaluator.
6
+
7
+ Usage — add this to the bottom of your evaluator.py::
8
+
9
+ if __name__ == "__main__":
10
+ from wrapper import run
11
+ run(evaluate)
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ import traceback
17
+
18
+
19
def run(evaluate_fn):
    """Call *evaluate_fn*, format the result as container-protocol JSON on stdout.

    * Reads ``sys.argv[1]`` as the program path.
    * Redirects stdout → stderr while *evaluate_fn* runs so that debug prints
      don't contaminate the JSON output; stdout is restored however the
      evaluator exits (including ``SystemExit``).
    * Separates numeric metrics from non-numeric artifacts. Any real number
      (``numbers.Real``: bool, int, float, ``fractions.Fraction``, ...) is a
      metric; strings, lists and dicts are artifacts; other types are dropped.
    * Guarantees ``combined_score`` is always present in metrics.
    """
    import numbers  # local import: only this function needs it

    def emit_error(artifacts):
        # Single place that knows the shape of an error response.
        print(
            json.dumps(
                {
                    "status": "error",
                    "combined_score": 0.0,
                    "metrics": {"combined_score": 0.0},
                    "artifacts": artifacts,
                }
            )
        )

    if len(sys.argv) < 2:
        print("Usage: evaluator.py <program_path>", file=sys.stderr)
        sys.exit(1)

    program_path = sys.argv[1]

    # Redirect stdout → stderr during evaluation so debug prints from
    # the evaluator don't contaminate the JSON output on stdout.
    real_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        result = evaluate_fn(program_path)
    except Exception as e:
        sys.stdout = real_stdout  # restore first: the error JSON must go to stdout
        emit_error({"error": str(e), "traceback": traceback.format_exc()})
        return
    finally:
        # Also covers BaseException (SystemExit, KeyboardInterrupt).
        sys.stdout = real_stdout

    if not isinstance(result, dict):
        emit_error({"error": f"evaluate() returned {type(result).__name__}, expected dict"})
        return

    # Separate numeric metrics from non-numeric artifacts.
    metrics = {}
    artifacts = {}
    for k, v in result.items():
        if isinstance(v, numbers.Real):
            metrics[k] = float(v)
        elif isinstance(v, str):
            artifacts[k] = v
        elif isinstance(v, (list, dict)):
            # default=str: a non-serialisable element is stringified instead of
            # crashing the wrapper before any protocol JSON is written.
            artifacts[k] = json.dumps(v, default=str)

    if "combined_score" not in metrics:
        metrics["combined_score"] = 0.0

    status = "error" if "error" in artifacts else "success"
    output = {
        "status": status,
        "combined_score": metrics["combined_score"],
        "metrics": metrics,
    }
    if artifacts:
        output["artifacts"] = artifacts

    print(json.dumps(output))
benchmarks/ADRS/eplb/initial_program.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """
3
+ Expert parallelism load balancer (EPLB) for vLLM.
4
+
5
+ This module implements the core rearrangement algorithm.
6
+
7
+ The rearrangement algorithm is adapted from
8
+ [DeepSeek EPLB](https://github.com/deepseek-ai/eplb).
9
+
10
+ Please find at [#12](https://github.com/deepseek-ai/EPLB/issues/12) an example
11
+ on how the EPLB algorithm works.
12
+ """
13
+
14
+ # EVOLVE-BLOCK-START
15
+
16
+ import torch
17
+
18
+
19
def balanced_packing(weight: torch.Tensor,
                     num_packs: int) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Greedily distribute n weighted items over m packs so that every pack
    receives exactly n/m items and the pack weights stay as even as possible.

    Items are visited from heaviest to lightest; each goes to the lightest
    pack that still has room (lowest index on ties).

    Parameters:
        weight: [X, n], the weight of each item
        num_packs: number of packs

    Returns:
        pack_index: [X, n], the pack index of each item
        rank_in_pack: [X, n], the rank of the item in the pack
    """
    num_layers, num_groups = weight.shape
    assert num_groups % num_packs == 0
    groups_per_pack = num_groups // num_packs

    if groups_per_pack == 1:
        # One item per pack: item j simply goes to pack j.
        identity = torch.arange(weight.size(-1),
                                dtype=torch.int64,
                                device=weight.device)
        return identity.expand(weight.shape), torch.zeros_like(
            weight, dtype=torch.int64)

    order = weight.float().sort(-1, descending=True).indices.cpu()
    pack_index = torch.full_like(weight,
                                 fill_value=-1,
                                 dtype=torch.int64,
                                 device="cpu")
    rank_in_pack = torch.full_like(pack_index, fill_value=-1)
    for layer in range(num_layers):
        load = [0] * num_packs
        count = [0] * num_packs
        for group in order[layer]:
            open_packs = [
                p for p in range(num_packs) if count[p] < groups_per_pack
            ]
            pack = min(open_packs, key=load.__getitem__)
            assert count[pack] < groups_per_pack
            pack_index[layer, group] = pack
            rank_in_pack[layer, group] = count[pack]
            load[pack] += weight[layer, group]
            count[pack] += 1
    return pack_index, rank_in_pack
65
+
66
+
67
def replicate_experts(
        weight: torch.Tensor,
        num_phy: int) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Expand `num_log` logical experts into `num_phy` physical slots. The first
    `num_log` slots hold one copy of each expert; every extra slot is given to
    the expert whose per-replica load (weight / replica count) is currently
    the highest in that row.

    Parameters:
        weight: [X, num_log]
        num_phy: total number of experts after replication

    Returns:
        phy2log: [X, num_phy], logical expert id of each physical expert
        rank: [X, num_phy], the replica rank
        logcnt: [X, num_log], number of replicas for each logical expert
    """
    num_rows, num_log = weight.shape
    assert num_phy - num_log >= 0
    dev = weight.device
    phy2log = torch.arange(num_phy, dtype=torch.int64,
                           device=dev).repeat(num_rows, 1)
    rank = torch.zeros(num_rows, num_phy, dtype=torch.int64, device=dev)
    logcnt = torch.ones(num_rows, num_log, dtype=torch.int64, device=dev)
    rows = torch.arange(num_rows, dtype=torch.int64, device=dev)
    for slot in range(num_log, num_phy):
        # Per row: the expert with the largest load per existing replica.
        busiest = (weight / logcnt).max(dim=-1).indices
        phy2log[:, slot] = busiest
        rank[:, slot] = logcnt[rows, busiest]
        logcnt[rows, busiest] += 1
    return phy2log, rank, logcnt
98
+
99
+
100
def rebalance_experts_hierarchical(
    weight: torch.Tensor,
    num_physical_experts: int,
    num_groups: int,
    num_nodes: int,
    num_gpus: int,
):
    """
    Three-stage placement: pack expert groups onto nodes, replicate experts
    within each node, then pack the physical experts onto that node's GPUs.

    Parameters:
        weight: [num_moe_layers, num_logical_experts]
        num_physical_experts: number of physical experts after replication
        num_groups: number of expert groups
        num_nodes: number of server nodes, where the intra-node network
        (e.g, NVLink) is faster
        num_gpus: number of GPUs, must be a multiple of `num_nodes`

    Returns:
        pphy2log: [num_moe_layers, num_physical_experts], logical expert id
            of each physical expert
        pphyrank: [num_moe_layers, num_physical_experts], replica rank of
            each physical expert
        logcnt: [num_moe_layers, num_logical_experts], number of replicas
            of each logical expert
    """
    num_layers, num_logical_experts = weight.shape
    assert num_logical_experts % num_groups == 0
    group_size = num_logical_experts // num_groups
    assert num_groups % num_nodes == 0
    groups_per_node = num_groups // num_nodes
    assert num_gpus % num_nodes == 0
    assert num_physical_experts % num_gpus == 0
    phy_experts_per_gpu = num_physical_experts // num_gpus

    def invert_permutation(perm: torch.Tensor) -> torch.Tensor:
        # Row-wise inverse: result[r, perm[r, j]] = j.
        positions = torch.arange(perm.size(1),
                                 dtype=torch.int64,
                                 device=perm.device).expand(perm.shape)
        inv = torch.empty_like(perm)
        inv.scatter_(1, perm, positions)
        return inv

    # Step 1: pack groups to nodes
    tokens_per_group = weight.unflatten(-1, (num_groups, group_size)).sum(-1)
    group_pack_index, group_rank_in_pack = balanced_packing(
        tokens_per_group, num_nodes)
    group_base = (group_pack_index * groups_per_node +
                  group_rank_in_pack) * group_size
    within_group = torch.arange(group_size,
                                dtype=torch.int64,
                                device=group_pack_index.device)
    log2mlog = (group_base.unsqueeze(-1) + within_group).flatten(-2)
    mlog2log = invert_permutation(log2mlog)

    # Step 2: construct redundant experts within nodes
    # [num_layers * num_nodes, num_logical_experts // num_nodes]
    tokens_per_mlog = weight.gather(-1, mlog2log).view(
        -1, num_logical_experts // num_nodes)
    phy2mlog, phyrank, mlogcnt = replicate_experts(
        tokens_per_mlog, num_physical_experts // num_nodes)

    # Step 3: pack physical_experts to GPUs
    # [num_layers * num_nodes, num_physical_experts // num_nodes]
    tokens_per_phy = (tokens_per_mlog / mlogcnt).gather(-1, phy2mlog)
    pack_index, rank_in_pack = balanced_packing(tokens_per_phy,
                                                num_gpus // num_nodes)
    phy2pphy = pack_index * phy_experts_per_gpu + rank_in_pack
    pphy2phy = invert_permutation(phy2pphy)

    # Shift each node's local ids by that node's offset to get global ids.
    node_offsets = torch.arange(
        0,
        num_logical_experts,
        num_logical_experts // num_nodes,
        device=group_pack_index.device,
    ).view(1, -1, 1)
    pphy2mlog = phy2mlog.gather(-1, pphy2phy)
    pphy2mlog = (pphy2mlog.view(num_layers, num_nodes, -1) +
                 node_offsets).flatten(-2)
    pphy2log = mlog2log.gather(-1, pphy2mlog)
    pphyrank = phyrank.gather(-1, pphy2phy).view(num_layers, -1)
    logcnt = mlogcnt.view(num_layers, -1).gather(-1, log2mlog)
    return pphy2log, pphyrank, logcnt
178
+
179
+
180
def rebalance_experts(
    weight: torch.Tensor,
    num_replicas: int,
    num_groups: int,
    num_nodes: int,
    num_gpus: int,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Entry point for expert-parallelism load balancer.

    The weights are converted to float on the CPU. When `num_groups` is a
    multiple of `num_nodes` the hierarchical policy is used with the given
    grouping; otherwise it is run with a single group on a single node.

    Parameters:
        weight: [layers, num_logical_experts], the load statistics for all
            logical experts
        num_replicas: number of physical experts, must be a multiple of
            `num_gpus`
        num_groups: number of expert groups
        num_nodes: number of server nodes, where the intra-node network
            (e.g, NVLink) is faster
        num_gpus: number of GPUs, must be a multiple of `num_nodes`

    Returns:
        physical_to_logical_map: [layers, num_replicas], the expert index of
            each replica
        logical_to_physical_map: [layers, num_logical_experts, X], the replica
            indices for each expert (unused slots hold -1)
        expert_count: [layers, num_logical_experts], number of physical
            replicas for each logical expert
    """
    num_layers, num_logical_experts = weight.shape
    weight = weight.float().cpu()

    hierarchical = num_groups % num_nodes == 0
    # (groups, nodes) for the hierarchical policy, or (1, 1) for the global one
    groups, nodes = (num_groups, num_nodes) if hierarchical else (1, 1)
    phy2log, phyrank, logcnt = rebalance_experts_hierarchical(
        weight, num_replicas, groups, nodes, num_gpus)

    # An expert can own at most all redundant slots plus its original one.
    maxlogcnt = (num_replicas - num_logical_experts) + 1
    log2phy: torch.Tensor = torch.full(
        (num_layers, num_logical_experts, maxlogcnt),
        -1,
        dtype=torch.int64,
        device=logcnt.device,
    )
    replica_ids = torch.arange(num_replicas,
                               dtype=torch.int64,
                               device=log2phy.device).expand(num_layers, -1)
    # Flat slot of replica r: (logical id) * maxlogcnt + (replica rank).
    log2phy.view(num_layers, -1).scatter_(-1, phy2log * maxlogcnt + phyrank,
                                          replica_ids)
    return phy2log, log2phy, logcnt
233
+
234
+
235
+ # EVOLVE-BLOCK-END
236
+
237
+ __all__ = ["rebalance_experts"]
238
+
benchmarks/ADRS/llm_sql/config.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM SQL — Prompt Caching Column Reordering Optimization
2
+ # Usage: skydiscover-run initial_program.py evaluator.py -c config.yaml -s <strategy>
3
+ language: python
4
+ diff_based_generation: true
5
+ max_iterations: 100
6
+ checkpoint_interval: 5
7
+ max_solution_length: 60000
8
+
9
+ llm:
10
+ api_base: https://api.openai.com/v1
11
+ models:
12
+ - name: "gpt-5"
13
+ weight: 1.0
14
+ max_tokens: 32000
15
+ timeout: 600
16
+
17
+ prompt:
18
+ system_message: |-
19
+ You are an expert in data optimization and LLM prompt caching. Your task is to evolve the existing Evolved class to maximize prefix hit count (PHC) for efficient LLM prompt caching.
20
+
21
+ Problem Context:
22
+ - You are given a pandas DataFrame `df` with text data in rows and columns
23
+ - The goal is to reorder columns to maximize prefix reuse when processing rows sequentially
24
+ - Prefix reuse occurs when consecutive rows have matching values in the same column positions
25
+ - This reduces LLM computation costs by reusing cached prefixes
26
+
27
+ Objective:
28
+ - Dual objective: (1) maximize prefix reuse across consecutive rows and (2) minimize end-to-end runtime of the algorithm.
29
+ - Your goal is to evolve the Evolved class such that when the LLM processes each row sequentially, it reuses as much of the prefix from the previous row as possible, while keeping the algorithm computationally efficient.
30
+ - Prefix reuse is defined as consecutive field values (starting from the first column) that are **exact matches** with the corresponding fields of the previous row.
31
+ - The **hit score** of a row is defined as the **sum of squares of the string lengths** of the matching prefix fields.
32
+ - The algorithm will be evaluated on a combined metric that balances accuracy (prefix reuse) and speed (runtime).
33
+
34
+ Formally:
35
+ - For a given column ordering `C`, PHC(C) = sum over all rows `r` of `hit(C, r)`
36
+ - `hit(C, r)` = sum of `len(df[r][C[f]])^2` for all f in prefix where `df[r][C[f]] == df[r-1][C[f]]`; zero if mismatch starts at the first field.
37
+ - Runtime is measured as wall-clock seconds to compute the reordered DataFrame from the input DataFrame.
38
+ - Combined score used for selection: `combined_score = 0.95 * average_hit_rate + 0.05 * (12 - min(12, average_runtime)) / 12`.
39
+
40
+ Required API (DO NOT CHANGE):
41
+ - You must keep the existing Evolved class structure and the reorder method signature:
42
+ ```python
43
+ class Evolved(Algorithm):
44
+ def reorder(
45
+ self,
46
+ df: pd.DataFrame,
47
+ early_stop: int = 0,
48
+ row_stop: int = None,
49
+ col_stop: int = None,
50
+ col_merge: List[List[str]] = [],
51
+ one_way_dep: List[Tuple[str, str]] = [],
52
+ distinct_value_threshold: float = 0.8,
53
+ parallel: bool = True,
54
+ ) -> Tuple[pd.DataFrame, List[List[str]]]:
55
+ ```
56
+ - You can modify the internal implementation of methods but must preserve the class structure and method signatures
57
+ - The reorder method must return a tuple of (reordered_dataframe, column_orderings)
58
+
59
+ Algorithm Design Guidelines:
60
+ - For each row, determine the optimal column order based on matches with the previous row
61
+ - Consider column statistics (unique values, string lengths) for ordering
62
+ - Implement greedy or heuristic approaches for scalability
63
+ - Focus on columns with high value frequency and long strings
64
+ - Handle missing values and mixed data types appropriately
65
+ - Optimize the existing recursive approach or replace it with more efficient vectorized methods
66
+ - Consider prefix-aware greedy approaches that condition on the current matched prefix
67
+
68
+ Constraints:
69
+ - Do not add/remove rows or columns
70
+ - You must have different column orderings for different rows to maximize prefix hit rate
71
+ - Return a DataFrame with the same shape as input
72
+ - Use exact string matching for prefix calculations
73
+ - Keep memory usage reasonable for large datasets
74
+ - Preserve all existing method signatures and class structure
75
+ - The algorithm will be called with the same parameters as the original Evolved
76
+
77
+ Simply return the optimized Evolved class, do not provide explanations.
78
+
79
+ evaluator:
80
+ timeout: 360
81
+
benchmarks/ADRS/llm_sql/evaluator/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+ WORKDIR /benchmark
3
+
4
+ COPY requirements.txt .
5
+ RUN pip install --no-cache-dir -r requirements.txt
6
+
7
+ # wrapper.py provides backwards compatibility for old Python-based evaluators
8
+ # that define evaluate(program_path) -> dict, bridging them to the container
9
+ # JSON protocol. Source of truth: skydiscover/evaluation/wrapper.py
10
+ COPY . .
11
+ RUN chmod +x evaluate.sh
12
+
13
+ ENTRYPOINT ["./evaluate.sh"]
benchmarks/ADRS/llm_sql/evaluator/download_dataset.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # Download CSV datasets for the LLM-SQL benchmark.
3
+ #
4
+ # Required files (placed in datasets/):
5
+ # movies.csv - Rotten Tomatoes movie reviews (~9 MB)
6
+ # beer.csv - Beer review dataset (~2.5 MB)
7
+ # BIRD.csv - BIRD text-to-SQL dataset (~34 MB)
8
+ # PDMX.csv - PDMX metadata dataset (~7.4 MB)
9
+ # products.csv - Amazon product catalog (~16 MB)
10
+ #
11
+ # Usage:
12
+ # cd benchmarks/ADRS/llm_sql/evaluator
13
+ # bash download_dataset.sh
14
+
15
+ set -euo pipefail
16
+ cd "$(dirname "$0")"
17
+
18
+ BASE_URL="https://huggingface.co/datasets/f20180301/adrs-data/resolve/main/llm_sql"
19
+
20
+ echo "Downloading LLM-SQL benchmark datasets..."
21
+
22
+ mkdir -p datasets
23
+ for dataset in movies.csv beer.csv BIRD.csv PDMX.csv products.csv; do
24
+ echo " Downloading datasets/${dataset}..."
25
+ wget -q --show-progress -O "datasets/${dataset}" "${BASE_URL}/datasets/${dataset}"
26
+ done
27
+
28
+ echo ""
29
+ echo "Done. Downloaded files:"
30
+ ls -lh datasets/*.csv
benchmarks/ADRS/llm_sql/evaluator/evaluate.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ PROGRAM="$1"
5
+ # MODE ($2) accepted but ignored — override this file to use train/test splits.
6
+
7
+ python /benchmark/evaluator.py "$PROGRAM"
benchmarks/ADRS/llm_sql/evaluator/evaluator.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import traceback
4
+ import time
5
+
6
+ import pandas as pd
7
+
8
+ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
9
+ sys.path.insert(0, parent_dir)
10
+ import importlib.util
11
+
12
+ from utils import evaluate_df_prefix_hit_cnt
13
+ from initial_program import Evolved
14
+
15
+
16
def run_quick(master_df, col_merge):
    """Reorder ``master_df`` with ``QuickGreedy`` and score the result.

    Args:
        master_df: DataFrame handed unchanged to ``QuickGreedy().reorder``.
        col_merge: Column groups forwarded as the ``col_merge`` keyword.

    Returns:
        ``(results, elapsed)`` where ``results`` is the return value of
        ``evaluate_df_prefix_hit_cnt`` on the reordered frame and ``elapsed``
        is the wall-clock time in seconds spent constructing the algorithm
        and running ``reorder``.

    NOTE(review): ``QuickGreedy`` is not imported in the visible part of this
    module -- confirm it is in scope before relying on this code path.
    """
    started_at = time.time()
    reordered_df, _orderings = QuickGreedy().reorder(
        master_df,
        col_merge=col_merge,
        col_stop=2,
        row_stop=4,
        distinct_value_threshold=0.7,
        early_stop=100000,
    )
    elapsed = time.time() - started_at

    # The cell-level metric (evaluate_cell_hit_cnt) is intentionally not used here.
    return evaluate_df_prefix_hit_cnt(reordered_df), elapsed
34
+
35
def run_evolved(master_df, col_merge):
    """Reorder ``master_df`` with ``Evolved`` and score the result.

    Args:
        master_df: DataFrame handed unchanged to ``Evolved().reorder``.
        col_merge: Column groups forwarded as the ``col_merge`` keyword.

    Returns:
        ``(results, elapsed)`` where ``results`` is the return value of
        ``evaluate_df_prefix_hit_cnt`` on the reordered frame and ``elapsed``
        is the wall-clock time in seconds spent constructing the algorithm
        and running ``reorder``.
    """
    started_at = time.time()
    reordered_df, _orderings = Evolved().reorder(
        master_df,
        col_merge=col_merge,
        col_stop=2,
        row_stop=4,
        distinct_value_threshold=0.7,
        early_stop=100000,
    )
    elapsed = time.time() - started_at

    # The cell-level metric (evaluate_cell_hit_cnt) is intentionally not used here.
    return evaluate_df_prefix_hit_cnt(reordered_df), elapsed
53
+
54
+
55
def run(filename, alg="", col_merge=None):
    """Load a CSV and evaluate one reordering algorithm on it.

    Args:
        filename: Path of the CSV file read with ``pd.read_csv``.
        alg: ``"QuickGreedy"`` selects ``run_quick``; any other value
            selects ``run_evolved``.
        col_merge: Column groups forwarded to the algorithm. ``None`` (the
            default) means "no merges" and is replaced by a fresh empty list
            so that no list object is shared between calls.

    Returns:
        Whatever the selected ``run_*`` helper returns.
    """
    if col_merge is None:
        col_merge = []

    master_df = pd.read_csv(filename)

    print(f"Evaluate master df shape: {master_df.shape}")
    print(f"Nunique: {master_df.nunique().sort_values()}")

    if alg == "QuickGreedy":
        return run_quick(master_df, col_merge)

    return run_evolved(master_df, col_merge)
65
+
66
+
67
def _total_chars(frame):
    """Sum the string length of every cell of ``frame`` after ``astype(str)``."""
    return frame.astype(str).apply(lambda x: x.str.len().sum(), axis=1).sum()


def _failure(message):
    """Build the zero-score result dict carrying ``message`` under ``error``."""
    return {
        "combined_score": 0.0,
        "runs_successfully": 0.0,
        "error": message,
    }


def evaluate(program_path):
    """Score the ``Evolved`` class defined in ``program_path`` on the benchmark CSVs.

    The program is imported from its file path, its ``Evolved().reorder`` is
    run on each dataset under ``<this file's directory>/datasets``, and the
    reordered frame is scored with ``evaluate_df_prefix_hit_cnt``.

    Args:
        program_path: Path of the Python file to import and evaluate.

    Returns:
        On success a dict with ``combined_score``
        (``0.95 * average_hit_rate + 0.05 * (12 - min(12, average_runtime)) / 12``),
        ``runs_successfully`` (1.0), ``hit_rates`` and ``total_runtime``.
        On any failure (missing ``Evolved``, missing dataset, row-count or
        character-count mismatch, exception) a dict with zero scores and an
        ``error`` message.
    """
    try:
        # Make this directory importable so the program can import sibling modules.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        if current_dir not in sys.path:
            sys.path.insert(0, current_dir)

        # Import the program from its file path.
        spec = importlib.util.spec_from_file_location("program", program_path)
        program = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(program)

        if not hasattr(program, "Evolved"):
            return _failure("Missing algorithm function")

        datasets_dir = os.path.join(current_dir, "datasets")

        test_files = [
            os.path.join(datasets_dir, "movies.csv"),
            os.path.join(datasets_dir, "beer.csv"),
            os.path.join(datasets_dir, "BIRD.csv"),
            os.path.join(datasets_dir, "PDMX.csv"),
            os.path.join(datasets_dir, "products.csv"),
        ]

        # One col_merge specification per dataset, in the same order as test_files.
        col_merges = [
            [['movieinfo', 'movietitle', 'rottentomatoeslink']],
            [['beer/beerId', 'beer/name']],
            [['PostId', 'Body']],
            [['path', 'metadata'], ['hasmetadata', 'isofficial', 'isuserpublisher', 'isdraft', 'hasannotations', 'subsetall']],
            [['product_title', 'parent_asin']],
        ]

        failed_files = 0
        hit_rates = []
        total_runtime = 0.0
        successful_files = 0

        for filename, col_merge in zip(test_files, col_merges):
            try:
                if not os.path.exists(filename):
                    print(f"Dataset not found: {filename}, skipping...")
                    failed_files += 1
                    continue

                print(f"Processing dataset: {filename}")
                master_df = pd.read_csv(filename)

                # Measured before reorder() runs, so in-place edits by the
                # program cannot affect the baseline.
                total_chars_before = _total_chars(master_df)
                original_row_count = len(master_df)

                st = time.time()
                reordered, _ = program.Evolved().reorder(
                    master_df,
                    early_stop=100000,
                    distinct_value_threshold=0.7,
                    row_stop=4,
                    col_stop=2,
                    col_merge=col_merge,
                )
                runtime = time.time() - st

                # Validate row count.
                reordered_row_count = len(reordered)
                if reordered_row_count != original_row_count:
                    diff = reordered_row_count - original_row_count
                    if diff < 0:
                        error_msg = f"Evaluation failed: row count decreases by {abs(diff)} rows. Data were lost - you might have dropped some rows or failed to preserve all data during reordering."
                    else:
                        error_msg = f"Evaluation failed: row count increases by {diff} rows. Data were duplicated - you might have duplicated some rows during reordering."
                    return _failure(error_msg)

                # Validate character count (reordered cannot be less than original).
                total_chars_after = _total_chars(reordered)
                if total_chars_after < total_chars_before:
                    char_diff = total_chars_before - total_chars_after
                    char_diff_pct = (char_diff / total_chars_before * 100) if total_chars_before > 0 else 0

                    message = f"Evaluation failed: character decreases by {char_diff_pct:.2f}%. Data were lost - you might have dropped some data or failed to preserve all data during reordering."
                    return _failure(message)

                results = evaluate_df_prefix_hit_cnt(reordered)
                print(f"Results: {results}, Runtime: {runtime}")

                # results[1] is a percentage; store it as a fraction.
                hit_rates.append(results[1] / 100)
                total_runtime += runtime
                successful_files += 1

            except Exception as e:
                print(f"Failed to process {os.path.basename(filename)}: {str(e)}")
                print(traceback.format_exc())
                failed_files += 1
                # One failing dataset fails the whole evaluation; stop early.
                break

        if successful_files == 0:
            return _failure("No files processed successfully")

        if failed_files > 0:
            return _failure("1 or more files failed to run")

        average_hit_rate = sum(hit_rates) / successful_files
        average_runtime = total_runtime / successful_files

        score = 0.95 * average_hit_rate + 0.05 * (12 - min(12, average_runtime)) / 12

        return {
            "combined_score": score,
            "runs_successfully": 1.0,
            "hit_rates": hit_rates,
            "total_runtime": total_runtime,
        }

    except Exception as e:
        print(f"Evaluation failed: {str(e)}")
        print(traceback.format_exc())
        return _failure(str(e))
219
+
220
+
221
+ if __name__ == "__main__":
222
+ # Backwards-compat: bridges old evaluate() -> dict to the container JSON
223
+ # protocol. wrapper.py is auto-injected at build time from
224
+ # skydiscover/evaluation/wrapper.py.
225
+ from wrapper import run as run_wrapper
226
+
227
+ run_wrapper(evaluate)
benchmarks/ADRS/llm_sql/evaluator/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pandas
2
+ networkx>=3.2,<3.4
benchmarks/ADRS/llm_sql/evaluator/solver.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing import List, Tuple
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from utils import Trie
5
+ import time
6
+
7
+
8
class Algorithm:
    """Base class for column-reordering algorithms plus shared DataFrame helpers."""

    def __init__(self, df: pd.DataFrame = None):
        self.df = df

    def reorder(self, df: pd.DataFrame) -> pd.DataFrame:
        """Reorder ``df``; must be overridden by subclasses."""
        raise NotImplementedError("Subclasses should implement this!")

    @staticmethod
    def evaluate_df_prefix_hit_cnt(self, df: pd.DataFrame = None) -> int:
        """Return the total character-level prefix hit count of a DataFrame.

        Each row is concatenated into one string (no separator), matched
        against a trie of all previously seen rows, and then inserted.

        This is a ``staticmethod`` that historically also declared ``self``.
        Both call shapes are accepted: ``f(df)`` (the frame arrives in the
        first slot) and the legacy ``f(anything, df)``.
        """
        if df is None:
            # Called with a single argument: that argument is the DataFrame.
            df = self

        trie = Trie()
        total_prefix_hit_count = 0
        # Rows are handled strictly in order: the hit count of a row depends
        # on every earlier row already being in the trie, so running this on
        # a thread pool would make the result order-dependent.
        for _, row in df.iterrows():
            row_string = "".join(row.astype(str).values)  # No spaces between columns
            total_prefix_hit_count += trie.longest_common_prefix(row_string)
            trie.insert(row_string)
        return total_prefix_hit_count

    @staticmethod
    def evaluate_cell_hit_cnt(df: pd.DataFrame) -> int:
        """Count cell-level prefix hits against previously seen full rows.

        For each row, growing prefixes of the row (as tuples) are looked up in
        the set of fully processed earlier rows; counting stops at the first
        miss.

        NOTE(review): only complete rows are ever added to the cache, so a
        proper prefix of a multi-column row can never match -- confirm this
        is the intended metric.
        """
        total_prefix_hit_count = 0
        seen_rows = set()  # Cache of fully processed rows

        # Process each row sequentially (row-to-row comparison for hits)
        for _, row in df.iterrows():
            current_row_cache = []
            for col_value in row:
                current_row_cache.append(col_value)
                if tuple(current_row_cache) not in seen_rows:
                    break  # Stop counting hits at the first cache miss
                total_prefix_hit_count += 1
            seen_rows.add(tuple(row))

        return total_prefix_hit_count

    @staticmethod
    def get_groups_values(df: pd.DataFrame):
        """Return value counts over all cells of ``df``, or ``{}`` if there are none."""
        if df.empty:
            return {}
        value_counts = df.stack().value_counts()
        if value_counts.empty:
            return {}
        return value_counts

    @staticmethod
    def calculate_length(value):
        """Return the squared text length of ``value``.

        Booleans count as length 4, ints/floats as ``len(str(value))``,
        strings as ``len(value)``; any other type counts as 0.
        """
        if isinstance(value, bool):
            val = 4  # length of 'True' or 'False'
        elif isinstance(value, (int, float)):
            val = len(str(value))
        elif isinstance(value, str):
            val = len(value)
        else:
            val = 0
        return val**2

    @staticmethod
    def drop_col(df: pd.DataFrame, col):
        """Return ``df`` without column ``col``."""
        return df.drop(columns=[col])

    @staticmethod
    def drop_rows(df: pd.DataFrame, rows):
        """Return ``df`` without the index labels in ``rows``."""
        return df.drop(index=rows)

    @staticmethod
    def merging_columns(df: pd.DataFrame, col_names: List[str], delimiter: str = "_", prepended: bool = False) -> pd.DataFrame:
        """Merge ``col_names`` into one column named ``delimiter.join(col_names)``.

        The merged column is appended and the source columns are removed. The
        input frame is left untouched; a new frame is returned.

        Args:
            df: Source DataFrame.
            col_names: Columns to merge; all must exist and have the same
                number of unique values.
            delimiter: Joins the column names (and, when ``prepended``, the values).
            prepended: When true, each value is expected to look like
                ``"<name>: <value>"``; the part after the first ``": "`` is
                kept and the merged name is prepended to the result.

        Raises:
            ValueError: If a column is missing or unique-value counts differ.
        """
        if not all(col in df.columns for col in col_names):
            raise ValueError("Column names not found in DataFrame")

        # before merging, check that each column to be merged has the same number of unique values
        if len(set(df[col_names].nunique())) != 1:
            raise ValueError(f"Columns to be merged {col_names}, do not have the same number of unique values: {df.nunique().sort_values()}")

        merged_names = delimiter.join(col_names)
        if prepended:
            merged = df[col_names].apply(
                lambda x: merged_names + ": " + delimiter.join([val.split(": ", 1)[1] for val in x]), axis=1
            )
        else:
            merged = df[col_names].apply(lambda x: "".join([f"{val}" for val in x]), axis=1)

        # drop() returns a new frame, so adding the merged column here does
        # not modify the caller's DataFrame.
        result = df.drop(columns=col_names)
        result[merged_names] = merged
        return result

    @staticmethod
    def calculate_col_stats(df: pd.DataFrame, enable_index=False):
        """Compute per-column sharing statistics, sorted by descending score.

        Returns:
            ``(num_rows, column_stats)`` where each entry of ``column_stats``
            is ``(col, num_groups, avg_length, score)``; ``avg_length`` is the
            squared mean text length and ``score`` is
            ``avg_length * (num_rows / num_groups - 1)`` (0 when every value
            is distinct or the column has no values).
        """
        num_rows = len(df)
        column_stats = []
        for col in df.columns:
            if col == "original_index":
                continue

            num_groups = df[col].nunique()
            if df[col].dtype == "object" or df[col].dtype == "string":
                avg_length = df[col].astype(str).str.len().mean()
            elif df[col].dtype == "bool":
                avg_length = 4  # Assuming 'True' or 'False' as average length
            elif df[col].dtype in ["int64", "float64"]:
                avg_length = df[col].astype(str).str.len().mean()
            else:
                avg_length = 0

            avg_length = avg_length**2

            if num_groups == 0:
                score = 0
            else:
                # Average size per group: number of rows in each group
                avg_size_per_group = num_rows / num_groups
                score = avg_length * (avg_size_per_group - 1)

            if num_rows == num_groups:  # no sharing at all
                score = 0
            column_stats.append((col, num_groups, avg_length, score))

        # original_index all distinct values, so give lowest score
        if enable_index and "original_index" in df.columns:
            column_stats.append(("original_index", len(df), 0, 0))

        # Sort the columns based on the score
        column_stats.sort(key=lambda x: x[3], reverse=True)
        return num_rows, column_stats
benchmarks/ADRS/llm_sql/evaluator/utils.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from concurrent.futures import ThreadPoolExecutor
2
+ import pandas as pd
3
+ from typing import List, Tuple
4
+
5
class TrieNode:
    """One node of a character trie.

    ``children`` maps a character to the next node; ``end_of_word`` marks
    nodes at which an inserted word terminates.
    """

    def __init__(self):
        self.end_of_word = False
        self.children = {}
9
+
10
+
11
class Trie:
    """Character trie supporting insertion and longest-shared-prefix queries."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating nodes as needed."""
        cursor = self.root
        for symbol in word:
            child = cursor.children.get(symbol)
            if child is None:
                child = TrieNode()
                cursor.children[symbol] = child
            cursor = child
        cursor.end_of_word = True

    def longest_common_prefix(self, word):
        """Return how much of the start of ``word`` is already stored in the trie.

        Walks down from the root one symbol at a time and stops at the first
        symbol with no matching child; the result is the summed ``len`` of the
        matched symbols.
        """
        cursor = self.root
        matched = 0
        for symbol in word:
            child = cursor.children.get(symbol)
            if child is None:
                return matched
            matched += len(symbol)
            cursor = child
        return matched
33
+
34
def calculate_length(value):
    """Return the squared text length of ``value``.

    Booleans count as 4 characters, strings as ``len(value)``, ints and floats
    as ``len(str(value))``; every other type counts as 0.
    """
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        width = 4  # length of 'True' or 'False'
    elif isinstance(value, str):
        width = len(value)
    elif isinstance(value, (int, float)):
        width = len(str(value))
    else:
        width = 0
    return width * width
45
+
46
def evaluate_df_prefix_hit_cnt(df: pd.DataFrame) -> Tuple[int, float]:
    """Measure character-level prefix reuse across the rows of ``df``.

    Each row is turned into one string (missing values become ``""``, cells
    are joined without a separator), matched against a trie of all earlier
    rows, and then inserted into the trie.

    Args:
        df: DataFrame whose rows are scored in their existing order.

    Returns:
        ``(total_prefix_hit_count, hit_rate_percent)`` where the rate is the
        hit count divided by the total number of characters, times 100. An
        input with no characters yields a rate of ``0.0``.
    """
    trie = Trie()
    total_prefix_hit_count = 0
    total_string_length = 0

    # Rows must be processed one after another: the hit count of a row is
    # defined relative to the rows inserted before it, so dispatching rows to
    # a thread pool would let a row be scored before its predecessors are in
    # the trie and would race on the shared length counter.
    for _, row in df.iterrows():
        row_string = "".join(row.fillna("").astype(str).values)  # No spaces between columns
        total_string_length += len(row_string)
        total_prefix_hit_count += min(len(row_string), trie.longest_common_prefix(row_string))
        trie.insert(row_string)

    # Guard against empty input instead of dividing by zero.
    if total_string_length:
        total_prefix_hit_rate = total_prefix_hit_count / total_string_length
    else:
        total_prefix_hit_rate = 0.0
    assert total_prefix_hit_count <= total_string_length
    print(f"Total string length: {total_string_length}")
    no_cache_pricing = 2.5 / 5  # per 1M if not cached
    cache_pricing = 1.25 / 5  # per 1M if cached
    cached_tokens_pricing = total_prefix_hit_count * cache_pricing / 1e6
    non_cached_tokens_pricing = (total_string_length - total_prefix_hit_count) * no_cache_pricing / 1e6
    print(
        f"Cached tokens pricing = {round(cached_tokens_pricing,2)}, Non-cached tokens pricing = {round(non_cached_tokens_pricing,2)}, total pricing = {round(cached_tokens_pricing + non_cached_tokens_pricing,2)}"
    )
    return total_prefix_hit_count, total_prefix_hit_rate * 100
benchmarks/ADRS/prism/evaluator/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+ WORKDIR /benchmark
3
+
4
+ COPY requirements.txt .
5
+ RUN pip install --no-cache-dir -r requirements.txt
6
+
7
+ # wrapper.py provides backwards compatibility for old Python-based evaluators
8
+ # that define evaluate(program_path) -> dict, bridging them to the container
9
+ # JSON protocol. Source of truth: skydiscover/evaluation/wrapper.py
10
+ COPY . .
11
+ RUN chmod +x evaluate.sh
12
+
13
+ ENTRYPOINT ["./evaluate.sh"]
benchmarks/ADRS/prism/evaluator/wrapper.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backwards-compat wrapper for old Python-based evaluators.
2
+
3
+ Old-style evaluators define ``evaluate(program_path) -> dict``. This module
4
+ bridges that interface to the container JSON protocol expected by
5
+ ContainerizedEvaluator.
6
+
7
+ Usage — add this to the bottom of your evaluator.py::
8
+
9
+ if __name__ == "__main__":
10
+ from wrapper import run
11
+ run(evaluate)
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ import traceback
17
+
18
+
19
def run(evaluate_fn):
    """Invoke *evaluate_fn* and print its result as container-protocol JSON.

    * The program path is taken from ``sys.argv[1]``; without it a usage line
      is written to stderr and the process exits with status 1.
    * While *evaluate_fn* runs, ``sys.stdout`` points at ``sys.stderr`` so its
      debug prints cannot mix with the JSON document.
    * Numeric (and boolean) values of the result become ``metrics``; strings
      and JSON-encoded lists/dicts become ``artifacts``; other types are dropped.
    * ``combined_score`` is always present in ``metrics`` (default ``0.0``).
    """

    def _emit(payload):
        # Looks up sys.stdout at call time, i.e. after it has been restored.
        print(json.dumps(payload))

    def _error_payload(details):
        return {
            "status": "error",
            "combined_score": 0.0,
            "metrics": {"combined_score": 0.0},
            "artifacts": details,
        }

    if len(sys.argv) < 2:
        print("Usage: evaluator.py <program_path>", file=sys.stderr)
        sys.exit(1)

    program_path = sys.argv[1]

    # Keep evaluator chatter off the real stdout for the duration of the call.
    saved_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        result = evaluate_fn(program_path)
    except Exception as exc:
        sys.stdout = saved_stdout
        _emit(_error_payload({"error": str(exc), "traceback": traceback.format_exc()}))
        return
    sys.stdout = saved_stdout

    if not isinstance(result, dict):
        _emit(
            _error_payload(
                {"error": f"evaluate() returned {type(result).__name__}, expected dict"}
            )
        )
        return

    # Route each entry by type: numbers are metrics, text-like values are artifacts.
    metrics = {}
    artifacts = {}
    for key, value in result.items():
        if isinstance(value, (bool, int, float)):
            metrics[key] = float(value)
        elif isinstance(value, str):
            artifacts[key] = value
        elif isinstance(value, (list, dict)):
            artifacts[key] = json.dumps(value)

    metrics.setdefault("combined_score", 0.0)

    output = {
        "status": "error" if "error" in artifacts else "success",
        "combined_score": metrics["combined_score"],
        "metrics": metrics,
    }
    if artifacts:
        output["artifacts"] = artifacts

    _emit(output)
benchmarks/ADRS/txn_scheduling/evaluator/Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+ WORKDIR /benchmark
3
+
4
+ COPY requirements.txt .
5
+ RUN pip install --no-cache-dir -r requirements.txt
6
+
7
+ # wrapper.py provides backwards compatibility for old Python-based evaluators
8
+ # that define evaluate(program_path) -> dict, bridging them to the container
9
+ # JSON protocol. Source of truth: skydiscover/evaluation/wrapper.py
10
+ COPY . .
11
+ RUN chmod +x evaluate.sh
12
+
13
+ ENTRYPOINT ["./evaluate.sh"]
benchmarks/ADRS/txn_scheduling/evaluator/evaluate.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ PROGRAM="$1"
5
+ # MODE ($2) accepted but ignored — override this file to use train/test splits.
6
+
7
+ python /benchmark/evaluator.py "$PROGRAM"
benchmarks/ADRS/txn_scheduling/evaluator/evaluator.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.util
2
+ import os
3
+ import pickle
4
+ import signal
5
+ import subprocess
6
+ import sys
7
+ import tempfile
8
+ import time
9
+ import traceback
10
+
11
+ import numpy as np
12
+
13
+
14
class TimeoutError(Exception):
    """Raised by this module when an evaluated program runs out of time.

    Note: inside this module the name shadows the built-in ``TimeoutError``.
    """
16
+
17
+
18
def timeout_handler(signum, frame):
    """Abort the current operation by raising ``TimeoutError``.

    Takes the ``(signum, frame)`` pair of a signal handler; neither argument
    is inspected.
    """
    message = "Function execution timed out"
    raise TimeoutError(message)
21
+
22
+
23
def validate_schedule(txn_seq):
    """Check that ``txn_seq`` contains every index ``0 .. len(txn_seq) - 1``.

    Because the sequence has exactly ``len(txn_seq)`` entries, this holds only
    when it is a permutation of those indices.

    Args:
        txn_seq: Sized sequence of transaction indices.

    Returns:
        ``True`` if every index is present, otherwise ``False``.
    """
    count = len(txn_seq)
    try:
        # One pass to build a set, then O(1) lookups instead of a linear
        # scan of the sequence for every index.
        present = set(txn_seq)
    except TypeError:
        # Unhashable elements: fall back to direct membership tests.
        return all(i in txn_seq for i in range(count))
    return all(i in present for i in range(count))
29
+
30
+
31
def run_with_timeout(program_path, timeout_seconds=20):
    """Run ``program.get_random_costs()`` in a child Python process with a timeout.

    A small driver script is written to a temporary file. The child imports
    the program from ``program_path``, calls ``get_random_costs()`` and
    pickles the outcome to ``<script>.results``, which the parent then loads.

    Args:
        program_path: Path to the program file.
        timeout_seconds: Maximum execution time in seconds.

    Returns:
        ``(makespan, schedule)`` as returned by the program.

    Raises:
        TimeoutError: If the child does not finish within ``timeout_seconds``.
        RuntimeError: If the child exits non-zero, reports an error, or
            leaves no results file.
    """
    # Directory of this module, added to the child's sys.path so the program
    # can import sibling modules like `workloads`.
    sched_dir = os.path.dirname(os.path.abspath(__file__))
    program_path_str = str(program_path)

    with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
        temp_file_path = temp_file.name
        results_path = f"{temp_file_path}.results"

        # Paths are embedded with repr() so quotes, backslashes or braces in
        # them cannot break (or inject code into) the generated script.
        script = f"""
import sys
import os
import pickle
import traceback

program_path = {program_path_str!r}
results_path = {results_path!r}

# Add the directory to sys.path
sys.path.insert(0, os.path.dirname(program_path))
# Also add the scheduling directory for importing sibling modules like `workloads`
sys.path.insert(0, {sched_dir!r})

# Debugging info
print(f"Running in subprocess, Python version: {{sys.version}}")
print(f"Program path: {{program_path}}")

try:
    # Import the program
    spec = __import__('importlib.util').util.spec_from_file_location("program", program_path)
    program = __import__('importlib.util').util.module_from_spec(spec)
    spec.loader.exec_module(program)

    # Run the scheduling entry point
    print("Calling scheduling()...")
    makespan, schedule = program.get_random_costs()
    print(f"scheduling() returned successfully: makespan = {{makespan}}")

    # Save results to a file
    results = {{
        'makespan': makespan,
        'schedule': schedule,
    }}

    with open(results_path, 'wb') as f:
        pickle.dump(results, f)
    print(f"Results saved to {{results_path}}")

except Exception as e:
    # If an error occurs, save the error instead
    print(f"Error in subprocess: {{str(e)}}")
    traceback.print_exc()
    with open(results_path, 'wb') as f:
        pickle.dump({{'error': str(e)}}, f)
    print(f"Error saved to {{results_path}}")
"""
        temp_file.write(script.encode())

    try:
        # Run the script with timeout
        process = subprocess.Popen(
            [sys.executable, temp_file_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        try:
            stdout, stderr = process.communicate(timeout=timeout_seconds)
            exit_code = process.returncode

            # Always print output for debugging purposes
            print(f"Subprocess stdout: {stdout.decode()}")
            if stderr:
                print(f"Subprocess stderr: {stderr.decode()}")

            # Still raise an error for non-zero exit codes, but only after printing the output
            if exit_code != 0:
                raise RuntimeError(f"Process exited with code {exit_code}")

            if not os.path.exists(results_path):
                raise RuntimeError("Results file not found")

            # NOTE(security): this file is produced by the process that ran
            # the evaluated program; unpickling it can execute arbitrary code
            # in the parent. Only evaluate programs in a sandboxed environment.
            with open(results_path, "rb") as f:
                results = pickle.load(f)

            # Check if an error was returned
            if "error" in results:
                raise RuntimeError(f"Program execution failed: {results['error']}")

            return results["makespan"], results["schedule"]

        except subprocess.TimeoutExpired:
            # Kill the process if it times out
            process.kill()
            process.wait()
            raise TimeoutError(f"Process timed out after {timeout_seconds} seconds")

    finally:
        # Clean up temporary files
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
        if os.path.exists(results_path):
            os.unlink(results_path)
144
+
145
+
146
def evaluate(program_path):
    """Evaluate the program by running it once and checking the schedule.

    Args:
        program_path: Path to the program file.

    Returns:
        Dictionary of metrics: ``makespan``, ``schedule`` (number of
        schedules returned), ``validity`` (1.0/0.0) and ``combined_score``.
        ``combined_score`` is ``1000 / (1 + makespan) * 1000`` for a valid
        result and ``0.0`` for an invalid one, matching ``evaluate_stage1``.
        If anything raises, all metrics are ``0.0`` and ``error`` carries
        the exception message.
    """
    try:
        # For constructor-based approaches, a single evaluation is sufficient
        # since the result is deterministic.
        start_time = time.time()

        # Use subprocess to run with timeout
        makespan, schedule = run_with_timeout(program_path, timeout_seconds=600)

        eval_time = time.time() - start_time

        # Every returned schedule must pass validation; stops at the first failure.
        valid = all(validate_schedule(s) for s in schedule)
        validity = 1.0 if valid else 0.0

        # Higher is better: reciprocal scaling keeps the score positive and
        # decreasing in makespan. An invalid schedule scores zero so it can
        # never outrank a valid one.
        combined_score = 1000 / (1 + makespan) * 1000 if valid else 0.0

        print(f"Evaluation: valid={valid}, makespan={makespan}, time={eval_time:.2f}s")

        return {
            "makespan": float(makespan),
            "schedule": float(len(schedule)),
            "validity": float(validity),
            "combined_score": float(combined_score),
        }

    except Exception as e:
        print(f"Evaluation failed completely: {str(e)}")
        traceback.print_exc()
        return {
            "makespan": 0.0,
            "schedule": 0.0,
            "validity": 0.0,
            "combined_score": 0.0,
            # Surface the cause so the failure is reported as an error
            # rather than as a successful run with a zero score.
            "error": str(e),
        }
202
+
203
+ # Stage-based evaluation for cascade evaluation
204
def evaluate_stage1(program_path):
    """First cascade stage: run the program once and validate its schedules.

    Returns a dict with ``validity``, ``makespan``, ``schedule`` (number of
    schedules) and ``combined_score`` (``1000 / (1 + makespan) * 1000`` when
    valid, else ``0.0``). On timeout or any other exception the dict holds
    ``validity`` and ``combined_score`` of ``0.0`` plus an ``error`` string.
    """

    def _failed(reason):
        return {"validity": 0.0, "combined_score": 0.0, "error": reason}

    try:
        try:
            makespan, schedule = run_with_timeout(program_path, timeout_seconds=600)

            # Stops at the first schedule that fails validation.
            is_valid = all(validate_schedule(seq) for seq in schedule)

            # Positive score that shrinks as makespan grows; zero when invalid.
            if is_valid:
                score = 1000 / (1 + makespan) * 1000
            else:
                score = 0.0

            return {
                "validity": 1.0 if is_valid else 0.0,
                "makespan": float(makespan),
                "schedule": float(len(schedule)),
                "combined_score": float(score),
            }

        except TimeoutError as exc:
            print(f"Stage 1 evaluation timed out: {exc}")
            return _failed("Timeout")
        except Exception as exc:
            print(f"Stage 1 evaluation failed: {exc}")
            print(traceback.format_exc())
            return _failed(str(exc))

    except Exception as exc:
        # Last-resort guard in case one of the handlers above raises.
        print(f"Stage 1 evaluation failed completely: {exc}")
        print(traceback.format_exc())
        return _failed(str(exc))
242
+
243
+
244
def evaluate_stage2(program_path):
    """Second cascade stage: delegate to the full ``evaluate`` run."""
    full_result = evaluate(program_path)
    return full_result
250
+
251
+
252
+ if __name__ == "__main__":
253
+ # Backwards-compat: bridges old evaluate() -> dict to the container JSON
254
+ # protocol. wrapper.py is auto-injected at build time from
255
+ # skydiscover/evaluation/wrapper.py.
256
+ from wrapper import run
257
+
258
+ run(evaluate)
benchmarks/ADRS/txn_scheduling/evaluator/requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ numpy
benchmarks/ADRS/txn_scheduling/evaluator/txn_simulator.py ADDED
@@ -0,0 +1,229 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import json
3
+ import numpy as np
4
+
5
+
6
class Workload:
    """
    A set of transactions plus a simulator that computes the makespan of an
    execution order.

    Each transaction is stored as a list of operations of the form
    ``(op_type, key, position, txn_len)`` where ``op_type`` is ``"r"`` or
    ``"w"``, ``key`` is the key as a string, ``position`` is the 1-based slot
    of the operation inside the transaction and ``txn_len`` is the total number
    of slots (including ``"*"`` filler slots) in the transaction.
    """

    def __init__(self, workload_json, debug=False, verify=False):
        """
        Args:
            workload_json: JSON object (as a string) mapping transaction names
                to space-separated operations such as ``"r-3 w-7 *"``.
            debug: When true, print verbose tracing while parsing/simulating.
            verify: Stored on the instance; not read by this class.
        """
        self.workload = list(json.loads(workload_json).values())
        self.num_txns = len(self.workload)
        self.debug = debug
        self.verify = verify
        self.txns = []  # list of txns (list of ops)
        self.only_hot_keys = False  # True#
        self.hot_keys_thres = 100
        self.hot_keys = set()
        self.hot_keys_map = {}
        self.sorted_len = None
        self.median_len = 0
        self.conflict_blocks = []
        self.conflict_blocks_map = {}
        self.m = 0

        self.get_txns()

    def get_txns(self):
        """
        Parse ``self.workload`` into ``self.txns``.

        ``"*"`` slots carry no operation but still count towards positions and
        the transaction length. When ``only_hot_keys`` is set, operations on
        keys above ``hot_keys_thres`` are dropped, except that a transaction
        left with no operations keeps the last dropped one.
        """
        for txn in self.workload:
            txn_ops = []
            ops = txn.split(" ")
            kept = 0
            # Last operation skipped by the hot-key filter (type, key, position).
            skipped_type = None
            skipped_key = None
            skipped_pos = None
            for i, op in enumerate(ops):
                if op == "*":
                    continue
                vals = op.split("-")
                if len(vals) != 2:
                    print(op, vals)
                assert len(vals) == 2
                if self.only_hot_keys and int(vals[1]) > self.hot_keys_thres:
                    skipped_type = vals[0]
                    skipped_key = vals[1]
                    skipped_pos = i + 1
                    continue
                kept += 1
                txn_ops.append((vals[0], vals[1], i + 1, len(ops)))
            if kept == 0 and self.only_hot_keys:
                txn_ops.append((skipped_type, skipped_key, skipped_pos, len(ops)))
            self.txns.append(txn_ops)
        if self.debug:
            print(self.txns)
        self.num_txns = len(self.txns)

    def insert_key_map(self, key, key_map, op_type, key_start, key_end, txn_id):
        """
        Insert a lock interval into ``key_map[key]``, keeping the list roughly
        ordered by time.

        The list is scanned from the back for an entry that starts no later
        than ``key_start``; the new interval goes right after it. If no such
        entry exists the interval is placed first when it lies strictly before
        the first entry, otherwise it is appended.
        """
        index = len(key_map[key]) - 1
        # The loop only bounds the number of steps; entries are read by index,
        # walking backwards from the end of the list.
        for _ in key_map[key]:
            (_, s, e, _) = key_map[key][index]
            if e <= key_end:
                if s <= key_start:  # e <= key_start or
                    index += 1
                    break
            elif s <= key_start:
                index += 1
                break
            index -= 1
        if index == -1:
            (_, s, e, _) = key_map[key][0]
            if key_end < e and key_start < s:
                key_map[key].insert(0, (op_type, key_start, key_end, txn_id))
            else:
                key_map[key].append((op_type, key_start, key_end, txn_id))
        else:
            key_map[key].insert(index, (op_type, key_start, key_end, txn_id))
        if self.debug:
            print("insert: ", index, key, key_start, key_end, key_map[key])

    def find_earliest_read(self, key, key_map, txn_id):
        """
        Return the earliest time a read of ``key`` may be placed, given that
        the most recent lock on ``key`` is itself a read (read locks are
        shared).

        Returns the start of the latest entry if it belongs to ``txn_id``;
        otherwise 0 when the trailing entries are all reads, or one past the
        end of the write that precedes the trailing run of reads.
        """
        # must use latest read if part of same txn
        if key_map[key][-1][3] == txn_id:  # as we're adding to key_map
            if self.debug:
                print("TXN_ID")
            return key_map[key][-1][1]

        if self.debug:
            print(key, key_map[key], txn_id)
        index = len(key_map[key]) - 1
        # Walk back over the trailing reads. Index -1 means we ran off the
        # front of the list (it aliases the last entry, hence the explicit
        # check inside the loop).
        while key_map[key][index][0] == "r":
            if index == -1:
                break
            index -= 1
        if self.debug:
            print("index: ", index)
        if index == -1:  # can be first read
            return 0
        return key_map[key][index][2] + 1  # after first write found

    def get_opt_seq_cost(self, txn_seq):
        """
        Gets the makespan of a given sequence of transactions

        Args:
            txn_seq: Sequence of indices into ``self.txns`` in execution order.

        Returns
            Value representing the makespan (time to execute given schedule);
            0 for an empty sequence.
        """
        if self.debug:
            print("seq: ", txn_seq)
        key_map = {}  # <key, [(r/w, lock_start, lock_end, txn_id)]>
        total_cost = 0
        for txn_id, txn_index in enumerate(txn_seq):
            txn = self.txns[txn_index]
            txn_start = 1
            txn_total_len = 0
            max_release = 0
            for (op_type, key, pos, txn_len) in txn:
                if key in key_map:
                    if key_map[key][-1][0] == "w" or op_type == "w":
                        # get end time of latest lock end
                        key_start = key_map[key][-1][2] + 1
                    else:
                        key_start = self.find_earliest_read(key, key_map, txn_id)
                    # place txn start behind conflicting locks
                    txn_start = max(txn_start, key_start - pos + 1)
                    if self.debug:
                        print(key, key_start, pos, txn_start)
                    # latest release of all locks
                    max_release = max(max_release, key_start - 1)
                txn_total_len = txn_len
            txn_end = txn_start + txn_total_len - 1
            cost = txn_end - total_cost
            if txn_end <= total_cost:
                # in some cases, later txn in seq can finish first
                cost = 0
            total_cost += cost
            if self.debug:
                print(txn, txn_start, txn_end, max_release, cost, total_cost)
                print(txn_start, txn_end, max_release, cost)

            # Record the locks this transaction takes at its chosen start time.
            for (op_type, key, pos, txn_len) in txn:
                key_start = txn_start + pos - 1
                if key in key_map:
                    self.insert_key_map(
                        key, key_map, op_type, key_start, key_start, txn_id
                    )
                else:
                    key_map[key] = [(op_type, key_start, key_start, txn_id)]
            if self.debug:
                print(key_map)
        if self.debug:
            print(total_cost)

        return total_cost
229
+
benchmarks/ADRS/txn_scheduling/evaluator/workloads.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Workload data for scheduling experiments.
3
+
4
+ This module contains predefined workload configurations used for testing
5
+ transaction scheduling algorithms.
6
+ """
7
+
8
+ WORKLOAD_1 = '{"txn0":"w-17 r-5 w-3 r-4 r-54 r-14 w-6 r-11 w-22 r-7 w-1 w-8 w-9 w-27 r-2 r-25", "txn1":"r-17 r-280 r-38 r-3 r-4 r-5 w-10 w-195 r-6 w-18 w-7 r-1 r-8 r-9 r-2 w-21", "txn2":"r-5 r-3 w-4 w-14 r-10 w-38 w-6 r-11 r-7 r-1 w-30 r-8 r-9 r-12 w-2 r-15", "txn3":"w-17 w-4 r-3 r-5 r-14 w-6 w-80 r-11 r-16 r-1 w-19 r-9 w-12 w-2 w-73 w-15", "txn4":"w-45 w-4 r-3 w-5 w-6 w-201 w-781 w-20 w-253 r-1 r-65 r-30 w-8 w-23 r-12 w-2", "txn5":"w-45 r-5 w-3 r-4 w-14 r-199 r-6 w-11 r-16 w-7 w-1 w-8 r-12 r-2 r-21 w-15", "txn6":"r-5 w-3 w-10 w-2 w-6 w-11 w-16 r-53 w-22 w-7 w-1 r-8 r-12 r-4 w-13 r-32", "txn7":"w-17 r-21 w-4 r-3 w-5 w-10 w-101 w-6 w-16 r-50 r-1 w-8 r-13 w-2 w-28 r-15", "txn8":"w-4 w-3 r-5 w-14 w-10 w-19 w-6 r-11 r-18 w-67 r-7 r-1 r-8 r-13 r-2 w-27", "txn9":"w-17 r-5 r-3 r-4 r-6 w-47 w-11 w-7 w-20 w-1 w-50 w-9 w-13 w-2 w-248 r-15", "txn10":"w-35 r-5 r-3 w-4 w-6 w-11 w-16 w-542 w-7 r-1 w-36 w-8 r-13 w-2 w-69 r-15", "txn11":"w-9 w-17 r-4 w-3 w-14 w-10 w-6 r-131 r-16 r-7 w-942 w-1 w-50 w-8 r-23 r-2", "txn12":"r-29 r-21 w-4 w-5 r-3 w-10 w-19 r-6 w-7 r-20 r-1 r-8 w-9 w-12 r-2 w-13", "txn13":"r-35 r-5 w-3 w-4 r-10 r-143 w-6 w-18 w-7 w-1 w-8 w-9 r-12 w-2 r-25 r-254", "txn14":"r-17 r-4 r-3 w-55 r-5 r-10 w-6 r-11 w-16 r-7 w-60 w-1 r-9 w-13 r-2 r-32", "txn15":"r-4 r-3 w-183 w-26 w-6 w-47 w-11 r-486 w-7 r-1 w-8 w-9 w-13 w-2 w-12 r-15", "txn16":"r-17 w-4 w-3 r-5 r-76 w-10 r-6 w-11 w-22 w-7 w-1 w-8 r-49 w-27 w-2 r-21", "txn17":"r-41 w-34 w-4 r-3 r-5 w-14 r-10 w-48 w-6 w-7 w-1 w-23 w-12 r-2 r-27 r-25", "txn18":"w-17 w-4 w-3 w-5 w-14 w-10 r-1173 w-6 w-23 w-28 w-7 w-1 w-19 r-9 w-2 r-206", "txn19":"w-4 w-3 r-5 r-352 r-6 r-11 r-23 w-22 w-7 w-1 r-8 r-9 w-41 r-2 w-12 r-171", "txn20":"r-21 r-4 r-3 r-26 w-10 r-6 r-11 r-22 w-7 w-60 r-1 r-9 r-2 r-102 r-32 r-15", "txn21":"r-17 w-4 w-3 w-5 r-14 r-12 w-11 r-7 r-1 r-74 r-36 w-19 w-9 w-13 w-2 w-21", "txn22":"w-63 r-4 w-3 r-5 w-38 r-10 r-6 w-11 w-16 w-7 r-1 r-8 w-13 r-2 w-21 w-15", "txn23":"w-5 w-4 w-3 w-10 r-94 w-11 w-18 r-39 r-7 r-61 r-1 r-9 
r-13 r-2 w-21 r-15", "txn24":"w-4 w-3 r-5 r-10 w-6 w-11 r-16 w-53 r-7 r-50 w-1 r-61 r-8 w-12 w-2 w-13", "txn25":"r-21 w-4 w-3 w-14 w-6 w-16 r-33 w-7 w-1 r-30 w-19 w-9 w-12 r-2 w-28 r-8", "txn26":"r-4 w-3 w-5 w-10 w-6 w-11 r-16 w-1289 r-331 w-71 w-7 r-1 w-8 w-9 w-13 w-2", "txn27":"r-34 r-4 r-3 w-5 w-14 w-76 w-10 r-6 r-11 r-53 w-7 r-1 r-13 w-2 w-25 w-15", "txn28":"r-29 r-4 w-3 w-5 w-10 r-6 w-11 r-16 w-61 r-20 r-1 r-8 w-9 r-13 r-2 r-27", "txn29":"r-2041 w-5 r-3 r-4 r-5270 r-10 r-14 w-6 r-11 w-87 r-1 r-8 w-9 w-12 r-2 w-13", "txn30":"w-45 r-5 r-4 w-3 r-51 w-10 w-6 r-16 r-7 r-1 r-30 w-8 r-9 w-12 w-2 w-28", "txn31":"r-34 w-4 w-232 r-3 w-14 w-5 w-54 w-6 r-25 w-7 r-20 r-1 r-49 w-13 w-2 r-32", "txn32":"r-5 r-3 w-14 r-10 w-58 w-109 r-2 r-11 w-204 r-7 w-1 r-30 r-37 r-8 w-4 r-565", "txn33":"w-17 w-5 w-4 r-3 r-26 w-10 w-6 w-24 w-438 w-7 w-1 r-8 r-9 r-2 w-25 w-15", "txn34":"w-72 w-4 r-3 w-5 w-52 w-10 w-96 w-1072 r-6 r-14 w-31 r-1 w-8 r-23 r-12 r-2", "txn35":"r-34 w-4 w-3 r-5 w-14 w-10 r-6 w-11 r-7 r-1 w-42 r-8 w-128 r-2 r-69 w-15", "txn36":"r-17 w-4 w-3 w-76 r-5 r-10 r-32 r-14 r-164 r-11 w-7 r-1 w-23 w-12 w-2 r-25", "txn37":"w-45 r-17 w-4 r-3 w-5 r-89 r-14 w-6 r-11 w-16 w-7 w-1 w-43 w-8 w-9 r-2", "txn38":"r-126 r-17 r-4 r-3 r-14 r-195 w-6 w-82 r-7 w-1 r-1529 w-8 w-9 w-13 w-2 w-15", "txn39":"w-623 r-4 w-3 w-5 w-10 r-6 w-11 r-7 r-31 w-1 w-36 r-8 w-12 r-2 r-69 w-15", "txn40":"r-5 r-4 r-3 w-6 r-197 r-16 r-18 w-25 w-7 w-65 r-1 r-8 w-23 w-2 r-32 r-15", "txn41":"w-5 w-3 r-4 r-14 w-10 w-19 r-6 w-23 w-7 w-31 r-1 w-8 r-9 r-12 w-2 w-21", "txn42":"r-21 r-72 r-5 r-3 w-4 w-6 r-47 w-142 r-50 w-1 r-8 w-9 w-41 r-2 w-13 r-25", "txn43":"r-17 w-4 r-3 r-5 w-10 w-108 r-6 r-11 w-22 w-7 w-1 w-8 w-9 w-12 r-2 w-28", "txn44":"w-34 w-4 w-3 r-5 r-54 r-10 r-23 w-16 w-7 r-1 w-36 w-177 r-8 r-9 w-42 w-2", "txn45":"w-5 w-3 w-4 w-38 r-10 w-16 r-49 r-20 w-7 w-1 r-66 r-36 w-8 r-9 w-2 r-21", "txn46":"w-112 w-17 w-4 r-3 w-5 r-38 r-10 r-6 r-7 w-1 w-77 w-8 w-9 r-13 w-2 r-21", "txn47":"w-265 r-534 w-4 r-3 w-5 w-10 r-6 
r-11 w-16 r-1 w-8 w-9 w-13 r-2 r-12 w-15", "txn48":"r-4 r-5 r-3 r-26 w-6 w-11 w-7 r-20 r-1 w-156 w-8 w-9 r-12 r-2 r-25 w-15", "txn49":"w-17 w-5 w-3 w-4 w-134 w-10 r-6 r-22 w-7 w-1 r-37 w-8 r-27 w-2 w-32 w-15", "txn50":"w-29 w-35 r-5 w-3 w-14 w-10 w-6 r-2 w-18 r-33 w-7 r-20 w-1 w-13 w-4 w-12", "txn51":"r-5 w-3 w-4 w-81 r-6 w-11 w-16 w-7 w-20 w-1 r-8 r-19 w-9 w-2 w-21 r-15", "txn52":"r-17 w-4 w-3 w-5 w-100 r-26 r-6 r-22 w-44 w-33 w-7 w-1 w-66 w-8 w-9 r-2", "txn53":"r-72 r-5 w-3 w-4 w-14 w-19 w-6 r-39 r-28 w-7 w-1 r-8 w-41 w-2 r-12 w-15", "txn54":"w-4 r-3 w-76 r-5 w-12 w-6 r-11 w-7 w-1 w-30 r-8 w-9 w-13 w-2 r-21 w-203", "txn55":"w-4 w-3 w-5 w-14 r-10 r-6 w-7 w-31 w-1 w-8 r-9 r-13 r-2 r-819 r-56 w-15", "txn56":"w-35 w-4 w-3 r-5 w-6 w-143 r-18 r-22 r-59 r-7 w-67 w-1 w-57 r-9 w-2 r-8", "txn57":"r-4 w-3 r-5 r-10 r-19 r-11 w-46 w-20 w-7 r-1 r-117 r-36 w-8 w-9 r-13 r-2", "txn58":"w-17 r-48 r-4 w-3 r-5 w-139 r-10 r-410 r-6 w-11 w-16 w-1 w-8 w-2 r-28 r-103", "txn59":"w-5 w-3 r-4 w-38 r-56 w-14 r-18 w-39 r-7 w-50 w-1 r-8 r-13 r-2 w-148 w-1520", "txn60":"r-17 r-5 w-3 r-4 w-14 w-81 w-375 w-6 r-11 r-18 r-1 w-8 w-9 r-12 w-2 w-13", "txn61":"w-17 w-34 w-4 w-3 r-5 r-14 r-10 r-19 w-6 r-11 w-18 w-16 w-1 w-8 r-2 r-15", "txn62":"r-17 r-4 w-3 r-5 r-83 r-10 r-19 w-6 w-11 w-1 w-30 w-8 w-13 w-2 w-28 w-25", "txn63":"w-5 w-3 w-4 r-14 r-10 w-278 r-6 r-16 r-7 r-1 r-81 w-9 w-12 w-2 w-13 r-8", "txn64":"w-35 w-21 w-5 w-3 r-4 w-34 w-18 r-7 r-31 w-1 r-8 r-9 r-12 w-2 w-13 r-15", "txn65":"w-4 w-3 w-13 w-14 r-10 r-6 r-23 w-18 r-7 w-1 r-30 r-8 w-9 r-27 r-2 w-12", "txn66":"r-15 w-4 w-3 r-5 w-38 w-48 r-10 w-6 r-20 w-1 w-66 r-24 w-9 w-13 r-2 w-8", "txn67":"r-5 r-3 r-6 w-2 w-11 w-18 r-46 r-7 r-1 r-30 w-3031 r-24 w-9 w-12 w-4 w-28", "txn68":"r-84 w-4 r-3 w-52 w-145 r-6 w-47 w-46 r-7 r-65 r-1 w-376 w-24 w-13 w-2 r-15", "txn69":"w-4 w-3 r-5 w-10 r-6 w-11 w-18 w-39 w-16 r-20 w-7 w-1 r-77 w-8 w-9 r-2", "txn70":"r-4 w-3 r-5 r-26 w-79 r-6 w-11 w-53 w-16 r-7 w-211 r-1 w-24 w-9 r-13 w-2", "txn71":"w-17 w-4 w-3 w-5 
w-14 w-10 r-6 r-11 w-7 r-20 r-1 w-19 r-9 w-12 r-2 w-8", "txn72":"r-4 r-3 w-5 r-52 w-10 w-48 r-14 r-6 w-23 w-11 w-7 w-1 w-8 w-9 r-13 r-2", "txn73":"w-34 r-4 w-3 w-5 w-14 r-10 w-39 r-33 r-7 w-156 r-1 w-8 r-9 w-13 w-2 w-119", "txn74":"w-29 w-84 r-5 r-3 r-4 r-10 w-6 w-11 w-33 w-7 r-1 r-42 r-23 w-55 r-2 r-25", "txn75":"w-98 w-4 w-3 w-5 r-10 r-278 w-6 r-68 r-18 r-33 w-7 r-1 w-9 w-12 r-2 w-25", "txn76":"r-17 r-5 w-3 r-4 r-26 w-6 r-11 w-71 w-7 w-50 w-1 r-8 r-9 w-13 w-2 r-15", "txn77":"r-5 w-3 r-48 w-14 w-6 w-2 w-23 r-28 w-7 w-31 w-1 w-36 w-9 w-4 r-21 w-25", "txn78":"w-343 r-72 w-17 w-5 r-3 w-4 r-189 r-51 w-11 w-33 r-7 r-1 w-8 r-9 r-2 r-28", "txn79":"r-112 r-34 w-4 w-3 w-5 w-10 r-6 r-25 r-7 w-1 w-8 w-9 w-13 w-2 r-21 r-32", "txn80":"r-4 r-3 r-5 w-10 r-6 w-11 r-18 w-33 w-7 r-31 w-1 w-8 w-9 r-13 r-2 w-12", "txn81":"r-4 r-3 r-5 r-147 w-6 w-23 r-18 r-16 w-24 w-7 w-1 w-66 r-8 w-9 w-12 w-2", "txn82":"w-35 w-34 w-48 w-3 w-17 w-14 r-5 w-10 w-109 w-6 r-4 r-16 w-1 r-9 r-12 w-2", "txn83":"w-4 r-3 w-5 w-10 w-224 w-6 r-11 w-18 w-16 w-85 r-7 w-1 w-8 r-9 w-12 r-2", "txn84":"w-21 w-5 r-3 w-83 w-4 w-10 w-16 r-7 r-1 w-8 w-19 w-9 r-13 w-2 r-12 r-15", "txn85":"r-4 w-3 w-209 r-5 w-10 w-11 w-24 w-18 r-16 r-891 r-7 w-67 w-1 w-8 w-2 w-21", "txn86":"r-41 w-4 r-3 w-5 w-6 r-70 w-273 w-7 r-1 w-77 r-8 r-13 r-2 r-21 w-25 w-15", "txn87":"w-29 r-35 r-34 w-4 r-3 w-10 r-128 w-6 w-18 w-7 w-31 w-1 w-9 r-12 r-2 w-32", "txn88":"r-435 r-34 r-5 r-3 r-4 r-14 r-10 w-6 w-49 r-7 w-1 w-8 r-23 w-27 w-2 w-13", "txn89":"r-45 w-126 w-5 r-3 w-4 w-6 r-16 w-39 w-7 r-31 w-1 w-8 r-9 r-12 r-2 w-25", "txn90":"r-4 w-5 w-3 r-26 r-10 w-51 w-6 w-23 w-33 r-1 w-42 r-9 w-41 r-2 r-25 r-40", "txn91":"w-149 w-17 w-5 w-4 r-3 w-14 r-6 r-11 w-18 w-7 r-78 w-1 w-9 r-12 w-2 r-15", "txn92":"r-34 r-5 r-3 r-4 r-26 w-10 r-6 w-11 r-18 w-7 r-1 w-37 w-9 r-12 r-2 r-25", "txn93":"r-4 r-3 w-5 w-121 w-233 r-10 r-11 r-16 r-7 r-1 w-547 w-8 w-9 r-13 r-2 w-12", "txn94":"w-5 w-4 r-3 w-14 r-109 r-68 w-6 w-644 r-22 w-7 w-1 w-8 r-9 w-2 w-28 w-15", "txn95":"w-302 
r-4 r-3 r-5 w-51 w-10 r-6 w-49 r-7 w-20 w-1 r-37 w-8 w-9 w-12 w-2", "txn96":"w-5 w-3 r-58 w-6 r-2 r-18 r-22 w-313 w-20 r-7 w-1 w-24 w-9 w-4 w-28 r-8", "txn97":"w-15 r-5 r-3 r-4 w-10 r-47 r-6 w-11 w-18 w-7 r-1 w-19 w-9 w-2 w-480 r-8", "txn98":"r-17 r-4 r-3 r-5 r-26 r-14 w-6 r-639 w-22 r-7 r-1 r-77 r-8 w-9 r-2 r-15", "txn99":"w-4 w-3 w-5 w-26 w-14 r-10 w-241 w-6 w-53 r-24 w-1 r-19 w-9 r-13 r-2 w-8"}'
9
+
10
+ WORKLOAD_2 = '{"txn0":"r-3 r-40 w-40 * * * * * * * * * * * * * * * * * *", "txn1":"r-1 r-13 w-13 * * * * * * * * * * * * * * * * * *", "txn2":"r-3 r-32 w-32 * * * * * * * * * * * * * * * * * *", "txn3":"r-2 r-29 w-29 * * * * * * * * * * * * * * * * * *", "txn4":"r-4 r-44 w-44 * * * * * * * * * * * * * * * * * *", "txn5":"r-2 r-23 w-23 * * * * * * * * * * * * * * * * * *", "txn6":"r-1 r-13 w-13 * * * * * * * * * * * * * * * * * *", "txn7":"r-3 r-34 w-34 * * * * * * * * * * * * * * * * * *", "txn8":"r-3 r-34 w-34 * * * * * * * * * * * * * * * * * *", "txn9":"r-2 r-25 w-25 * * * * * * * * * * * * * * * * * *", "txn10":"r-3 r-35 w-35 * * * * * * * * * * * * * * * * * *", "txn11":"r-4 r-43 w-43 * * * * * * * * * * * * * * * * * *", "txn12":"r-2 r-21 w-21 * * * * * * * * * * * * * * * * * *", "txn13":"r-2 r-30 w-30 * * * * * * * * * * * * * * * * * *", "txn14":"r-2 r-29 w-29 * * * * * * * * * * * * * * * * * *", "txn15":"r-2 r-27 w-27 * * * * * * * * * * * * * * * * * *", "txn16":"r-3 r-39 w-39 * * * * * * * * * * * * * * * * * *", "txn17":"r-3 r-34 w-34 * * * * * * * * * * * * * * * * * *", "txn18":"r-1 r-19 w-19 * * * * * * * * * * * * * * * * * *", "txn19":"r-4 r-48 w-48 * * * * * * * * * * * * * * * * * *", "txn20":"r-4 r-50 w-50 * * * * * * * * * * * * * * * * * *", "txn21":"r-4 r-41 w-41 * * * * * * * * * * * * * * * * * *", "txn22":"r-2 r-29 w-29 * * * * * * * * * * * * * * * * * *", "txn23":"r-1 r-17 w-17 * * * * * * * * * * * * * * * * * *", "txn24":"r-1 r-19 w-19 * * * * * * * * * * * * * * * * * *", "txn25":"r-2 r-23 w-23 * * * * * * * * * * * * * * * * * *", "txn26":"r-4 r-47 w-47 * * * * * * * * * * * * * * * * * *", "txn27":"r-4 r-42 w-42 * * * * * * * * * * * * * * * * * *", "txn28":"r-4 r-50 w-50 * * * * * * * * * * * * * * * * * *", "txn29":"r-4 r-41 w-41 * * * * * * * * * * * * * * * * * *", "txn30":"r-3 r-32 w-32 * * * * * * * * * * * * * * * * * *", "txn31":"r-2 r-24 w-24 * * * * * * * * * * * * * * * * * *", "txn32":"r-1 r-12 w-12 * * * * * * * * * 
* * * * * * * * *", "txn33":"r-4 r-42 w-42 * * * * * * * * * * * * * * * * * *", "txn34":"r-2 r-23 w-23 * * * * * * * * * * * * * * * * * *", "txn35":"r-3 r-32 w-32 * * * * * * * * * * * * * * * * * *", "txn36":"r-3 r-40 w-40 * * * * * * * * * * * * * * * * * *", "txn37":"r-4 r-50 w-50 * * * * * * * * * * * * * * * * * *", "txn38":"r-3 r-32 w-32 * * * * * * * * * * * * * * * * * *", "txn39":"r-2 r-26 w-26 * * * * * * * * * * * * * * * * * *", "txn40":"r-4 r-41 w-41 * * * * * * * * * * * * * * * * * *", "txn41":"r-1 r-17 w-17 * * * * * * * * * * * * * * * * * *", "txn42":"r-1 r-11 w-11 * * * * * * * * * * * * * * * * * *", "txn43":"r-1 r-20 w-20 * * * * * * * * * * * * * * * * * *", "txn44":"r-3 r-34 w-34 * * * * * * * * * * * * * * * * * *", "txn45":"r-3 r-31 w-31 * * * * * * * * * * * * * * * * * *", "txn46":"r-3 r-39 w-39 * * * * * * * * * * * * * * * * * *", "txn47":"r-3 r-31 w-31 * * * * * * * * * * * * * * * * * *", "txn48":"r-2 r-30 w-30 * * * * * * * * * * * * * * * * * *", "txn49":"r-1 r-13 w-13 * * * * * * * * * * * * * * * * * *", "txn50":"r-1 w-1 r-15 w-15 *", "txn51":"r-1 w-1 r-13 w-13 *", "txn52":"r-3 w-3 r-31 w-31 *", "txn53":"r-1 w-1 r-20 w-20 *", "txn54":"r-4 w-4 r-41 w-41 *", "txn55":"r-3 w-3 r-35 w-35 *", "txn56":"r-1 w-1 r-12 w-12 *", "txn57":"r-2 w-2 r-24 w-24 *", "txn58":"r-2 w-2 r-22 w-22 *", "txn59":"r-2 w-2 r-30 w-30 *", "txn60":"r-2 w-2 r-22 w-22 *", "txn61":"r-4 w-4 r-46 w-46 *", "txn62":"r-4 w-4 r-50 w-50 *", "txn63":"r-2 w-2 r-23 w-23 *", "txn64":"r-2 w-2 r-29 w-29 *", "txn65":"r-3 w-3 r-32 w-32 *", "txn66":"r-3 w-3 r-32 w-32 *", "txn67":"r-4 w-4 r-45 w-45 *", "txn68":"r-1 w-1 r-13 w-13 *", "txn69":"r-2 w-2 r-23 w-23 *", "txn70":"r-4 w-4 r-48 w-48 *", "txn71":"r-1 w-1 r-15 w-15 *", "txn72":"r-1 w-1 r-17 w-17 *", "txn73":"r-2 w-2 r-23 w-23 *", "txn74":"r-4 w-4 r-43 w-43 *", "txn75":"r-4 w-4 r-48 w-48 *", "txn76":"r-3 w-3 r-37 w-37 *", "txn77":"r-4 w-4 r-48 w-48 *", "txn78":"r-3 w-3 r-32 w-32 *", "txn79":"r-4 w-4 r-44 w-44 *", "txn80":"r-2 
w-2 r-30 w-30 *", "txn81":"r-1 w-1 r-19 w-19 *", "txn82":"r-2 w-2 r-22 w-22 *", "txn83":"r-4 w-4 r-41 w-41 *", "txn84":"r-3 w-3 r-33 w-33 *", "txn85":"r-3 w-3 r-34 w-34 *", "txn86":"r-1 w-1 r-18 w-18 *", "txn87":"r-3 w-3 r-39 w-39 *", "txn88":"r-3 w-3 r-38 w-38 *", "txn89":"r-2 w-2 r-24 w-24 *", "txn90":"r-4 w-4 r-46 w-46 *", "txn91":"r-4 w-4 r-49 w-49 *", "txn92":"r-4 w-4 r-43 w-43 *", "txn93":"r-4 w-4 r-47 w-47 *", "txn94":"r-2 w-2 r-28 w-28 *", "txn95":"r-4 w-4 r-41 w-41 *", "txn96":"r-3 w-3 r-39 w-39 *", "txn97":"r-1 w-1 r-15 w-15 *", "txn98":"r-1 w-1 r-11 w-11 *", "txn99":"r-3 w-3 r-39 w-39 *"}'
11
+
12
+ WORKLOAD_3 = '{"txn0":"r-4 * * * * * * * w-6", "txn1":"r-3 * * * * * * * w-7", "txn2":"r-5 * * * * * * * w-9", "txn3":"r-2 * * * * * * * w-8", "txn4":"r-3 * * * * * * * w-7", "txn5":"r-1 * * * * * * * w-8", "txn6":"r-1 * * * * * * * w-9", "txn7":"r-2 * * * * * * * w-7", "txn8":"r-5 * * * * * * * w-9", "txn9":"r-1 * * * * * * * w-7", "txn10":"r-3 * * * * * * * w-7", "txn11":"r-2 * * * * * * * w-9", "txn12":"r-1 * * * * * * * w-6", "txn13":"r-5 * * * * * * * w-6", "txn14":"r-4 * * * * * * * w-10", "txn15":"r-5 * * * * * * * w-10", "txn16":"r-5 * * * * * * * w-10", "txn17":"r-2 * * * * * * * w-6", "txn18":"r-1 * * * * * * * w-7", "txn19":"r-4 * * * * * * * w-6", "txn20":"r-2 * * * * * * * w-8", "txn21":"r-4 * * * * * * * w-10", "txn22":"r-4 * * * * * * * w-7", "txn23":"r-4 * * * * * * * w-7", "txn24":"r-5 * * * * * * * w-6", "txn25":"r-5 * * * * * * * w-10", "txn26":"r-5 * * * * * * * w-8", "txn27":"r-2 * * * * * * * w-9", "txn28":"r-5 * * * * * * * w-8", "txn29":"r-1 * * * * * * * w-8", "txn30":"r-4 * * * * * * * w-7", "txn31":"r-3 * * * * * * * w-10", "txn32":"r-3 * * * * * * * w-6", "txn33":"r-1 * * * * * * * w-6", "txn34":"r-4 * * * * * * * w-7", "txn35":"r-5 * * * * * * * w-7", "txn36":"r-3 * * * * * * * w-7", "txn37":"r-1 * * * * * * * w-8", "txn38":"r-3 * * * * * * * w-6", "txn39":"r-2 * * * * * * * w-6", "txn40":"r-1 * * * * * * * w-8", "txn41":"r-1 * * * * * * * w-10", "txn42":"r-5 * * * * * * * w-6", "txn43":"r-2 * * * * * * * w-6", "txn44":"r-3 * * * * * * * w-6", "txn45":"r-2 * * * * * * * w-6", "txn46":"r-5 * * * * * * * w-6", "txn47":"r-1 * * * * * * * w-9", "txn48":"r-2 * * * * * * * w-8", "txn49":"r-1 * * * * * * * w-10", "txn50":"r-6 * * * * * * * w-1", "txn51":"r-9 * * * * * * * w-2", "txn52":"r-6 * * * * * * * w-4", "txn53":"r-6 * * * * * * * w-1", "txn54":"r-7 * * * * * * * w-5", "txn55":"r-8 * * * * * * * w-1", "txn56":"r-9 * * * * * * * w-3", "txn57":"r-8 * * * * * * * w-5", "txn58":"r-8 * * * * * * * w-3", "txn59":"r-10 * * * * * * * w-1", 
"txn60":"r-8 * * * * * * * w-1", "txn61":"r-6 * * * * * * * w-2", "txn62":"r-10 * * * * * * * w-2", "txn63":"r-9 * * * * * * * w-3", "txn64":"r-9 * * * * * * * w-3", "txn65":"r-8 * * * * * * * w-2", "txn66":"r-6 * * * * * * * w-4", "txn67":"r-8 * * * * * * * w-2", "txn68":"r-9 * * * * * * * w-3", "txn69":"r-9 * * * * * * * w-2", "txn70":"r-6 * * * * * * * w-5", "txn71":"r-9 * * * * * * * w-1", "txn72":"r-10 * * * * * * * w-2", "txn73":"r-9 * * * * * * * w-1", "txn74":"r-6 * * * * * * * w-1", "txn75":"r-7 * * * * * * * w-5", "txn76":"r-7 * * * * * * * w-5", "txn77":"r-7 * * * * * * * w-2", "txn78":"r-10 * * * * * * * w-5", "txn79":"r-9 * * * * * * * w-3", "txn80":"r-10 * * * * * * * w-3", "txn81":"r-10 * * * * * * * w-2", "txn82":"r-7 * * * * * * * w-5", "txn83":"r-9 * * * * * * * w-4", "txn84":"r-8 * * * * * * * w-3", "txn85":"r-9 * * * * * * * w-3", "txn86":"r-10 * * * * * * * w-2", "txn87":"r-8 * * * * * * * w-2", "txn88":"r-10 * * * * * * * w-2", "txn89":"r-8 * * * * * * * w-5", "txn90":"r-10 * * * * * * * w-1", "txn91":"r-7 * * * * * * * w-1", "txn92":"r-6 * * * * * * * w-2", "txn93":"r-10 * * * * * * * w-5", "txn94":"r-10 * * * * * * * w-4", "txn95":"r-9 * * * * * * * w-2", "txn96":"r-7 * * * * * * * w-2", "txn97":"r-8 * * * * * * * w-3", "txn98":"r-9 * * * * * * * w-2", "txn99":"r-7 * * * * * * * w-2"}'
benchmarks/ADRS/txn_scheduling/evaluator/wrapper.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backwards-compat wrapper for old Python-based evaluators.
2
+
3
+ Old-style evaluators define ``evaluate(program_path) -> dict``. This module
4
+ bridges that interface to the container JSON protocol expected by
5
+ ContainerizedEvaluator.
6
+
7
+ Usage — add this to the bottom of your evaluator.py::
8
+
9
+ if __name__ == "__main__":
10
+ from wrapper import run
11
+ run(evaluate)
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ import traceback
17
+
18
+
19
def run(evaluate_fn):
    """Call *evaluate_fn*, format the result as container-protocol JSON on stdout.

    * Reads ``sys.argv[1]`` as the program path (exits with status 1 and a
      usage message on stderr when it is missing).
    * Redirects stdout → stderr while *evaluate_fn* runs so that debug prints
      don't contaminate the JSON output; stdout is restored even if
      *evaluate_fn* raises something other than ``Exception``.
    * Separates numeric metrics from non-numeric artifacts. Values that are
      neither bool/int/float nor str/list/dict are dropped.
    * Guarantees ``combined_score`` is always present in metrics.

    An exception from *evaluate_fn*, or a non-dict return value, produces a
    ``"status": "error"`` document with a zero score instead of propagating.
    """
    if len(sys.argv) < 2:
        print("Usage: evaluator.py <program_path>", file=sys.stderr)
        sys.exit(1)

    program_path = sys.argv[1]

    def _emit_error(artifacts):
        # Single place that builds the zero-score error document.
        print(
            json.dumps(
                {
                    "status": "error",
                    "combined_score": 0.0,
                    "metrics": {"combined_score": 0.0},
                    "artifacts": artifacts,
                }
            )
        )

    # Redirect stdout → stderr during evaluation so debug prints from
    # the evaluator don't contaminate the JSON output on stdout.
    real_stdout = sys.stdout
    sys.stdout = sys.stderr
    failure = None
    result = None
    try:
        result = evaluate_fn(program_path)
    except Exception as e:
        failure = {"error": str(e), "traceback": traceback.format_exc()}
    finally:
        # Always undo the redirect, including for KeyboardInterrupt/SystemExit.
        sys.stdout = real_stdout

    if failure is not None:
        _emit_error(failure)
        return

    if not isinstance(result, dict):
        _emit_error(
            {"error": f"evaluate() returned {type(result).__name__}, expected dict"}
        )
        return

    # Separate numeric metrics from non-numeric artifacts.
    metrics = {}
    artifacts = {}
    for k, v in result.items():
        if isinstance(v, (bool, int, float)):
            metrics[k] = float(v)
        elif isinstance(v, str):
            artifacts[k] = v
        elif isinstance(v, (list, dict)):
            artifacts[k] = json.dumps(v)

    if "combined_score" not in metrics:
        metrics["combined_score"] = 0.0

    # An "error" artifact marks the run as failed even though a dict came back.
    status = "error" if "error" in artifacts else "success"
    output = {
        "status": status,
        "combined_score": metrics["combined_score"],
        "metrics": metrics,
    }
    if artifacts:
        output["artifacts"] = artifacts

    print(json.dumps(output))
+ print(json.dumps(output))
benchmarks/ADRS/txn_scheduling/initial_program.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+
3
+ from txn_simulator import Workload
4
+ from workloads import WORKLOAD_1, WORKLOAD_2, WORKLOAD_3
5
+
6
+ # EVOLVE-BLOCK-START
7
+
8
def get_best_schedule(workload, num_seqs):
    """
    Get optimal schedule using greedy cost sampling strategy.

    Starting from a random transaction, the schedule is grown one transaction
    at a time: up to ``num_samples`` not-yet-scheduled transactions are drawn
    at random and the one giving the lowest makespan for the extended prefix
    is appended.

    Args:
        workload: Object exposing ``num_txns``, ``debug`` and
            ``get_opt_seq_cost(seq)``.
        num_seqs: Accepted for interface compatibility; not used by this
            strategy.

    Returns:
        Tuple of (lowest makespan, corresponding schedule)
    """
    def get_greedy_cost_sampled(num_samples, sample_rate):
        # greedy with random starting point
        start_txn = random.randint(0, workload.num_txns - 1)
        txn_seq = [start_txn]
        remaining_txns = [x for x in range(workload.num_txns) if x != start_txn]

        for _ in range(workload.num_txns - 1):
            # With probability (1 - sample_rate) skip the greedy step and
            # append a uniformly random remaining transaction instead.
            if random.random() > sample_rate:
                idx = random.randint(0, len(remaining_txns) - 1)
                txn_seq.append(remaining_txns.pop(idx))
                continue

            # No finite cost can be rejected by the initial bound.
            min_cost = float("inf")
            min_txn = -1
            holdout_txns = []
            for _ in range(num_samples):
                # When a single candidate is left there is nothing to draw.
                last_candidate = len(remaining_txns) <= 1
                idx = 0 if last_candidate else random.randint(0, len(remaining_txns) - 1)
                t = remaining_txns.pop(idx)
                holdout_txns.append(t)
                if workload.debug:
                    print(remaining_txns, holdout_txns)
                cost = workload.get_opt_seq_cost(txn_seq + [t])
                if cost < min_cost:
                    min_cost = cost
                    min_txn = t
                if last_candidate:
                    break
            assert min_txn != -1
            txn_seq.append(min_txn)
            # Sampled-but-rejected transactions go back into the pool.
            holdout_txns.remove(min_txn)
            remaining_txns.extend(holdout_txns)

            if workload.debug:
                print("min: ", min_txn, remaining_txns, holdout_txns, txn_seq)
        if workload.debug:
            print(txn_seq)
            print(len(set(txn_seq)))
        assert len(set(txn_seq)) == workload.num_txns

        overall_cost = workload.get_opt_seq_cost(txn_seq)

        return overall_cost, txn_seq

    return get_greedy_cost_sampled(10, 1.0)
83
+
84
+ # EVOLVE-BLOCK-END
85
+
86
def get_random_costs():
    """
    Schedule the three predefined workloads and total their makespans.

    Prints the three individual costs on one line.

    Returns:
        Tuple of (sum of the three makespans, list of the three schedules in
        workload order).
    """
    costs = []
    schedules = []
    for spec in (WORKLOAD_1, WORKLOAD_2, WORKLOAD_3):
        current = Workload(spec)
        _, order = get_best_schedule(current, 10)
        # Re-score the returned order instead of trusting the reported makespan.
        costs.append(current.get_opt_seq_cost(order))
        schedules.append(order)

    first, second, third = costs
    print(first, second, third)
    return first + second + third, schedules
102
+
103
+
104
if __name__ == "__main__":
    # Script entry point: schedule all three workloads and report the summed
    # makespan. The per-workload schedules are returned but not printed here.
    makespan, schedule = get_random_costs()
    print(f"Makespan: {makespan}")
benchmarks/ale_bench/README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ALE-Bench: AtCoder Heuristic Contest Benchmark
2
+
3
+ 10 problems from AtCoder Heuristic Contests (AHC), evaluated via the `ale_bench` package. Programs are written in C++ and scored on 50 public test cases during evolution. A separate private evaluator runs the full hidden test set for final ranking.
4
+
5
+ ## Problems
6
+
7
+ | Problem | Description |
8
+ |---------|-------------|
9
+ | `ahc008` | Pet partitioning — place walls to create pet-free areas on a 30×30 grid over 300 turns |
10
+ | `ahc011` | AtCoder Heuristic Contest 11 |
11
+ | `ahc015` | AtCoder Heuristic Contest 15 |
12
+ | `ahc016` | AtCoder Heuristic Contest 16 |
13
+ | `ahc024` | AtCoder Heuristic Contest 24 |
14
+ | `ahc025` | Balance weighing — use a balance scale to divide N items into D equal-weight sets using Q queries |
15
+ | `ahc026` | AtCoder Heuristic Contest 26 |
16
+ | `ahc027` | AtCoder Heuristic Contest 27 |
17
+ | `ahc039` | AtCoder Heuristic Contest 39 |
18
+ | `ahc046` | AtCoder Heuristic Contest 46 |
19
+
20
+ ## Quick Start
21
+
22
+ Run evolution on a single problem:
23
+
24
+ ```bash
25
+ uv run skydiscover-run \
26
+ benchmarks/ale_bench/ale-bench-lite-problems/ahc025/initial_program.cpp \
27
+ benchmarks/ale_bench/ale-bench-lite-problems/ahc025/evaluator.py \
28
+ -c benchmarks/ale_bench/ale-bench-lite-problems/ahc025/config.yaml \
29
+ --search evox \
30
+ -i 100
31
+ ```
32
+
33
+ ## Scoring
34
+
35
+ During evolution, each iteration runs 50 public test cases:
36
+
37
+ ```
38
+ combined_score = overall_absolute_score * optim_factor / num_public_cases
39
+ ```
40
+
41
+ `optim_factor` is `+1` for maximize problems and `-1` for minimize problems (so `combined_score` is always higher-is-better).
42
+
43
+ ## Private Evaluation
44
+
45
+ After evolution, evaluate the best program on the full private test set:
46
+
47
+ ```bash
48
+ python benchmarks/ale_bench/private_eval.py \
49
+ --program-path path/to/best_program.cpp \
50
+ --problem-id ahc025
51
+ ```
52
+
53
+ This runs 3 independent evaluations and reports the average private rank, performance score, and per-case pass/fail counts.
54
+
55
+ ## Directory Structure
56
+
57
+ ```
58
+ ale_bench/
59
+ ├── ale-bench-lite-problems/
60
+ │ └── ahcXXX/
61
+ │ ├── initial_program.cpp # Starting C++ solution
62
+ │ ├── evaluator.py # Runs 50 public cases via ale_bench
63
+ │ └── config.yaml # Search config (cpp, diff-based, 100 iterations)
64
+ ├── ale_agent_best/
65
+ │ └── ahcXXX.cpp # Best known solutions (reference)
66
+ └── private_eval.py # Full private set evaluation + ranking
67
+ ```
68
+
69
+ ## Requirements
70
+
71
+ Requires the `ale_bench` and `ale_bench_eval` packages. These are not in the default `uv sync` — install them separately per the ALE-Bench documentation.
72
+
73
+ ## Config Defaults
74
+
75
+ All problems share the same base config:
76
+
77
+ ```yaml
78
+ language: cpp
79
+ diff_based_evolution: true
80
+ max_iterations: 100
81
+ max_solution_length: 60000
82
+ evaluator:
83
+ timeout: 10000
84
+ ```
benchmarks/ale_bench/ale-bench-lite-problems/ahc008/initial_program.cpp ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ #include <iostream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <algorithm>
6
+ // #include <map>
7
+ // #include <set>
8
+ #include <queue>
9
+ #include <cmath>
10
+ #include <iomanip>
11
+ #include <limits>
12
+
13
+ // --- Constants ---
14
+ constexpr int GRID_SIZE = 30;
15
+ constexpr int NUM_TURNS = 300;
16
+ constexpr int INF = std::numeric_limits<int>::max();
17
+
18
// A (row, column) pair. Used both for grid cells and for direction offsets.
struct Point {
    int r, c;

    // Equal when both coordinates match.
    bool operator==(const Point& rhs) const { return r == rhs.r && c == rhs.c; }
    bool operator!=(const Point& rhs) const { return r != rhs.r || c != rhs.c; }

    // Row-major ordering: compare rows first, then columns.
    bool operator<(const Point& rhs) const {
        return r < rhs.r || (r == rhs.r && c < rhs.c);
    }
};
28
+ const Point INVALID_POINT = {-1, -1};
29
+
30
+
31
+ // Tunable parameters
32
+ constexpr int STAND_OUTSIDE_INNER_SAFE_PENALTY = 1000;
33
+ constexpr int ADJACENT_WALL_PRIORITY_BONUS = 0;
34
+ constexpr int NEAR_PET_PENALTY_POINTS_PER_PET = 0;
35
+ constexpr int NEAR_PET_RADIUS = 2;
36
+ constexpr int MAX_STUCK_TURNS = 10; // Slightly increased
37
+
38
+ // Directions: Up, Down, Left, Right (indices 0, 1, 2, 3)
39
+ const Point DIRS[4] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
40
+ const char DIR_CHARS_BUILD[4] = {'u', 'd', 'l', 'r'};
41
+ const char DIR_CHARS_MOVE[4] = {'U', 'D', 'L', 'R'};
42
+ const char PET_MOVE_CHARS[4] = {'U', 'D', 'L', 'R'};
43
+
44
+ struct PetInfo {
45
+ Point pos;
46
+ int type;
47
+ int id;
48
+ };
49
+
50
// What a human is trying to do on the current turn.
enum class HumanObjective {
    BUILDING_WALLS,        // still has unbuilt assigned wall cells
    GOING_TO_SAFE_SPOT,    // all walls built, walking to final_stand_pos
    STAYING_IN_SAFE_SPOT,  // all walls built and already at final_stand_pos
    REPOSITIONING_STUCK    // failed to find a buildable wall for too many turns
    // FLEEING_PET_IN_PEN removed, simplified objective setting
};
57
+
58
+ struct HumanInfo {
59
+ Point pos;
60
+ int id;
61
+
62
+ int strip_r_start;
63
+ int strip_r_end;
64
+
65
+ Point inner_safe_ul;
66
+ Point inner_safe_br;
67
+ Point final_stand_pos;
68
+
69
+ std::vector<Point> assigned_wall_cells;
70
+ HumanObjective objective;
71
+ int turns_stuck_building = 0;
72
+ };
73
+
74
+ // --- Game Grid and State ---
75
+ bool is_impassable_grid_static[GRID_SIZE + 1][GRID_SIZE + 1];
76
+ std::vector<PetInfo> pets_global_state;
77
+ std::vector<HumanInfo> humans_global_state;
78
+ int N_pets_global, M_humans_global;
79
+
80
+ Point bfs_parent_grid[GRID_SIZE + 1][GRID_SIZE + 1];
81
+ bool bfs_visited_grid[GRID_SIZE + 1][GRID_SIZE + 1];
82
+
83
+
84
+ // --- Utility Functions ---
85
+ bool is_valid_coord(int val) {
86
+ return val >= 1 && val <= GRID_SIZE;
87
+ }
88
+
89
+ bool is_valid_point(Point p) {
90
+ return is_valid_coord(p.r) && is_valid_coord(p.c);
91
+ }
92
+
93
+ int manhattan_distance(Point p1, Point p2) {
94
+ if (!is_valid_point(p1) || !is_valid_point(p2)) return INF;
95
+ return std::abs(p1.r - p2.r) + std::abs(p1.c - p2.c);
96
+ }
97
+
98
+ int count_adjacent_walls_or_boundaries(Point p) {
99
+ int count = 0;
100
+ for (int i = 0; i < 4; ++i) {
101
+ Point neighbor = {p.r + DIRS[i].r, p.c + DIRS[i].c};
102
+ if (!is_valid_point(neighbor) || (is_valid_point(neighbor) && is_impassable_grid_static[neighbor.r][neighbor.c])) {
103
+ count++;
104
+ }
105
+ }
106
+ return count;
107
+ }
108
+
109
+ bool can_theoretically_build_at(Point wall_pos, int builder_human_id) {
110
+ if (!is_valid_point(wall_pos)) return false;
111
+ if (is_impassable_grid_static[wall_pos.r][wall_pos.c]) return false;
112
+
113
+ for (const auto& pet : pets_global_state) {
114
+ if (pet.pos == wall_pos) return false;
115
+ if (manhattan_distance(wall_pos, pet.pos) == 1) return false;
116
+ }
117
+
118
+ for (const auto& human : humans_global_state) {
119
+ if (human.id == builder_human_id) continue; // Builder themself can be adjacent
120
+ if (human.pos == wall_pos) return false; // Other human on the wall_pos
121
+ }
122
+ return true;
123
+ }
124
+
125
+ char get_bfs_move_char(Point start_pos, Point target_pos,
126
+ const std::vector<Point>& current_turn_tentative_walls) {
127
+ if (start_pos == target_pos) return '.';
128
+
129
+ std::queue<Point> q;
130
+ q.push(start_pos);
131
+
132
+ for(int r_bfs = 1; r_bfs <= GRID_SIZE; ++r_bfs) for(int c_bfs = 1; c_bfs <= GRID_SIZE; ++c_bfs) {
133
+ bfs_visited_grid[r_bfs][c_bfs] = false;
134
+ bfs_parent_grid[r_bfs][c_bfs] = INVALID_POINT;
135
+ }
136
+ if (!is_valid_point(start_pos)) return '.';
137
+ bfs_visited_grid[start_pos.r][start_pos.c] = true;
138
+
139
+ Point path_found_dest = INVALID_POINT;
140
+
141
+ while(!q.empty()){
142
+ Point curr = q.front();
143
+ q.pop();
144
+
145
+ for(int i_dir=0; i_dir < 4; ++i_dir){
146
+ Point next_p = {curr.r + DIRS[i_dir].r, curr.c + DIRS[i_dir].c};
147
+
148
+ if(is_valid_point(next_p) &&
149
+ !is_impassable_grid_static[next_p.r][next_p.c] &&
150
+ !bfs_visited_grid[next_p.r][next_p.c]){
151
+
152
+ bool is_tentative_wall_conflict = false;
153
+ for(const auto& tw : current_turn_tentative_walls) {
154
+ if(next_p == tw) {
155
+ is_tentative_wall_conflict = true;
156
+ break;
157
+ }
158
+ }
159
+ if(is_tentative_wall_conflict) continue;
160
+
161
+ bfs_visited_grid[next_p.r][next_p.c] = true;
162
+ bfs_parent_grid[next_p.r][next_p.c] = curr;
163
+
164
+ if (next_p == target_pos) {
165
+ path_found_dest = next_p;
166
+ goto bfs_done_label;
167
+ }
168
+ q.push(next_p);
169
+ }
170
+ }
171
+ }
172
+
173
+ bfs_done_label:;
174
+ if (path_found_dest.r == -1) return '.';
175
+
176
+ Point current_step_in_path = path_found_dest;
177
+ while(!(bfs_parent_grid[current_step_in_path.r][current_step_in_path.c] == INVALID_POINT) &&
178
+ !(bfs_parent_grid[current_step_in_path.r][current_step_in_path.c] == start_pos)) {
179
+ current_step_in_path = bfs_parent_grid[current_step_in_path.r][current_step_in_path.c];
180
+ }
181
+
182
+ for(int i_dir = 0; i_dir < 4; ++i_dir){
183
+ if(start_pos.r + DIRS[i_dir].r == current_step_in_path.r &&
184
+ start_pos.c + DIRS[i_dir].c == current_step_in_path.c){
185
+ return DIR_CHARS_MOVE[i_dir];
186
+ }
187
+ }
188
+ return '.';
189
+ }
190
+
191
+
192
+ void initialize_game() {
193
+ std::cin >> N_pets_global;
194
+ pets_global_state.resize(N_pets_global);
195
+ for (int i = 0; i < N_pets_global; ++i) {
196
+ pets_global_state[i].id = i;
197
+ std::cin >> pets_global_state[i].pos.r >> pets_global_state[i].pos.c >> pets_global_state[i].type;
198
+ }
199
+
200
+ std::cin >> M_humans_global;
201
+ humans_global_state.resize(M_humans_global);
202
+
203
+ for(int r_grid=0; r_grid <= GRID_SIZE; ++r_grid) for(int c_grid=0; c_grid <= GRID_SIZE; ++c_grid) is_impassable_grid_static[r_grid][c_grid] = false;
204
+
205
+ int base_strip_height = GRID_SIZE / M_humans_global;
206
+ int remainder_heights = GRID_SIZE % M_humans_global;
207
+ int current_r_start_coord = 1;
208
+
209
+ for (int i = 0; i < M_humans_global; ++i) {
210
+ HumanInfo& human = humans_global_state[i];
211
+ human.id = i;
212
+ std::cin >> human.pos.r >> human.pos.c;
213
+
214
+ int strip_h_for_this_human = base_strip_height + (i < remainder_heights ? 1 : 0);
215
+ human.strip_r_start = current_r_start_coord;
216
+ human.strip_r_end = human.strip_r_start + strip_h_for_this_human - 1;
217
+ human.strip_r_end = std::min(human.strip_r_end, GRID_SIZE);
218
+
219
+ int actual_strip_h = human.strip_r_end - human.strip_r_start + 1;
220
+ int actual_strip_w = GRID_SIZE;
221
+
222
+ human.inner_safe_ul.r = human.strip_r_start + (actual_strip_h >= 3 ? 1 : 0);
223
+ human.inner_safe_ul.c = 1 + (actual_strip_w >= 3 ? 1 : 0);
224
+ human.inner_safe_br.r = human.strip_r_end - (actual_strip_h >= 3 ? 1 : 0);
225
+ human.inner_safe_br.c = GRID_SIZE - (actual_strip_w >= 3 ? 1 : 0);
226
+
227
+ if (human.inner_safe_ul.r > human.inner_safe_br.r) human.inner_safe_br.r = human.inner_safe_ul.r;
228
+ if (human.inner_safe_ul.c > human.inner_safe_br.c) human.inner_safe_br.c = human.inner_safe_ul.c;
229
+
230
+ human.final_stand_pos = {
231
+ human.inner_safe_ul.r + (human.inner_safe_br.r - human.inner_safe_ul.r) / 2,
232
+ human.inner_safe_ul.c + (human.inner_safe_br.c - human.inner_safe_ul.c) / 2
233
+ };
234
+ human.final_stand_pos.r = std::max(human.inner_safe_ul.r, std::min(human.inner_safe_br.r, human.final_stand_pos.r));
235
+ human.final_stand_pos.c = std::max(human.inner_safe_ul.c, std::min(human.inner_safe_br.c, human.final_stand_pos.c));
236
+ if (!is_valid_point(human.final_stand_pos)) {
237
+ human.final_stand_pos = {human.strip_r_start, 1};
238
+ }
239
+
240
+ human.assigned_wall_cells.clear();
241
+ int r_s = human.strip_r_start;
242
+ int r_e = human.strip_r_end;
243
+
244
+ if (i == 0) {
245
+ for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
246
+ } else {
247
+ for (int c_coord = GRID_SIZE / 2 + 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
248
+ }
249
+ if (i == M_humans_global - 1) {
250
+ for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
251
+ } else {
252
+ for (int c_coord = 1; c_coord <= GRID_SIZE / 2; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
253
+ }
254
+ for (int r_mid = r_s + 1; r_mid <= r_e - 1; ++r_mid) {
255
+ human.assigned_wall_cells.push_back({r_mid, 1});
256
+ human.assigned_wall_cells.push_back({r_mid, GRID_SIZE});
257
+ }
258
+
259
+ std::sort(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end());
260
+ human.assigned_wall_cells.erase(
261
+ std::unique(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end()),
262
+ human.assigned_wall_cells.end()
263
+ );
264
+ current_r_start_coord = human.strip_r_end + 1;
265
+ }
266
+ }
267
+
268
+ std::string decide_human_actions() {
269
+ std::string actions_str(M_humans_global, '.');
270
+ std::vector<Point> tentative_walls_this_turn;
271
+ std::vector<Point> tentative_move_targets_this_turn(M_humans_global, INVALID_POINT);
272
+
273
+ for (int i = 0; i < M_humans_global; ++i) {
274
+ HumanInfo& human = humans_global_state[i];
275
+
276
+ int unbuilt_walls_count = 0;
277
+ for (const auto& wall_cell : human.assigned_wall_cells) {
278
+ if (is_valid_point(wall_cell) && !is_impassable_grid_static[wall_cell.r][wall_cell.c]) {
279
+ unbuilt_walls_count++;
280
+ }
281
+ }
282
+
283
+ if (unbuilt_walls_count == 0) {
284
+ human.objective = (human.pos == human.final_stand_pos) ?
285
+ HumanObjective::STAYING_IN_SAFE_SPOT :
286
+ HumanObjective::GOING_TO_SAFE_SPOT;
287
+ } else {
288
+ human.objective = HumanObjective::BUILDING_WALLS;
289
+ }
290
+
291
+ if(human.objective == HumanObjective::BUILDING_WALLS && human.turns_stuck_building >= MAX_STUCK_TURNS) {
292
+ human.objective = HumanObjective::REPOSITIONING_STUCK;
293
+ }
294
+
295
+ char chosen_action_for_human_i = '.';
296
+ if (human.objective == HumanObjective::STAYING_IN_SAFE_SPOT) {
297
+ chosen_action_for_human_i = '.';
298
+ } else if (human.objective == HumanObjective::GOING_TO_SAFE_SPOT ||
299
+ human.objective == HumanObjective::REPOSITIONING_STUCK) {
300
+ if(human.objective == HumanObjective::REPOSITIONING_STUCK) human.turns_stuck_building = 0;
301
+
302
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
303
+
304
+ } else if (human.objective == HumanObjective::BUILDING_WALLS) {
305
+ Point best_wall_target = INVALID_POINT;
306
+ Point best_stand_point = INVALID_POINT;
307
+ int min_eval_score = INF;
308
+
309
+ for (const auto& wall_coord : human.assigned_wall_cells) {
310
+ if (!is_valid_point(wall_coord) || is_impassable_grid_static[wall_coord.r][wall_coord.c]) continue;
311
+ if (!can_theoretically_build_at(wall_coord, human.id)) continue;
312
+
313
+ int adj_wall_bonus_val = count_adjacent_walls_or_boundaries(wall_coord) * ADJACENT_WALL_PRIORITY_BONUS;
314
+ int current_near_pet_penalty = 0; // NEAR_PET_PENALTY_POINTS_PER_PET is 0
315
+
316
+ for (int k_dir_idx = 0; k_dir_idx < 4; ++k_dir_idx) {
317
+ Point potential_stand_pos = {wall_coord.r + DIRS[k_dir_idx].r,
318
+ wall_coord.c + DIRS[k_dir_idx].c};
319
+
320
+ if (!is_valid_point(potential_stand_pos) || is_impassable_grid_static[potential_stand_pos.r][potential_stand_pos.c]) continue;
321
+
322
+ bool conflict_with_tentative_wall_build_spot = false;
323
+ for(const auto& tw : tentative_walls_this_turn) { if(potential_stand_pos == tw) { conflict_with_tentative_wall_build_spot = true; break; }}
324
+ if(conflict_with_tentative_wall_build_spot) continue;
325
+
326
+ bool conflict_with_tentative_move_dest = false;
327
+ for(int j=0; j < i; ++j) {
328
+ if (tentative_move_targets_this_turn[j] == potential_stand_pos) { conflict_with_tentative_move_dest = true; break; }
329
+ }
330
+ if (conflict_with_tentative_move_dest) continue;
331
+
332
+ int current_dist_to_stand = manhattan_distance(human.pos, potential_stand_pos);
333
+ int current_eval_score = current_dist_to_stand - adj_wall_bonus_val + current_near_pet_penalty;
334
+
335
+ bool is_inside_inner_safe_region =
336
+ (potential_stand_pos.r >= human.inner_safe_ul.r &&
337
+ potential_stand_pos.r <= human.inner_safe_br.r &&
338
+ potential_stand_pos.c >= human.inner_safe_ul.c &&
339
+ potential_stand_pos.c <= human.inner_safe_br.c);
340
+
341
+ if (!is_inside_inner_safe_region) {
342
+ current_eval_score += STAND_OUTSIDE_INNER_SAFE_PENALTY;
343
+ }
344
+
345
+ if (current_eval_score < min_eval_score) {
346
+ min_eval_score = current_eval_score;
347
+ best_wall_target = wall_coord;
348
+ best_stand_point = potential_stand_pos;
349
+ } else if (current_eval_score == min_eval_score) {
350
+ if (best_wall_target.r == -1 ||
351
+ wall_coord < best_wall_target ||
352
+ (wall_coord == best_wall_target && potential_stand_pos < best_stand_point)) {
353
+ best_wall_target = wall_coord;
354
+ best_stand_point = potential_stand_pos;
355
+ }
356
+ }
357
+ }
358
+ }
359
+
360
+ if (best_wall_target.r != -1) {
361
+ human.turns_stuck_building = 0;
362
+ if (human.pos == best_stand_point) {
363
+ for(int k_dir=0; k_dir<4; ++k_dir){
364
+ if(human.pos.r + DIRS[k_dir].r == best_wall_target.r &&
365
+ human.pos.c + DIRS[k_dir].c == best_wall_target.c){
366
+ chosen_action_for_human_i = DIR_CHARS_BUILD[k_dir];
367
+ break;
368
+ }
369
+ }
370
+ } else {
371
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, best_stand_point, tentative_walls_this_turn);
372
+ }
373
+ } else {
374
+ if (unbuilt_walls_count > 0) {
375
+ human.turns_stuck_building++;
376
+ }
377
+ if (human.pos != human.final_stand_pos) {
378
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
379
+ } else {
380
+ chosen_action_for_human_i = '.';
381
+ }
382
+ }
383
+ }
384
+
385
+ actions_str[i] = chosen_action_for_human_i;
386
+
387
+ if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'u' || chosen_action_for_human_i == 'd' || chosen_action_for_human_i == 'l' || chosen_action_for_human_i == 'r')) {
388
+ for(int k_dir=0; k_dir<4; ++k_dir) {
389
+ if (chosen_action_for_human_i == DIR_CHARS_BUILD[k_dir]) {
390
+ Point built_wall_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
391
+ if (is_valid_point(built_wall_pos)) {
392
+ tentative_walls_this_turn.push_back(built_wall_pos);
393
+ }
394
+ break;
395
+ }
396
+ }
397
+ } else if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'U' || chosen_action_for_human_i == 'D' || chosen_action_for_human_i == 'L' || chosen_action_for_human_i == 'R')) {
398
+ for(int k_dir=0; k_dir<4; ++k_dir) {
399
+ if (chosen_action_for_human_i == DIR_CHARS_MOVE[k_dir]) {
400
+ Point target_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
401
+ if (is_valid_point(target_pos)) {
402
+ tentative_move_targets_this_turn[i] = target_pos;
403
+ } else {
404
+ actions_str[i] = '.';
405
+ }
406
+ break;
407
+ }
408
+ }
409
+ }
410
+ }
411
+
412
+ for (int i = 0; i < M_humans_global; ++i) {
413
+ if (actions_str[i] != '.' && (actions_str[i] == 'U' || actions_str[i] == 'D' || actions_str[i] == 'L' || actions_str[i] == 'R')) {
414
+ Point target_move_sq = tentative_move_targets_this_turn[i];
415
+ if (target_move_sq.r == -1) {
416
+ actions_str[i] = '.';
417
+ continue;
418
+ }
419
+
420
+ bool conflict_with_wall = false;
421
+ for (const auto& wall_being_built : tentative_walls_this_turn) {
422
+ if (target_move_sq == wall_being_built) {
423
+ conflict_with_wall = true;
424
+ break;
425
+ }
426
+ }
427
+ if (conflict_with_wall) {
428
+ actions_str[i] = '.';
429
+ } else {
430
+ for (int j = 0; j < i; ++j) {
431
+ if (actions_str[j] != '.' && (actions_str[j] == 'U' || actions_str[j] == 'D' || actions_str[j] == 'L' || actions_str[j] == 'R') &&
432
+ tentative_move_targets_this_turn[j] == target_move_sq) {
433
+ actions_str[i] = '.';
434
+ break;
435
+ }
436
+ }
437
+ }
438
+ }
439
+ }
440
+ return actions_str;
441
+ }
442
+
443
+ void apply_actions_and_update_state(const std::string& actions_str_final) {
444
+ for (int i = 0; i < M_humans_global; ++i) {
445
+ char action = actions_str_final[i];
446
+ HumanInfo& human = humans_global_state[i];
447
+ if (action != '.' && (action == 'u' || action == 'd' || action == 'l' || action == 'r')) {
448
+ for(int k_dir=0; k_dir<4; ++k_dir){
449
+ if (action == DIR_CHARS_BUILD[k_dir]) {
450
+ Point wall_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
451
+ if (is_valid_point(wall_pos) && !is_impassable_grid_static[wall_pos.r][wall_pos.c]) {
452
+ is_impassable_grid_static[wall_pos.r][wall_pos.c] = true;
453
+ }
454
+ break;
455
+ }
456
+ }
457
+ }
458
+ }
459
+
460
+ for (int i = 0; i < M_humans_global; ++i) {
461
+ char action = actions_str_final[i];
462
+ HumanInfo& human = humans_global_state[i];
463
+ if (action != '.' && (action == 'U' || action == 'D' || action == 'L' || action == 'R')) {
464
+ for(int k_dir=0; k_dir<4; ++k_dir){
465
+ if (action == DIR_CHARS_MOVE[k_dir]) {
466
+ Point next_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
467
+ if (is_valid_point(next_pos) && !is_impassable_grid_static[next_pos.r][next_pos.c]) {
468
+ human.pos = next_pos;
469
+ }
470
+ break;
471
+ }
472
+ }
473
+ }
474
+ }
475
+
476
+ for (int i = 0; i < N_pets_global; ++i) {
477
+ std::string pet_moves_str;
478
+ std::cin >> pet_moves_str;
479
+ if (pet_moves_str == ".") continue;
480
+
481
+ for (char move_char : pet_moves_str) {
482
+ for(int k_dir=0; k_dir<4; ++k_dir){
483
+ if(move_char == PET_MOVE_CHARS[k_dir]){
484
+ pets_global_state[i].pos.r += DIRS[k_dir].r;
485
+ pets_global_state[i].pos.c += DIRS[k_dir].c;
486
+ break;
487
+ }
488
+ }
489
+ }
490
+ }
491
+ }
492
+
493
+ int main() {
494
+ std::ios_base::sync_with_stdio(false);
495
+ std::cin.tie(NULL);
496
+
497
+ initialize_game();
498
+
499
+ for (int turn_idx = 0; turn_idx < NUM_TURNS; ++turn_idx) {
500
+ std::string actions_to_perform = decide_human_actions();
501
+ std::cout << actions_to_perform << std::endl;
502
+
503
+ apply_actions_and_update_state(actions_to_perform);
504
+ }
505
+
506
+ return 0;
507
+ }
508
+ # EVOLVE-BLOCK-END
benchmarks/ale_bench/ale-bench-lite-problems/ahc011/best_program.cpp ADDED
@@ -0,0 +1,730 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ #include <iostream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <array>
6
+ #include <algorithm>
7
+ #include <unordered_map>
8
+ #include <map> // For A* visited set
9
+ #include <iomanip>
10
+ #include <chrono>
11
+ #include <functional> // For std::hash
12
+ #include <cmath> // For std::round
13
+ #include <random> // For std::mt19937
14
+ #include <numeric> // For std::iota
15
+ #include <queue> // For A* search (priority_queue)
16
+
17
+ // Constants for tile connections
18
+ const int LEFT_MASK = 1;
19
+ const int UP_MASK = 2;
20
+ const int RIGHT_MASK = 4;
21
+ const int DOWN_MASK = 8;
22
+
23
+ // Max N value, actual N read from input
24
+ const int N_MAX_CONST = 10;
25
+ int N_actual; // Actual N for the current test case
26
+ int T_param; // Actual T for the current test case
27
+
28
+ const int DR_TILE_RELATIVE_TO_EMPTY[] = {-1, 1, 0, 0};
29
+ const int DC_TILE_RELATIVE_TO_EMPTY[] = {0, 0, -1, 1};
30
+ const char MOVE_CHARS[] = {'U', 'D', 'L', 'R'};
31
+
32
+
33
+ std::mt19937 zobrist_rng_engine(123456789);
34
+ std::uniform_int_distribution<uint64_t> distrib_uint64;
35
+ uint64_t zobrist_tile_keys[N_MAX_CONST][N_MAX_CONST][16];
36
+
37
// Lookup table from a character code to its hexadecimal digit value.
// Characters that are not hex digits map to 0.
int CHAR_TO_VAL[256];

// Fills CHAR_TO_VAL for '0'-'9', 'a'-'f' and 'A'-'F'; every other entry is 0.
inline void init_char_to_val() {
    for (int& entry : CHAR_TO_VAL) entry = 0;

    for (char digit = '0'; digit <= '9'; ++digit) {
        CHAR_TO_VAL[(unsigned char)digit] = digit - '0';
    }
    for (int offset = 0; offset < 6; ++offset) {
        const int value = 10 + offset;
        CHAR_TO_VAL['a' + offset] = value;
        CHAR_TO_VAL['A' + offset] = value;
    }
}
47
+
48
+
49
+ void init_zobrist_keys() {
50
+ for (int i = 0; i < N_actual; ++i) {
51
+ for (int j = 0; j < N_actual; ++j) {
52
+ for (int k = 0; k < 16; ++k) {
53
+ zobrist_tile_keys[i][j][k] = distrib_uint64(zobrist_rng_engine);
54
+ }
55
+ }
56
+ }
57
+ }
58
+
59
// Converts one hexadecimal digit character to its value (0-15).
//
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Any other character yields 0, which
// matches the CHAR_TO_VAL lookup table and keeps the result usable as an
// index into a 16-entry table.
int hex_char_to_int(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;
}
63
+
64
+
65
+ struct Board {
66
+ std::array<std::array<char, N_MAX_CONST>, N_MAX_CONST> tiles;
67
+ int empty_r, empty_c;
68
+ uint64_t zobrist_hash_value;
69
+
70
+ Board() : empty_r(0), empty_c(0), zobrist_hash_value(0) {}
71
+
72
+ void calculate_initial_hash() {
73
+ zobrist_hash_value = 0;
74
+ for (int i = 0; i < N_actual; ++i) {
75
+ for (int j = 0; j < N_actual; ++j) {
76
+ zobrist_hash_value ^= zobrist_tile_keys[i][j][CHAR_TO_VAL[(unsigned char)tiles[i][j]]];
77
+ }
78
+ }
79
+ }
80
+
81
+ void update_hash_after_move(int pos_tile_becomes_empty_r, int pos_tile_becomes_empty_c,
82
+ int pos_empty_gets_tile_r, int pos_empty_gets_tile_c) {
83
+ int moved_tile_val_int = hex_char_to_int(tiles[pos_empty_gets_tile_r][pos_empty_gets_tile_c]);
84
+
85
+ zobrist_hash_value ^= zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c][moved_tile_val_int];
86
+ zobrist_hash_value ^= zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c][0];
87
+
88
+ zobrist_hash_value ^= zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c][0];
89
+ zobrist_hash_value ^= zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c][moved_tile_val_int];
90
+ }
91
+
92
+ bool apply_move_char(char move_char) {
93
+ int move_dir_idx = -1;
94
+ for(int i=0; i<4; ++i) if(MOVE_CHARS[i] == move_char) move_dir_idx = i;
95
+
96
+ if(move_dir_idx == -1) return false;
97
+
98
+ int tile_to_move_r = empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
99
+ int tile_to_move_c = empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
100
+
101
+ if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
102
+ return false;
103
+ }
104
+
105
+ char moved_tile_hex_val = tiles[tile_to_move_r][tile_to_move_c];
106
+ tiles[empty_r][empty_c] = moved_tile_hex_val;
107
+ tiles[tile_to_move_r][tile_to_move_c] = '0';
108
+
109
+ update_hash_after_move(tile_to_move_r, tile_to_move_c, empty_r, empty_c);
110
+
111
+ empty_r = tile_to_move_r;
112
+ empty_c = tile_to_move_c;
113
+ return true;
114
+ }
115
+ };
116
+
117
+
118
// Structural summary of a board, as produced by calculate_scores().
struct ScoreComponents {
    int max_tree_size;   // node count of the largest component that is a tree
    int num_components;  // number of connected components among non-empty cells
};
122
+ std::unordered_map<uint64_t, ScoreComponents> s_value_cache_by_hash;
123
+ const size_t MAX_SCORE_CACHE_SIZE_CONST = 2000000;
124
+
125
// Disjoint-set union over the cells of a current_N x current_N board that also
// tracks, per set root, the number of nodes and the number of edges in the set.
//
// nodes_in_set starts at 0 for every cell; the caller sets it to 1 for each
// cell that should count as a node before adding edges.
struct DSU {
    std::vector<int> parent;
    std::vector<int> nodes_in_set;  // valid at set roots only
    std::vector<int> edges_in_set;  // valid at set roots only
    int N_sq_total_cells;

    DSU(int current_N) : N_sq_total_cells(current_N * current_N) {
        parent.resize(N_sq_total_cells);
        std::iota(parent.begin(), parent.end(), 0);
        nodes_in_set.assign(N_sq_total_cells, 0);
        edges_in_set.assign(N_sq_total_cells, 0);
    }

    // Returns the root of the set containing `i`, compressing the path on the way.
    int find(int i) {
        int root = i;
        while (parent[root] != root) root = parent[root];
        while (parent[i] != root) {
            const int next = parent[i];
            parent[i] = root;
            i = next;
        }
        return root;
    }

    // Merges the sets containing the two cells (smaller set under the larger).
    // Does nothing if they are already in the same set; merging a set with
    // itself would otherwise double its node and edge counters.
    void unite(int i_idx, int j_idx) {
        int root_i = find(i_idx);
        int root_j = find(j_idx);
        if (root_i == root_j) return;

        if (nodes_in_set[root_i] < nodes_in_set[root_j]) std::swap(root_i, root_j);

        parent[root_j] = root_i;
        nodes_in_set[root_i] += nodes_in_set[root_j];
        edges_in_set[root_i] += edges_in_set[root_j];
    }

    // Records an edge between the two cells: merges their sets if needed and
    // increments the edge counter of the resulting set.
    void add_edge(int u_idx, int v_idx) {
        unite(u_idx, v_idx);
        edges_in_set[find(u_idx)]++;
    }
};
166
+
167
+
168
+ ScoreComponents calculate_scores(const Board& board) {
169
+ auto it_cache = s_value_cache_by_hash.find(board.zobrist_hash_value);
170
+ if (it_cache != s_value_cache_by_hash.end()) {
171
+ return it_cache->second;
172
+ }
173
+
174
+ DSU dsu(N_actual);
175
+
176
+ for (int r = 0; r < N_actual; ++r) {
177
+ for (int c = 0; c < N_actual; ++c) {
178
+ int cell_idx = r * N_actual + c;
179
+ if (board.tiles[r][c] != '0') {
180
+ dsu.nodes_in_set[cell_idx] = 1;
181
+ } else {
182
+ dsu.nodes_in_set[cell_idx] = 0;
183
+ }
184
+ }
185
+ }
186
+
187
+ for (int r = 0; r < N_actual; ++r) {
188
+ for (int c = 0; c < N_actual - 1; ++c) {
189
+ int tile1_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c]];
190
+ int tile2_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c+1]];
191
+ if (tile1_val && tile2_val && (tile1_val & RIGHT_MASK) && (tile2_val & LEFT_MASK)) {
192
+ dsu.add_edge(r * N_actual + c, r * N_actual + (c + 1));
193
+ }
194
+ }
195
+ }
196
+ for (int r = 0; r < N_actual - 1; ++r) {
197
+ for (int c = 0; c < N_actual; ++c) {
198
+ int tile1_val = CHAR_TO_VAL[(unsigned char)board.tiles[r][c]];
199
+ int tile2_val = CHAR_TO_VAL[(unsigned char)board.tiles[r+1][c]];
200
+ if (tile1_val && tile2_val && (tile1_val & DOWN_MASK) && (tile2_val & UP_MASK)) {
201
+ dsu.add_edge(r * N_actual + c, (r + 1) * N_actual + c);
202
+ }
203
+ }
204
+ }
205
+
206
+ int max_tree_size = 0;
207
+ int total_num_components = 0;
208
+
209
+ for (int i = 0; i < dsu.N_sq_total_cells; ++i) {
210
+ if (dsu.parent[i] == i && dsu.nodes_in_set[i] > 0) {
211
+ total_num_components++;
212
+ if (dsu.edges_in_set[i] == dsu.nodes_in_set[i] - 1) {
213
+ if (dsu.nodes_in_set[i] > max_tree_size) {
214
+ max_tree_size = dsu.nodes_in_set[i];
215
+ }
216
+ }
217
+ }
218
+ }
219
+
220
+ ScoreComponents result = {max_tree_size, total_num_components};
221
+ if (s_value_cache_by_hash.size() < MAX_SCORE_CACHE_SIZE_CONST) {
222
+ s_value_cache_by_hash[board.zobrist_hash_value] = result;
223
+ }
224
+ return result;
225
+ }
226
+
227
+
228
+ int TARGET_EMPTY_R_GLOBAL_FOR_A_STAR, TARGET_EMPTY_C_GLOBAL_FOR_A_STAR; // Used by A* heuristic
229
+ bool A_STAR_PHASE_WAS_RUN = false; // Flag to adjust beam score empty penalty
230
+
231
+ double calculate_beam_score(const ScoreComponents& scores, int K_total, const Board& current_board_state) {
232
+ int S = scores.max_tree_size;
233
+
234
+ const double FULL_TREE_BASE_SCORE = 1e18;
235
+ if (S == N_actual * N_actual - 1) {
236
+ return FULL_TREE_BASE_SCORE + (double)(T_param * 2 - K_total);
237
+ }
238
+
239
+ double W_S = 1e9;
240
+ double W_NC = W_S * 0.6; // Slightly reduce component penalty to favor growing S faster.
241
+ double W_K = 1.0;
242
+ double W_empty_dist_penalty_main;
243
+
244
+ if (A_STAR_PHASE_WAS_RUN) { // A* moved empty to target initially
245
+ W_empty_dist_penalty_main = W_K * 0.5; // Very low penalty, allow free movement
246
+ } else { // Empty started at target, or A* failed (should not happen)
247
+ W_empty_dist_penalty_main = W_K * 10.0; // Moderate penalty
248
+ }
249
+
250
+ double score_val = (double)S * W_S;
251
+ if (scores.num_components > 1) {
252
+ score_val -= (double)(scores.num_components - 1) * W_NC;
253
+ } else if (scores.num_components == 0 && N_actual * N_actual - 1 > 0) {
254
+ score_val -= (double)(N_actual * N_actual -1) * W_NC;
255
+ }
256
+
257
+ // Bonus for being very close to a full tree and connected
258
+ if (S >= (N_actual * N_actual - 1) - 2 && scores.num_components == 1 && S < N_actual * N_actual - 1) {
259
+ score_val += W_S * 0.5; // Significant bonus to encourage the last step
260
+ }
261
+
262
+ score_val -= (double)K_total * W_K;
263
+
264
+ // Penalty for empty square relative to (N-1,N-1)
265
+ int dist_empty_to_corner = std::abs(current_board_state.empty_r - (N_actual - 1)) +
266
+ std::abs(current_board_state.empty_c - (N_actual - 1));
267
+ score_val -= dist_empty_to_corner * W_empty_dist_penalty_main;
268
+
269
+ return score_val;
270
+ }
271
+
272
+ double calculate_actual_score(int S, int K_total) {
273
+ if (N_actual * N_actual - 1 == 0) return 0;
274
+ if (S == N_actual * N_actual - 1) {
275
+ if (K_total > T_param) return 0;
276
+ return std::round(500000.0 * (2.0 - (double)K_total / T_param));
277
+ } else {
278
+ return std::round(500000.0 * (double)S / (N_actual * N_actual - 1.0));
279
+ }
280
+ }
281
+
282
+ /* Function: count_matched_edge_pair
283
+ Doc: Returns 1 if two adjacent cells form a valid connection (L-R or U-D), else 0.
284
+ Assumes (r1,c1) and (r2,c2) differ by exactly 1 in Manhattan distance.
285
+ */
286
+ inline int count_matched_edge_pair(const Board& b, int r1, int c1, int r2, int c2) {
287
+ if (r1 == r2) {
288
+ if (c1 > c2) std::swap(c1, c2);
289
+ if (c2 != c1 + 1) return 0;
290
+ int v1 = CHAR_TO_VAL[(unsigned char)b.tiles[r1][c1]];
291
+ int v2 = CHAR_TO_VAL[(unsigned char)b.tiles[r2][c2]];
292
+ if (!v1 || !v2) return 0;
293
+ return ((v1 & RIGHT_MASK) && (v2 & LEFT_MASK)) ? 1 : 0;
294
+ } else if (c1 == c2) {
295
+ if (r1 > r2) std::swap(r1, r2);
296
+ if (r2 != r1 + 1) return 0;
297
+ int v1 = CHAR_TO_VAL[(unsigned char)b.tiles[r1][c1]];
298
+ int v2 = CHAR_TO_VAL[(unsigned char)b.tiles[r2][c2]];
299
+ if (!v1 || !v2) return 0;
300
+ return ((v1 & DOWN_MASK) && (v2 & UP_MASK)) ? 1 : 0;
301
+ }
302
+ return 0;
303
+ }
304
+
305
+ /* Function: count_cell_matched_degree
306
+ Doc: Counts the number of matched edges incident to a given cell (r,c).
307
+ */
308
+ inline int count_cell_matched_degree(const Board& b, int r, int c) {
309
+ int deg = 0;
310
+ if (r > 0) deg += count_matched_edge_pair(b, r - 1, c, r, c);
311
+ if (r + 1 < N_actual) deg += count_matched_edge_pair(b, r, c, r + 1, c);
312
+ if (c > 0) deg += count_matched_edge_pair(b, r, c - 1, r, c);
313
+ if (c + 1 < N_actual) deg += count_matched_edge_pair(b, r, c, r, c + 1);
314
+ return deg;
315
+ }
316
+
317
+ /* Function: compute_total_matched_edges
318
+ Doc: Counts all matched undirected edges on the board by scanning right and down neighbors.
319
+ */
320
+ inline int compute_total_matched_edges(const Board& b) {
321
+ int cnt = 0;
322
+ for (int r = 0; r < N_actual; ++r) {
323
+ for (int c = 0; c + 1 < N_actual; ++c) {
324
+ cnt += count_matched_edge_pair(b, r, c, r, c + 1);
325
+ }
326
+ }
327
+ for (int r = 0; r + 1 < N_actual; ++r) {
328
+ for (int c = 0; c < N_actual; ++c) {
329
+ cnt += count_matched_edge_pair(b, r, c, r + 1, c);
330
+ }
331
+ }
332
+ return cnt;
333
+ }
334
+
335
// One link in the back-pointer chain used to rebuild a beam path:
// the move that was taken and the index of the entry it was taken from.
struct BeamHistoryEntry {
    int parent_history_idx;  // index into beam_history_storage of the predecessor (-1 marks the sentinel)
    char move_char_taken;    // move character that led to this entry
};

// Append-only pool of history entries shared by all beam states.
std::vector<BeamHistoryEntry> beam_history_storage;
// Upper bound on the number of history entries kept.
const size_t MAX_BEAM_HISTORY_STORAGE_SIZE_CONST = 3000000;
341
+
342
+ struct BeamState {
343
+ Board board;
344
+ double beam_score_val;
345
+ int k_beam_moves;
346
+ int history_idx;
347
+ int prev_move_direction_idx;
348
+ int approx_edges; // heuristic: number of matched undirected edges
349
+
350
+ bool operator<(const BeamState& other) const {
351
+ if (beam_score_val != other.beam_score_val) return beam_score_val > other.beam_score_val;
352
+ return approx_edges > other.approx_edges;
353
+ }
354
+ };
355
+
356
+ struct CandidateLight {
357
+ // Doc: Lightweight candidate used to pre-filter by approximate edge count before expensive scoring.
358
+ Board board;
359
+ int approx_edges;
360
+ int k_beam_moves;
361
+ int history_idx;
362
+ int prev_move_direction_idx;
363
+ bool operator<(const CandidateLight& other) const {
364
+ return approx_edges > other.approx_edges; // sort descending by approx_edges
365
+ }
366
+ };
367
+
368
// Wall-clock reference point; set at the start of main.
std::chrono::steady_clock::time_point T_START_CHRONO_MAIN;
// Milliseconds held back from the time budget (universal slack).
const int TIME_LIMIT_MS_SLACK_CONST = 400;
// Remaining budget in milliseconds, computed in main after the initial routing phase.
long long TIME_LIMIT_MS_EFFECTIVE_MAIN;

// Random engine used when shuffling the non-elite candidate pool.
std::mt19937 rng_stochastic_selection_main;
// Board hash -> smallest total move count with which that board has been reached.
std::unordered_map<uint64_t, int> min_K_to_reach_by_hash_main;
// Cap on the number of entries inserted into min_K_to_reach_by_hash_main.
const size_t MAX_MIN_K_CACHE_SIZE_CONST = 2000000;
376
+
377
+
378
+ struct AStarEmptyState {
379
+ int r, c;
380
+ int g_cost;
381
+ std::string path;
382
+
383
+ bool operator>(const AStarEmptyState& other) const {
384
+ int h_cost_this = std::abs(r - TARGET_EMPTY_R_GLOBAL_FOR_A_STAR) + std::abs(c - TARGET_EMPTY_C_GLOBAL_FOR_A_STAR);
385
+ int h_cost_other = std::abs(other.r - TARGET_EMPTY_R_GLOBAL_FOR_A_STAR) + std::abs(other.c - TARGET_EMPTY_C_GLOBAL_FOR_A_STAR);
386
+ if (g_cost + h_cost_this != other.g_cost + h_cost_other) {
387
+ return g_cost + h_cost_this > other.g_cost + h_cost_other;
388
+ }
389
+ return g_cost > other.g_cost;
390
+ }
391
+ };
392
+
393
+ std::string find_path_for_empty(const Board& initial_board_state_for_A_star, int target_r, int target_c) {
394
+ TARGET_EMPTY_R_GLOBAL_FOR_A_STAR = target_r;
395
+ TARGET_EMPTY_C_GLOBAL_FOR_A_STAR = target_c;
396
+
397
+ std::priority_queue<AStarEmptyState, std::vector<AStarEmptyState>, std::greater<AStarEmptyState>> pq;
398
+ std::vector<std::vector<int>> min_g_cost_grid(N_actual, std::vector<int>(N_actual, T_param + 1));
399
+
400
+ pq.push({initial_board_state_for_A_star.empty_r, initial_board_state_for_A_star.empty_c, 0, ""});
401
+ min_g_cost_grid[initial_board_state_for_A_star.empty_r][initial_board_state_for_A_star.empty_c] = 0;
402
+
403
+ int A_star_max_depth = N_actual * N_actual * 2; // Allow more depth just in case
404
+
405
+ while(!pq.empty()){
406
+ AStarEmptyState current = pq.top();
407
+ pq.pop();
408
+
409
+ if (current.g_cost > min_g_cost_grid[current.r][current.c]) {
410
+ continue;
411
+ }
412
+
413
+ if (current.r == target_r && current.c == target_c) {
414
+ return current.path;
415
+ }
416
+
417
+ if (current.g_cost >= A_star_max_depth) continue;
418
+
419
+ for (int move_idx = 0; move_idx < 4; ++move_idx) {
420
+ int tile_that_moves_r = current.r + DR_TILE_RELATIVE_TO_EMPTY[move_idx];
421
+ int tile_that_moves_c = current.c + DC_TILE_RELATIVE_TO_EMPTY[move_idx];
422
+
423
+ if (tile_that_moves_r < 0 || tile_that_moves_r >= N_actual || tile_that_moves_c < 0 || tile_that_moves_c >= N_actual) {
424
+ continue;
425
+ }
426
+
427
+ int next_empty_r = tile_that_moves_r;
428
+ int next_empty_c = tile_that_moves_c;
429
+
430
+ int next_g_cost = current.g_cost + 1;
431
+
432
+ if (min_g_cost_grid[next_empty_r][next_empty_c] <= next_g_cost) {
433
+ continue;
434
+ }
435
+ min_g_cost_grid[next_empty_r][next_empty_c] = next_g_cost;
436
+ pq.push({next_empty_r, next_empty_c, next_g_cost, current.path + MOVE_CHARS[move_idx]});
437
+ }
438
+ }
439
+ return "";
440
+ }
441
+
442
+ std::string reconstruct_beam_path(int final_history_idx) {
443
+ std::string path_str = "";
444
+ int current_trace_hist_idx = final_history_idx;
445
+ while(current_trace_hist_idx > 0 &&
446
+ static_cast<size_t>(current_trace_hist_idx) < beam_history_storage.size() &&
447
+ beam_history_storage[current_trace_hist_idx].parent_history_idx != -1) {
448
+ path_str += beam_history_storage[current_trace_hist_idx].move_char_taken;
449
+ current_trace_hist_idx = beam_history_storage[current_trace_hist_idx].parent_history_idx;
450
+ }
451
+ std::reverse(path_str.begin(), path_str.end());
452
+ return path_str;
453
+ }
454
+
455
+
456
+ int main(int /*argc*/, char** /*argv*/) {
457
+ std::ios_base::sync_with_stdio(false);
458
+ std::cin.tie(NULL);
459
+
460
+ unsigned int random_seed_stochastic = std::chrono::steady_clock::now().time_since_epoch().count();
461
+ rng_stochastic_selection_main.seed(random_seed_stochastic);
462
+
463
+ T_START_CHRONO_MAIN = std::chrono::steady_clock::now();
464
+
465
+ std::cin >> N_actual >> T_param;
466
+ init_char_to_val();
467
+
468
+ init_zobrist_keys();
469
+
470
+ Board current_board_obj;
471
+ for (int i = 0; i < N_actual; ++i) {
472
+ std::string row_str;
473
+ std::cin >> row_str;
474
+ for (int j = 0; j < N_actual; ++j) {
475
+ current_board_obj.tiles[i][j] = row_str[j];
476
+ if (current_board_obj.tiles[i][j] == '0') {
477
+ current_board_obj.empty_r = i;
478
+ current_board_obj.empty_c = j;
479
+ }
480
+ }
481
+ }
482
+ current_board_obj.calculate_initial_hash();
483
+
484
+ std::string initial_empty_moves_path = "";
485
+ // Try routing empty to each corner and pick the one that maximizes our beam score after routing.
486
+ {
487
+ const int cr[4] = {0, 0, N_actual - 1, N_actual - 1};
488
+ const int cc[4] = {0, N_actual - 1, 0, N_actual - 1};
489
+ double best_score = -1e300;
490
+ std::string best_path;
491
+ for (int i = 0; i < 4; ++i) {
492
+ std::string path = find_path_for_empty(current_board_obj, cr[i], cc[i]);
493
+ Board tmp = current_board_obj;
494
+ for (char ch : path) tmp.apply_move_char(ch);
495
+ ScoreComponents sc = calculate_scores(tmp);
496
+ A_STAR_PHASE_WAS_RUN = true; // relax empty-distance penalty after guided routing
497
+ double scv = calculate_beam_score(sc, (int)path.length(), tmp);
498
+ if (scv > best_score) { best_score = scv; best_path = path; }
499
+ }
500
+ initial_empty_moves_path = best_path;
501
+ }
502
+ for (char move_char : initial_empty_moves_path) {
503
+ current_board_obj.apply_move_char(move_char);
504
+ }
505
+ int K_initial_empty_moves = (int)initial_empty_moves_path.length();
506
+
507
+ // Adaptive time limit after A*
508
+ auto time_after_astar = std::chrono::steady_clock::now();
509
+ long long elapsed_astar_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_after_astar - T_START_CHRONO_MAIN).count();
510
+ TIME_LIMIT_MS_EFFECTIVE_MAIN = 2950 - elapsed_astar_ms - TIME_LIMIT_MS_SLACK_CONST;
511
+
512
+
513
+ // Reserve caches (still used by evaluation in MCTS)
514
+ beam_history_storage.reserve(MAX_BEAM_HISTORY_STORAGE_SIZE_CONST);
515
+ s_value_cache_by_hash.reserve(MAX_SCORE_CACHE_SIZE_CONST);
516
+ min_K_to_reach_by_hash_main.reserve(MAX_MIN_K_CACHE_SIZE_CONST);
517
+
518
+ // Initialize best known based on current board (after optional A* to corner)
519
+ ScoreComponents init_score_comp = calculate_scores(current_board_obj);
520
+ double overall_best_actual_score = calculate_actual_score(init_score_comp.max_tree_size, K_initial_empty_moves);
521
+ std::string overall_best_path_str = initial_empty_moves_path;
522
+
523
+ // -------------------------
524
+ // BEAM SEARCH (restored, time-bounded)
525
+ // -------------------------
526
+ // Doc: Deterministic beam search with:
527
+ // - Zobrist-based visited table storing minimal K to reach a hash
528
+ // - Strong primary score on largest tree size, penalties on #components and move count
529
+ // - Tiebreaker using local matched-edge heuristic around the moved tile and the previous empty
530
+ // - Elite retention + stochastic sampling for diversity
531
+ // - Stops on time/memory budget or when T is exhausted
532
+ std::vector<BeamState> current_beam;
533
+
534
+ ScoreComponents initial_scores_for_beam = calculate_scores(current_board_obj);
535
+ double initial_beam_eval_score = calculate_beam_score(initial_scores_for_beam, K_initial_empty_moves, current_board_obj);
536
+
537
+ beam_history_storage.push_back({-1, ' '}); // history idx 0 is sentinel
538
+ current_beam.push_back({current_board_obj, initial_beam_eval_score, 0, 0, -1, compute_total_matched_edges(current_board_obj)});
539
+
540
+ min_K_to_reach_by_hash_main[current_board_obj.zobrist_hash_value] = K_initial_empty_moves;
541
+
542
+ int beam_width;
543
+ float elite_ratio = 0.2f;
544
+ int stochastic_sample_pool_factor = 3;
545
+
546
+ if (N_actual <= 6) { beam_width = 1200;}
547
+ else if (N_actual == 7) { beam_width = 1000;}
548
+ else if (N_actual == 8) { beam_width = 700;}
549
+ else if (N_actual == 9) { beam_width = 400;}
550
+ else { beam_width = 250;}
551
+
552
+ std::vector<BeamState> candidates_pool;
553
+ candidates_pool.reserve(beam_width * 4 + 16);
554
+
555
+ std::vector<BeamState> next_beam_states_temp;
556
+ next_beam_states_temp.reserve(beam_width + 16);
557
+
558
+ std::vector<int> stochastic_selection_indices;
559
+ stochastic_selection_indices.reserve(stochastic_sample_pool_factor * beam_width + 16);
560
+
561
+ int k_iter_count_beam = 0;
562
+
563
+ for (int k_beam_iter = 0; K_initial_empty_moves + k_beam_iter < T_param; ++k_beam_iter) {
564
+ k_iter_count_beam++;
565
+ if (k_iter_count_beam % 10 == 0) {
566
+ long long now_ms = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - T_START_CHRONO_MAIN).count();
567
+ if (now_ms > 2950 - TIME_LIMIT_MS_SLACK_CONST) break;
568
+ }
569
+ if (beam_history_storage.size() >= MAX_BEAM_HISTORY_STORAGE_SIZE_CONST - ((size_t)beam_width * 4 + 128)) {
570
+ break;
571
+ }
572
+
573
+ candidates_pool.clear();
574
+
575
+ bool found_full_this_iter = false;
576
+
577
+ for (const auto& current_state_in_beam : current_beam) {
578
+ Board temp_board_for_moves = current_state_in_beam.board;
579
+
580
+ int parent_k_beam = current_state_in_beam.k_beam_moves;
581
+ int parent_history_idx = current_state_in_beam.history_idx;
582
+ int prev_m_dir_idx = current_state_in_beam.prev_move_direction_idx;
583
+
584
+ for (int move_dir_idx = 0; move_dir_idx < 4; ++move_dir_idx) {
585
+ if (prev_m_dir_idx != -1 && ((prev_m_dir_idx ^ 1) == move_dir_idx)) continue;
586
+
587
+ char current_move_char = MOVE_CHARS[move_dir_idx];
588
+ int original_empty_r = temp_board_for_moves.empty_r;
589
+ int original_empty_c = temp_board_for_moves.empty_c;
590
+ uint64_t original_hash = temp_board_for_moves.zobrist_hash_value;
591
+
592
+ int tile_to_move_r = original_empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
593
+ int tile_to_move_c = original_empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
594
+
595
+ if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
596
+ continue;
597
+ }
598
+
599
+ // Inline move for speed (swap chars and update hash/coords)
600
+ char moved_tile_hex_val = temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c];
601
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = moved_tile_hex_val;
602
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = '0';
603
+ temp_board_for_moves.empty_r = tile_to_move_r;
604
+ temp_board_for_moves.empty_c = tile_to_move_c;
605
+ temp_board_for_moves.update_hash_after_move(tile_to_move_r, tile_to_move_c, original_empty_r, original_empty_c);
606
+
607
+ int next_k_beam = parent_k_beam + 1;
608
+ int next_K_total = K_initial_empty_moves + next_k_beam;
609
+
610
+ bool already_reached_better = false;
611
+ auto it_map = min_K_to_reach_by_hash_main.find(temp_board_for_moves.zobrist_hash_value);
612
+ if (it_map != min_K_to_reach_by_hash_main.end()) {
613
+ if (it_map->second <= next_K_total) {
614
+ already_reached_better = true;
615
+ } else {
616
+ it_map->second = next_K_total;
617
+ }
618
+ } else {
619
+ if (min_K_to_reach_by_hash_main.size() < MAX_MIN_K_CACHE_SIZE_CONST) {
620
+ min_K_to_reach_by_hash_main[temp_board_for_moves.zobrist_hash_value] = next_K_total;
621
+ }
622
+ }
623
+
624
+ if (already_reached_better) {
625
+ // revert
626
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
627
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
628
+ temp_board_for_moves.empty_r = original_empty_r;
629
+ temp_board_for_moves.empty_c = original_empty_c;
630
+ temp_board_for_moves.zobrist_hash_value = original_hash;
631
+ continue;
632
+ }
633
+
634
+ ScoreComponents next_scores = calculate_scores(temp_board_for_moves);
635
+ if (next_scores.max_tree_size == N_actual * N_actual - 1) found_full_this_iter = true;
636
+ double next_beam_eval_score = calculate_beam_score(next_scores, next_K_total, temp_board_for_moves);
637
+
638
+ beam_history_storage.push_back({parent_history_idx, current_move_char});
639
+ int new_history_idx = (int)beam_history_storage.size() - 1;
640
+
641
+ int approx_local = count_cell_matched_degree(temp_board_for_moves, original_empty_r, original_empty_c)
642
+ + count_cell_matched_degree(temp_board_for_moves, tile_to_move_r, tile_to_move_c);
643
+ candidates_pool.push_back({temp_board_for_moves, next_beam_eval_score, next_k_beam, new_history_idx, move_dir_idx, approx_local});
644
+
645
+ double current_actual_score_val = calculate_actual_score(next_scores.max_tree_size, next_K_total);
646
+ if (current_actual_score_val > overall_best_actual_score) {
647
+ overall_best_actual_score = current_actual_score_val;
648
+ overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
649
+ } else if (current_actual_score_val == overall_best_actual_score) {
650
+ std::string cand = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
651
+ if (cand.length() < overall_best_path_str.length()) overall_best_path_str = cand;
652
+ }
653
+
654
+ // revert
655
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
656
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
657
+ temp_board_for_moves.empty_r = original_empty_r;
658
+ temp_board_for_moves.empty_c = original_empty_c;
659
+ temp_board_for_moves.zobrist_hash_value = original_hash;
660
+ }
661
+ }
662
+
663
+ if (candidates_pool.empty()) break;
664
+
665
+ if (found_full_this_iter) { break; } // Early exit: earliest full tree yields minimal K in beam
666
+
667
+ std::sort(candidates_pool.begin(), candidates_pool.end());
668
+
669
+ next_beam_states_temp.clear();
670
+ int num_elites = std::min((int)candidates_pool.size(), (int)(beam_width * elite_ratio));
671
+ num_elites = std::max(0, num_elites);
672
+
673
+ for (int i = 0; i < num_elites && i < (int)candidates_pool.size(); ++i) {
674
+ next_beam_states_temp.push_back(candidates_pool[i]);
675
+ }
676
+
677
+ if ((int)next_beam_states_temp.size() < beam_width && (int)candidates_pool.size() > num_elites) {
678
+ stochastic_selection_indices.clear();
679
+ int pool_start_idx = num_elites;
680
+ int pool_end_idx = std::min((int)candidates_pool.size(), num_elites + stochastic_sample_pool_factor * beam_width);
681
+ for (int i = pool_start_idx; i < pool_end_idx; ++i) stochastic_selection_indices.push_back(i);
682
+ if (!stochastic_selection_indices.empty()) {
683
+ std::shuffle(stochastic_selection_indices.begin(), stochastic_selection_indices.end(), rng_stochastic_selection_main);
684
+ }
685
+ for (size_t i = 0; i < stochastic_selection_indices.size() && (int)next_beam_states_temp.size() < beam_width; ++i) {
686
+ next_beam_states_temp.push_back(candidates_pool[stochastic_selection_indices[i]]);
687
+ }
688
+ }
689
+
690
+ current_beam = next_beam_states_temp;
691
+ if (current_beam.empty()) break;
692
+ }
693
+
694
+ // Local refinement: quick greedy hill-climb on the best found solution within remaining time
695
+ auto t_ref_end = T_START_CHRONO_MAIN + std::chrono::milliseconds(2950 - 20);
696
+ Board refine_b = current_board_obj;
697
+ for (char ch : overall_best_path_str) refine_b.apply_move_char(ch);
698
+ int K_now = (int)overall_best_path_str.size();
699
+ ScoreComponents sc_best = calculate_scores(refine_b);
700
+ int edges_best = compute_total_matched_edges(refine_b);
701
+ int last_dir_ref = -1;
702
+ if (!overall_best_path_str.empty()) {
703
+ char lastch = overall_best_path_str.back();
704
+ for (int i = 0; i < 4; ++i) if (MOVE_CHARS[i] == lastch) last_dir_ref = i;
705
+ }
706
+ while (sc_best.max_tree_size < N_actual * N_actual - 1 && K_now < T_param && std::chrono::steady_clock::now() < t_ref_end) {
707
+ int best_mv = -1; int best_S = sc_best.max_tree_size; int best_edges = edges_best;
708
+ // Try all non-backtracking moves and keep the best (lexicographically by S, then edges)
709
+ for (int mv = 0; mv < 4; ++mv) {
710
+ if (last_dir_ref != -1 && (last_dir_ref ^ 1) == mv) continue;
711
+ Board b2 = refine_b;
712
+ if (!b2.apply_move_char(MOVE_CHARS[mv])) continue;
713
+ ScoreComponents sc2 = calculate_scores(b2);
714
+ int e2 = compute_total_matched_edges(b2);
715
+ if (sc2.max_tree_size > best_S || (sc2.max_tree_size == best_S && e2 > best_edges)) {
716
+ best_mv = mv; best_S = sc2.max_tree_size; best_edges = e2;
717
+ }
718
+ }
719
+ if (best_mv == -1) break;
720
+ refine_b.apply_move_char(MOVE_CHARS[best_mv]);
721
+ overall_best_path_str.push_back(MOVE_CHARS[best_mv]);
722
+ sc_best.max_tree_size = best_S;
723
+ edges_best = best_edges;
724
+ last_dir_ref = best_mv;
725
+ ++K_now;
726
+ }
727
+ std::cout << overall_best_path_str << std::endl;
728
+ return 0;
729
+ }
730
+ # EVOLVE-BLOCK-END
benchmarks/ale_bench/ale-bench-lite-problems/ahc015/evaluator.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ from pathlib import Path
3
+ from ale_bench.result import CaseResult, JudgeResult, Result
4
+ from ale_bench_eval.safe_ale_session import start_ale_bench_session
5
+ import logging
6
+ import sys
7
+ logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
8
+
9
def result_feedback(result: Result) -> CaseResult:
    """Pick the single case result to surface as feedback.

    When the overall verdict is ACCEPTED the first case is returned. Otherwise the
    first case whose verdict equals the overall verdict is returned, falling back
    to the first case when none matches.
    """
    overall = result.overall_judge_result
    cases = result.case_results
    if overall == JudgeResult.ACCEPTED:
        return cases[0]
    return next((case for case in cases if case.judge_result == overall), cases[0])
19
+
20
def evaluate(program_path):
    """Evaluate the program at ``program_path`` on the 50 public cases of problem ahc015.

    Returns a dict with the judge result, overall score, peak time/memory, the
    stderr and message of one representative case, and ``combined_score`` (the
    per-case average score, sign-flipped when the problem is not a "maximize"
    problem). On any failure a dict with ``overall_score`` 0.0 and ``error`` is
    returned instead.

    The ALE-Bench session is closed in a ``finally`` clause so it is released on
    both the success and the failure path; an error raised by ``close()`` itself is
    logged and does not replace the evaluation result.
    """
    problem_id = "ahc015"
    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
    session = None
    try:
        logger.info("Starting ALE-Bench session")
        session = start_ale_bench_session(
            problem_id=problem_id,
            lite_version=True,
            num_workers=13,
        )
        logger.info("ALE-Bench session started")
        if not session:
            raise RuntimeError("Failed to start or restart the session.")
        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
        # The evolve markers are not valid C++, so strip them before submitting.
        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
        logger.info("Code extracted")
        num_public_cases = 50
        cases = session.case_gen(list(range(num_public_cases)))
        public_result = session.case_eval(
            cases, code, code_language="cpp20", skip_local_visualization=True
        )
        logger.info("Public evaluation completed")
        extracted_case = result_feedback(public_result)
        logger.info("Result feedback completed")
        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
        # A rejected run on a minimize problem must rank below every real score.
        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
            combined_score = -sys.maxsize - 1
        return {
            "judge_result": public_result.overall_judge_result.value,
            "overall_score": public_result.overall_absolute_score,
            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
            "standard_error": extracted_case.error_str,
            "message": extracted_case.message,
            "combined_score": combined_score,
        }
    except Exception as e:
        logger.error(f"Evaluation failed completely: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "overall_score": 0.0,
            "error": str(e),
        }
    finally:
        if session:
            try:
                session.close()
                logger.info("ALE-Bench session closed")
            except Exception as close_error:
                logger.error(f"Failed to close ALE-Bench session: {str(close_error)}")
benchmarks/ale_bench/ale-bench-lite-problems/ahc016/evaluator.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ from pathlib import Path
3
+ from ale_bench.result import CaseResult, JudgeResult, Result
4
+ from ale_bench_eval.safe_ale_session import start_ale_bench_session
5
+ import logging
6
+ import sys
7
+ logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
8
+
9
def result_feedback(result: Result) -> CaseResult:
    """Pick the single case result to surface as feedback.

    When the overall verdict is ACCEPTED the first case is returned. Otherwise the
    first case whose verdict equals the overall verdict is returned, falling back
    to the first case when none matches.
    """
    overall = result.overall_judge_result
    cases = result.case_results
    if overall == JudgeResult.ACCEPTED:
        return cases[0]
    return next((case for case in cases if case.judge_result == overall), cases[0])
19
+
20
def evaluate(program_path):
    """Evaluate the program at ``program_path`` on the 50 public cases of problem ahc016.

    Returns a dict with the judge result, overall score, peak time/memory, the
    stderr and message of one representative case, and ``combined_score`` (the
    per-case average score, sign-flipped when the problem is not a "maximize"
    problem). On any failure a dict with ``overall_score`` 0.0 and ``error`` is
    returned instead.

    The ALE-Bench session is closed in a ``finally`` clause so it is released on
    both the success and the failure path; an error raised by ``close()`` itself is
    logged and does not replace the evaluation result.
    """
    problem_id = "ahc016"
    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
    session = None
    try:
        logger.info("Starting ALE-Bench session")
        session = start_ale_bench_session(
            problem_id=problem_id,
            lite_version=True,
            num_workers=13,
        )
        logger.info("ALE-Bench session started")
        if not session:
            raise RuntimeError("Failed to start or restart the session.")
        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
        # The evolve markers are not valid C++, so strip them before submitting.
        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
        logger.info("Code extracted")
        num_public_cases = 50
        cases = session.case_gen(list(range(num_public_cases)))
        public_result = session.case_eval(
            cases, code, code_language="cpp20", skip_local_visualization=True
        )
        logger.info("Public evaluation completed")
        extracted_case = result_feedback(public_result)
        logger.info("Result feedback completed")
        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
        # A rejected run on a minimize problem must rank below every real score.
        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
            combined_score = -sys.maxsize - 1
        return {
            "judge_result": public_result.overall_judge_result.value,
            "overall_score": public_result.overall_absolute_score,
            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
            "standard_error": extracted_case.error_str,
            "message": extracted_case.message,
            "combined_score": combined_score,
        }
    except Exception as e:
        logger.error(f"Evaluation failed completely: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "overall_score": 0.0,
            "error": str(e),
        }
    finally:
        if session:
            try:
                session.close()
                logger.info("ALE-Bench session closed")
            except Exception as close_error:
                logger.error(f"Failed to close ALE-Bench session: {str(close_error)}")
benchmarks/ale_bench/ale-bench-lite-problems/ahc039/best_program.cpp ADDED
@@ -0,0 +1,1003 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ #include <iostream>
3
+ #include <vector>
4
+ #include <algorithm>
5
+ #include <chrono>
6
+ #include <random>
7
+ #include <set>
8
+ #include <unordered_set>
9
+ #include <cmath>
10
+ #include <iomanip>
11
+ #include <numeric> // For std::iota
12
+ #include <string>
13
+ #include <map>
14
+
15
+ // === MACROS AND CONSTANTS ===
16
constexpr int MAX_COORD_VAL = 100000;   // largest coordinate value used by the solver
constexpr int MAX_VERTICES = 1000;      // cap on polygon vertex count
constexpr int MAX_PERIMETER = 400000;   // cap on polygon perimeter
constexpr double TIME_LIMIT_SECONDS_SAFETY_MARGIN = 0.1; // Increased safety margin
double ACTUAL_TIME_LIMIT_SECONDS = 2.0; // mutable: working time budget in seconds
21
+
22
+
23
+ // === RANDOM NUMBER GENERATION ===
24
// 64-bit xorshift generator (shift triple 13 / 7 / 17), seeded from the steady clock
// mixed with two std::random_device draws.
struct XorShift {
    uint64_t x;

    XorShift() {
        const uint64_t clock_bits = std::chrono::steady_clock::now().time_since_epoch().count();
        const uint64_t high_bits = (uint64_t)std::random_device()() << 32;
        const uint64_t low_bits = std::random_device()();
        x = clock_bits ^ high_bits ^ low_bits;
    }

    // Advances the state and returns it.
    uint64_t next() {
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        return x;
    }

    // Value in [0, n); returns 0 when n <= 0.
    int next_int(int n) {
        if (n > 0) return static_cast<int>(next() % n);
        return 0;
    }

    // Value in [a, b]; returns a when the range is empty (a > b).
    int next_int(int a, int b) {
        return (a > b) ? a : a + next_int(b - a + 1);
    }

    // Value in [0, 1] (both ends reachable).
    double next_double() {
        return static_cast<double>(next()) / static_cast<double>(UINT64_MAX);
    }
};
XorShift rng;
38
+
39
+ // === TIMER ===
40
// Stopwatch on std::chrono::steady_clock; starts on construction.
struct Timer {
    using Clock = std::chrono::steady_clock;

    Clock::time_point start_time;

    Timer() { reset(); }

    // Restarts the stopwatch from the current instant.
    void reset() { start_time = Clock::now(); }

    // Seconds elapsed since construction or the last reset().
    double elapsed() const {
        const std::chrono::duration<double> delta = Clock::now() - start_time;
        return delta.count();
    }
};
Timer global_timer;
50
+
51
+ // === GEOMETRIC STRUCTURES ===
52
// Integer 2D point / vector.
struct Point {
    int x, y;

    // Lexicographic order: by x, then by y.
    bool operator<(const Point& rhs) const {
        return x < rhs.x || (x == rhs.x && y < rhs.y);
    }

    bool operator==(const Point& rhs) const {
        return !(x != rhs.x || y != rhs.y);
    }

    // Component-wise difference (vector from rhs to *this).
    Point operator-(const Point& rhs) const {
        Point diff;
        diff.x = x - rhs.x;
        diff.y = y - rhs.y;
        return diff;
    }
};
65
+
66
+ struct PointHash {
67
+ std::size_t operator()(const Point& p) const {
68
+ auto h1 = std::hash<int>{}(p.x);
69
+ auto h2 = std::hash<int>{}(p.y);
70
+ // Combining hashes: simple XOR might not be best, but often good enough.
71
+ // For Point, a common way is boost::hash_combine.
72
+ // h1 ^ (h2 << 1) is a common way that's okay.
73
+ return h1 ^ (h2 << 1);
74
+ }
75
+ };
76
+
77
+ long long cross_product(Point a, Point b) {
78
+ return (long long)a.x * b.y - (long long)a.y * b.x;
79
+ }
80
+
81
+ struct Fish {
82
+ Point p;
83
+ int type; // 1 for mackerel, -1 for sardine
84
+ };
85
+ std::vector<Fish> all_fish_structs;
86
+
87
+
88
+ // === KD-TREE ===
89
+ struct KDNode {
90
+ Point pt;
91
+ int axis;
92
+ KDNode *left = nullptr, *right = nullptr;
93
+ int fish_struct_idx = -1;
94
+ };
95
+ KDNode* fish_kdtree_root = nullptr;
96
+
97
+
98
+ KDNode* build_kdtree(std::vector<int>& point_indices, int l, int r, int axis) {
99
+ if (l > r) return nullptr;
100
+ int mid = l + (r - l) / 2;
101
+
102
+ std::nth_element(point_indices.begin() + l, point_indices.begin() + mid, point_indices.begin() + r + 1,
103
+ [&](int a_idx, int b_idx) {
104
+ const Point& pa = all_fish_structs[a_idx].p;
105
+ const Point& pb = all_fish_structs[b_idx].p;
106
+ if (axis == 0) return pa.x < pb.x;
107
+ return pa.y < pb.y;
108
+ });
109
+
110
+ KDNode* node = new KDNode();
111
+ node->fish_struct_idx = point_indices[mid];
112
+ node->pt = all_fish_structs[node->fish_struct_idx].p;
113
+ node->axis = axis;
114
+
115
+ node->left = build_kdtree(point_indices, l, mid - 1, 1 - axis);
116
+ node->right = build_kdtree(point_indices, mid + 1, r, 1 - axis);
117
+ return node;
118
+ }
119
+
120
+ /*
121
+ Docstring:
122
+ KD-tree rectangle query (count-only).
123
+ Traverses the KD-tree and increments mackerel/sardine counters for points within the axis-aligned query rectangle,
124
+ avoiding materializing index lists (faster and less memory traffic).
125
+ */
126
+ void query_kdtree_rectangle(KDNode* node, int min_x, int max_x, int min_y, int max_y, int& cnt_m, int& cnt_s) {
127
+ if (!node || min_x > max_x || min_y > max_y) return;
128
+
129
+ const Point& pt = node->pt;
130
+ if (pt.x >= min_x && pt.x <= max_x && pt.y >= min_y && pt.y <= max_y) {
131
+ if (all_fish_structs[node->fish_struct_idx].type == 1) ++cnt_m; else ++cnt_s;
132
+ }
133
+
134
+ if (node->axis == 0) { // Split by X
135
+ if (node->left && min_x <= node->pt.x) query_kdtree_rectangle(node->left, min_x, max_x, min_y, max_y, cnt_m, cnt_s);
136
+ if (node->right && max_x >= node->pt.x) query_kdtree_rectangle(node->right, min_x, max_x, min_y, max_y, cnt_m, cnt_s);
137
+ } else { // Split by Y
138
+ if (node->left && min_y <= node->pt.y) query_kdtree_rectangle(node->left, min_x, max_x, min_y, max_y, cnt_m, cnt_s);
139
+ if (node->right && max_y >= node->pt.y) query_kdtree_rectangle(node->right, min_x, max_x, min_y, max_y, cnt_m, cnt_s);
140
+ }
141
+ }
142
+
143
+ void delete_kdtree(KDNode* node) { // Recursively delete KD-tree nodes
144
+ if (!node) return;
145
+ delete_kdtree(node->left);
146
+ delete_kdtree(node->right);
147
+ delete node;
148
+ }
149
+
150
+
151
+ // === POLYGON UTILITIES ===
152
+ long long calculate_perimeter(const std::vector<Point>& poly) {
153
+ if (poly.size() < 2) return 0;
154
+ long long perimeter = 0;
155
+ for (size_t i = 0; i < poly.size(); ++i) {
156
+ const Point& p1 = poly[i];
157
+ const Point& p2 = poly[(i + 1) % poly.size()];
158
+ perimeter += std::abs(p1.x - p2.x) + std::abs(p1.y - p2.y);
159
+ }
160
+ return perimeter;
161
+ }
162
+
163
+ bool is_on_segment(Point p, Point seg_a, Point seg_b) {
164
+ if (cross_product(seg_b - seg_a, p - seg_a) != 0) return false; // Not collinear
165
+ return std::min(seg_a.x, seg_b.x) <= p.x && p.x <= std::max(seg_a.x, seg_b.x) &&
166
+ std::min(seg_a.y, seg_b.y) <= p.y && p.y <= std::max(seg_a.y, seg_b.y);
167
+ }
168
+
169
+ bool is_inside_polygon_wn(Point p, const std::vector<Point>& polygon) {
170
+ int n = polygon.size();
171
+ if (n < 3) return false;
172
+
173
+ // Check if on boundary first
174
+ for (int i = 0; i < n; ++i) {
175
+ if (is_on_segment(p, polygon[i], polygon[(i + 1) % n])) return true;
176
+ }
177
+
178
+ int wn = 0; // Winding number
179
+ for (int i = 0; i < n; ++i) {
180
+ Point p1 = polygon[i];
181
+ Point p2 = polygon[(i + 1) % n];
182
+ if (p1.y <= p.y) { // Start y <= P.y
183
+ if (p2.y > p.y && cross_product(p2 - p1, p - p1) > 0) { // An upward crossing, P is left of edge
184
+ wn++;
185
+ }
186
+ } else { // Start y > P.y
187
+ if (p2.y <= p.y && cross_product(p2 - p1, p - p1) < 0) { // A downward crossing, P is right of edge
188
+ wn--;
189
+ }
190
+ }
191
+ }
192
+ return wn != 0; // wn != 0 means inside; wn == 0 means outside.
193
+ }
194
+
195
+ // Calculate score from scratch by checking all fish
196
+ void calculate_score_from_scratch(const std::vector<Point>& poly, int& m_count, int& s_count) {
197
+ m_count = 0; s_count = 0;
198
+ if (poly.size() < 3) return; // Not a valid polygon for containment
199
+ for (const auto& fish_s : all_fish_structs) {
200
+ if (is_inside_polygon_wn(fish_s.p, poly)) {
201
+ if (fish_s.type == 1) m_count++;
202
+ else s_count++;
203
+ }
204
+ }
205
+ }
206
+
207
+ // Calculate fish counts in a given rectangle using KD-tree
208
+ /*
209
+ Docstring:
210
+ Count fish inside an axis-aligned rectangle using a count-only KD-tree traversal.
211
+ This avoids building intermediate index arrays and reduces per-move overhead in SA.
212
+ */
213
+ void calculate_score_delta_for_rectangle(int r_min_x, int r_max_x, int r_min_y, int r_max_y,
214
+ int& delta_m, int& delta_s) {
215
+ delta_m = 0; delta_s = 0;
216
+ if (!fish_kdtree_root || r_min_x > r_max_x || r_min_y > r_max_y) return;
217
+ query_kdtree_rectangle(fish_kdtree_root, r_min_x, r_max_x, r_min_y, r_max_y, delta_m, delta_s);
218
+ }
219
+
220
+ // Check intersection between two orthogonal segments p1s-p1e and p2s-p2e
221
+ bool segments_intersect(Point p1s, Point p1e, Point p2s, Point p2e) {
222
+ // Normalize segments (sort endpoints to simplify overlap checks)
223
+ if (p1s.x == p1e.x) { if (p1s.y > p1e.y) std::swap(p1s.y, p1e.y); } // Vertical, sort by y
224
+ else { if (p1s.x > p1e.x) std::swap(p1s.x, p1e.x); } // Horizontal, sort by x
225
+ if (p2s.x == p2e.x) { if (p2s.y > p2e.y) std::swap(p2s.y, p2e.y); }
226
+ else { if (p2s.x > p2e.x) std::swap(p2s.x, p2e.x); }
227
+
228
+ bool seg1_is_H = (p1s.y == p1e.y);
229
+ bool seg2_is_H = (p2s.y == p2e.y);
230
+
231
+ if (seg1_is_H == seg2_is_H) { // Both horizontal or both vertical
232
+ if (seg1_is_H) { // Both horizontal
233
+ // Check for y-alignment and x-overlap
234
+ return p1s.y == p2s.y && std::max(p1s.x, p2s.x) <= std::min(p1e.x, p2e.x);
235
+ } else { // Both vertical
236
+ // Check for x-alignment and y-overlap
237
+ return p1s.x == p2s.x && std::max(p1s.y, p2s.y) <= std::min(p1e.y, p2e.y);
238
+ }
239
+ } else { // One horizontal, one vertical (potential T-junction or cross)
240
+ Point h_s = seg1_is_H ? p1s : p2s; Point h_e = seg1_is_H ? p1e : p2e;
241
+ Point v_s = seg1_is_H ? p2s : p1s; Point v_e = seg1_is_H ? p2e : p1e;
242
+ // Check if intersection point (v_s.x, h_s.y) lies on both segments
243
+ return v_s.x >= h_s.x && v_s.x <= h_e.x && // x_intersect within horizontal segment's x-range
244
+ h_s.y >= v_s.y && h_s.y <= v_e.y; // y_intersect within vertical segment's y-range
245
+ }
246
+ }
247
+
248
+ bool check_self_intersection_full(const std::vector<Point>& poly) {
249
+ int M = poly.size();
250
+ if (M < 4) return false;
251
+ for (int i = 0; i < M; ++i) {
252
+ Point p1s = poly[i];
253
+ Point p1e = poly[(i + 1) % M];
254
+ for (int j = i + 2; j < M; ++j) {
255
+ // Skip checking adjacent edges.
256
+ // Edge i is (poly[i], poly[(i+1)%M]). Edge j is (poly[j], poly[(j+1)%M]).
257
+ // If i=0 and j=M-1, then edge i is (poly[0], poly[1]) and edge j is (poly[M-1], poly[0]). These are adjacent.
258
+ if (i == 0 && j == M - 1) continue;
259
+
260
+ Point p2s = poly[j];
261
+ Point p2e = poly[(j + 1) % M];
262
+ if (segments_intersect(p1s, p1e, p2s, p2e)) return true;
263
+ }
264
+ }
265
+ return false;
266
+ }
267
+
268
+ // Local self-intersection check: checks edges starting at critical_edge_start_indices_const against all others
269
+ bool has_self_intersection_locally(const std::vector<Point>& poly, const std::vector<int>& critical_edge_start_indices_const) {
270
+ int M = poly.size();
271
+ if (M < 4) return false;
272
+
273
+ std::vector<int> critical_indices = critical_edge_start_indices_const; // Make a copy to modify
274
+ if (critical_indices.empty()) {
275
+ return false;
276
+ }
277
+
278
+ std::sort(critical_indices.begin(), critical_indices.end());
279
+ critical_indices.erase(std::unique(critical_indices.begin(), critical_indices.end()), critical_indices.end());
280
+
281
+ for (int edge1_s_idx_val_orig : critical_indices) {
282
+ int edge1_s_idx_val = (edge1_s_idx_val_orig % M + M) % M; // Ensure positive modulo
283
+ // No need to check edge1_s_idx_val bounds, it will be in [0, M-1]
284
+
285
+ Point p1s = poly[edge1_s_idx_val];
286
+ Point p1e = poly[(edge1_s_idx_val + 1) % M];
287
+
288
+ for (int edge2_s_idx = 0; edge2_s_idx < M; ++edge2_s_idx) {
289
+ bool is_adj_or_same_to_p1s_p1e = (edge2_s_idx == edge1_s_idx_val || // Same edge
290
+ edge2_s_idx == (edge1_s_idx_val + 1) % M || // edge2 starts where edge1 ends
291
+ (edge2_s_idx + 1) % M == edge1_s_idx_val); // edge2 ends where edge1 starts
292
+ if (is_adj_or_same_to_p1s_p1e) continue;
293
+
294
+ Point p2s = poly[edge2_s_idx];
295
+ Point p2e = poly[(edge2_s_idx + 1) % M];
296
+ if (segments_intersect(p1s, p1e, p2s, p2e)) {
297
+ return true;
298
+ }
299
+ }
300
+ }
301
+ return false;
302
+ }
303
+
304
+
305
+ bool has_distinct_vertices_unordered(const std::vector<Point>& poly) {
306
+ if (poly.empty()) return true;
307
+ std::unordered_set<Point, PointHash> distinct_pts;
308
+ distinct_pts.reserve(poly.size()); // Pre-allocate for efficiency
309
+ for(const auto& p : poly) {
310
+ if (!distinct_pts.insert(p).second) return false; // Insertion failed, duplicate found
311
+ }
312
+ return true;
313
+ }
314
+
315
+ /*
316
+ has_duplicate_vertices_local:
317
+ Fast local duplicate check used inside SA. It only verifies that the subset
318
+ of modified vertices does not collide (same coordinates) with any other vertex.
319
+ This is sufficient because all other vertices were already distinct before the move.
320
+ */
321
+ bool has_duplicate_vertices_local(const std::vector<Point>& poly, const std::vector<int>& changed_indices) {
322
+ int m = (int)poly.size();
323
+ if (m <= 1 || changed_indices.empty()) return false;
324
+ for (int idx : changed_indices) {
325
+ int i = ((idx % m) + m) % m;
326
+ const Point& p = poly[i];
327
+ for (int j = 0; j < m; ++j) {
328
+ if (j == i) continue;
329
+ if (poly[j].x == p.x && poly[j].y == p.y) return true;
330
+ }
331
+ }
332
+ return false;
333
+ }
334
+
335
+ // Check basic structural validity of the polygon with early perimeter exit
336
+ bool is_polygon_structurally_sound(const std::vector<Point>& poly) {
337
+ // Ensures axis-parallel edges, valid bounds, non-zero edges, and perimeter within limit
338
+ int m = (int)poly.size();
339
+ if (m != 0 && (m < 4 || m > MAX_VERTICES)) return false;
340
+ if (m == 0) return true;
341
+
342
+ long long perim = 0;
343
+ for (int i = 0; i < m; ++i) {
344
+ const Point& p1 = poly[i];
345
+ const Point& p2 = poly[(i + 1) % m];
346
+
347
+ // bounds check
348
+ if (p1.x < 0 || p1.x > MAX_COORD_VAL || p1.y < 0 || p1.y > MAX_COORD_VAL) return false;
349
+
350
+ // axis-aligned and non-zero
351
+ if (p1.x != p2.x && p1.y != p2.y) return false;
352
+ if (p1.x == p2.x && p1.y == p2.y) return false;
353
+
354
+ // perimeter accumulation with early abort
355
+ perim += std::abs(p1.x - p2.x) + std::abs(p1.y - p2.y);
356
+ if (perim > MAX_PERIMETER) return false;
357
+ }
358
+ return true;
359
+ }
360
+
361
+ // Initial polygon generation using Kadane's algorithm on a coarse grid
362
+ std::vector<Point> create_initial_polygon_kadane() {
363
+ const int GRID_SIZE_KADANE = 300; // Tunable parameter (smaller for faster initialization)
364
+ const int NUM_VALUES_KADANE = MAX_COORD_VAL + 1;
365
+ // Ensure ACTUAL_CELL_DIM_KADANE is at least 1
366
+ const int ACTUAL_CELL_DIM_KADANE = std::max(1, (NUM_VALUES_KADANE + GRID_SIZE_KADANE - 1) / GRID_SIZE_KADANE);
367
+
368
+ std::vector<std::vector<long long>> grid_scores(GRID_SIZE_KADANE, std::vector<long long>(GRID_SIZE_KADANE, 0));
369
+ for (const auto& fish_s : all_fish_structs) {
370
+ int r = fish_s.p.y / ACTUAL_CELL_DIM_KADANE;
371
+ int c = fish_s.p.x / ACTUAL_CELL_DIM_KADANE;
372
+ r = std::min(r, GRID_SIZE_KADANE - 1); r = std::max(r,0);
373
+ c = std::min(c, GRID_SIZE_KADANE - 1); c = std::max(c,0);
374
+ grid_scores[r][c] += fish_s.type; // Mackerel +1, Sardine -1
375
+ }
376
+
377
+ long long max_so_far = -3e18; // Sufficiently small number
378
+ int best_r1 = 0, best_c1 = 0, best_r2 = -1, best_c2 = -1;
379
+
380
+ // 2D Kadane's algorithm
381
+ for (int c1_idx = 0; c1_idx < GRID_SIZE_KADANE; ++c1_idx) {
382
+ std::vector<long long> col_strip_sum(GRID_SIZE_KADANE, 0);
383
+ for (int c2_idx = c1_idx; c2_idx < GRID_SIZE_KADANE; ++c2_idx) {
384
+ for (int r_idx = 0; r_idx < GRID_SIZE_KADANE; ++r_idx) {
385
+ col_strip_sum[r_idx] += grid_scores[r_idx][c2_idx];
386
+ }
387
+
388
+ // 1D Kadane's on col_strip_sum
389
+ long long current_strip_val = 0;
390
+ int current_r1_1d = 0;
391
+ for (int r2_idx_1d = 0; r2_idx_1d < GRID_SIZE_KADANE; ++r2_idx_1d) {
392
+ long long val_here = col_strip_sum[r2_idx_1d];
393
+ if (current_strip_val > 0 && current_strip_val + val_here > 0) { // Extend if sum remains positive
394
+ current_strip_val += val_here;
395
+ } else { // Start new subarray
396
+ current_strip_val = val_here;
397
+ current_r1_1d = r2_idx_1d;
398
+ }
399
+
400
+ if (current_strip_val > max_so_far) {
401
+ max_so_far = current_strip_val;
402
+ best_r1 = current_r1_1d;
403
+ best_r2 = r2_idx_1d;
404
+ best_c1 = c1_idx;
405
+ best_c2 = c2_idx;
406
+ }
407
+ }
408
+ }
409
+ }
410
+
411
+ std::vector<Point> default_poly = {{0,0}, {1,0}, {1,1}, {0,1}}; // Minimal valid polygon
412
+
413
+ // If no positive sum found, or issue, find best single cell
414
+ if (best_r2 == -1 || max_so_far <=0 ) {
415
+ max_so_far = -3e18; // Reset search for single best cell
416
+ bool found_cell = false;
417
+ for(int r=0; r<GRID_SIZE_KADANE; ++r) for(int c=0; c<GRID_SIZE_KADANE; ++c) {
418
+ if(grid_scores[r][c] > max_so_far) {
419
+ max_so_far = grid_scores[r][c];
420
+ best_r1 = r; best_r2 = r; // Single cell
421
+ best_c1 = c; best_c2 = c;
422
+ found_cell = true;
423
+ }
424
+ }
425
+ if (!found_cell || max_so_far <=0) return default_poly; // Still no good cell, return default
426
+ }
427
+
428
+ // Convert grid cell indices to actual coordinates
429
+ int x_start = best_c1 * ACTUAL_CELL_DIM_KADANE;
430
+ int y_start = best_r1 * ACTUAL_CELL_DIM_KADANE;
431
+ int x_end = (best_c2 + 1) * ACTUAL_CELL_DIM_KADANE -1;
432
+ int y_end = (best_r2 + 1) * ACTUAL_CELL_DIM_KADANE -1;
433
+
434
+ // Clamp coordinates to valid range
435
+ x_start = std::max(0, std::min(MAX_COORD_VAL, x_start));
436
+ y_start = std::max(0, std::min(MAX_COORD_VAL, y_start));
437
+ x_end = std::max(x_start, std::min(MAX_COORD_VAL, x_end)); // Ensure x_end >= x_start
438
+ y_end = std::max(y_start, std::min(MAX_COORD_VAL, y_end)); // Ensure y_end >= y_start
439
+
440
+ // Ensure non-zero dimensions for the polygon, minimum 1x1 actual area
441
+ if (x_start == x_end) {
442
+ if (x_start < MAX_COORD_VAL) x_end = x_start + 1;
443
+ else if (x_start > 0) x_start = x_start -1; // Can't expand right, try expand left
444
+ else return default_poly; // Single point at MAX_COORD_VAL, cannot form 1x1
445
+ }
446
+ if (y_start == y_end) {
447
+ if (y_start < MAX_COORD_VAL) y_end = y_start + 1;
448
+ else if (y_start > 0) y_start = y_start - 1;
449
+ else return default_poly;
450
+ }
451
+ // After adjustment, if still degenerate, use default. This is rare.
452
+ if (x_start == x_end || y_start == y_end) return default_poly;
453
+
454
+
455
+ std::vector<Point> initial_poly = {
456
+ {x_start, y_start}, {x_end, y_start}, {x_end, y_end}, {x_start, y_end}
457
+ };
458
+ return initial_poly;
459
+ }
460
+
461
+ // === SIMULATED ANNEALING ===
462
+ struct SAState {
463
+ std::vector<Point> poly;
464
+ int m_count;
465
+ int s_count;
466
+
467
+ SAState() : m_count(0), s_count(0) {}
468
+
469
+ long long get_objective_score() const {
470
+ return std::max(0LL, (long long)m_count - s_count + 1);
471
+ }
472
+ double get_raw_objective_score() const { // Used for SA acceptance probability
473
+ return (double)m_count - s_count;
474
+ }
475
+ };
476
+
477
+ // Calculates signed area * 2 of a polygon (shoelace formula)
478
+ long long polygon_signed_area_times_2(const std::vector<Point>& poly) {
479
+ if (poly.size() < 3) return 0;
480
+ long long area_sum = 0;
481
+ for (size_t i = 0; i < poly.size(); ++i) {
482
+ const Point& p1 = poly[i];
483
+ const Point& p2 = poly[(i + 1) % poly.size()];
484
+ area_sum += (long long)(p1.x - p2.x) * (p1.y + p2.y); // (x1-x2)(y1+y2) variant
485
+ }
486
+ return area_sum; // Positive for CCW, negative for CW
487
+ }
488
+
489
+ std::vector<int> sa_critical_edge_indices_cache; // Cache for local intersection check
490
+
491
+ // Guide coordinates for SA moves
492
+ std::vector<int> static_x_guides;
493
+ std::vector<int> static_y_guides;
494
+ std::vector<int> best_poly_x_guides;
495
+ std::vector<int> best_poly_y_guides;
496
+
497
+ void update_best_poly_guides(const SAState& new_best_state) {
498
+ best_poly_x_guides.clear();
499
+ best_poly_y_guides.clear();
500
+ if (new_best_state.poly.empty()) return;
501
+
502
+ std::set<int> temp_x_set, temp_y_set;
503
+ for (const auto& p : new_best_state.poly) {
504
+ temp_x_set.insert(p.x);
505
+ temp_y_set.insert(p.y);
506
+ }
507
+ best_poly_x_guides.assign(temp_x_set.begin(), temp_x_set.end());
508
+ best_poly_y_guides.assign(temp_y_set.begin(), temp_y_set.end());
509
+ }
510
+
511
+
512
+ /*
513
+ compress_polygon_collinear:
514
+ Remove all intermediate vertices that lie on straight segments (three consecutive vertices collinear).
515
+ This reduces vertex count/perimeter without changing the enclosed area or legality.
516
+ */
517
+ void compress_polygon_collinear(std::vector<Point>& poly) {
518
+ if (poly.size() < 5) return; // keep minimal 4-vertex polygon
519
+ bool changed = true;
520
+ int guard = 0;
521
+ while (changed && guard < 2) { // two passes are enough to handle wrap-around effects
522
+ changed = false;
523
+ for (size_t i = 0; i < poly.size();) {
524
+ size_t m = poly.size();
525
+ if (m <= 4) return;
526
+ size_t i0 = (i + m - 1) % m;
527
+ size_t i1 = i;
528
+ size_t i2 = (i + 1) % m;
529
+ const Point& p0 = poly[i0];
530
+ const Point& p1 = poly[i1];
531
+ const Point& p2 = poly[i2];
532
+ bool col_x = (p0.x == p1.x && p1.x == p2.x);
533
+ bool col_y = (p0.y == p1.y && p1.y == p2.y);
534
+ if (col_x || col_y) {
535
+ poly.erase(poly.begin() + (int)i1);
536
+ changed = true;
537
+ // re-check at this index after erase
538
+ } else {
539
+ ++i;
540
+ }
541
+ }
542
+ ++guard;
543
+ }
544
+ }
545
+
546
+
547
+
548
+
549
+
550
+ /*
551
+ Docstring:
552
+ Simulated annealing with three moves: move edge (snap/random), add bulge, and simplify.
553
+ Remove-bulge is disabled (empirically improves score/time and simplifies code).
554
+ Score deltas via KD-tree rectangle count; local checks enforce validity.
555
+ */
556
+ void simulated_annealing_main() {
557
+ SAState current_state;
558
+ current_state.poly = create_initial_polygon_kadane();
559
+ calculate_score_from_scratch(current_state.poly, current_state.m_count, current_state.s_count);
560
+
561
+ std::vector<Point> default_tiny_poly = {{0,0}, {1,0}, {1,1}, {0,1}};
562
+
563
+ // Ensure initial polygon is valid, otherwise use default
564
+ bool current_poly_initial_valid = is_polygon_structurally_sound(current_state.poly) &&
565
+ current_state.poly.size() >= 4 &&
566
+ has_distinct_vertices_unordered(current_state.poly) &&
567
+ !check_self_intersection_full(current_state.poly);
568
+
569
+ if (!current_poly_initial_valid) {
570
+ current_state.poly = default_tiny_poly;
571
+ calculate_score_from_scratch(current_state.poly, current_state.m_count, current_state.s_count);
572
+ }
573
+
574
+ SAState best_state = current_state;
575
+ update_best_poly_guides(best_state);
576
+
577
+ // Prepare static guide coordinates from fish locations
578
+ std::set<int> sx_set, sy_set;
579
+ for(const auto& f_s : all_fish_structs) {
580
+ sx_set.insert(f_s.p.x); sx_set.insert(std::max(0,f_s.p.x-1)); sx_set.insert(std::min(MAX_COORD_VAL, f_s.p.x+1));
581
+ sy_set.insert(f_s.p.y); sy_set.insert(std::max(0,f_s.p.y-1)); sy_set.insert(std::min(MAX_COORD_VAL, f_s.p.y+1));
582
+ }
583
+ sx_set.insert(0); sx_set.insert(MAX_COORD_VAL); // Boundary guides
584
+ sy_set.insert(0); sy_set.insert(MAX_COORD_VAL);
585
+
586
+ static_x_guides.assign(sx_set.begin(), sx_set.end());
587
+ static_y_guides.assign(sy_set.begin(), sy_set.end());
588
+
589
+
590
+ double start_temp = 150.0;
591
+ double end_temp = 0.01;
592
+
593
+ long long current_signed_area = polygon_signed_area_times_2(current_state.poly);
594
+ if (current_signed_area == 0 && current_state.poly.size() >=3) {
595
+ current_signed_area = 1; // Avoid issues with zero area for sign logic
596
+ }
597
+
598
+ sa_critical_edge_indices_cache.reserve(10); // Max expected critical edges for current moves
599
+ std::vector<int> changed_vertex_indices; // indices of vertices modified in the current move
600
+ changed_vertex_indices.reserve(4);
601
+
602
+ while (global_timer.elapsed() < ACTUAL_TIME_LIMIT_SECONDS) {
603
+ double time_ratio = global_timer.elapsed() / ACTUAL_TIME_LIMIT_SECONDS;
604
+ double temperature = start_temp * std::pow(end_temp / start_temp, time_ratio);
605
+ // Fine-tune temperature near end or if it drops too fast
606
+ if (temperature < end_temp && time_ratio < 0.95) temperature = end_temp;
607
+ if (time_ratio > 0.95 && temperature > end_temp * 0.1) temperature = end_temp * 0.1; // Lower temp aggressively at the very end
608
+
609
+ if (current_state.poly.size() < 4) { // Should not happen if logic is correct, but as a safeguard
610
+ current_state.poly = default_tiny_poly;
611
+ calculate_score_from_scratch(current_state.poly, current_state.m_count, current_state.s_count);
612
+ current_signed_area = polygon_signed_area_times_2(current_state.poly);
613
+ if (current_signed_area == 0 && current_state.poly.size() >=3) current_signed_area = 1;
614
+ }
615
+
616
+ SAState candidate_state = current_state;
617
+ sa_critical_edge_indices_cache.clear();
618
+ changed_vertex_indices.clear();
619
+
620
+ int move_type_roll = rng.next_int(100);
621
+ // Base probabilities for moves (simpler, empirically stronger)
622
+ int move_edge_prob = 48;
623
+ int add_bulge_prob = 24;
624
+ // Remaining probability for simplify polygon move
625
+
626
+ long long current_poly_perimeter_cached = 0;
627
+ bool check_limits = (candidate_state.poly.size() > 200 || candidate_state.poly.size() > MAX_VERTICES - 20);
628
+ if (check_limits && candidate_state.poly.size() > 200) {
629
+ current_poly_perimeter_cached = calculate_perimeter(candidate_state.poly);
630
+ }
631
+
632
+ // Adjust move probabilities based on polygon size/perimeter
633
+ if (candidate_state.poly.size() + 2 > MAX_VERTICES || (check_limits && current_poly_perimeter_cached > MAX_PERIMETER * 0.95)) { // If adding bulge would exceed max vertices
634
+ move_edge_prob = 45; add_bulge_prob = 0; // Disallow adding vertices near limits
635
+ } else if (candidate_state.poly.size() > 200 || (check_limits && current_poly_perimeter_cached > MAX_PERIMETER * 0.9)) {
636
+ move_edge_prob = 40; add_bulge_prob = 15;
637
+ } else if (candidate_state.poly.size() > 50) {
638
+ move_edge_prob = 45; add_bulge_prob = 20;
639
+ }
640
+
641
+ bool move_made = false;
642
+
643
+ // Probabilities for snapping to guide coordinates
644
+ double prob_dynamic_guide_snap = 0.20 + 0.20 * time_ratio;
645
+ double prob_static_guide_snap_if_not_dynamic = 0.75;
646
+
647
+ if (move_type_roll < move_edge_prob && candidate_state.poly.size() >= 4 ) { // Move Edge
648
+ int edge_idx = rng.next_int(candidate_state.poly.size());
649
+ Point p1_orig = candidate_state.poly[edge_idx];
650
+ Point p2_orig = candidate_state.poly[(edge_idx + 1) % candidate_state.poly.size()];
651
+
652
+ int new_coord_val = -1;
653
+ int cur_delta_m=0, cur_delta_s=0;
654
+ bool coord_selected_successfully = false;
655
+
656
+ // Determine which guides are relevant (X or Y)
657
+ const std::vector<int>* relevant_dyn_guides = (p1_orig.x == p2_orig.x) ? &best_poly_x_guides : &best_poly_y_guides;
658
+ const std::vector<int>* relevant_static_guides = (p1_orig.x == p2_orig.x) ? &static_x_guides : &static_y_guides;
659
+
660
+ // Try snapping to dynamic (best poly) guides
661
+ if (!relevant_dyn_guides->empty() && rng.next_double() < prob_dynamic_guide_snap) {
662
+ new_coord_val = (*relevant_dyn_guides)[rng.next_int(relevant_dyn_guides->size())];
663
+ coord_selected_successfully = true;
664
+ }
665
+ // If not, try snapping to static (fish) guides
666
+ if (!coord_selected_successfully) {
667
+ if (!relevant_static_guides->empty() && rng.next_double() < prob_static_guide_snap_if_not_dynamic) {
668
+ new_coord_val = (*relevant_static_guides)[rng.next_int(relevant_static_guides->size())];
669
+ coord_selected_successfully = true;
670
+ }
671
+ }
672
+ // If still not selected, use random displacement
673
+ if (!coord_selected_successfully) {
674
+ double step_factor = std::max(0.1, 1.0 - time_ratio * 0.95); // Step size decreases over time
675
+ int base_step_max = std::max(1, (int)( (MAX_COORD_VAL/150.0) * step_factor + 1 ) );
676
+ int random_displacement = rng.next_int(-base_step_max, base_step_max);
677
+ if (time_ratio > 0.75 && rng.next_double() < 0.7) { // Very small steps near end
678
+ random_displacement = rng.next_int(-2,2);
679
+ }
680
+ if (random_displacement == 0) random_displacement = (rng.next_double() < 0.5) ? -1:1;
681
+
682
+ if (p1_orig.x == p2_orig.x) new_coord_val = p1_orig.x + random_displacement; // Vertical edge, move X
683
+ else new_coord_val = p1_orig.y + random_displacement; // Horizontal edge, move Y
684
+ }
685
+
686
+ new_coord_val = std::max(0, std::min(MAX_COORD_VAL, new_coord_val)); // Clamp to bounds
687
+
688
+ if (p1_orig.x == p2_orig.x) { // Vertical edge: (X_orig, Y_s) to (X_orig, Y_e)
689
+ if (new_coord_val == p1_orig.x) {move_made = false; goto end_move_attempt_label;} // No change
690
+
691
+ int query_min_x, query_max_x;
692
+ if (new_coord_val > p1_orig.x) { // Moved right
693
+ query_min_x = p1_orig.x + 1;
694
+ query_max_x = new_coord_val;
695
+ } else { // Moved left (new_coord_val < p1_orig.x)
696
+ query_min_x = new_coord_val;
697
+ query_max_x = p1_orig.x - 1;
698
+ }
699
+
700
+ calculate_score_delta_for_rectangle(
701
+ query_min_x, query_max_x,
702
+ std::min(p1_orig.y, p2_orig.y), std::max(p1_orig.y, p2_orig.y),
703
+ cur_delta_m, cur_delta_s);
704
+
705
+ int sign = (new_coord_val > p1_orig.x) ? 1 : -1; // Moving right is positive X change
706
+ if (p1_orig.y > p2_orig.y) sign *= -1; // Correct for edge Y-direction (p1_orig.y to p2_orig.y)
707
+ if (current_signed_area < 0) sign *= -1; // Correct for CW polygon (area < 0)
708
+
709
+ candidate_state.poly[edge_idx].x = new_coord_val;
710
+ candidate_state.poly[(edge_idx + 1) % candidate_state.poly.size()].x = new_coord_val;
711
+ candidate_state.m_count += sign * cur_delta_m;
712
+ candidate_state.s_count += sign * cur_delta_s;
713
+ } else { // Horizontal edge: (X_s, Y_orig) to (X_e, Y_orig)
714
+ if (new_coord_val == p1_orig.y) {move_made = false; goto end_move_attempt_label;} // No change
715
+
716
+ int query_min_y, query_max_y;
717
+ if (new_coord_val > p1_orig.y) { // Moved up (Y increases)
718
+ query_min_y = p1_orig.y + 1;
719
+ query_max_y = new_coord_val;
720
+ } else { // Moved down (Y decreases, new_coord_val < p1_orig.y)
721
+ query_min_y = new_coord_val;
722
+ query_max_y = p1_orig.y - 1;
723
+ }
724
+
725
+ calculate_score_delta_for_rectangle(
726
+ std::min(p1_orig.x, p2_orig.x), std::max(p1_orig.x, p2_orig.x),
727
+ query_min_y, query_max_y,
728
+ cur_delta_m, cur_delta_s);
729
+
730
+ int sign = (new_coord_val < p1_orig.y) ? 1 : -1; // Moving "down" (Y decreases) means positive sign if it expands area
731
+ if (p1_orig.x > p2_orig.x) sign *= -1; // Correct for edge X-direction (p1_orig.x to p2_orig.x)
732
+ if (current_signed_area < 0) sign *= -1; // Correct for CW polygon
733
+
734
+ candidate_state.poly[edge_idx].y = new_coord_val;
735
+ candidate_state.poly[(edge_idx + 1) % candidate_state.poly.size()].y = new_coord_val;
736
+ candidate_state.m_count += sign * cur_delta_m;
737
+ candidate_state.s_count += sign * cur_delta_s;
738
+ }
739
+ int M_cand = candidate_state.poly.size();
740
+ sa_critical_edge_indices_cache.push_back((edge_idx - 1 + M_cand) % M_cand);
741
+ sa_critical_edge_indices_cache.push_back(edge_idx);
742
+ sa_critical_edge_indices_cache.push_back((edge_idx + 1) % M_cand);
743
+ changed_vertex_indices.clear();
744
+ changed_vertex_indices.push_back(edge_idx);
745
+ changed_vertex_indices.push_back((edge_idx + 1) % M_cand);
746
+ move_made = true;
747
+
748
+ } else if (move_type_roll < move_edge_prob + add_bulge_prob && candidate_state.poly.size() + 2 <= MAX_VERTICES && candidate_state.poly.size() >=4) { // Add Bulge
749
+ int edge_idx = rng.next_int(candidate_state.poly.size());
750
+ Point p_s = candidate_state.poly[edge_idx]; // Start point of edge
751
+ Point p_e = candidate_state.poly[(edge_idx + 1) % candidate_state.poly.size()]; // End point of edge
752
+
753
+ int new_coord_val = -1;
754
+ bool coord_selected_successfully = false;
755
+
756
+ const std::vector<int>* relevant_dyn_guides = (p_s.x == p_e.x) ? &best_poly_x_guides : &best_poly_y_guides;
757
+ const std::vector<int>* relevant_static_guides = (p_s.x == p_e.x) ? &static_x_guides : &static_y_guides;
758
+
759
+ // Try snapping bulge coord
760
+ if (!relevant_dyn_guides->empty() && rng.next_double() < prob_dynamic_guide_snap) {
761
+ new_coord_val = (*relevant_dyn_guides)[rng.next_int(relevant_dyn_guides->size())];
762
+ coord_selected_successfully = true;
763
+ }
764
+ if (!coord_selected_successfully) {
765
+ if (!relevant_static_guides->empty() && rng.next_double() < prob_static_guide_snap_if_not_dynamic) {
766
+ new_coord_val = (*relevant_static_guides)[rng.next_int(relevant_static_guides->size())];
767
+ coord_selected_successfully = true;
768
+ }
769
+ }
770
+ // If not snapped, random depth for bulge
771
+ if (!coord_selected_successfully) {
772
+ double depth_factor = std::max(0.1, 1.0 - time_ratio * 0.9);
773
+ int base_depth_max = std::max(1, (int)( (MAX_COORD_VAL/300.0) * depth_factor + 1 ) );
774
+ int random_abs_depth = rng.next_int(1, base_depth_max);
775
+ if (time_ratio > 0.75 && rng.next_double() < 0.7) {
776
+ random_abs_depth = rng.next_int(1,2);
777
+ }
778
+ int bulge_dir_sign = (rng.next_double() < 0.5) ? 1 : -1; // Randomly outwards or inwards relative to edge line
779
+ if (p_s.x == p_e.x) new_coord_val = p_s.x + bulge_dir_sign * random_abs_depth; // Vertical edge, bulge in X
780
+ else new_coord_val = p_s.y + bulge_dir_sign * random_abs_depth; // Horizontal edge, bulge in Y
781
+ }
782
+
783
+ new_coord_val = std::max(0, std::min(MAX_COORD_VAL, new_coord_val));
784
+
785
+ Point v1_mod, v2_mod; // New vertices for the bulge
786
+ int cur_delta_m=0, cur_delta_s=0;
787
+
788
+ if (p_s.x == p_e.x) { // Original edge is vertical
789
+ if (new_coord_val == p_s.x) {move_made = false; goto end_move_attempt_label;} // Bulge is flat
790
+ v1_mod = {new_coord_val, p_s.y}; v2_mod = {new_coord_val, p_e.y};
791
+ // Rectangle for delta score is between X=p_s.x and X=new_coord_val, over Y-span of original edge
792
+ calculate_score_delta_for_rectangle(
793
+ std::min(p_s.x, new_coord_val), std::max(p_s.x, new_coord_val),
794
+ std::min(p_s.y,p_e.y), std::max(p_s.y,p_e.y),
795
+ cur_delta_m, cur_delta_s);
796
+ int sign = (new_coord_val > p_s.x) ? 1 : -1; // Bulge to the right of edge is positive X change
797
+ if (p_s.y > p_e.y) sign *= -1; // Correct for edge Y-direction
798
+ if (current_signed_area < 0) sign *= -1; // Correct for CW polygon
799
+ candidate_state.m_count += sign * cur_delta_m;
800
+ candidate_state.s_count += sign * cur_delta_s;
801
+ } else { // Original edge is horizontal
802
+ if (new_coord_val == p_s.y) {move_made = false; goto end_move_attempt_label;} // Bulge is flat
803
+ v1_mod = {p_s.x, new_coord_val}; v2_mod = {p_e.x, new_coord_val};
804
+ // Rectangle for delta score is between Y=p_s.y and Y=new_coord_val, over X-span of original edge
805
+ calculate_score_delta_for_rectangle(
806
+ std::min(p_s.x,p_e.x), std::max(p_s.x,p_e.x),
807
+ std::min(p_s.y, new_coord_val), std::max(p_s.y, new_coord_val),
808
+ cur_delta_m, cur_delta_s);
809
+ int sign = (new_coord_val < p_s.y) ? 1 : -1; // Bulge "downwards" (Y decreases) means positive sign if it expands area
810
+ if (p_s.x > p_e.x) sign *= -1; // Correct for edge X-direction
811
+ if (current_signed_area < 0) sign *= -1; // Correct for CW polygon
812
+ candidate_state.m_count += sign * cur_delta_m;
813
+ candidate_state.s_count += sign * cur_delta_s;
814
+ }
815
+
816
+ // Insert new vertices into polygon
817
+ auto insert_pos_iter = candidate_state.poly.begin() + (edge_idx + 1);
818
+ insert_pos_iter = candidate_state.poly.insert(insert_pos_iter, v1_mod);
819
+ candidate_state.poly.insert(insert_pos_iter + 1, v2_mod);
820
+
821
+ // Mark affected edges/vertices as critical for local intersection check
822
+ sa_critical_edge_indices_cache.push_back(edge_idx);
823
+ sa_critical_edge_indices_cache.push_back(edge_idx + 1);
824
+ sa_critical_edge_indices_cache.push_back(edge_idx + 2);
825
+ changed_vertex_indices.clear();
826
+ int Mc = (int)candidate_state.poly.size();
827
+ changed_vertex_indices.push_back((edge_idx + 1) % Mc);
828
+ changed_vertex_indices.push_back((edge_idx + 2) % Mc);
829
+ move_made = true;
830
+
831
+ } else if (candidate_state.poly.size() > 4) { // Simplify Polygon (remove collinear vertex)
832
+ int R_start_idx = rng.next_int(candidate_state.poly.size()); // Random start for search
833
+ bool simplified_this_turn = false;
834
+ for(int k_offset=0; k_offset < candidate_state.poly.size() ; ++k_offset) {
835
+ int current_poly_size_before_erase = candidate_state.poly.size();
836
+ if (current_poly_size_before_erase <= 4) break; // Cannot simplify further
837
+
838
+ int p1_idx = (R_start_idx + k_offset) % current_poly_size_before_erase;
839
+ int p0_idx_old = (p1_idx - 1 + current_poly_size_before_erase) % current_poly_size_before_erase;
840
+ int p2_idx_old = (p1_idx + 1) % current_poly_size_before_erase;
841
+
842
+ const Point& p0 = candidate_state.poly[p0_idx_old];
843
+ const Point& p1 = candidate_state.poly[p1_idx];
844
+ const Point& p2 = candidate_state.poly[p2_idx_old];
845
+
846
+ bool collinear_x = (p0.x == p1.x && p1.x == p2.x);
847
+ bool collinear_y = (p0.y == p1.y && p1.y == p2.y);
848
+
849
+ if (collinear_x || collinear_y) {
850
+ candidate_state.poly.erase(candidate_state.poly.begin() + p1_idx);
851
+ simplified_this_turn = true;
852
+
853
+ int M_cand = candidate_state.poly.size();
854
+ int critical_vertex_idx_in_new_poly;
855
+ // Vertex p0 (at p0_idx_old) forms the new corner. Its index in new poly:
856
+ if (p1_idx == 0) { // If p1 was poly[0], p0 was poly[last]. p0 is now poly[new_last]
857
+ critical_vertex_idx_in_new_poly = M_cand -1;
858
+ } else { // Otherwise, p0's index p1_idx-1 is preserved.
859
+ critical_vertex_idx_in_new_poly = p1_idx - 1;
860
+ }
861
+
862
+ if (!candidate_state.poly.empty()) {
863
+ sa_critical_edge_indices_cache.push_back((critical_vertex_idx_in_new_poly - 1 + M_cand) % M_cand);
864
+ sa_critical_edge_indices_cache.push_back(critical_vertex_idx_in_new_poly);
865
+ sa_critical_edge_indices_cache.push_back((critical_vertex_idx_in_new_poly + 1) % M_cand);
866
+ }
867
+ break; // Simplified one vertex, enough for this turn
868
+ }
869
+ }
870
+ if (!simplified_this_turn) {move_made = false; goto end_move_attempt_label;} // No simplification found/possible
871
+ move_made = true;
872
+ }
873
+
874
+ end_move_attempt_label:; // Label for goto if a move is aborted (e.g. no change)
875
+ if (!move_made) continue; // No valid move attempted or made
876
+
877
+ // Validate candidate polygon
878
+ if (!is_polygon_structurally_sound(candidate_state.poly) || candidate_state.poly.size() < 4 ||
879
+ has_duplicate_vertices_local(candidate_state.poly, changed_vertex_indices)) {
880
+ continue; // Invalid basic structure or duplicate vertices near the modified area
881
+ }
882
+
883
+ if (has_self_intersection_locally(candidate_state.poly, sa_critical_edge_indices_cache)) {
884
+ continue; // Self-intersection found
885
+ }
886
+
887
+ // Accept or reject candidate based on SA criteria
888
+ double candidate_raw_obj_score = candidate_state.get_raw_objective_score();
889
+ double current_raw_obj_score = current_state.get_raw_objective_score();
890
+ double score_diff = candidate_raw_obj_score - current_raw_obj_score;
891
+
892
+ if (score_diff >= 0 || (temperature > 1e-9 && rng.next_double() < std::exp(score_diff / temperature))) {
893
+ current_state = std::move(candidate_state); // Accept move
894
+ current_signed_area = polygon_signed_area_times_2(current_state.poly); // Update signed area
895
+ if (current_signed_area == 0 && !current_state.poly.empty() && current_state.poly.size() >=3) current_signed_area = 1; // Handle degenerate
896
+
897
+ // Keep polygon compact is deferred to final compression for speed.
898
+
899
+ if (current_state.get_objective_score() > best_state.get_objective_score()) {
900
+ best_state = current_state; // New best solution found
901
+ update_best_poly_guides(best_state); // Update dynamic guides
902
+ }
903
+ }
904
+ } // End SA loop
905
+
906
+ // Simplify polygon by removing all collinear intermediate vertices to reduce size/perimeter
907
+ compress_polygon_collinear(best_state.poly);
908
+
909
+ // Final validation of the best found state
910
+ bool needs_reset_to_default = false;
911
+ if (!is_polygon_structurally_sound(best_state.poly) ||
912
+ best_state.poly.size() < 4 ||
913
+ !has_distinct_vertices_unordered(best_state.poly) ||
914
+ check_self_intersection_full(best_state.poly) ) { // Full intersection check on best
915
+ needs_reset_to_default = true;
916
+ }
917
+
918
+ if (needs_reset_to_default) { // If best state is invalid, revert to default
919
+ best_state.poly = default_tiny_poly;
920
+ calculate_score_from_scratch(best_state.poly, best_state.m_count, best_state.s_count);
921
+ }
922
+
923
+ // If best score is 0, check if default polygon gives >0. (max(0, val+1))
924
+ // The score is max(0, M-S+1). So if M-S = -1, score is 0. If M-S = 0, score is 1.
925
+ // If best_state.get_objective_score() == 0, it means M-S+1 <= 0, so M-S <= -1.
926
+ // Default polygon has M=0, S=0, so M-S+1 = 1. Score is 1.
927
+ // So, if best_state score is 0, default is always better (score 1) or equal (if default also somehow gets 0).
928
+ if (best_state.get_objective_score() == 0) {
929
+ // This case implies M-S <= -1 for best_state. Default gives score 1.
930
+ // It's possible that the problem setter implies an empty polygon is not allowed or scores 0.
931
+ // The problem implies outputting a polygon. The default_tiny_poly is a valid polygon.
932
+ // The current logic already handles falling back to default_tiny_poly if the Kadane one is invalid.
933
+ // This check ensures if SA ends up with a 0-score polygon (e.g. captures many sardines),
934
+ // we check if the basic tiny square is better.
935
+ SAState temp_default_state; // Create a temporary default state to calculate its score
936
+ temp_default_state.poly = default_tiny_poly;
937
+ calculate_score_from_scratch(temp_default_state.poly, temp_default_state.m_count, temp_default_state.s_count);
938
+ // If the objectively computed score of the best_state is less than the default one, use default.
939
+ // This is useful if best_state.get_objective_score() became 0 due to M-S+1 <= 0, while default_tiny_poly has M-S+1=1.
940
+ if (best_state.get_objective_score() < temp_default_state.get_objective_score()) {
941
+ best_state = temp_default_state;
942
+ }
943
+ }
944
+
945
+
946
+ // Output the best polygon
947
+ std::cout << best_state.poly.size() << "\n";
948
+ for (const auto& p : best_state.poly) {
949
+ std::cout << p.x << " " << p.y << "\n";
950
+ }
951
+ }
952
+
953
+
954
+ int main(int argc, char *argv[]) {
955
+ std::ios_base::sync_with_stdio(false);
956
+ std::cin.tie(NULL);
957
+
958
+ // Allow overriding time limit via command line arg, for local testing
959
+ if (argc > 1) {
960
+ try {
961
+ ACTUAL_TIME_LIMIT_SECONDS = std::stod(argv[1]);
962
+ } catch (const std::exception& e) { /* keep default if parse fails */ }
963
+ }
964
+ ACTUAL_TIME_LIMIT_SECONDS -= TIME_LIMIT_SECONDS_SAFETY_MARGIN;
965
+ if (ACTUAL_TIME_LIMIT_SECONDS < 0.2) ACTUAL_TIME_LIMIT_SECONDS = 0.2; // Minimum sensible time limit
966
+
967
+
968
+ sa_critical_edge_indices_cache.reserve(10); // Small, for a few critical edges
969
+
970
+
971
+ int N_half; // Number of mackerels (and sardines)
972
+ std::cin >> N_half;
973
+
974
+ all_fish_structs.resize(2 * N_half);
975
+ std::vector<int> fish_indices_for_kdtree(2 * N_half);
976
+ if (2 * N_half > 0) {
977
+ std::iota(fish_indices_for_kdtree.begin(), fish_indices_for_kdtree.end(), 0);
978
+ }
979
+
980
+ // Read mackerels
981
+ for (int i = 0; i < N_half; ++i) {
982
+ std::cin >> all_fish_structs[i].p.x >> all_fish_structs[i].p.y;
983
+ all_fish_structs[i].type = 1;
984
+ }
985
+ // Read sardines
986
+ for (int i = 0; i < N_half; ++i) {
987
+ std::cin >> all_fish_structs[N_half + i].p.x >> all_fish_structs[N_half + i].p.y;
988
+ all_fish_structs[N_half + i].type = -1;
989
+ }
990
+
991
+ // Build KD-tree if there are fish
992
+ if (!all_fish_structs.empty()) {
993
+ fish_kdtree_root = build_kdtree(fish_indices_for_kdtree, 0, (int)all_fish_structs.size() - 1, 0);
994
+ }
995
+
996
+ simulated_annealing_main();
997
+
998
+ // Clean up KD-tree memory
999
+ if (fish_kdtree_root) delete_kdtree(fish_kdtree_root);
1000
+
1001
+ return 0;
1002
+ }
1003
+ # EVOLVE-BLOCK-END
benchmarks/ale_bench/ale-bench-lite-problems/ahc039/config.yaml ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ALE-Bench ahc039 — AtCoder Heuristic Contest
2
+ # Usage: skydiscover-run initial_program.cpp evaluator.py -c config.yaml -s <strategy>
3
+ language: cpp
4
+ diff_based_generation: true
5
+ max_iterations: 100
6
+ checkpoint_interval: 10
7
+ max_solution_length: 60000
8
+ llm:
9
+ api_base: https://api.openai.com/v1
10
+ models:
11
+ - name: "gpt-5"
12
+ weight: 1.0
13
+ max_tokens: 32000
14
+ timeout: 600
15
+ prompt:
16
+ system_message: "You are a world-class algorithm engineer, and you are very good at programming. Now, you are participating\
17
+ \ in a programming contest. You are asked to solve a heuristic problem, known as an NP-hard problem.\n\nStory\n--------\n\
18
+ Takahashi is a skilled purse seine fisher.\nHis fishing boat is equipped with state-of-the-art sonar, allowing him to\
19
+ \ accurately determine the positions of fish within the fishing area.\nAdditionally, the boat is capable of high-speed\
20
+ \ movement, enabling him to assume that fish remain stationary while he sets up the fishing net.\n\nThe fishing method\
21
+ \ involves using the boat to deploy nets and form a closed polygon, capturing the fish within the enclosed area.\nTo optimize\
22
+ \ efficiency, each edge of the polygon formed by the nets must be aligned either parallel to the east-west or north-south\
23
+ \ direction.\nFurthermore, due to the limited length of the nets equipped on the boat, the polygon must be constructed\
24
+ \ within these constraints.\n\nThe fishing area contains two types of fish: mackerels and sardines.\nFor resource conservation\
25
+ \ reasons, sardines are currently prohibited from being caught in this fishing area.\nAny sardines caught in the net must\
26
+ \ be released back into the sea.\nBecause this process is labor-intensive, Takahashi should focus on maximizing the catch\
27
+ \ of mackerel while avoiding sardines as much as possible.\n\n\nProblem Statement\n--------\nThere are $N$ mackerels and\
28
+ \ $N$ sardines on a two-dimensional plane.\nConstruct a polygon that satisfies the following conditions and maximize the\
29
+ \ value obtained by subtracting the total number of sardines inside the polygon from the total number of mackerels inside\
30
+ \ it.\nNote that any points lying on the edges of the polygon are considered to be inside the polygon.\n\n### Conditions\n\
31
+ 1. The number of vertices in the polygon must not exceed $1000$, and the total length of its edges must not exceed $4\
32
+ \ \\times 10^5$.\n2. The coordinates of each vertex $(x, y)$ must be integers satisfying $0 \\leq x, y \\leq 10^5$.\n\
33
+ 3. Each edge of the polygon must be parallel to either the $x$-axis or the $y$-axis.\n4. The polygon must not self-intersect:\
34
+ \ non-adjacent edges must not share any points, and adjacent edges must only meet at their endpoints.\n\n\n\nScoring\n\
35
+ --------\nLet $a$ be the total number of mackerels inside the polygon and $b$ be the total number of sardines inside the\
36
+ \ polygon.\nThen, you will obtain the score of $\\max(0, a - b + 1)$.\n\nThere are $150$ test cases, and the score of\
37
+ \ a submission is the total score for each test case.\nIf your submission produces an illegal output or exceeds the time\
38
+ \ limit for some test cases, the submission itself will be judged as <span class='label label-warning' data-toggle='tooltip'\
39
+ \ data-placement='top' title=\"Wrong Answer\">WA</span> or <span class='label label-warning' data-toggle='tooltip' data-placement='top'\
40
+ \ title=\"Time Limit Exceeded\">TLE</span> , and the score of the submission will be zero.\nThe highest score obtained\
41
+ \ during the contest will determine the final ranking, and there will be no system test after the contest.\nIf more than\
42
+ \ one participant gets the same score, they will be ranked in the same place regardless of the submission time.\n\n\n\n\
43
+ Input\n--------\nInput is given from Standard Input in the following format:\n~~~\n$N$\n$x_0$ $y_0$\n$\\vdots$\n$x_{2N-1}$\
44
+ \ $y_{2N-1}$\n~~~\n\n- In all test cases, the number of mackerels and sardines, $N$, is fixed at $5000$.\n- For each $i\
45
+ \ = 0, 1, \\dots, N-1$, $(x_i, y_i)$ represents the coordinates of the $i$-th mackerel.\n- For each $i = 0, 1, \\dots,\
46
+ \ N-1$, $(x_{N+i}, y_{N+i})$ represents the coordinates of the $i$-th sardine.\n- Each coordinate $(x_i, y_i)$ satisfies\
47
+ \ $0 \\leq x_i, y_i \\leq 10^5$, and all coordinates are distinct.\n\n\nOutput\n--------\nLet the number of vertices in\
48
+ \ the polygon be $m$ ($4 \\leq m \\leq 1000$), and let $(a_i, b_i)$ denote the coordinates of the $i$-th vertex.\nThen,\
49
+ \ output to Standard Output in the following format:\n~~~\n$m$\n$a_0$ $b_0$\n$\\vdots$\n$a_{m-1}$ $b_{m-1}$\n~~~\n\nThe\
50
+ \ output vertices do not necessarily need to form the actual corners of the polygon.\nIn other words, three consecutive\
51
+ \ vertices $(a_i, b_i), (a_{i+1}, b_{i+1}), (a_{i+2}, b_{i+2})$ may lie on a straight line.\nHowever, all vertices must\
52
+ \ have distinct coordinates.\n\nThe vertices can be output in either clockwise or counterclockwise order.\n\n<a href=\"\
53
+ https://img.atcoder.jp/ahc039/KNtTkgAy.html?lang=en&seed=0&output=sample\">Show example</a>\n\n\nYour program may output\
54
+ \ multiple solutions.\nIf multiple solutions are output, only the last one is used for scoring.\nYou can compare multiple\
55
+ \ solutions using the web version of the visualizer.\n\n\n\n\n\nInput Generation\n--------\n- $\\mathrm{rand}(L, U)$:\
56
+ \ Generates a random integer uniformly distributed between $L$ and $U$ (inclusive).\n- $\\mathrm{rand\\\\_double}(L, U)$:\
57
+ \ Generates a random real number uniformly distributed between $L$ and $U$.\n- $\\mathrm{normal}(\\mu, \\sigma)$: Generates\
58
+ \ a random real number from a normal distribution with mean $\\mu$ and standard deviation $\\sigma$.\n\nFirst, generate\
59
+ \ the coordinates of mackerels.\nThe number of clusters $n$ is determined by generating $n = \\mathrm{rand}(10, 25)$.\n\
60
+ For each cluster $i$, generate the following parameters:\n\n- Weight $w_i = \\mathrm{rand\\\\_double}(0, 1)$\n- Center\
61
+ \ $(cx_i, cy_i) = (\\mathrm{rand}(20000, 80000), \\mathrm{rand}(20000, 80000))$\n- Standard deviation $\\sigma_i = \\\
62
+ mathrm{rand}(1000, 5000)$\n\nRepeat the following process $N$ times to generate the coordinates of $N$ mackerels:\n\n\
63
+ - Randomly select a cluster $i$ with probability proportional to its weight $w_i$.\n- Generate $x = \\mathrm{round}(\\\
64
+ mathrm{normal}(cx_i, \\sigma_i))$ and $y = \\mathrm{round}(\\mathrm{normal}(cy_i, \\sigma_i))$.\n- If the generated coordinates\
65
+ \ $(x, y)$ satisfy $0 \\leq x, y \\leq 10^5$ and are distinct from all previously generated coordinates, they are accepted\
66
+ \ as the coordinates of a mackerel. Otherwise, regenerate $(x, y)$.\n\nAfter generating the coordinates of mackerels,\
67
+ \ generate the coordinates of sardines in the same way.\n\n\n\nTools (Input generator and visualizer)\n--------\n- <a\
68
+ \ href=\"https://img.atcoder.jp/ahc039/KNtTkgAy.html?lang=en\">Web version</a>: This is more powerful than the local version\
69
+ \ providing animations.\n- <a href=\"https://img.atcoder.jp/ahc039/KNtTkgAy.zip\">Local version</a>: You need a compilation\
70
+ \ environment of <a href=\"https://www.rust-lang.org/\">Rust language</a>.\n - <a href=\"https://img.atcoder.jp/ahc039/KNtTkgAy_windows.zip\"\
71
+ >Pre-compiled binary for Windows</a>: If you are not familiar with the Rust language environment, please use this instead.\n\
72
+ \nPlease be aware that sharing visualization results or discussing solutions/ideas during the contest is prohibited.\n\
73
+ \n\n Problem constraints:\n time_limit=2.0 memory_limit=1073741824\n"
74
+ evaluator:
75
+ timeout: 10000
76
+ cascade_evaluation: false
77
+
benchmarks/ale_bench/ale-bench-lite-problems/ahc039/evaluator.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ from pathlib import Path
3
+ from ale_bench.result import CaseResult, JudgeResult, Result
4
+ from ale_bench_eval.safe_ale_session import start_ale_bench_session
5
+ import logging
6
+ import sys
7
+ logger = logging.getLogger(__name__ + "_" + "ALE_BENCH_EVALUATOR")
8
+
9
def result_feedback(result: Result) -> CaseResult:
    """Pick the one case result used as feedback for a whole evaluation.

    When the overall verdict is ACCEPTED the first case is returned. Otherwise
    the first case whose verdict equals the overall verdict is returned, falling
    back to the first case if none matches.
    """
    cases = result.case_results
    overall = result.overall_judge_result
    if overall == JudgeResult.ACCEPTED:
        return cases[0]
    return next((case for case in cases if case.judge_result == overall), cases[0])
19
+
20
def evaluate(program_path):
    """Judge the program at ``program_path`` on the ahc039 public cases.

    The EVOLVE-BLOCK marker comments are stripped from the source, which is then
    evaluated as ``cpp20`` on the first 50 generated cases of an ALE-Bench
    session.

    Returns:
        On success, a dict with ``judge_result``, ``overall_score``,
        ``max_execution_time_sec``, ``max_memory_usage_mib``,
        ``standard_error``, ``message`` and ``combined_score`` (the overall
        absolute score averaged over the cases, negated for minimisation
        problems; a non-accepted minimisation run gets ``-sys.maxsize - 1``).
        On any exception, a dict with ``overall_score`` 0.0 and ``error``.

    The session is closed on every path, including failures, so that a crashed
    evaluation does not leak it.
    """
    problem_id = "ahc039"
    logger.info(f"Evaluating program {program_path} for problem {problem_id} in ale bench evaluator")
    # Bound before the try block so the finally clause can always inspect it.
    session = None
    try:
        logger.info("Starting ALE-Bench session")
        session = start_ale_bench_session(
            problem_id=problem_id,
            lite_version=True,
            num_workers=13,
        )
        logger.info("ALE-Bench session started")
        if not session:
            raise RuntimeError("Failed to start or restart the session.")
        optim_factor = 1 if session.problem.metadata.score_type == "maximize" else -1
        code = Path(program_path).read_text().replace("# EVOLVE-BLOCK-START", "").replace("# EVOLVE-BLOCK-END", "").strip()
        logger.info("Code extracted")
        num_public_cases = 50
        cases = session.case_gen(list(range(num_public_cases)))
        public_result = session.case_eval(
            cases, code, code_language="cpp20", skip_local_visualization=True
        )
        logger.info("Public evaluation completed")
        extracted_case = result_feedback(public_result)
        logger.info("Result feedback completed")
        combined_score = public_result.overall_absolute_score * optim_factor / num_public_cases
        if public_result.overall_judge_result != JudgeResult.ACCEPTED and optim_factor == -1:
            # A failed run on a minimisation problem must rank below every valid score.
            combined_score = -sys.maxsize - 1
        return {
            "judge_result": public_result.overall_judge_result.value,
            "overall_score": public_result.overall_absolute_score,
            "max_execution_time_sec": max([case_result.execution_time for case_result in public_result.case_results]),
            # NOTE(review): the double // 1024 presumes memory_usage is in bytes - confirm.
            "max_memory_usage_mib": max([case_result.memory_usage for case_result in public_result.case_results]) // 1024 // 1024,
            "standard_error": extracted_case.error_str,
            "message": extracted_case.message,
            "combined_score": combined_score,
        }
    except Exception as e:
        logger.error(f"Evaluation failed completely: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "overall_score": 0.0,
            "error": str(e),
        }
    finally:
        # Runs on success and failure alike; a cleanup error must not discard
        # the result that is already being returned.
        if session is not None:
            try:
                session.close()
                logger.info("ALE-Bench session closed")
            except Exception as close_error:
                logger.warning(f"Failed to close ALE-Bench session: {close_error}")
benchmarks/ale_bench/ale_agent_best/ahc008.cpp ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ #include <iostream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <algorithm>
6
+ // #include <map>
7
+ // #include <set>
8
+ #include <queue>
9
+ #include <cmath>
10
+ #include <iomanip>
11
+ #include <limits>
12
+
13
// --- Constants ---
// Side length of the square grid; valid coordinates run from 1 to GRID_SIZE.
constexpr int GRID_SIZE = 30;
// NOTE(review): presumably the number of simulated turns - confirm against the main loop.
constexpr int NUM_TURNS = 300;
// Sentinel for "unreachable / no score yet" in distance and evaluation code.
constexpr int INF = std::numeric_limits<int>::max();
17
+
18
/// A grid cell addressed by row `r` and column `c`.
/// Points are ordered by row first, then by column.
struct Point {
    int r;
    int c;

    bool operator==(const Point& rhs) const { return r == rhs.r && c == rhs.c; }
    bool operator!=(const Point& rhs) const { return !(*this == rhs); }
    bool operator<(const Point& rhs) const {
        return (r != rhs.r) ? (r < rhs.r) : (c < rhs.c);
    }
};
/// Sentinel meaning "no point"; both coordinates are -1.
const Point INVALID_POINT = {-1, -1};
29
+
30
+
31
// Tunable parameters
// Added to a candidate's score when the standing cell lies outside the human's inner safe region.
constexpr int STAND_OUTSIDE_INNER_SAFE_PENALTY = 1000;
// Score reduction per wall/boundary neighbour of a wall target (0 disables it).
constexpr int ADJACENT_WALL_PRIORITY_BONUS = 0;
// Per-pet proximity penalty (0 disables it).
constexpr int NEAR_PET_PENALTY_POINTS_PER_PET = 0;
// NOTE(review): presumably the radius for the proximity penalty above - confirm at its use site.
constexpr int NEAR_PET_RADIUS = 2;
// A builder stuck for this many turns switches to repositioning. Slightly increased.
constexpr int MAX_STUCK_TURNS = 10;
37
+
38
+ // Directions: Up, Down, Left, Right (indices 0, 1, 2, 3)
39
+ const Point DIRS[4] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
40
+ const char DIR_CHARS_BUILD[4] = {'u', 'd', 'l', 'r'};
41
+ const char DIR_CHARS_MOVE[4] = {'U', 'D', 'L', 'R'};
42
+ const char PET_MOVE_CHARS[4] = {'U', 'D', 'L', 'R'};
43
+
44
+ struct PetInfo {
45
+ Point pos;
46
+ int type;
47
+ int id;
48
+ };
49
+
50
/// What a human is trying to do on the current turn.
enum class HumanObjective {
    BUILDING_WALLS,        // some assigned wall cells are still unbuilt
    GOING_TO_SAFE_SPOT,    // all walls built, walking to the final standing cell
    STAYING_IN_SAFE_SPOT,  // all walls built and already on the final standing cell
    REPOSITIONING_STUCK    // stuck too long while building; head for the final standing cell
    // FLEEING_PET_IN_PEN removed, simplified objective setting
};
57
+
58
+ struct HumanInfo {
59
+ Point pos;
60
+ int id;
61
+
62
+ int strip_r_start;
63
+ int strip_r_end;
64
+
65
+ Point inner_safe_ul;
66
+ Point inner_safe_br;
67
+ Point final_stand_pos;
68
+
69
+ std::vector<Point> assigned_wall_cells;
70
+ HumanObjective objective;
71
+ int turns_stuck_building = 0;
72
+ };
73
+
74
+ // --- Game Grid and State ---
75
+ bool is_impassable_grid_static[GRID_SIZE + 1][GRID_SIZE + 1];
76
+ std::vector<PetInfo> pets_global_state;
77
+ std::vector<HumanInfo> humans_global_state;
78
+ int N_pets_global, M_humans_global;
79
+
80
+ Point bfs_parent_grid[GRID_SIZE + 1][GRID_SIZE + 1];
81
+ bool bfs_visited_grid[GRID_SIZE + 1][GRID_SIZE + 1];
82
+
83
+
84
+ // --- Utility Functions ---
85
+ bool is_valid_coord(int val) {
86
+ return val >= 1 && val <= GRID_SIZE;
87
+ }
88
+
89
+ bool is_valid_point(Point p) {
90
+ return is_valid_coord(p.r) && is_valid_coord(p.c);
91
+ }
92
+
93
+ int manhattan_distance(Point p1, Point p2) {
94
+ if (!is_valid_point(p1) || !is_valid_point(p2)) return INF;
95
+ return std::abs(p1.r - p2.r) + std::abs(p1.c - p2.c);
96
+ }
97
+
98
+ int count_adjacent_walls_or_boundaries(Point p) {
99
+ int count = 0;
100
+ for (int i = 0; i < 4; ++i) {
101
+ Point neighbor = {p.r + DIRS[i].r, p.c + DIRS[i].c};
102
+ if (!is_valid_point(neighbor) || (is_valid_point(neighbor) && is_impassable_grid_static[neighbor.r][neighbor.c])) {
103
+ count++;
104
+ }
105
+ }
106
+ return count;
107
+ }
108
+
109
+ bool can_theoretically_build_at(Point wall_pos, int builder_human_id) {
110
+ if (!is_valid_point(wall_pos)) return false;
111
+ if (is_impassable_grid_static[wall_pos.r][wall_pos.c]) return false;
112
+
113
+ for (const auto& pet : pets_global_state) {
114
+ if (pet.pos == wall_pos) return false;
115
+ if (manhattan_distance(wall_pos, pet.pos) == 1) return false;
116
+ }
117
+
118
+ for (const auto& human : humans_global_state) {
119
+ if (human.id == builder_human_id) continue; // Builder themself can be adjacent
120
+ if (human.pos == wall_pos) return false; // Other human on the wall_pos
121
+ }
122
+ return true;
123
+ }
124
+
125
+ char get_bfs_move_char(Point start_pos, Point target_pos,
126
+ const std::vector<Point>& current_turn_tentative_walls) {
127
+ if (start_pos == target_pos) return '.';
128
+
129
+ std::queue<Point> q;
130
+ q.push(start_pos);
131
+
132
+ for(int r_bfs = 1; r_bfs <= GRID_SIZE; ++r_bfs) for(int c_bfs = 1; c_bfs <= GRID_SIZE; ++c_bfs) {
133
+ bfs_visited_grid[r_bfs][c_bfs] = false;
134
+ bfs_parent_grid[r_bfs][c_bfs] = INVALID_POINT;
135
+ }
136
+ if (!is_valid_point(start_pos)) return '.';
137
+ bfs_visited_grid[start_pos.r][start_pos.c] = true;
138
+
139
+ Point path_found_dest = INVALID_POINT;
140
+
141
+ while(!q.empty()){
142
+ Point curr = q.front();
143
+ q.pop();
144
+
145
+ for(int i_dir=0; i_dir < 4; ++i_dir){
146
+ Point next_p = {curr.r + DIRS[i_dir].r, curr.c + DIRS[i_dir].c};
147
+
148
+ if(is_valid_point(next_p) &&
149
+ !is_impassable_grid_static[next_p.r][next_p.c] &&
150
+ !bfs_visited_grid[next_p.r][next_p.c]){
151
+
152
+ bool is_tentative_wall_conflict = false;
153
+ for(const auto& tw : current_turn_tentative_walls) {
154
+ if(next_p == tw) {
155
+ is_tentative_wall_conflict = true;
156
+ break;
157
+ }
158
+ }
159
+ if(is_tentative_wall_conflict) continue;
160
+
161
+ bfs_visited_grid[next_p.r][next_p.c] = true;
162
+ bfs_parent_grid[next_p.r][next_p.c] = curr;
163
+
164
+ if (next_p == target_pos) {
165
+ path_found_dest = next_p;
166
+ goto bfs_done_label;
167
+ }
168
+ q.push(next_p);
169
+ }
170
+ }
171
+ }
172
+
173
+ bfs_done_label:;
174
+ if (path_found_dest.r == -1) return '.';
175
+
176
+ Point current_step_in_path = path_found_dest;
177
+ while(!(bfs_parent_grid[current_step_in_path.r][current_step_in_path.c] == INVALID_POINT) &&
178
+ !(bfs_parent_grid[current_step_in_path.r][current_step_in_path.c] == start_pos)) {
179
+ current_step_in_path = bfs_parent_grid[current_step_in_path.r][current_step_in_path.c];
180
+ }
181
+
182
+ for(int i_dir = 0; i_dir < 4; ++i_dir){
183
+ if(start_pos.r + DIRS[i_dir].r == current_step_in_path.r &&
184
+ start_pos.c + DIRS[i_dir].c == current_step_in_path.c){
185
+ return DIR_CHARS_MOVE[i_dir];
186
+ }
187
+ }
188
+ return '.';
189
+ }
190
+
191
+
192
+ void initialize_game() {
193
+ std::cin >> N_pets_global;
194
+ pets_global_state.resize(N_pets_global);
195
+ for (int i = 0; i < N_pets_global; ++i) {
196
+ pets_global_state[i].id = i;
197
+ std::cin >> pets_global_state[i].pos.r >> pets_global_state[i].pos.c >> pets_global_state[i].type;
198
+ }
199
+
200
+ std::cin >> M_humans_global;
201
+ humans_global_state.resize(M_humans_global);
202
+
203
+ for(int r_grid=0; r_grid <= GRID_SIZE; ++r_grid) for(int c_grid=0; c_grid <= GRID_SIZE; ++c_grid) is_impassable_grid_static[r_grid][c_grid] = false;
204
+
205
+ int base_strip_height = GRID_SIZE / M_humans_global;
206
+ int remainder_heights = GRID_SIZE % M_humans_global;
207
+ int current_r_start_coord = 1;
208
+
209
+ for (int i = 0; i < M_humans_global; ++i) {
210
+ HumanInfo& human = humans_global_state[i];
211
+ human.id = i;
212
+ std::cin >> human.pos.r >> human.pos.c;
213
+
214
+ int strip_h_for_this_human = base_strip_height + (i < remainder_heights ? 1 : 0);
215
+ human.strip_r_start = current_r_start_coord;
216
+ human.strip_r_end = human.strip_r_start + strip_h_for_this_human - 1;
217
+ human.strip_r_end = std::min(human.strip_r_end, GRID_SIZE);
218
+
219
+ int actual_strip_h = human.strip_r_end - human.strip_r_start + 1;
220
+ int actual_strip_w = GRID_SIZE;
221
+
222
+ human.inner_safe_ul.r = human.strip_r_start + (actual_strip_h >= 3 ? 1 : 0);
223
+ human.inner_safe_ul.c = 1 + (actual_strip_w >= 3 ? 1 : 0);
224
+ human.inner_safe_br.r = human.strip_r_end - (actual_strip_h >= 3 ? 1 : 0);
225
+ human.inner_safe_br.c = GRID_SIZE - (actual_strip_w >= 3 ? 1 : 0);
226
+
227
+ if (human.inner_safe_ul.r > human.inner_safe_br.r) human.inner_safe_br.r = human.inner_safe_ul.r;
228
+ if (human.inner_safe_ul.c > human.inner_safe_br.c) human.inner_safe_br.c = human.inner_safe_ul.c;
229
+
230
+ human.final_stand_pos = {
231
+ human.inner_safe_ul.r + (human.inner_safe_br.r - human.inner_safe_ul.r) / 2,
232
+ human.inner_safe_ul.c + (human.inner_safe_br.c - human.inner_safe_ul.c) / 2
233
+ };
234
+ human.final_stand_pos.r = std::max(human.inner_safe_ul.r, std::min(human.inner_safe_br.r, human.final_stand_pos.r));
235
+ human.final_stand_pos.c = std::max(human.inner_safe_ul.c, std::min(human.inner_safe_br.c, human.final_stand_pos.c));
236
+ if (!is_valid_point(human.final_stand_pos)) {
237
+ human.final_stand_pos = {human.strip_r_start, 1};
238
+ }
239
+
240
+ human.assigned_wall_cells.clear();
241
+ int r_s = human.strip_r_start;
242
+ int r_e = human.strip_r_end;
243
+
244
+ if (i == 0) {
245
+ for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
246
+ } else {
247
+ for (int c_coord = GRID_SIZE / 2 + 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_s, c_coord});
248
+ }
249
+ if (i == M_humans_global - 1) {
250
+ for (int c_coord = 1; c_coord <= GRID_SIZE; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
251
+ } else {
252
+ for (int c_coord = 1; c_coord <= GRID_SIZE / 2; ++c_coord) human.assigned_wall_cells.push_back({r_e, c_coord});
253
+ }
254
+ for (int r_mid = r_s + 1; r_mid <= r_e - 1; ++r_mid) {
255
+ human.assigned_wall_cells.push_back({r_mid, 1});
256
+ human.assigned_wall_cells.push_back({r_mid, GRID_SIZE});
257
+ }
258
+
259
+ std::sort(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end());
260
+ human.assigned_wall_cells.erase(
261
+ std::unique(human.assigned_wall_cells.begin(), human.assigned_wall_cells.end()),
262
+ human.assigned_wall_cells.end()
263
+ );
264
+ current_r_start_coord = human.strip_r_end + 1;
265
+ }
266
+ }
267
+
268
+ std::string decide_human_actions() {
269
+ std::string actions_str(M_humans_global, '.');
270
+ std::vector<Point> tentative_walls_this_turn;
271
+ std::vector<Point> tentative_move_targets_this_turn(M_humans_global, INVALID_POINT);
272
+
273
+ for (int i = 0; i < M_humans_global; ++i) {
274
+ HumanInfo& human = humans_global_state[i];
275
+
276
+ int unbuilt_walls_count = 0;
277
+ for (const auto& wall_cell : human.assigned_wall_cells) {
278
+ if (is_valid_point(wall_cell) && !is_impassable_grid_static[wall_cell.r][wall_cell.c]) {
279
+ unbuilt_walls_count++;
280
+ }
281
+ }
282
+
283
+ if (unbuilt_walls_count == 0) {
284
+ human.objective = (human.pos == human.final_stand_pos) ?
285
+ HumanObjective::STAYING_IN_SAFE_SPOT :
286
+ HumanObjective::GOING_TO_SAFE_SPOT;
287
+ } else {
288
+ human.objective = HumanObjective::BUILDING_WALLS;
289
+ }
290
+
291
+ if(human.objective == HumanObjective::BUILDING_WALLS && human.turns_stuck_building >= MAX_STUCK_TURNS) {
292
+ human.objective = HumanObjective::REPOSITIONING_STUCK;
293
+ }
294
+
295
+ char chosen_action_for_human_i = '.';
296
+ if (human.objective == HumanObjective::STAYING_IN_SAFE_SPOT) {
297
+ chosen_action_for_human_i = '.';
298
+ } else if (human.objective == HumanObjective::GOING_TO_SAFE_SPOT ||
299
+ human.objective == HumanObjective::REPOSITIONING_STUCK) {
300
+ if(human.objective == HumanObjective::REPOSITIONING_STUCK) human.turns_stuck_building = 0;
301
+
302
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
303
+
304
+ } else if (human.objective == HumanObjective::BUILDING_WALLS) {
305
+ Point best_wall_target = INVALID_POINT;
306
+ Point best_stand_point = INVALID_POINT;
307
+ int min_eval_score = INF;
308
+
309
+ for (const auto& wall_coord : human.assigned_wall_cells) {
310
+ if (!is_valid_point(wall_coord) || is_impassable_grid_static[wall_coord.r][wall_coord.c]) continue;
311
+ if (!can_theoretically_build_at(wall_coord, human.id)) continue;
312
+
313
+ int adj_wall_bonus_val = count_adjacent_walls_or_boundaries(wall_coord) * ADJACENT_WALL_PRIORITY_BONUS;
314
+ int current_near_pet_penalty = 0; // NEAR_PET_PENALTY_POINTS_PER_PET is 0
315
+
316
+ for (int k_dir_idx = 0; k_dir_idx < 4; ++k_dir_idx) {
317
+ Point potential_stand_pos = {wall_coord.r + DIRS[k_dir_idx].r,
318
+ wall_coord.c + DIRS[k_dir_idx].c};
319
+
320
+ if (!is_valid_point(potential_stand_pos) || is_impassable_grid_static[potential_stand_pos.r][potential_stand_pos.c]) continue;
321
+
322
+ bool conflict_with_tentative_wall_build_spot = false;
323
+ for(const auto& tw : tentative_walls_this_turn) { if(potential_stand_pos == tw) { conflict_with_tentative_wall_build_spot = true; break; }}
324
+ if(conflict_with_tentative_wall_build_spot) continue;
325
+
326
+ bool conflict_with_tentative_move_dest = false;
327
+ for(int j=0; j < i; ++j) {
328
+ if (tentative_move_targets_this_turn[j] == potential_stand_pos) { conflict_with_tentative_move_dest = true; break; }
329
+ }
330
+ if (conflict_with_tentative_move_dest) continue;
331
+
332
+ int current_dist_to_stand = manhattan_distance(human.pos, potential_stand_pos);
333
+ int current_eval_score = current_dist_to_stand - adj_wall_bonus_val + current_near_pet_penalty;
334
+
335
+ bool is_inside_inner_safe_region =
336
+ (potential_stand_pos.r >= human.inner_safe_ul.r &&
337
+ potential_stand_pos.r <= human.inner_safe_br.r &&
338
+ potential_stand_pos.c >= human.inner_safe_ul.c &&
339
+ potential_stand_pos.c <= human.inner_safe_br.c);
340
+
341
+ if (!is_inside_inner_safe_region) {
342
+ current_eval_score += STAND_OUTSIDE_INNER_SAFE_PENALTY;
343
+ }
344
+
345
+ if (current_eval_score < min_eval_score) {
346
+ min_eval_score = current_eval_score;
347
+ best_wall_target = wall_coord;
348
+ best_stand_point = potential_stand_pos;
349
+ } else if (current_eval_score == min_eval_score) {
350
+ if (best_wall_target.r == -1 ||
351
+ wall_coord < best_wall_target ||
352
+ (wall_coord == best_wall_target && potential_stand_pos < best_stand_point)) {
353
+ best_wall_target = wall_coord;
354
+ best_stand_point = potential_stand_pos;
355
+ }
356
+ }
357
+ }
358
+ }
359
+
360
+ if (best_wall_target.r != -1) {
361
+ human.turns_stuck_building = 0;
362
+ if (human.pos == best_stand_point) {
363
+ for(int k_dir=0; k_dir<4; ++k_dir){
364
+ if(human.pos.r + DIRS[k_dir].r == best_wall_target.r &&
365
+ human.pos.c + DIRS[k_dir].c == best_wall_target.c){
366
+ chosen_action_for_human_i = DIR_CHARS_BUILD[k_dir];
367
+ break;
368
+ }
369
+ }
370
+ } else {
371
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, best_stand_point, tentative_walls_this_turn);
372
+ }
373
+ } else {
374
+ if (unbuilt_walls_count > 0) {
375
+ human.turns_stuck_building++;
376
+ }
377
+ if (human.pos != human.final_stand_pos) {
378
+ chosen_action_for_human_i = get_bfs_move_char(human.pos, human.final_stand_pos, tentative_walls_this_turn);
379
+ } else {
380
+ chosen_action_for_human_i = '.';
381
+ }
382
+ }
383
+ }
384
+
385
+ actions_str[i] = chosen_action_for_human_i;
386
+
387
+ if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'u' || chosen_action_for_human_i == 'd' || chosen_action_for_human_i == 'l' || chosen_action_for_human_i == 'r')) {
388
+ for(int k_dir=0; k_dir<4; ++k_dir) {
389
+ if (chosen_action_for_human_i == DIR_CHARS_BUILD[k_dir]) {
390
+ Point built_wall_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
391
+ if (is_valid_point(built_wall_pos)) {
392
+ tentative_walls_this_turn.push_back(built_wall_pos);
393
+ }
394
+ break;
395
+ }
396
+ }
397
+ } else if (chosen_action_for_human_i != '.' && (chosen_action_for_human_i == 'U' || chosen_action_for_human_i == 'D' || chosen_action_for_human_i == 'L' || chosen_action_for_human_i == 'R')) {
398
+ for(int k_dir=0; k_dir<4; ++k_dir) {
399
+ if (chosen_action_for_human_i == DIR_CHARS_MOVE[k_dir]) {
400
+ Point target_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
401
+ if (is_valid_point(target_pos)) {
402
+ tentative_move_targets_this_turn[i] = target_pos;
403
+ } else {
404
+ actions_str[i] = '.';
405
+ }
406
+ break;
407
+ }
408
+ }
409
+ }
410
+ }
411
+
412
+ for (int i = 0; i < M_humans_global; ++i) {
413
+ if (actions_str[i] != '.' && (actions_str[i] == 'U' || actions_str[i] == 'D' || actions_str[i] == 'L' || actions_str[i] == 'R')) {
414
+ Point target_move_sq = tentative_move_targets_this_turn[i];
415
+ if (target_move_sq.r == -1) {
416
+ actions_str[i] = '.';
417
+ continue;
418
+ }
419
+
420
+ bool conflict_with_wall = false;
421
+ for (const auto& wall_being_built : tentative_walls_this_turn) {
422
+ if (target_move_sq == wall_being_built) {
423
+ conflict_with_wall = true;
424
+ break;
425
+ }
426
+ }
427
+ if (conflict_with_wall) {
428
+ actions_str[i] = '.';
429
+ } else {
430
+ for (int j = 0; j < i; ++j) {
431
+ if (actions_str[j] != '.' && (actions_str[j] == 'U' || actions_str[j] == 'D' || actions_str[j] == 'L' || actions_str[j] == 'R') &&
432
+ tentative_move_targets_this_turn[j] == target_move_sq) {
433
+ actions_str[i] = '.';
434
+ break;
435
+ }
436
+ }
437
+ }
438
+ }
439
+ }
440
+ return actions_str;
441
+ }
442
+
443
+ void apply_actions_and_update_state(const std::string& actions_str_final) {
444
+ for (int i = 0; i < M_humans_global; ++i) {
445
+ char action = actions_str_final[i];
446
+ HumanInfo& human = humans_global_state[i];
447
+ if (action != '.' && (action == 'u' || action == 'd' || action == 'l' || action == 'r')) {
448
+ for(int k_dir=0; k_dir<4; ++k_dir){
449
+ if (action == DIR_CHARS_BUILD[k_dir]) {
450
+ Point wall_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
451
+ if (is_valid_point(wall_pos) && !is_impassable_grid_static[wall_pos.r][wall_pos.c]) {
452
+ is_impassable_grid_static[wall_pos.r][wall_pos.c] = true;
453
+ }
454
+ break;
455
+ }
456
+ }
457
+ }
458
+ }
459
+
460
+ for (int i = 0; i < M_humans_global; ++i) {
461
+ char action = actions_str_final[i];
462
+ HumanInfo& human = humans_global_state[i];
463
+ if (action != '.' && (action == 'U' || action == 'D' || action == 'L' || action == 'R')) {
464
+ for(int k_dir=0; k_dir<4; ++k_dir){
465
+ if (action == DIR_CHARS_MOVE[k_dir]) {
466
+ Point next_pos = {human.pos.r + DIRS[k_dir].r, human.pos.c + DIRS[k_dir].c};
467
+ if (is_valid_point(next_pos) && !is_impassable_grid_static[next_pos.r][next_pos.c]) {
468
+ human.pos = next_pos;
469
+ }
470
+ break;
471
+ }
472
+ }
473
+ }
474
+ }
475
+
476
+ for (int i = 0; i < N_pets_global; ++i) {
477
+ std::string pet_moves_str;
478
+ std::cin >> pet_moves_str;
479
+ if (pet_moves_str == ".") continue;
480
+
481
+ for (char move_char : pet_moves_str) {
482
+ for(int k_dir=0; k_dir<4; ++k_dir){
483
+ if(move_char == PET_MOVE_CHARS[k_dir]){
484
+ pets_global_state[i].pos.r += DIRS[k_dir].r;
485
+ pets_global_state[i].pos.c += DIRS[k_dir].c;
486
+ break;
487
+ }
488
+ }
489
+ }
490
+ }
491
+ }
492
+
493
+ int main() {
494
+ std::ios_base::sync_with_stdio(false);
495
+ std::cin.tie(NULL);
496
+
497
+ initialize_game();
498
+
499
+ for (int turn_idx = 0; turn_idx < NUM_TURNS; ++turn_idx) {
500
+ std::string actions_to_perform = decide_human_actions();
501
+ std::cout << actions_to_perform << std::endl;
502
+
503
+ apply_actions_and_update_state(actions_to_perform);
504
+ }
505
+
506
+ return 0;
507
+ }
508
+ # EVOLVE-BLOCK-END
benchmarks/ale_bench/ale_agent_best/ahc011.cpp ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
+ #include <iostream>
3
+ #include <vector>
4
+ #include <string>
5
+ #include <array>
6
+ #include <algorithm>
7
+ #include <unordered_map>
8
+ #include <map> // For A* visited set
9
+ #include <iomanip>
10
+ #include <chrono>
11
+ #include <functional> // For std::hash
12
+ #include <cmath> // For std::round
13
+ #include <random> // For std::mt19937
14
+ #include <numeric> // For std::iota
15
+ #include <queue> // For A* search (priority_queue)
16
+
17
+ // Constants for tile connections
18
+ const int LEFT_MASK = 1;
19
+ const int UP_MASK = 2;
20
+ const int RIGHT_MASK = 4;
21
+ const int DOWN_MASK = 8;
22
+
23
+ // Max N value, actual N read from input
24
+ const int N_MAX_CONST = 10;
25
+ int N_actual; // Actual N for the current test case
26
+ int T_param; // Actual T for the current test case
27
+
28
+ const int DR_TILE_RELATIVE_TO_EMPTY[] = {-1, 1, 0, 0};
29
+ const int DC_TILE_RELATIVE_TO_EMPTY[] = {0, 0, -1, 1};
30
+ const char MOVE_CHARS[] = {'U', 'D', 'L', 'R'};
31
+
32
+
33
+ std::mt19937 zobrist_rng_engine(123456789);
34
+ std::uniform_int_distribution<uint64_t> distrib_uint64;
35
+ uint64_t zobrist_tile_keys[N_MAX_CONST][N_MAX_CONST][16];
36
+
37
+
38
+ void init_zobrist_keys() {
39
+ for (int i = 0; i < N_actual; ++i) {
40
+ for (int j = 0; j < N_actual; ++j) {
41
+ for (int k = 0; k < 16; ++k) {
42
+ zobrist_tile_keys[i][j][k] = distrib_uint64(zobrist_rng_engine);
43
+ }
44
+ }
45
+ }
46
+ }
47
+
48
// Converts one hexadecimal digit character to its value in [0, 15].
//
// Accepts '0'-'9', 'a'-'f' and 'A'-'F'. Any other character yields 0 (the value
// of the empty tile), so the result is always a safe index into 16-entry tables
// such as zobrist_tile_keys[..][..][16].
int hex_char_to_int(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return 0;  // not a hex digit: never hand callers an out-of-range index
}
52
+
53
+
54
+ struct Board {
55
+ std::array<std::array<char, N_MAX_CONST>, N_MAX_CONST> tiles;
56
+ int empty_r, empty_c;
57
+ uint64_t zobrist_hash_value;
58
+
59
+ Board() : empty_r(0), empty_c(0), zobrist_hash_value(0) {}
60
+
61
+ void calculate_initial_hash() {
62
+ zobrist_hash_value = 0;
63
+ for (int i = 0; i < N_actual; ++i) {
64
+ for (int j = 0; j < N_actual; ++j) {
65
+ zobrist_hash_value ^= zobrist_tile_keys[i][j][hex_char_to_int(tiles[i][j])];
66
+ }
67
+ }
68
+ }
69
+
70
+ void update_hash_after_move(int pos_tile_becomes_empty_r, int pos_tile_becomes_empty_c,
71
+ int pos_empty_gets_tile_r, int pos_empty_gets_tile_c) {
72
+ int moved_tile_val_int = hex_char_to_int(tiles[pos_empty_gets_tile_r][pos_empty_gets_tile_c]);
73
+
74
+ zobrist_hash_value ^= zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c][moved_tile_val_int];
75
+ zobrist_hash_value ^= zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c][0];
76
+
77
+ zobrist_hash_value ^= zobrist_tile_keys[pos_tile_becomes_empty_r][pos_tile_becomes_empty_c][0];
78
+ zobrist_hash_value ^= zobrist_tile_keys[pos_empty_gets_tile_r][pos_empty_gets_tile_c][moved_tile_val_int];
79
+ }
80
+
81
+ bool apply_move_char(char move_char) {
82
+ int move_dir_idx = -1;
83
+ for(int i=0; i<4; ++i) if(MOVE_CHARS[i] == move_char) move_dir_idx = i;
84
+
85
+ if(move_dir_idx == -1) return false;
86
+
87
+ int tile_to_move_r = empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
88
+ int tile_to_move_c = empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
89
+
90
+ if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
91
+ return false;
92
+ }
93
+
94
+ char moved_tile_hex_val = tiles[tile_to_move_r][tile_to_move_c];
95
+ tiles[empty_r][empty_c] = moved_tile_hex_val;
96
+ tiles[tile_to_move_r][tile_to_move_c] = '0';
97
+
98
+ update_hash_after_move(tile_to_move_r, tile_to_move_c, empty_r, empty_c);
99
+
100
+ empty_r = tile_to_move_r;
101
+ empty_c = tile_to_move_c;
102
+ return true;
103
+ }
104
+ };
105
+
106
+
107
+ struct ScoreComponents {
108
+ int max_tree_size;
109
+ int num_components;
110
+ };
111
+ std::unordered_map<uint64_t, ScoreComponents> s_value_cache_by_hash;
112
+ const size_t MAX_SCORE_CACHE_SIZE_CONST = 2000000;
113
+
114
// Disjoint-set union over the current_N * current_N board cells that also
// tracks, per set root, how many nodes and how many edges the set contains.
//
// nodes_in_set starts at 0 for every cell; the caller assigns the weights
// (1 for a real tile, 0 for the empty cell) before adding edges.
struct DSU {
    std::vector<int> parent;
    std::vector<int> nodes_in_set;  // meaningful at set roots only
    std::vector<int> edges_in_set;  // meaningful at set roots only
    int N_sq_total_cells;

    DSU(int current_N) : N_sq_total_cells(current_N * current_N) {
        parent.resize(N_sq_total_cells);
        std::iota(parent.begin(), parent.end(), 0);
        nodes_in_set.assign(N_sq_total_cells, 0);
        edges_in_set.assign(N_sq_total_cells, 0);
    }

    // Returns the root of i's set, compressing the whole path onto the root.
    // Iterative so that long parent chains cannot deepen the call stack.
    int find(int i) {
        int root = i;
        while (parent[root] != root) {
            root = parent[root];
        }
        while (parent[i] != root) {
            const int next = parent[i];
            parent[i] = root;
            i = next;
        }
        return root;
    }

    // Merges the sets containing i_idx and j_idx (smaller node count under the
    // larger). A no-op when both are already in the same set; without that
    // guard the root's node and edge counters would be added to themselves.
    void unite(int i_idx, int j_idx) {
        int root_i = find(i_idx);
        int root_j = find(j_idx);
        if (root_i == root_j) return;

        if (nodes_in_set[root_i] < nodes_in_set[root_j]) std::swap(root_i, root_j);

        parent[root_j] = root_i;
        nodes_in_set[root_i] += nodes_in_set[root_j];
        edges_in_set[root_i] += edges_in_set[root_j];
    }

    // Records an edge between u_idx and v_idx: merges their sets when they
    // differ and counts the edge on the resulting root either way.
    void add_edge(int u_idx, int v_idx) {
        unite(u_idx, v_idx);
        edges_in_set[find(u_idx)]++;
    }
};
155
+
156
+
157
+ ScoreComponents calculate_scores(const Board& board) {
158
+ auto it_cache = s_value_cache_by_hash.find(board.zobrist_hash_value);
159
+ if (it_cache != s_value_cache_by_hash.end()) {
160
+ return it_cache->second;
161
+ }
162
+
163
+ DSU dsu(N_actual);
164
+
165
+ for (int r = 0; r < N_actual; ++r) {
166
+ for (int c = 0; c < N_actual; ++c) {
167
+ int cell_idx = r * N_actual + c;
168
+ if (board.tiles[r][c] != '0') {
169
+ dsu.nodes_in_set[cell_idx] = 1;
170
+ } else {
171
+ dsu.nodes_in_set[cell_idx] = 0;
172
+ }
173
+ }
174
+ }
175
+
176
+ for (int r = 0; r < N_actual; ++r) {
177
+ for (int c = 0; c < N_actual - 1; ++c) {
178
+ int tile1_val = hex_char_to_int(board.tiles[r][c]);
179
+ int tile2_val = hex_char_to_int(board.tiles[r][c+1]);
180
+ if (tile1_val != 0 && tile2_val != 0) {
181
+ if ((tile1_val & RIGHT_MASK) && (tile2_val & LEFT_MASK)) {
182
+ dsu.add_edge(r * N_actual + c, r * N_actual + (c + 1));
183
+ }
184
+ }
185
+ }
186
+ }
187
+ for (int r = 0; r < N_actual - 1; ++r) {
188
+ for (int c = 0; c < N_actual; ++c) {
189
+ int tile1_val = hex_char_to_int(board.tiles[r][c]);
190
+ int tile2_val = hex_char_to_int(board.tiles[r+1][c]);
191
+ if (tile1_val != 0 && tile2_val != 0) {
192
+ if ((tile1_val & DOWN_MASK) && (tile2_val & UP_MASK)) {
193
+ dsu.add_edge(r * N_actual + c, (r + 1) * N_actual + c);
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+ int max_tree_size = 0;
200
+ int total_num_components = 0;
201
+
202
+ for (int i = 0; i < dsu.N_sq_total_cells; ++i) {
203
+ if (dsu.parent[i] == i && dsu.nodes_in_set[i] > 0) {
204
+ total_num_components++;
205
+ if (dsu.edges_in_set[i] == dsu.nodes_in_set[i] - 1) {
206
+ if (dsu.nodes_in_set[i] > max_tree_size) {
207
+ max_tree_size = dsu.nodes_in_set[i];
208
+ }
209
+ }
210
+ }
211
+ }
212
+
213
+ ScoreComponents result = {max_tree_size, total_num_components};
214
+ if (s_value_cache_by_hash.size() < MAX_SCORE_CACHE_SIZE_CONST) {
215
+ s_value_cache_by_hash[board.zobrist_hash_value] = result;
216
+ }
217
+ return result;
218
+ }
219
+
220
+
221
+ int TARGET_EMPTY_R_GLOBAL_FOR_A_STAR, TARGET_EMPTY_C_GLOBAL_FOR_A_STAR; // Used by A* heuristic
222
+ bool A_STAR_PHASE_WAS_RUN = false; // Flag to adjust beam score empty penalty
223
+
224
+ double calculate_beam_score(const ScoreComponents& scores, int K_total, const Board& current_board_state) {
225
+ int S = scores.max_tree_size;
226
+
227
+ const double FULL_TREE_BASE_SCORE = 1e18;
228
+ if (S == N_actual * N_actual - 1) {
229
+ return FULL_TREE_BASE_SCORE + (double)(T_param * 2 - K_total);
230
+ }
231
+
232
+ double W_S = 1e9;
233
+ double W_NC = W_S * 0.8; // Make W_NC very strong, almost as much as increasing S by 1.
234
+ double W_K = 1.0;
235
+ double W_empty_dist_penalty_main;
236
+
237
+ if (A_STAR_PHASE_WAS_RUN) { // A* moved empty to target initially
238
+ W_empty_dist_penalty_main = W_K * 0.5; // Very low penalty, allow free movement
239
+ } else { // Empty started at target, or A* failed (should not happen)
240
+ W_empty_dist_penalty_main = W_K * 10.0; // Moderate penalty
241
+ }
242
+
243
+ double score_val = (double)S * W_S;
244
+ if (scores.num_components > 1) {
245
+ score_val -= (double)(scores.num_components - 1) * W_NC;
246
+ } else if (scores.num_components == 0 && N_actual * N_actual - 1 > 0) {
247
+ score_val -= (double)(N_actual * N_actual -1) * W_NC;
248
+ }
249
+
250
+ // Bonus for being very close to a full tree and connected
251
+ if (S >= (N_actual * N_actual - 1) - 2 && scores.num_components == 1 && S < N_actual * N_actual - 1) {
252
+ score_val += W_S * 0.5; // Significant bonus to encourage the last step
253
+ }
254
+
255
+ score_val -= (double)K_total * W_K;
256
+
257
+ // Penalty for empty square relative to (N-1,N-1)
258
+ int dist_empty_to_corner = std::abs(current_board_state.empty_r - (N_actual - 1)) +
259
+ std::abs(current_board_state.empty_c - (N_actual - 1));
260
+ score_val -= dist_empty_to_corner * W_empty_dist_penalty_main;
261
+
262
+ return score_val;
263
+ }
264
+
265
+ double calculate_actual_score(int S, int K_total) {
266
+ if (N_actual * N_actual - 1 == 0) return 0;
267
+ if (S == N_actual * N_actual - 1) {
268
+ if (K_total > T_param) return 0;
269
+ return std::round(500000.0 * (2.0 - (double)K_total / T_param));
270
+ } else {
271
+ return std::round(500000.0 * (double)S / (N_actual * N_actual - 1.0));
272
+ }
273
+ }
274
+
275
+ struct BeamHistoryEntry {
276
+ int parent_history_idx;
277
+ char move_char_taken;
278
+ };
279
+ std::vector<BeamHistoryEntry> beam_history_storage;
280
+ const size_t MAX_BEAM_HISTORY_STORAGE_SIZE_CONST = 3000000;
281
+
282
+ struct BeamState {
283
+ Board board;
284
+ double beam_score_val;
285
+ int k_beam_moves;
286
+ int history_idx;
287
+ int prev_move_direction_idx;
288
+
289
+ bool operator<(const BeamState& other) const {
290
+ return beam_score_val > other.beam_score_val;
291
+ }
292
+ };
293
+
294
+ std::chrono::steady_clock::time_point T_START_CHRONO_MAIN;
295
+ const int TIME_LIMIT_MS_SLACK_CONST = 400; // Universal slack
296
+ long long TIME_LIMIT_MS_EFFECTIVE_MAIN;
297
+
298
+
299
+ std::mt19937 rng_stochastic_selection_main;
300
+ std::unordered_map<uint64_t, int> min_K_to_reach_by_hash_main;
301
+ const size_t MAX_MIN_K_CACHE_SIZE_CONST = 2000000;
302
+
303
+
304
+ struct AStarEmptyState {
305
+ int r, c;
306
+ int g_cost;
307
+ std::string path;
308
+
309
+ bool operator>(const AStarEmptyState& other) const {
310
+ int h_cost_this = std::abs(r - TARGET_EMPTY_R_GLOBAL_FOR_A_STAR) + std::abs(c - TARGET_EMPTY_C_GLOBAL_FOR_A_STAR);
311
+ int h_cost_other = std::abs(other.r - TARGET_EMPTY_R_GLOBAL_FOR_A_STAR) + std::abs(other.c - TARGET_EMPTY_C_GLOBAL_FOR_A_STAR);
312
+ if (g_cost + h_cost_this != other.g_cost + h_cost_other) {
313
+ return g_cost + h_cost_this > other.g_cost + h_cost_other;
314
+ }
315
+ return g_cost > other.g_cost;
316
+ }
317
+ };
318
+
319
+ std::string find_path_for_empty(const Board& initial_board_state_for_A_star, int target_r, int target_c) {
320
+ TARGET_EMPTY_R_GLOBAL_FOR_A_STAR = target_r;
321
+ TARGET_EMPTY_C_GLOBAL_FOR_A_STAR = target_c;
322
+
323
+ std::priority_queue<AStarEmptyState, std::vector<AStarEmptyState>, std::greater<AStarEmptyState>> pq;
324
+ std::vector<std::vector<int>> min_g_cost_grid(N_actual, std::vector<int>(N_actual, T_param + 1));
325
+
326
+ pq.push({initial_board_state_for_A_star.empty_r, initial_board_state_for_A_star.empty_c, 0, ""});
327
+ min_g_cost_grid[initial_board_state_for_A_star.empty_r][initial_board_state_for_A_star.empty_c] = 0;
328
+
329
+ int A_star_max_depth = N_actual * N_actual * 2; // Allow more depth just in case
330
+
331
+ while(!pq.empty()){
332
+ AStarEmptyState current = pq.top();
333
+ pq.pop();
334
+
335
+ if (current.g_cost > min_g_cost_grid[current.r][current.c]) {
336
+ continue;
337
+ }
338
+
339
+ if (current.r == target_r && current.c == target_c) {
340
+ return current.path;
341
+ }
342
+
343
+ if (current.g_cost >= A_star_max_depth) continue;
344
+
345
+ for (int move_idx = 0; move_idx < 4; ++move_idx) {
346
+ int tile_that_moves_r = current.r + DR_TILE_RELATIVE_TO_EMPTY[move_idx];
347
+ int tile_that_moves_c = current.c + DC_TILE_RELATIVE_TO_EMPTY[move_idx];
348
+
349
+ if (tile_that_moves_r < 0 || tile_that_moves_r >= N_actual || tile_that_moves_c < 0 || tile_that_moves_c >= N_actual) {
350
+ continue;
351
+ }
352
+
353
+ int next_empty_r = tile_that_moves_r;
354
+ int next_empty_c = tile_that_moves_c;
355
+
356
+ int next_g_cost = current.g_cost + 1;
357
+
358
+ if (min_g_cost_grid[next_empty_r][next_empty_c] <= next_g_cost) {
359
+ continue;
360
+ }
361
+ min_g_cost_grid[next_empty_r][next_empty_c] = next_g_cost;
362
+ pq.push({next_empty_r, next_empty_c, next_g_cost, current.path + MOVE_CHARS[move_idx]});
363
+ }
364
+ }
365
+ return "";
366
+ }
367
+
368
+ std::string reconstruct_beam_path(int final_history_idx) {
369
+ std::string path_str = "";
370
+ int current_trace_hist_idx = final_history_idx;
371
+ while(current_trace_hist_idx > 0 &&
372
+ static_cast<size_t>(current_trace_hist_idx) < beam_history_storage.size() &&
373
+ beam_history_storage[current_trace_hist_idx].parent_history_idx != -1) {
374
+ path_str += beam_history_storage[current_trace_hist_idx].move_char_taken;
375
+ current_trace_hist_idx = beam_history_storage[current_trace_hist_idx].parent_history_idx;
376
+ }
377
+ std::reverse(path_str.begin(), path_str.end());
378
+ return path_str;
379
+ }
380
+
381
+
382
+ int main(int /*argc*/, char** /*argv*/) {
383
+ std::ios_base::sync_with_stdio(false);
384
+ std::cin.tie(NULL);
385
+
386
+ unsigned int random_seed_stochastic = std::chrono::steady_clock::now().time_since_epoch().count();
387
+ rng_stochastic_selection_main.seed(random_seed_stochastic);
388
+
389
+ T_START_CHRONO_MAIN = std::chrono::steady_clock::now();
390
+
391
+ std::cin >> N_actual >> T_param;
392
+
393
+ init_zobrist_keys();
394
+
395
+ Board current_board_obj;
396
+ for (int i = 0; i < N_actual; ++i) {
397
+ std::string row_str;
398
+ std::cin >> row_str;
399
+ for (int j = 0; j < N_actual; ++j) {
400
+ current_board_obj.tiles[i][j] = row_str[j];
401
+ if (current_board_obj.tiles[i][j] == '0') {
402
+ current_board_obj.empty_r = i;
403
+ current_board_obj.empty_c = j;
404
+ }
405
+ }
406
+ }
407
+ current_board_obj.calculate_initial_hash();
408
+
409
+ std::string initial_empty_moves_path = "";
410
+ int target_empty_final_r = N_actual - 1;
411
+ int target_empty_final_c = N_actual - 1;
412
+
413
+ if (current_board_obj.empty_r != target_empty_final_r || current_board_obj.empty_c != target_empty_final_c) {
414
+ initial_empty_moves_path = find_path_for_empty(current_board_obj, target_empty_final_r, target_empty_final_c);
415
+ A_STAR_PHASE_WAS_RUN = !initial_empty_moves_path.empty();
416
+ }
417
+
418
+ for (char move_char : initial_empty_moves_path) {
419
+ current_board_obj.apply_move_char(move_char);
420
+ }
421
+ int K_initial_empty_moves = initial_empty_moves_path.length();
422
+
423
+ // Adaptive time limit after A*
424
+ auto time_after_astar = std::chrono::steady_clock::now();
425
+ long long elapsed_astar_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_after_astar - T_START_CHRONO_MAIN).count();
426
+ TIME_LIMIT_MS_EFFECTIVE_MAIN = 2950 - elapsed_astar_ms - TIME_LIMIT_MS_SLACK_CONST;
427
+
428
+
429
+ beam_history_storage.reserve(MAX_BEAM_HISTORY_STORAGE_SIZE_CONST);
430
+ s_value_cache_by_hash.reserve(MAX_SCORE_CACHE_SIZE_CONST);
431
+ min_K_to_reach_by_hash_main.reserve(MAX_MIN_K_CACHE_SIZE_CONST);
432
+
433
+ std::vector<BeamState> current_beam;
434
+
435
+ ScoreComponents initial_scores_for_beam = calculate_scores(current_board_obj);
436
+ double initial_beam_eval_score = calculate_beam_score(initial_scores_for_beam, K_initial_empty_moves, current_board_obj);
437
+
438
+ beam_history_storage.push_back({-1, ' '});
439
+ current_beam.push_back({current_board_obj, initial_beam_eval_score, 0, 0, -1});
440
+
441
+ double overall_best_actual_score = calculate_actual_score(initial_scores_for_beam.max_tree_size, K_initial_empty_moves);
442
+ std::string overall_best_path_str = initial_empty_moves_path;
443
+
444
+ min_K_to_reach_by_hash_main[current_board_obj.zobrist_hash_value] = K_initial_empty_moves;
445
+
446
+ int beam_width;
447
+ float elite_ratio = 0.2f; // Standard elite ratio
448
+ int stochastic_sample_pool_factor = 3;
449
+
450
+ if (N_actual <= 6) { beam_width = 1200;} // N=6 is small, can afford wider
451
+ else if (N_actual == 7) { beam_width = 1000;}
452
+ else if (N_actual == 8) { beam_width = 700;} // Reduced from 800 to save time slightly
453
+ else if (N_actual == 9) { beam_width = 400;} // Reduced from 500
454
+ else { beam_width = 250;} // N=10, reduced from 300
455
+
456
+ std::vector<BeamState> candidates_pool;
457
+ candidates_pool.reserve(beam_width * 4 + 10);
458
+
459
+ std::vector<BeamState> next_beam_states_temp;
460
+ next_beam_states_temp.reserve(beam_width + 10);
461
+
462
+ std::vector<int> stochastic_selection_indices;
463
+ stochastic_selection_indices.reserve(stochastic_sample_pool_factor * beam_width + 10);
464
+
465
+ int k_iter_count_beam = 0;
466
+
467
+ for (int k_beam_iter = 0; K_initial_empty_moves + k_beam_iter < T_param; ++k_beam_iter) {
468
+ k_iter_count_beam++;
469
+ if (k_iter_count_beam % 10 == 0) {
470
+ if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - T_START_CHRONO_MAIN).count() > TIME_LIMIT_MS_EFFECTIVE_MAIN + elapsed_astar_ms) {
471
+ // Compare against total time budget, not just remaining for beam.
472
+ // Total time used > total budget minus slack
473
+ if (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - T_START_CHRONO_MAIN).count() > 2950 - TIME_LIMIT_MS_SLACK_CONST) {
474
+ break;
475
+ }
476
+ }
477
+ }
478
+ if (beam_history_storage.size() >= MAX_BEAM_HISTORY_STORAGE_SIZE_CONST - ( (size_t)beam_width * 4 + 100) ) {
479
+ break;
480
+ }
481
+
482
+ candidates_pool.clear();
483
+
484
+ for (const auto& current_state_in_beam : current_beam) {
485
+ Board temp_board_for_moves = current_state_in_beam.board;
486
+
487
+ int parent_k_beam = current_state_in_beam.k_beam_moves;
488
+ int parent_history_idx = current_state_in_beam.history_idx;
489
+ int prev_m_dir_idx = current_state_in_beam.prev_move_direction_idx;
490
+
491
+ for (int move_dir_idx = 0; move_dir_idx < 4; ++move_dir_idx) {
492
+ if (prev_m_dir_idx != -1) {
493
+ if ((prev_m_dir_idx ^ 1) == move_dir_idx) { // Check for U/D or L/R reversal using XOR trick
494
+ continue;
495
+ }
496
+ }
497
+
498
+ char current_move_char = MOVE_CHARS[move_dir_idx];
499
+ int original_empty_r = temp_board_for_moves.empty_r;
500
+ int original_empty_c = temp_board_for_moves.empty_c;
501
+ uint64_t original_hash = temp_board_for_moves.zobrist_hash_value;
502
+
503
+ int tile_to_move_r = original_empty_r + DR_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
504
+ int tile_to_move_c = original_empty_c + DC_TILE_RELATIVE_TO_EMPTY[move_dir_idx];
505
+
506
+ if (tile_to_move_r < 0 || tile_to_move_r >= N_actual || tile_to_move_c < 0 || tile_to_move_c >= N_actual) {
507
+ continue;
508
+ }
509
+
510
+ char moved_tile_hex_val = temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c];
511
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = moved_tile_hex_val;
512
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = '0';
513
+ temp_board_for_moves.empty_r = tile_to_move_r;
514
+ temp_board_for_moves.empty_c = tile_to_move_c;
515
+ temp_board_for_moves.update_hash_after_move(tile_to_move_r, tile_to_move_c, original_empty_r, original_empty_c);
516
+
517
+ int next_k_beam = parent_k_beam + 1;
518
+ int next_K_total = K_initial_empty_moves + next_k_beam;
519
+
520
+ bool already_reached_better = false;
521
+ auto it_map = min_K_to_reach_by_hash_main.find(temp_board_for_moves.zobrist_hash_value);
522
+ if (it_map != min_K_to_reach_by_hash_main.end()) {
523
+ if (it_map->second <= next_K_total) {
524
+ already_reached_better = true;
525
+ } else {
526
+ it_map->second = next_K_total;
527
+ }
528
+ } else {
529
+ if (min_K_to_reach_by_hash_main.size() < MAX_MIN_K_CACHE_SIZE_CONST) {
530
+ min_K_to_reach_by_hash_main[temp_board_for_moves.zobrist_hash_value] = next_K_total;
531
+ }
532
+ }
533
+
534
+ if(already_reached_better) {
535
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
536
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
537
+ temp_board_for_moves.empty_r = original_empty_r;
538
+ temp_board_for_moves.empty_c = original_empty_c;
539
+ temp_board_for_moves.zobrist_hash_value = original_hash;
540
+ continue;
541
+ }
542
+
543
+ ScoreComponents next_scores = calculate_scores(temp_board_for_moves);
544
+ double next_beam_eval_score = calculate_beam_score(next_scores, next_K_total, temp_board_for_moves);
545
+
546
+ beam_history_storage.push_back({parent_history_idx, current_move_char});
547
+ int new_history_idx = beam_history_storage.size() - 1;
548
+
549
+ candidates_pool.push_back({temp_board_for_moves, next_beam_eval_score, next_k_beam, new_history_idx, move_dir_idx});
550
+
551
+ double current_actual_score_val = calculate_actual_score(next_scores.max_tree_size, next_K_total);
552
+ if (current_actual_score_val > overall_best_actual_score) {
553
+ overall_best_actual_score = current_actual_score_val;
554
+ overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
555
+ } else if (current_actual_score_val == overall_best_actual_score) {
556
+ // Prefer shorter paths for same score
557
+ if ((initial_empty_moves_path + reconstruct_beam_path(new_history_idx)).length() < overall_best_path_str.length()){
558
+ overall_best_path_str = initial_empty_moves_path + reconstruct_beam_path(new_history_idx);
559
+ }
560
+ }
561
+
562
+ temp_board_for_moves.tiles[tile_to_move_r][tile_to_move_c] = moved_tile_hex_val;
563
+ temp_board_for_moves.tiles[original_empty_r][original_empty_c] = '0';
564
+ temp_board_for_moves.empty_r = original_empty_r;
565
+ temp_board_for_moves.empty_c = original_empty_c;
566
+ temp_board_for_moves.zobrist_hash_value = original_hash;
567
+ }
568
+ }
569
+
570
+ if (candidates_pool.empty()) break;
571
+
572
+ std::sort(candidates_pool.begin(), candidates_pool.end());
573
+
574
+ next_beam_states_temp.clear();
575
+ int num_elites = std::min(static_cast<int>(candidates_pool.size()), static_cast<int>(beam_width * elite_ratio));
576
+ num_elites = std::max(0, num_elites);
577
+
578
+ for(int i=0; i < num_elites && i < static_cast<int>(candidates_pool.size()); ++i) {
579
+ next_beam_states_temp.push_back(candidates_pool[i]);
580
+ }
581
+
582
+ if (next_beam_states_temp.size() < static_cast<size_t>(beam_width) && candidates_pool.size() > static_cast<size_t>(num_elites)) {
583
+ stochastic_selection_indices.clear();
584
+ int pool_start_idx = num_elites;
585
+ int pool_end_idx = std::min(static_cast<int>(candidates_pool.size()), num_elites + stochastic_sample_pool_factor * beam_width);
586
+
587
+ for(int i = pool_start_idx; i < pool_end_idx; ++i) {
588
+ stochastic_selection_indices.push_back(i);
589
+ }
590
+ if (!stochastic_selection_indices.empty()){
591
+ std::shuffle(stochastic_selection_indices.begin(), stochastic_selection_indices.end(), rng_stochastic_selection_main);
592
+ }
593
+
594
+ for(size_t i=0; i < stochastic_selection_indices.size() && next_beam_states_temp.size() < static_cast<size_t>(beam_width); ++i) {
595
+ next_beam_states_temp.push_back(candidates_pool[stochastic_selection_indices[i]]);
596
+ }
597
+ }
598
+
599
+ current_beam = next_beam_states_temp;
600
+ if (current_beam.empty()) break;
601
+ }
602
+
603
+ std::cout << overall_best_path_str << std::endl;
604
+
605
+ return 0;
606
+ }
607
+ # EVOLVE-BLOCK-END
benchmarks/ale_bench/ale_agent_best/ahc015.cpp ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EVOLVE-BLOCK-START
2
#include <algorithm>
#include <array>
#include <chrono> // For seeding RNG
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>
// #include <iomanip> // For debugging output
12
+
13
// Problem dimensions.
constexpr int GRID_SIZE = 10;
constexpr int NUM_TURNS = 100;
constexpr int NUM_FLAVORS = 3;  // flavors are numbered 1, 2, 3

// Tilt directions, indexed in the order F, B, L, R.
// With (0,0) at the top-left these are up, down, left and right.
constexpr int NUM_DIRECTIONS = 4;
constexpr int DR[] = {-1, 1, 0, 0};
constexpr int DC[] = {0, 0, -1, 1};
constexpr char DIR_CHARS[] = {'F', 'B', 'L', 'R'};

// Data filled in once by initialize_global_data(). Arrays sized
// NUM_FLAVORS + 1 are indexed directly by flavor id; slot 0 is unused.
std::array<int, NUM_TURNS> G_FLAVOR_SEQUENCE;                          // flavor of the candy for each turn
std::array<int, NUM_FLAVORS + 1> G_flavor_total_counts;                // occurrences of each flavor
std::array<std::pair<int, int>, NUM_FLAVORS + 1> G_target_col_ranges;  // inclusive {first, last} column; first > last means "no strip"
std::array<bool, NUM_FLAVORS + 1> G_flavor_active;                     // true when the flavor occurs at least once

// Lookahead parameters.
constexpr int MAX_LOOKAHEAD_DEPTH = 2;
// Number of sampled placements per lookahead level (outermost level first).
// These are the sample counts from Iteration 2, which scored highest.
constexpr std::array<int, MAX_LOOKAHEAD_DEPTH> NUM_SAMPLES_CONFIG = {23, 9};
34
+
35
+
36
/// Small 64-bit xorshift pseudo-random generator (shift triple 13 / 7 / 17).
struct XorshiftRNG {
    uint64_t x;  ///< Current generator state; must never be zero.

    /// Seeds the generator from the steady clock's current tick count.
    XorshiftRNG()
        : XorshiftRNG(static_cast<uint64_t>(
              std::chrono::steady_clock::now().time_since_epoch().count())) {}

    /// Seeds the generator with an explicit value (e.g. for reproducible runs).
    /// A zero seed is replaced by a fixed non-zero constant: the xor/shift
    /// steps in next() map 0 to 0, so a zero state would only ever yield 0.
    explicit XorshiftRNG(uint64_t seed)
        : x(seed != 0 ? seed : 0x9E3779B97F4A7C15ULL) {}

    /// Advances the state by one step and returns the new 64-bit value.
    uint64_t next() {
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        return x;
    }

    /// Returns a value in the inclusive range [min_val, max_val], obtained by
    /// reducing next() modulo the range size.
    /// When min_val >= max_val no random number is drawn and min_val is returned.
    int uniform_int(int min_val, int max_val) {
        if (min_val >= max_val) return min_val;
        // Compute the span and the offset in 64 bits so that wide ranges
        // (up to the whole int range) cannot overflow int arithmetic.
        const uint64_t range =
            static_cast<uint64_t>(static_cast<int64_t>(max_val) - min_val) + 1;
        const int64_t offset = static_cast<int64_t>(next() % range);
        return static_cast<int>(min_val + offset);
    }
};
XorshiftRNG rng;
55
+
56
+
57
// One candy on the board: its cell and its flavor.
struct Candy {
    int r;       // row index
    int c;       // column index
    int flavor;  // flavor id as stored in the board (non-zero)
};
60
+
61
+ struct GameState {
62
+ std::array<std::array<int, GRID_SIZE>, GRID_SIZE> board;
63
+ std::vector<Candy> candies_list;
64
+ int turn_num_1_indexed;
65
+
66
+ GameState() : turn_num_1_indexed(0) {
67
+ for (int i = 0; i < GRID_SIZE; ++i) {
68
+ board[i].fill(0);
69
+ }
70
+ candies_list.reserve(NUM_TURNS);
71
+ }
72
+
73
+ GameState(const GameState& other) = default;
74
+ GameState& operator=(const GameState& other) = default;
75
+ GameState(GameState&& other) noexcept = default;
76
+ GameState& operator=(GameState&& other) noexcept = default;
77
+
78
+ void place_candy(int r, int c, int flavor) {
79
+ board[r][c] = flavor;
80
+ candies_list.push_back({r, c, flavor});
81
+ }
82
+
83
+ std::pair<int, int> find_pth_empty_cell(int p_1_indexed) const {
84
+ int count = 0;
85
+ for (int r_idx = 0; r_idx < GRID_SIZE; ++r_idx) {
86
+ for (int c_idx = 0; c_idx < GRID_SIZE; ++c_idx) {
87
+ if (board[r_idx][c_idx] == 0) {
88
+ count++;
89
+ if (count == p_1_indexed) {
90
+ return {r_idx, c_idx};
91
+ }
92
+ }
93
+ }
94
+ }
95
+ return {-1, -1};
96
+ }
97
+
98
+ int count_empty_cells() const {
99
+ return GRID_SIZE * GRID_SIZE - static_cast<int>(candies_list.size());
100
+ }
101
+
102
+ void apply_tilt(int dir_idx) {
103
+ if (dir_idx == 0) { // F (Up)
104
+ for (int c = 0; c < GRID_SIZE; ++c) {
105
+ int current_write_r = 0;
106
+ for (int r = 0; r < GRID_SIZE; ++r) {
107
+ if (board[r][c] != 0) {
108
+ if (r != current_write_r) {
109
+ board[current_write_r][c] = board[r][c];
110
+ board[r][c] = 0;
111
+ }
112
+ current_write_r++;
113
+ }
114
+ }
115
+ }
116
+ } else if (dir_idx == 1) { // B (Down)
117
+ for (int c = 0; c < GRID_SIZE; ++c) {
118
+ int current_write_r = GRID_SIZE - 1;
119
+ for (int r = GRID_SIZE - 1; r >= 0; --r) {
120
+ if (board[r][c] != 0) {
121
+ if (r != current_write_r) {
122
+ board[current_write_r][c] = board[r][c];
123
+ board[r][c] = 0;
124
+ }
125
+ current_write_r--;
126
+ }
127
+ }
128
+ }
129
+ } else if (dir_idx == 2) { // L (Left)
130
+ for (int r = 0; r < GRID_SIZE; ++r) {
131
+ int current_write_c = 0;
132
+ for (int c = 0; c < GRID_SIZE; ++c) {
133
+ if (board[r][c] != 0) {
134
+ if (c != current_write_c) {
135
+ board[r][current_write_c] = board[r][c];
136
+ board[r][c] = 0;
137
+ }
138
+ current_write_c++;
139
+ }
140
+ }
141
+ }
142
+ } else { // R (Right, dir_idx == 3)
143
+ for (int r = 0; r < GRID_SIZE; ++r) {
144
+ int current_write_c = GRID_SIZE - 1;
145
+ for (int c = GRID_SIZE - 1; c >= 0; --c) {
146
+ if (board[r][c] != 0) {
147
+ if (c != current_write_c) {
148
+ board[r][current_write_c] = board[r][c];
149
+ board[r][c] = 0;
150
+ }
151
+ current_write_c--;
152
+ }
153
+ }
154
+ }
155
+ }
156
+ rebuild_candies_list_from_board();
157
+ }
158
+
159
+ void rebuild_candies_list_from_board() {
160
+ candies_list.clear();
161
+ for (int r_idx = 0; r_idx < GRID_SIZE; ++r_idx) {
162
+ for (int c_idx = 0; c_idx < GRID_SIZE; ++c_idx) {
163
+ if (board[r_idx][c_idx] != 0) {
164
+ candies_list.push_back({r_idx, c_idx, board[r_idx][c_idx]});
165
+ }
166
+ }
167
+ }
168
+ }
169
+
170
+ long long calculate_sum_sq_comp_size() const {
171
+ long long total_sq_sum = 0;
172
+ std::array<std::array<bool, GRID_SIZE>, GRID_SIZE> visited;
173
+ for (int i = 0; i < GRID_SIZE; ++i) visited[i].fill(false);
174
+
175
+ std::array<std::pair<int, int>, GRID_SIZE * GRID_SIZE> q_arr;
176
+
177
+ for (int r_start = 0; r_start < GRID_SIZE; ++r_start) {
178
+ for (int c_start = 0; c_start < GRID_SIZE; ++c_start) {
179
+ if (board[r_start][c_start] != 0 && !visited[r_start][c_start]) {
180
+ int current_flavor = board[r_start][c_start];
181
+ long long current_comp_size = 0;
182
+
183
+ q_arr[0] = {r_start, c_start};
184
+ visited[r_start][c_start] = true;
185
+ int head = 0;
186
+ int tail = 1;
187
+
188
+ while(head < tail){
189
+ current_comp_size++;
190
+ const std::pair<int,int>& curr_cell = q_arr[head];
191
+ const int curr_r = curr_cell.first;
192
+ const int curr_c = curr_cell.second;
193
+ head++;
194
+
195
+ for (int i = 0; i < NUM_DIRECTIONS; ++i) {
196
+ int nr = curr_r + DR[i];
197
+ int nc = curr_c + DC[i];
198
+ if (nr >= 0 && nr < GRID_SIZE && nc >= 0 && nc < GRID_SIZE &&
199
+ !visited[nr][nc] && board[nr][nc] == current_flavor) {
200
+ visited[nr][nc] = true;
201
+ q_arr[tail++] = {nr, nc};
202
+ }
203
+ }
204
+ }
205
+ total_sq_sum += current_comp_size * current_comp_size;
206
+ }
207
+ }
208
+ }
209
+ return total_sq_sum;
210
+ }
211
+
212
+ double calculate_distance_penalty_CoM() const {
213
+ if (candies_list.empty()) return 0.0;
214
+
215
+ std::array<double, NUM_FLAVORS + 1> sum_r; sum_r.fill(0.0);
216
+ std::array<double, NUM_FLAVORS + 1> sum_c; sum_c.fill(0.0);
217
+ std::array<int, NUM_FLAVORS + 1> counts; counts.fill(0);
218
+
219
+ for (const auto& candy : candies_list) {
220
+ counts[candy.flavor]++;
221
+ sum_r[candy.flavor] += candy.r;
222
+ sum_c[candy.flavor] += candy.c;
223
+ }
224
+
225
+ std::array<std::pair<double, double>, NUM_FLAVORS + 1> com_coords;
226
+ for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
227
+ if (counts[fl] > 0) {
228
+ com_coords[fl] = {sum_r[fl] / counts[fl], sum_c[fl] / counts[fl]};
229
+ }
230
+ }
231
+
232
+ double total_manhattan_dist_penalty = 0;
233
+ for (const auto& candy : candies_list) {
234
+ if (counts[candy.flavor] > 1) {
235
+ const auto& com = com_coords[candy.flavor];
236
+ total_manhattan_dist_penalty += std::abs(static_cast<double>(candy.r) - com.first) +
237
+ std::abs(static_cast<double>(candy.c) - com.second);
238
+ }
239
+ }
240
+ return total_manhattan_dist_penalty;
241
+ }
242
+
243
+ double calculate_region_penalty() const {
244
+ if (candies_list.empty()) return 0.0;
245
+ double penalty = 0.0;
246
+ for (const auto& candy : candies_list) {
247
+ if (!G_flavor_active[candy.flavor]) continue;
248
+
249
+ const auto& range = G_target_col_ranges[candy.flavor];
250
+ int min_target_c = range.first;
251
+ int max_target_c = range.second;
252
+
253
+ if (min_target_c > max_target_c) continue;
254
+
255
+ if (candy.c < min_target_c) {
256
+ penalty += (min_target_c - candy.c);
257
+ } else if (candy.c > max_target_c) {
258
+ penalty += (candy.c - max_target_c);
259
+ }
260
+ }
261
+ return penalty;
262
+ }
263
+
264
+ double calculate_edge_bonus() const {
265
+ double bonus_val = 0.0;
266
+ const double PER_CANDY_BONUS_FACTOR = 0.5;
267
+
268
+ for (const auto& candy : candies_list) {
269
+ if (!G_flavor_active[candy.flavor]) continue;
270
+
271
+ const auto& range = G_target_col_ranges[candy.flavor];
272
+ int min_target_c = range.first;
273
+ int max_target_c = range.second;
274
+
275
+ if (min_target_c > max_target_c) continue;
276
+
277
+ bool in_correct_strip = (candy.c >= min_target_c && candy.c <= max_target_c);
278
+
279
+ if (in_correct_strip) {
280
+ if (candy.r == 0 || candy.r == GRID_SIZE - 1) {
281
+ bonus_val += PER_CANDY_BONUS_FACTOR;
282
+ }
283
+ if ((candy.c == 0 && min_target_c == 0) ||
284
+ (candy.c == GRID_SIZE - 1 && max_target_c == GRID_SIZE - 1)) {
285
+ bonus_val += PER_CANDY_BONUS_FACTOR;
286
+ }
287
+ }
288
+ }
289
+ return bonus_val;
290
+ }
291
+
292
+ double evaluate() const {
293
+ if (candies_list.empty() && turn_num_1_indexed == 0) return 0.0;
294
+
295
+ long long sum_sq_comp = calculate_sum_sq_comp_size();
296
+ double dist_penalty_com = calculate_distance_penalty_CoM();
297
+ double region_penalty_val = calculate_region_penalty();
298
+ double edge_bonus_val = calculate_edge_bonus();
299
+
300
+ double current_turn_double = static_cast<double>(turn_num_1_indexed);
301
+
302
+ // Coefficients from Iteration 2 (best scoring), with small tweak to C
303
+ double A_coeff_conn = 15.0 + 1.1 * current_turn_double;
304
+ double B_coeff_com_base = std::max(0.0, 170.0 - 1.7 * current_turn_double);
305
+ // Final iteration tweak for C_coeff_region_penalty_direct:
306
+ double C_coeff_region_penalty_direct = std::max(2.0, 27.0 - 0.17 * current_turn_double);
307
+ double D_coeff_edge_bonus = 5.0 + 0.2 * current_turn_double;
308
+
309
+ return A_coeff_conn * sum_sq_comp
310
+ - B_coeff_com_base * dist_penalty_com
311
+ - C_coeff_region_penalty_direct * region_penalty_val
312
+ + D_coeff_edge_bonus * edge_bonus_val;
313
+ }
314
+ };
315
+
316
+ // Forward declaration
317
+ double eval_lookahead(const GameState& state_after_tilt, int turn_T_of_candy_just_processed, int depth_remaining);
318
+
319
+ char decide_tilt_direction_logic(const GameState& current_gs_after_placement) {
320
+ double best_overall_eval = std::numeric_limits<double>::lowest();
321
+ int best_dir_idx = 0;
322
+
323
+ int turn_T_for_lookahead_base = current_gs_after_placement.turn_num_1_indexed;
324
+
325
+ for (int i = 0; i < NUM_DIRECTIONS; ++i) {
326
+ GameState gs_after_tilt_T = current_gs_after_placement;
327
+ gs_after_tilt_T.apply_tilt(i);
328
+
329
+ double current_tilt_eval_for_dir_i = eval_lookahead(gs_after_tilt_T, turn_T_for_lookahead_base, MAX_LOOKAHEAD_DEPTH);
330
+
331
+ if (current_tilt_eval_for_dir_i > best_overall_eval) {
332
+ best_overall_eval = current_tilt_eval_for_dir_i;
333
+ best_dir_idx = i;
334
+ }
335
+ }
336
+ return DIR_CHARS[best_dir_idx];
337
+ }
338
+
339
+
340
+ double eval_lookahead(const GameState& state_after_tilt, int turn_T_of_candy_just_processed, int depth_remaining) {
341
+ if (depth_remaining == 0 || turn_T_of_candy_just_processed == NUM_TURNS) {
342
+ return state_after_tilt.evaluate();
343
+ }
344
+
345
+ int num_empty = state_after_tilt.count_empty_cells();
346
+ if (num_empty == 0) {
347
+ return state_after_tilt.evaluate();
348
+ }
349
+
350
+ int next_candy_flavor = G_FLAVOR_SEQUENCE[turn_T_of_candy_just_processed];
351
+ int sample_count_param_idx = MAX_LOOKAHEAD_DEPTH - depth_remaining;
352
+ int sample_count_this_depth = NUM_SAMPLES_CONFIG[sample_count_param_idx];
353
+ int actual_num_samples = std::min(sample_count_this_depth, num_empty);
354
+
355
+ if (actual_num_samples == 0) {
356
+ return state_after_tilt.evaluate();
357
+ }
358
+
359
+ double sum_over_sampled_placements = 0.0;
360
+ for (int s = 0; s < actual_num_samples; ++s) {
361
+ int p_val_1_indexed_sample;
362
+ if (actual_num_samples == num_empty) {
363
+ p_val_1_indexed_sample = s + 1;
364
+ } else {
365
+ p_val_1_indexed_sample = rng.uniform_int(1, num_empty);
366
+ }
367
+
368
+ GameState S_after_placement = state_after_tilt;
369
+ std::pair<int, int> candy_loc = S_after_placement.find_pth_empty_cell(p_val_1_indexed_sample);
370
+ S_after_placement.place_candy(candy_loc.first, candy_loc.second, next_candy_flavor);
371
+ S_after_placement.turn_num_1_indexed = turn_T_of_candy_just_processed + 1;
372
+
373
+ double max_eval_for_this_placement = std::numeric_limits<double>::lowest();
374
+ for (int dir_idx_next_tilt = 0; dir_idx_next_tilt < NUM_DIRECTIONS; ++dir_idx_next_tilt) {
375
+ GameState S_after_next_tilt = S_after_placement;
376
+ S_after_next_tilt.apply_tilt(dir_idx_next_tilt);
377
+ double val = eval_lookahead(S_after_next_tilt, S_after_placement.turn_num_1_indexed, depth_remaining - 1);
378
+ if (val > max_eval_for_this_placement) {
379
+ max_eval_for_this_placement = val;
380
+ }
381
+ }
382
+ sum_over_sampled_placements += max_eval_for_this_placement;
383
+ }
384
+
385
+ return sum_over_sampled_placements / actual_num_samples;
386
+ }
387
+
388
+
389
+ void initialize_global_data() {
390
+ G_flavor_total_counts.fill(0);
391
+ for (int t = 0; t < NUM_TURNS; ++t) {
392
+ std::cin >> G_FLAVOR_SEQUENCE[t];
393
+ G_flavor_total_counts[G_FLAVOR_SEQUENCE[t]]++;
394
+ }
395
+
396
+ G_flavor_active.fill(false);
397
+ std::vector<std::pair<int, int>> sorter_flavor_count_id;
398
+ for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
399
+ if (G_flavor_total_counts[fl] > 0) {
400
+ G_flavor_active[fl] = true;
401
+ sorter_flavor_count_id.push_back({G_flavor_total_counts[fl], fl});
402
+ }
403
+ }
404
+ std::sort(sorter_flavor_count_id.begin(), sorter_flavor_count_id.end(),
405
+ [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
406
+ if (a.first != b.first) {
407
+ return a.first > b.first;
408
+ }
409
+ return a.second < b.second;
410
+ });
411
+
412
+ std::vector<int> active_flavor_ids_sorted_by_priority;
413
+ for(const auto& p : sorter_flavor_count_id) {
414
+ active_flavor_ids_sorted_by_priority.push_back(p.second);
415
+ }
416
+
417
+ std::vector<int> assigned_widths(NUM_FLAVORS + 1, 0);
418
+ int total_assigned_width_sum = 0;
419
+
420
+ if (!active_flavor_ids_sorted_by_priority.empty()) {
421
+ double total_candies_for_proportion = 0;
422
+ for(int fl_id : active_flavor_ids_sorted_by_priority) {
423
+ total_candies_for_proportion += G_flavor_total_counts[fl_id];
424
+ }
425
+ if (total_candies_for_proportion == 0) total_candies_for_proportion = 1;
426
+
427
+ for (int fl_id : active_flavor_ids_sorted_by_priority) {
428
+ assigned_widths[fl_id] = static_cast<int>(std::floor(
429
+ static_cast<double>(GRID_SIZE) * G_flavor_total_counts[fl_id] / total_candies_for_proportion
430
+ ));
431
+ total_assigned_width_sum += assigned_widths[fl_id];
432
+ }
433
+
434
+ int remaining_width_to_assign = GRID_SIZE - total_assigned_width_sum;
435
+ for (int i = 0; i < remaining_width_to_assign; ++i) {
436
+ assigned_widths[active_flavor_ids_sorted_by_priority[i % active_flavor_ids_sorted_by_priority.size()]]++;
437
+ }
438
+ }
439
+
440
+ int current_col_start = 0;
441
+ for (int fl_id_in_sorted_order : active_flavor_ids_sorted_by_priority) {
442
+ if (assigned_widths[fl_id_in_sorted_order] > 0) {
443
+ G_target_col_ranges[fl_id_in_sorted_order] = {current_col_start, current_col_start + assigned_widths[fl_id_in_sorted_order] - 1};
444
+ current_col_start += assigned_widths[fl_id_in_sorted_order];
445
+ } else {
446
+ G_target_col_ranges[fl_id_in_sorted_order] = {current_col_start, current_col_start - 1};
447
+ }
448
+ }
449
+
450
+ for (int fl = 1; fl <= NUM_FLAVORS; ++fl) {
451
+ if (!G_flavor_active[fl]) {
452
+ G_target_col_ranges[fl] = {0, -1};
453
+ }
454
+ }
455
+ }
456
+
457
+
458
+ int main() {
459
+ std::ios_base::sync_with_stdio(false);
460
+ std::cin.tie(NULL);
461
+
462
+ initialize_global_data();
463
+
464
+ GameState current_gs;
465
+ for (int t_0_indexed = 0; t_0_indexed < NUM_TURNS; ++t_0_indexed) {
466
+ current_gs.turn_num_1_indexed = t_0_indexed + 1;
467
+
468
+ int p_val_1_indexed;
469
+ std::cin >> p_val_1_indexed;
470
+
471
+ std::pair<int, int> candy_loc = current_gs.find_pth_empty_cell(p_val_1_indexed);
472
+
473
+ current_gs.place_candy(candy_loc.first, candy_loc.second, G_FLAVOR_SEQUENCE[t_0_indexed]);
474
+
475
+ char chosen_dir_char = decide_tilt_direction_logic(current_gs);
476
+
477
+ std::cout << chosen_dir_char << std::endl;
478
+
479
+ int dir_idx_to_apply = 0;
480
+ for(int k=0; k<NUM_DIRECTIONS; ++k) {
481
+ if(DIR_CHARS[k] == chosen_dir_char) {
482
+ dir_idx_to_apply = k;
483
+ break;
484
+ }
485
+ }
486
+ current_gs.apply_tilt(dir_idx_to_apply);
487
+ }
488
+
489
+ return 0;
490
+ }
491
+ # EVOLVE-BLOCK-END