thomasm6m6 committed on
Commit
8dc7642
·
verified ·
1 Parent(s): 0ddb706

Initial Freeciv OpenEnv Space

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +14 -0
  2. .gitignore +1 -0
  3. AGENTS.md +5 -0
  4. Dockerfile +31 -0
  5. README.md +112 -5
  6. __init__.py +1 -0
  7. build/lib/freeciv_env/__init__.py +10 -0
  8. build/lib/freeciv_env/adapter.py +335 -0
  9. build/lib/freeciv_env/client.py +22 -0
  10. build/lib/freeciv_env/grpo.py +97 -0
  11. build/lib/freeciv_env/models.py +112 -0
  12. build/lib/freeciv_env/runtime.py +401 -0
  13. build/lib/freeciv_env/server/__init__.py +3 -0
  14. build/lib/freeciv_env/server/app.py +42 -0
  15. build/lib/freeciv_env/server/freeciv_environment.py +163 -0
  16. build/lib/server/__init__.py +0 -0
  17. build/lib/server/app.py +10 -0
  18. client.py +1 -0
  19. freeciv_env.egg-info/PKG-INFO +136 -0
  20. freeciv_env.egg-info/SOURCES.txt +23 -0
  21. freeciv_env.egg-info/dependency_links.txt +1 -0
  22. freeciv_env.egg-info/entry_points.txt +2 -0
  23. freeciv_env.egg-info/requires.txt +14 -0
  24. freeciv_env.egg-info/top_level.txt +2 -0
  25. freeciv_env/__init__.py +10 -0
  26. freeciv_env/adapter.py +335 -0
  27. freeciv_env/client.py +22 -0
  28. freeciv_env/grpo.py +97 -0
  29. freeciv_env/models.py +112 -0
  30. freeciv_env/runtime.py +432 -0
  31. freeciv_env/server/Dockerfile +31 -0
  32. freeciv_env/server/__init__.py +3 -0
  33. freeciv_env/server/app.py +144 -0
  34. freeciv_env/server/freeciv_environment.py +163 -0
  35. freeciv_rl_training_curve.png +0 -0
  36. hackathon.md +271 -0
  37. models.py +1 -0
  38. notes.md +3 -0
  39. openenv.yaml +4 -0
  40. outline.md +178 -0
  41. pres/before_after_reward.png +0 -0
  42. pres/index.html +18 -0
  43. pres/reward_curve.png +0 -0
  44. pres/reward_steps.csv +11 -0
  45. pres/training_results.html +27 -0
  46. pres/training_script.html +150 -0
  47. pres/trajectory.html +62 -0
  48. pyproject.toml +41 -0
  49. qwen35_live_long_trainer_state.json +304 -0
  50. scripts/start_space.sh +54 -0
.dockerignore ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .venv
3
+ __pycache__/
4
+ *.pyc
5
+ .pytest_cache/
6
+ build/
7
+ pres/
8
+ freeciv_env.egg-info/
9
+ notes.md
10
+ outline.md
11
+ hackathon.md
12
+ qwen35_live_long_trainer_state.json
13
+ freeciv_rl_training_curve.png
14
+ tests/
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ **/__pycache__
AGENTS.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ This is a hackathon project--see the brief at hackathon.md. Always read the file before responding.
2
+ Please keep your answers concise.
3
+
4
+ We have an H100. Access it by running this command: `northflank ssh service --projectId hackathon --serviceId civ --proxyOnly` (may already be running)
5
+ then `ssh root@127.0.0.1 -p 35731` (or whatever the `northflank ssh` command prints out as the command to run)
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Freeciv OpenEnv Space: layers the Python environment server
# on top of the Freeciv Web runtime image and starts it via start_space.sh.
FROM omkarasoftware/freeciv-web:latest

USER root
# --no-install-recommends keeps the layer limited to the tools named here.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /app/env && chown -R docker:docker /app

USER docker
ENV HOME=/home/docker
WORKDIR /app/env

COPY --chown=docker:docker . /app/env
RUN chmod +x /app/env/scripts/start_space.sh
# Download the installer to a file before running it: with `curl | sh` the
# RUN step reports only the exit status of `sh`, so a failed or truncated
# download could go unnoticed.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh
ENV PATH="/app/env/.venv/bin:/home/docker/.local/bin:$PATH"
RUN uv python install 3.11
RUN uv venv --python 3.11 /app/env/.venv
RUN UV_PROJECT_ENVIRONMENT=/app/env/.venv uv sync --frozen --no-dev --no-editable

# Append any inherited PYTHONPATH only when it is non-empty; a bare trailing
# ":" would add an empty entry, which Python treats as the current directory.
ENV PYTHONPATH="/app/env${PYTHONPATH:+:${PYTHONPATH}}"
ENV ENABLE_WEB_INTERFACE=true
ENV FREECIV_SERVER_URL=http://127.0.0.1
ENV FREECIV_TURN_TIMEOUT_S=120

HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=10 \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/env/scripts/start_space.sh"]
README.md CHANGED
@@ -1,10 +1,117 @@
1
  ---
2
- title: Openenv Hack
3
- emoji: 😻
4
- colorFrom: red
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Freeciv Environment Server
3
+ emoji: 🎮
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
 
14
+ # freeciv-env
15
+
16
+ OpenEnv environment for Freeciv, built on top of `freeciv-bot`.
17
+
18
+ ## Current scope
19
+
20
+ This environment exposes a small, trainable action surface:
21
+
22
+ - `end_turn`
23
+ - `move_unit(unit_id, direction)`
24
+ - `build_city(unit_id)`
25
+ - `set_city_production(city_id, target)`
26
+ - `set_research(tech_name)`
27
+
28
+ Observations are text-first and include compact structured summaries of:
29
+
30
+ - current turn
31
+ - score
32
+ - known and visible map tiles
33
+ - units
34
+ - cities
35
+ - legal actions
36
+
37
+ ## Local development
38
+
39
+ Install dependencies:
40
+
41
+ ```bash
42
+ uv sync --extra dev
43
+ ```
44
+
45
+ Run tests:
46
+
47
+ ```bash
48
+ uv run pytest
49
+ ```
50
+
51
+ Run the server:
52
+
53
+ ```bash
54
+ uv run uvicorn freeciv_env.server.app:app --host 0.0.0.0 --port 8000
55
+ ```
56
+
57
+ Run the fast GRPO loop:
58
+
59
+ ```bash
60
+ uv sync --extra dev --extra train
61
+ uv run python scripts/train_grpo_fast.py --env-url http://127.0.0.1 --max-steps 50
62
+ ```
63
+
64
+ ## Hackathon / Unsloth notes
65
+
66
+ For the hackathon Colab submission path on H100s, Unsloth recommended the BF16 OpenEnv gpt-oss 20B notebook:
67
+
68
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
69
+
70
+ If you adapt that notebook for this environment, reduce `max_steps` to `300` for a faster run.
71
+
72
+ Useful notebook indexes:
73
+
74
+ - RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
75
+ - all notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
76
+ - notebook repo: <https://github.com/unslothai/notebooks/tree/main/nb>
77
+
78
+ If GRPO is too slow, start from a smaller notebook with `fast_inference = True` and add the Freeciv/OpenEnv calls:
79
+
80
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
81
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
82
+
83
+ If vLLM GRPO fails, Unsloth suggested a clean virtualenv install:
84
+
85
+ ```bash
86
+ python -m venv unsloth_env
87
+ source unsloth_env/bin/activate
88
+ pip install --upgrade pip && pip install uv
89
+ uv pip install unsloth vllm --torch-backend=auto
90
+ ```
91
+
92
+ If Unsloth is already installed, update it for the latest GRPO fixes:
93
+
94
+ ```bash
95
+ pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
96
+ ```
97
+
98
+ ## Live runtime requirements
99
+
100
+ The default server app uses `freeciv-bot` against a local Freeciv Web runtime.
101
+
102
+ Environment variables:
103
+
104
+ - `FREECIV_SERVER_URL` (default: `http://127.0.0.1`)
105
+ - `FREECIV_USERNAME` (default: `openenvbot`)
106
+ - `FREECIV_CLIENT_PORT` (default: `6000`)
107
+ - `FREECIV_TURN_TIMEOUT_S` (default: `60`)
108
+
109
+ The included automated tests use a fake session backend, so they do not require a live Freeciv server.
110
+
111
+ The GRPO training script uses:
112
+
113
+ - `Qwen/Qwen3.5-0.8B`
114
+ - Unsloth bf16 LoRA loading
115
+ - TRL `GRPOTrainer`
116
+ - integer-only action selection to minimize generated tokens
117
+ - offline GRPO over env-sampled states for maximum throughput
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from freeciv_env import *
build/lib/freeciv_env/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from freeciv_env.client import FreecivEnv
2
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState, LegalAction
3
+
4
+ __all__ = [
5
+ "FreecivAction",
6
+ "FreecivEnv",
7
+ "FreecivObservation",
8
+ "FreecivState",
9
+ "LegalAction",
10
+ ]
build/lib/freeciv_env/adapter.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+ from freeciv_env.models import CitySummary, FreecivAction, FreecivObservation, LegalAction, UnitSummary
7
+
8
+
9
+ ActionLookupKey = tuple[str, int | None, int | None, str | None]
10
+
11
+
12
@dataclass(frozen=True)
class ActionRef:
    """Immutable handle identifying one raw action: controller, actor and raw action key."""

    controller: str  # controller name; this module uses "unit", "city" and "tech"
    actor_id: int | str  # int unit/city id, or the string "cur_player" for tech actions
    raw_action_key: str  # key exactly as it appears in the controller's action map
17
+
18
+
19
@dataclass
class RawSnapshot:
    """Raw turn number, state and action data before conversion into an observation."""

    turn: int
    state: dict[str, Any]  # read in this module under the keys "map", "player", "city" and "unit"
    actions: dict[str, Any]  # keyed by controller name; values are dicts or objects exposing json_struct()
24
+
25
+
26
@dataclass(frozen=True)
class SnapshotMetrics:
    """Immutable scalar metrics derived from a RawSnapshot by extract_metrics."""

    score: float  # from state["player"]["my_score"]
    known_tiles: int  # count of map status values > 0
    visible_tiles: int  # count of map status values >= 2
    city_count: int  # number of entries in state["city"]
    unit_count: int  # number of entries in state["unit"]
    techs_researched: int  # from state["player"]["my_techs_researched"]
34
+
35
+
36
@dataclass
class PreparedObservation:
    """Bundle returned by prepare_observation: the observation, its metrics and the action lookup table."""

    observation: FreecivObservation
    metrics: SnapshotMetrics
    action_refs: dict[ActionLookupKey, ActionRef]  # maps action_lookup_key() tuples to raw action handles
41
+
42
+
43
+ def _map_status_rows(raw_state: dict[str, Any]) -> list[list[int | float]]:
44
+ raw_map = raw_state.get("map", {})
45
+ status = raw_map.get("status", [])
46
+ return status if isinstance(status, list) else []
47
+
48
+
49
def count_known_tiles(raw_state: dict[str, Any]) -> int:
    """Count map status cells whose value is truthy and greater than zero."""
    known = 0
    for status_row in _map_status_rows(raw_state):
        for cell in status_row:
            if cell and cell > 0:
                known += 1
    return known
51
+
52
+
53
def count_visible_tiles(raw_state: dict[str, Any]) -> int:
    """Count map status cells whose value is truthy and at least two."""
    visible = 0
    for status_row in _map_status_rows(raw_state):
        for cell in status_row:
            if cell and cell >= 2:
                visible += 1
    return visible
55
+
56
+
57
def extract_metrics(snapshot: RawSnapshot) -> SnapshotMetrics:
    """Derive the scalar metrics for a snapshot.

    Args:
        snapshot: Raw snapshot whose ``state`` dict is read under the keys
            "player", "map", "city" and "unit".

    Returns:
        A SnapshotMetrics holding score, tile counts, city/unit counts and
        the researched-tech count.
    """
    player = snapshot.state.get("player", {})
    return SnapshotMetrics(
        # `or 0.0` mirrors the techs_researched guard below: an explicit None
        # score would otherwise make float() raise TypeError.
        score=float(player.get("my_score", 0.0) or 0.0),
        known_tiles=count_known_tiles(snapshot.state),
        visible_tiles=count_visible_tiles(snapshot.state),
        city_count=len(snapshot.state.get("city", {})),
        unit_count=len(snapshot.state.get("unit", {})),
        techs_researched=int(player.get("my_techs_researched", 0) or 0),
    )
67
+
68
+
69
def action_lookup_key(action: FreecivAction) -> ActionLookupKey:
    """Build the 4-tuple lookup key for an action.

    The tuple is (action_type, unit or city id, direction, target); slots an
    action type does not use are None. Any action type not listed below maps
    to the end_turn key.
    """
    # Lambdas keep attribute access lazy, so only the matching type's fields are read.
    builders = {
        "move_unit": lambda: ("move_unit", action.unit_id, action.direction, None),
        "build_city": lambda: ("build_city", action.unit_id, None, None),
        "set_city_production": lambda: ("set_city_production", action.city_id, None, action.target),
        "set_research": lambda: ("set_research", None, None, action.target),
    }
    build = builders.get(action.action_type)
    if build is None:
        return ("end_turn", None, None, None)
    return build()
79
+
80
+
81
+ def _parse_target_name(raw_action_key: str, prefix: str) -> str:
82
+ suffix = raw_action_key.removeprefix(prefix)
83
+ name, _sep, _tail = suffix.rpartition("_")
84
+ return name or suffix
85
+
86
+
87
+
88
+ def _controller_actions(snapshot: RawSnapshot, controller: str) -> dict[str, Any]:
89
+ raw_actions = snapshot.actions.get(controller, {})
90
+ if isinstance(raw_actions, dict):
91
+ return raw_actions
92
+ if hasattr(raw_actions, "json_struct"):
93
+ json_actions = raw_actions.json_struct()
94
+ return json_actions if isinstance(json_actions, dict) else {}
95
+ return {}
96
+
97
+
98
+
99
def _extract_legal_actions(snapshot: RawSnapshot) -> tuple[list[LegalAction], dict[ActionLookupKey, ActionRef]]:
    """Collect the legal actions exposed by a snapshot.

    Returns a pair: the sorted list of LegalAction entries (always including
    end_turn) and a lookup table from action key tuples to the ActionRef that
    names the underlying raw action. end_turn has no entry in the table.
    """
    exposed: list[LegalAction] = [
        LegalAction(
            action_type="end_turn",
            label="End the current turn",
            raw_action_key="__end_turn__",
        )
    ]
    lookup: dict[ActionLookupKey, ActionRef] = {}

    def register(legal: LegalAction, key: ActionLookupKey, controller: str, actor: int | str, raw_key: str) -> None:
        # Record the public action and the raw handle needed to execute it.
        exposed.append(legal)
        lookup[key] = ActionRef(controller=controller, actor_id=actor, raw_action_key=raw_key)

    # Unit actions: founding a city and single-step moves ("goto_<direction>").
    for raw_id, unit_map in _controller_actions(snapshot, "unit").items():
        unit_id = int(raw_id)
        if unit_map.get("build"):
            register(
                LegalAction(
                    action_type="build_city",
                    label=f"Build a city with unit {unit_id}",
                    unit_id=unit_id,
                    raw_action_key="build",
                ),
                ("build_city", unit_id, None, None),
                "unit",
                unit_id,
                "build",
            )
        for key, enabled in sorted(unit_map.items()):
            if enabled and key.startswith("goto_"):
                direction = int(key.split("_", 1)[1])
                register(
                    LegalAction(
                        action_type="move_unit",
                        label=f"Move unit {unit_id} in direction {direction}",
                        unit_id=unit_id,
                        direction=direction,
                        raw_action_key=key,
                    ),
                    ("move_unit", unit_id, direction, None),
                    "unit",
                    unit_id,
                    key,
                )

    # City actions: production changes for units and improvements.
    production_prefixes = ("change_unit_prod_", "change_improve_prod_")
    for raw_id, city_map in _controller_actions(snapshot, "city").items():
        city_id = int(raw_id)
        for key, enabled in sorted(city_map.items()):
            if not enabled:
                continue
            prefix = next((candidate for candidate in production_prefixes if key.startswith(candidate)), None)
            if prefix is None:
                continue
            target = _parse_target_name(key, prefix)
            register(
                LegalAction(
                    action_type="set_city_production",
                    label=f"Set city {city_id} production to {target}",
                    city_id=city_id,
                    target=target,
                    raw_action_key=key,
                ),
                ("set_city_production", city_id, None, target),
                "city",
                city_id,
                key,
            )

    # Research actions live under the "cur_player" actor of the tech controller.
    research_map = _controller_actions(snapshot, "tech").get("cur_player", {})
    for key, enabled in sorted(research_map.items()):
        if enabled and key.startswith("research_tech_"):
            target = _parse_target_name(key, "research_tech_")
            register(
                LegalAction(
                    action_type="set_research",
                    label=f"Research {target}",
                    target=target,
                    raw_action_key=key,
                ),
                ("set_research", None, None, target),
                "tech",
                "cur_player",
                key,
            )

    def ordering(item: LegalAction):
        # Missing ids/directions sort as -1 and missing targets as "".
        return (
            item.action_type,
            item.unit_id or -1,
            item.city_id or -1,
            item.direction or -1,
            item.target or "",
        )

    exposed.sort(key=ordering)
    return exposed, lookup
199
+
200
+
201
def _extract_unit_summaries(snapshot: RawSnapshot) -> list[UnitSummary]:
    """Build one UnitSummary per entry in ``snapshot.state["unit"]``, ordered by numeric unit id."""
    actions_by_unit = _controller_actions(snapshot, "unit")
    ordered_units = sorted(snapshot.state.get("unit", {}).items(), key=lambda pair: int(pair[0]))

    summaries: list[UnitSummary] = []
    for raw_id, unit in ordered_units:
        # The action map may be keyed by the string form of the id or by the id itself.
        action_map = actions_by_unit.get(str(raw_id), actions_by_unit.get(raw_id, {}))

        directions = [
            int(key.split("_", 1)[1])
            for key, enabled in action_map.items()
            if enabled and key.startswith("goto_")
        ]
        directions.sort()

        # None, -1 and "" all count as "no home city".
        home_city = unit.get("home_city")
        home_city_id = None if home_city in (None, -1, "") else int(home_city)

        summary = UnitSummary(
            unit_id=int(raw_id),
            unit_type=str(unit.get("type_rule_name", "Unknown")),
            health=int(unit.get("health", 0) or 0),
            moves_left=int(unit.get("moves_left", unit.get("movesleft", 0)) or 0),
            home_city_id=home_city_id,
            veteran_level=int(unit.get("veteran", 0) or 0),
            can_build_city=bool(action_map.get("build", False)),
            move_directions=directions,
        )
        summaries.append(summary)
    return summaries
228
+
229
+
230
def _extract_city_summaries(snapshot: RawSnapshot) -> list[CitySummary]:
    """Build one CitySummary per entry in ``snapshot.state["city"]``, ordered by numeric city id."""

    def optional(value, cast):
        # Convert a present value; keep None as None.
        return cast(value) if value is not None else None

    def as_int(city, key):
        # Missing or falsy values collapse to 0.
        return int(city.get(key, 0) or 0)

    actions_by_city = _controller_actions(snapshot, "city")
    ordered_cities = sorted(snapshot.state.get("city", {}).items(), key=lambda pair: int(pair[0]))

    summaries: list[CitySummary] = []
    for raw_id, city in ordered_cities:
        # The action map may be keyed by the string form of the id or by the id itself.
        action_map = actions_by_city.get(str(raw_id), actions_by_city.get(raw_id, {}))
        ordered_actions = sorted(action_map.items())

        # Unit production targets first, then improvement targets, each in key order.
        production_options: list[str] = []
        for prefix in ("change_unit_prod_", "change_improve_prod_"):
            production_options.extend(
                _parse_target_name(key, prefix)
                for key, enabled in ordered_actions
                if enabled and key.startswith(prefix)
            )

        summaries.append(
            CitySummary(
                city_id=int(raw_id),
                size=as_int(city, "size"),
                prod_food=as_int(city, "prod_food"),
                prod_shield=as_int(city, "prod_shield"),
                prod_trade=as_int(city, "prod_trade"),
                surplus_food=as_int(city, "surplus_food"),
                surplus_shield=as_int(city, "surplus_shield"),
                surplus_trade=as_int(city, "surplus_trade"),
                production_kind=optional(city.get("production_kind"), int),
                production_value=optional(city.get("production_value"), int),
                turns_to_complete=optional(city.get("turns_to_prod_complete"), float),
                production_options=production_options,
            )
        )
    return summaries
273
+
274
+
275
+ def _build_summary(
276
+ snapshot: RawSnapshot,
277
+ metrics: SnapshotMetrics,
278
+ units: list[UnitSummary],
279
+ cities: list[CitySummary],
280
+ legal_actions: list[LegalAction],
281
+ ) -> str:
282
+ player = snapshot.state.get("player", {})
283
+ lines = [
284
+ f"Turn {snapshot.turn}",
285
+ f"Score {metrics.score:.1f}",
286
+ f"Map: {metrics.known_tiles} known tiles, {metrics.visible_tiles} visible tiles",
287
+ f"Economy: {player.get('my_gold', 0)} gold, science rate {player.get('my_science', 0)}%",
288
+ f"Cities: {metrics.city_count}",
289
+ ]
290
+ for city in cities[:5]:
291
+ lines.append(
292
+ f"- City {city.city_id}: size {city.size}, food {city.prod_food}/{city.surplus_food:+d}, "
293
+ f"shields {city.prod_shield}/{city.surplus_shield:+d}, trade {city.prod_trade}/{city.surplus_trade:+d}"
294
+ )
295
+ lines.append(f"Units: {metrics.unit_count}")
296
+ for unit in units[:8]:
297
+ lines.append(
298
+ f"- Unit {unit.unit_id}: {unit.unit_type}, hp {unit.health}, moves_left {unit.moves_left}, "
299
+ f"build_city={str(unit.can_build_city).lower()}, move_dirs={unit.move_directions}"
300
+ )
301
+ lines.append(f"Techs researched: {metrics.techs_researched}")
302
+ lines.append(f"Legal actions exposed: {len(legal_actions)}")
303
+ return "\n".join(lines)
304
+
305
+
306
def prepare_observation(
    snapshot: RawSnapshot,
    *,
    reward: float,
    done: bool,
    status: str,
    metadata: dict[str, Any] | None = None,
) -> PreparedObservation:
    """Convert a raw snapshot into a PreparedObservation.

    Args:
        snapshot: Raw snapshot to summarize.
        reward: Reward value copied onto the observation.
        done: Episode-finished flag copied onto the observation.
        status: Status string copied onto the observation.
        metadata: Optional metadata dict; a falsy value becomes ``{}``.

    Returns:
        The observation together with its metrics and the action lookup table.
    """
    legal_actions, action_refs = _extract_legal_actions(snapshot)
    metrics = extract_metrics(snapshot)
    units = _extract_unit_summaries(snapshot)
    cities = _extract_city_summaries(snapshot)
    summary_text = _build_summary(snapshot, metrics, units, cities, legal_actions)

    observation = FreecivObservation(
        turn=snapshot.turn,
        score=metrics.score,
        known_tiles=metrics.known_tiles,
        visible_tiles=metrics.visible_tiles,
        city_count=metrics.city_count,
        unit_count=metrics.unit_count,
        techs_researched=metrics.techs_researched,
        status=status,
        summary=summary_text,
        units=units,
        cities=cities,
        legal_actions=legal_actions,
        reward=reward,
        done=done,
        metadata=metadata if metadata else {},
    )
    return PreparedObservation(
        observation=observation,
        metrics=metrics,
        action_refs=action_refs,
    )
build/lib/freeciv_env/client.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from openenv.core.client_types import StepResult
4
+ from openenv.core.env_client import EnvClient
5
+
6
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
7
+
8
+
9
class FreecivEnv(EnvClient[FreecivAction, FreecivObservation, FreecivState]):
    """EnvClient specialization that maps Freeciv models to and from wire payloads."""

    def _step_payload(self, action: FreecivAction) -> dict:
        """Serialize an action to a dict, leaving out fields that are None."""
        payload = action.model_dump(exclude_none=True)
        return payload

    def _parse_result(self, payload: dict) -> StepResult[FreecivObservation]:
        """Build a StepResult from a payload; "observation" is required, "done" defaults to False."""
        parsed_observation = FreecivObservation(**payload["observation"])
        reward = payload.get("reward")
        done = payload.get("done", False)
        return StepResult(observation=parsed_observation, reward=reward, done=done)

    def _parse_state(self, payload: dict) -> FreecivState:
        """Build a FreecivState from the payload's top-level keys."""
        state = FreecivState(**payload)
        return state
build/lib/freeciv_env/grpo.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Iterable
5
+
6
+ from freeciv_env.models import FreecivAction, FreecivObservation, LegalAction
7
+
8
+ SYSTEM_PROMPT = (
9
+ "You are choosing the next action for a Freeciv agent. "
10
+ "Return only the integer index of the best legal action. "
11
+ "Do not output words, punctuation, JSON, or explanations."
12
+ )
13
+
14
+ TASK_PROMPT = (
15
+ "Pick the legal action index that maximizes immediate reward. "
16
+ "Invalid actions are penalized. Shorter outputs are better."
17
+ )
18
+
19
+
20
def format_action_line(index: int, action: LegalAction) -> str:
    """Render one prompt line of the form ``<index>: <label>``."""
    label = action.label
    return f"{index}: {label}"
22
+
23
+
24
def build_turn_prompt(observation: FreecivObservation, task_prompt: str = TASK_PROMPT) -> str:
    """Assemble the per-turn prompt: task text, state summary, numbered legal actions and a closing instruction."""
    numbered = "\n".join(
        format_action_line(position, legal) for position, legal in enumerate(observation.legal_actions)
    )
    sections = [
        f"{task_prompt}\n\n",
        f"State:\n{observation.summary}\n\n",
        f"Legal actions:\n",
        numbered,
        "\n\n",
        "Return exactly one integer index.",
    ]
    return "".join(sections)
32
+
33
+
34
def parse_action_choice(completion_text: str, legal_actions: Iterable[LegalAction]) -> FreecivAction | None:
    """Turn a model completion into a FreecivAction.

    The first (optionally negative) integer in ``completion_text`` is read as
    an index into ``legal_actions``.

    Returns:
        The matching FreecivAction, or None when no integer is present or the
        index is out of range.

    Raises:
        ValueError: If the selected legal action has an unsupported action_type.
    """
    options = list(legal_actions)
    found = re.search(r"-?\d+", completion_text)
    if found is None:
        return None
    position = int(found.group(0))
    if not 0 <= position < len(options):
        return None

    chosen = options[position]
    # Fields copied from the legal action for each supported action type.
    fields_by_type = {
        "end_turn": (),
        "move_unit": ("unit_id", "direction"),
        "build_city": ("unit_id",),
        "set_city_production": ("city_id", "target"),
        "set_research": ("target",),
    }
    for kind, field_names in fields_by_type.items():
        if chosen.action_type == kind:
            copied = {name: getattr(chosen, name) for name in field_names}
            return FreecivAction(action_type=kind, **copied)
    raise ValueError(f"unsupported action_type: {chosen.action_type}")
54
+
55
+
56
def action_priority(action: LegalAction) -> tuple[int, int]:
    """Rank a legal action as a (priority, tie-break) pair; larger tuples are preferred.

    build_city ranks highest, then set_research, set_city_production (with a
    bonus for the "Settlers" target), move_unit (lower directions first) and
    end_turn. Unknown action types rank below everything else.
    """
    kind = action.action_type
    if kind == "set_city_production":
        wants_settlers = (action.target or "") == "Settlers"
        return (300 + (50 if wants_settlers else 0), 0)
    if kind == "move_unit":
        return (200, -(action.direction or 0))

    fixed_ranks = (
        ("build_city", (500, 0)),
        ("set_research", (400, 0)),
        ("end_turn", (0, 0)),
    )
    for name, rank in fixed_ranks:
        if kind == name:
            return rank
    return (-1000, 0)
69
+
70
+
71
+
72
def oracle_action_index(legal_actions: Iterable[LegalAction]) -> int:
    """Return the index of the highest-priority legal action.

    Ties are resolved in favor of the earliest index.

    Raises:
        ValueError: If there are no legal actions.
    """
    candidates = list(legal_actions)
    if not candidates:
        raise ValueError("no legal actions available")
    # max() keeps the first maximal element, so the earliest index wins ties.
    return max(range(len(candidates)), key=lambda position: action_priority(candidates[position]))
84
+
85
+
86
+
87
def reward_from_oracle(completions, best_index, **kwargs):
    """Score completions against expected action indexes.

    Each completion is paired with the entry of ``best_index`` at the same
    position. The first integer found in the completion's text is compared
    with the expected index.

    Returns:
        A list with 1.0 for a match, 0.0 for a mismatch and -0.25 when the
        completion contains no integer. Extra keyword arguments are ignored.
    """
    del kwargs

    def score(completion, expected):
        text = completion if isinstance(completion, str) else str(completion)
        found = re.search(r"-?\d+", text)
        if found is None:
            return -0.25
        return 1.0 if int(found.group(0)) == int(expected) else 0.0

    return [score(completion, expected) for completion, expected in zip(completions, best_index)]
build/lib/freeciv_env/models.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field, model_validator
6
+
7
+ from openenv.core.env_server.types import Action, Observation, State
8
+
9
+
10
# Compact per-unit record embedded in FreecivObservation.units.
# Each field's meaning is carried by its Field description.
class UnitSummary(BaseModel):
    unit_id: int = Field(..., description="Freeciv unit id")
    unit_type: str = Field(..., description="Ruleset unit type name")
    health: int = Field(0, description="Current health")
    moves_left: int = Field(0, description="Movement points remaining")
    home_city_id: int | None = Field(None, description="Home city id, if any")
    veteran_level: int = Field(0, description="Veteran level")
    can_build_city: bool = Field(False, description="Whether the unit can found a city now")
    move_directions: list[int] = Field(default_factory=list, description="Legal move direction indexes")
19
+
20
+
21
# Compact per-city record embedded in FreecivObservation.cities.
# Only city_id and size are required; every other field has a default.
class CitySummary(BaseModel):
    city_id: int = Field(..., description="Freeciv city id")
    size: int = Field(..., description="Population size")
    prod_food: int = Field(0, description="Gross food output")
    prod_shield: int = Field(0, description="Gross shield output")
    prod_trade: int = Field(0, description="Gross trade output")
    surplus_food: int = Field(0, description="Net food surplus")
    surplus_shield: int = Field(0, description="Net shield surplus")
    surplus_trade: int = Field(0, description="Net trade surplus")
    production_kind: int | None = Field(None, description="Current production kind enum from Freeciv")
    production_value: int | None = Field(None, description="Current production value id from Freeciv")
    turns_to_complete: float | None = Field(None, description="Turns until current production completes")
    production_options: list[str] = Field(default_factory=list, description="Legal production targets")
34
+
35
+
36
# One action the environment currently exposes to the agent.
# action_type uses the same five literals as FreecivAction; the id, direction
# and target fields are optional and default to None.
class LegalAction(BaseModel):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    label: str = Field(..., description="Human-readable action label")
    unit_id: int | None = Field(None, description="Target unit id")
    city_id: int | None = Field(None, description="Target city id")
    direction: int | None = Field(None, description="Freeciv direction index 0..7")
    target: str | None = Field(None, description="Production or tech target name")
    raw_action_key: str | None = Field(None, description="Underlying freeciv-bot action key")
50
+
51
+
52
# Action submitted by an agent. Which optional fields are required depends on
# action_type and is enforced by validate_shape after field validation.
class FreecivAction(Action):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    unit_id: int | None = None
    city_id: int | None = None
    direction: int | None = None
    target: str | None = None

    @model_validator(mode="after")
    def validate_shape(self) -> "FreecivAction":
        """Check that the fields required by ``action_type`` are present.

        Raises:
            ValueError: If a required field is missing or the action type is
                not one of the supported values.
        """
        kind = self.action_type
        problem = None
        if kind == "end_turn":
            pass
        elif kind == "move_unit":
            if not (self.unit_id is not None and self.direction is not None):
                problem = "move_unit requires unit_id and direction"
        elif kind == "build_city":
            if self.unit_id is None:
                problem = "build_city requires unit_id"
        elif kind == "set_city_production":
            if not (self.city_id is not None and self.target):
                problem = "set_city_production requires city_id and target"
        elif kind == "set_research":
            if not self.target:
                problem = "set_research requires target"
        else:
            problem = f"unsupported action_type: {self.action_type}"

        if problem is not None:
            raise ValueError(problem)
        return self
86
+
87
+
88
# Observation returned to the agent: scalar metrics, a text summary,
# structured unit/city summaries and the list of legal actions.
class FreecivObservation(Observation):
    turn: int = Field(..., description="Current game turn")
    score: float = Field(..., description="Current player score")
    known_tiles: int = Field(..., description="Tiles known to the player")
    visible_tiles: int = Field(..., description="Tiles currently visible to the player")
    city_count: int = Field(..., description="Number of owned cities")
    unit_count: int = Field(..., description="Number of owned units")
    techs_researched: int = Field(..., description="Number of researched techs")
    status: str = Field("ok", description="High-level environment status")
    summary: str = Field(..., description="Compact text summary for LLMs")
    units: list[UnitSummary] = Field(default_factory=list, description="Compact unit summaries")
    cities: list[CitySummary] = Field(default_factory=list, description="Compact city summaries")
    legal_actions: list[LegalAction] = Field(default_factory=list, description="Legal actions exposed by the environment")
    reward: float = Field(0.0, description="Reward from the last action")
    done: bool = Field(False, description="Whether the episode is done")
103
+
104
+
105
# Environment state exposed through the State interface.
# Carries the same scalar metrics as FreecivObservation, each defaulting to zero.
class FreecivState(State):
    turn: int = Field(0, description="Current game turn")
    score: float = Field(0.0, description="Current player score")
    known_tiles: int = Field(0, description="Known tiles")
    visible_tiles: int = Field(0, description="Visible tiles")
    city_count: int = Field(0, description="Owned city count")
    unit_count: int = Field(0, description="Owned unit count")
    techs_researched: int = Field(0, description="Researched tech count")
build/lib/freeciv_env/runtime.py ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ import threading
6
+ import time
7
+ from typing import Protocol
8
+ from urllib.parse import urlencode, urlparse
9
+ from urllib.request import Request, urlopen
10
+
11
+ from freeciv_env.adapter import ActionRef, RawSnapshot
12
+
13
+
14
class FreecivSession(Protocol):
    """Structural interface for a Freeciv game backend.

    Each state-changing call returns the ``RawSnapshot`` observed afterwards.
    """

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Begin a new session and return its first snapshot."""
        ...

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Execute the action identified by ``action_ref`` and return the resulting snapshot."""
        ...

    def end_turn(self) -> RawSnapshot:
        """End the current turn and return the next snapshot."""
        ...

    def close(self) -> None:
        """Release whatever resources the session holds."""
        ...
22
+
23
+
24
class _InteractiveBot:
    """Wraps a ``BaseBot`` subclass that reports game state to a session.

    ``freecivbot`` is imported inside ``__init__`` rather than at module
    import time. The constructed bot is exposed as ``self.impl``.
    """

    def __init__(self, session: "LiveFreecivSession"):
        from freecivbot.bot.base_bot import BaseBot

        class InteractiveBotImpl(BaseBot):
            """Bot that publishes snapshots to its owner instead of choosing moves."""

            def __init__(self, owner: "LiveFreecivSession"):
                super().__init__()
                self._owner = owner

            def conduct_turn(self, pplayer, info_controls, end_turn_hook):
                # Let the base class do its turn handling, then report the state.
                super().conduct_turn(pplayer, info_controls, end_turn_hook)
                self._publish_snapshot()

            def calculate_next_move(self):
                # Only report while a turn is in progress.
                if not self._turn_active:
                    return
                self._publish_snapshot()

            def _publish_snapshot(self):
                # Refresh _turn_state/_turn_opts before packaging them.
                self._acquire_state()
                current = RawSnapshot(
                    turn=self.turn,
                    state=self._turn_state,
                    actions=self._turn_opts,
                )
                self._owner._publish_snapshot(current)

        self.impl = InteractiveBotImpl(session)
52
+
53
+
54
class _ConfiguredCivClient:
    """Builds a ``CivClient`` subclass with custom login, chat and connection handlers.

    ``freecivbot`` is imported inside ``__init__``. The configured client
    instance is exposed as ``self.impl``.
    """

    def __init__(self, bot, user_name: str, *, client_port: int, visual_monitor: bool = False):
        from freecivbot.civclient import CivClient

        class ConfiguredCivClientImpl(CivClient):
            def init_control(self, ws_client):
                """Attach the websocket client, initialise controllers and log in."""
                self.ws_client = ws_client
                self.init_controller()
                if self.visual_monitor:
                    self.monitor.start_monitor()
                self.ws_client.send(
                    {
                        "pid": 4,
                        "username": self.user_name,
                        "capability": "+Freeciv.Web.Devel-3.2",
                        "version_label": "-dev",
                        "major_version": 3,
                        "minor_version": 1,
                        "patch_version": 90,
                        "port": self.client_port,
                        "password": None,
                        "subject": None,
                    }
                )

            def handle_chat_msg(self, packet):
                """Print server chat messages and start game preparation after login."""
                from freecivbot.utils.fc_events import E_UNDEFINED

                message = packet["message"]
                conn_id = packet["conn_id"]
                event = packet["event"]

                if message is None:
                    return

                # Normalise event ids outside the known range to E_UNDEFINED.
                if event is None or event < 0 or event >= E_UNDEFINED:
                    print("Undefined message event type")
                    print(packet)
                    print("\r\n")
                    packet["event"] = event = E_UNDEFINED

                if conn_id in self.clstate.connections:
                    # Prefix messages from known connections with the sender name.
                    message = "<b>" + self.clstate.connections[conn_id]["username"] + ":</b>" + message
                elif "/metamessage" in message or "Metaserver message string" in message:
                    # Metaserver notices from unknown connections are dropped.
                    return

                packet["message"] = message
                print(packet)
                print("\r\n")

                # The login confirmation triggers single-player setup.
                if "You are logged in as" in message:
                    self.ws_client.send_message("/set minplayers 1")
                    self.prepare_game()

            def handle_conn_info(self, packet):
                """Add or remove a connection record according to a conn_info packet."""
                from freecivbot.connectivity.client_state import C_S_PREPARING
                from freecivbot.utils.freecivlog import freelog

                pconn = self.clstate.find_conn_by_id(packet["id"])

                if packet["used"]:
                    pplayer = self.player_ctrl.valid_player_by_number(packet["player_num"])
                    if pplayer is None:
                        return
                    packet["playing"] = pplayer

                    if self.clstate.has_id(packet["id"]):
                        self.clstate.init_state(packet)

                    self.clstate.conn_list_append(packet)
                else:
                    # The server is dropping this connection.
                    if pconn is None:
                        freelog(f"Server removed unknown connection {packet['id']}")
                        return
                    self.clstate.client_remove_cli_conn(pconn)
                    pconn = None

                if self.clstate.has_id(packet["id"]) and self.clstate.cur_player() != packet["playing"]:
                    self.clstate.set_client_state(C_S_PREPARING)

        self.impl = ConfiguredCivClientImpl(
            bot,
            user_name,
            client_port=client_port,
            visual_monitor=visual_monitor,
        )
141
+
142
+
143
+ class _ConfiguredCivConnection:
144
+ def __init__(self, civ_client, base_url: str, *, owner: "LiveFreecivSession", wait_for_server: int = 120, retry_interval: int = 5):
145
+ from math import ceil
146
+
147
+ import websocket
148
+
149
+ self._websocket = websocket
150
+ self.client = civ_client
151
+ self.base_url = base_url
152
+ self._owner = owner
153
+ self._loop = None
154
+ self._owner._connection = self
155
+ self.civserverport = self._reserve_client_port(base_url, civ_client.client_port)
156
+ self.client.client_port = self.civserverport
157
+ self.proxyport = 1000 + self.civserverport
158
+ self._retry_interval = retry_interval
159
+ self._num_retries = int(ceil(wait_for_server / retry_interval))
160
+ self._cur_retry = 0
161
+ self._ws_url = self._build_ws_url(base_url)
162
+ self.network_init()
163
+
164
+ def _build_ws_url(self, base_url: str) -> str:
165
+ parsed = urlparse(base_url)
166
+ scheme = "wss" if parsed.scheme == "https" else "ws"
167
+ host = parsed.hostname or "localhost"
168
+ port = parsed.port
169
+ if port is None:
170
+ port = 443 if scheme == "wss" else 80
171
+ return f"{scheme}://{host}:{port}/civsocket/{self.proxyport}"
172
+
173
+ def _reserve_client_port(self, base_url: str, requested_port: int) -> int:
174
+ parsed = urlparse(base_url)
175
+ scheme = parsed.scheme or "http"
176
+ host = parsed.hostname or "localhost"
177
+ port = parsed.port
178
+ if port is None:
179
+ port = 443 if scheme == "https" else 80
180
+ query = urlencode({"civserverport": requested_port})
181
+ launcher_url = f"{scheme}://{host}:{port}/civclientlauncher?{query}"
182
+ request = Request(launcher_url, method="POST")
183
+ with urlopen(request, timeout=10) as response:
184
+ result = response.headers.get("result")
185
+ reserved_port = response.headers.get("port")
186
+ if result != "success" or reserved_port is None:
187
+ raise RuntimeError(f"failed to reserve freeciv client port via {launcher_url}")
188
+ return int(reserved_port)
189
+
190
+ def _retry(self):
191
+ self._cur_retry += 1
192
+ time.sleep(self._retry_interval)
193
+ return self._detect_server_up()
194
+
195
+ def _detect_server_up(self):
196
+ ws = self._websocket.WebSocket()
197
+ try:
198
+ ws.connect(self._ws_url, timeout=10)
199
+ return True
200
+ except Exception as err:
201
+ print("Connect not successful:", err, " retrying in %s seconds." % self._retry_interval)
202
+ if self._cur_retry < self._num_retries:
203
+ return self._retry()
204
+ return False
205
+ finally:
206
+ try:
207
+ ws.close()
208
+ except Exception:
209
+ pass
210
+
211
+ def network_init(self):
212
+ self._cur_retry = 0
213
+ print("Connecting to server at %s ..." % self.base_url)
214
+ if self._detect_server_up():
215
+ self.websocket_init()
216
+ else:
217
+ print("Connection could not be established!")
218
+
219
+ def websocket_init(self):
220
+ from tornado import ioloop
221
+
222
+ from freecivbot.connectivity.clinet import CivWSClient
223
+
224
+ asyncio.set_event_loop(asyncio.new_event_loop())
225
+ ioloop.IOLoop.clear_current()
226
+ self._loop = ioloop.IOLoop.current()
227
+
228
+ client = CivWSClient(self.client)
229
+
230
+ def send_json(data):
231
+ if not client._ws_connection:
232
+ raise RuntimeError("Web socket connection is closed.")
233
+ msg = json.dumps(data, separators=(",", ":"))
234
+ client._ws_connection.write_message(msg)
235
+
236
+ client.send = send_json
237
+ client.connect(self._ws_url)
238
+
239
+ try:
240
+ self._loop.start()
241
+ except KeyboardInterrupt:
242
+ client.close()
243
+
244
+ def submit(self, fn) -> None:
245
+ if self._loop is None:
246
+ raise RuntimeError("freeciv connection loop is not ready")
247
+ done = threading.Event()
248
+ error: BaseException | None = None
249
+
250
+ def run():
251
+ nonlocal error
252
+ try:
253
+ fn()
254
+ except BaseException as exc:
255
+ error = exc
256
+ finally:
257
+ done.set()
258
+
259
+ self._loop.add_callback(run)
260
+ if not done.wait(timeout=10):
261
+ raise TimeoutError("timed out dispatching action to freeciv loop")
262
+ if error is not None:
263
+ raise error
264
+
265
+ def close(self) -> None:
266
+ if self._loop is None:
267
+ return
268
+ self.submit(self.client.close)
269
+
270
+
271
class LiveFreecivSession:
    """Session that plays against a live Freeciv Web server.

    A background thread owns the connection and its event loop. The bot
    running there publishes ``RawSnapshot`` objects through
    ``_publish_snapshot``; the public methods block until the next snapshot
    arrives or ``turn_timeout_s`` elapses.
    """

    def __init__(
        self,
        *,
        username: str = "openenvbot",
        client_port: int = 6000,
        base_url: str = "http://localhost",
        turn_timeout_s: float = 60.0,
    ):
        self.username = username
        self.client_port = client_port
        self.base_url = base_url
        self.turn_timeout_s = turn_timeout_s

        self._bot_wrapper: _InteractiveBot | None = None
        self._client = None
        self._connection: _ConfiguredCivConnection | None = None
        self._thread: threading.Thread | None = None
        # Set whenever a new snapshot is published (or the thread failed).
        self._ready = threading.Event()
        self._snapshot_lock = threading.Lock()
        self._snapshot: RawSnapshot | None = None
        self._thread_error: BaseException | None = None
        self._reset_counter = 0
        # Per-instance number mixed into usernames and port selection.
        self._session_seed = time.monotonic_ns() % 1_000_000

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Tear down any current game, connect afresh and return the first snapshot.

        ``seed`` is accepted for interface compatibility and is not used.
        """
        del seed
        self.close()
        self._reset_counter += 1
        login_name = self._next_username()
        # Rotate through three consecutive ports starting at client_port.
        port_offset = (self._session_seed + self._reset_counter - 1) % 3
        chosen_port = self.client_port + port_offset

        self._ready.clear()
        self._thread_error = None
        self._snapshot = None

        self._bot_wrapper = _InteractiveBot(self)
        configured = _ConfiguredCivClient(
            self._bot_wrapper.impl,
            login_name,
            client_port=chosen_port,
            visual_monitor=False,
        )
        self._client = configured.impl

        def run() -> None:
            # Constructing the connection blocks for the lifetime of its loop.
            try:
                _ConfiguredCivConnection(self._client, self.base_url, owner=self)
            except BaseException as exc:  # pragma: no cover - surfaced in waiters
                self._thread_error = exc
                self._ready.set()

        worker = threading.Thread(target=run, name="freeciv-live-session", daemon=True)
        self._thread = worker
        worker.start()
        return self._wait_for_snapshot("reset")

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Trigger the referenced action on the connection loop and await the next snapshot.

        Raises:
            ValueError: if the action is not among the actor's valid actions.
        """
        current = self._require_snapshot()
        action_list = current.actions[action_ref.controller]
        candidates = action_list.get_actions(action_ref.actor_id, valid_only=True)
        if candidates is None:
            action = None
        else:
            action = candidates.get(action_ref.raw_action_key)
        if action is None:
            raise ValueError(
                f"action {action_ref.raw_action_key} is no longer valid for {action_ref.controller}:{action_ref.actor_id}"
            )
        # Clear before dispatching so the wait only sees a newer snapshot.
        self._ready.clear()
        self._require_connection().submit(lambda: action_list.trigger_validated_action(action))
        return self._wait_for_snapshot(action_ref.raw_action_key)

    def end_turn(self) -> RawSnapshot:
        """End the current turn and return the snapshot that follows."""
        bot_wrapper = self._bot_wrapper
        if bot_wrapper is None:
            raise RuntimeError("session has not been reset")
        self._ready.clear()
        self._require_connection().submit(bot_wrapper.impl.end_turn)
        return self._wait_for_snapshot("end_turn")

    def close(self) -> None:
        """Best-effort shutdown of the connection and thread; clears all session state."""
        if self._connection is not None:
            try:
                self._connection.close()
            except Exception:
                pass
        elif self._client is not None:
            try:
                self._client.close()
            except Exception:
                pass

        worker = self._thread
        if worker is not None and worker.is_alive():
            worker.join(timeout=5)

        self._bot_wrapper = None
        self._client = None
        self._connection = None
        self._thread = None
        self._snapshot = None
        self._thread_error = None
        self._ready.clear()

    def _publish_snapshot(self, snapshot: RawSnapshot) -> None:
        """Store ``snapshot`` and wake any waiter."""
        with self._snapshot_lock:
            self._snapshot = snapshot
        self._ready.set()

    def _next_username(self) -> str:
        """Return the base username truncated so that name plus numeric suffix is at most 31 chars."""
        suffix = str(self._session_seed + self._reset_counter)
        keep = max(1, 31 - len(suffix))
        return f"{self.username[:keep]}{suffix}"

    def _require_connection(self) -> _ConfiguredCivConnection:
        """Return the active connection or raise ``RuntimeError``."""
        connection = self._connection
        if connection is None:
            raise RuntimeError("freeciv connection is not ready")
        return connection

    def _require_snapshot(self) -> RawSnapshot:
        """Return the latest snapshot or raise ``RuntimeError`` if none exists."""
        with self._snapshot_lock:
            latest = self._snapshot
            if latest is None:
                raise RuntimeError("no live snapshot is available")
            return latest

    def _wait_for_snapshot(self, reason: str) -> RawSnapshot:
        """Block until a snapshot is published, the thread fails, or the timeout expires.

        Raises:
            RuntimeError: if the session thread recorded an error, or the ready
                flag was set without a snapshot being stored.
            TimeoutError: if nothing arrived within ``turn_timeout_s``.
        """
        deadline = time.monotonic() + self.turn_timeout_s
        while time.monotonic() < deadline:
            if self._thread_error is not None:
                raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
            # Poll in short slices so a thread failure is noticed promptly.
            if not self._ready.wait(timeout=0.1):
                continue
            if self._thread_error is not None:
                raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
            return self._require_snapshot()
        raise TimeoutError(f"timed out waiting for freeciv snapshot during {reason}")
build/lib/freeciv_env/server/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from freeciv_env.server.freeciv_environment import FreecivEnvironment
2
+
3
+ __all__ = ["FreecivEnvironment"]
build/lib/freeciv_env/server/app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from openenv.core.env_server import create_app
6
+
7
+ from freeciv_env.models import FreecivAction, FreecivObservation
8
+ from freeciv_env.runtime import LiveFreecivSession
9
+ from freeciv_env.server.freeciv_environment import FreecivEnvironment
10
+
11
+
12
def create_live_session() -> LiveFreecivSession:
    """Build a ``LiveFreecivSession`` configured from ``FREECIV_*`` environment variables.

    Unset variables fall back to the literal defaults below.
    """
    env = os.getenv
    settings = {
        "username": env("FREECIV_USERNAME", "openenvbot"),
        "client_port": int(env("FREECIV_CLIENT_PORT", "6000")),
        "base_url": env("FREECIV_SERVER_URL", "http://localhost"),
        "turn_timeout_s": float(env("FREECIV_TURN_TIMEOUT_S", "60")),
    }
    return LiveFreecivSession(**settings)
19
+
20
+
21
def create_freeciv_app(*, session_factory=create_live_session, max_turns: int | None = None):
    """Create the OpenEnv application serving ``FreecivEnvironment`` instances.

    Args:
        session_factory: zero-argument callable handed to each environment.
        max_turns: turn limit for each environment; when ``None`` it is read
            from ``FREECIV_MAX_TURNS`` (default ``50``).
    """
    turn_limit = max_turns
    if turn_limit is None:
        turn_limit = int(os.getenv("FREECIV_MAX_TURNS", "50"))

    def build_environment():
        return FreecivEnvironment(session_factory=session_factory, max_turns=turn_limit)

    return create_app(
        build_environment,
        FreecivAction,
        FreecivObservation,
        env_name="freeciv_env",
    )
30
+
31
+
32
+ app = create_freeciv_app()
33
+
34
+
35
def main() -> None:
    """Serve ``app`` with uvicorn on 0.0.0.0:8000."""
    from uvicorn import run as serve

    serve(
        app,
        host="0.0.0.0",
        port=8000,
        ws_ping_interval=300,
        ws_ping_timeout=300,
    )
39
+
40
+
41
+ if __name__ == "__main__":
42
+ main()
build/lib/freeciv_env/server/freeciv_environment.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable
4
+ from uuid import uuid4
5
+
6
+ from openenv.core.env_server.interfaces import Environment
7
+
8
+ from freeciv_env.adapter import (
9
+ ActionLookupKey,
10
+ ActionRef,
11
+ PreparedObservation,
12
+ RawSnapshot,
13
+ SnapshotMetrics,
14
+ action_lookup_key,
15
+ prepare_observation,
16
+ )
17
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
18
+ from freeciv_env.runtime import FreecivSession
19
+
20
+
21
class FreecivEnvironment(Environment[FreecivAction, FreecivObservation, FreecivState]):
    """OpenEnv environment that drives a single ``FreecivSession``.

    ``reset`` creates a new session through ``session_factory``. ``step``
    either ends the turn or resolves the action against the legal actions of
    the latest observation and forwards it to the session. An episode is done
    when the player is no longer alive or ``max_turns`` is reached.
    """

    SUPPORTS_CONCURRENT_SESSIONS = False

    def __init__(self, session_factory: Callable[[], FreecivSession], max_turns: int = 50):
        super().__init__()
        self._session_factory = session_factory
        self.max_turns = max_turns
        self._session: FreecivSession | None = None
        self._snapshot: RawSnapshot | None = None
        self._metrics: SnapshotMetrics | None = None
        # Maps the lookup key of each currently legal action to its raw reference.
        self._action_refs: dict[ActionLookupKey, ActionRef] = {}
        self._state = FreecivState(episode_id=str(uuid4()), step_count=0)

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Start a new episode and return its initial observation.

        Args:
            seed: forwarded to the session's ``reset``.
            episode_id: identifier for the new episode; a random UUID is used
                when omitted or empty.
        """
        del kwargs
        self.close()
        self._session = self._session_factory()
        snapshot = self._session.reset(seed=seed)
        prepared = prepare_observation(
            snapshot,
            reward=0.0,
            done=self._is_done(snapshot),
            status="ready",
            metadata={},
        )
        # _commit copies step_count from the current state, so zero it here;
        # otherwise the counter would carry over from the previous episode.
        self._state.step_count = 0
        self._commit(snapshot, prepared, episode_id=episode_id or str(uuid4()))
        return prepared.observation

    def step(
        self,
        action: FreecivAction,
        timeout_s: float | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Apply ``action`` and return the resulting observation.

        An action that is not among the current legal actions leaves the game
        untouched and yields an ``invalid_action`` observation with a reward
        of -0.25. ``timeout_s`` is accepted but not used.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        del timeout_s, kwargs
        if self._session is None or self._snapshot is None or self._metrics is None:
            raise RuntimeError("environment must be reset before step")

        self._state.step_count += 1

        if action.action_type == "end_turn":
            next_snapshot = self._session.end_turn()
        else:
            ref = self._action_refs.get(action_lookup_key(action))
            if ref is None:
                rejected = prepare_observation(
                    self._snapshot,
                    reward=-0.25,
                    done=self._is_done(self._snapshot),
                    status="invalid_action",
                    metadata={"error": "action is not currently legal"},
                )
                self._commit(
                    self._snapshot,
                    rejected,
                    episode_id=self._state.episode_id,
                    replace_snapshot=False,
                )
                return rejected.observation
            next_snapshot = self._session.apply_action(ref)

        # Reward is measured against the metrics of the previous observation.
        reward = self._reward_for_transition(action, self._metrics, next_snapshot)
        prepared = prepare_observation(
            next_snapshot,
            reward=reward,
            done=self._is_done(next_snapshot),
            status="ok",
            metadata={},
        )
        self._commit(next_snapshot, prepared, episode_id=self._state.episode_id)
        return prepared.observation

    @property
    def state(self) -> FreecivState:
        """Latest committed environment state."""
        return self._state

    def close(self) -> None:
        """Close the current session, if any, and drop cached snapshot data."""
        if self._session is not None:
            self._session.close()
            self._session = None
        self._snapshot = None
        self._metrics = None
        self._action_refs = {}

    def _commit(
        self,
        snapshot: RawSnapshot,
        prepared: PreparedObservation,
        *,
        episode_id: str,
        replace_snapshot: bool = True,
    ) -> None:
        """Record ``prepared`` as the current observation and rebuild the public state."""
        if replace_snapshot:
            self._snapshot = snapshot
        self._metrics = prepared.metrics
        self._action_refs = prepared.action_refs
        observation = prepared.observation
        self._state = FreecivState(
            episode_id=episode_id,
            step_count=self._state.step_count,
            turn=observation.turn,
            score=observation.score,
            known_tiles=observation.known_tiles,
            visible_tiles=observation.visible_tiles,
            city_count=observation.city_count,
            unit_count=observation.unit_count,
            techs_researched=observation.techs_researched,
        )

    def _reward_for_transition(
        self,
        action: FreecivAction,
        previous: SnapshotMetrics,
        next_snapshot: RawSnapshot,
    ) -> float:
        """Return a fixed per-action bonus plus weighted non-negative metric gains."""
        from freeciv_env.adapter import extract_metrics

        current = extract_metrics(next_snapshot)
        reward = {
            "end_turn": 0.0,
            "move_unit": 0.01,
            "build_city": 0.10,
            "set_city_production": 0.05,
            "set_research": 0.05,
        }[action.action_type]
        # Only improvements count; decreases are clamped to zero.
        reward += max(current.score - previous.score, 0.0) * 0.02
        reward += max(current.known_tiles - previous.known_tiles, 0) * 0.01
        reward += max(current.city_count - previous.city_count, 0) * 0.50
        reward += max(current.techs_researched - previous.techs_researched, 0) * 0.25
        return float(reward)

    def _is_done(self, snapshot: RawSnapshot) -> bool:
        """Return True when the player is not alive or the turn limit is reached."""
        player = snapshot.state.get("player", {})
        alive = bool(player.get("my_is_alive", True))
        return (not alive) or snapshot.turn >= self.max_turns
build/lib/server/__init__.py ADDED
File without changes
build/lib/server/app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from freeciv_env.server.app import app as app
2
+ from freeciv_env.server.app import main as _main
3
+
4
+
5
def main() -> None:
    """Entry-point shim that delegates to ``freeciv_env.server.app.main``."""
    _main()
7
+
8
+
9
+ if __name__ == "__main__":
10
+ main()
client.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from freeciv_env.client import *
freeciv_env.egg-info/PKG-INFO ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: freeciv-env
3
+ Version: 0.1.0
4
+ Summary: OpenEnv environment for Freeciv via freeciv-bot
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: openenv-core[core]==0.2.1
8
+ Requires-Dist: freecivbot @ git+https://github.com/chris1869/freeciv-bot.git
9
+ Requires-Dist: uvicorn>=0.35.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=8.4.1; extra == "dev"
12
+ Requires-Dist: requests>=2.32.5; extra == "dev"
13
+ Provides-Extra: train
14
+ Requires-Dist: accelerate>=1.10.0; extra == "train"
15
+ Requires-Dist: bitsandbytes>=0.47.0; extra == "train"
16
+ Requires-Dist: datasets>=4.0.0; extra == "train"
17
+ Requires-Dist: trl>=0.24.0; extra == "train"
18
+ Requires-Dist: unsloth>=2026.3.4; extra == "train"
19
+
20
+ ---
21
+ title: Freeciv Environment Server
22
+ emoji: 🎮
23
+ colorFrom: blue
24
+ colorTo: indigo
25
+ sdk: docker
26
+ pinned: false
27
+ app_port: 8000
28
+ base_path: /web
29
+ tags:
30
+ - openenv
31
+ ---
32
+
33
+ # freeciv-env
34
+
35
+ OpenEnv environment for Freeciv, built on top of `freeciv-bot`.
36
+
37
+ ## Current scope
38
+
39
+ This environment exposes a small, trainable action surface:
40
+
41
+ - `end_turn`
42
+ - `move_unit(unit_id, direction)`
43
+ - `build_city(unit_id)`
44
+ - `set_city_production(city_id, target)`
45
+ - `set_research(target)`
46
+
47
+ Observations are text-first and include compact structured summaries of:
48
+
49
+ - current turn
50
+ - score
51
+ - known and visible map tiles
52
+ - units
53
+ - cities
54
+ - legal actions
55
+
56
+ ## Local development
57
+
58
+ Install dependencies:
59
+
60
+ ```bash
61
+ uv sync --extra dev
62
+ ```
63
+
64
+ Run tests:
65
+
66
+ ```bash
67
+ uv run pytest
68
+ ```
69
+
70
+ Run the server:
71
+
72
+ ```bash
73
+ uv run uvicorn freeciv_env.server.app:app --host 0.0.0.0 --port 8000
74
+ ```
75
+
76
+ Run the fast GRPO loop:
77
+
78
+ ```bash
79
+ uv sync --extra dev --extra train
80
+ uv run python scripts/train_grpo_fast.py --env-url http://127.0.0.1 --max-steps 50
81
+ ```
82
+
83
+ ## Hackathon / Unsloth notes
84
+
85
+ For the hackathon Colab submission path on H100s, Unsloth recommended the BF16 OpenEnv gpt-oss 20B notebook:
86
+
87
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
88
+
89
+ If you adapt that notebook for this environment, reduce `max_steps` to `300` for a faster run.
90
+
91
+ Useful notebook indexes:
92
+
93
+ - RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
94
+ - all notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
95
+ - notebook repo: <https://github.com/unslothai/notebooks/tree/main/nb>
96
+
97
+ If GRPO is too slow, start from a smaller notebook with `fast_inference = True` and add the Freeciv/OpenEnv calls:
98
+
99
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
100
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
101
+
102
+ If vLLM GRPO fails, Unsloth suggested a clean virtualenv install:
103
+
104
+ ```bash
105
+ python -m venv unsloth_env
106
+ source unsloth_env/bin/activate
107
+ pip install --upgrade pip && pip install uv
108
+ uv pip install unsloth vllm --torch-backend=auto
109
+ ```
110
+
111
+ If Unsloth is already installed, update it for the latest GRPO fixes:
112
+
113
+ ```bash
114
+ pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
115
+ ```
116
+
117
+ ## Live runtime requirements
118
+
119
+ The default server app uses `freeciv-bot` against a local Freeciv Web runtime.
120
+
121
+ Environment variables:
122
+
123
+ - `FREECIV_SERVER_URL` (default: `http://127.0.0.1`)
124
+ - `FREECIV_USERNAME` (default: `openenvbot`)
125
+ - `FREECIV_CLIENT_PORT` (default: `6000`)
126
+ - `FREECIV_TURN_TIMEOUT_S` (default: `60`)
127
+
128
+ The included automated tests use a fake session backend, so they do not require a live Freeciv server.
129
+
130
+ The GRPO training script uses:
131
+
132
+ - `Qwen/Qwen3.5-0.8B`
133
+ - Unsloth bf16 LoRA loading
134
+ - TRL `GRPOTrainer`
135
+ - integer-only action selection to minimize generated tokens
136
+ - offline GRPO over env-sampled states for maximum throughput
freeciv_env.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ pyproject.toml
3
+ freeciv_env/__init__.py
4
+ freeciv_env/adapter.py
5
+ freeciv_env/client.py
6
+ freeciv_env/grpo.py
7
+ freeciv_env/models.py
8
+ freeciv_env/runtime.py
9
+ freeciv_env.egg-info/PKG-INFO
10
+ freeciv_env.egg-info/SOURCES.txt
11
+ freeciv_env.egg-info/dependency_links.txt
12
+ freeciv_env.egg-info/entry_points.txt
13
+ freeciv_env.egg-info/requires.txt
14
+ freeciv_env.egg-info/top_level.txt
15
+ freeciv_env/server/__init__.py
16
+ freeciv_env/server/app.py
17
+ freeciv_env/server/freeciv_environment.py
18
+ server/__init__.py
19
+ server/app.py
20
+ tests/test_adapter.py
21
+ tests/test_environment.py
22
+ tests/test_grpo_utils.py
23
+ tests/test_server_roundtrip.py
freeciv_env.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
freeciv_env.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ server = server.app:main
freeciv_env.egg-info/requires.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openenv-core[core]==0.2.1
2
+ freecivbot @ git+https://github.com/chris1869/freeciv-bot.git
3
+ uvicorn>=0.35.0
4
+
5
+ [dev]
6
+ pytest>=8.4.1
7
+ requests>=2.32.5
8
+
9
+ [train]
10
+ accelerate>=1.10.0
11
+ bitsandbytes>=0.47.0
12
+ datasets>=4.0.0
13
+ trl>=0.24.0
14
+ unsloth>=2026.3.4
freeciv_env.egg-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ freeciv_env
2
+ server
freeciv_env/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from freeciv_env.client import FreecivEnv
2
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState, LegalAction
3
+
4
+ __all__ = [
5
+ "FreecivAction",
6
+ "FreecivEnv",
7
+ "FreecivObservation",
8
+ "FreecivState",
9
+ "LegalAction",
10
+ ]
freeciv_env/adapter.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+ from freeciv_env.models import CitySummary, FreecivAction, FreecivObservation, LegalAction, UnitSummary
7
+
8
+
9
+ ActionLookupKey = tuple[str, int | None, int | None, str | None]
10
+
11
+
12
@dataclass(frozen=True)
class ActionRef:
    """Immutable pointer to one raw action exposed by a controller."""

    # Controller name, used as a key into ``RawSnapshot.actions``.
    controller: str
    # Identifier of the acting entity within that controller.
    actor_id: int | str
    # Key of the action in the actor's action map.
    raw_action_key: str
17
+
18
+
19
@dataclass
class RawSnapshot:
    """Unprocessed game data captured at one point in a turn."""

    # Turn number the snapshot belongs to.
    turn: int
    # Raw state dictionary (sections such as "player" and "map" are read from it).
    state: dict[str, Any]
    # Raw action options, keyed by controller name.
    actions: dict[str, Any]
24
+
25
+
26
@dataclass(frozen=True)
class SnapshotMetrics:
    """Immutable scalar metrics derived from a single snapshot."""

    score: float
    # Map knowledge counters.
    known_tiles: int
    visible_tiles: int
    # Empire size and research progress.
    city_count: int
    unit_count: int
    techs_researched: int
34
+
35
+
36
@dataclass
class PreparedObservation:
    """An observation together with the bookkeeping derived alongside it."""

    # Observation handed to the agent.
    observation: FreecivObservation
    # Metrics of the same snapshot.
    metrics: SnapshotMetrics
    # Lookup from an action's key to the raw action it refers to.
    action_refs: dict[ActionLookupKey, ActionRef]
41
+
42
+
43
+ def _map_status_rows(raw_state: dict[str, Any]) -> list[list[int | float]]:
44
+ raw_map = raw_state.get("map", {})
45
+ status = raw_map.get("status", [])
46
+ return status if isinstance(status, list) else []
47
+
48
+
49
def count_known_tiles(raw_state: dict[str, Any]) -> int:
    """Count map tiles whose status value is truthy and greater than zero."""
    known = 0
    for row in _map_status_rows(raw_state):
        for value in row:
            if value and value > 0:
                known += 1
    return known
51
+
52
+
53
def count_visible_tiles(raw_state: dict[str, Any]) -> int:
    """Count map tiles whose status value is truthy and at least 2."""
    visible = 0
    for row in _map_status_rows(raw_state):
        for value in row:
            if value and value >= 2:
                visible += 1
    return visible
55
+
56
+
57
def extract_metrics(snapshot: RawSnapshot) -> SnapshotMetrics:
    """Derive the scalar ``SnapshotMetrics`` from a raw snapshot.

    Missing sections count as empty, and missing or falsy player values count
    as zero.
    """
    state = snapshot.state
    player = state.get("player", {})

    score = float(player.get("my_score", 0.0))
    known = count_known_tiles(state)
    visible = count_visible_tiles(state)
    cities = len(state.get("city", {}))
    units = len(state.get("unit", {}))
    # "or 0" turns an explicit None into zero before the int conversion.
    techs = int(player.get("my_techs_researched", 0) or 0)

    return SnapshotMetrics(
        score=score,
        known_tiles=known,
        visible_tiles=visible,
        city_count=cities,
        unit_count=units,
        techs_researched=techs,
    )
67
+
68
+
69
def action_lookup_key(action: FreecivAction) -> ActionLookupKey:
    """Return the ``(action_type, actor id, direction, target)`` tuple identifying ``action``.

    Slots that do not apply to the action type are ``None``. Any action type
    without a dedicated shape maps to the ``end_turn`` key.
    """
    builders = {
        "move_unit": lambda a: ("move_unit", a.unit_id, a.direction, None),
        "build_city": lambda a: ("build_city", a.unit_id, None, None),
        "set_city_production": lambda a: ("set_city_production", a.city_id, None, a.target),
        "set_research": lambda a: ("set_research", None, None, a.target),
    }
    build = builders.get(action.action_type)
    if build is None:
        return ("end_turn", None, None, None)
    return build(action)
79
+
80
+
81
+ def _parse_target_name(raw_action_key: str, prefix: str) -> str:
82
+ suffix = raw_action_key.removeprefix(prefix)
83
+ name, _sep, _tail = suffix.rpartition("_")
84
+ return name or suffix
85
+
86
+
87
+
88
+ def _controller_actions(snapshot: RawSnapshot, controller: str) -> dict[str, Any]:
89
+ raw_actions = snapshot.actions.get(controller, {})
90
+ if isinstance(raw_actions, dict):
91
+ return raw_actions
92
+ if hasattr(raw_actions, "json_struct"):
93
+ json_actions = raw_actions.json_struct()
94
+ return json_actions if isinstance(json_actions, dict) else {}
95
+ return {}
96
+
97
+
98
+
99
def _extract_legal_actions(snapshot: RawSnapshot) -> tuple[list[LegalAction], dict[ActionLookupKey, ActionRef]]:
    """Enumerate the actions exposed to the agent for this snapshot.

    Returns a pair ``(legal_actions, refs)``:

    * ``legal_actions`` always contains an ``end_turn`` entry, followed by
      enabled unit ``build`` / ``goto_<n>`` actions, enabled city production
      changes and enabled research choices, sorted by action type, ids,
      direction and target.
    * ``refs`` maps each non-``end_turn`` lookup key to the ``ActionRef``
      (controller, actor id, raw key) needed to trigger it.
    """
    exposed: list[LegalAction] = [
        LegalAction(
            action_type="end_turn",
            label="End the current turn",
            raw_action_key="__end_turn__",
        )
    ]
    lookup: dict[ActionLookupKey, ActionRef] = {}

    def expose(entry: LegalAction, key: ActionLookupKey, controller: str, actor_id, raw_key: str) -> None:
        # Record the agent-facing entry together with its trigger reference.
        exposed.append(entry)
        lookup[key] = ActionRef(controller=controller, actor_id=actor_id, raw_action_key=raw_key)

    # --- unit actions: found a city, or move in a direction ---------------
    for raw_unit_id, unit_options in _controller_actions(snapshot, "unit").items():
        unit_id = int(raw_unit_id)
        if unit_options.get("build"):
            expose(
                LegalAction(
                    action_type="build_city",
                    label=f"Build a city with unit {unit_id}",
                    unit_id=unit_id,
                    raw_action_key="build",
                ),
                ("build_city", unit_id, None, None),
                "unit",
                unit_id,
                "build",
            )
        for option_key, is_enabled in sorted(unit_options.items()):
            if not (is_enabled and option_key.startswith("goto_")):
                continue
            # The direction index is everything after the first underscore.
            direction = int(option_key.split("_", 1)[1])
            expose(
                LegalAction(
                    action_type="move_unit",
                    label=f"Move unit {unit_id} in direction {direction}",
                    unit_id=unit_id,
                    direction=direction,
                    raw_action_key=option_key,
                ),
                ("move_unit", unit_id, direction, None),
                "unit",
                unit_id,
                option_key,
            )

    # --- city actions: change production to a unit or an improvement ------
    production_prefixes = ("change_unit_prod_", "change_improve_prod_")
    for raw_city_id, city_options in _controller_actions(snapshot, "city").items():
        city_id = int(raw_city_id)
        for option_key, is_enabled in sorted(city_options.items()):
            if not is_enabled:
                continue
            matched = next((p for p in production_prefixes if option_key.startswith(p)), None)
            if matched is None:
                continue
            target = _parse_target_name(option_key, matched)
            expose(
                LegalAction(
                    action_type="set_city_production",
                    label=f"Set city {city_id} production to {target}",
                    city_id=city_id,
                    target=target,
                    raw_action_key=option_key,
                ),
                ("set_city_production", city_id, None, target),
                "city",
                city_id,
                option_key,
            )

    # --- research actions for the current player --------------------------
    research_options = _controller_actions(snapshot, "tech").get("cur_player", {})
    for option_key, is_enabled in sorted(research_options.items()):
        if not (is_enabled and option_key.startswith("research_tech_")):
            continue
        target = _parse_target_name(option_key, "research_tech_")
        expose(
            LegalAction(
                action_type="set_research",
                label=f"Research {target}",
                target=target,
                raw_action_key=option_key,
            ),
            ("set_research", None, None, target),
            "tech",
            "cur_player",
            option_key,
        )

    def ordering(entry: LegalAction):
        # Falsy ids/directions/targets collapse to the placeholder value.
        return (
            entry.action_type,
            entry.unit_id or -1,
            entry.city_id or -1,
            entry.direction or -1,
            entry.target or "",
        )

    exposed.sort(key=ordering)
    return exposed, lookup
199
+
200
+
201
def _extract_unit_summaries(snapshot: RawSnapshot) -> list[UnitSummary]:
    """Build one ``UnitSummary`` per entry in ``snapshot.state["unit"]``.

    Units are emitted in ascending numeric id order. Each summary also carries
    what the unit controller currently allows: whether ``build`` is enabled and
    which ``goto_<n>`` directions are enabled.
    """
    unit_actions = _controller_actions(snapshot, "unit")
    ordered_units = sorted(
        snapshot.state.get("unit", {}).items(),
        key=lambda item: int(item[0]),
    )

    summaries: list[UnitSummary] = []
    for actor_id, unit in ordered_units:
        # The action table may be keyed by str or by the raw id; try both.
        fallback = unit_actions.get(actor_id, {})
        action_map = unit_actions.get(str(actor_id), fallback)

        move_directions = sorted(
            int(option_key.split("_", 1)[1])
            for option_key, is_enabled in action_map.items()
            if is_enabled and option_key.startswith("goto_")
        )

        # None, -1 and "" are all treated as "no home city".
        raw_home = unit.get("home_city")
        home_city_id = None if raw_home in (None, -1, "") else int(raw_home)

        # Accept either spelling of the remaining-moves key.
        raw_moves = unit.get("moves_left", unit.get("movesleft", 0))

        summaries.append(
            UnitSummary(
                unit_id=int(actor_id),
                unit_type=str(unit.get("type_rule_name", "Unknown")),
                health=int(unit.get("health", 0) or 0),
                moves_left=int(raw_moves or 0),
                home_city_id=home_city_id,
                veteran_level=int(unit.get("veteran", 0) or 0),
                can_build_city=bool(action_map.get("build", False)),
                move_directions=move_directions,
            )
        )
    return summaries
228
+
229
+
230
def _extract_city_summaries(snapshot: RawSnapshot) -> list[CitySummary]:
    """Build one ``CitySummary`` per entry in ``snapshot.state["city"]``.

    Cities are emitted in ascending numeric id order. ``production_options``
    lists the enabled unit targets first, then the enabled improvement
    targets, each group in sorted raw-key order.
    """

    def counter(city: dict, key: str) -> int:
        # Missing or falsy numeric fields are reported as 0.
        return int(city.get(key, 0) or 0)

    def optional(city: dict, key: str, cast):
        # Preserve "unknown" as None instead of coercing it.
        value = city.get(key)
        return None if value is None else cast(value)

    def enabled_targets(action_map: dict, prefix: str) -> list[str]:
        return [
            _parse_target_name(option_key, prefix)
            for option_key, is_enabled in sorted(action_map.items())
            if is_enabled and option_key.startswith(prefix)
        ]

    city_actions = _controller_actions(snapshot, "city")
    ordered_cities = sorted(
        snapshot.state.get("city", {}).items(),
        key=lambda item: int(item[0]),
    )

    summaries: list[CitySummary] = []
    for actor_id, city in ordered_cities:
        # The action table may be keyed by str or by the raw id; try both.
        fallback = city_actions.get(actor_id, {})
        action_map = city_actions.get(str(actor_id), fallback)

        unit_targets = enabled_targets(action_map, "change_unit_prod_")
        improvement_targets = enabled_targets(action_map, "change_improve_prod_")

        summaries.append(
            CitySummary(
                city_id=int(actor_id),
                size=counter(city, "size"),
                prod_food=counter(city, "prod_food"),
                prod_shield=counter(city, "prod_shield"),
                prod_trade=counter(city, "prod_trade"),
                surplus_food=counter(city, "surplus_food"),
                surplus_shield=counter(city, "surplus_shield"),
                surplus_trade=counter(city, "surplus_trade"),
                production_kind=optional(city, "production_kind", int),
                production_value=optional(city, "production_value", int),
                turns_to_complete=optional(city, "turns_to_prod_complete", float),
                production_options=unit_targets + improvement_targets,
            )
        )
    return summaries
273
+
274
+
275
+ def _build_summary(
276
+ snapshot: RawSnapshot,
277
+ metrics: SnapshotMetrics,
278
+ units: list[UnitSummary],
279
+ cities: list[CitySummary],
280
+ legal_actions: list[LegalAction],
281
+ ) -> str:
282
+ player = snapshot.state.get("player", {})
283
+ lines = [
284
+ f"Turn {snapshot.turn}",
285
+ f"Score {metrics.score:.1f}",
286
+ f"Map: {metrics.known_tiles} known tiles, {metrics.visible_tiles} visible tiles",
287
+ f"Economy: {player.get('my_gold', 0)} gold, science rate {player.get('my_science', 0)}%",
288
+ f"Cities: {metrics.city_count}",
289
+ ]
290
+ for city in cities[:5]:
291
+ lines.append(
292
+ f"- City {city.city_id}: size {city.size}, food {city.prod_food}/{city.surplus_food:+d}, "
293
+ f"shields {city.prod_shield}/{city.surplus_shield:+d}, trade {city.prod_trade}/{city.surplus_trade:+d}"
294
+ )
295
+ lines.append(f"Units: {metrics.unit_count}")
296
+ for unit in units[:8]:
297
+ lines.append(
298
+ f"- Unit {unit.unit_id}: {unit.unit_type}, hp {unit.health}, moves_left {unit.moves_left}, "
299
+ f"build_city={str(unit.can_build_city).lower()}, move_dirs={unit.move_directions}"
300
+ )
301
+ lines.append(f"Techs researched: {metrics.techs_researched}")
302
+ lines.append(f"Legal actions exposed: {len(legal_actions)}")
303
+ return "\n".join(lines)
304
+
305
+
306
def prepare_observation(
    snapshot: RawSnapshot,
    *,
    reward: float,
    done: bool,
    status: str,
    metadata: dict[str, Any] | None = None,
) -> PreparedObservation:
    """Convert a raw snapshot into the observation returned to the agent.

    Args:
        snapshot: Raw turn number, state and action tables.
        reward: Reward to attach to the observation.
        done: Whether the episode has finished.
        status: High-level status string for the observation.
        metadata: Optional extra metadata; a falsy value becomes ``{}``.

    Returns:
        A ``PreparedObservation`` bundling the observation, the metrics it was
        derived from, and the lookup table for triggering each legal action.
    """
    legal_actions, action_refs = _extract_legal_actions(snapshot)
    metrics = extract_metrics(snapshot)
    units = _extract_unit_summaries(snapshot)
    cities = _extract_city_summaries(snapshot)

    fields = {
        "turn": snapshot.turn,
        "score": metrics.score,
        "known_tiles": metrics.known_tiles,
        "visible_tiles": metrics.visible_tiles,
        "city_count": metrics.city_count,
        "unit_count": metrics.unit_count,
        "techs_researched": metrics.techs_researched,
        "status": status,
        "summary": _build_summary(snapshot, metrics, units, cities, legal_actions),
        "units": units,
        "cities": cities,
        "legal_actions": legal_actions,
        "reward": reward,
        "done": done,
        "metadata": metadata or {},
    }
    return PreparedObservation(
        observation=FreecivObservation(**fields),
        metrics=metrics,
        action_refs=action_refs,
    )
freeciv_env/client.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from openenv.core.client_types import StepResult
4
+ from openenv.core.env_client import EnvClient
5
+
6
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
7
+
8
+
9
class FreecivEnv(EnvClient[FreecivAction, FreecivObservation, FreecivState]):
    """OpenEnv client bound to the Freeciv action, observation and state models."""

    def _step_payload(self, action: FreecivAction) -> dict:
        """Serialize ``action`` for a step request, omitting fields that are ``None``."""
        payload = action.model_dump(exclude_none=True)
        return payload

    def _parse_result(self, payload: dict) -> StepResult[FreecivObservation]:
        """Turn a step/reset response payload into a typed ``StepResult``.

        ``payload["observation"]`` is required; ``reward`` defaults to ``None``
        and ``done`` to ``False`` when absent.
        """
        parsed_observation = FreecivObservation(**payload["observation"])
        reward = payload.get("reward")
        finished = payload.get("done", False)
        return StepResult(observation=parsed_observation, reward=reward, done=finished)

    def _parse_state(self, payload: dict) -> FreecivState:
        """Build a ``FreecivState`` from the state response payload."""
        state = FreecivState(**payload)
        return state
freeciv_env/grpo.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from typing import Iterable
5
+
6
+ from freeciv_env.models import FreecivAction, FreecivObservation, LegalAction
7
+
8
# System-level instruction: the model must answer with a bare integer index
# into the list of legal actions and nothing else.
SYSTEM_PROMPT = (
    "You are choosing the next action for a Freeciv agent. "
    "Return only the integer index of the best legal action. "
    "Do not output words, punctuation, JSON, or explanations."
)

# Default task instruction placed at the top of each per-turn prompt by
# build_turn_prompt.
TASK_PROMPT = (
    "Pick the legal action index that maximizes immediate reward. "
    "Invalid actions are penalized. Shorter outputs are better."
)
18
+
19
+
20
def format_action_line(index: int, action: LegalAction) -> str:
    """Render one menu line of the form ``"<index>: <label>"``."""
    return "{}: {}".format(index, action.label)
22
+
23
+
24
def build_turn_prompt(observation: FreecivObservation, task_prompt: str = TASK_PROMPT) -> str:
    """Assemble the user prompt for one decision.

    The prompt contains, in order: the task instruction, the observation's
    text summary, the numbered list of legal actions, and a final reminder to
    answer with a single integer index.
    """
    menu = "\n".join(
        format_action_line(position, choice)
        for position, choice in enumerate(observation.legal_actions)
    )
    preamble = (
        f"{task_prompt}\n\n"
        f"State:\n{observation.summary}\n\n"
        f"Legal actions:\n"
    )
    closing = "\n\n" "Return exactly one integer index."
    return preamble + menu + closing
32
+
33
+
34
def parse_action_choice(completion_text: str, legal_actions: Iterable[LegalAction]) -> FreecivAction | None:
    """Map a model completion to the ``FreecivAction`` it selects.

    The first (optionally negative) integer found in ``completion_text`` is
    used as an index into ``legal_actions``.

    Returns:
        The matching ``FreecivAction``, or ``None`` when the text contains no
        integer or the index is out of range.

    Raises:
        ValueError: if the selected legal action has an unknown action type.
    """
    # Which LegalAction fields are copied into the FreecivAction, per type.
    copied_fields = {
        "end_turn": (),
        "move_unit": ("unit_id", "direction"),
        "build_city": ("unit_id",),
        "set_city_production": ("city_id", "target"),
        "set_research": ("target",),
    }

    choices = list(legal_actions)
    found = re.search(r"-?\d+", completion_text)
    if found is None:
        return None
    position = int(found.group(0))
    if not 0 <= position < len(choices):
        return None

    chosen = choices[position]
    wanted = copied_fields.get(chosen.action_type)
    if wanted is None:
        raise ValueError(f"unsupported action_type: {chosen.action_type}")
    arguments = {name: getattr(chosen, name) for name in wanted}
    return FreecivAction(action_type=chosen.action_type, **arguments)
54
+
55
+
56
def action_priority(action: LegalAction) -> tuple[int, int]:
    """Rank a legal action for the heuristic oracle; larger tuples win.

    Founding a city ranks highest, then research, then city production (with
    a bonus when the target is ``"Settlers"``), then unit moves (lower
    direction indexes preferred), then ending the turn. Unknown action types
    rank below everything.
    """
    kind = action.action_type
    if kind == "set_city_production":
        settlers_bonus = 50 if (action.target or "") == "Settlers" else 0
        return (300 + settlers_bonus, 0)
    if kind == "move_unit":
        # Negated so that a smaller direction index compares as larger.
        return (200, -(action.direction or 0))
    fixed_rank = {"build_city": 500, "set_research": 400, "end_turn": 0}
    return (fixed_rank.get(kind, -1000), 0)
69
+
70
+
71
+
72
def oracle_action_index(legal_actions: Iterable[LegalAction]) -> int:
    """Return the index of the highest-priority legal action.

    Ties are resolved in favour of the earliest action in the list.

    Raises:
        ValueError: if ``legal_actions`` is empty.
    """
    candidates = list(legal_actions)
    if not candidates:
        raise ValueError("no legal actions available")
    # max() keeps the first element among equal keys, matching a
    # "replace only when strictly better" scan.
    return max(range(len(candidates)), key=lambda position: action_priority(candidates[position]))
84
+
85
+
86
+
87
def reward_from_oracle(completions, best_index, **kwargs):
    """Score completions against the oracle's chosen index.

    For each ``(completion, expected)`` pair the first integer in the
    completion text (non-string completions are passed through ``str``) is
    compared with ``expected``:

    * ``1.0``  - the integer equals ``int(expected)``
    * ``0.0``  - an integer was found but differs
    * ``-0.25`` - no integer was found

    Extra keyword arguments are accepted and ignored.
    """
    del kwargs
    integer_pattern = re.compile(r"-?\d+")

    def score(completion, expected):
        text = completion if isinstance(completion, str) else str(completion)
        found = integer_pattern.search(text)
        if found is None:
            return -0.25
        return 1.0 if int(found.group(0)) == int(expected) else 0.0

    return [score(completion, expected) for completion, expected in zip(completions, best_index)]
freeciv_env/models.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+ from pydantic import BaseModel, Field, model_validator
6
+
7
+ from openenv.core.env_server.types import Action, Observation, State
8
+
9
+
10
# Compact per-unit view included in each observation; populated from the raw
# unit state plus the unit controller's currently enabled actions.
class UnitSummary(BaseModel):
    unit_id: int = Field(..., description="Freeciv unit id")
    unit_type: str = Field(..., description="Ruleset unit type name")
    health: int = Field(0, description="Current health")
    moves_left: int = Field(0, description="Movement points remaining")
    home_city_id: int | None = Field(None, description="Home city id, if any")
    veteran_level: int = Field(0, description="Veteran level")
    can_build_city: bool = Field(False, description="Whether the unit can found a city now")
    move_directions: list[int] = Field(default_factory=list, description="Legal move direction indexes")
19
+
20
+
21
# Compact per-city view included in each observation. The three optional
# production_* / turns_to_complete fields stay None when the raw state does
# not provide them.
class CitySummary(BaseModel):
    city_id: int = Field(..., description="Freeciv city id")
    size: int = Field(..., description="Population size")
    prod_food: int = Field(0, description="Gross food output")
    prod_shield: int = Field(0, description="Gross shield output")
    prod_trade: int = Field(0, description="Gross trade output")
    surplus_food: int = Field(0, description="Net food surplus")
    surplus_shield: int = Field(0, description="Net shield surplus")
    surplus_trade: int = Field(0, description="Net trade surplus")
    production_kind: int | None = Field(None, description="Current production kind enum from Freeciv")
    production_value: int | None = Field(None, description="Current production value id from Freeciv")
    turns_to_complete: float | None = Field(None, description="Turns until current production completes")
    production_options: list[str] = Field(default_factory=list, description="Legal production targets")
34
+
35
+
36
# One entry of the action menu shown to the agent. Only the id/direction/target
# fields relevant to the given action_type are populated; the rest stay None.
class LegalAction(BaseModel):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    label: str = Field(..., description="Human-readable action label")
    unit_id: int | None = Field(None, description="Target unit id")
    city_id: int | None = Field(None, description="Target city id")
    direction: int | None = Field(None, description="Freeciv direction index 0..7")
    target: str | None = Field(None, description="Production or tech target name")
    raw_action_key: str | None = Field(None, description="Underlying freeciv-bot action key")
50
+
51
+
52
# Action submitted by the agent. Which optional fields are required depends on
# action_type and is enforced by validate_shape after field validation.
class FreecivAction(Action):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    unit_id: int | None = None
    city_id: int | None = None
    direction: int | None = None
    target: str | None = None

    @model_validator(mode="after")
    def validate_shape(self) -> "FreecivAction":
        # Reject actions that lack the fields their action_type needs.
        kind = self.action_type
        if kind == "end_turn":
            return self

        # action_type -> (is a required field missing?, error message)
        requirements = {
            "move_unit": (
                self.unit_id is None or self.direction is None,
                "move_unit requires unit_id and direction",
            ),
            "build_city": (
                self.unit_id is None,
                "build_city requires unit_id",
            ),
            "set_city_production": (
                self.city_id is None or not self.target,
                "set_city_production requires city_id and target",
            ),
            "set_research": (
                not self.target,
                "set_research requires target",
            ),
        }
        if kind not in requirements:
            raise ValueError(f"unsupported action_type: {self.action_type}")
        missing, message = requirements[kind]
        if missing:
            raise ValueError(message)
        return self
86
+
87
+
88
# Observation returned after reset/step: headline metrics, a text summary for
# LLM prompts, structured unit/city summaries and the current action menu.
class FreecivObservation(Observation):
    turn: int = Field(..., description="Current game turn")
    score: float = Field(..., description="Current player score")
    known_tiles: int = Field(..., description="Tiles known to the player")
    visible_tiles: int = Field(..., description="Tiles currently visible to the player")
    city_count: int = Field(..., description="Number of owned cities")
    unit_count: int = Field(..., description="Number of owned units")
    techs_researched: int = Field(..., description="Number of researched techs")
    status: str = Field("ok", description="High-level environment status")
    summary: str = Field(..., description="Compact text summary for LLMs")
    units: list[UnitSummary] = Field(default_factory=list, description="Compact unit summaries")
    cities: list[CitySummary] = Field(default_factory=list, description="Compact city summaries")
    legal_actions: list[LegalAction] = Field(default_factory=list, description="Legal actions exposed by the environment")
    reward: float = Field(0.0, description="Reward from the last action")
    done: bool = Field(False, description="Whether the episode is done")
103
+
104
+
105
# Environment state exposed through the state endpoint; every field defaults
# to zero so a state can be constructed before any snapshot exists.
class FreecivState(State):
    turn: int = Field(0, description="Current game turn")
    score: float = Field(0.0, description="Current player score")
    known_tiles: int = Field(0, description="Known tiles")
    visible_tiles: int = Field(0, description="Visible tiles")
    city_count: int = Field(0, description="Owned city count")
    unit_count: int = Field(0, description="Owned unit count")
    techs_researched: int = Field(0, description="Researched tech count")
freeciv_env/runtime.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ import threading
6
+ import time
7
+ from collections import deque
8
+ from typing import Protocol
9
+ from urllib.parse import urlencode, urlparse
10
+ from urllib.request import Request, urlopen
11
+
12
+ from freeciv_env.adapter import ActionRef, RawSnapshot
13
+
14
+
15
# Bounded in-memory log of recent runtime events; once 400 entries are held
# the oldest entry is discarded for each new one.
DEBUG_EVENTS: deque[str] = deque(maxlen=400)


def debug_event(message: str) -> None:
    """Append ``message`` to ``DEBUG_EVENTS`` prefixed with the local ``HH:MM:SS`` time."""
    stamp = time.strftime("%H:%M:%S")
    DEBUG_EVENTS.append(f"{stamp} {message}")
20
+
21
+
22
# Structural interface for a Freeciv game session. Each state-changing method
# returns a RawSnapshot; LiveFreecivSession in this module implements it.
class FreecivSession(Protocol):
    # Start (or restart) a game and return its first snapshot.
    def reset(self, seed: int | None = None) -> RawSnapshot: ...

    # Trigger the action identified by action_ref and return the next snapshot.
    def apply_action(self, action_ref: ActionRef) -> RawSnapshot: ...

    # End the current turn and return the next snapshot.
    def end_turn(self) -> RawSnapshot: ...

    # Release any resources held by the session.
    def close(self) -> None: ...
30
+
31
+
32
class _InteractiveBot:
    """Holder for a ``BaseBot`` subclass that publishes snapshots instead of playing.

    ``freecivbot`` is imported inside ``__init__`` so that importing this
    module does not require the package. The constructed bot is exposed as
    ``self.impl``.
    """

    def __init__(self, session: "LiveFreecivSession"):
        from freecivbot.bot.base_bot import BaseBot

        class InteractiveBotImpl(BaseBot):
            def __init__(self, owner: "LiveFreecivSession"):
                super().__init__()
                # Session that receives every published snapshot.
                self._owner = owner

            def conduct_turn(self, pplayer, info_controls, end_turn_hook):
                # Run the base turn setup, then hand the state to the session.
                super().conduct_turn(pplayer, info_controls, end_turn_hook)
                self._publish_snapshot()

            def calculate_next_move(self):
                # No move is chosen here; while a turn is active this only
                # republishes the state so the waiting caller can decide.
                if self._turn_active:
                    self._publish_snapshot()

            def _publish_snapshot(self):
                # Refresh state via the base class, then forward turn number,
                # state and action options to the owning session.
                self._acquire_state()
                self._owner._publish_snapshot(
                    RawSnapshot(
                        turn=self.turn,
                        state=self._turn_state,
                        actions=self._turn_opts,
                    )
                )

        self.impl = InteractiveBotImpl(session)
60
+
61
+
62
class _ConfiguredCivClient:
    """Holder for a ``CivClient`` subclass with customised login and packet handling.

    ``freecivbot`` is imported inside ``__init__`` so that importing this
    module does not require the package. The constructed client is exposed as
    ``self.impl``.
    """

    def __init__(self, bot, user_name: str, *, client_port: int, visual_monitor: bool = False):
        from freecivbot.civclient import CivClient

        class ConfiguredCivClientImpl(CivClient):
            def init_control(self, ws_client):
                # Called with the websocket client; initialises controllers and
                # sends the login packet.
                self.ws_client = ws_client
                self.init_controller()
                if self.visual_monitor:
                    self.monitor.start_monitor()
                login_message = {
                    "pid": 4,
                    "username": self.user_name,
                    "capability": "+Freeciv.Web.Devel-3.3",
                    "version_label": "-dev",
                    "major_version": 2,
                    "minor_version": 5,
                    "patch_version": 99,
                    "port": self.client_port,
                    "password": None,
                    "subject": None,
                }
                debug_event(f"sending login username={self.user_name} port={self.client_port}")
                self.ws_client.send(login_message)

            def handle_chat_msg(self, packet):
                # Logs chat packets and, once the login confirmation arrives,
                # configures the server for a single player and prepares the game.
                from freecivbot.utils.fc_events import E_UNDEFINED

                message = packet["message"]
                conn_id = packet["conn_id"]
                event = packet["event"]

                if message is None:
                    return
                if event is None or event < 0 or event >= E_UNDEFINED:
                    # Out-of-range event ids are normalised to E_UNDEFINED.
                    print("Undefined message event type")
                    print(packet)
                    print("\r\n")
                    packet["event"] = event = E_UNDEFINED

                if conn_id in self.clstate.connections:
                    # Prefix messages from known connections with the sender name.
                    message = "<b>" + self.clstate.connections[conn_id]["username"] + ":</b>" + message
                else:
                    # Metaserver chatter from unknown connections is dropped.
                    if "/metamessage" in message:
                        return
                    if "Metaserver message string" in message:
                        return

                packet["message"] = message
                debug_event(f"chat message: {message}")
                print(packet)
                print("\r\n")

                if "You are logged in as" in message:
                    debug_event("logged in; sending /set minplayers 1 and prepare_game")
                    self.ws_client.send_message("/set minplayers 1")
                    self.prepare_game()

            def handle_conn_info(self, packet):
                # Tracks connection add/remove packets in the client state.
                from freecivbot.connectivity.client_state import C_S_PREPARING
                from freecivbot.utils.freecivlog import freelog

                pconn = self.clstate.find_conn_by_id(packet["id"])

                if not packet["used"]:
                    # The server is dropping this connection.
                    if pconn is None:
                        freelog(f"Server removed unknown connection {packet['id']}")
                        return
                    self.clstate.client_remove_cli_conn(pconn)
                    pconn = None
                else:
                    pplayer = self.player_ctrl.valid_player_by_number(packet["player_num"])
                    if pplayer is None:
                        # Ignore connections whose player number does not resolve.
                        return
                    packet["playing"] = pplayer

                    if self.clstate.has_id(packet["id"]):
                        self.clstate.init_state(packet)

                    self.clstate.conn_list_append(packet)

                # NOTE(review): on the removal path packet["playing"] is not set
                # by this method - confirm the packet always carries it.
                if self.clstate.has_id(packet["id"]) and self.clstate.cur_player() != packet["playing"]:
                    self.clstate.set_client_state(C_S_PREPARING)

        self.impl = ConfiguredCivClientImpl(
            bot,
            user_name,
            client_port=client_port,
            visual_monitor=visual_monitor,
        )
152
+
153
+
154
+ class _ConfiguredCivConnection:
155
+ def __init__(self, civ_client, base_url: str, *, owner: "LiveFreecivSession", wait_for_server: int = 120, retry_interval: int = 5):
156
+ from math import ceil
157
+
158
+ import websocket
159
+
160
+ self._websocket = websocket
161
+ self.client = civ_client
162
+ self.base_url = base_url
163
+ self._owner = owner
164
+ self._loop = None
165
+ self._owner._connection = self
166
+ self.civserverport = self._reserve_client_port(base_url, civ_client.client_port)
167
+ self.client.client_port = self.civserverport
168
+ self.proxyport = 1000 + self.civserverport
169
+ debug_event(f"reserved civ port={self.civserverport} proxyport={self.proxyport}")
170
+ self._retry_interval = retry_interval
171
+ self._num_retries = int(ceil(wait_for_server / retry_interval))
172
+ self._cur_retry = 0
173
+ self._ws_url = self._build_ws_url(base_url)
174
+ self.network_init()
175
+
176
+ def _build_ws_url(self, base_url: str) -> str:
177
+ parsed = urlparse(base_url)
178
+ scheme = "wss" if parsed.scheme == "https" else "ws"
179
+ host = parsed.hostname or "localhost"
180
+ return f"{scheme}://{host}:{self.proxyport}/civsocket/{self.proxyport}"
181
+
182
+ def _reserve_client_port(self, base_url: str, requested_port: int) -> int:
183
+ parsed = urlparse(base_url)
184
+ scheme = parsed.scheme or "http"
185
+ host = parsed.hostname or "localhost"
186
+ port = parsed.port
187
+ if port is None:
188
+ port = 443 if scheme == "https" else 80
189
+ query = urlencode({"civserverport": requested_port})
190
+ launcher_url = f"{scheme}://{host}:{port}/civclientlauncher?{query}"
191
+ origin = f"{scheme}://{host}:{port}"
192
+ request = Request(
193
+ launcher_url,
194
+ method="POST",
195
+ headers={
196
+ "User-Agent": "Mozilla/5.0",
197
+ "Origin": origin,
198
+ "Referer": origin + "/",
199
+ },
200
+ )
201
+ with urlopen(request, timeout=10) as response:
202
+ result = response.headers.get("result")
203
+ reserved_port = response.headers.get("port")
204
+ debug_event(
205
+ f"launcher response status={response.status} result={result} port={reserved_port} body={response.read(200).decode('utf-8', 'ignore')}"
206
+ )
207
+ if result != "success" or reserved_port is None:
208
+ raise RuntimeError(f"failed to reserve freeciv client port via {launcher_url}")
209
+ return int(reserved_port)
210
+
211
+ def _retry(self):
212
+ self._cur_retry += 1
213
+ time.sleep(self._retry_interval)
214
+ return self._detect_server_up()
215
+
216
+ def _detect_server_up(self):
217
+ ws = self._websocket.WebSocket()
218
+ try:
219
+ debug_event(f"probing websocket {self._ws_url}")
220
+ ws.connect(self._ws_url, timeout=10)
221
+ debug_event("websocket probe succeeded")
222
+ return True
223
+ except Exception as err:
224
+ debug_event(f"websocket probe failed: {err!r}")
225
+ print("Connect not successful:", err, " retrying in %s seconds." % self._retry_interval)
226
+ if self._cur_retry < self._num_retries:
227
+ return self._retry()
228
+ return False
229
+ finally:
230
+ try:
231
+ ws.close()
232
+ except Exception:
233
+ pass
234
+
235
+ def network_init(self):
236
+ self._cur_retry = 0
237
+ print("Connecting to server at %s ..." % self.base_url)
238
+ if self._detect_server_up():
239
+ self.websocket_init()
240
+ else:
241
+ print("Connection could not be established!")
242
+
243
+ def websocket_init(self):
244
+ from tornado import ioloop
245
+
246
+ from freecivbot.connectivity.clinet import CivWSClient
247
+
248
+ asyncio.set_event_loop(asyncio.new_event_loop())
249
+ ioloop.IOLoop.clear_current()
250
+ self._loop = ioloop.IOLoop.current()
251
+
252
+ debug_event(f"starting tornado websocket client for {self._ws_url}")
253
+ client = CivWSClient(self.client)
254
+
255
+ def send_json(data):
256
+ if not client._ws_connection:
257
+ raise RuntimeError("Web socket connection is closed.")
258
+ msg = json.dumps(data, separators=(",", ":"))
259
+ client._ws_connection.write_message(msg)
260
+
261
+ client.send = send_json
262
+ client.connect(self._ws_url)
263
+
264
+ try:
265
+ self._loop.start()
266
+ except KeyboardInterrupt:
267
+ client.close()
268
+
269
+ def submit(self, fn) -> None:
270
+ if self._loop is None:
271
+ raise RuntimeError("freeciv connection loop is not ready")
272
+ done = threading.Event()
273
+ error: BaseException | None = None
274
+
275
+ def run():
276
+ nonlocal error
277
+ try:
278
+ fn()
279
+ except BaseException as exc:
280
+ error = exc
281
+ finally:
282
+ done.set()
283
+
284
+ self._loop.add_callback(run)
285
+ if not done.wait(timeout=10):
286
+ raise TimeoutError("timed out dispatching action to freeciv loop")
287
+ if error is not None:
288
+ raise error
289
+
290
+ def close(self) -> None:
291
+ if self._loop is None:
292
+ return
293
+ self.submit(self.client.close)
294
+
295
+
296
class LiveFreecivSession:
    """Session backed by a live freeciv-web server.

    Each ``reset`` starts a background thread that owns the server connection.
    The bot running on that thread publishes ``RawSnapshot`` objects, and the
    calling thread blocks in ``_wait_for_snapshot`` until the next one arrives
    or ``turn_timeout_s`` elapses.
    """

    def __init__(
        self,
        *,
        username: str = "openenvbot",
        client_port: int = 6000,
        base_url: str = "http://localhost",
        turn_timeout_s: float = 60.0,
    ):
        self.username = username
        self.client_port = client_port
        self.base_url = base_url
        self.turn_timeout_s = turn_timeout_s

        self._bot_wrapper: _InteractiveBot | None = None
        self._client = None
        self._connection: _ConfiguredCivConnection | None = None
        self._thread: threading.Thread | None = None
        # Set whenever a new snapshot is published or the session thread fails.
        self._ready = threading.Event()
        self._snapshot_lock = threading.Lock()
        self._snapshot: RawSnapshot | None = None
        self._thread_error: BaseException | None = None
        self._reset_counter = 0
        # Per-process offset used to vary usernames and requested ports.
        self._session_seed = time.monotonic_ns() % 1_000_000

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Tear down any existing game, start a new one, and return its first snapshot.

        ``seed`` is accepted for interface compatibility and ignored.

        Raises:
            RuntimeError: if the session thread fails while connecting.
            TimeoutError: if no snapshot arrives within ``turn_timeout_s``.
        """
        del seed
        self.close()
        self._reset_counter += 1
        username = self._next_username()
        # Rotate the requested port over three consecutive values.
        client_port = self.client_port + ((self._session_seed + self._reset_counter - 1) % 3)

        self._ready.clear()
        self._thread_error = None
        self._snapshot = None

        self._bot_wrapper = _InteractiveBot(self)
        self._client = _ConfiguredCivClient(
            self._bot_wrapper.impl,
            username,
            client_port=client_port,
            visual_monitor=False,
        ).impl

        def run() -> None:
            try:
                debug_event(f"session thread starting username={username} base_url={self.base_url} client_port={client_port}")
                _ConfiguredCivConnection(self._client, self.base_url, owner=self)
            except BaseException as exc:  # pragma: no cover - surfaced in waiters
                debug_event(f"session thread error: {exc!r}")
                self._thread_error = exc
                # Wake any waiter so it can report the failure immediately.
                self._ready.set()

        self._thread = threading.Thread(target=run, name="freeciv-live-session", daemon=True)
        self._thread.start()
        return self._wait_for_snapshot("reset")

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Trigger the referenced action and return the snapshot that follows.

        Raises:
            RuntimeError: if there is no current snapshot or no connection.
            ValueError: if the action is no longer valid for its actor.
            TimeoutError: if no snapshot arrives within ``turn_timeout_s``.
        """
        snapshot = self._require_snapshot()
        action_list = snapshot.actions[action_ref.controller]
        # Re-validate against the live action list before triggering.
        valid_actions = action_list.get_actions(action_ref.actor_id, valid_only=True)
        action = None if valid_actions is None else valid_actions.get(action_ref.raw_action_key)
        if action is None:
            raise ValueError(
                f"action {action_ref.raw_action_key} is no longer valid for {action_ref.controller}:{action_ref.actor_id}"
            )
        self._ready.clear()
        connection = self._require_connection()
        connection.submit(lambda: action_list.trigger_validated_action(action))
        return self._wait_for_snapshot(action_ref.raw_action_key)

    def end_turn(self) -> RawSnapshot:
        """End the current turn and return the snapshot that follows.

        Raises:
            RuntimeError: if ``reset`` has not been called or there is no connection.
            TimeoutError: if no snapshot arrives within ``turn_timeout_s``.
        """
        if self._bot_wrapper is None:
            raise RuntimeError("session has not been reset")
        self._ready.clear()
        connection = self._require_connection()
        connection.submit(self._bot_wrapper.impl.end_turn)
        return self._wait_for_snapshot("end_turn")

    def close(self) -> None:
        """Close the connection (best effort) and reset all per-game state.

        Errors raised while closing are recorded via ``debug_event`` rather
        than propagated, so ``close`` is always safe to call.
        """
        debug_event("closing live session")
        if self._connection is not None:
            try:
                self._connection.close()
            except Exception as exc:
                debug_event(f"ignoring error while closing freeciv connection: {exc!r}")
        elif self._client is not None:
            try:
                self._client.close()
            except Exception as exc:
                debug_event(f"ignoring error while closing freeciv client: {exc!r}")
        # Give the session thread a bounded chance to exit; it is a daemon
        # thread, so it cannot keep the process alive if it does not.
        if self._thread is not None and self._thread.is_alive():
            self._thread.join(timeout=5)
        self._bot_wrapper = None
        self._client = None
        self._connection = None
        self._thread = None
        self._snapshot = None
        self._thread_error = None
        self._ready.clear()

    def _publish_snapshot(self, snapshot: RawSnapshot) -> None:
        """Store ``snapshot`` as the latest one and wake any waiter."""
        debug_event(f"snapshot published turn={snapshot.turn}")
        with self._snapshot_lock:
            self._snapshot = snapshot
            self._ready.set()

    def _next_username(self) -> str:
        """Return the base username plus a numeric suffix, at most 31 characters long
        when the suffix itself leaves room for at least one prefix character."""
        suffix = str(self._session_seed + self._reset_counter)
        prefix_len = max(1, 31 - len(suffix))
        return f"{self.username[:prefix_len]}{suffix}"

    def _require_connection(self) -> _ConfiguredCivConnection:
        """Return the active connection or raise ``RuntimeError`` if there is none."""
        if self._connection is None:
            raise RuntimeError("freeciv connection is not ready")
        return self._connection

    def _require_snapshot(self) -> RawSnapshot:
        """Return the latest snapshot or raise ``RuntimeError`` if there is none."""
        with self._snapshot_lock:
            if self._snapshot is None:
                raise RuntimeError("no live snapshot is available")
            return self._snapshot

    def _wait_for_snapshot(self, reason: str) -> RawSnapshot:
        """Block until a snapshot is published, the session thread fails, or time runs out.

        Raises:
            RuntimeError: if the session thread recorded an error.
            TimeoutError: if ``turn_timeout_s`` elapses first.
        """
        deadline = time.monotonic() + self.turn_timeout_s
        debug_event(f"waiting for snapshot reason={reason} timeout={self.turn_timeout_s}")
        while time.monotonic() < deadline:
            if self._thread_error is not None:
                raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
            # Short waits keep the loop responsive to a thread error.
            if self._ready.wait(timeout=0.1):
                if self._thread_error is not None:
                    raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
                # _require_snapshot() raises rather than returning None.
                return self._require_snapshot()
        debug_event(f"snapshot wait timed out reason={reason}")
        raise TimeoutError(f"timed out waiting for freeciv snapshot during {reason}")
freeciv_env/server/Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Freeciv OpenEnv Space, built on the freeciv-web image.
FROM omkarasoftware/freeciv-web:latest

# System packages and the /app tree are prepared as root.
USER root
# --no-install-recommends keeps the layer to the three packages actually requested.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        curl \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /app/env && chown -R docker:docker /app

# Everything below runs as the unprivileged "docker" user.
USER docker
ENV HOME=/home/docker
WORKDIR /app/env

COPY --chown=docker:docker . /app/env
RUN chmod +x /app/env/scripts/start_space.sh
# Download the uv installer to a file before running it: with `curl ... | sh`
# the step's exit status is that of `sh`, so a failed download would go unnoticed.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh
ENV PATH="/app/env/.venv/bin:/home/docker/.local/bin:$PATH"
RUN uv python install 3.11
RUN uv venv --python 3.11 /app/env/.venv
# Install the locked, non-dev dependencies into the venv created above.
RUN UV_PROJECT_ENVIRONMENT=/app/env/.venv uv sync --frozen --no-dev --no-editable

ENV PYTHONPATH="/app/env:$PYTHONPATH"
ENV ENABLE_WEB_INTERFACE=true
ENV FREECIV_SERVER_URL=http://127.0.0.1
ENV FREECIV_TURN_TIMEOUT_S=120

# The API server is expected to answer /health on port 8000.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=10 \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/env/scripts/start_space.sh"]
freeciv_env/server/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from freeciv_env.server.freeciv_environment import FreecivEnvironment
2
+
3
+ __all__ = ["FreecivEnvironment"]
freeciv_env/server/app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import glob
4
+ import os
5
+ from urllib.request import Request, urlopen
6
+
7
+ from fastapi import Query
8
+ from openenv.core.env_server import create_app
9
+
10
+ from freeciv_env.adapter import prepare_observation
11
+ from freeciv_env.models import FreecivAction, FreecivObservation
12
+ from freeciv_env.runtime import DEBUG_EVENTS, LiveFreecivSession
13
+ from freeciv_env.server.freeciv_environment import FreecivEnvironment
14
+
15
+
16
def create_live_session() -> LiveFreecivSession:
    """Build a ``LiveFreecivSession`` configured from environment variables.

    Variables and their defaults when unset:
    ``FREECIV_USERNAME`` ("openenvbot"), ``FREECIV_CLIENT_PORT`` ("6000"),
    ``FREECIV_SERVER_URL`` ("http://127.0.0.1") and
    ``FREECIV_TURN_TIMEOUT_S`` ("60").
    """
    env = os.environ
    username = env.get("FREECIV_USERNAME", "openenvbot")
    client_port = int(env.get("FREECIV_CLIENT_PORT", "6000"))
    base_url = env.get("FREECIV_SERVER_URL", "http://127.0.0.1")
    turn_timeout_s = float(env.get("FREECIV_TURN_TIMEOUT_S", "60"))
    return LiveFreecivSession(
        username=username,
        client_port=client_port,
        base_url=base_url,
        turn_timeout_s=turn_timeout_s,
    )
23
+
24
+
25
def create_freeciv_app(*, session_factory=create_live_session, max_turns: int | None = None):
    """Create the OpenEnv application serving ``FreecivEnvironment``.

    Args:
        session_factory: Zero-argument callable handed to each environment
            for creating its session.
        max_turns: Turn limit for each environment. When ``None`` it is read
            from ``FREECIV_MAX_TURNS`` (default "50").

    Returns:
        Whatever ``create_app`` returns for the ``freeciv_env`` environment.
    """
    turn_limit = int(os.getenv("FREECIV_MAX_TURNS", "50")) if max_turns is None else max_turns

    def build_environment() -> FreecivEnvironment:
        return FreecivEnvironment(session_factory=session_factory, max_turns=turn_limit)

    return create_app(
        build_environment,
        FreecivAction,
        FreecivObservation,
        env_name="freeciv_env",
    )
34
+
35
+
36
+ app = create_freeciv_app()
37
+
38
+
39
@app.get("/debug/internal-status")
def debug_internal_status() -> dict:
    """Probe a fixed list of local HTTP endpoints and report each result.

    Every probe yields a dict with ``name`` and ``ok``. Successful probes
    also carry the HTTP ``status`` and the first 200 bytes of the ``body``;
    failed probes carry ``error`` (the ``repr`` of the exception).
    """
    probes = (
        ("nginx", "GET", "http://127.0.0.1/"),
        ("publite2", "GET", "http://127.0.0.1/pubstatus"),
        ("tomcat", "GET", "http://127.0.0.1:8080/freeciv-web/"),
        ("proxy7000", "GET", "http://127.0.0.1/civsocket/7000/status"),
        ("proxy7001", "GET", "http://127.0.0.1/civsocket/7001/status"),
        ("proxy7002", "GET", "http://127.0.0.1/civsocket/7002/status"),
        ("launcher", "POST", "http://127.0.0.1/civclientlauncher?civserverport=6000"),
    )

    def run_probe(name: str, method: str, url: str) -> dict:
        try:
            with urlopen(Request(url, method=method), timeout=10) as response:
                body = response.read(200).decode("utf-8", "ignore")
            return {
                "name": name,
                "ok": True,
                "status": response.status,
                "body": body,
            }
        except Exception as exc:
            # Any failure is reported in the result rather than raised.
            return {"name": name, "ok": False, "error": repr(exc)}

    return {"checks": [run_probe(*probe) for probe in probes]}
66
+
67
+
68
@app.get("/debug/live-log")
def debug_live_log() -> dict:
    """Return a copy of the ``DEBUG_EVENTS`` collection as a list."""
    events = list(DEBUG_EVENTS)
    return {"events": events}
71
+
72
+
73
@app.get("/debug/freeciv-logs")
def debug_freeciv_logs() -> dict:
    """Return the last 80 lines of up to 12 log files in ``/docker/logs``.

    The files are the last 12 ``*.log`` paths in sorted order. Each maps to
    the joined tail of its contents, or to the ``repr`` of the exception if
    the file could not be read.
    """
    from collections import deque

    logs = {}
    for path in sorted(glob.glob("/docker/logs/*.log"))[-12:]:
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                # A bounded deque keeps only the tail while streaming, so a
                # large log is never loaded into memory as a whole.
                tail = deque(handle, maxlen=80)
            logs[path] = "".join(tail)
        except Exception as exc:
            logs[path] = repr(exc)
    return {"logs": logs}
84
+
85
+
86
@app.get("/debug/startup-log")
def debug_startup_log() -> dict:
    """Return the last 120 lines of ``/tmp/start_space.log``.

    Returns:
        ``{"path", "log"}`` on success, or ``{"path", "error"}`` with the
        ``repr`` of the exception if the file could not be read.
    """
    from collections import deque

    path = "/tmp/start_space.log"
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            # Stream the file and retain only the tail instead of reading
            # every line into memory first.
            tail = deque(handle, maxlen=120)
        return {"path": path, "log": "".join(tail)}
    except Exception as exc:
        return {"path": path, "error": repr(exc)}
95
+
96
+
97
@app.post("/debug/live-reset")
def debug_live_reset(timeout_s: float = Query(default=120.0, ge=10.0, le=300.0)) -> dict:
    """Run one reset followed by one end-turn on a throwaway live session.

    Args:
        timeout_s: Value assigned to the session's ``turn_timeout_s``
            (validated by the query parameter to lie in 10..300).

    Returns:
        ``{"ok": True, "reset": ..., "step": ...}`` with the turn, number of
        legal actions and summary of each observation, or
        ``{"ok": False, "error": ...}`` if anything raised. The session is
        closed in either case.
    """

    def observe(snapshot, status: str):
        return prepare_observation(
            snapshot,
            reward=0.0,
            done=False,
            status=status,
            metadata={},
        ).observation

    def digest(observation) -> dict:
        return {
            "turn": observation.turn,
            "legal_actions": len(observation.legal_actions),
            "summary": observation.summary,
        }

    session = create_live_session()
    session.turn_timeout_s = timeout_s
    try:
        reset_observation = observe(session.reset(), "ready")
        next_observation = observe(session.end_turn(), "ok")
        outcome = {
            "ok": True,
            "reset": digest(reset_observation),
            "step": digest(next_observation),
        }
    except Exception as exc:
        outcome = {"ok": False, "error": repr(exc)}
    finally:
        session.close()
    return outcome
135
+
136
+
137
def main() -> None:
    """Serve ``app`` with uvicorn on 0.0.0.0:8000.

    The websocket ping interval and ping timeout are both set to 300.
    """
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000, ws_ping_interval=300, ws_ping_timeout=300)
141
+
142
+
143
+ if __name__ == "__main__":
144
+ main()
freeciv_env/server/freeciv_environment.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable
4
+ from uuid import uuid4
5
+
6
+ from openenv.core.env_server.interfaces import Environment
7
+
8
+ from freeciv_env.adapter import (
9
+ ActionLookupKey,
10
+ ActionRef,
11
+ PreparedObservation,
12
+ RawSnapshot,
13
+ SnapshotMetrics,
14
+ action_lookup_key,
15
+ prepare_observation,
16
+ )
17
+ from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
18
+ from freeciv_env.runtime import FreecivSession
19
+
20
+
21
class FreecivEnvironment(Environment[FreecivAction, FreecivObservation, FreecivState]):
    """OpenEnv environment backed by a single Freeciv session.

    ``reset`` closes any previous session, creates a new one through the
    injected factory and returns the first observation. ``step`` either ends
    the turn or applies one of the actions offered by the previous
    observation, and returns the next observation with a shaped reward.
    """

    # NOTE(review): presumably consulted by the OpenEnv server; confirm there.
    SUPPORTS_CONCURRENT_SESSIONS = False

    def __init__(self, session_factory: Callable[[], FreecivSession], max_turns: int = 50):
        """Store the session factory and the turn limit; no session is opened yet.

        Args:
            session_factory: Zero-argument callable returning a new session.
            max_turns: Turn number at which an episode is reported as done.
        """
        super().__init__()
        self._session_factory = session_factory
        self.max_turns = max_turns
        self._session: FreecivSession | None = None
        self._snapshot: RawSnapshot | None = None
        self._metrics: SnapshotMetrics | None = None
        self._action_refs: dict[ActionLookupKey, ActionRef] = {}
        self._state = FreecivState(episode_id=str(uuid4()), step_count=0)

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Start a new episode and return its first observation.

        Args:
            seed: Passed through to the session's ``reset``.
            episode_id: Identifier for the new episode; a random UUID is
                generated when omitted.
            **kwargs: Accepted for interface compatibility and ignored.
        """
        del kwargs
        self.close()
        self._session = self._session_factory()
        snapshot = self._session.reset(seed=seed)
        prepared = prepare_observation(
            snapshot,
            reward=0.0,
            done=self._is_done(snapshot),
            status="ready",
            metadata={},
        )
        # _commit carries step_count over from the current state, so it has
        # to be cleared here; otherwise a new episode would continue counting
        # from the previous episode's steps.
        self._state.step_count = 0
        self._commit(snapshot, prepared, episode_id=episode_id or str(uuid4()))
        return prepared.observation

    def step(
        self,
        action: FreecivAction,
        timeout_s: float | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Apply ``action`` and return the resulting observation.

        An action that is not among the currently offered actions leaves the
        game untouched and yields a ``-0.25`` reward with status
        ``"invalid_action"``. Every call, valid or not, increments
        ``step_count``.

        Args:
            action: The action to perform.
            timeout_s: Accepted for interface compatibility and ignored.
            **kwargs: Accepted for interface compatibility and ignored.

        Raises:
            RuntimeError: if called before ``reset``.
        """
        del timeout_s, kwargs
        if self._session is None or self._snapshot is None or self._metrics is None:
            raise RuntimeError("environment must be reset before step")

        self._state.step_count += 1
        if action.action_type == "end_turn":
            next_snapshot = self._session.end_turn()
            reward = self._reward_for_transition(action, self._metrics, next_snapshot)
            prepared = prepare_observation(
                next_snapshot,
                reward=reward,
                done=self._is_done(next_snapshot),
                status="ok",
                metadata={},
            )
            self._commit(next_snapshot, prepared, episode_id=self._state.episode_id)
            return prepared.observation

        # Any other action must match one offered by the last observation.
        ref = self._action_refs.get(action_lookup_key(action))
        if ref is None:
            prepared = prepare_observation(
                self._snapshot,
                reward=-0.25,
                done=self._is_done(self._snapshot),
                status="invalid_action",
                metadata={"error": "action is not currently legal"},
            )
            self._commit(self._snapshot, prepared, episode_id=self._state.episode_id, replace_snapshot=False)
            return prepared.observation

        next_snapshot = self._session.apply_action(ref)
        reward = self._reward_for_transition(action, self._metrics, next_snapshot)
        prepared = prepare_observation(
            next_snapshot,
            reward=reward,
            done=self._is_done(next_snapshot),
            status="ok",
            metadata={},
        )
        self._commit(next_snapshot, prepared, episode_id=self._state.episode_id)
        return prepared.observation

    @property
    def state(self) -> FreecivState:
        """The state record built by the most recent commit."""
        return self._state

    def close(self) -> None:
        """Close the current session, if any, and drop all cached episode data."""
        if self._session is not None:
            self._session.close()
        self._session = None
        self._snapshot = None
        self._metrics = None
        self._action_refs = {}

    def _commit(
        self,
        snapshot: RawSnapshot,
        prepared: PreparedObservation,
        *,
        episode_id: str,
        replace_snapshot: bool = True,
    ) -> None:
        """Record ``prepared`` as the current state of the episode.

        Args:
            snapshot: Snapshot the observation was prepared from.
            prepared: Observation bundle with its metrics and action refs.
            episode_id: Episode identifier to store in the new state.
            replace_snapshot: When false, the cached snapshot is kept as is.
        """
        if replace_snapshot:
            self._snapshot = snapshot
        self._metrics = prepared.metrics
        self._action_refs = prepared.action_refs
        observation = prepared.observation
        self._state = FreecivState(
            episode_id=episode_id,
            step_count=self._state.step_count,
            turn=observation.turn,
            score=observation.score,
            known_tiles=observation.known_tiles,
            visible_tiles=observation.visible_tiles,
            city_count=observation.city_count,
            unit_count=observation.unit_count,
            techs_researched=observation.techs_researched,
        )

    def _reward_for_transition(
        self,
        action: FreecivAction,
        previous: SnapshotMetrics,
        next_snapshot: RawSnapshot,
    ) -> float:
        """Compute the shaped reward for moving from ``previous`` to ``next_snapshot``.

        The reward is a fixed bonus for the action type plus weighted
        increases in score, known tiles, city count and researched techs.
        Decreases in any of those metrics contribute nothing.

        Raises:
            KeyError: if ``action.action_type`` has no entry in the bonus table.
        """
        from freeciv_env.adapter import extract_metrics

        current = extract_metrics(next_snapshot)
        reward = {
            "end_turn": 0.0,
            "move_unit": 0.01,
            "build_city": 0.10,
            "set_city_production": 0.05,
            "set_research": 0.05,
        }[action.action_type]
        reward += max(current.score - previous.score, 0.0) * 0.02
        reward += max(current.known_tiles - previous.known_tiles, 0) * 0.01
        reward += max(current.city_count - previous.city_count, 0) * 0.50
        reward += max(current.techs_researched - previous.techs_researched, 0) * 0.25
        return float(reward)

    def _is_done(self, snapshot: RawSnapshot) -> bool:
        """Return True when the player is no longer alive or the turn limit is reached.

        A missing ``my_is_alive`` entry is treated as alive.
        """
        player = snapshot.state.get("player", {})
        alive = bool(player.get("my_is_alive", True))
        return (not alive) or snapshot.turn >= self.max_turns
freeciv_rl_training_curve.png ADDED
hackathon.md ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## **OpenEnv Hackathon Participant Guide**
2
+
3
+ Welcome to the [OpenEnv Hackathon](https://cerebralvalley.ai/e/open-env-hackathon), hacker! 👋 We’re thrilled to have you on board.
4
+
5
+ This guide is your all-in-one resource for the event, including schedule, rules, technical resources, problem statements, judging information, and more. Please read this carefully; most answers can be found here.
6
+
7
+ ## **1. Join the [PyTorch Discord Server](https://discord.gg/VBcf6VtfY6)**
8
+
9
+ - You’ll be given a Hackathon Participant role by an admin, which will give you access to the hackathon-specific channels.
10
+
11
+ - Here, you’ll be able to interact with hackers and sponsors, introduce yourselves, and form teams (for a maximum team size of **3**).
12
+
13
+ - If you don't receive your role within **24 hours of joining,** please ping @CV.
14
+
15
+ - Please submit your Discord username below so we can grant you the role
16
+
17
+ [linkEmbed]
18
+
19
+ ## **2. Location**
20
+
21
+ **|** Shack15 (1 Ferry Building, Suite 201, San Francisco CA. 94111)
22
+
23
+ - **Venue Access:** Shack15 is on the 2nd floor of the Ferry Building. Go up the Ferry Building elevator to the second floor, and turn left. Here you will see the main entrance to Shack15. 
24
+
25
+ - **Parking:** Parking near the Ferry Building is extremely limited. Consider parking farther out and taking Uber, Lyft, or Public Transportation. 
26
+
27
+ [youtube]
28
+
29
+ ## **3. WiFi Information**
30
+
31
+ - **Username:** SHACK15_Members
32
+
33
+ - **Password:** M3mb3r$4L!f3
34
+
35
+ ## **4. Hackathon Schedule**
36
+
37
+ **Saturday, March 7 (Outline)**
38
+
39
+ - **9:00 AM:** Doors Open • Breakfast Served • Team Formation
40
+
41
+ - **10:00 AM – 11:30AM**: Kick-off presentations with Meta, Hugging Face, UC Berkeley, CoreWeave, OpenPipe, Unsloth AI, Fleet AI, Mercor, Scaler AI Labs, Snorkel AI, Patronus AI, Halluminate and Scale AI
42
+
43
+ - **11:30 AM:** Hacking Begins
44
+
45
+ - **1:00 PM:** Lunch Served
46
+
47
+ - **6:00 PM:** Dinner Served
48
+
49
+ - **10:00 PM:** Doors Close • Re-entry not permitted
50
+
51
+ **Sunday, March 8 (Outline)**
52
+
53
+ - **9:00AM:** Doors Open • Breakfast Served
54
+
55
+ - **1:00PM:** Hacking stops • Submissions Due
56
+
57
+ - **1:15PM:** First Round Judging Begins
58
+
59
+ - **2:00PM:** Lunch Served
60
+
61
+ - **3:00PM:** Final Round Judging Begins
62
+
63
+ - **4:00PM:** Winners Announced and Closing
64
+
65
+ - **5:00PM:** Doors Close
66
+
67
+ All presentation slides can be found here
68
+
69
+ [linkEmbed]
70
+
71
+ ## **5. Hackathon and Submission Rules**
72
+
73
+ To keep things fair and aligned with our goals, all teams must follow these rules:
74
+
75
+ - **Open Source:** Please ensure your repository is public.
76
+
77
+ - **New Work Only:** All projects must be started from scratch during the hackathon with no previous work.
78
+
79
+ - **Team Size:** Teams may have up to **3** members.
80
+
81
+ - **Banned Projects:** Projects will be disqualified if they violate legal, ethical, or platform policies, or if they use code, data, or assets you do not have the rights to.
82
+
83
+ - Your project **must** use OpenEnv (stable release 0.2.1) deployed on HF spaces
84
+
85
+ - You must show a minimal training script for your environment using Unsloth or HF TRL in Colab.
86
+
87
+ - You must upload a **one minute** demo video to YouTube talking about your submission.
88
+
89
+ ## **6. Hackathon Problem Statements**
90
+
91
+ Your project must address at least **one of the five** required problem statements.
92
+
93
+ - Some problem statements include **optional partner-sponsored sub-problem statements**, which are additional focus areas related to the main theme.
94
+
95
+ - Your project may align with **multiple partner sub-problem statements**, but you can only be **judged for a maximum of two**. Please **select up to two** when submitting.
96
+
97
+ - Projects that match these partner sub-problem statements are eligible for **extra partner prizes**, judged separately from the main track winners.
98
+
99
+ - Each partner sub-problem statement carries a prize of **$10,000 USD**.
100
+
101
+ **Statement 1: Multi-Agent Interactions**
102
+
103
+ Environments for this theme involve cooperation, competition, negotiation, and coalition formation. Learning from these environments will enable agents to model the beliefs and incentives of others in partially observable settings. This drives theory-of-mind reasoning and emergent strategic behavior.
104
+
105
+ - **Expected Outcome:** an environment that can be used to train multi-agent task handling in an LLM
106
+
107
+ - **Example Environments:** Market simulations, compute-allocation negotiations, collaborative puzzle worlds, mixed cooperative/competitive strategy games.
108
+
109
+ - **Partner Sub-Themes:**
110
+
111
+ - **Fleet AI:** Scalable Oversight: Environments that train oversight agents to monitor, analyze, and explain the behavior of other AI agents operating in complex, multi-agent settings.
112
+ - **Halluminate:** Multi-Actor Environments: Build a realistic environment where an agent interacts with and manages multiple actors (agents) to discover and achieve the task
113
+
114
+ **Statement 2: (Super) Long-Horizon Planning & Instruction Following**
115
+
116
+ You will build environments that require deep, multi-step reasoning with sparse or delayed rewards. The goal is for agents trained in these environments to decompose goals, track state over extended trajectories, and recover from early mistakes. The aim is to push beyond shallow next-token reasoning toward structured planning and durable internal representations.
117
+
118
+ - **Expected Outcome:** an environment that can capture and improve LLM behaviour on challenging long horizon tasks that need long running sessions beyond context memory limits. 
119
+
120
+ - **Example Environments:** Research-planning simulators, large-scale codebase refactoring tasks, strategic resource management worlds, long-horizon logistics optimization, extremely complicated long-horizon instruction following (e.g., 300 instructions scattered around).
121
+
122
+ - **Partner Sub-Themes:**
123
+
124
+ - **Mercor:** Make an environment with capped/uncapped rewards where frontier model rewards scale with token output.
125
+
126
+ - **Scale AI:** Environments for long horizon workflows for non-code use cases within a business setting: focusing on either Sales, Project management, or HR & IT.
127
+
128
+ **Statement 3: World Modeling**
129
+
130
+ - **Statement 3.1: Professional Tasks:** Here you will develop environments that require real interaction with tools, APIs, or dynamic systems where the model is expected to do real hard work instead of exploiting short-cuts to arrive at the desired outcome. Learning from these environments will enable agents to maintain consistent internal state, update beliefs based on outcomes, and orchestrate multi-step workflows. The goal is to strengthen causal reasoning and persistent world models.
131
+
132
+ - **Expected Outcome:** an environment capturing the nuances of a defined, partially observable world and improving LLM interaction with it
133
+
134
+ - **Example Environments:** Dynamic browser/API ecosystems, enterprise applications, scientific workflow loops (papers → code → experiments), economic simulations with feedback, tool-discovery benchmarks.
135
+
136
+ - **Partner Sub-Theme:**
137
+
138
+ - **Scaler AI Labs:** Multi-App RL Environment for Enterprise Workflows: Create RL environments to demonstrate complex workflows, business rule nuances etc in a large enterprise
139
+
140
+ - **Statement 3.2: Personalized Tasks:** Here we will develop an environment that offers real personalized task handling: imagine replying to personal messages, handling dinner plans that conflict with work, or replying to tough emails. Think of any personal-assistant task.
141
+
142
+ - **Expected Outcome:** An environment that gives the model a realistic simulation of handling personal tasks, conflicts and managing them as delegations
143
+
144
+ - **Example Environments:** Executive Assistant Meeting Planner, Dinner and drive planning, email and message replying, etc
145
+
146
+ - **Partner Sub-Theme:**
147
+
148
+ - **Patronus AI:** Consumer Workflows with Schema Drift: Multi-step consumer workflow environments where the underlying data schemas, API contracts, and t&cs/policies/rules change.
149
+
150
+ **Statement 4: Self-Improvement**
151
+
152
+ The focus here is to create environments where agents can learn to generate new challenges, escalate difficulty, and improve through self-play or adaptive curricula. Rather than optimizing fixed tasks, the goal is for agents to learn to drive their own capability growth. The objective is recursive skill amplification.
153
+
154
+ - **Expected Outcome:** an environment for improving self-play of an LLM over a defined set of tasks
155
+
156
+ - **Example Environments:** Self-play negotiation arenas, auto-generated math/proof tasks, evolving coding competitions, adaptive RL curricula.
157
+
158
+ - **Partner Sub-Theme:**
159
+
160
+ - **Snorkel AI:** Simulated Experts-in-the-Loop: Environment that simulates interactions with real subject-matter experts, with changing requirements / preferences.
161
+
162
+ **Statement 5: Wild Card - Impress Us!**
163
+
164
+ We do not want to limit your focus if your idea doesn’t fit the boxes above. We want to, and WILL, reward out-of-the-box tasks. Please be creative, but remember to submit work that meaningfully adds value to LLM training on a certain task.
165
+
166
+ More details about each theme can be found here:
167
+
168
+ [linkEmbed]
169
+
170
+ ## **7. CV Hackathon Winners**
171
+
172
+ [linkEmbed]
173
+
174
+ ## **8. OpenEnv Provided Resources**
175
+
176
+ **Please read through the entire slideshow here. This includes:**
177
+
178
+ - OpenEnv Fundamentals, Architecture
179
+ - Local Dev, Docker, and HF Spaces Deployment
180
+ - OpenEnv in Practice
181
+ - Training (TRL & Unsloth)
182
+ - How-to-Access-Infrastructure (including GPU Request Form)
183
+
184
+ [linkEmbed]
185
+
186
+ ## **9. Partner Provided Resources**
187
+
188
+ - **Unsloth AI Resources**
189
+ - RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
190
+ - All notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
191
+ - GitHub notebook index: <https://github.com/unslothai/notebooks/tree/main/nb>
192
+ - H100 / OpenEnv recommendation: use the BF16 gpt-oss 20B OpenEnv notebook for faster H100 runs: <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
193
+ - For that notebook, reduce `max_steps` to `300` to make the process faster.
194
+ - If GRPO is too slow, prefer smaller-model notebooks with `fast_inference = True`, for example:
195
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
196
+ - <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
197
+ - You will need to edit those notebooks to include OpenEnv calls.
198
+ - If vLLM GRPO runs fail, try using a fresh virtualenv:
199
+
200
+ ```bash
201
+ python -m venv unsloth_env
202
+ source unsloth_env/bin/activate
203
+ pip install --upgrade pip && pip install uv
204
+ uv pip install unsloth vllm --torch-backend=auto
205
+ ```
206
+
207
+ - If Unsloth is already installed, update it for the latest GRPO bugfixes:
208
+
209
+ ```bash
210
+ pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
211
+ ```
212
+
213
+ - **Mercor Resources**
214
+ - Dataset: <https://huggingface.co/datasets/mercor/apex-agents>
215
+ - Archipelago repo to run the eval: <https://github.com/Mercor-Intelligence/archipelago>
216
+ - APEX-Agents paper: <https://arxiv.org/abs/2601.14242>
217
+ - **Hugging Face Resources**
218
+ - **$30** in Compute and Inference Credits
219
+ - To claim your credits, set up a HF account here: <https://huggingface.co/join>
220
+ - Then, follow this link: <https://huggingface.co/openenv-community>
221
+ - You will be granted **$30** of compute and inference credits!
222
+ - **Northflank Resources**
223
+ - Each team gets an H100
224
+ - Northflank instructions
225
+
226
+ [linkEmbed]
227
+ - Join the NorthFlank discord channel for any questions
228
+ - Please fill out this form:
229
+
230
+ [linkEmbed]
231
+
232
+
233
+ - **Cursor Resources**
234
+ - **$50** in Cursor Credits, **apply below**
235
+
236
+ [linkEmbed]
237
+
238
+ ## **10. Judging & Submissions**
239
+
240
+ Judging will take place on **Sunday, March 8**. The judges will evaluate your **technical demos** in the following categories. *Show us what you have built* to solve our problem statements. Please **do not** show us a presentation. We'll be checking to ensure your project was built **entirely during the event**; no previous work is allowed.
241
+
242
+ **|** **Teams should submit [here](https://cerebralvalley.ai/e/openenv-hackathon-sf/hackathon/submit) when they have completed hacking.** In the submission form, you will have to upload a **one minute** demo video on YouTube talking about your submission. You must also show a minimal training script for your environment using Unsloth or HF TRL in Colab.
243
+
244
+ **Please ensure your project uses** OpenEnv (stable release 0.2.1) deployed on HF Spaces.
245
+
246
+ [linkEmbed]
247
+
248
+ **Judging Criteria**
249
+
250
+ - **Environment Innovation (40%) -** Is the environment novel, creative, or challenging? Does it meaningfully test the agent’s behavior?
251
+ - **Storytelling (30%) -** Does the team clearly explain the problem, environment, and agent behavior? Is the demo engaging and easy to follow?
252
+ - **Training Script Showing Improvement in Rewards (20%) -** Does the demo provide observable evidence of training progress (reward curves, metrics, or before/after behavior)? 
253
+ - **Reward and Training Pipeline Setup (10%) -** Is the reward logic coherent, and does the pipeline produce meaningful improvement in the agent’s inference (how it acts in the environment)?
254
+
255
+ **Judging Process**
256
+
257
+ **|** Judging proceeds in two rounds:
258
+
259
+ - Hackers will be assigned groups of judges; \~3 minutes to pitch followed by 1-2 minutes of Q/A
260
+
261
+ - The top **six** teams in ranking will get to demo on stage to a panel of judges; \~3 minutes to pitch followed by 2-3 minutes for Q/A.
262
+
263
+ ## **11. Prizes**
264
+
265
+ - **1st Place:** $15,000 USD Cash
266
+
267
+ - **2nd Place:** $9,000 USD Cash
268
+
269
+ - **3rd Place:** $6,000 USD Cash
270
+
271
+ ## **❓If you have any questions, please email [wania@cerebralvalley.ai](mailto:wania@cerebralvalley.ai) or message on Discord.**
models.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from freeciv_env.models import *
notes.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ multi-agent (track 1) - freeciv
2
+ long context (track 2) - freeciv
3
+ self improving? (track 4) - mechinterp
openenv.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ name: freeciv_env
2
+ description: OpenEnv wrapper around freeciv-bot for long-horizon strategy play.
3
+ version: 0.1.0
4
+ entrypoint: freeciv_env.server.app:app
outline.md ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Demo outline
2
+
3
+ ## Open these tabs first
4
+
5
+ Local resources:
6
+ - `pres/index.html`
7
+ - `pres/training_results.html`
8
+ - `pres/trajectory.html`
9
+ - `pres/training_script.html`
10
+ - `pres/reward_curve.png`
11
+ - `pres/before_after_reward.png`
12
+
13
+ Remote resources:
14
+ - HF Space repo: <https://huggingface.co/spaces/thomasm6m6/freeciv_env>
15
+ - HF Space app: <https://thomasm6m6-freeciv-env.hf.space>
16
+
17
+ Supporting files:
18
+ - reward data: `pres/reward_steps.csv`
19
+ - training script: `scripts/train_grpo_fast.py`
20
+ - env config: `openenv.yaml`
21
+
22
+ ## What we have ready
23
+
24
+ - real OpenEnv environment for Freeciv
25
+ - real live backend on H100 via Freeciv Web
26
+ - successful GRPO training run on the live backend
27
+ - reward curve PNG
28
+ - before/after reward PNG
29
+ - live trajectory page with real observations + legal actions
30
+ - note: use reward improvement as the before/after story; raw checkpoint-to-checkpoint action examples were too noisy to be worth showing live
31
+ - minimal training script page
32
+ - HF Space deployed: `thomasm6m6/freeciv_env`
33
+
34
+ ## What not to spend time on
35
+
36
+ - long architecture explanation
37
+ - low-level websocket/runtime debugging
38
+ - model internals
39
+ - many charts
40
+
41
+ Use the product demo + reward improvement as the center of the pitch.
42
+
43
+ ---
44
+
45
+ ## 1 minute YouTube flow
46
+
47
+ ### 0:00–0:10
48
+ Open: `pres/trajectory.html`
49
+
50
+ Say:
51
+ - We built a real OpenEnv environment for Freeciv, a long-horizon strategy game.
52
+ - The model sees text observations and legal actions, and acts turn by turn against a live backend.
53
+
54
+ ### 0:10–0:22
55
+ Stay on `pres/trajectory.html`
56
+
57
+ Say:
58
+ - This is not a toy prompt task.
59
+ - It has delayed reward, persistent world state, multiple units, city-building, and long-horizon planning.
60
+ - That maps directly to the hackathon’s long-horizon planning and world-modeling tracks.
61
+
62
+ ### 0:22–0:38
63
+ Switch to `pres/training_script.html`
64
+
65
+ Say:
66
+ - We also built the minimal RL training loop with Unsloth + TRL GRPO.
67
+ - The script collects live Freeciv states, formats them into prompts, and trains a policy on the real environment.
68
+
69
+ ### 0:38–0:55
70
+ Switch to `pres/training_results.html`
71
+
72
+ Say:
73
+ - We ran training on the H100 against the live Freeciv backend.
74
+ - Reward improved from 0.125 at the start to 1.0 by the end of the run.
75
+ - This gives observable training progress, which is the key hackathon requirement.
76
+
77
+ ### 0:55–1:00
78
+ Optional final cut to HF Space repo URL
79
+
80
+ Say:
81
+ - The environment is packaged as OpenEnv and deployed to Hugging Face Spaces for submission.
82
+
83
+ ---
84
+
85
+ ## 3 minute live pitch flow
86
+
87
+ ### 0:00–0:25 — problem
88
+ Open: `pres/trajectory.html`
89
+
90
+ Say:
91
+ - We wanted a real LLM RL environment for long-horizon strategic planning.
92
+ - Freeciv is a strong fit because it has persistent state, delayed reward, many legal actions, and requires planning across turns.
93
+
94
+ ### 0:25–1:05 — show the environment
95
+ Stay on `pres/trajectory.html`
96
+
97
+ Point out:
98
+ - text-first observation
99
+ - legal actions
100
+ - units / cities / economy summaries
101
+ - live backend on H100
102
+
103
+ Say:
104
+ - The agent does not get a canned benchmark prompt.
105
+ - It interacts with a real running world and must choose from legal actions each turn.
106
+
107
+ ### 1:05–1:35 — show the training loop
108
+ Open: `pres/training_script.html`
109
+
110
+ Say:
111
+ - This is the minimal GRPO loop.
112
+ - We use live Freeciv sessions, prepare observations, build prompts, and train with Unsloth + TRL.
113
+ - The important thing is that the training loop is small and actually runs on the real backend.
114
+
115
+ ### 1:35–2:25 — show training improvement
116
+ Open: `pres/training_results.html`
117
+
118
+ Say:
119
+ - This is the core result.
120
+ - Reward increases over training steps on real Freeciv states.
121
+ - Start: 0.125. End: 1.0.
122
+ - This is the evidence that the environment and reward pipeline are coherent enough to drive learning.
123
+
124
+ If short on time, only show:
125
+ - reward curve
126
+ - before/after reward bars
127
+
128
+ ### 2:25–2:50 — why this matters
129
+ Stay on `pres/training_results.html`
130
+
131
+ Say:
132
+ - This fits Statement 2: long-horizon planning.
133
+ - It also fits Statement 3.1: world modeling, because the agent interacts with a real dynamic system and must maintain state over time.
134
+
135
+ ### 2:50–3:00 — close
136
+ Open: HF Space repo URL or `pres/index.html`
137
+
138
+ Say:
139
+ - The environment is packaged in OpenEnv, runs with a real backend, has a minimal RL script, and already shows reward improvement.
140
+
141
+ ---
142
+
143
+ ## Likely Q/A answers
144
+
145
+ ### Why Freeciv?
146
+ - It is long-horizon, strategic, partially observable, and naturally multi-step.
147
+ - It is much closer to real planning than one-shot QA.
148
+
149
+ ### What exactly is the observation/action interface?
150
+ - Observation is text-first: turn summary, economy, units, cities, map, legal actions.
151
+ - Actions are structured: end turn, move unit, build city, set city production, set research.
152
+
153
+ ### Is the backend real?
154
+ - Yes. Training was run against a live Freeciv Web backend on the H100.
155
+
156
+ ### What evidence do you have that training worked?
157
+ - The reward curve in `pres/training_results.html`.
158
+ - It rises from 0.125 to 1.0 during the live run.
159
+
160
+ ### Why not show a bigger model?
161
+ - For the hackathon, reliability and observable reward improvement mattered more than model scale.
162
+ - A smaller model let us get an end-to-end live run working on the real backend.
163
+
164
+ ### What is still incomplete?
165
+ - The environment currently exposes a small action subset rather than the full Freeciv action surface.
166
+ - The main accomplishment is that live interaction and RL training now work end to end.
167
+
168
+ ---
169
+
170
+ ## If something breaks during the pitch
171
+
172
+ Fallback tab order:
173
+ 1. `pres/training_results.html`
174
+ 2. `pres/trajectory.html`
175
+ 3. `pres/training_script.html`
176
+ 4. HF Space repo URL
177
+
178
+ If the live environment demo is flaky, just narrate from the trajectory page and go straight to the reward curve.
pres/before_after_reward.png ADDED
pres/index.html ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html><head><meta charset='utf-8'><title>Freeciv demo resources</title>
3
+ <style>
4
+ body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 900px; margin: 40px auto; line-height: 1.5; padding: 0 20px; }
5
+ ul { line-height: 1.9; }
6
+ code { background: #f6f8fa; padding: 2px 6px; border-radius: 6px; }
7
+ </style></head><body>
8
+ <h1>Freeciv OpenEnv demo resources</h1>
9
+ <ul>
10
+ <li><a href='training_results.html'>Training results</a></li>
11
+ <li><a href='trajectory.html'>Live trajectory</a></li>
12
+ <li><a href='training_script.html'>Minimal training script</a></li>
13
+ <li><a href='reward_curve.png'>Reward curve PNG</a></li>
14
+ <li><a href='before_after_reward.png'>Before/after reward PNG</a></li>
15
+ </ul>
16
+ <p>HF Space: <code>https://huggingface.co/spaces/thomasm6m6/freeciv_env</code></p>
17
+ <p>Space app domain: <code>https://thomasm6m6-freeciv-env.hf.space</code></p>
18
+ </body></html>
pres/reward_curve.png ADDED
pres/reward_steps.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ step,reward,reward_std
2
+ 1,0.125,0.25
3
+ 2,0.375,0.5386751294136047
4
+ 3,0.25,0.5
5
+ 4,0.5,0.5773502588272095
6
+ 5,0.625,0.5386751294136047
7
+ 6,0.875,0.25
8
+ 7,0.75,0.5
9
+ 8,0.875,0.25
10
+ 9,0.75,0.5
11
+ 10,1.0,0.0
pres/training_results.html ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html><head><meta charset='utf-8'><title>Training results</title>
3
+ <style>
4
+ body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; line-height: 1.45; padding: 0 20px; }
5
+ .card { background: #f6f8fa; border-radius: 10px; padding: 18px; margin: 16px 0; }
6
+ img { max-width: 100%; border: 1px solid #ddd; border-radius: 8px; }
7
+ table { border-collapse: collapse; width: 100%; }
8
+ td, th { border-bottom: 1px solid #ddd; padding: 8px; text-align: left; }
9
+ </style></head><body>
10
+ <h1>Training results</h1>
11
+ <div class='card'>
12
+ <b>Live backend:</b> real Freeciv Web on H100<br>
13
+ <b>Model:</b> Qwen/Qwen3.5-0.8B + Unsloth LoRA + TRL GRPO<br>
14
+ <b>Run:</b> 10 steps, 32 live states, batch size 8<br>
15
+ <b>Train runtime:</b> not recorded
16
+ </div>
17
+ <div class='card'>
18
+ <b>Observed reward improvement:</b> 0.125 → 1.000<br>
19
+ <b>Best visible point:</b> step 10 reward 1.000
20
+ </div>
21
+ <h2>Reward curve</h2>
22
+ <p><img src='reward_curve.png' alt='reward curve'></p>
23
+ <h2>Start vs end</h2>
24
+ <p><img src='before_after_reward.png' alt='before after reward'></p>
25
+ <h2>Per-step reward</h2>
26
+ <table><tr><th>step</th><th>reward</th><th>reward std</th></tr><tr><td>1</td><td>0.125</td><td>0.250</td></tr><tr><td>2</td><td>0.375</td><td>0.539</td></tr><tr><td>3</td><td>0.250</td><td>0.500</td></tr><tr><td>4</td><td>0.500</td><td>0.577</td></tr><tr><td>5</td><td>0.625</td><td>0.539</td></tr><tr><td>6</td><td>0.875</td><td>0.250</td></tr><tr><td>7</td><td>0.750</td><td>0.500</td></tr><tr><td>8</td><td>0.875</td><td>0.250</td></tr><tr><td>9</td><td>0.750</td><td>0.500</td></tr><tr><td>10</td><td>1.000</td><td>0.000</td></tr></table>
27
+ </body></html>
pres/training_script.html ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html><head><meta charset='utf-8'><title>Minimal training script</title>
3
+ <style>
4
+ body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; padding: 0 20px; }
5
+ pre { background: #0d1117; color: #c9d1d9; padding: 16px; border-radius: 8px; overflow-x: auto; }
6
+ code { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
7
+ </style></head><body>
8
+ <h1>Minimal training script</h1>
9
+ <p>Key file: <code>scripts/train_grpo_fast.py</code></p>
10
+ <pre><code>from __future__ import annotations
11
+
12
+ import argparse
13
+ import os
14
+
15
+ os.environ.setdefault(&quot;TOKENIZERS_PARALLELISM&quot;, &quot;false&quot;)
16
+ os.environ.setdefault(&quot;UNSLOTH_RETURN_LOGITS&quot;, &quot;1&quot;)
17
+ os.environ.setdefault(&quot;UNSLOTH_DISABLE_AUTO_UPDATES&quot;, &quot;1&quot;)
18
+
19
+ from unsloth import FastLanguageModel
20
+ from datasets import Dataset
21
+ from trl import GRPOConfig, GRPOTrainer
22
+
23
+ from freeciv_env.adapter import prepare_observation
24
+ from freeciv_env.grpo import SYSTEM_PROMPT, build_turn_prompt, oracle_action_index, reward_from_oracle
25
+ from freeciv_env.runtime import LiveFreecivSession
26
+
27
+
28
+ def parse_args():
29
+ parser = argparse.ArgumentParser()
30
+ parser.add_argument(&quot;--env-url&quot;, default=&quot;http://127.0.0.1&quot;)
31
+ parser.add_argument(&quot;--model-id&quot;, default=&quot;Qwen/Qwen3.5-0.8B&quot;)
32
+ parser.add_argument(&quot;--dataset-size&quot;, type=int, default=512)
33
+ parser.add_argument(&quot;--max-steps&quot;, type=int, default=50)
34
+ parser.add_argument(&quot;--batch-size&quot;, type=int, default=16)
35
+ parser.add_argument(&quot;--num-generations&quot;, type=int, default=4)
36
+ parser.add_argument(&quot;--episode-horizon&quot;, type=int, default=4)
37
+ parser.add_argument(&quot;--max-prompt-length&quot;, type=int, default=768)
38
+ parser.add_argument(&quot;--max-completion-length&quot;, type=int, default=8)
39
+ parser.add_argument(&quot;--learning-rate&quot;, type=float, default=5e-6)
40
+ parser.add_argument(&quot;--lora-rank&quot;, type=int, default=16)
41
+ parser.add_argument(&quot;--output-dir&quot;, default=&quot;outputs/qwen35_08b_grpo&quot;)
42
+ parser.add_argument(&quot;--save-steps&quot;, type=int, default=50)
43
+ return parser.parse_args()
44
+
45
+
46
+
47
+ def collect_dataset(env_url: str, dataset_size: int, episode_horizon: int) -&gt; Dataset:
48
+ rows = {&quot;prompt&quot;: [], &quot;best_index&quot;: []}
49
+ while len(rows[&quot;prompt&quot;]) &lt; dataset_size:
50
+ session = LiveFreecivSession(base_url=env_url, turn_timeout_s=120)
51
+ try:
52
+ snapshot = session.reset()
53
+ for turn_index in range(episode_horizon):
54
+ observation = prepare_observation(
55
+ snapshot,
56
+ reward=0.0,
57
+ done=False,
58
+ status=&quot;running&quot;,
59
+ ).observation
60
+ best_index = oracle_action_index(observation.legal_actions)
61
+ rows[&quot;prompt&quot;].append(build_turn_prompt(observation))
62
+ rows[&quot;best_index&quot;].append(best_index)
63
+ if len(rows[&quot;prompt&quot;]) &gt;= dataset_size or turn_index + 1 &gt;= episode_horizon:
64
+ break
65
+ snapshot = session.end_turn()
66
+ finally:
67
+ session.close()
68
+ return Dataset.from_dict(rows)
69
+
70
+
71
+
72
+ def load_model(model_id: str, max_seq_length: int, lora_rank: int):
73
+ model, tokenizer = FastLanguageModel.from_pretrained(
74
+ model_name=model_id,
75
+ max_seq_length=max_seq_length,
76
+ load_in_4bit=False,
77
+ load_in_16bit=True,
78
+ full_finetuning=False,
79
+ fast_inference=False,
80
+ )
81
+ model = FastLanguageModel.get_peft_model(
82
+ model,
83
+ r=lora_rank,
84
+ target_modules=[
85
+ &quot;q_proj&quot;,
86
+ &quot;k_proj&quot;,
87
+ &quot;v_proj&quot;,
88
+ &quot;o_proj&quot;,
89
+ &quot;gate_proj&quot;,
90
+ &quot;up_proj&quot;,
91
+ &quot;down_proj&quot;,
92
+ ],
93
+ lora_alpha=lora_rank * 2,
94
+ lora_dropout=0,
95
+ bias=&quot;none&quot;,
96
+ use_gradient_checkpointing=False,
97
+ random_state=3407,
98
+ max_seq_length=max_seq_length,
99
+ )
100
+ return model, tokenizer
101
+
102
+
103
+
104
+ def apply_chat_template(dataset: Dataset, tokenizer) -&gt; Dataset:
105
+ def format_row(row):
106
+ messages = [
107
+ {&quot;role&quot;: &quot;system&quot;, &quot;content&quot;: SYSTEM_PROMPT},
108
+ {&quot;role&quot;: &quot;user&quot;, &quot;content&quot;: row[&quot;prompt&quot;]},
109
+ ]
110
+ return {
111
+ &quot;prompt&quot;: tokenizer.apply_chat_template(
112
+ messages,
113
+ tokenize=False,
114
+ add_generation_prompt=True,
115
+ enable_thinking=False,
116
+ )
117
+ }
118
+
119
+ return dataset.map(format_row)
120
+
121
+
122
+
123
+ def main() -&gt; None:
124
+ args = parse_args()
125
+ max_seq_length = args.max_prompt_length + args.max_completion_length
126
+ dataset = collect_dataset(args.env_url, args.dataset_size, args.episode_horizon)
127
+ model, tokenizer = load_model(args.model_id, max_seq_length, args.lora_rank)
128
+ dataset = apply_chat_template(dataset, tokenizer)
129
+
130
+ training_args = GRPOConfig(
131
+ learning_rate=args.learning_rate,
132
+ weight_decay=0.01,
133
+ warmup_ratio=0.05,
134
+ lr_scheduler_type=&quot;cosine&quot;,
135
+ optim=&quot;adamw_torch_fused&quot;,
136
+ logging_steps=1,
137
+ log_completions=False,
138
+ per_device_train_batch_size=args.batch_size,
139
+ gradient_accumulation_steps=1,
140
+ num_generations=args.num_generations,
141
+ max_prompt_length=args.max_prompt_length,
142
+ max_completion_length=args.max_completion_length,
143
+ max_steps=args.max_steps,
144
+ save_steps=args.save_steps,
145
+ max_grad_norm=0.3,
146
+ bf16=True,
147
+ report_to=&quot;none&quot;,
148
+ beta=0.0,
149
+ loss_type=&quot;dr_grpo&quot;,</code></pre>
150
+ </body></html>
pres/trajectory.html ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html><head><meta charset='utf-8'><title>Freeciv live trajectory</title>
3
+ <style>
4
+ body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; line-height: 1.4; padding: 0 20px; }
5
+ pre { background: #f6f8fa; padding: 16px; border-radius: 8px; white-space: pre-wrap; }
6
+ section { margin-bottom: 28px; }
7
+ </style></head><body>
8
+ <h1>Freeciv live trajectory</h1>
9
+ <p>Real snapshots collected from the live Freeciv Web backend on the H100.</p>
10
+ <section><h2>Turn 1 snapshot 1</h2><pre>Turn 1
11
+ Score 0.0
12
+ Map: 0 known tiles, 0 visible tiles
13
+ Economy: 50 gold, science rate 60%
14
+ Cities: 0
15
+ Units: 5
16
+ - Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
17
+ - Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
18
+ - Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
19
+ - Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
20
+ - Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
21
+ Techs researched: 1
22
+ Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
23
+ 1. build_city — Build a city with unit 110
24
+ 2. end_turn — End the current turn
25
+ 3. move_unit — Move unit 102 in direction 0
26
+ 4. move_unit — Move unit 102 in direction 1
27
+ 5. move_unit — Move unit 102 in direction 2</pre></section><section><h2>Turn 1 snapshot 2</h2><pre>Turn 1
28
+ Score 0.0
29
+ Map: 0 known tiles, 0 visible tiles
30
+ Economy: 50 gold, science rate 60%
31
+ Cities: 0
32
+ Units: 5
33
+ - Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
34
+ - Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
35
+ - Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
36
+ - Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
37
+ - Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
38
+ Techs researched: 1
39
+ Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
40
+ 1. build_city — Build a city with unit 110
41
+ 2. end_turn — End the current turn
42
+ 3. move_unit — Move unit 102 in direction 0
43
+ 4. move_unit — Move unit 102 in direction 1
44
+ 5. move_unit — Move unit 102 in direction 2</pre></section><section><h2>Turn 2 snapshot 3</h2><pre>Turn 2
45
+ Score 0.0
46
+ Map: 0 known tiles, 0 visible tiles
47
+ Economy: 50 gold, science rate 60%
48
+ Cities: 0
49
+ Units: 5
50
+ - Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
51
+ - Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
52
+ - Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
53
+ - Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
54
+ - Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
55
+ Techs researched: 1
56
+ Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
57
+ 1. build_city — Build a city with unit 110
58
+ 2. end_turn — End the current turn
59
+ 3. move_unit — Move unit 102 in direction 0
60
+ 4. move_unit — Move unit 102 in direction 1
61
+ 5. move_unit — Move unit 102 in direction 2</pre></section>
62
+ </body></html>
pyproject.toml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "freeciv-env"
3
+ version = "0.1.0"
4
+ description = "OpenEnv environment for Freeciv via freeciv-bot"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "openenv-core[core]==0.2.1",
9
+ "freecivbot @ git+https://github.com/chris1869/freeciv-bot.git",
10
+ "uvicorn>=0.35.0",
11
+ ]
12
+
13
+ [project.scripts]
14
+ server = "server.app:main"
15
+
16
+ [project.optional-dependencies]
17
+ dev = [
18
+ "pytest>=8.4.1",
19
+ "requests>=2.32.5",
20
+ ]
21
+ train = [
22
+ "accelerate>=1.10.0",
23
+ "bitsandbytes>=0.47.0",
24
+ "datasets>=4.0.0",
25
+ "trl>=0.24.0",
26
+ "unsloth>=2026.3.4",
27
+ ]
28
+
29
+ [build-system]
30
+ requires = ["setuptools>=80", "wheel"]
31
+ build-backend = "setuptools.build_meta"
32
+
33
+ [tool.setuptools]
34
+ packages = ["freeciv_env", "freeciv_env.server", "server"]
35
+
36
+ [tool.pytest.ini_options]
37
+ pythonpath = ["."]
38
+ testpaths = ["tests"]
39
+ markers = [
40
+ "integration: requires a live freeciv-web runtime",
41
+ ]
qwen35_live_long_trainer_state.json ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.625,
6
+ "eval_steps": 500,
7
+ "global_step": 10,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completion_length": 2.375,
19
+ "completions/clipped_ratio": 0.0,
20
+ "completions/max_length": 3.0,
21
+ "completions/max_terminated_length": 3.0,
22
+ "completions/mean_length": 2.375,
23
+ "completions/mean_terminated_length": 2.375,
24
+ "completions/min_length": 2.0,
25
+ "completions/min_terminated_length": 2.0,
26
+ "epoch": 0.0625,
27
+ "frac_reward_zero_std": 0.5,
28
+ "grad_norm": 5.300995349884033,
29
+ "kl": 0.0,
30
+ "learning_rate": 0.0,
31
+ "loss": 0.01562187448143959,
32
+ "num_tokens": 8343.0,
33
+ "reward": 0.125,
34
+ "reward_std": 0.25,
35
+ "rewards/reward_from_oracle/mean": 0.125,
36
+ "rewards/reward_from_oracle/std": 0.3535533845424652,
37
+ "step": 1
38
+ },
39
+ {
40
+ "clip_ratio/high_max": 0.0,
41
+ "clip_ratio/high_mean": 0.0,
42
+ "clip_ratio/low_mean": 0.0,
43
+ "clip_ratio/low_min": 0.0,
44
+ "clip_ratio/region_mean": 0.0,
45
+ "completion_length": 2.375,
46
+ "completions/clipped_ratio": 0.0,
47
+ "completions/max_length": 3.0,
48
+ "completions/max_terminated_length": 3.0,
49
+ "completions/mean_length": 2.375,
50
+ "completions/mean_terminated_length": 2.375,
51
+ "completions/min_length": 2.0,
52
+ "completions/min_terminated_length": 2.0,
53
+ "epoch": 0.125,
54
+ "frac_reward_zero_std": 0.0,
55
+ "grad_norm": 9.095938682556152,
56
+ "kl": 0.0,
57
+ "learning_rate": 5e-06,
58
+ "loss": 0.029151180759072304,
59
+ "num_tokens": 16682.0,
60
+ "reward": 0.375,
61
+ "reward_std": 0.5386751294136047,
62
+ "rewards/reward_from_oracle/mean": 0.375,
63
+ "rewards/reward_from_oracle/std": 0.5175492167472839,
64
+ "step": 2
65
+ },
66
+ {
67
+ "clip_ratio/high_max": 0.0,
68
+ "clip_ratio/high_mean": 0.0,
69
+ "clip_ratio/low_mean": 0.0,
70
+ "clip_ratio/low_min": 0.0,
71
+ "clip_ratio/region_mean": 0.0,
72
+ "completion_length": 2.375,
73
+ "completions/clipped_ratio": 0.0,
74
+ "completions/max_length": 3.0,
75
+ "completions/max_terminated_length": 3.0,
76
+ "completions/mean_length": 2.375,
77
+ "completions/mean_terminated_length": 2.375,
78
+ "completions/min_length": 2.0,
79
+ "completions/min_terminated_length": 2.0,
80
+ "epoch": 0.1875,
81
+ "frac_reward_zero_std": 0.0,
82
+ "grad_norm": 8.75147533416748,
83
+ "kl": 0.0,
84
+ "learning_rate": 4.849231551964771e-06,
85
+ "loss": 0.023432811722159386,
86
+ "num_tokens": 25025.0,
87
+ "reward": 0.25,
88
+ "reward_std": 0.5,
89
+ "rewards/reward_from_oracle/mean": 0.25,
90
+ "rewards/reward_from_oracle/std": 0.4629100561141968,
91
+ "step": 3
92
+ },
93
+ {
94
+ "clip_ratio/high_max": 0.0,
95
+ "clip_ratio/high_mean": 0.0,
96
+ "clip_ratio/low_mean": 0.0,
97
+ "clip_ratio/low_min": 0.0,
98
+ "clip_ratio/region_mean": 0.0,
99
+ "completion_length": 2.125,
100
+ "completions/clipped_ratio": 0.0,
101
+ "completions/max_length": 3.0,
102
+ "completions/max_terminated_length": 3.0,
103
+ "completions/mean_length": 2.125,
104
+ "completions/mean_terminated_length": 2.125,
105
+ "completions/min_length": 2.0,
106
+ "completions/min_terminated_length": 2.0,
107
+ "epoch": 0.25,
108
+ "frac_reward_zero_std": 0.0,
109
+ "grad_norm": 10.478106498718262,
110
+ "kl": 0.0,
111
+ "learning_rate": 4.415111107797445e-06,
112
+ "loss": 0.013529304414987564,
113
+ "num_tokens": 33362.0,
114
+ "reward": 0.5,
115
+ "reward_std": 0.5773502588272095,
116
+ "rewards/reward_from_oracle/mean": 0.5,
117
+ "rewards/reward_from_oracle/std": 0.5345224738121033,
118
+ "step": 4
119
+ },
120
+ {
121
+ "clip_ratio/high_max": 0.0,
122
+ "clip_ratio/high_mean": 0.0,
123
+ "clip_ratio/low_mean": 0.0,
124
+ "clip_ratio/low_min": 0.0,
125
+ "clip_ratio/region_mean": 0.0,
126
+ "completion_length": 2.125,
127
+ "completions/clipped_ratio": 0.0,
128
+ "completions/max_length": 3.0,
129
+ "completions/max_terminated_length": 3.0,
130
+ "completions/mean_length": 2.125,
131
+ "completions/mean_terminated_length": 2.125,
132
+ "completions/min_length": 2.0,
133
+ "completions/min_terminated_length": 2.0,
134
+ "epoch": 0.3125,
135
+ "frac_reward_zero_std": 0.0,
136
+ "grad_norm": 8.125267028808594,
137
+ "kl": 0.0,
138
+ "learning_rate": 3.7500000000000005e-06,
139
+ "loss": 0.013529304414987564,
140
+ "num_tokens": 41707.0,
141
+ "reward": 0.625,
142
+ "reward_std": 0.5386751294136047,
143
+ "rewards/reward_from_oracle/mean": 0.625,
144
+ "rewards/reward_from_oracle/std": 0.5175492167472839,
145
+ "step": 5
146
+ },
147
+ {
148
+ "clip_ratio/high_max": 0.0,
149
+ "clip_ratio/high_mean": 0.0,
150
+ "clip_ratio/low_mean": 0.0,
151
+ "clip_ratio/low_min": 0.0,
152
+ "clip_ratio/region_mean": 0.0,
153
+ "completion_length": 2.0,
154
+ "completions/clipped_ratio": 0.0,
155
+ "completions/max_length": 2.0,
156
+ "completions/max_terminated_length": 2.0,
157
+ "completions/mean_length": 2.0,
158
+ "completions/mean_terminated_length": 2.0,
159
+ "completions/min_length": 2.0,
160
+ "completions/min_terminated_length": 2.0,
161
+ "epoch": 0.375,
162
+ "frac_reward_zero_std": 0.5,
163
+ "grad_norm": 3.183867931365967,
164
+ "kl": 0.0,
165
+ "learning_rate": 2.9341204441673267e-06,
166
+ "loss": 0.0,
167
+ "num_tokens": 50047.0,
168
+ "reward": 0.875,
169
+ "reward_std": 0.25,
170
+ "rewards/reward_from_oracle/mean": 0.875,
171
+ "rewards/reward_from_oracle/std": 0.3535533845424652,
172
+ "step": 6
173
+ },
174
+ {
175
+ "clip_ratio/high_max": 0.0,
176
+ "clip_ratio/high_mean": 0.0,
177
+ "clip_ratio/low_mean": 0.0,
178
+ "clip_ratio/low_min": 0.0,
179
+ "clip_ratio/region_mean": 0.0,
180
+ "completion_length": 2.0,
181
+ "completions/clipped_ratio": 0.0,
182
+ "completions/max_length": 2.0,
183
+ "completions/max_terminated_length": 2.0,
184
+ "completions/mean_length": 2.0,
185
+ "completions/mean_terminated_length": 2.0,
186
+ "completions/min_length": 2.0,
187
+ "completions/min_terminated_length": 2.0,
188
+ "epoch": 0.4375,
189
+ "frac_reward_zero_std": 0.0,
190
+ "grad_norm": 7.007436275482178,
191
+ "kl": 0.0,
192
+ "learning_rate": 2.0658795558326745e-06,
193
+ "loss": 0.0,
194
+ "num_tokens": 58391.0,
195
+ "reward": 0.75,
196
+ "reward_std": 0.5,
197
+ "rewards/reward_from_oracle/mean": 0.75,
198
+ "rewards/reward_from_oracle/std": 0.4629100561141968,
199
+ "step": 7
200
+ },
201
+ {
202
+ "clip_ratio/high_max": 0.0,
203
+ "clip_ratio/high_mean": 0.0,
204
+ "clip_ratio/low_mean": 0.0,
205
+ "clip_ratio/low_min": 0.0,
206
+ "clip_ratio/region_mean": 0.0,
207
+ "completion_length": 2.0,
208
+ "completions/clipped_ratio": 0.0,
209
+ "completions/max_length": 2.0,
210
+ "completions/max_terminated_length": 2.0,
211
+ "completions/mean_length": 2.0,
212
+ "completions/mean_terminated_length": 2.0,
213
+ "completions/min_length": 2.0,
214
+ "completions/min_terminated_length": 2.0,
215
+ "epoch": 0.5,
216
+ "frac_reward_zero_std": 0.5,
217
+ "grad_norm": 3.759775161743164,
218
+ "kl": 0.0,
219
+ "learning_rate": 1.2500000000000007e-06,
220
+ "loss": 1.862645149230957e-09,
221
+ "num_tokens": 66727.0,
222
+ "reward": 0.875,
223
+ "reward_std": 0.25,
224
+ "rewards/reward_from_oracle/mean": 0.875,
225
+ "rewards/reward_from_oracle/std": 0.3535533845424652,
226
+ "step": 8
227
+ },
228
+ {
229
+ "clip_ratio/high_max": 0.0,
230
+ "clip_ratio/high_mean": 0.0,
231
+ "clip_ratio/low_mean": 0.0,
232
+ "clip_ratio/low_min": 0.0,
233
+ "clip_ratio/region_mean": 0.0,
234
+ "completion_length": 2.0,
235
+ "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 2.0,
237
+ "completions/max_terminated_length": 2.0,
238
+ "completions/mean_length": 2.0,
239
+ "completions/mean_terminated_length": 2.0,
240
+ "completions/min_length": 2.0,
241
+ "completions/min_terminated_length": 2.0,
242
+ "epoch": 0.5625,
243
+ "frac_reward_zero_std": 0.0,
244
+ "grad_norm": 7.748785495758057,
245
+ "kl": 0.0,
246
+ "learning_rate": 5.848888922025553e-07,
247
+ "loss": 1.862645149230957e-09,
248
+ "num_tokens": 75063.0,
249
+ "reward": 0.75,
250
+ "reward_std": 0.5,
251
+ "rewards/reward_from_oracle/mean": 0.75,
252
+ "rewards/reward_from_oracle/std": 0.4629100561141968,
253
+ "step": 9
254
+ },
255
+ {
256
+ "clip_ratio/high_max": 0.0,
257
+ "clip_ratio/high_mean": 0.0,
258
+ "clip_ratio/low_mean": 0.0,
259
+ "clip_ratio/low_min": 0.0,
260
+ "clip_ratio/region_mean": 0.0,
261
+ "completion_length": 2.0,
262
+ "completions/clipped_ratio": 0.0,
263
+ "completions/max_length": 2.0,
264
+ "completions/max_terminated_length": 2.0,
265
+ "completions/mean_length": 2.0,
266
+ "completions/mean_terminated_length": 2.0,
267
+ "completions/min_length": 2.0,
268
+ "completions/min_terminated_length": 2.0,
269
+ "epoch": 0.625,
270
+ "frac_reward_zero_std": 1.0,
271
+ "grad_norm": 0.0,
272
+ "kl": 0.0,
273
+ "learning_rate": 1.507684480352292e-07,
274
+ "loss": 0.0,
275
+ "num_tokens": 83403.0,
276
+ "reward": 1.0,
277
+ "reward_std": 0.0,
278
+ "rewards/reward_from_oracle/mean": 1.0,
279
+ "rewards/reward_from_oracle/std": 0.0,
280
+ "step": 10
281
+ }
282
+ ],
283
+ "logging_steps": 1,
284
+ "max_steps": 10,
285
+ "num_input_tokens_seen": 83403,
286
+ "num_train_epochs": 1,
287
+ "save_steps": 10,
288
+ "stateful_callbacks": {
289
+ "TrainerControl": {
290
+ "args": {
291
+ "should_epoch_stop": false,
292
+ "should_evaluate": false,
293
+ "should_log": false,
294
+ "should_save": true,
295
+ "should_training_stop": true
296
+ },
297
+ "attributes": {}
298
+ }
299
+ },
300
+ "total_flos": 0.0,
301
+ "train_batch_size": 8,
302
+ "trial_name": null,
303
+ "trial_params": null
304
+ }
scripts/start_space.sh ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -eu
3
+
4
+ export FREECIV_SERVER_URL="${FREECIV_SERVER_URL:-http://127.0.0.1}"
5
+ export FREECIV_USERNAME="${FREECIV_USERNAME:-openenvbot}"
6
+ export FREECIV_CLIENT_PORT="${FREECIV_CLIENT_PORT:-6000}"
7
+ export FREECIV_TURN_TIMEOUT_S="${FREECIV_TURN_TIMEOUT_S:-120}"
8
+ export FREECIV_STARTUP_TIMEOUT_S="${FREECIV_STARTUP_TIMEOUT_S:-180}"
9
+ export ENABLE_WEB_INTERFACE="${ENABLE_WEB_INTERFACE:-true}"
10
+
11
+ log_file=/tmp/start_space.log
12
+ : > "$log_file"
13
+
14
+ log() {
15
+ local line
16
+ line="[$(date -Iseconds)] $*"
17
+ echo "$line" | tee -a "$log_file" >&2
18
+ }
19
+
20
+ service_status() {
21
+ local name url
22
+ name="$1"
23
+ url="$2"
24
+ if curl -fsS --max-time 2 "$url" >/dev/null 2>&1; then
25
+ echo "$name=up"
26
+ else
27
+ echo "$name=down"
28
+ fi
29
+ }
30
+
31
+ wait_for_runtime() {
32
+ local deadline status nginx publite2 tomcat
33
+ deadline=$(( $(date +%s) + FREECIV_STARTUP_TIMEOUT_S ))
34
+ while true; do
35
+ nginx=$(service_status nginx http://127.0.0.1/)
36
+ publite2=$(service_status publite2 http://127.0.0.1/pubstatus)
37
+ tomcat=$(service_status tomcat http://127.0.0.1:8080/freeciv-web/)
38
+ status="$nginx $publite2 $tomcat"
39
+ log "$status"
40
+ if [[ "$nginx" == "nginx=up" && "$publite2" == "publite2=up" && "$tomcat" == "tomcat=up" ]]; then
41
+ return 0
42
+ fi
43
+ if (( $(date +%s) >= deadline )); then
44
+ log "freeciv runtime failed to become ready before timeout=${FREECIV_STARTUP_TIMEOUT_S}s"
45
+ return 1
46
+ fi
47
+ sleep 2
48
+ done
49
+ }
50
+
51
+ log "start_space.sh boot"
52
+ wait_for_runtime
53
+ log "freeciv runtime ready; starting uvicorn"
54
+ exec python -m uvicorn server.app:app --host 0.0.0.0 --port 8000 --ws-ping-interval 300 --ws-ping-timeout 300