Spaces:
Runtime error
Runtime error
Initial Freeciv OpenEnv Space
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +14 -0
- .gitignore +1 -0
- AGENTS.md +5 -0
- Dockerfile +31 -0
- README.md +112 -5
- __init__.py +1 -0
- build/lib/freeciv_env/__init__.py +10 -0
- build/lib/freeciv_env/adapter.py +335 -0
- build/lib/freeciv_env/client.py +22 -0
- build/lib/freeciv_env/grpo.py +97 -0
- build/lib/freeciv_env/models.py +112 -0
- build/lib/freeciv_env/runtime.py +401 -0
- build/lib/freeciv_env/server/__init__.py +3 -0
- build/lib/freeciv_env/server/app.py +42 -0
- build/lib/freeciv_env/server/freeciv_environment.py +163 -0
- build/lib/server/__init__.py +0 -0
- build/lib/server/app.py +10 -0
- client.py +1 -0
- freeciv_env.egg-info/PKG-INFO +136 -0
- freeciv_env.egg-info/SOURCES.txt +23 -0
- freeciv_env.egg-info/dependency_links.txt +1 -0
- freeciv_env.egg-info/entry_points.txt +2 -0
- freeciv_env.egg-info/requires.txt +14 -0
- freeciv_env.egg-info/top_level.txt +2 -0
- freeciv_env/__init__.py +10 -0
- freeciv_env/adapter.py +335 -0
- freeciv_env/client.py +22 -0
- freeciv_env/grpo.py +97 -0
- freeciv_env/models.py +112 -0
- freeciv_env/runtime.py +432 -0
- freeciv_env/server/Dockerfile +31 -0
- freeciv_env/server/__init__.py +3 -0
- freeciv_env/server/app.py +144 -0
- freeciv_env/server/freeciv_environment.py +163 -0
- freeciv_rl_training_curve.png +0 -0
- hackathon.md +271 -0
- models.py +1 -0
- notes.md +3 -0
- openenv.yaml +4 -0
- outline.md +178 -0
- pres/before_after_reward.png +0 -0
- pres/index.html +18 -0
- pres/reward_curve.png +0 -0
- pres/reward_steps.csv +11 -0
- pres/training_results.html +27 -0
- pres/training_script.html +150 -0
- pres/trajectory.html +62 -0
- pyproject.toml +41 -0
- qwen35_live_long_trainer_state.json +304 -0
- scripts/start_space.sh +54 -0
.dockerignore
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
.venv
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
build/
|
| 7 |
+
pres/
|
| 8 |
+
freeciv_env.egg-info/
|
| 9 |
+
notes.md
|
| 10 |
+
outline.md
|
| 11 |
+
hackathon.md
|
| 12 |
+
qwen35_live_long_trainer_state.json
|
| 13 |
+
freeciv_rl_training_curve.png
|
| 14 |
+
tests/
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
**/__pycache__
|
AGENTS.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is a hackathon project; see the brief in `hackathon.md`. Always read that file before responding.
|
| 2 |
+
Please keep your answers concise.
|
| 3 |
+
|
| 4 |
+
We have an H100. Access it by running this command: `northflank ssh service --projectId hackathon --serviceId civ --proxyOnly` (may already be running)
|
| 5 |
+
then `ssh root@127.0.0.1 -p 35731` (or whatever the `northflank ssh` command prints out as the command to run)
|
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the Freeciv OpenEnv server (README front matter: sdk: docker, app_port: 8000).
# Built on the freeciv-web image; the OpenEnv server code is installed under /app/env.
FROM omkarasoftware/freeciv-web:latest

# Package installation and creating /app require root; the rest of the build runs as "docker".
USER root
# --no-install-recommends keeps the layer limited to the three tools requested here.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /app/env && chown -R docker:docker /app

USER docker
ENV HOME=/home/docker
WORKDIR /app/env

COPY --chown=docker:docker . /app/env
RUN chmod +x /app/env/scripts/start_space.sh
# Install uv for the docker user, then put both the project venv and uv itself on PATH.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/app/env/.venv/bin:/home/docker/.local/bin:$PATH"
RUN uv python install 3.11
RUN uv venv --python 3.11 /app/env/.venv
# --frozen: install exactly what the lockfile specifies, without re-resolving.
RUN UV_PROJECT_ENVIRONMENT=/app/env/.venv uv sync --frozen --no-dev --no-editable

# Append any inherited PYTHONPATH only when it is set; a bare "$PYTHONPATH" would leave a
# trailing ":" (an empty path entry) whenever the base image does not define the variable.
ENV PYTHONPATH="/app/env${PYTHONPATH:+:${PYTHONPATH}}"
ENV ENABLE_WEB_INTERFACE=true
ENV FREECIV_SERVER_URL=http://127.0.0.1
ENV FREECIV_TURN_TIMEOUT_S=120

HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=10 \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/env/scripts/start_space.sh"]
|
README.md
CHANGED
|
@@ -1,10 +1,117 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Freeciv Environment Server
|
| 3 |
+
emoji: 🎮
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# freeciv-env
|
| 15 |
+
|
| 16 |
+
OpenEnv environment for Freeciv, built on top of `freeciv-bot`.
|
| 17 |
+
|
| 18 |
+
## Current scope
|
| 19 |
+
|
| 20 |
+
This environment exposes a small, trainable action surface:
|
| 21 |
+
|
| 22 |
+
- `end_turn`
|
| 23 |
+
- `move_unit(unit_id, direction)`
|
| 24 |
+
- `build_city(unit_id)`
|
| 25 |
+
- `set_city_production(city_id, target)`
|
| 26 |
+
- `set_research(tech_name)`
|
| 27 |
+
|
| 28 |
+
Observations are text-first and include compact structured summaries of:
|
| 29 |
+
|
| 30 |
+
- current turn
|
| 31 |
+
- score
|
| 32 |
+
- known and visible map tiles
|
| 33 |
+
- units
|
| 34 |
+
- cities
|
| 35 |
+
- legal actions
|
| 36 |
+
|
| 37 |
+
## Local development
|
| 38 |
+
|
| 39 |
+
Install dependencies:
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
uv sync --extra dev
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
Run tests:
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
uv run pytest
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
Run the server:
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
uv run uvicorn freeciv_env.server.app:app --host 0.0.0.0 --port 8000
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
Run the fast GRPO loop:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
uv sync --extra dev --extra train
|
| 61 |
+
uv run python scripts/train_grpo_fast.py --env-url http://127.0.0.1 --max-steps 50
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Hackathon / Unsloth notes
|
| 65 |
+
|
| 66 |
+
For the hackathon Colab submission path on H100s, Unsloth recommended the BF16 OpenEnv gpt-oss 20B notebook:
|
| 67 |
+
|
| 68 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
|
| 69 |
+
|
| 70 |
+
If you adapt that notebook for this environment, reduce `max_steps` to `300` for a faster run.
|
| 71 |
+
|
| 72 |
+
Useful notebook indexes:
|
| 73 |
+
|
| 74 |
+
- RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
|
| 75 |
+
- all notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
|
| 76 |
+
- notebook repo: <https://github.com/unslothai/notebooks/tree/main/nb>
|
| 77 |
+
|
| 78 |
+
If GRPO is too slow, start from a smaller notebook with `fast_inference = True` and add the Freeciv/OpenEnv calls:
|
| 79 |
+
|
| 80 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
|
| 81 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
|
| 82 |
+
|
| 83 |
+
If vLLM GRPO fails, Unsloth suggested a clean virtualenv install:
|
| 84 |
+
|
| 85 |
+
```bash
|
| 86 |
+
python -m venv unsloth_env
|
| 87 |
+
source unsloth_env/bin/activate
|
| 88 |
+
pip install --upgrade pip && pip install uv
|
| 89 |
+
uv pip install unsloth vllm --torch-backend=auto
|
| 90 |
+
```
|
| 91 |
+
|
| 92 |
+
If Unsloth is already installed, update it for the latest GRPO fixes:
|
| 93 |
+
|
| 94 |
+
```bash
|
| 95 |
+
pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
## Live runtime requirements
|
| 99 |
+
|
| 100 |
+
The default server app uses `freeciv-bot` against a local Freeciv Web runtime.
|
| 101 |
+
|
| 102 |
+
Environment variables:
|
| 103 |
+
|
| 104 |
+
- `FREECIV_SERVER_URL` (default: `http://127.0.0.1`)
|
| 105 |
+
- `FREECIV_USERNAME` (default: `openenvbot`)
|
| 106 |
+
- `FREECIV_CLIENT_PORT` (default: `6000`)
|
| 107 |
+
- `FREECIV_TURN_TIMEOUT_S` (default: `60`; the bundled Dockerfile sets it to `120`)
|
| 108 |
+
|
| 109 |
+
The included automated tests use a fake session backend, so they do not require a live Freeciv server.
|
| 110 |
+
|
| 111 |
+
The GRPO training script uses:
|
| 112 |
+
|
| 113 |
+
- `Qwen/Qwen3.5-0.8B`
|
| 114 |
+
- Unsloth bf16 LoRA loading
|
| 115 |
+
- TRL `GRPOTrainer`
|
| 116 |
+
- integer-only action selection to minimize generated tokens
|
| 117 |
+
- offline GRPO over env-sampled states for maximum throughput
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env import *
|
build/lib/freeciv_env/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.client import FreecivEnv
|
| 2 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState, LegalAction
|
| 3 |
+
|
| 4 |
+
__all__ = [
|
| 5 |
+
"FreecivAction",
|
| 6 |
+
"FreecivEnv",
|
| 7 |
+
"FreecivObservation",
|
| 8 |
+
"FreecivState",
|
| 9 |
+
"LegalAction",
|
| 10 |
+
]
|
build/lib/freeciv_env/adapter.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import CitySummary, FreecivAction, FreecivObservation, LegalAction, UnitSummary
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
ActionLookupKey = tuple[str, int | None, int | None, str | None]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass(frozen=True)
class ActionRef:
    """Immutable handle identifying one raw action inside a controller's action map."""

    # Controller name; this module uses "unit", "city" and "tech".
    controller: str
    # Actor inside the controller: an integer unit/city id, or the string "cur_player" for tech.
    actor_id: int | str
    # Key of the action in the actor's raw action map (for example "build" or "goto_3").
    raw_action_key: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class RawSnapshot:
    """Unprocessed game data for a single turn, as consumed by the helpers in this module."""

    # Turn number reported for this snapshot.
    turn: int
    # Raw state dictionary; this module reads its "map", "player", "city" and "unit" entries.
    state: dict[str, Any]
    # Raw action maps keyed by controller name ("unit", "city", "tech").
    actions: dict[str, Any]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass(frozen=True)
class SnapshotMetrics:
    """Immutable bundle of the headline numbers derived from one snapshot."""

    # Player score as a float.
    score: float
    # Number of map tiles counted as known.
    known_tiles: int
    # Number of map tiles counted as currently visible.
    visible_tiles: int
    # Number of cities present in the snapshot state.
    city_count: int
    # Number of units present in the snapshot state.
    unit_count: int
    # Count of technologies the player has researched.
    techs_researched: int
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class PreparedObservation:
    """An observation packaged with the data needed to score and execute follow-up actions."""

    # The agent-facing observation model.
    observation: FreecivObservation
    # Metrics computed from the same snapshot as the observation.
    metrics: SnapshotMetrics
    # Maps normalized action lookup keys to the raw controller action they stand for.
    action_refs: dict[ActionLookupKey, ActionRef]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _map_status_rows(raw_state: dict[str, Any]) -> list[list[int | float]]:
|
| 44 |
+
raw_map = raw_state.get("map", {})
|
| 45 |
+
status = raw_map.get("status", [])
|
| 46 |
+
return status if isinstance(status, list) else []
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def count_known_tiles(raw_state: dict[str, Any]) -> int:
    """Count the map status cells whose value is truthy and greater than zero."""
    known = 0
    for row in _map_status_rows(raw_state):
        for cell in row:
            if cell and cell > 0:
                known += 1
    return known
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def count_visible_tiles(raw_state: dict[str, Any]) -> int:
    """Count the map status cells whose value is truthy and at least 2."""
    visible = 0
    for row in _map_status_rows(raw_state):
        for cell in row:
            if cell and cell >= 2:
                visible += 1
    return visible
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def extract_metrics(snapshot: RawSnapshot) -> SnapshotMetrics:
    """Derive the headline metrics for ``snapshot``.

    Args:
        snapshot: Raw snapshot whose ``state`` holds the "player", "map",
            "city" and "unit" entries read here.

    Returns:
        A ``SnapshotMetrics`` with the player's score and researched-tech
        count, the known/visible tile counts, and the number of cities and
        units in the state.
    """
    player = snapshot.state.get("player", {})
    return SnapshotMetrics(
        # ``or 0.0`` mirrors the techs_researched guard below: a key that is present
        # but None would otherwise make float() raise TypeError.
        score=float(player.get("my_score", 0.0) or 0.0),
        known_tiles=count_known_tiles(snapshot.state),
        visible_tiles=count_visible_tiles(snapshot.state),
        city_count=len(snapshot.state.get("city", {})),
        unit_count=len(snapshot.state.get("unit", {})),
        techs_researched=int(player.get("my_techs_researched", 0) or 0),
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def action_lookup_key(action: FreecivAction) -> ActionLookupKey:
    """Build the ``(action_type, actor_id, direction, target)`` lookup tuple for ``action``.

    Slots that do not apply to the action type are ``None``. Any action type
    other than the four handled explicitly is keyed as ``end_turn``.
    """
    kind = action.action_type
    actor = None
    direction = None
    target = None
    if kind == "move_unit":
        actor, direction = action.unit_id, action.direction
    elif kind == "build_city":
        actor = action.unit_id
    elif kind == "set_city_production":
        actor, target = action.city_id, action.target
    elif kind == "set_research":
        target = action.target
    else:
        # Fallback shared by end_turn and any unrecognized type.
        kind = "end_turn"
    return (kind, actor, direction, target)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _parse_target_name(raw_action_key: str, prefix: str) -> str:
|
| 82 |
+
suffix = raw_action_key.removeprefix(prefix)
|
| 83 |
+
name, _sep, _tail = suffix.rpartition("_")
|
| 84 |
+
return name or suffix
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _controller_actions(snapshot: RawSnapshot, controller: str) -> dict[str, Any]:
|
| 89 |
+
raw_actions = snapshot.actions.get(controller, {})
|
| 90 |
+
if isinstance(raw_actions, dict):
|
| 91 |
+
return raw_actions
|
| 92 |
+
if hasattr(raw_actions, "json_struct"):
|
| 93 |
+
json_actions = raw_actions.json_struct()
|
| 94 |
+
return json_actions if isinstance(json_actions, dict) else {}
|
| 95 |
+
return {}
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _extract_legal_actions(snapshot: RawSnapshot) -> tuple[list[LegalAction], dict[ActionLookupKey, ActionRef]]:
    """Enumerate the actions exposed to the agent for ``snapshot``.

    Returns:
        A pair ``(legal_actions, refs)``. ``legal_actions`` is sorted by action
        type, then unit id, city id, direction and target. ``refs`` maps each
        normalized lookup key to the ``ActionRef`` naming the raw controller
        action. ``end_turn`` is always listed and has no entry in ``refs``.
    """
    catalog: list[LegalAction] = [
        LegalAction(
            action_type="end_turn",
            label="End the current turn",
            raw_action_key="__end_turn__",
        )
    ]
    lookup: dict[ActionLookupKey, ActionRef] = {}

    # Unit controller: "build" founds a city, "goto_<n>" moves in direction n.
    for raw_unit_id, unit_map in _controller_actions(snapshot, "unit").items():
        unit_id = int(raw_unit_id)
        if unit_map.get("build"):
            catalog.append(
                LegalAction(
                    action_type="build_city",
                    label=f"Build a city with unit {unit_id}",
                    unit_id=unit_id,
                    raw_action_key="build",
                )
            )
            lookup[("build_city", unit_id, None, None)] = ActionRef(
                controller="unit", actor_id=unit_id, raw_action_key="build"
            )
        for raw_action_key, enabled in sorted(unit_map.items()):
            if enabled and raw_action_key.startswith("goto_"):
                direction = int(raw_action_key.split("_", 1)[1])
                catalog.append(
                    LegalAction(
                        action_type="move_unit",
                        label=f"Move unit {unit_id} in direction {direction}",
                        unit_id=unit_id,
                        direction=direction,
                        raw_action_key=raw_action_key,
                    )
                )
                lookup[("move_unit", unit_id, direction, None)] = ActionRef(
                    controller="unit", actor_id=unit_id, raw_action_key=raw_action_key
                )

    # City controller: production changes for units and for improvements.
    production_prefixes = ("change_unit_prod_", "change_improve_prod_")
    for raw_city_id, city_map in _controller_actions(snapshot, "city").items():
        city_id = int(raw_city_id)
        for raw_action_key, enabled in sorted(city_map.items()):
            if not enabled:
                continue
            matched = next((p for p in production_prefixes if raw_action_key.startswith(p)), None)
            if matched is None:
                continue
            target = _parse_target_name(raw_action_key, matched)
            catalog.append(
                LegalAction(
                    action_type="set_city_production",
                    label=f"Set city {city_id} production to {target}",
                    city_id=city_id,
                    target=target,
                    raw_action_key=raw_action_key,
                )
            )
            lookup[("set_city_production", city_id, None, target)] = ActionRef(
                controller="city", actor_id=city_id, raw_action_key=raw_action_key
            )

    # Tech controller: research choices live under the "cur_player" actor.
    research_map = _controller_actions(snapshot, "tech").get("cur_player", {})
    for raw_action_key, enabled in sorted(research_map.items()):
        if enabled and raw_action_key.startswith("research_tech_"):
            target = _parse_target_name(raw_action_key, "research_tech_")
            catalog.append(
                LegalAction(
                    action_type="set_research",
                    label=f"Research {target}",
                    target=target,
                    raw_action_key=raw_action_key,
                )
            )
            lookup[("set_research", None, None, target)] = ActionRef(
                controller="tech", actor_id="cur_player", raw_action_key=raw_action_key
            )

    def _ordering(entry: LegalAction):
        # Falsy ids/directions/targets collapse to -1 / "" so the tuple is always comparable.
        return (
            entry.action_type,
            entry.unit_id or -1,
            entry.city_id or -1,
            entry.direction or -1,
            entry.target or "",
        )

    catalog.sort(key=_ordering)
    return catalog, lookup
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _extract_unit_summaries(snapshot: RawSnapshot) -> list[UnitSummary]:
    """Build one ``UnitSummary`` per unit in the snapshot state, ordered by numeric unit id."""
    unit_actions = _controller_actions(snapshot, "unit")

    def _as_int(record: dict[str, Any], field: str) -> int:
        # Missing, None and other falsy values all become 0.
        return int(record.get(field, 0) or 0)

    summaries: list[UnitSummary] = []
    ordered_units = sorted(snapshot.state.get("unit", {}).items(), key=lambda pair: int(pair[0]))
    for actor_id, unit in ordered_units:
        # The action map may be keyed by the string form of the id or by the id itself.
        action_map = unit_actions.get(str(actor_id), unit_actions.get(actor_id, {}))
        directions = [
            int(raw_action_key.split("_", 1)[1])
            for raw_action_key, enabled in action_map.items()
            if enabled and raw_action_key.startswith("goto_")
        ]
        directions.sort()
        home_city = unit.get("home_city")
        summaries.append(
            UnitSummary(
                unit_id=int(actor_id),
                unit_type=str(unit.get("type_rule_name", "Unknown")),
                health=_as_int(unit, "health"),
                # Prefer "moves_left", falling back to "movesleft" when that key is absent.
                moves_left=int(unit.get("moves_left", unit.get("movesleft", 0)) or 0),
                # None, -1 and "" are all treated as "no home city".
                home_city_id=None if home_city in (None, -1, "") else int(home_city),
                veteran_level=_as_int(unit, "veteran"),
                can_build_city=bool(action_map.get("build", False)),
                move_directions=directions,
            )
        )
    return summaries
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def _extract_city_summaries(snapshot: RawSnapshot) -> list[CitySummary]:
    """Build one ``CitySummary`` per city in the snapshot state, ordered by numeric city id."""
    city_actions = _controller_actions(snapshot, "city")

    def _as_int(record: dict[str, Any], field: str) -> int:
        # Missing, None and other falsy values all become 0.
        return int(record.get(field, 0) or 0)

    def _optional(record: dict[str, Any], field: str, cast):
        # Keep None as None; otherwise convert with ``cast``.
        raw = record.get(field)
        return None if raw is None else cast(raw)

    summaries: list[CitySummary] = []
    ordered_cities = sorted(snapshot.state.get("city", {}).items(), key=lambda pair: int(pair[0]))
    for actor_id, city in ordered_cities:
        # The action map may be keyed by the string form of the id or by the id itself.
        action_map = city_actions.get(str(actor_id), city_actions.get(actor_id, {}))
        ordered_actions = sorted(action_map.items())
        # Unit production targets come first, then improvement targets, each in key order.
        production_options: list[str] = []
        for prefix in ("change_unit_prod_", "change_improve_prod_"):
            production_options.extend(
                _parse_target_name(raw_action_key, prefix)
                for raw_action_key, enabled in ordered_actions
                if enabled and raw_action_key.startswith(prefix)
            )
        summaries.append(
            CitySummary(
                city_id=int(actor_id),
                size=_as_int(city, "size"),
                prod_food=_as_int(city, "prod_food"),
                prod_shield=_as_int(city, "prod_shield"),
                prod_trade=_as_int(city, "prod_trade"),
                surplus_food=_as_int(city, "surplus_food"),
                surplus_shield=_as_int(city, "surplus_shield"),
                surplus_trade=_as_int(city, "surplus_trade"),
                production_kind=_optional(city, "production_kind", int),
                production_value=_optional(city, "production_value", int),
                turns_to_complete=_optional(city, "turns_to_prod_complete", float),
                production_options=production_options,
            )
        )
    return summaries
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def _build_summary(
|
| 276 |
+
snapshot: RawSnapshot,
|
| 277 |
+
metrics: SnapshotMetrics,
|
| 278 |
+
units: list[UnitSummary],
|
| 279 |
+
cities: list[CitySummary],
|
| 280 |
+
legal_actions: list[LegalAction],
|
| 281 |
+
) -> str:
|
| 282 |
+
player = snapshot.state.get("player", {})
|
| 283 |
+
lines = [
|
| 284 |
+
f"Turn {snapshot.turn}",
|
| 285 |
+
f"Score {metrics.score:.1f}",
|
| 286 |
+
f"Map: {metrics.known_tiles} known tiles, {metrics.visible_tiles} visible tiles",
|
| 287 |
+
f"Economy: {player.get('my_gold', 0)} gold, science rate {player.get('my_science', 0)}%",
|
| 288 |
+
f"Cities: {metrics.city_count}",
|
| 289 |
+
]
|
| 290 |
+
for city in cities[:5]:
|
| 291 |
+
lines.append(
|
| 292 |
+
f"- City {city.city_id}: size {city.size}, food {city.prod_food}/{city.surplus_food:+d}, "
|
| 293 |
+
f"shields {city.prod_shield}/{city.surplus_shield:+d}, trade {city.prod_trade}/{city.surplus_trade:+d}"
|
| 294 |
+
)
|
| 295 |
+
lines.append(f"Units: {metrics.unit_count}")
|
| 296 |
+
for unit in units[:8]:
|
| 297 |
+
lines.append(
|
| 298 |
+
f"- Unit {unit.unit_id}: {unit.unit_type}, hp {unit.health}, moves_left {unit.moves_left}, "
|
| 299 |
+
f"build_city={str(unit.can_build_city).lower()}, move_dirs={unit.move_directions}"
|
| 300 |
+
)
|
| 301 |
+
lines.append(f"Techs researched: {metrics.techs_researched}")
|
| 302 |
+
lines.append(f"Legal actions exposed: {len(legal_actions)}")
|
| 303 |
+
return "\n".join(lines)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def prepare_observation(
    snapshot: RawSnapshot,
    *,
    reward: float,
    done: bool,
    status: str,
    metadata: dict[str, Any] | None = None,
) -> PreparedObservation:
    """Turn a raw snapshot into a ``PreparedObservation``.

    Args:
        snapshot: Raw turn data to summarize.
        reward: Reward value to attach to the observation.
        done: Whether the episode is finished.
        status: Status string to attach to the observation.
        metadata: Optional extra data; a falsy value is replaced by ``{}``.

    Returns:
        The observation together with its metrics and the action-ref lookup
        table built from the same snapshot.
    """
    legal_actions, action_refs = _extract_legal_actions(snapshot)
    metrics = extract_metrics(snapshot)
    units = _extract_unit_summaries(snapshot)
    cities = _extract_city_summaries(snapshot)
    summary_text = _build_summary(snapshot, metrics, units, cities, legal_actions)

    observation_fields = dict(
        turn=snapshot.turn,
        score=metrics.score,
        known_tiles=metrics.known_tiles,
        visible_tiles=metrics.visible_tiles,
        city_count=metrics.city_count,
        unit_count=metrics.unit_count,
        techs_researched=metrics.techs_researched,
        status=status,
        summary=summary_text,
        units=units,
        cities=cities,
        legal_actions=legal_actions,
        reward=reward,
        done=done,
        metadata=metadata or {},
    )
    return PreparedObservation(
        observation=FreecivObservation(**observation_fields),
        metrics=metrics,
        action_refs=action_refs,
    )
|
build/lib/freeciv_env/client.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from openenv.core.client_types import StepResult
|
| 4 |
+
from openenv.core.env_client import EnvClient
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class FreecivEnv(EnvClient[FreecivAction, FreecivObservation, FreecivState]):
    """``EnvClient`` specialization that speaks the Freeciv action/observation/state models."""

    def _step_payload(self, action: FreecivAction) -> dict:
        """Serialize ``action`` for a step request, leaving out fields that are ``None``."""
        payload = action.model_dump(exclude_none=True)
        return payload

    def _parse_result(self, payload: dict) -> StepResult[FreecivObservation]:
        """Build a ``StepResult`` from a response payload.

        ``payload["observation"]`` is required; ``reward`` defaults to ``None``
        and ``done`` to ``False`` when absent.
        """
        return StepResult(
            observation=FreecivObservation(**payload["observation"]),
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: dict) -> FreecivState:
        """Build a ``FreecivState`` from the payload's key/value pairs."""
        return FreecivState(**payload)
|
build/lib/freeciv_env/grpo.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from typing import Iterable
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, LegalAction
|
| 7 |
+
|
| 8 |
+
# Instruction text that restricts the model's reply to a bare integer action index.
SYSTEM_PROMPT = (
    "You are choosing the next action for a Freeciv agent. "
    "Return only the integer index of the best legal action. "
    "Do not output words, punctuation, JSON, or explanations."
)

# Default task description; build_turn_prompt places it at the top of each turn prompt.
TASK_PROMPT = (
    "Pick the legal action index that maximizes immediate reward. "
    "Invalid actions are penalized. Shorter outputs are better."
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def format_action_line(index: int, action: LegalAction) -> str:
    """Format one menu line as ``"<index>: <label>"`` for a legal action."""
    return f"{index}: {action.label}"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def build_turn_prompt(observation: FreecivObservation, task_prompt: str = TASK_PROMPT) -> str:
    """Assemble the per-turn prompt: task text, state summary, numbered legal actions, reply rule.

    The sections are separated by blank lines, and the action numbering matches
    positions in ``observation.legal_actions``.
    """
    menu = "\n".join(
        format_action_line(position, option)
        for position, option in enumerate(observation.legal_actions)
    )
    sections = [
        f"{task_prompt}",
        f"State:\n{observation.summary}",
        "Legal actions:\n" + menu,
        "Return exactly one integer index.",
    ]
    return "\n\n".join(sections)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def parse_action_choice(completion_text: str, legal_actions: Iterable[LegalAction]) -> FreecivAction | None:
    """Translate a model completion into the ``FreecivAction`` it selects.

    The first (optionally negative) integer in ``completion_text`` is used as
    an index into ``legal_actions``.

    Returns:
        The matching ``FreecivAction``, or ``None`` when the text holds no
        integer or the index is out of range.

    Raises:
        ValueError: If the selected legal action has an unsupported action type.
    """
    options = list(legal_actions)
    found = re.search(r"-?\d+", completion_text)
    if found is None:
        return None
    index = int(found.group(0))
    if not 0 <= index < len(options):
        return None

    action = options[index]
    # Fields copied from the legal action onto the FreecivAction, per action type.
    copied_fields = {
        "end_turn": (),
        "move_unit": ("unit_id", "direction"),
        "build_city": ("unit_id",),
        "set_city_production": ("city_id", "target"),
        "set_research": ("target",),
    }
    kind = action.action_type
    if kind not in copied_fields:
        raise ValueError(f"unsupported action_type: {action.action_type}")
    arguments = {field: getattr(action, field) for field in copied_fields[kind]}
    return FreecivAction(action_type=kind, **arguments)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def action_priority(action: LegalAction) -> tuple[int, int]:
    """Return the ``(rank, tiebreak)`` priority of a legal action; larger tuples win.

    Ranks: build_city 500, set_research 400, set_city_production 300 (350 when
    the target is "Settlers"), move_unit 200 with the negated direction as the
    tiebreak, end_turn 0, and -1000 for any other action type.
    """
    kind = action.action_type
    if kind == "move_unit":
        return (200, -(action.direction or 0))
    if kind == "set_city_production":
        settlers_bonus = 50 if (action.target or "") == "Settlers" else 0
        return (300 + settlers_bonus, 0)
    fixed_ranks = {"build_city": 500, "set_research": 400, "end_turn": 0}
    return (fixed_ranks.get(kind, -1000), 0)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def oracle_action_index(legal_actions: Iterable[LegalAction]) -> int:
    """Return the index of the legal action with the highest ``action_priority``.

    When several actions share the highest priority, the earliest one wins.

    Raises:
        ValueError: If ``legal_actions`` is empty.
    """
    candidates = list(legal_actions)
    if not candidates:
        raise ValueError("no legal actions available")
    # max() keeps the first maximal element, matching a strict ">" scan.
    return max(range(len(candidates)), key=lambda position: action_priority(candidates[position]))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def reward_from_oracle(completions, best_index, **kwargs):
    """Score each completion against the expected action index.

    Per completion: ``1.0`` when the first integer found equals the expected
    index, ``0.0`` when it differs, and ``-0.25`` when no integer is present.
    Non-string completions are converted with ``str()`` before searching.
    Extra keyword arguments are accepted and ignored.
    """
    del kwargs

    def _score(completion, expected):
        text = completion if isinstance(completion, str) else str(completion)
        found = re.search(r"-?\d+", text)
        if found is None:
            return -0.25
        return 1.0 if int(found.group(0)) == int(expected) else 0.0

    return [_score(completion, expected) for completion, expected in zip(completions, best_index)]
|
build/lib/freeciv_env/models.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Literal
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field, model_validator
|
| 6 |
+
|
| 7 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Compact per-unit record; FreecivObservation.units is a list of these.
# unit_id and unit_type are required, every other field has a default.
class UnitSummary(BaseModel):
    unit_id: int = Field(..., description="Freeciv unit id")
    unit_type: str = Field(..., description="Ruleset unit type name")
    health: int = Field(0, description="Current health")
    moves_left: int = Field(0, description="Movement points remaining")
    # None when the unit has no home city.
    home_city_id: int | None = Field(None, description="Home city id, if any")
    veteran_level: int = Field(0, description="Veteran level")
    can_build_city: bool = Field(False, description="Whether the unit can found a city now")
    # default_factory gives every instance its own empty list.
    move_directions: list[int] = Field(default_factory=list, description="Legal move direction indexes")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Compact per-city record; FreecivObservation.cities is a list of these.
# city_id and size are required, every other field has a default.
class CitySummary(BaseModel):
    city_id: int = Field(..., description="Freeciv city id")
    size: int = Field(..., description="Population size")
    # Gross outputs.
    prod_food: int = Field(0, description="Gross food output")
    prod_shield: int = Field(0, description="Gross shield output")
    prod_trade: int = Field(0, description="Gross trade output")
    # Net surpluses.
    surplus_food: int = Field(0, description="Net food surplus")
    surplus_shield: int = Field(0, description="Net shield surplus")
    surplus_trade: int = Field(0, description="Net trade surplus")
    # Raw Freeciv identifiers of what is being produced; None when not supplied.
    production_kind: int | None = Field(None, description="Current production kind enum from Freeciv")
    production_value: int | None = Field(None, description="Current production value id from Freeciv")
    turns_to_complete: float | None = Field(None, description="Turns until current production completes")
    # default_factory gives every instance its own empty list.
    production_options: list[str] = Field(default_factory=list, description="Legal production targets")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# One entry of FreecivObservation.legal_actions.
# action_type accepts the same five literals as FreecivAction.action_type;
# only action_type and label are required, the remaining fields default to None.
class LegalAction(BaseModel):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    label: str = Field(..., description="Human-readable action label")
    unit_id: int | None = Field(None, description="Target unit id")
    city_id: int | None = Field(None, description="Target city id")
    direction: int | None = Field(None, description="Freeciv direction index 0..7")
    target: str | None = Field(None, description="Production or tech target name")
    raw_action_key: str | None = Field(None, description="Underlying freeciv-bot action key")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Action submitted to the environment. Which optional fields must be present
# depends on action_type and is enforced by validate_shape below.
class FreecivAction(Action):
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    unit_id: int | None = None
    city_id: int | None = None
    direction: int | None = None
    target: str | None = None

    @model_validator(mode="after")
    def validate_shape(self) -> "FreecivAction":
        """Check that the fields required by ``action_type`` are present.

        end_turn needs nothing; move_unit needs unit_id and direction;
        build_city needs unit_id; set_city_production needs city_id and a
        non-empty target; set_research needs a non-empty target.

        Raises:
            ValueError: when a required field is missing or the action type
                is not one of the supported values.
        """
        kind = self.action_type
        if kind == "end_turn":
            return self

        if kind == "move_unit":
            complete = self.unit_id is not None and self.direction is not None
            problem = "move_unit requires unit_id and direction"
        elif kind == "build_city":
            complete = self.unit_id is not None
            problem = "build_city requires unit_id"
        elif kind == "set_city_production":
            complete = self.city_id is not None and bool(self.target)
            problem = "set_city_production requires city_id and target"
        elif kind == "set_research":
            complete = bool(self.target)
            problem = "set_research requires target"
        else:
            complete = False
            problem = f"unsupported action_type: {self.action_type}"

        if not complete:
            raise ValueError(problem)
        return self
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# Observation returned by the environment after reset/step.
# The scalar game counters and `summary` are required; `status`, the list
# fields, `reward` and `done` have defaults.
class FreecivObservation(Observation):
    turn: int = Field(..., description="Current game turn")
    score: float = Field(..., description="Current player score")
    known_tiles: int = Field(..., description="Tiles known to the player")
    visible_tiles: int = Field(..., description="Tiles currently visible to the player")
    city_count: int = Field(..., description="Number of owned cities")
    unit_count: int = Field(..., description="Number of owned units")
    techs_researched: int = Field(..., description="Number of researched techs")
    status: str = Field("ok", description="High-level environment status")
    summary: str = Field(..., description="Compact text summary for LLMs")
    units: list[UnitSummary] = Field(default_factory=list, description="Compact unit summaries")
    cities: list[CitySummary] = Field(default_factory=list, description="Compact city summaries")
    legal_actions: list[LegalAction] = Field(default_factory=list, description="Legal actions exposed by the environment")
    # NOTE(review): reward/done presumably redeclare fields of the base Observation
    # to attach defaults and descriptions - confirm against openenv's Observation.
    reward: float = Field(0.0, description="Reward from the last action")
    done: bool = Field(False, description="Whether the episode is done")
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
# Environment state extension: the same scalar game counters that
# FreecivObservation carries, each defaulting to zero.
class FreecivState(State):
    turn: int = Field(0, description="Current game turn")
    score: float = Field(0.0, description="Current player score")
    known_tiles: int = Field(0, description="Known tiles")
    visible_tiles: int = Field(0, description="Visible tiles")
    city_count: int = Field(0, description="Owned city count")
    unit_count: int = Field(0, description="Owned unit count")
    techs_researched: int = Field(0, description="Researched tech count")
|
build/lib/freeciv_env/runtime.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import json
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
from typing import Protocol
|
| 8 |
+
from urllib.parse import urlencode, urlparse
|
| 9 |
+
from urllib.request import Request, urlopen
|
| 10 |
+
|
| 11 |
+
from freeciv_env.adapter import ActionRef, RawSnapshot
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class FreecivSession(Protocol):
    """Structural interface for a Freeciv game session.

    Every state-changing call returns the ``RawSnapshot`` that results from it.
    """

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Begin a fresh session and return its first snapshot."""

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Execute the referenced action and return the resulting snapshot."""

    def end_turn(self) -> RawSnapshot:
        """End the current turn and return the resulting snapshot."""

    def close(self) -> None:
        """Release the resources held by the session."""
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class _InteractiveBot:
    """Builds a freecivbot ``BaseBot`` subclass bound to a session.

    The constructed bot is exposed as ``self.impl``. It forwards game
    snapshots to the owning ``LiveFreecivSession`` through the session's
    ``_publish_snapshot`` method.
    """

    def __init__(self, session: "LiveFreecivSession"):
        # Imported here so freecivbot is only needed when a bot is actually built.
        from freecivbot.bot.base_bot import BaseBot

        class InteractiveBotImpl(BaseBot):
            def __init__(self, owner: "LiveFreecivSession"):
                super().__init__()
                self._owner = owner

            def conduct_turn(self, pplayer, info_controls, end_turn_hook):
                # Run the base implementation first, then publish a snapshot.
                super().conduct_turn(pplayer, info_controls, end_turn_hook)
                self._publish_snapshot()

            def calculate_next_move(self):
                # Publishes a snapshot instead of choosing a move, and only while
                # the (BaseBot-managed) _turn_active flag is set.
                if self._turn_active:
                    self._publish_snapshot()

            def _publish_snapshot(self):
                # NOTE(review): _acquire_state() presumably refreshes _turn_state and
                # _turn_opts on the base bot before they are read - confirm in BaseBot.
                self._acquire_state()
                self._owner._publish_snapshot(
                    RawSnapshot(
                        turn=self.turn,
                        state=self._turn_state,
                        actions=self._turn_opts,
                    )
                )

        self.impl = InteractiveBotImpl(session)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class _ConfiguredCivClient:
    """Builds a freecivbot ``CivClient`` subclass with customised handlers.

    The constructed client is exposed as ``self.impl``. The subclass overrides
    the login message, chat handling and connection-info handling.
    """

    def __init__(self, bot, user_name: str, *, client_port: int, visual_monitor: bool = False):
        # Imported here so freecivbot is only needed when a client is actually built.
        from freecivbot.civclient import CivClient

        class ConfiguredCivClientImpl(CivClient):
            def init_control(self, ws_client):
                """Store the websocket client, initialise controllers and send the login packet."""
                self.ws_client = ws_client
                self.init_controller()
                if self.visual_monitor:
                    self.monitor.start_monitor()
                # Login packet; "port" carries this client's configured client_port.
                login_message = {
                    "pid": 4,
                    "username": self.user_name,
                    "capability": "+Freeciv.Web.Devel-3.2",
                    "version_label": "-dev",
                    "major_version": 3,
                    "minor_version": 1,
                    "patch_version": 90,
                    "port": self.client_port,
                    "password": None,
                    "subject": None,
                }
                self.ws_client.send(login_message)

            def handle_chat_msg(self, packet):
                """Print chat packets and start game preparation once the login is confirmed."""
                from freecivbot.utils.fc_events import E_UNDEFINED

                message = packet["message"]
                conn_id = packet["conn_id"]
                event = packet["event"]

                if message is None:
                    return
                # Missing or out-of-range event types are reported and replaced by E_UNDEFINED.
                if event is None or event < 0 or event >= E_UNDEFINED:
                    print("Undefined message event type")
                    print(packet)
                    print("\r\n")
                    packet["event"] = event = E_UNDEFINED

                if conn_id in self.clstate.connections:
                    # Message from a known connection: prefix it with the sender's username.
                    message = "<b>" + self.clstate.connections[conn_id]["username"] + ":</b>" + message
                else:
                    # Metaserver chatter is dropped without printing.
                    if "/metamessage" in message:
                        return
                    if "Metaserver message string" in message:
                        return

                packet["message"] = message
                print(packet)
                print("\r\n")

                # The login confirmation triggers single-player setup and game preparation.
                if "You are logged in as" in message:
                    self.ws_client.send_message("/set minplayers 1")
                    self.prepare_game()

            def handle_conn_info(self, packet):
                """Update the client's connection list from a connection-info packet."""
                from freecivbot.connectivity.client_state import C_S_PREPARING
                from freecivbot.utils.freecivlog import freelog

                pconn = self.clstate.find_conn_by_id(packet["id"])

                if not packet["used"]:
                    # The server is dropping this connection.
                    if pconn is None:
                        freelog(f"Server removed unknown connection {packet['id']}")
                        return
                    self.clstate.client_remove_cli_conn(pconn)
                    pconn = None
                else:
                    pplayer = self.player_ctrl.valid_player_by_number(packet["player_num"])
                    if pplayer is None:
                        return
                    packet["playing"] = pplayer

                    if self.clstate.has_id(packet["id"]):
                        self.clstate.init_state(packet)

                    self.clstate.conn_list_append(packet)

                # NOTE(review): on the "not used" path packet["playing"] is not set by
                # this method; this lookup presumably relies on has_id() being false or
                # on the server having supplied the key - confirm.
                if self.clstate.has_id(packet["id"]) and self.clstate.cur_player() != packet["playing"]:
                    self.clstate.set_client_state(C_S_PREPARING)

        self.impl = ConfiguredCivClientImpl(
            bot,
            user_name,
            client_port=client_port,
            visual_monitor=visual_monitor,
        )
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class _ConfiguredCivConnection:
|
| 144 |
+
def __init__(self, civ_client, base_url: str, *, owner: "LiveFreecivSession", wait_for_server: int = 120, retry_interval: int = 5):
|
| 145 |
+
from math import ceil
|
| 146 |
+
|
| 147 |
+
import websocket
|
| 148 |
+
|
| 149 |
+
self._websocket = websocket
|
| 150 |
+
self.client = civ_client
|
| 151 |
+
self.base_url = base_url
|
| 152 |
+
self._owner = owner
|
| 153 |
+
self._loop = None
|
| 154 |
+
self._owner._connection = self
|
| 155 |
+
self.civserverport = self._reserve_client_port(base_url, civ_client.client_port)
|
| 156 |
+
self.client.client_port = self.civserverport
|
| 157 |
+
self.proxyport = 1000 + self.civserverport
|
| 158 |
+
self._retry_interval = retry_interval
|
| 159 |
+
self._num_retries = int(ceil(wait_for_server / retry_interval))
|
| 160 |
+
self._cur_retry = 0
|
| 161 |
+
self._ws_url = self._build_ws_url(base_url)
|
| 162 |
+
self.network_init()
|
| 163 |
+
|
| 164 |
+
def _build_ws_url(self, base_url: str) -> str:
|
| 165 |
+
parsed = urlparse(base_url)
|
| 166 |
+
scheme = "wss" if parsed.scheme == "https" else "ws"
|
| 167 |
+
host = parsed.hostname or "localhost"
|
| 168 |
+
port = parsed.port
|
| 169 |
+
if port is None:
|
| 170 |
+
port = 443 if scheme == "wss" else 80
|
| 171 |
+
return f"{scheme}://{host}:{port}/civsocket/{self.proxyport}"
|
| 172 |
+
|
| 173 |
+
def _reserve_client_port(self, base_url: str, requested_port: int) -> int:
|
| 174 |
+
parsed = urlparse(base_url)
|
| 175 |
+
scheme = parsed.scheme or "http"
|
| 176 |
+
host = parsed.hostname or "localhost"
|
| 177 |
+
port = parsed.port
|
| 178 |
+
if port is None:
|
| 179 |
+
port = 443 if scheme == "https" else 80
|
| 180 |
+
query = urlencode({"civserverport": requested_port})
|
| 181 |
+
launcher_url = f"{scheme}://{host}:{port}/civclientlauncher?{query}"
|
| 182 |
+
request = Request(launcher_url, method="POST")
|
| 183 |
+
with urlopen(request, timeout=10) as response:
|
| 184 |
+
result = response.headers.get("result")
|
| 185 |
+
reserved_port = response.headers.get("port")
|
| 186 |
+
if result != "success" or reserved_port is None:
|
| 187 |
+
raise RuntimeError(f"failed to reserve freeciv client port via {launcher_url}")
|
| 188 |
+
return int(reserved_port)
|
| 189 |
+
|
| 190 |
+
def _retry(self):
|
| 191 |
+
self._cur_retry += 1
|
| 192 |
+
time.sleep(self._retry_interval)
|
| 193 |
+
return self._detect_server_up()
|
| 194 |
+
|
| 195 |
+
def _detect_server_up(self):
|
| 196 |
+
ws = self._websocket.WebSocket()
|
| 197 |
+
try:
|
| 198 |
+
ws.connect(self._ws_url, timeout=10)
|
| 199 |
+
return True
|
| 200 |
+
except Exception as err:
|
| 201 |
+
print("Connect not successful:", err, " retrying in %s seconds." % self._retry_interval)
|
| 202 |
+
if self._cur_retry < self._num_retries:
|
| 203 |
+
return self._retry()
|
| 204 |
+
return False
|
| 205 |
+
finally:
|
| 206 |
+
try:
|
| 207 |
+
ws.close()
|
| 208 |
+
except Exception:
|
| 209 |
+
pass
|
| 210 |
+
|
| 211 |
+
def network_init(self):
|
| 212 |
+
self._cur_retry = 0
|
| 213 |
+
print("Connecting to server at %s ..." % self.base_url)
|
| 214 |
+
if self._detect_server_up():
|
| 215 |
+
self.websocket_init()
|
| 216 |
+
else:
|
| 217 |
+
print("Connection could not be established!")
|
| 218 |
+
|
| 219 |
+
def websocket_init(self):
|
| 220 |
+
from tornado import ioloop
|
| 221 |
+
|
| 222 |
+
from freecivbot.connectivity.clinet import CivWSClient
|
| 223 |
+
|
| 224 |
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
| 225 |
+
ioloop.IOLoop.clear_current()
|
| 226 |
+
self._loop = ioloop.IOLoop.current()
|
| 227 |
+
|
| 228 |
+
client = CivWSClient(self.client)
|
| 229 |
+
|
| 230 |
+
def send_json(data):
|
| 231 |
+
if not client._ws_connection:
|
| 232 |
+
raise RuntimeError("Web socket connection is closed.")
|
| 233 |
+
msg = json.dumps(data, separators=(",", ":"))
|
| 234 |
+
client._ws_connection.write_message(msg)
|
| 235 |
+
|
| 236 |
+
client.send = send_json
|
| 237 |
+
client.connect(self._ws_url)
|
| 238 |
+
|
| 239 |
+
try:
|
| 240 |
+
self._loop.start()
|
| 241 |
+
except KeyboardInterrupt:
|
| 242 |
+
client.close()
|
| 243 |
+
|
| 244 |
+
def submit(self, fn) -> None:
|
| 245 |
+
if self._loop is None:
|
| 246 |
+
raise RuntimeError("freeciv connection loop is not ready")
|
| 247 |
+
done = threading.Event()
|
| 248 |
+
error: BaseException | None = None
|
| 249 |
+
|
| 250 |
+
def run():
|
| 251 |
+
nonlocal error
|
| 252 |
+
try:
|
| 253 |
+
fn()
|
| 254 |
+
except BaseException as exc:
|
| 255 |
+
error = exc
|
| 256 |
+
finally:
|
| 257 |
+
done.set()
|
| 258 |
+
|
| 259 |
+
self._loop.add_callback(run)
|
| 260 |
+
if not done.wait(timeout=10):
|
| 261 |
+
raise TimeoutError("timed out dispatching action to freeciv loop")
|
| 262 |
+
if error is not None:
|
| 263 |
+
raise error
|
| 264 |
+
|
| 265 |
+
def close(self) -> None:
|
| 266 |
+
if self._loop is None:
|
| 267 |
+
return
|
| 268 |
+
self.submit(self.client.close)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
class LiveFreecivSession:
    """Game session backed by a live freeciv server connection.

    ``reset`` starts a background thread that builds the bot, client and
    connection. Snapshots published by the bot are handed to callers through
    an event/lock pair; actions are dispatched onto the connection's loop
    through ``_ConfiguredCivConnection.submit``.
    """

    def __init__(
        self,
        *,
        username: str = "openenvbot",
        client_port: int = 6000,
        base_url: str = "http://localhost",
        turn_timeout_s: float = 60.0,
    ):
        """Store the configuration; nothing is connected until ``reset``.

        Args:
            username: base name; a numeric suffix is appended per reset.
            client_port: base port; ``reset`` adds an offset of 0..2.
            base_url: HTTP(S) base URL of the freeciv server.
            turn_timeout_s: maximum seconds to wait for each snapshot.
        """
        self.username = username
        self.client_port = client_port
        self.base_url = base_url
        self.turn_timeout_s = turn_timeout_s

        self._bot_wrapper: _InteractiveBot | None = None
        self._client = None
        self._connection: _ConfiguredCivConnection | None = None
        self._thread: threading.Thread | None = None
        # Set whenever a new snapshot (or a thread error) is available.
        self._ready = threading.Event()
        self._snapshot_lock = threading.Lock()
        self._snapshot: RawSnapshot | None = None
        self._thread_error: BaseException | None = None
        self._reset_counter = 0
        # Per-instance offset used to vary usernames and client ports.
        self._session_seed = time.monotonic_ns() % 1_000_000

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Tear down any running session, start a new one and return its first snapshot.

        ``seed`` is accepted for interface compatibility and ignored.
        """
        del seed
        self.close()
        self._reset_counter += 1
        username = self._next_username()
        # Rotate over three consecutive ports starting at the configured base port.
        client_port = self.client_port + ((self._session_seed + self._reset_counter - 1) % 3)

        self._ready.clear()
        self._thread_error = None
        self._snapshot = None

        self._bot_wrapper = _InteractiveBot(self)
        self._client = _ConfiguredCivClient(
            self._bot_wrapper.impl,
            username,
            client_port=client_port,
            visual_monitor=False,
        ).impl

        def run() -> None:
            # Constructing the connection runs its event loop on this thread.
            try:
                _ConfiguredCivConnection(self._client, self.base_url, owner=self)
            except BaseException as exc:  # pragma: no cover - surfaced in waiters
                self._thread_error = exc
                # Wake any waiter so the error is reported immediately.
                self._ready.set()

        self._thread = threading.Thread(target=run, name="freeciv-live-session", daemon=True)
        self._thread.start()
        return self._wait_for_snapshot("reset")

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Trigger the referenced action and return the next snapshot.

        Raises:
            ValueError: if the action is not among the currently valid
                actions of the latest snapshot.
        """
        snapshot = self._require_snapshot()
        action_list = snapshot.actions[action_ref.controller]
        valid_actions = action_list.get_actions(action_ref.actor_id, valid_only=True)
        action = None if valid_actions is None else valid_actions.get(action_ref.raw_action_key)
        if action is None:
            raise ValueError(
                f"action {action_ref.raw_action_key} is no longer valid for {action_ref.controller}:{action_ref.actor_id}"
            )
        # Clear before dispatching so the wait below only sees a newer publication.
        self._ready.clear()
        connection = self._require_connection()
        connection.submit(lambda: action_list.trigger_validated_action(action))
        return self._wait_for_snapshot(action_ref.raw_action_key)

    def end_turn(self) -> RawSnapshot:
        """End the current turn on the bot and return the next snapshot.

        Raises:
            RuntimeError: if ``reset`` has not been called.
        """
        if self._bot_wrapper is None:
            raise RuntimeError("session has not been reset")
        self._ready.clear()
        connection = self._require_connection()
        connection.submit(self._bot_wrapper.impl.end_turn)
        return self._wait_for_snapshot("end_turn")

    def close(self) -> None:
        """Best-effort shutdown: close the connection or client, join the thread, clear state."""
        if self._connection is not None:
            try:
                self._connection.close()
            except Exception:
                # Shutdown errors are deliberately ignored.
                pass
        elif self._client is not None:
            try:
                self._client.close()
            except Exception:
                pass
        if self._thread is not None and self._thread.is_alive():
            # Bounded wait; state is cleared below even if the thread is still running.
            self._thread.join(timeout=5)
        self._bot_wrapper = None
        self._client = None
        self._connection = None
        self._thread = None
        self._snapshot = None
        self._thread_error = None
        self._ready.clear()

    def _publish_snapshot(self, snapshot: RawSnapshot) -> None:
        """Store the newest snapshot and wake any waiter (called by the bot)."""
        with self._snapshot_lock:
            self._snapshot = snapshot
        self._ready.set()

    def _next_username(self) -> str:
        """Return the base username, truncated as needed, plus a numeric suffix.

        When the suffix is shorter than 31 characters the result is at most 31
        characters long.
        """
        suffix = str(self._session_seed + self._reset_counter)
        prefix_len = max(1, 31 - len(suffix))
        return f"{self.username[:prefix_len]}{suffix}"

    def _require_connection(self) -> _ConfiguredCivConnection:
        """Return the active connection or raise RuntimeError if there is none."""
        if self._connection is None:
            raise RuntimeError("freeciv connection is not ready")
        return self._connection

    def _require_snapshot(self) -> RawSnapshot:
        """Return the latest snapshot or raise RuntimeError if none was published."""
        with self._snapshot_lock:
            if self._snapshot is None:
                raise RuntimeError("no live snapshot is available")
            return self._snapshot

    def _wait_for_snapshot(self, reason: str) -> RawSnapshot:
        """Block until a snapshot is published, polling in 0.1 s slices.

        Raises:
            RuntimeError: if the background thread recorded an error.
            TimeoutError: if nothing is published within ``turn_timeout_s``.
        """
        deadline = time.monotonic() + self.turn_timeout_s
        while time.monotonic() < deadline:
            if self._thread_error is not None:
                raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
            if self._ready.wait(timeout=0.1):
                # The event is also set on thread failure, so re-check the error first.
                if self._thread_error is not None:
                    raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error
                snapshot = self._require_snapshot()
                if snapshot is not None:
                    return snapshot
        raise TimeoutError(f"timed out waiting for freeciv snapshot during {reason}")
|
build/lib/freeciv_env/server/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.server.freeciv_environment import FreecivEnvironment
|
| 2 |
+
|
| 3 |
+
__all__ = ["FreecivEnvironment"]
|
build/lib/freeciv_env/server/app.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
from openenv.core.env_server import create_app
|
| 6 |
+
|
| 7 |
+
from freeciv_env.models import FreecivAction, FreecivObservation
|
| 8 |
+
from freeciv_env.runtime import LiveFreecivSession
|
| 9 |
+
from freeciv_env.server.freeciv_environment import FreecivEnvironment
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def create_live_session() -> LiveFreecivSession:
    """Build a ``LiveFreecivSession`` configured from environment variables.

    Reads FREECIV_USERNAME (default "openenvbot"), FREECIV_CLIENT_PORT
    (default "6000"), FREECIV_SERVER_URL (default "http://localhost") and
    FREECIV_TURN_TIMEOUT_S (default "60").
    """
    env = os.getenv
    username = env("FREECIV_USERNAME", "openenvbot")
    port = int(env("FREECIV_CLIENT_PORT", "6000"))
    server_url = env("FREECIV_SERVER_URL", "http://localhost")
    timeout_s = float(env("FREECIV_TURN_TIMEOUT_S", "60"))
    return LiveFreecivSession(
        username=username,
        client_port=port,
        base_url=server_url,
        turn_timeout_s=timeout_s,
    )
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def create_freeciv_app(*, session_factory=create_live_session, max_turns: int | None = None):
    """Create the OpenEnv application serving ``FreecivEnvironment``.

    Args:
        session_factory: zero-argument callable producing a session for each
            environment instance.
        max_turns: per-episode turn limit; when None it is read from the
            FREECIV_MAX_TURNS environment variable (default "50").
    """
    turn_limit = int(os.getenv("FREECIV_MAX_TURNS", "50")) if max_turns is None else max_turns

    def build_environment():
        return FreecivEnvironment(session_factory=session_factory, max_turns=turn_limit)

    return create_app(
        build_environment,
        FreecivAction,
        FreecivObservation,
        env_name="freeciv_env",
    )
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Module-level application object, built at import time with the default
# session factory; main() below serves it with uvicorn.
app = create_freeciv_app()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def main() -> None:
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8000.

    Websocket ping interval and ping timeout are both set to 300.
    """
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000, ws_ping_interval=300, ws_ping_timeout=300)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Start the server when this module is executed as a script.
if __name__ == "__main__":
    main()
|
build/lib/freeciv_env/server/freeciv_environment.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Callable
|
| 4 |
+
from uuid import uuid4
|
| 5 |
+
|
| 6 |
+
from openenv.core.env_server.interfaces import Environment
|
| 7 |
+
|
| 8 |
+
from freeciv_env.adapter import (
|
| 9 |
+
ActionLookupKey,
|
| 10 |
+
ActionRef,
|
| 11 |
+
PreparedObservation,
|
| 12 |
+
RawSnapshot,
|
| 13 |
+
SnapshotMetrics,
|
| 14 |
+
action_lookup_key,
|
| 15 |
+
prepare_observation,
|
| 16 |
+
)
|
| 17 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
|
| 18 |
+
from freeciv_env.runtime import FreecivSession
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class FreecivEnvironment(Environment[FreecivAction, FreecivObservation, FreecivState]):
|
| 22 |
+
SUPPORTS_CONCURRENT_SESSIONS = False
|
| 23 |
+
|
| 24 |
+
def __init__(self, session_factory: Callable[[], FreecivSession], max_turns: int = 50):
    """Create an environment that has no live session yet.

    Args:
        session_factory: zero-argument callable returning a new session;
            invoked on every ``reset``.
        max_turns: turn limit stored on the instance.
    """
    super().__init__()
    self.max_turns = max_turns
    self._session_factory = session_factory
    # Nothing is connected until reset(); these are filled in by _commit().
    self._session: FreecivSession | None = None
    self._snapshot: RawSnapshot | None = None
    self._metrics: SnapshotMetrics | None = None
    self._action_refs: dict[ActionLookupKey, ActionRef] = {}
    initial_episode = str(uuid4())
    self._state = FreecivState(episode_id=initial_episode, step_count=0)
|
| 33 |
+
|
| 34 |
+
def reset(
    self,
    seed: int | None = None,
    episode_id: str | None = None,
    **kwargs,
) -> FreecivObservation:
    """Start a new episode and return its first observation.

    Any existing session is closed, a new one is created from the session
    factory and reset with ``seed``. The step counter restarts at zero.

    Args:
        seed: forwarded to the session's ``reset``.
        episode_id: identifier for the new episode; a random UUID string is
            generated when omitted or empty.
        **kwargs: accepted for interface compatibility and ignored.
    """
    del kwargs
    self.close()
    self._session = self._session_factory()
    snapshot = self._session.reset(seed=seed)
    prepared = prepare_observation(
        snapshot,
        reward=0.0,
        done=self._is_done(snapshot),
        status="ready",
        metadata={},
    )
    # _commit copies step_count from the current state, so it has to be zeroed
    # here; otherwise the new episode would inherit the previous episode's count.
    self._state.step_count = 0
    self._commit(snapshot, prepared, episode_id=episode_id or str(uuid4()))
    return prepared.observation
|
| 53 |
+
|
| 54 |
+
def step(
    self,
    action: FreecivAction,
    timeout_s: float | None = None,
    **kwargs,
) -> FreecivObservation:
    """Apply one action and return the resulting observation.

    ``end_turn`` is forwarded to the session directly. Any other action is
    looked up among the action references of the last observation; an action
    that is not found yields an ``invalid_action`` observation with reward
    -0.25 built from the unchanged snapshot. The step counter is incremented
    in every case.

    Args:
        action: the action to apply.
        timeout_s: accepted for interface compatibility and ignored.
        **kwargs: ignored.

    Raises:
        RuntimeError: if the environment has not been reset.
    """
    del timeout_s, kwargs
    if self._session is None or self._snapshot is None or self._metrics is None:
        raise RuntimeError("environment must be reset before step")

    self._state.step_count += 1

    if action.action_type == "end_turn":
        next_snapshot = self._session.end_turn()
    else:
        ref = self._action_refs.get(action_lookup_key(action))
        if ref is None:
            rejected = prepare_observation(
                self._snapshot,
                reward=-0.25,
                done=self._is_done(self._snapshot),
                status="invalid_action",
                metadata={"error": "action is not currently legal"},
            )
            # Keep the stored snapshot; only metrics, refs and state are refreshed.
            self._commit(self._snapshot, rejected, episode_id=self._state.episode_id, replace_snapshot=False)
            return rejected.observation
        next_snapshot = self._session.apply_action(ref)

    # Shared success path for end_turn and regular actions.
    reward = self._reward_for_transition(action, self._metrics, next_snapshot)
    outcome = prepare_observation(
        next_snapshot,
        reward=reward,
        done=self._is_done(next_snapshot),
        status="ok",
        metadata={},
    )
    self._commit(next_snapshot, outcome, episode_id=self._state.episode_id)
    return outcome.observation
|
| 101 |
+
|
| 102 |
+
@property
def state(self) -> FreecivState:
    """The most recently committed environment state."""
    current = self._state
    return current
|
| 105 |
+
|
| 106 |
+
def close(self) -> None:
    """Close the active session, if any, and clear all cached episode data."""
    active_session = self._session
    if active_session is not None:
        active_session.close()
    # Reset in the same order as before: session, snapshot, metrics, refs.
    self._session = self._snapshot = self._metrics = None
    self._action_refs = {}
|
| 113 |
+
|
| 114 |
+
def _commit(
    self,
    snapshot: RawSnapshot,
    prepared: PreparedObservation,
    *,
    episode_id: str,
    replace_snapshot: bool = True,
) -> None:
    """Store the outcome of a transition on the environment.

    Metrics and action references are always taken from ``prepared``. The
    raw snapshot is only replaced when ``replace_snapshot`` is true. A new
    ``FreecivState`` is built from the prepared observation, carrying over
    the current step count.
    """
    if replace_snapshot:
        self._snapshot = snapshot
    self._metrics = prepared.metrics
    self._action_refs = prepared.action_refs

    # Fields copied verbatim from the observation into the public state.
    mirrored = (
        "turn",
        "score",
        "known_tiles",
        "visible_tiles",
        "city_count",
        "unit_count",
        "techs_researched",
    )
    observation = prepared.observation
    self._state = FreecivState(
        episode_id=episode_id,
        step_count=self._state.step_count,
        **{name: getattr(observation, name) for name in mirrored},
    )
|
| 137 |
+
|
| 138 |
+
def _reward_for_transition(
    self,
    action: FreecivAction,
    previous: SnapshotMetrics,
    next_snapshot: RawSnapshot,
) -> float:
    """Compute the shaped reward for moving from ``previous`` to ``next_snapshot``.

    The reward is a fixed base value per action type plus weighted,
    non-negative gains in score, known tiles, city count and researched
    techs. An action type missing from the base table raises ``KeyError``.
    """
    from freeciv_env.adapter import extract_metrics

    current = extract_metrics(next_snapshot)
    base_reward = {
        "end_turn": 0.0,
        "move_unit": 0.01,
        "build_city": 0.10,
        "set_city_production": 0.05,
        "set_research": 0.05,
    }
    total = base_reward[action.action_type]

    # (clamped gain, weight) pairs, accumulated in a fixed order.
    weighted_gains = (
        (max(current.score - previous.score, 0.0), 0.02),
        (max(current.known_tiles - previous.known_tiles, 0), 0.01),
        (max(current.city_count - previous.city_count, 0), 0.50),
        (max(current.techs_researched - previous.techs_researched, 0), 0.25),
    )
    for gain, weight in weighted_gains:
        total += gain * weight
    return float(total)
|
| 159 |
+
|
| 160 |
+
def _is_done(self, snapshot: RawSnapshot) -> bool:
|
| 161 |
+
player = snapshot.state.get("player", {})
|
| 162 |
+
alive = bool(player.get("my_is_alive", True))
|
| 163 |
+
return (not alive) or snapshot.turn >= self.max_turns
|
build/lib/server/__init__.py
ADDED
|
File without changes
|
build/lib/server/app.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.server.app import app as app
|
| 2 |
+
from freeciv_env.server.app import main as _main
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def main() -> None:
    """Run the server by delegating to ``freeciv_env.server.app.main``."""
    _main()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
|
client.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.client import *
|
freeciv_env.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: freeciv-env
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: OpenEnv environment for Freeciv via freeciv-bot
|
| 5 |
+
Requires-Python: >=3.11
|
| 6 |
+
Description-Content-Type: text/markdown
|
| 7 |
+
Requires-Dist: openenv-core[core]==0.2.1
|
| 8 |
+
Requires-Dist: freecivbot @ git+https://github.com/chris1869/freeciv-bot.git
|
| 9 |
+
Requires-Dist: uvicorn>=0.35.0
|
| 10 |
+
Provides-Extra: dev
|
| 11 |
+
Requires-Dist: pytest>=8.4.1; extra == "dev"
|
| 12 |
+
Requires-Dist: requests>=2.32.5; extra == "dev"
|
| 13 |
+
Provides-Extra: train
|
| 14 |
+
Requires-Dist: accelerate>=1.10.0; extra == "train"
|
| 15 |
+
Requires-Dist: bitsandbytes>=0.47.0; extra == "train"
|
| 16 |
+
Requires-Dist: datasets>=4.0.0; extra == "train"
|
| 17 |
+
Requires-Dist: trl>=0.24.0; extra == "train"
|
| 18 |
+
Requires-Dist: unsloth>=2026.3.4; extra == "train"
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
title: Freeciv Environment Server
|
| 22 |
+
emoji: 🎮
|
| 23 |
+
colorFrom: blue
|
| 24 |
+
colorTo: indigo
|
| 25 |
+
sdk: docker
|
| 26 |
+
pinned: false
|
| 27 |
+
app_port: 8000
|
| 28 |
+
base_path: /web
|
| 29 |
+
tags:
|
| 30 |
+
- openenv
|
| 31 |
+
---
|
| 32 |
+
|
| 33 |
+
# freeciv-env
|
| 34 |
+
|
| 35 |
+
OpenEnv environment for Freeciv, built on top of `freeciv-bot`.
|
| 36 |
+
|
| 37 |
+
## Current scope
|
| 38 |
+
|
| 39 |
+
This environment exposes a small, trainable action surface:
|
| 40 |
+
|
| 41 |
+
- `end_turn`
|
| 42 |
+
- `move_unit(unit_id, direction)`
|
| 43 |
+
- `build_city(unit_id)`
|
| 44 |
+
- `set_city_production(city_id, target)`
|
| 45 |
+
- `set_research(tech_name)`
|
| 46 |
+
|
| 47 |
+
Observations are text-first and include compact structured summaries of:
|
| 48 |
+
|
| 49 |
+
- current turn
|
| 50 |
+
- score
|
| 51 |
+
- known and visible map tiles
|
| 52 |
+
- units
|
| 53 |
+
- cities
|
| 54 |
+
- legal actions
|
| 55 |
+
|
| 56 |
+
## Local development
|
| 57 |
+
|
| 58 |
+
Install dependencies:
|
| 59 |
+
|
| 60 |
+
```bash
|
| 61 |
+
uv sync --extra dev
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
Run tests:
|
| 65 |
+
|
| 66 |
+
```bash
|
| 67 |
+
uv run pytest
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
Run the server:
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
uv run uvicorn freeciv_env.server.app:app --host 0.0.0.0 --port 8000
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
Run the fast GRPO loop:
|
| 77 |
+
|
| 78 |
+
```bash
|
| 79 |
+
uv sync --extra dev --extra train
|
| 80 |
+
uv run python scripts/train_grpo_fast.py --env-url http://127.0.0.1 --max-steps 50
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
## Hackathon / Unsloth notes
|
| 84 |
+
|
| 85 |
+
For the hackathon Colab submission path on H100s, Unsloth recommended the BF16 OpenEnv gpt-oss 20B notebook:
|
| 86 |
+
|
| 87 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
|
| 88 |
+
|
| 89 |
+
If you adapt that notebook for this environment, reduce `max_steps` to `300` for a faster run.
|
| 90 |
+
|
| 91 |
+
Useful notebook indexes:
|
| 92 |
+
|
| 93 |
+
- RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
|
| 94 |
+
- all notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
|
| 95 |
+
- notebook repo: <https://github.com/unslothai/notebooks/tree/main/nb>
|
| 96 |
+
|
| 97 |
+
If GRPO is too slow, start from a smaller notebook with `fast_inference = True` and add the Freeciv/OpenEnv calls:
|
| 98 |
+
|
| 99 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
|
| 100 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
|
| 101 |
+
|
| 102 |
+
If vLLM GRPO fails, Unsloth suggested a clean virtualenv install:
|
| 103 |
+
|
| 104 |
+
```bash
|
| 105 |
+
python -m venv unsloth_env
|
| 106 |
+
source unsloth_env/bin/activate
|
| 107 |
+
pip install --upgrade pip && pip install uv
|
| 108 |
+
uv pip install unsloth vllm --torch-backend=auto
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
If Unsloth is already installed, update it for the latest GRPO fixes:
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
## Live runtime requirements
|
| 118 |
+
|
| 119 |
+
The default server app uses `freeciv-bot` against a local Freeciv Web runtime.
|
| 120 |
+
|
| 121 |
+
Environment variables:
|
| 122 |
+
|
| 123 |
+
- `FREECIV_SERVER_URL` (default: `http://127.0.0.1`)
|
| 124 |
+
- `FREECIV_USERNAME` (default: `openenvbot`)
|
| 125 |
+
- `FREECIV_CLIENT_PORT` (default: `6000`)
|
| 126 |
+
- `FREECIV_TURN_TIMEOUT_S` (default: `60`)
|
| 127 |
+
|
| 128 |
+
The included automated tests use a fake session backend, so they do not require a live Freeciv server.
|
| 129 |
+
|
| 130 |
+
The GRPO training script uses:
|
| 131 |
+
|
| 132 |
+
- `Qwen/Qwen3.5-0.8B`
|
| 133 |
+
- Unsloth bf16 LoRA loading
|
| 134 |
+
- TRL `GRPOTrainer`
|
| 135 |
+
- integer-only action selection to minimize generated tokens
|
| 136 |
+
- offline GRPO over env-sampled states for maximum throughput
|
freeciv_env.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
freeciv_env/__init__.py
|
| 4 |
+
freeciv_env/adapter.py
|
| 5 |
+
freeciv_env/client.py
|
| 6 |
+
freeciv_env/grpo.py
|
| 7 |
+
freeciv_env/models.py
|
| 8 |
+
freeciv_env/runtime.py
|
| 9 |
+
freeciv_env.egg-info/PKG-INFO
|
| 10 |
+
freeciv_env.egg-info/SOURCES.txt
|
| 11 |
+
freeciv_env.egg-info/dependency_links.txt
|
| 12 |
+
freeciv_env.egg-info/entry_points.txt
|
| 13 |
+
freeciv_env.egg-info/requires.txt
|
| 14 |
+
freeciv_env.egg-info/top_level.txt
|
| 15 |
+
freeciv_env/server/__init__.py
|
| 16 |
+
freeciv_env/server/app.py
|
| 17 |
+
freeciv_env/server/freeciv_environment.py
|
| 18 |
+
server/__init__.py
|
| 19 |
+
server/app.py
|
| 20 |
+
tests/test_adapter.py
|
| 21 |
+
tests/test_environment.py
|
| 22 |
+
tests/test_grpo_utils.py
|
| 23 |
+
tests/test_server_roundtrip.py
|
freeciv_env.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
freeciv_env.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = server.app:main
|
freeciv_env.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]==0.2.1
|
| 2 |
+
freecivbot @ git+https://github.com/chris1869/freeciv-bot.git
|
| 3 |
+
uvicorn>=0.35.0
|
| 4 |
+
|
| 5 |
+
[dev]
|
| 6 |
+
pytest>=8.4.1
|
| 7 |
+
requests>=2.32.5
|
| 8 |
+
|
| 9 |
+
[train]
|
| 10 |
+
accelerate>=1.10.0
|
| 11 |
+
bitsandbytes>=0.47.0
|
| 12 |
+
datasets>=4.0.0
|
| 13 |
+
trl>=0.24.0
|
| 14 |
+
unsloth>=2026.3.4
|
freeciv_env.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
freeciv_env
|
| 2 |
+
server
|
freeciv_env/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.client import FreecivEnv
|
| 2 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState, LegalAction
|
| 3 |
+
|
| 4 |
+
# Public names re-exported by the package.
__all__ = [
    "FreecivAction",
    "FreecivEnv",
    "FreecivObservation",
    "FreecivState",
    "LegalAction",
]
|
freeciv_env/adapter.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import CitySummary, FreecivAction, FreecivObservation, LegalAction, UnitSummary
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# Key identifying a legal action: (action_type, unit_id or city_id, direction, target).
# Slots that do not apply to a given action type are None.
ActionLookupKey = tuple[str, int | None, int | None, str | None]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass(frozen=True)
class ActionRef:
    """Immutable reference to one raw backend action of a controller/actor pair."""

    # Controller name; this module creates refs for "unit", "city" and "tech".
    controller: str
    # Unit or city id, or the string "cur_player" for tech actions.
    actor_id: int | str
    # Action key exactly as it appears in the raw action map.
    raw_action_key: str
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@dataclass
class RawSnapshot:
    """Raw game data for one point in time: turn number, state and actions."""

    # Turn number the snapshot was taken at.
    turn: int
    # Raw state; this module reads its "player", "map", "city" and "unit" entries.
    state: dict[str, Any]
    # Raw action maps keyed by controller name ("unit", "city", "tech").
    actions: dict[str, Any]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass(frozen=True)
class SnapshotMetrics:
    """Immutable scalar metrics derived from a snapshot by ``extract_metrics``."""

    # Player score ("my_score" in the player state).
    score: float
    # Number of map status cells with a value greater than zero.
    known_tiles: int
    # Number of map status cells with a value of at least two.
    visible_tiles: int
    # Number of entries in the snapshot's "city" state.
    city_count: int
    # Number of entries in the snapshot's "unit" state.
    unit_count: int
    # "my_techs_researched" from the player state.
    techs_researched: int
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class PreparedObservation:
    """Bundle returned by ``prepare_observation``."""

    # Observation object handed back to the agent.
    observation: FreecivObservation
    # Metrics extracted from the same snapshot as the observation.
    metrics: SnapshotMetrics
    # Lookup table from action key tuples to executable action references.
    action_refs: dict[ActionLookupKey, ActionRef]
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _map_status_rows(raw_state: dict[str, Any]) -> list[list[int | float]]:
|
| 44 |
+
raw_map = raw_state.get("map", {})
|
| 45 |
+
status = raw_map.get("status", [])
|
| 46 |
+
return status if isinstance(status, list) else []
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def count_known_tiles(raw_state: dict[str, Any]) -> int:
    """Count map status cells whose value is truthy and greater than zero."""
    known = 0
    for row in _map_status_rows(raw_state):
        for value in row:
            if value and value > 0:
                known += 1
    return known
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def count_visible_tiles(raw_state: dict[str, Any]) -> int:
    """Count map status cells whose value is truthy and at least two."""
    visible = 0
    for row in _map_status_rows(raw_state):
        for value in row:
            if value and value >= 2:
                visible += 1
    return visible
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def extract_metrics(snapshot: RawSnapshot) -> SnapshotMetrics:
    """Derive the scalar metrics used for rewards and summaries from a snapshot.

    Missing or ``None`` player values fall back to zero, so a snapshot whose
    player data is incomplete yields zeroed metrics rather than an error.
    """
    # Tolerate an explicit ``"player": None`` entry as well as a missing one.
    player = snapshot.state.get("player") or {}
    return SnapshotMetrics(
        # Guard against ``None`` the same way ``techs_researched`` does below.
        score=float(player.get("my_score", 0.0) or 0.0),
        known_tiles=count_known_tiles(snapshot.state),
        visible_tiles=count_visible_tiles(snapshot.state),
        city_count=len(snapshot.state.get("city", {})),
        unit_count=len(snapshot.state.get("unit", {})),
        techs_researched=int(player.get("my_techs_researched", 0) or 0),
    )
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def action_lookup_key(action: FreecivAction) -> ActionLookupKey:
|
| 70 |
+
if action.action_type == "move_unit":
|
| 71 |
+
return ("move_unit", action.unit_id, action.direction, None)
|
| 72 |
+
if action.action_type == "build_city":
|
| 73 |
+
return ("build_city", action.unit_id, None, None)
|
| 74 |
+
if action.action_type == "set_city_production":
|
| 75 |
+
return ("set_city_production", action.city_id, None, action.target)
|
| 76 |
+
if action.action_type == "set_research":
|
| 77 |
+
return ("set_research", None, None, action.target)
|
| 78 |
+
return ("end_turn", None, None, None)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _parse_target_name(raw_action_key: str, prefix: str) -> str:
|
| 82 |
+
suffix = raw_action_key.removeprefix(prefix)
|
| 83 |
+
name, _sep, _tail = suffix.rpartition("_")
|
| 84 |
+
return name or suffix
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _controller_actions(snapshot: RawSnapshot, controller: str) -> dict[str, Any]:
|
| 89 |
+
raw_actions = snapshot.actions.get(controller, {})
|
| 90 |
+
if isinstance(raw_actions, dict):
|
| 91 |
+
return raw_actions
|
| 92 |
+
if hasattr(raw_actions, "json_struct"):
|
| 93 |
+
json_actions = raw_actions.json_struct()
|
| 94 |
+
return json_actions if isinstance(json_actions, dict) else {}
|
| 95 |
+
return {}
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _extract_legal_actions(snapshot: RawSnapshot) -> tuple[list[LegalAction], dict[ActionLookupKey, ActionRef]]:
    """Translate the raw per-controller action maps into typed legal actions.

    Returns the sorted list of ``LegalAction`` entries, which always contains
    ``end_turn``, together with a table mapping each action's lookup key to
    the ``ActionRef`` needed to execute it. ``end_turn`` has no table entry.
    """
    exposed: list[LegalAction] = [
        LegalAction(
            action_type="end_turn",
            label="End the current turn",
            raw_action_key="__end_turn__",
        )
    ]
    lookup: dict[ActionLookupKey, ActionRef] = {}

    # Units: an optional "build" action plus every enabled "goto_<direction>".
    for raw_unit_id, unit_map in _controller_actions(snapshot, "unit").items():
        unit_id = int(raw_unit_id)
        if unit_map.get("build"):
            exposed.append(
                LegalAction(
                    action_type="build_city",
                    label=f"Build a city with unit {unit_id}",
                    unit_id=unit_id,
                    raw_action_key="build",
                )
            )
            lookup[("build_city", unit_id, None, None)] = ActionRef(
                controller="unit",
                actor_id=unit_id,
                raw_action_key="build",
            )
        for key, is_enabled in sorted(unit_map.items()):
            if is_enabled and key.startswith("goto_"):
                direction = int(key.split("_", 1)[1])
                exposed.append(
                    LegalAction(
                        action_type="move_unit",
                        label=f"Move unit {unit_id} in direction {direction}",
                        unit_id=unit_id,
                        direction=direction,
                        raw_action_key=key,
                    )
                )
                lookup[("move_unit", unit_id, direction, None)] = ActionRef(
                    controller="unit",
                    actor_id=unit_id,
                    raw_action_key=key,
                )

    # Cities: enabled unit and improvement production changes.
    production_prefixes = ("change_unit_prod_", "change_improve_prod_")
    for raw_city_id, city_map in _controller_actions(snapshot, "city").items():
        city_id = int(raw_city_id)
        for key, is_enabled in sorted(city_map.items()):
            if not is_enabled:
                continue
            matched = next((p for p in production_prefixes if key.startswith(p)), None)
            if matched is None:
                continue
            target = _parse_target_name(key, matched)
            exposed.append(
                LegalAction(
                    action_type="set_city_production",
                    label=f"Set city {city_id} production to {target}",
                    city_id=city_id,
                    target=target,
                    raw_action_key=key,
                )
            )
            lookup[("set_city_production", city_id, None, target)] = ActionRef(
                controller="city",
                actor_id=city_id,
                raw_action_key=key,
            )

    # Research: enabled "research_tech_*" actions of the current player.
    research_map = _controller_actions(snapshot, "tech").get("cur_player", {})
    for key, is_enabled in sorted(research_map.items()):
        if is_enabled and key.startswith("research_tech_"):
            target = _parse_target_name(key, "research_tech_")
            exposed.append(
                LegalAction(
                    action_type="set_research",
                    label=f"Research {target}",
                    target=target,
                    raw_action_key=key,
                )
            )
            lookup[("set_research", None, None, target)] = ActionRef(
                controller="tech",
                actor_id="cur_player",
                raw_action_key=key,
            )

    def _ordering(item: LegalAction):
        # Falsy ids/directions sort as -1 and a missing target as "".
        return (
            item.action_type,
            item.unit_id or -1,
            item.city_id or -1,
            item.direction or -1,
            item.target or "",
        )

    exposed.sort(key=_ordering)
    return exposed, lookup
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _extract_unit_summaries(snapshot: RawSnapshot) -> list[UnitSummary]:
    """Build one ``UnitSummary`` per unit in the snapshot, ordered by numeric id.

    Move directions and the build-city flag are read from the unit's entry
    in the "unit" controller action map, if it has one.
    """
    actions_by_unit = _controller_actions(snapshot, "unit")
    raw_units = snapshot.state.get("unit", {})
    summaries: list[UnitSummary] = []
    for raw_id in sorted(raw_units, key=int):
        unit = raw_units[raw_id]
        # Action maps may be keyed by either the string or the raw id.
        unit_map = actions_by_unit.get(str(raw_id), actions_by_unit.get(raw_id, {}))
        directions = sorted(
            int(key.split("_", 1)[1])
            for key, is_enabled in unit_map.items()
            if is_enabled and key.startswith("goto_")
        )
        home_city = unit.get("home_city")
        summaries.append(
            UnitSummary(
                unit_id=int(raw_id),
                unit_type=str(unit.get("type_rule_name", "Unknown")),
                health=int(unit.get("health", 0) or 0),
                moves_left=int(unit.get("moves_left", unit.get("movesleft", 0)) or 0),
                home_city_id=None if home_city in (None, -1, "") else int(home_city),
                veteran_level=int(unit.get("veteran", 0) or 0),
                can_build_city=bool(unit_map.get("build", False)),
                move_directions=directions,
            )
        )
    return summaries
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def _extract_city_summaries(snapshot: RawSnapshot) -> list[CitySummary]:
    """Build one ``CitySummary`` per city in the snapshot, ordered by numeric id.

    Production options list the enabled unit production targets first,
    followed by the enabled improvement targets.
    """

    def _as_int(value):
        # Numeric fields default to 0 when missing or falsy.
        return int(value or 0)

    def _optional(cast, value):
        # Keep ``None`` as is; cast every other value.
        return None if value is None else cast(value)

    actions_by_city = _controller_actions(snapshot, "city")
    raw_cities = snapshot.state.get("city", {})
    summaries: list[CitySummary] = []
    for raw_id in sorted(raw_cities, key=int):
        city = raw_cities[raw_id]
        # Action maps may be keyed by either the string or the raw id.
        city_map = actions_by_city.get(str(raw_id), actions_by_city.get(raw_id, {}))
        enabled_keys = [key for key, is_enabled in sorted(city_map.items()) if is_enabled]
        options = [
            _parse_target_name(key, prefix)
            for prefix in ("change_unit_prod_", "change_improve_prod_")
            for key in enabled_keys
            if key.startswith(prefix)
        ]
        summaries.append(
            CitySummary(
                city_id=int(raw_id),
                size=_as_int(city.get("size", 0)),
                prod_food=_as_int(city.get("prod_food", 0)),
                prod_shield=_as_int(city.get("prod_shield", 0)),
                prod_trade=_as_int(city.get("prod_trade", 0)),
                surplus_food=_as_int(city.get("surplus_food", 0)),
                surplus_shield=_as_int(city.get("surplus_shield", 0)),
                surplus_trade=_as_int(city.get("surplus_trade", 0)),
                production_kind=_optional(int, city.get("production_kind")),
                production_value=_optional(int, city.get("production_value")),
                turns_to_complete=_optional(float, city.get("turns_to_prod_complete")),
                production_options=options,
            )
        )
    return summaries
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def _build_summary(
|
| 276 |
+
snapshot: RawSnapshot,
|
| 277 |
+
metrics: SnapshotMetrics,
|
| 278 |
+
units: list[UnitSummary],
|
| 279 |
+
cities: list[CitySummary],
|
| 280 |
+
legal_actions: list[LegalAction],
|
| 281 |
+
) -> str:
|
| 282 |
+
player = snapshot.state.get("player", {})
|
| 283 |
+
lines = [
|
| 284 |
+
f"Turn {snapshot.turn}",
|
| 285 |
+
f"Score {metrics.score:.1f}",
|
| 286 |
+
f"Map: {metrics.known_tiles} known tiles, {metrics.visible_tiles} visible tiles",
|
| 287 |
+
f"Economy: {player.get('my_gold', 0)} gold, science rate {player.get('my_science', 0)}%",
|
| 288 |
+
f"Cities: {metrics.city_count}",
|
| 289 |
+
]
|
| 290 |
+
for city in cities[:5]:
|
| 291 |
+
lines.append(
|
| 292 |
+
f"- City {city.city_id}: size {city.size}, food {city.prod_food}/{city.surplus_food:+d}, "
|
| 293 |
+
f"shields {city.prod_shield}/{city.surplus_shield:+d}, trade {city.prod_trade}/{city.surplus_trade:+d}"
|
| 294 |
+
)
|
| 295 |
+
lines.append(f"Units: {metrics.unit_count}")
|
| 296 |
+
for unit in units[:8]:
|
| 297 |
+
lines.append(
|
| 298 |
+
f"- Unit {unit.unit_id}: {unit.unit_type}, hp {unit.health}, moves_left {unit.moves_left}, "
|
| 299 |
+
f"build_city={str(unit.can_build_city).lower()}, move_dirs={unit.move_directions}"
|
| 300 |
+
)
|
| 301 |
+
lines.append(f"Techs researched: {metrics.techs_researched}")
|
| 302 |
+
lines.append(f"Legal actions exposed: {len(legal_actions)}")
|
| 303 |
+
return "\n".join(lines)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def prepare_observation(
    snapshot: RawSnapshot,
    *,
    reward: float,
    done: bool,
    status: str,
    metadata: dict[str, Any] | None = None,
) -> PreparedObservation:
    """Turn a raw snapshot into an observation plus its metrics and action refs.

    ``reward``, ``done`` and ``status`` are copied onto the observation.
    ``metadata`` falls back to an empty dict when it is ``None`` or empty.
    """
    legal_actions, action_refs = _extract_legal_actions(snapshot)
    metrics = extract_metrics(snapshot)
    units = _extract_unit_summaries(snapshot)
    cities = _extract_city_summaries(snapshot)
    summary_text = _build_summary(snapshot, metrics, units, cities, legal_actions)

    observation = FreecivObservation(
        turn=snapshot.turn,
        score=metrics.score,
        known_tiles=metrics.known_tiles,
        visible_tiles=metrics.visible_tiles,
        city_count=metrics.city_count,
        unit_count=metrics.unit_count,
        techs_researched=metrics.techs_researched,
        status=status,
        summary=summary_text,
        units=units,
        cities=cities,
        legal_actions=legal_actions,
        reward=reward,
        done=done,
        metadata=metadata or {},
    )
    return PreparedObservation(
        observation=observation,
        metrics=metrics,
        action_refs=action_refs,
    )
|
freeciv_env/client.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from openenv.core.client_types import StepResult
|
| 4 |
+
from openenv.core.env_client import EnvClient
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class FreecivEnv(EnvClient[FreecivAction, FreecivObservation, FreecivState]):
    """Client for the Freeciv environment server.

    Serializes actions into request payloads and parses step and state
    payloads back into the typed models.
    """

    def _step_payload(self, action: FreecivAction) -> dict:
        """Serialize ``action``, leaving out fields that are ``None``."""
        payload = action.model_dump(exclude_none=True)
        return payload

    def _parse_result(self, payload: dict) -> StepResult[FreecivObservation]:
        """Build a ``StepResult`` from a step response payload.

        ``reward`` defaults to ``None`` and ``done`` to ``False`` when absent.
        """
        return StepResult(
            observation=FreecivObservation(**payload["observation"]),
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: dict) -> FreecivState:
        """Build a ``FreecivState`` from a state response payload."""
        parsed_state = FreecivState(**payload)
        return parsed_state
|
freeciv_env/grpo.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from typing import Iterable
|
| 5 |
+
|
| 6 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, LegalAction
|
| 7 |
+
|
| 8 |
+
# System prompt: restricts the model to answering with a bare action index.
SYSTEM_PROMPT = " ".join(
    (
        "You are choosing the next action for a Freeciv agent.",
        "Return only the integer index of the best legal action.",
        "Do not output words, punctuation, JSON, or explanations.",
    )
)

# Default task prompt placed at the top of every per-turn prompt.
TASK_PROMPT = " ".join(
    (
        "Pick the legal action index that maximizes immediate reward.",
        "Invalid actions are penalized. Shorter outputs are better.",
    )
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def format_action_line(index: int, action: LegalAction) -> str:
    """Render one legal action as ``"<index>: <label>"``."""
    return "{}: {}".format(index, action.label)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def build_turn_prompt(observation: FreecivObservation, task_prompt: str = TASK_PROMPT) -> str:
    """Assemble the per-turn prompt.

    The prompt consists of the task prompt, the state summary, the numbered
    legal actions and a closing instruction, separated by blank lines.
    """
    numbered_actions = "\n".join(
        format_action_line(position, legal)
        for position, legal in enumerate(observation.legal_actions)
    )
    sections = (
        f"{task_prompt}",
        f"State:\n{observation.summary}",
        f"Legal actions:\n{numbered_actions}",
        "Return exactly one integer index.",
    )
    return "\n\n".join(sections)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def parse_action_choice(completion_text: str, legal_actions: Iterable[LegalAction]) -> FreecivAction | None:
    """Map the first integer in ``completion_text`` to a ``FreecivAction``.

    Returns ``None`` when the text contains no integer or the integer is not
    a valid index into ``legal_actions``.

    Raises:
        ValueError: if the selected legal action has an unsupported type.
    """
    options = list(legal_actions)
    found = re.search(r"-?\d+", completion_text)
    if found is None:
        return None
    position = int(found.group(0))
    if not 0 <= position < len(options):
        return None

    chosen = options[position]
    # Fields copied from the legal action, per supported action type.
    carried_fields = (
        ("end_turn", ()),
        ("move_unit", ("unit_id", "direction")),
        ("build_city", ("unit_id",)),
        ("set_city_production", ("city_id", "target")),
        ("set_research", ("target",)),
    )
    for kind, names in carried_fields:
        if chosen.action_type == kind:
            return FreecivAction(
                action_type=kind,
                **{name: getattr(chosen, name) for name in names},
            )
    raise ValueError(f"unsupported action_type: {chosen.action_type}")
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def action_priority(action: LegalAction) -> tuple[int, int]:
|
| 57 |
+
if action.action_type == "build_city":
|
| 58 |
+
return (500, 0)
|
| 59 |
+
if action.action_type == "set_research":
|
| 60 |
+
return (400, 0)
|
| 61 |
+
if action.action_type == "set_city_production":
|
| 62 |
+
bonus = 50 if (action.target or "") == "Settlers" else 0
|
| 63 |
+
return (300 + bonus, 0)
|
| 64 |
+
if action.action_type == "move_unit":
|
| 65 |
+
return (200, -(action.direction or 0))
|
| 66 |
+
if action.action_type == "end_turn":
|
| 67 |
+
return (0, 0)
|
| 68 |
+
return (-1000, 0)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def oracle_action_index(legal_actions: Iterable[LegalAction]) -> int:
    """Return the index of the highest-priority legal action.

    Priorities come from ``action_priority``; when several actions share the
    best priority the earliest one is chosen.

    Raises:
        ValueError: if ``legal_actions`` is empty.
    """
    candidates = list(legal_actions)
    if not candidates:
        raise ValueError("no legal actions available")
    priorities = [action_priority(candidate) for candidate in candidates]
    # list.index returns the first occurrence, so ties keep the earliest action.
    return priorities.index(max(priorities))
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def reward_from_oracle(completions, best_index, **kwargs):
    """Score completions against the oracle's expected action indexes.

    Each completion is paired with the entry of ``best_index`` at the same
    position. The first integer in the completion text is compared with the
    expected index:

    * no integer found -> ``-0.25``
    * integer equals the expected index -> ``1.0``
    * otherwise -> ``0.0``

    Non-string completions are converted with ``str()`` first. Extra keyword
    arguments are accepted and ignored.
    """
    del kwargs

    def score(completion, expected):
        text = completion if isinstance(completion, str) else str(completion)
        found = re.search(r"-?\d+", text)
        if found is None:
            return -0.25
        return 1.0 if int(found.group(0)) == int(expected) else 0.0

    return [score(completion, expected) for completion, expected in zip(completions, best_index)]
|
freeciv_env/models.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Literal
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field, model_validator
|
| 6 |
+
|
| 7 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class UnitSummary(BaseModel):
    # Compact, serialisable description of one unit.
    # Only the id and type are required; every other field has a default.
    unit_id: int = Field(..., description="Freeciv unit id")
    unit_type: str = Field(..., description="Ruleset unit type name")
    health: int = Field(default=0, description="Current health")
    moves_left: int = Field(default=0, description="Movement points remaining")
    home_city_id: int | None = Field(default=None, description="Home city id, if any")
    veteran_level: int = Field(default=0, description="Veteran level")
    can_build_city: bool = Field(
        default=False,
        description="Whether the unit can found a city now",
    )
    move_directions: list[int] = Field(
        default_factory=list,
        description="Legal move direction indexes",
    )
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class CitySummary(BaseModel):
    # Compact, serialisable description of one city.
    # Only the id and size are required; every other field has a default.
    city_id: int = Field(..., description="Freeciv city id")
    size: int = Field(..., description="Population size")

    # Gross output per resource.
    prod_food: int = Field(default=0, description="Gross food output")
    prod_shield: int = Field(default=0, description="Gross shield output")
    prod_trade: int = Field(default=0, description="Gross trade output")

    # Net surplus per resource.
    surplus_food: int = Field(default=0, description="Net food surplus")
    surplus_shield: int = Field(default=0, description="Net shield surplus")
    surplus_trade: int = Field(default=0, description="Net trade surplus")

    # Current production and the targets that may replace it.
    production_kind: int | None = Field(
        default=None,
        description="Current production kind enum from Freeciv",
    )
    production_value: int | None = Field(
        default=None,
        description="Current production value id from Freeciv",
    )
    turns_to_complete: float | None = Field(
        default=None,
        description="Turns until current production completes",
    )
    production_options: list[str] = Field(
        default_factory=list,
        description="Legal production targets",
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class LegalAction(BaseModel):
    # One action the environment currently offers. Which of the optional
    # fields are populated depends on action_type.
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    label: str = Field(..., description="Human-readable action label")
    unit_id: int | None = Field(default=None, description="Target unit id")
    city_id: int | None = Field(default=None, description="Target city id")
    direction: int | None = Field(
        default=None,
        description="Freeciv direction index 0..7",
    )
    target: str | None = Field(
        default=None,
        description="Production or tech target name",
    )
    raw_action_key: str | None = Field(
        default=None,
        description="Underlying freeciv-bot action key",
    )
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class FreecivAction(Action):
    # Action submitted to the environment. The validator below enforces which
    # optional fields must be present for each action_type.
    action_type: Literal[
        "end_turn",
        "move_unit",
        "build_city",
        "set_city_production",
        "set_research",
    ]
    unit_id: int | None = None
    city_id: int | None = None
    direction: int | None = None
    target: str | None = None

    @model_validator(mode="after")
    def validate_shape(self) -> "FreecivAction":
        """Check that the fields required by ``action_type`` are present.

        Raises:
            ValueError: when a required field is missing (``target`` must also
                be non-empty) or the action type is unsupported.
        """
        no_unit = self.unit_id is None
        no_target = not self.target
        # Maps each supported action type to its complaint, or None when valid.
        problems = {
            "end_turn": None,
            "move_unit": (
                "move_unit requires unit_id and direction"
                if no_unit or self.direction is None
                else None
            ),
            "build_city": "build_city requires unit_id" if no_unit else None,
            "set_city_production": (
                "set_city_production requires city_id and target"
                if self.city_id is None or no_target
                else None
            ),
            "set_research": "set_research requires target" if no_target else None,
        }
        if self.action_type not in problems:
            raise ValueError(f"unsupported action_type: {self.action_type}")
        complaint = problems[self.action_type]
        if complaint is not None:
            raise ValueError(complaint)
        return self
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class FreecivObservation(Observation):
    # Observation returned to the agent: scalar counters, a text summary,
    # compact unit/city lists, the legal actions, and reward/done.

    # Required scalar counters.
    turn: int = Field(..., description="Current game turn")
    score: float = Field(..., description="Current player score")
    known_tiles: int = Field(..., description="Tiles known to the player")
    visible_tiles: int = Field(
        ...,
        description="Tiles currently visible to the player",
    )
    city_count: int = Field(..., description="Number of owned cities")
    unit_count: int = Field(..., description="Number of owned units")
    techs_researched: int = Field(..., description="Number of researched techs")

    # Status and text summary.
    status: str = Field(default="ok", description="High-level environment status")
    summary: str = Field(..., description="Compact text summary for LLMs")

    # Structured detail.
    units: list[UnitSummary] = Field(
        default_factory=list,
        description="Compact unit summaries",
    )
    cities: list[CitySummary] = Field(
        default_factory=list,
        description="Compact city summaries",
    )
    legal_actions: list[LegalAction] = Field(
        default_factory=list,
        description="Legal actions exposed by the environment",
    )

    # Step outcome.
    reward: float = Field(default=0.0, description="Reward from the last action")
    done: bool = Field(default=False, description="Whether the episode is done")
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
class FreecivState(State):
    # Environment state counters; every field defaults to zero.
    turn: int = Field(default=0, description="Current game turn")
    score: float = Field(default=0.0, description="Current player score")
    known_tiles: int = Field(default=0, description="Known tiles")
    visible_tiles: int = Field(default=0, description="Visible tiles")
    city_count: int = Field(default=0, description="Owned city count")
    unit_count: int = Field(default=0, description="Owned unit count")
    techs_researched: int = Field(default=0, description="Researched tech count")
|
freeciv_env/runtime.py
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import asyncio
|
| 4 |
+
import json
|
| 5 |
+
import threading
|
| 6 |
+
import time
|
| 7 |
+
from collections import deque
|
| 8 |
+
from typing import Protocol
|
| 9 |
+
from urllib.parse import urlencode, urlparse
|
| 10 |
+
from urllib.request import Request, urlopen
|
| 11 |
+
|
| 12 |
+
from freeciv_env.adapter import ActionRef, RawSnapshot
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# In-memory ring buffer of timestamped debug messages; once 400 entries are
# held, appending a new one discards the oldest.
DEBUG_EVENTS: deque[str] = deque(maxlen=400)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def debug_event(message: str) -> None:
    """Append ``message`` to ``DEBUG_EVENTS`` prefixed with the local HH:MM:SS time."""
    stamp = time.strftime('%H:%M:%S')
    DEBUG_EVENTS.append(f"{stamp} {message}")
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class FreecivSession(Protocol):
    """Structural interface for a Freeciv game session.

    ``LiveFreecivSession`` in this module defines methods with these
    signatures.
    """

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Start a fresh game and return a snapshot of it."""

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Apply the referenced action and return the resulting snapshot."""

    def end_turn(self) -> RawSnapshot:
        """End the current turn and return the resulting snapshot."""

    def close(self) -> None:
        """Release whatever the session holds."""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class _InteractiveBot:
    """Builds a ``BaseBot`` subclass that reports snapshots to a session.

    ``freecivbot`` is imported inside ``__init__``, so the package is only
    needed when an instance is created. The bot itself is exposed as ``impl``.
    """

    def __init__(self, session: "LiveFreecivSession"):
        from freecivbot.bot.base_bot import BaseBot

        class InteractiveBotImpl(BaseBot):
            """Bot that publishes its state to the owner instead of choosing moves."""

            def __init__(self, owner: "LiveFreecivSession"):
                super().__init__()
                self._owner = owner

            def conduct_turn(self, pplayer, info_controls, end_turn_hook):
                # Run the base-class turn handling, then report the new state.
                super().conduct_turn(pplayer, info_controls, end_turn_hook)
                self._publish_snapshot()

            def calculate_next_move(self):
                # Only report while a turn is active.
                if not self._turn_active:
                    return
                self._publish_snapshot()

            def _publish_snapshot(self):
                # State is acquired first, then the turn fields are read.
                self._acquire_state()
                current = RawSnapshot(
                    turn=self.turn,
                    state=self._turn_state,
                    actions=self._turn_opts,
                )
                self._owner._publish_snapshot(current)

        self.impl = InteractiveBotImpl(session)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class _ConfiguredCivClient:
    """Builds a ``CivClient`` subclass with customised login and packet handling.

    ``freecivbot`` is imported inside ``__init__``, so the package is only
    needed when an instance is created. The client itself is exposed as
    ``impl``.
    """

    def __init__(self, bot, user_name: str, *, client_port: int, visual_monitor: bool = False):
        from freecivbot.civclient import CivClient

        class ConfiguredCivClientImpl(CivClient):
            def init_control(self, ws_client):
                """Initialise the controller and send the login packet."""
                self.ws_client = ws_client
                self.init_controller()
                if self.visual_monitor:
                    self.monitor.start_monitor()
                login_message = {
                    "pid": 4,
                    "username": self.user_name,
                    "capability": "+Freeciv.Web.Devel-3.3",
                    "version_label": "-dev",
                    "major_version": 2,
                    "minor_version": 5,
                    "patch_version": 99,
                    "port": self.client_port,
                    "password": None,
                    "subject": None,
                }
                debug_event(f"sending login username={self.user_name} port={self.client_port}")
                self.ws_client.send(login_message)

            def handle_chat_msg(self, packet):
                """Record a chat packet and, once logged in, start the game setup."""
                from freecivbot.utils.fc_events import E_UNDEFINED

                message = packet["message"]
                conn_id = packet["conn_id"]
                event = packet["event"]

                if message is None:
                    return
                if event is None or not 0 <= event < E_UNDEFINED:
                    print("Undefined message event type")
                    print(packet)
                    print("\r\n")
                    event = E_UNDEFINED
                    packet["event"] = event

                if conn_id in self.clstate.connections:
                    # Prefix messages from known connections with the sender name.
                    sender = self.clstate.connections[conn_id]["username"]
                    message = "<b>" + sender + ":</b>" + message
                elif "/metamessage" in message or "Metaserver message string" in message:
                    # Metaserver messages from unknown connections are dropped.
                    return

                packet["message"] = message
                debug_event(f"chat message: {message}")
                print(packet)
                print("\r\n")

                if "You are logged in as" in message:
                    debug_event("logged in; sending /set minplayers 1 and prepare_game")
                    self.ws_client.send_message("/set minplayers 1")
                    self.prepare_game()

            def handle_conn_info(self, packet):
                """Add, update, or remove the connection described by ``packet``."""
                from freecivbot.connectivity.client_state import C_S_PREPARING
                from freecivbot.utils.freecivlog import freelog

                pconn = self.clstate.find_conn_by_id(packet["id"])

                if packet["used"]:
                    pplayer = self.player_ctrl.valid_player_by_number(packet["player_num"])
                    if pplayer is None:
                        return
                    packet["playing"] = pplayer

                    if self.clstate.has_id(packet["id"]):
                        self.clstate.init_state(packet)

                    self.clstate.conn_list_append(packet)
                else:
                    # The server reports the connection as no longer in use.
                    if pconn is None:
                        freelog(f"Server removed unknown connection {packet['id']}")
                        return
                    self.clstate.client_remove_cli_conn(pconn)

                is_own_connection = self.clstate.has_id(packet["id"])
                if is_own_connection and self.clstate.cur_player() != packet["playing"]:
                    self.clstate.set_client_state(C_S_PREPARING)

        self.impl = ConfiguredCivClientImpl(
            bot,
            user_name,
            client_port=client_port,
            visual_monitor=visual_monitor,
        )
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class _ConfiguredCivConnection:
|
| 155 |
+
def __init__(self, civ_client, base_url: str, *, owner: "LiveFreecivSession", wait_for_server: int = 120, retry_interval: int = 5):
|
| 156 |
+
from math import ceil
|
| 157 |
+
|
| 158 |
+
import websocket
|
| 159 |
+
|
| 160 |
+
self._websocket = websocket
|
| 161 |
+
self.client = civ_client
|
| 162 |
+
self.base_url = base_url
|
| 163 |
+
self._owner = owner
|
| 164 |
+
self._loop = None
|
| 165 |
+
self._owner._connection = self
|
| 166 |
+
self.civserverport = self._reserve_client_port(base_url, civ_client.client_port)
|
| 167 |
+
self.client.client_port = self.civserverport
|
| 168 |
+
self.proxyport = 1000 + self.civserverport
|
| 169 |
+
debug_event(f"reserved civ port={self.civserverport} proxyport={self.proxyport}")
|
| 170 |
+
self._retry_interval = retry_interval
|
| 171 |
+
self._num_retries = int(ceil(wait_for_server / retry_interval))
|
| 172 |
+
self._cur_retry = 0
|
| 173 |
+
self._ws_url = self._build_ws_url(base_url)
|
| 174 |
+
self.network_init()
|
| 175 |
+
|
| 176 |
+
def _build_ws_url(self, base_url: str) -> str:
|
| 177 |
+
parsed = urlparse(base_url)
|
| 178 |
+
scheme = "wss" if parsed.scheme == "https" else "ws"
|
| 179 |
+
host = parsed.hostname or "localhost"
|
| 180 |
+
return f"{scheme}://{host}:{self.proxyport}/civsocket/{self.proxyport}"
|
| 181 |
+
|
| 182 |
+
def _reserve_client_port(self, base_url: str, requested_port: int) -> int:
|
| 183 |
+
parsed = urlparse(base_url)
|
| 184 |
+
scheme = parsed.scheme or "http"
|
| 185 |
+
host = parsed.hostname or "localhost"
|
| 186 |
+
port = parsed.port
|
| 187 |
+
if port is None:
|
| 188 |
+
port = 443 if scheme == "https" else 80
|
| 189 |
+
query = urlencode({"civserverport": requested_port})
|
| 190 |
+
launcher_url = f"{scheme}://{host}:{port}/civclientlauncher?{query}"
|
| 191 |
+
origin = f"{scheme}://{host}:{port}"
|
| 192 |
+
request = Request(
|
| 193 |
+
launcher_url,
|
| 194 |
+
method="POST",
|
| 195 |
+
headers={
|
| 196 |
+
"User-Agent": "Mozilla/5.0",
|
| 197 |
+
"Origin": origin,
|
| 198 |
+
"Referer": origin + "/",
|
| 199 |
+
},
|
| 200 |
+
)
|
| 201 |
+
with urlopen(request, timeout=10) as response:
|
| 202 |
+
result = response.headers.get("result")
|
| 203 |
+
reserved_port = response.headers.get("port")
|
| 204 |
+
debug_event(
|
| 205 |
+
f"launcher response status={response.status} result={result} port={reserved_port} body={response.read(200).decode('utf-8', 'ignore')}"
|
| 206 |
+
)
|
| 207 |
+
if result != "success" or reserved_port is None:
|
| 208 |
+
raise RuntimeError(f"failed to reserve freeciv client port via {launcher_url}")
|
| 209 |
+
return int(reserved_port)
|
| 210 |
+
|
| 211 |
+
def _retry(self):
|
| 212 |
+
self._cur_retry += 1
|
| 213 |
+
time.sleep(self._retry_interval)
|
| 214 |
+
return self._detect_server_up()
|
| 215 |
+
|
| 216 |
+
def _detect_server_up(self):
|
| 217 |
+
ws = self._websocket.WebSocket()
|
| 218 |
+
try:
|
| 219 |
+
debug_event(f"probing websocket {self._ws_url}")
|
| 220 |
+
ws.connect(self._ws_url, timeout=10)
|
| 221 |
+
debug_event("websocket probe succeeded")
|
| 222 |
+
return True
|
| 223 |
+
except Exception as err:
|
| 224 |
+
debug_event(f"websocket probe failed: {err!r}")
|
| 225 |
+
print("Connect not successful:", err, " retrying in %s seconds." % self._retry_interval)
|
| 226 |
+
if self._cur_retry < self._num_retries:
|
| 227 |
+
return self._retry()
|
| 228 |
+
return False
|
| 229 |
+
finally:
|
| 230 |
+
try:
|
| 231 |
+
ws.close()
|
| 232 |
+
except Exception:
|
| 233 |
+
pass
|
| 234 |
+
|
| 235 |
+
def network_init(self):
|
| 236 |
+
self._cur_retry = 0
|
| 237 |
+
print("Connecting to server at %s ..." % self.base_url)
|
| 238 |
+
if self._detect_server_up():
|
| 239 |
+
self.websocket_init()
|
| 240 |
+
else:
|
| 241 |
+
print("Connection could not be established!")
|
| 242 |
+
|
| 243 |
+
def websocket_init(self):
|
| 244 |
+
from tornado import ioloop
|
| 245 |
+
|
| 246 |
+
from freecivbot.connectivity.clinet import CivWSClient
|
| 247 |
+
|
| 248 |
+
asyncio.set_event_loop(asyncio.new_event_loop())
|
| 249 |
+
ioloop.IOLoop.clear_current()
|
| 250 |
+
self._loop = ioloop.IOLoop.current()
|
| 251 |
+
|
| 252 |
+
debug_event(f"starting tornado websocket client for {self._ws_url}")
|
| 253 |
+
client = CivWSClient(self.client)
|
| 254 |
+
|
| 255 |
+
def send_json(data):
|
| 256 |
+
if not client._ws_connection:
|
| 257 |
+
raise RuntimeError("Web socket connection is closed.")
|
| 258 |
+
msg = json.dumps(data, separators=(",", ":"))
|
| 259 |
+
client._ws_connection.write_message(msg)
|
| 260 |
+
|
| 261 |
+
client.send = send_json
|
| 262 |
+
client.connect(self._ws_url)
|
| 263 |
+
|
| 264 |
+
try:
|
| 265 |
+
self._loop.start()
|
| 266 |
+
except KeyboardInterrupt:
|
| 267 |
+
client.close()
|
| 268 |
+
|
| 269 |
+
def submit(self, fn) -> None:
|
| 270 |
+
if self._loop is None:
|
| 271 |
+
raise RuntimeError("freeciv connection loop is not ready")
|
| 272 |
+
done = threading.Event()
|
| 273 |
+
error: BaseException | None = None
|
| 274 |
+
|
| 275 |
+
def run():
|
| 276 |
+
nonlocal error
|
| 277 |
+
try:
|
| 278 |
+
fn()
|
| 279 |
+
except BaseException as exc:
|
| 280 |
+
error = exc
|
| 281 |
+
finally:
|
| 282 |
+
done.set()
|
| 283 |
+
|
| 284 |
+
self._loop.add_callback(run)
|
| 285 |
+
if not done.wait(timeout=10):
|
| 286 |
+
raise TimeoutError("timed out dispatching action to freeciv loop")
|
| 287 |
+
if error is not None:
|
| 288 |
+
raise error
|
| 289 |
+
|
| 290 |
+
def close(self) -> None:
|
| 291 |
+
if self._loop is None:
|
| 292 |
+
return
|
| 293 |
+
self.submit(self.client.close)
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
class LiveFreecivSession:
    """Session that drives a Freeciv game from a background thread.

    ``reset`` builds a bot and client and starts a daemon thread that creates
    the connection. The bot publishes snapshots back through
    ``_publish_snapshot``; ``reset``, ``apply_action`` and ``end_turn`` each
    block until a snapshot arrives or ``turn_timeout_s`` elapses.
    """

    def __init__(
        self,
        *,
        username: str = "openenvbot",
        client_port: int = 6000,
        base_url: str = "http://localhost",
        turn_timeout_s: float = 60.0,
    ):
        # Configuration.
        self.username = username
        self.client_port = client_port
        self.base_url = base_url
        self.turn_timeout_s = turn_timeout_s

        # Per-game objects, rebuilt by reset() and dropped by close().
        self._bot_wrapper: _InteractiveBot | None = None
        self._client = None
        self._connection: _ConfiguredCivConnection | None = None
        self._thread: threading.Thread | None = None

        # Hand-off between the session thread and callers.
        self._ready = threading.Event()
        self._snapshot_lock = threading.Lock()
        self._snapshot: RawSnapshot | None = None
        self._thread_error: BaseException | None = None

        # Used to vary the username and port between resets.
        self._reset_counter = 0
        self._session_seed = time.monotonic_ns() % 1_000_000

    def reset(self, seed: int | None = None) -> RawSnapshot:
        """Tear down any current game, start a new one, and return its first snapshot.

        ``seed`` is accepted but not used.
        """
        del seed
        self.close()
        self._reset_counter += 1
        username = self._next_username()
        # Rotate over three consecutive ports starting at self.client_port.
        port_offset = (self._session_seed + self._reset_counter - 1) % 3
        client_port = self.client_port + port_offset

        self._ready.clear()
        self._thread_error = None
        self._snapshot = None

        self._bot_wrapper = _InteractiveBot(self)
        configured = _ConfiguredCivClient(
            self._bot_wrapper.impl,
            username,
            client_port=client_port,
            visual_monitor=False,
        )
        self._client = configured.impl

        def run() -> None:
            try:
                debug_event(f"session thread starting username={username} base_url={self.base_url} client_port={client_port}")
                _ConfiguredCivConnection(self._client, self.base_url, owner=self)
            except BaseException as exc:  # pragma: no cover - surfaced in waiters
                debug_event(f"session thread error: {exc!r}")
                self._thread_error = exc
                # Wake any waiter so it can report the failure.
                self._ready.set()

        worker = threading.Thread(target=run, name="freeciv-live-session", daemon=True)
        self._thread = worker
        worker.start()
        return self._wait_for_snapshot("reset")

    def apply_action(self, action_ref: ActionRef) -> RawSnapshot:
        """Trigger the referenced action and return the next snapshot.

        Raises:
            ValueError: if the action is not among the actor's valid actions
                in the current snapshot.
        """
        current = self._require_snapshot()
        action_list = current.actions[action_ref.controller]
        valid_actions = action_list.get_actions(action_ref.actor_id, valid_only=True)
        if valid_actions is None:
            action = None
        else:
            action = valid_actions.get(action_ref.raw_action_key)
        if action is None:
            raise ValueError(
                f"action {action_ref.raw_action_key} is no longer valid for {action_ref.controller}:{action_ref.actor_id}"
            )
        self._ready.clear()
        self._require_connection().submit(lambda: action_list.trigger_validated_action(action))
        return self._wait_for_snapshot(action_ref.raw_action_key)

    def end_turn(self) -> RawSnapshot:
        """Ask the bot to end the turn and return the next snapshot."""
        if self._bot_wrapper is None:
            raise RuntimeError("session has not been reset")
        self._ready.clear()
        self._require_connection().submit(self._bot_wrapper.impl.end_turn)
        return self._wait_for_snapshot("end_turn")

    def close(self) -> None:
        """Best-effort shutdown; errors from closing are ignored."""
        debug_event("closing live session")
        # Prefer closing through the connection; fall back to the bare client.
        closable = self._connection if self._connection is not None else self._client
        if closable is not None:
            try:
                closable.close()
            except Exception:
                pass
        worker = self._thread
        if worker is not None and worker.is_alive():
            worker.join(timeout=5)

        self._bot_wrapper = None
        self._client = None
        self._connection = None
        self._thread = None
        self._snapshot = None
        self._thread_error = None
        self._ready.clear()

    def _publish_snapshot(self, snapshot: RawSnapshot) -> None:
        """Store ``snapshot`` and wake any waiter (called by the bot)."""
        debug_event(f"snapshot published turn={snapshot.turn}")
        with self._snapshot_lock:
            self._snapshot = snapshot
        self._ready.set()

    def _next_username(self) -> str:
        """Return the base username, truncated, plus a numeric suffix.

        The prefix is cut so that prefix + suffix is at most 31 characters,
        while always keeping at least one prefix character.
        """
        suffix = str(self._session_seed + self._reset_counter)
        keep = max(1, 31 - len(suffix))
        return self.username[:keep] + suffix

    def _require_connection(self) -> _ConfiguredCivConnection:
        """Return the connection or raise ``RuntimeError`` if there is none."""
        connection = self._connection
        if connection is None:
            raise RuntimeError("freeciv connection is not ready")
        return connection

    def _require_snapshot(self) -> RawSnapshot:
        """Return the latest snapshot or raise ``RuntimeError`` if there is none."""
        with self._snapshot_lock:
            latest = self._snapshot
            if latest is None:
                raise RuntimeError("no live snapshot is available")
            return latest

    def _wait_for_snapshot(self, reason: str) -> RawSnapshot:
        """Poll until a snapshot is published, the thread fails, or time runs out.

        Raises:
            RuntimeError: if the session thread recorded an error.
            TimeoutError: if no snapshot arrives within ``turn_timeout_s``.
        """

        def raise_if_thread_failed() -> None:
            if self._thread_error is not None:
                raise RuntimeError(f"freeciv session failed during {reason}") from self._thread_error

        deadline = time.monotonic() + self.turn_timeout_s
        debug_event(f"waiting for snapshot reason={reason} timeout={self.turn_timeout_s}")
        while time.monotonic() < deadline:
            raise_if_thread_failed()
            if not self._ready.wait(timeout=0.1):
                continue
            # The event is also set on thread failure, so check again.
            raise_if_thread_failed()
            return self._require_snapshot()
        debug_event(f"snapshot wait timed out reason={reason}")
        raise TimeoutError(f"timed out waiting for freeciv snapshot during {reason}")
|
freeciv_env/server/Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Image for the Freeciv OpenEnv Space, layered on the freeciv-web base image.
FROM omkarasoftware/freeciv-web:latest

# System packages are installed as root. --no-install-recommends limits the
# install to the three named packages and their hard dependencies.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN mkdir -p /app/env && chown -R docker:docker /app

# Everything below runs as the unprivileged "docker" user.
USER docker
ENV HOME=/home/docker
WORKDIR /app/env

COPY --chown=docker:docker . /app/env
RUN chmod +x /app/env/scripts/start_space.sh

# Install uv, then use it to provision Python 3.11 and the project venv.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
ENV PATH="/app/env/.venv/bin:/home/docker/.local/bin:$PATH"
RUN uv python install 3.11
RUN uv venv --python 3.11 /app/env/.venv
RUN UV_PROJECT_ENVIRONMENT=/app/env/.venv uv sync --frozen --no-dev --no-editable

# Runtime configuration passed to the app through the environment.
ENV PYTHONPATH="/app/env:$PYTHONPATH"
ENV ENABLE_WEB_INTERFACE=true
ENV FREECIV_SERVER_URL=http://127.0.0.1
ENV FREECIV_TURN_TIMEOUT_S=120

HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=10 \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/env/scripts/start_space.sh"]
|
freeciv_env/server/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.server.freeciv_environment import FreecivEnvironment
|
| 2 |
+
|
| 3 |
+
__all__ = ["FreecivEnvironment"]
|
freeciv_env/server/app.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import glob
|
| 4 |
+
import os
|
| 5 |
+
from urllib.request import Request, urlopen
|
| 6 |
+
|
| 7 |
+
from fastapi import Query
|
| 8 |
+
from openenv.core.env_server import create_app
|
| 9 |
+
|
| 10 |
+
from freeciv_env.adapter import prepare_observation
|
| 11 |
+
from freeciv_env.models import FreecivAction, FreecivObservation
|
| 12 |
+
from freeciv_env.runtime import DEBUG_EVENTS, LiveFreecivSession
|
| 13 |
+
from freeciv_env.server.freeciv_environment import FreecivEnvironment
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def create_live_session() -> LiveFreecivSession:
    """Build a ``LiveFreecivSession`` configured from environment variables.

    Reads ``FREECIV_USERNAME``, ``FREECIV_CLIENT_PORT``, ``FREECIV_SERVER_URL``
    and ``FREECIV_TURN_TIMEOUT_S``; each has a default that applies when the
    variable is unset.
    """
    settings = {
        "username": os.getenv("FREECIV_USERNAME", "openenvbot"),
        "client_port": int(os.getenv("FREECIV_CLIENT_PORT", "6000")),
        "base_url": os.getenv("FREECIV_SERVER_URL", "http://127.0.0.1"),
        "turn_timeout_s": float(os.getenv("FREECIV_TURN_TIMEOUT_S", "60")),
    }
    return LiveFreecivSession(**settings)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def create_freeciv_app(*, session_factory=create_live_session, max_turns: int | None = None):
    """Create the server app for the Freeciv environment.

    Args:
        session_factory: callable passed to each ``FreecivEnvironment`` as
            ``session_factory``.
        max_turns: turn limit; when ``None`` it is read from the
            ``FREECIV_MAX_TURNS`` environment variable (default ``"50"``).

    Returns:
        Whatever ``create_app`` returns.
    """
    if max_turns is None:
        turn_limit = int(os.getenv("FREECIV_MAX_TURNS", "50"))
    else:
        turn_limit = max_turns

    def build_environment():
        return FreecivEnvironment(session_factory=session_factory, max_turns=turn_limit)

    return create_app(
        build_environment,
        FreecivAction,
        FreecivObservation,
        env_name="freeciv_env",
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# Module-level app instance, built at import time with default settings; the
# debug routes defined in this module are registered on it.
app = create_freeciv_app()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@app.get("/debug/internal-status")
def debug_internal_status() -> dict:
    """Probe a fixed list of local HTTP endpoints and report each outcome.

    Every probe uses a 10-second timeout. A success records the HTTP status
    and the first 200 bytes of the body; any exception is recorded as its
    ``repr``. Note that the "launcher" probe is a POST request.
    """
    probes = (
        ("nginx", "GET", "http://127.0.0.1/"),
        ("publite2", "GET", "http://127.0.0.1/pubstatus"),
        ("tomcat", "GET", "http://127.0.0.1:8080/freeciv-web/"),
        ("proxy7000", "GET", "http://127.0.0.1/civsocket/7000/status"),
        ("proxy7001", "GET", "http://127.0.0.1/civsocket/7001/status"),
        ("proxy7002", "GET", "http://127.0.0.1/civsocket/7002/status"),
        ("launcher", "POST", "http://127.0.0.1/civclientlauncher?civserverport=6000"),
    )

    def run_probe(name: str, method: str, url: str) -> dict:
        try:
            with urlopen(Request(url, method=method), timeout=10) as response:
                snippet = response.read(200).decode("utf-8", "ignore")
            return {
                "name": name,
                "ok": True,
                "status": response.status,
                "body": snippet,
            }
        except Exception as exc:
            return {"name": name, "ok": False, "error": repr(exc)}

    return {"checks": [run_probe(*probe) for probe in probes]}
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@app.get("/debug/live-log")
def debug_live_log() -> dict:
    """Return a list copy of ``DEBUG_EVENTS`` (imported from ``freeciv_env.runtime``)."""
    events = [*DEBUG_EVENTS]
    return {"events": events}
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
@app.get("/debug/freeciv-logs")
def debug_freeciv_logs() -> dict:
    """Return the tail of the Freeciv log files under ``/docker/logs``.

    The ``*.log`` paths are sorted by name and only the last 12 are read. For
    each file the final 80 lines are returned; a file that cannot be read is
    reported as the ``repr`` of the exception instead.

    Returns:
        ``{"logs": {path: tail_text_or_error_repr}}``.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    logs = {}
    for path in sorted(glob.glob("/docker/logs/*.log"))[-12:]:
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                # A bounded deque streams the file and retains only the last 80
                # lines, so a large log is never held in memory in full.
                logs[path] = "".join(deque(handle, maxlen=80))
        except Exception as exc:
            logs[path] = repr(exc)
    return {"logs": logs}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
@app.get("/debug/startup-log")
def debug_startup_log() -> dict:
    """Return the last 120 lines of ``/tmp/start_space.log``.

    Returns:
        ``{"path": ..., "log": ...}`` on success, or ``{"path": ..., "error": ...}``
        carrying the exception ``repr`` when the file cannot be read.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    path = "/tmp/start_space.log"
    try:
        with open(path, "r", encoding="utf-8", errors="ignore") as handle:
            # Stream the file through a bounded deque so only the tail is kept
            # in memory, however large the log has grown.
            tail = deque(handle, maxlen=120)
        return {"path": path, "log": "".join(tail)}
    except Exception as exc:
        return {"path": path, "error": repr(exc)}
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
@app.post("/debug/live-reset")
def debug_live_reset(timeout_s: float = Query(default=120.0, ge=10.0, le=300.0)) -> dict:
    """Smoke-test a fresh live session: reset it, end one turn, and summarize both states.

    Args:
        timeout_s: Value assigned to the session's ``turn_timeout_s``; the query
            parameter is constrained to the range 10-300 (default 120).

    Returns:
        ``{"ok": True, "reset": {...}, "step": {...}}`` where each summary holds
        the turn, the number of legal actions and the summary text, or
        ``{"ok": False, "error": repr(exc)}`` if anything raised. The session is
        closed in every case.
    """

    def observe(snapshot, status: str):
        # Both observations use a zero reward, done=False and empty metadata.
        prepared = prepare_observation(
            snapshot,
            reward=0.0,
            done=False,
            status=status,
            metadata={},
        )
        return prepared.observation

    def summarize(observation) -> dict:
        return {
            "turn": observation.turn,
            "legal_actions": len(observation.legal_actions),
            "summary": observation.summary,
        }

    session = create_live_session()
    session.turn_timeout_s = timeout_s
    try:
        after_reset = observe(session.reset(), "ready")
        after_turn = observe(session.end_turn(), "ok")
        return {
            "ok": True,
            "reset": summarize(after_reset),
            "step": summarize(after_turn),
        }
    except Exception as exc:
        return {"ok": False, "error": repr(exc)}
    finally:
        session.close()
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def main() -> None:
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8000.

    WebSocket ping interval and ping timeout are both set to 300.
    """
    import uvicorn

    server_options = {
        "host": "0.0.0.0",
        "port": 8000,
        "ws_ping_interval": 300,
        "ws_ping_timeout": 300,
    }
    uvicorn.run(app, **server_options)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
if __name__ == "__main__":
|
| 144 |
+
main()
|
freeciv_env/server/freeciv_environment.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Callable
|
| 4 |
+
from uuid import uuid4
|
| 5 |
+
|
| 6 |
+
from openenv.core.env_server.interfaces import Environment
|
| 7 |
+
|
| 8 |
+
from freeciv_env.adapter import (
|
| 9 |
+
ActionLookupKey,
|
| 10 |
+
ActionRef,
|
| 11 |
+
PreparedObservation,
|
| 12 |
+
RawSnapshot,
|
| 13 |
+
SnapshotMetrics,
|
| 14 |
+
action_lookup_key,
|
| 15 |
+
prepare_observation,
|
| 16 |
+
)
|
| 17 |
+
from freeciv_env.models import FreecivAction, FreecivObservation, FreecivState
|
| 18 |
+
from freeciv_env.runtime import FreecivSession
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class FreecivEnvironment(Environment[FreecivAction, FreecivObservation, FreecivState]):
    """OpenEnv environment that drives a single Freeciv session.

    A new session is obtained from ``session_factory`` on every ``reset``. Each
    ``step`` either ends the turn or applies one of the actions that the most
    recent observation listed as legal. An episode is done when the player is
    no longer alive or the snapshot's turn reaches ``max_turns``.
    """

    SUPPORTS_CONCURRENT_SESSIONS = False

    def __init__(self, session_factory: Callable[[], FreecivSession], max_turns: int = 50):
        """Store the session factory and turn limit; no session is opened yet.

        Args:
            session_factory: Zero-argument callable returning a ``FreecivSession``.
            max_turns: Turn number at which ``_is_done`` reports the episode finished.
        """
        super().__init__()
        self._session_factory = session_factory
        self.max_turns = max_turns
        self._session: FreecivSession | None = None
        self._snapshot: RawSnapshot | None = None
        self._metrics: SnapshotMetrics | None = None
        self._action_refs: dict[ActionLookupKey, ActionRef] = {}
        self._state = FreecivState(episode_id=str(uuid4()), step_count=0)

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Close any current session, start a new one and return its first observation.

        Args:
            seed: Forwarded to the session's ``reset``.
            episode_id: Identifier for the new episode; a random UUID is used when
                omitted or empty.
            **kwargs: Accepted for interface compatibility and ignored.

        Returns:
            The initial observation, with reward 0.0 and status ``"ready"``.
        """
        del kwargs
        self.close()
        self._session = self._session_factory()
        snapshot = self._session.reset(seed=seed)
        prepared = prepare_observation(
            snapshot,
            reward=0.0,
            done=self._is_done(snapshot),
            status="ready",
            metadata={},
        )
        new_episode_id = episode_id or str(uuid4())
        # _commit carries step_count over from the current state, so start the
        # new episode from a zeroed state; otherwise the previous episode's step
        # count would leak into this one.
        self._state = FreecivState(episode_id=new_episode_id, step_count=0)
        self._commit(snapshot, prepared, episode_id=new_episode_id)
        return prepared.observation

    def step(
        self,
        action: FreecivAction,
        timeout_s: float | None = None,
        **kwargs,
    ) -> FreecivObservation:
        """Apply one action and return the resulting observation.

        ``"end_turn"`` always advances the session. Any other action must match
        an entry in the legal-action table built from the latest observation;
        otherwise the current snapshot is re-observed with a -0.25 reward and
        status ``"invalid_action"``, and the session is left untouched.

        Args:
            action: The action to perform.
            timeout_s: Accepted for interface compatibility and ignored.
            **kwargs: Accepted for interface compatibility and ignored.

        Raises:
            RuntimeError: if called before ``reset`` (or after ``close``).
        """
        del timeout_s, kwargs
        if self._session is None or self._snapshot is None or self._metrics is None:
            raise RuntimeError("environment must be reset before step")

        # Every call counts as a step, including rejected (illegal) actions.
        self._state.step_count += 1
        if action.action_type == "end_turn":
            next_snapshot = self._session.end_turn()
            reward = self._reward_for_transition(action, self._metrics, next_snapshot)
            prepared = prepare_observation(
                next_snapshot,
                reward=reward,
                done=self._is_done(next_snapshot),
                status="ok",
                metadata={},
            )
            self._commit(next_snapshot, prepared, episode_id=self._state.episode_id)
            return prepared.observation

        ref = self._action_refs.get(action_lookup_key(action))
        if ref is None:
            prepared = prepare_observation(
                self._snapshot,
                reward=-0.25,
                done=self._is_done(self._snapshot),
                status="invalid_action",
                metadata={"error": "action is not currently legal"},
            )
            # The game did not advance, so keep the stored snapshot as it is.
            self._commit(self._snapshot, prepared, episode_id=self._state.episode_id, replace_snapshot=False)
            return prepared.observation

        next_snapshot = self._session.apply_action(ref)
        reward = self._reward_for_transition(action, self._metrics, next_snapshot)
        prepared = prepare_observation(
            next_snapshot,
            reward=reward,
            done=self._is_done(next_snapshot),
            status="ok",
            metadata={},
        )
        self._commit(next_snapshot, prepared, episode_id=self._state.episode_id)
        return prepared.observation

    @property
    def state(self) -> FreecivState:
        """The most recently committed ``FreecivState``."""
        return self._state

    def close(self) -> None:
        """Close the active session, if any, and drop all per-episode caches.

        References are cleared before the session is closed, so the environment
        is left in a clean "needs reset" state even if ``session.close()`` raises.
        """
        session, self._session = self._session, None
        self._snapshot = None
        self._metrics = None
        self._action_refs = {}
        if session is not None:
            session.close()

    def _commit(
        self,
        snapshot: RawSnapshot,
        prepared: PreparedObservation,
        *,
        episode_id: str,
        replace_snapshot: bool = True,
    ) -> None:
        """Record the outcome of a reset/step as the environment's current state.

        Args:
            snapshot: Snapshot to store when ``replace_snapshot`` is true.
            prepared: Prepared observation whose metrics, action refs and
                observation fields are copied into the environment.
            episode_id: Episode identifier for the rebuilt ``FreecivState``.
            replace_snapshot: When false, the stored snapshot is left unchanged.
        """
        if replace_snapshot:
            self._snapshot = snapshot
        self._metrics = prepared.metrics
        self._action_refs = prepared.action_refs
        observation = prepared.observation
        self._state = FreecivState(
            episode_id=episode_id,
            step_count=self._state.step_count,
            turn=observation.turn,
            score=observation.score,
            known_tiles=observation.known_tiles,
            visible_tiles=observation.visible_tiles,
            city_count=observation.city_count,
            unit_count=observation.unit_count,
            techs_researched=observation.techs_researched,
        )

    def _reward_for_transition(
        self,
        action: FreecivAction,
        previous: SnapshotMetrics,
        next_snapshot: RawSnapshot,
    ) -> float:
        """Compute the shaped reward for moving from ``previous`` to ``next_snapshot``.

        The reward is a fixed bonus per action type plus weighted, non-negative
        gains in score, known tiles, city count and researched techs. Decreases
        in any metric are clamped to zero and never penalize.

        Raises:
            KeyError: if ``action.action_type`` has no entry in the bonus table.
        """
        from freeciv_env.adapter import extract_metrics

        current = extract_metrics(next_snapshot)
        # Flat bonus granted for taking an action of each type.
        reward = {
            "end_turn": 0.0,
            "move_unit": 0.01,
            "build_city": 0.10,
            "set_city_production": 0.05,
            "set_research": 0.05,
        }[action.action_type]
        reward += max(current.score - previous.score, 0.0) * 0.02
        reward += max(current.known_tiles - previous.known_tiles, 0) * 0.01
        reward += max(current.city_count - previous.city_count, 0) * 0.50
        reward += max(current.techs_researched - previous.techs_researched, 0) * 0.25
        return float(reward)

    def _is_done(self, snapshot: RawSnapshot) -> bool:
        """Return True when the player is dead or the turn limit has been reached.

        A snapshot without a ``my_is_alive`` entry is treated as alive.
        """
        player = snapshot.state.get("player", {})
        alive = bool(player.get("my_is_alive", True))
        return (not alive) or snapshot.turn >= self.max_turns
|
freeciv_rl_training_curve.png
ADDED
|
hackathon.md
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## **OpenEnv Hackathon Participant Guide**
|
| 2 |
+
|
| 3 |
+
Welcome to the [OpenEnv Hackathon](https://cerebralvalley.ai/e/open-env-hackathon), hacker! 👋 We’re thrilled to have you on board.
|
| 4 |
+
|
| 5 |
+
This guide is your all-in-one resource for the event, including schedule, rules, technical resources, problem statements, judging information, and more. Please read this carefully; most answers can be found here.
|
| 6 |
+
|
| 7 |
+
## **1. Join the [PyTorch Discord Server](https://discord.gg/VBcf6VtfY6)**
|
| 8 |
+
|
| 9 |
+
- You’ll be given a Hackathon Participant role by an admin, which will give you access to the hackathon-specific channels.
|
| 10 |
+
|
| 11 |
+
- Here, you’ll be able to interact with hackers and sponsors, introduce yourselves, and form teams (for a maximum team size of **3**).
|
| 12 |
+
|
| 13 |
+
- If you don't receive your role within **24 hours of joining,** please ping @CV.
|
| 14 |
+
|
| 15 |
+
- Please submit your Discord username below so we can grant you the role
|
| 16 |
+
|
| 17 |
+
[linkEmbed]
|
| 18 |
+
|
| 19 |
+
## **2. Location**
|
| 20 |
+
|
| 21 |
+
**|** Shack15 (1 Ferry Building, Suite 201, San Francisco CA. 94111)
|
| 22 |
+
|
| 23 |
+
- **Venue Access:** Shack15 is on the 2nd floor of the Ferry Building. Go up the Ferry Building elevator to the second floor, and turn left. Here you will see the main entrance to Shack15.
|
| 24 |
+
|
| 25 |
+
- **Parking:** Parking near the Ferry Building is extremely limited. Consider parking farther out and taking Uber, Lyft, or Public Transportation.
|
| 26 |
+
|
| 27 |
+
[youtube]
|
| 28 |
+
|
| 29 |
+
## **3. WiFi Information**
|
| 30 |
+
|
| 31 |
+
- **Username:** SHACK15_Members
|
| 32 |
+
|
| 33 |
+
- **Password:** M3mb3r$4L!f3
|
| 34 |
+
|
| 35 |
+
## **4. Hackathon Schedule**
|
| 36 |
+
|
| 37 |
+
**Saturday, March 7 (Outline)**
|
| 38 |
+
|
| 39 |
+
- **9:00 AM:** Doors Open • Breakfast Served • Team Formation
|
| 40 |
+
|
| 41 |
+
- **10:00 AM – 11:30AM**: Kick-off presentations with Meta, Hugging Face, UC Berkeley, CoreWeave, OpenPipe, Unsloth AI, Fleet AI, Mercor, Scaler AI Labs, Snorkel AI, Patronus AI, Halluminate and Scale AI
|
| 42 |
+
|
| 43 |
+
- **11:30 AM:** Hacking Begins
|
| 44 |
+
|
| 45 |
+
- **1:00 PM:** Lunch Served
|
| 46 |
+
|
| 47 |
+
- **6:00 PM:** Dinner Served
|
| 48 |
+
|
| 49 |
+
- **10:00 PM:** Doors Close • Re-entry not permitted
|
| 50 |
+
|
| 51 |
+
**Sunday, March 8 (Outline)**
|
| 52 |
+
|
| 53 |
+
- **9:00AM:** Doors Open • Breakfast Served
|
| 54 |
+
|
| 55 |
+
- **1:00PM:** Hacking stops • Submissions Due
|
| 56 |
+
|
| 57 |
+
- **1:15PM:** First Round Judging Begins
|
| 58 |
+
|
| 59 |
+
- **2:00PM:** Lunch Served
|
| 60 |
+
|
| 61 |
+
- **3:00PM:** Final Round Judging Begins
|
| 62 |
+
|
| 63 |
+
- **4:00PM:** Winners Announced and Closing
|
| 64 |
+
|
| 65 |
+
- **5:00PM:** Doors Close
|
| 66 |
+
|
| 67 |
+
All presentation slides can be found here
|
| 68 |
+
|
| 69 |
+
[linkEmbed]
|
| 70 |
+
|
| 71 |
+
## **5. Hackathon and Submission Rules**
|
| 72 |
+
|
| 73 |
+
To keep things fair and aligned with our goals, all teams must follow these rules:
|
| 74 |
+
|
| 75 |
+
- **Open Source:** Please ensure your repository is public.
|
| 76 |
+
|
| 77 |
+
- **New Work Only:** All projects must be started from scratch during the hackathon with no previous work.
|
| 78 |
+
|
| 79 |
+
- **Team Size:** Teams may have up to **3** members.
|
| 80 |
+
|
| 81 |
+
- **Banned Projects:** Projects will be disqualified if they violate legal, ethical, or platform policies, or use code, data, or assets you do not have the rights to.
|
| 82 |
+
|
| 83 |
+
- Your project **must** use OpenEnv (stable release 0.2.1) deployed on HF spaces
|
| 84 |
+
|
| 85 |
+
- You must show a minimal training script for your environment using Unsloth or HF TRL in Colab.
|
| 86 |
+
|
| 87 |
+
- You must upload a **one minute** demo video to YouTube talking about your submission.
|
| 88 |
+
|
| 89 |
+
## **6. Hackathon Problem Statements**
|
| 90 |
+
|
| 91 |
+
Your project must address at least **one of the five** required problem statements.
|
| 92 |
+
|
| 93 |
+
- Some problem statements include **optional partner-sponsored sub-problem statements**, which are additional focus areas related to the main theme.
|
| 94 |
+
|
| 95 |
+
- Your project may align with **multiple partner sub-problem statements**, but you can only be **judged for a maximum of two**. Please **select up to two** when submitting.
|
| 96 |
+
|
| 97 |
+
- Projects that match these partner sub-problem statements are eligible for **extra partner prizes**, judged separately from the main track winners.
|
| 98 |
+
|
| 99 |
+
- Each partner sub-problem statement carries a prize of **$10,000 USD**.
|
| 100 |
+
|
| 101 |
+
**Statement 1: Multi-Agent Interactions**
|
| 102 |
+
|
| 103 |
+
Environments for this theme involve cooperation, competition, negotiation, and coalition formation. Learning from these environments will enable agents to model the beliefs and incentives of others in partially observable settings. This drives theory-of-mind reasoning and emergent strategic behavior.
|
| 104 |
+
|
| 105 |
+
- **Expected Outcome:** an environment that can be used to train multi-agent task handling in an LLM
|
| 106 |
+
|
| 107 |
+
- **Example Environments:** Market simulations, compute-allocation negotiations, collaborative puzzle worlds, mixed cooperative/competitive strategy games.
|
| 108 |
+
|
| 109 |
+
- **Partner Sub-Themes:**
|
| 110 |
+
|
| 111 |
+
- **Fleet AI:** Scalable Oversight: Environments that train oversight agents to monitor, analyze, and explain the behavior of other AI agents operating in complex, multi-agent settings.
|
| 112 |
+
- **Halluminate:** Multi-Actor Environments: Build a realistic environment where an agent interacts with and manages multiple actors (agents) to discover and achieve the task
|
| 113 |
+
|
| 114 |
+
**Statement 2: (Super) Long-Horizon Planning & Instruction Following**
|
| 115 |
+
|
| 116 |
+
You will build environments that require deep, multi-step reasoning with sparse or delayed rewards. After using these environments, the goal is to enable agents to decompose goals, track state over extended trajectories, and recover from early mistakes. The aim is to push beyond shallow next-token reasoning toward structured planning and durable internal representations.
|
| 117 |
+
|
| 118 |
+
- **Expected Outcome:** an environment that can capture and improve LLM behaviour on challenging long horizon tasks that need long running sessions beyond context memory limits.
|
| 119 |
+
|
| 120 |
+
- **Example Environments:** Research-planning simulators, large-scale codebase refactoring tasks, strategic resource management worlds, long-horizon logistics optimization, extremely complicated long-horizon instruction following (e.g., 300 instructions scattered around).
|
| 121 |
+
|
| 122 |
+
- **Partner Sub-Themes:**
|
| 123 |
+
|
| 124 |
+
- **Mercor:** Make an environment with capped/uncapped rewards where frontier model rewards scale with token output.
|
| 125 |
+
|
| 126 |
+
- **Scale AI:** Environments for long horizon workflows for non-code use cases within a business setting: focusing on either Sales, Project management, or HR & IT.
|
| 127 |
+
|
| 128 |
+
**Statement 3: World Modeling**
|
| 129 |
+
|
| 130 |
+
- **Statement 3.1: Professional Tasks:** Here you will develop environments that require real interaction with tools, APIs, or dynamic systems where the model is expected to do real hard work instead of exploiting short-cuts to arrive at the desired outcome. Learning from these environments will enable agents to maintain consistent internal state, update beliefs based on outcomes, and orchestrate multi-step workflows. The goal is to strengthen causal reasoning and persistent world models.
|
| 131 |
+
|
| 132 |
+
- **Expected Outcome:** an environment capturing the nuances of a defined, partially observable world and improving LLM interaction with it
|
| 133 |
+
|
| 134 |
+
- **Example Environments:** Dynamic browser/API ecosystems, enterprise applications, scientific workflow loops (papers → code → experiments), economic simulations with feedback, tool-discovery benchmarks.
|
| 135 |
+
|
| 136 |
+
- **Partner Sub-Theme:**
|
| 137 |
+
|
| 138 |
+
- **Scaler AI Labs:** Multi-App RL Environment for Enterprise Workflows: Create RL environments to demonstrate complex workflows, business rule nuances etc in a large enterprise
|
| 139 |
+
|
| 140 |
+
- **Statement 3.2: Personalized Tasks:** Here we will develop an environment that offers real personalized task handling: imagine replying to personal messages, resolving dinner plans that clash with work commitments, or replying to tough emails. Think of any personal-assistant task.
|
| 141 |
+
|
| 142 |
+
- **Expected Outcome:** An environment that gives the model a realistic simulation of handling personal tasks, conflicts and managing them as delegations
|
| 143 |
+
|
| 144 |
+
- **Example Environments:** Executive Assistant Meeting Planner, Dinner and drive planning, email and message replying, etc
|
| 145 |
+
|
| 146 |
+
- **Partner Sub-Theme:**
|
| 147 |
+
|
| 148 |
+
- **Patronus AI:** Consumer Workflows with Schema Drift: Multi-step consumer workflow environments where the underlying data schemas, API contracts, and t&cs/policies/rules change.
|
| 149 |
+
|
| 150 |
+
**Statement 4: Self-Improvement**
|
| 151 |
+
|
| 152 |
+
The focus here is to create environments where agents can learn to generate new challenges, escalate difficulty, and improve through self-play or adaptive curricula. Rather than optimizing fixed tasks, the goal is for agents to learn to drive their own capability growth. The objective is recursive skill amplification.
|
| 153 |
+
|
| 154 |
+
- **Expected Outcome:** an environment for improving self-play of an LLM over a defined set of tasks
|
| 155 |
+
|
| 156 |
+
- **Example Environments:** Self-play negotiation arenas, auto-generated math/proof tasks, evolving coding competitions, adaptive RL curricula.
|
| 157 |
+
|
| 158 |
+
- **Partner Sub-Theme:**
|
| 159 |
+
|
| 160 |
+
- **Snorkel AI:** Simulated Experts-in-the-Loop: Environment that simulates interactions with real subject-matter experts, with changing requirements / preferences.
|
| 161 |
+
|
| 162 |
+
**Statement 5: Wild Card - Impress Us!**
|
| 163 |
+
|
| 164 |
+
We do not want to limit your focus if your idea doesn’t fit the boxes above. We want to, and WILL, reward out-of-the-box tasks, so please be creative, but remember to submit work that meaningfully adds value to LLM training on a certain task.
|
| 165 |
+
|
| 166 |
+
More details about each theme can be found here:
|
| 167 |
+
|
| 168 |
+
[linkEmbed]
|
| 169 |
+
|
| 170 |
+
## **7. CV Hackathon Winners**
|
| 171 |
+
|
| 172 |
+
[linkEmbed]
|
| 173 |
+
|
| 174 |
+
## **8. OpenEnv Provided Resources**
|
| 175 |
+
|
| 176 |
+
**Please read through the entire slideshow here. This includes:**
|
| 177 |
+
|
| 178 |
+
- OpenEnv Fundamentals, Architecture
|
| 179 |
+
- Local Dev, Docker, and HF Spaces Deployment
|
| 180 |
+
- OpenEnv in Practice
|
| 181 |
+
- Training (TRL & Unsloth)
|
| 182 |
+
- How-to-Access-Infrastructure (including GPU Request Form)
|
| 183 |
+
|
| 184 |
+
[linkEmbed]
|
| 185 |
+
|
| 186 |
+
## **9. Partner Provided Resources**
|
| 187 |
+
|
| 188 |
+
- **Unsloth AI Resources**
|
| 189 |
+
- RL notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks#grpo-reasoning-rl>
|
| 190 |
+
- All notebooks: <https://unsloth.ai/docs/get-started/unsloth-notebooks>
|
| 191 |
+
- GitHub notebook index: <https://github.com/unslothai/notebooks/tree/main/nb>
|
| 192 |
+
- H100 / OpenEnv recommendation: use the BF16 gpt-oss 20B OpenEnv notebook for faster H100 runs: <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb>
|
| 193 |
+
- For that notebook, reduce `max_steps` to `300` to make the process faster.
|
| 194 |
+
- If GRPO is too slow, prefer smaller-model notebooks with `fast_inference = True`, for example:
|
| 195 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Qwen3_(4B)-GRPO.ipynb>
|
| 196 |
+
- <https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb>
|
| 197 |
+
- You will need to edit those notebooks to include OpenEnv calls.
|
| 198 |
+
- If vLLM GRPO runs fail, try using a fresh virtualenv:
|
| 199 |
+
|
| 200 |
+
```bash
|
| 201 |
+
python -m venv unsloth_env
|
| 202 |
+
source unsloth_env/bin/activate
|
| 203 |
+
pip install --upgrade pip && pip install uv
|
| 204 |
+
uv pip install unsloth vllm --torch-backend=auto
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
- If Unsloth is already installed, update it for the latest GRPO bugfixes:
|
| 208 |
+
|
| 209 |
+
```bash
|
| 210 |
+
pip install --upgrade --no-cache-dir --no-deps unsloth unsloth_zoo
|
| 211 |
+
```
|
| 212 |
+
|
| 213 |
+
- **Mercor Resources**
|
| 214 |
+
- Dataset: <https://huggingface.co/datasets/mercor/apex-agents>
|
| 215 |
+
- Archipelago repo to run the eval: <https://github.com/Mercor-Intelligence/archipelago>
|
| 216 |
+
- APEX-Agents paper: <https://arxiv.org/abs/2601.14242>
|
| 217 |
+
- **Hugging Face Resources**
|
| 218 |
+
- **$30** in Compute and Inference Credits
|
| 219 |
+
- To claim your credits, set up a HF account here: <https://huggingface.co/join>
|
| 220 |
+
- Then, follow this link: <https://huggingface.co/openenv-community>
|
| 221 |
+
- You will be granted **$30** of compute and inference credits!
|
| 222 |
+
- **Northflank Resources**
|
| 223 |
+
- Each team gets an H100
|
| 224 |
+
- Northflank instructions
|
| 225 |
+
|
| 226 |
+
[linkEmbed]
|
| 227 |
+
- Join the NorthFlank discord channel for any questions
|
| 228 |
+
- Please fill out this form:
|
| 229 |
+
|
| 230 |
+
[linkEmbed]
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
- **Cursor Resources**
|
| 234 |
+
- **$50** in Cursor Credits, **apply below**
|
| 235 |
+
|
| 236 |
+
[linkEmbed]
|
| 237 |
+
|
| 238 |
+
## **10. Judging & Submissions**
|
| 239 |
+
|
| 240 |
+
Judging will take place on **Sunday, March 8**. The judges will evaluate your **technical demos** in the following categories. *Show us what you have built* to solve our problem statements. Please **do not** show us a presentation. We'll be checking to ensure your project was built **entirely during the event**; no previous work is allowed.
|
| 241 |
+
|
| 242 |
+
**|** **Teams should submit [here](https://cerebralvalley.ai/e/openenv-hackathon-sf/hackathon/submit) when they have completed hacking.** In the submission form, you will have to upload a **one minute** demo video on YouTube talking about your submission. You must also show a minimal training script for your environment using Unsloth or HF TRL in Colab.
|
| 243 |
+
|
| 244 |
+
**Please ensure your project uses** OpenEnv (stable release 0.2.1) deployed on HF Spaces.
|
| 245 |
+
|
| 246 |
+
[linkEmbed]
|
| 247 |
+
|
| 248 |
+
**Judging Criteria**
|
| 249 |
+
|
| 250 |
+
- **Environment Innovation (40%) -** Is the environment novel, creative, or challenging? Does it meaningfully test the agent’s behavior?
|
| 251 |
+
- **Storytelling (30%) -** Does the team clearly explain the problem, environment, and agent behavior? Is the demo engaging and easy to follow?
|
| 252 |
+
- **Training Script Showing Improvement in Rewards (20%) -** Does the demo provide observable evidence of training progress (reward curves, metrics, or before/after behavior)?
|
| 253 |
+
- **Reward and Training Pipeline Setup (10%) -** Is the reward logic coherent, and does the pipeline produce meaningful improvement in the agent’s inference (how it acts in the environment)?
|
| 254 |
+
|
| 255 |
+
**Judging Process**
|
| 256 |
+
|
| 257 |
+
**|** Judging proceeds in two rounds:
|
| 258 |
+
|
| 259 |
+
- Hackers will be assigned groups of judges; \~3 minutes to pitch followed by 1-2 minutes of Q/A
|
| 260 |
+
|
| 261 |
+
- The top **six** teams in ranking will get to demo on stage to a panel of judges; \~3 minutes to pitch followed by 2-3 minutes for Q/A.
|
| 262 |
+
|
| 263 |
+
## **11. Prizes**
|
| 264 |
+
|
| 265 |
+
- **1st Place:** $15,000 USD Cash
|
| 266 |
+
|
| 267 |
+
- **2nd Place:** $9,000 USD Cash
|
| 268 |
+
|
| 269 |
+
- **3rd Place:** $6,000 USD Cash
|
| 270 |
+
|
| 271 |
+
## **❓If you have any questions, please email [wania@cerebralvalley.ai](mailto:wania@cerebralvalley.ai) or message on Discord.**
|
models.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from freeciv_env.models import *
|
notes.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
multi-agent (track 1) - freeciv
|
| 2 |
+
long context (track 2) - freeciv
|
| 3 |
+
self improving? (track 4) - mechinterp
|
openenv.yaml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: freeciv_env
|
| 2 |
+
description: OpenEnv wrapper around freeciv-bot for long-horizon strategy play.
|
| 3 |
+
version: 0.1.0
|
| 4 |
+
entrypoint: freeciv_env.server.app:app
|
outline.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Demo outline
|
| 2 |
+
|
| 3 |
+
## Open these tabs first
|
| 4 |
+
|
| 5 |
+
Local resources:
|
| 6 |
+
- `pres/index.html`
|
| 7 |
+
- `pres/training_results.html`
|
| 8 |
+
- `pres/trajectory.html`
|
| 9 |
+
- `pres/training_script.html`
|
| 10 |
+
- `pres/reward_curve.png`
|
| 11 |
+
- `pres/before_after_reward.png`
|
| 12 |
+
|
| 13 |
+
Remote resources:
|
| 14 |
+
- HF Space repo: <https://huggingface.co/spaces/thomasm6m6/freeciv_env>
|
| 15 |
+
- HF Space app: <https://thomasm6m6-freeciv-env.hf.space>
|
| 16 |
+
|
| 17 |
+
Supporting files:
|
| 18 |
+
- reward data: `pres/reward_steps.csv`
|
| 19 |
+
- training script: `scripts/train_grpo_fast.py`
|
| 20 |
+
- env config: `openenv.yaml`
|
| 21 |
+
|
| 22 |
+
## What we have ready
|
| 23 |
+
|
| 24 |
+
- real OpenEnv environment for Freeciv
|
| 25 |
+
- real live backend on H100 via Freeciv Web
|
| 26 |
+
- successful GRPO training run on the live backend
|
| 27 |
+
- reward curve PNG
|
| 28 |
+
- before/after reward PNG
|
| 29 |
+
- live trajectory page with real observations + legal actions
|
| 30 |
+
- note: use reward improvement as the before/after story; raw checkpoint-to-checkpoint action examples were too noisy to be worth showing live
|
| 31 |
+
- minimal training script page
|
| 32 |
+
- HF Space deployed: `thomasm6m6/freeciv_env`
|
| 33 |
+
|
| 34 |
+
## What not to spend time on
|
| 35 |
+
|
| 36 |
+
- long architecture explanation
|
| 37 |
+
- low-level websocket/runtime debugging
|
| 38 |
+
- model internals
|
| 39 |
+
- many charts
|
| 40 |
+
|
| 41 |
+
Use the product demo + reward improvement as the center of the pitch.
|
| 42 |
+
|
| 43 |
+
---
|
| 44 |
+
|
| 45 |
+
## 1 minute YouTube flow
|
| 46 |
+
|
| 47 |
+
### 0:00–0:10
|
| 48 |
+
Open: `pres/trajectory.html`
|
| 49 |
+
|
| 50 |
+
Say:
|
| 51 |
+
- We built a real OpenEnv environment for Freeciv, a long-horizon strategy game.
|
| 52 |
+
- The model sees text observations and legal actions, and acts turn by turn against a live backend.
|
| 53 |
+
|
| 54 |
+
### 0:10–0:22
|
| 55 |
+
Stay on `pres/trajectory.html`
|
| 56 |
+
|
| 57 |
+
Say:
|
| 58 |
+
- This is not a toy prompt task.
|
| 59 |
+
- It has delayed reward, persistent world state, multiple units, city-building, and long-horizon planning.
|
| 60 |
+
- That maps directly to the hackathon’s long-horizon planning and world-modeling tracks.
|
| 61 |
+
|
| 62 |
+
### 0:22–0:38
|
| 63 |
+
Switch to `pres/training_script.html`
|
| 64 |
+
|
| 65 |
+
Say:
|
| 66 |
+
- We also built the minimal RL training loop with Unsloth + TRL GRPO.
|
| 67 |
+
- The script collects live Freeciv states from the real backend, formats them into prompts, and trains a policy on those states with an oracle-based reward.
|
| 68 |
+
|
| 69 |
+
### 0:38–0:55
|
| 70 |
+
Switch to `pres/training_results.html`
|
| 71 |
+
|
| 72 |
+
Say:
|
| 73 |
+
- We ran training on the H100 against the live Freeciv backend.
|
| 74 |
+
- Reward improved from 0.125 at the start to 1.0 by the end of the run.
|
| 75 |
+
- This gives observable training progress, which is the key hackathon requirement.
|
| 76 |
+
|
| 77 |
+
### 0:55–1:00
|
| 78 |
+
Optional final cut to HF Space repo URL
|
| 79 |
+
|
| 80 |
+
Say:
|
| 81 |
+
- The environment is packaged as OpenEnv and deployed to Hugging Face Spaces for submission.
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
## 3 minute live pitch flow
|
| 86 |
+
|
| 87 |
+
### 0:00–0:25 — problem
|
| 88 |
+
Open: `pres/trajectory.html`
|
| 89 |
+
|
| 90 |
+
Say:
|
| 91 |
+
- We wanted a real LLM RL environment for long-horizon strategic planning.
|
| 92 |
+
- Freeciv is a strong fit because it has persistent state, delayed reward, many legal actions, and requires planning across turns.
|
| 93 |
+
|
| 94 |
+
### 0:25–1:05 — show the environment
|
| 95 |
+
Stay on `pres/trajectory.html`
|
| 96 |
+
|
| 97 |
+
Point out:
|
| 98 |
+
- text-first observation
|
| 99 |
+
- legal actions
|
| 100 |
+
- units / cities / economy summaries
|
| 101 |
+
- live backend on H100
|
| 102 |
+
|
| 103 |
+
Say:
|
| 104 |
+
- The agent does not get a canned benchmark prompt.
|
| 105 |
+
- It interacts with a real running world and must choose from legal actions each turn.
|
| 106 |
+
|
| 107 |
+
### 1:05–1:35 — show the training loop
|
| 108 |
+
Open: `pres/training_script.html`
|
| 109 |
+
|
| 110 |
+
Say:
|
| 111 |
+
- This is the minimal GRPO loop.
|
| 112 |
+
- We use live Freeciv sessions, prepare observations, build prompts, and train with Unsloth + TRL.
|
| 113 |
+
- The important thing is that the training loop is small and actually runs on the real backend.
|
| 114 |
+
|
| 115 |
+
### 1:35–2:25 — show training improvement
|
| 116 |
+
Open: `pres/training_results.html`
|
| 117 |
+
|
| 118 |
+
Say:
|
| 119 |
+
- This is the core result.
|
| 120 |
+
- Reward trends upward over the 10 training steps on real Freeciv states, with minor dips along the way.
|
| 121 |
+
- Start: 0.125. End: 1.0.
|
| 122 |
+
- This is the evidence that the environment and reward pipeline are coherent enough to drive learning.
|
| 123 |
+
|
| 124 |
+
If short on time, only show:
|
| 125 |
+
- reward curve
|
| 126 |
+
- before/after reward bars
|
| 127 |
+
|
| 128 |
+
### 2:25–2:50 — why this matters
|
| 129 |
+
Stay on `pres/training_results.html`
|
| 130 |
+
|
| 131 |
+
Say:
|
| 132 |
+
- This fits Statement 2: long-horizon planning.
|
| 133 |
+
- It also fits Statement 3.1: world modeling, because the agent interacts with a real dynamic system and must maintain state over time.
|
| 134 |
+
|
| 135 |
+
### 2:50–3:00 — close
|
| 136 |
+
Open: HF Space repo URL or `pres/index.html`
|
| 137 |
+
|
| 138 |
+
Say:
|
| 139 |
+
- The environment is packaged in OpenEnv, runs with a real backend, has a minimal RL script, and already shows reward improvement.
|
| 140 |
+
|
| 141 |
+
---
|
| 142 |
+
|
| 143 |
+
## Likely Q/A answers
|
| 144 |
+
|
| 145 |
+
### Why Freeciv?
|
| 146 |
+
- It is long-horizon, strategic, partially observable, and naturally multi-step.
|
| 147 |
+
- It is much closer to real planning than one-shot QA.
|
| 148 |
+
|
| 149 |
+
### What exactly is the observation/action interface?
|
| 150 |
+
- Observation is text-first: turn summary, economy, units, cities, map, legal actions.
|
| 151 |
+
- Actions are structured: end turn, move unit, build city, set city production, set research.
|
| 152 |
+
|
| 153 |
+
### Is the backend real?
|
| 154 |
+
- Yes. Training was run against a live Freeciv Web backend on the H100.
|
| 155 |
+
|
| 156 |
+
### What evidence do you have that training worked?
|
| 157 |
+
- The reward curve in `pres/training_results.html`.
|
| 158 |
+
- It rises from 0.125 at step 1 to 1.0 at step 10 during the live run, with small dips at steps 3, 7, and 9.
|
| 159 |
+
|
| 160 |
+
### Why not show a bigger model?
|
| 161 |
+
- For the hackathon, reliability and observable reward improvement mattered more than model scale.
|
| 162 |
+
- A smaller model let us get an end-to-end live run working on the real backend.
|
| 163 |
+
|
| 164 |
+
### What is still incomplete?
|
| 165 |
+
- The environment currently exposes a small action subset rather than the full Freeciv action surface.
|
| 166 |
+
- The main accomplishment is that live interaction and RL training now work end to end.
|
| 167 |
+
|
| 168 |
+
---
|
| 169 |
+
|
| 170 |
+
## If something breaks during the pitch
|
| 171 |
+
|
| 172 |
+
Fallback tab order:
|
| 173 |
+
1. `pres/training_results.html`
|
| 174 |
+
2. `pres/trajectory.html`
|
| 175 |
+
3. `pres/training_script.html`
|
| 176 |
+
4. HF Space repo URL
|
| 177 |
+
|
| 178 |
+
If the live environment demo is flaky, just narrate from the trajectory page and go straight to the reward curve.
|
pres/before_after_reward.png
ADDED
|
pres/index.html
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html><head><meta charset='utf-8'><title>Freeciv demo resources</title>
|
| 3 |
+
<style>
|
| 4 |
+
body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 900px; margin: 40px auto; line-height: 1.5; padding: 0 20px; }
|
| 5 |
+
ul { line-height: 1.9; }
|
| 6 |
+
code { background: #f6f8fa; padding: 2px 6px; border-radius: 6px; }
|
| 7 |
+
</style></head><body>
|
| 8 |
+
<h1>Freeciv OpenEnv demo resources</h1>
|
| 9 |
+
<ul>
|
| 10 |
+
<li><a href='training_results.html'>Training results</a></li>
|
| 11 |
+
<li><a href='trajectory.html'>Live trajectory</a></li>
|
| 12 |
+
<li><a href='training_script.html'>Minimal training script</a></li>
|
| 13 |
+
<li><a href='reward_curve.png'>Reward curve PNG</a></li>
|
| 14 |
+
<li><a href='before_after_reward.png'>Before/after reward PNG</a></li>
|
| 15 |
+
</ul>
|
| 16 |
+
<p>HF Space: <code>https://huggingface.co/spaces/thomasm6m6/freeciv_env</code></p>
|
| 17 |
+
<p>Space app domain: <code>https://thomasm6m6-freeciv-env.hf.space</code></p>
|
| 18 |
+
</body></html>
|
pres/reward_curve.png
ADDED
|
pres/reward_steps.csv
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
step,reward,reward_std
|
| 2 |
+
1,0.125,0.25
|
| 3 |
+
2,0.375,0.5386751294136047
|
| 4 |
+
3,0.25,0.5
|
| 5 |
+
4,0.5,0.5773502588272095
|
| 6 |
+
5,0.625,0.5386751294136047
|
| 7 |
+
6,0.875,0.25
|
| 8 |
+
7,0.75,0.5
|
| 9 |
+
8,0.875,0.25
|
| 10 |
+
9,0.75,0.5
|
| 11 |
+
10,1.0,0.0
|
pres/training_results.html
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html><head><meta charset='utf-8'><title>Training results</title>
|
| 3 |
+
<style>
|
| 4 |
+
body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; line-height: 1.45; padding: 0 20px; }
|
| 5 |
+
.card { background: #f6f8fa; border-radius: 10px; padding: 18px; margin: 16px 0; }
|
| 6 |
+
img { max-width: 100%; border: 1px solid #ddd; border-radius: 8px; }
|
| 7 |
+
table { border-collapse: collapse; width: 100%; }
|
| 8 |
+
td, th { border-bottom: 1px solid #ddd; padding: 8px; text-align: left; }
|
| 9 |
+
</style></head><body>
|
| 10 |
+
<h1>Training results</h1>
|
| 11 |
+
<div class='card'>
|
| 12 |
+
<b>Live backend:</b> real Freeciv Web on H100<br>
|
| 13 |
+
<b>Model:</b> Qwen/Qwen3.5-0.8B + Unsloth LoRA + TRL GRPO<br>
|
| 14 |
+
<b>Run:</b> 10 steps, 32 live states, batch size 8<br>
|
| 15 |
+
<b>Train runtime:</b> not recorded
|
| 16 |
+
</div>
|
| 17 |
+
<div class='card'>
|
| 18 |
+
<b>Observed reward improvement:</b> 0.125 → 1.000<br>
|
| 19 |
+
<b>Best visible point:</b> step 10 reward 1.000
|
| 20 |
+
</div>
|
| 21 |
+
<h2>Reward curve</h2>
|
| 22 |
+
<p><img src='reward_curve.png' alt='reward curve'></p>
|
| 23 |
+
<h2>Start vs end</h2>
|
| 24 |
+
<p><img src='before_after_reward.png' alt='before after reward'></p>
|
| 25 |
+
<h2>Per-step reward</h2>
|
| 26 |
+
<table><tr><th>step</th><th>reward</th><th>reward std</th></tr><tr><td>1</td><td>0.125</td><td>0.250</td></tr><tr><td>2</td><td>0.375</td><td>0.539</td></tr><tr><td>3</td><td>0.250</td><td>0.500</td></tr><tr><td>4</td><td>0.500</td><td>0.577</td></tr><tr><td>5</td><td>0.625</td><td>0.539</td></tr><tr><td>6</td><td>0.875</td><td>0.250</td></tr><tr><td>7</td><td>0.750</td><td>0.500</td></tr><tr><td>8</td><td>0.875</td><td>0.250</td></tr><tr><td>9</td><td>0.750</td><td>0.500</td></tr><tr><td>10</td><td>1.000</td><td>0.000</td></tr></table>
|
| 27 |
+
</body></html>
|
pres/training_script.html
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html><head><meta charset='utf-8'><title>Minimal training script</title>
|
| 3 |
+
<style>
|
| 4 |
+
body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; padding: 0 20px; }
|
| 5 |
+
pre { background: #0d1117; color: #c9d1d9; padding: 16px; border-radius: 8px; overflow-x: auto; }
|
| 6 |
+
code { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
|
| 7 |
+
</style></head><body>
|
| 8 |
+
<h1>Minimal training script</h1>
|
| 9 |
+
<p>Key file: <code>scripts/train_grpo_fast.py</code></p>
|
| 10 |
+
<pre><code>from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 16 |
+
os.environ.setdefault("UNSLOTH_RETURN_LOGITS", "1")
|
| 17 |
+
os.environ.setdefault("UNSLOTH_DISABLE_AUTO_UPDATES", "1")
|
| 18 |
+
|
| 19 |
+
from unsloth import FastLanguageModel
|
| 20 |
+
from datasets import Dataset
|
| 21 |
+
from trl import GRPOConfig, GRPOTrainer
|
| 22 |
+
|
| 23 |
+
from freeciv_env.adapter import prepare_observation
|
| 24 |
+
from freeciv_env.grpo import SYSTEM_PROMPT, build_turn_prompt, oracle_action_index, reward_from_oracle
|
| 25 |
+
from freeciv_env.runtime import LiveFreecivSession
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def parse_args():
|
| 29 |
+
parser = argparse.ArgumentParser()
|
| 30 |
+
parser.add_argument("--env-url", default="http://127.0.0.1")
|
| 31 |
+
parser.add_argument("--model-id", default="Qwen/Qwen3.5-0.8B")
|
| 32 |
+
parser.add_argument("--dataset-size", type=int, default=512)
|
| 33 |
+
parser.add_argument("--max-steps", type=int, default=50)
|
| 34 |
+
parser.add_argument("--batch-size", type=int, default=16)
|
| 35 |
+
parser.add_argument("--num-generations", type=int, default=4)
|
| 36 |
+
parser.add_argument("--episode-horizon", type=int, default=4)
|
| 37 |
+
parser.add_argument("--max-prompt-length", type=int, default=768)
|
| 38 |
+
parser.add_argument("--max-completion-length", type=int, default=8)
|
| 39 |
+
parser.add_argument("--learning-rate", type=float, default=5e-6)
|
| 40 |
+
parser.add_argument("--lora-rank", type=int, default=16)
|
| 41 |
+
parser.add_argument("--output-dir", default="outputs/qwen35_08b_grpo")
|
| 42 |
+
parser.add_argument("--save-steps", type=int, default=50)
|
| 43 |
+
return parser.parse_args()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def collect_dataset(env_url: str, dataset_size: int, episode_horizon: int) -> Dataset:
|
| 48 |
+
rows = {"prompt": [], "best_index": []}
|
| 49 |
+
while len(rows["prompt"]) < dataset_size:
|
| 50 |
+
session = LiveFreecivSession(base_url=env_url, turn_timeout_s=120)
|
| 51 |
+
try:
|
| 52 |
+
snapshot = session.reset()
|
| 53 |
+
for turn_index in range(episode_horizon):
|
| 54 |
+
observation = prepare_observation(
|
| 55 |
+
snapshot,
|
| 56 |
+
reward=0.0,
|
| 57 |
+
done=False,
|
| 58 |
+
status="running",
|
| 59 |
+
).observation
|
| 60 |
+
best_index = oracle_action_index(observation.legal_actions)
|
| 61 |
+
rows["prompt"].append(build_turn_prompt(observation))
|
| 62 |
+
rows["best_index"].append(best_index)
|
| 63 |
+
if len(rows["prompt"]) >= dataset_size or turn_index + 1 >= episode_horizon:
|
| 64 |
+
break
|
| 65 |
+
snapshot = session.end_turn()
|
| 66 |
+
finally:
|
| 67 |
+
session.close()
|
| 68 |
+
return Dataset.from_dict(rows)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def load_model(model_id: str, max_seq_length: int, lora_rank: int):
|
| 73 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 74 |
+
model_name=model_id,
|
| 75 |
+
max_seq_length=max_seq_length,
|
| 76 |
+
load_in_4bit=False,
|
| 77 |
+
load_in_16bit=True,
|
| 78 |
+
full_finetuning=False,
|
| 79 |
+
fast_inference=False,
|
| 80 |
+
)
|
| 81 |
+
model = FastLanguageModel.get_peft_model(
|
| 82 |
+
model,
|
| 83 |
+
r=lora_rank,
|
| 84 |
+
target_modules=[
|
| 85 |
+
"q_proj",
|
| 86 |
+
"k_proj",
|
| 87 |
+
"v_proj",
|
| 88 |
+
"o_proj",
|
| 89 |
+
"gate_proj",
|
| 90 |
+
"up_proj",
|
| 91 |
+
"down_proj",
|
| 92 |
+
],
|
| 93 |
+
lora_alpha=lora_rank * 2,
|
| 94 |
+
lora_dropout=0,
|
| 95 |
+
bias="none",
|
| 96 |
+
use_gradient_checkpointing=False,
|
| 97 |
+
random_state=3407,
|
| 98 |
+
max_seq_length=max_seq_length,
|
| 99 |
+
)
|
| 100 |
+
return model, tokenizer
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def apply_chat_template(dataset: Dataset, tokenizer) -> Dataset:
|
| 105 |
+
def format_row(row):
|
| 106 |
+
messages = [
|
| 107 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
| 108 |
+
{"role": "user", "content": row["prompt"]},
|
| 109 |
+
]
|
| 110 |
+
return {
|
| 111 |
+
"prompt": tokenizer.apply_chat_template(
|
| 112 |
+
messages,
|
| 113 |
+
tokenize=False,
|
| 114 |
+
add_generation_prompt=True,
|
| 115 |
+
enable_thinking=False,
|
| 116 |
+
)
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
return dataset.map(format_row)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def main() -> None:
|
| 124 |
+
args = parse_args()
|
| 125 |
+
max_seq_length = args.max_prompt_length + args.max_completion_length
|
| 126 |
+
dataset = collect_dataset(args.env_url, args.dataset_size, args.episode_horizon)
|
| 127 |
+
model, tokenizer = load_model(args.model_id, max_seq_length, args.lora_rank)
|
| 128 |
+
dataset = apply_chat_template(dataset, tokenizer)
|
| 129 |
+
|
| 130 |
+
training_args = GRPOConfig(
|
| 131 |
+
learning_rate=args.learning_rate,
|
| 132 |
+
weight_decay=0.01,
|
| 133 |
+
warmup_ratio=0.05,
|
| 134 |
+
lr_scheduler_type="cosine",
|
| 135 |
+
optim="adamw_torch_fused",
|
| 136 |
+
logging_steps=1,
|
| 137 |
+
log_completions=False,
|
| 138 |
+
per_device_train_batch_size=args.batch_size,
|
| 139 |
+
gradient_accumulation_steps=1,
|
| 140 |
+
num_generations=args.num_generations,
|
| 141 |
+
max_prompt_length=args.max_prompt_length,
|
| 142 |
+
max_completion_length=args.max_completion_length,
|
| 143 |
+
max_steps=args.max_steps,
|
| 144 |
+
save_steps=args.save_steps,
|
| 145 |
+
max_grad_norm=0.3,
|
| 146 |
+
bf16=True,
|
| 147 |
+
report_to="none",
|
| 148 |
+
beta=0.0,
|
| 149 |
+
loss_type="dr_grpo",</code></pre>
|
| 150 |
+
</body></html>
|
pres/trajectory.html
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html><head><meta charset='utf-8'><title>Freeciv live trajectory</title>
|
| 3 |
+
<style>
|
| 4 |
+
body { font-family: -apple-system, BlinkMacSystemFont, sans-serif; max-width: 1000px; margin: 40px auto; line-height: 1.4; padding: 0 20px; }
|
| 5 |
+
pre { background: #f6f8fa; padding: 16px; border-radius: 8px; white-space: pre-wrap; }
|
| 6 |
+
section { margin-bottom: 28px; }
|
| 7 |
+
</style></head><body>
|
| 8 |
+
<h1>Freeciv live trajectory</h1>
|
| 9 |
+
<p>Real snapshots collected from the live Freeciv Web backend on the H100.</p>
|
| 10 |
+
<section><h2>Turn 1 snapshot 1</h2><pre>Turn 1
|
| 11 |
+
Score 0.0
|
| 12 |
+
Map: 0 known tiles, 0 visible tiles
|
| 13 |
+
Economy: 50 gold, science rate 60%
|
| 14 |
+
Cities: 0
|
| 15 |
+
Units: 5
|
| 16 |
+
- Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 17 |
+
- Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 18 |
+
- Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 19 |
+
- Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 20 |
+
- Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 21 |
+
Techs researched: 1
|
| 22 |
+
Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
|
| 23 |
+
1. build_city — Build a city with unit 110
|
| 24 |
+
2. end_turn — End the current turn
|
| 25 |
+
3. move_unit — Move unit 102 in direction 0
|
| 26 |
+
4. move_unit — Move unit 102 in direction 1
|
| 27 |
+
5. move_unit — Move unit 102 in direction 2</pre></section><section><h2>Turn 1 snapshot 2</h2><pre>Turn 1
|
| 28 |
+
Score 0.0
|
| 29 |
+
Map: 0 known tiles, 0 visible tiles
|
| 30 |
+
Economy: 50 gold, science rate 60%
|
| 31 |
+
Cities: 0
|
| 32 |
+
Units: 5
|
| 33 |
+
- Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 34 |
+
- Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 35 |
+
- Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 36 |
+
- Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 37 |
+
- Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 38 |
+
Techs researched: 1
|
| 39 |
+
Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
|
| 40 |
+
1. build_city — Build a city with unit 110
|
| 41 |
+
2. end_turn — End the current turn
|
| 42 |
+
3. move_unit — Move unit 102 in direction 0
|
| 43 |
+
4. move_unit — Move unit 102 in direction 1
|
| 44 |
+
5. move_unit — Move unit 102 in direction 2</pre></section><section><h2>Turn 2 snapshot 3</h2><pre>Turn 2
|
| 45 |
+
Score 0.0
|
| 46 |
+
Map: 0 known tiles, 0 visible tiles
|
| 47 |
+
Economy: 50 gold, science rate 60%
|
| 48 |
+
Cities: 0
|
| 49 |
+
Units: 5
|
| 50 |
+
- Unit 102: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 51 |
+
- Unit 110: Settlers, hp 20, moves_left 3, build_city=true, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 52 |
+
- Unit 111: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 53 |
+
- Unit 112: Workers, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 54 |
+
- Unit 113: Explorer, hp 10, moves_left 3, build_city=false, move_dirs=[0, 1, 2, 3, 4, 5, 6, 7]
|
| 55 |
+
Techs researched: 1
|
| 56 |
+
Legal actions exposed: 50</pre><h3>Legal actions (sample)</h3><pre>0. build_city — Build a city with unit 102
|
| 57 |
+
1. build_city — Build a city with unit 110
|
| 58 |
+
2. end_turn — End the current turn
|
| 59 |
+
3. move_unit — Move unit 102 in direction 0
|
| 60 |
+
4. move_unit — Move unit 102 in direction 1
|
| 61 |
+
5. move_unit — Move unit 102 in direction 2</pre></section>
|
| 62 |
+
</body></html>
|
pyproject.toml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "freeciv-env"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "OpenEnv environment for Freeciv via freeciv-bot"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.11"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"openenv-core[core]==0.2.1",
|
| 9 |
+
"freecivbot @ git+https://github.com/chris1869/freeciv-bot.git",
|
| 10 |
+
"uvicorn>=0.35.0",
|
| 11 |
+
]
|
| 12 |
+
|
| 13 |
+
[project.scripts]
|
| 14 |
+
server = "server.app:main"
|
| 15 |
+
|
| 16 |
+
[project.optional-dependencies]
|
| 17 |
+
dev = [
|
| 18 |
+
"pytest>=8.4.1",
|
| 19 |
+
"requests>=2.32.5",
|
| 20 |
+
]
|
| 21 |
+
train = [
|
| 22 |
+
"accelerate>=1.10.0",
|
| 23 |
+
"bitsandbytes>=0.47.0",
|
| 24 |
+
"datasets>=4.0.0",
|
| 25 |
+
"trl>=0.24.0",
|
| 26 |
+
"unsloth>=2026.3.4",
|
| 27 |
+
]
|
| 28 |
+
|
| 29 |
+
[build-system]
|
| 30 |
+
requires = ["setuptools>=80", "wheel"]
|
| 31 |
+
build-backend = "setuptools.build_meta"
|
| 32 |
+
|
| 33 |
+
[tool.setuptools]
|
| 34 |
+
packages = ["freeciv_env", "freeciv_env.server", "server"]
|
| 35 |
+
|
| 36 |
+
[tool.pytest.ini_options]
|
| 37 |
+
pythonpath = ["."]
|
| 38 |
+
testpaths = ["tests"]
|
| 39 |
+
markers = [
|
| 40 |
+
"integration: requires a live freeciv-web runtime",
|
| 41 |
+
]
|
qwen35_live_long_trainer_state.json
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.625,
|
| 6 |
+
"eval_steps": 500,
|
| 7 |
+
"global_step": 10,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"clip_ratio/high_max": 0.0,
|
| 14 |
+
"clip_ratio/high_mean": 0.0,
|
| 15 |
+
"clip_ratio/low_mean": 0.0,
|
| 16 |
+
"clip_ratio/low_min": 0.0,
|
| 17 |
+
"clip_ratio/region_mean": 0.0,
|
| 18 |
+
"completion_length": 2.375,
|
| 19 |
+
"completions/clipped_ratio": 0.0,
|
| 20 |
+
"completions/max_length": 3.0,
|
| 21 |
+
"completions/max_terminated_length": 3.0,
|
| 22 |
+
"completions/mean_length": 2.375,
|
| 23 |
+
"completions/mean_terminated_length": 2.375,
|
| 24 |
+
"completions/min_length": 2.0,
|
| 25 |
+
"completions/min_terminated_length": 2.0,
|
| 26 |
+
"epoch": 0.0625,
|
| 27 |
+
"frac_reward_zero_std": 0.5,
|
| 28 |
+
"grad_norm": 5.300995349884033,
|
| 29 |
+
"kl": 0.0,
|
| 30 |
+
"learning_rate": 0.0,
|
| 31 |
+
"loss": 0.01562187448143959,
|
| 32 |
+
"num_tokens": 8343.0,
|
| 33 |
+
"reward": 0.125,
|
| 34 |
+
"reward_std": 0.25,
|
| 35 |
+
"rewards/reward_from_oracle/mean": 0.125,
|
| 36 |
+
"rewards/reward_from_oracle/std": 0.3535533845424652,
|
| 37 |
+
"step": 1
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"clip_ratio/high_max": 0.0,
|
| 41 |
+
"clip_ratio/high_mean": 0.0,
|
| 42 |
+
"clip_ratio/low_mean": 0.0,
|
| 43 |
+
"clip_ratio/low_min": 0.0,
|
| 44 |
+
"clip_ratio/region_mean": 0.0,
|
| 45 |
+
"completion_length": 2.375,
|
| 46 |
+
"completions/clipped_ratio": 0.0,
|
| 47 |
+
"completions/max_length": 3.0,
|
| 48 |
+
"completions/max_terminated_length": 3.0,
|
| 49 |
+
"completions/mean_length": 2.375,
|
| 50 |
+
"completions/mean_terminated_length": 2.375,
|
| 51 |
+
"completions/min_length": 2.0,
|
| 52 |
+
"completions/min_terminated_length": 2.0,
|
| 53 |
+
"epoch": 0.125,
|
| 54 |
+
"frac_reward_zero_std": 0.0,
|
| 55 |
+
"grad_norm": 9.095938682556152,
|
| 56 |
+
"kl": 0.0,
|
| 57 |
+
"learning_rate": 5e-06,
|
| 58 |
+
"loss": 0.029151180759072304,
|
| 59 |
+
"num_tokens": 16682.0,
|
| 60 |
+
"reward": 0.375,
|
| 61 |
+
"reward_std": 0.5386751294136047,
|
| 62 |
+
"rewards/reward_from_oracle/mean": 0.375,
|
| 63 |
+
"rewards/reward_from_oracle/std": 0.5175492167472839,
|
| 64 |
+
"step": 2
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"clip_ratio/high_max": 0.0,
|
| 68 |
+
"clip_ratio/high_mean": 0.0,
|
| 69 |
+
"clip_ratio/low_mean": 0.0,
|
| 70 |
+
"clip_ratio/low_min": 0.0,
|
| 71 |
+
"clip_ratio/region_mean": 0.0,
|
| 72 |
+
"completion_length": 2.375,
|
| 73 |
+
"completions/clipped_ratio": 0.0,
|
| 74 |
+
"completions/max_length": 3.0,
|
| 75 |
+
"completions/max_terminated_length": 3.0,
|
| 76 |
+
"completions/mean_length": 2.375,
|
| 77 |
+
"completions/mean_terminated_length": 2.375,
|
| 78 |
+
"completions/min_length": 2.0,
|
| 79 |
+
"completions/min_terminated_length": 2.0,
|
| 80 |
+
"epoch": 0.1875,
|
| 81 |
+
"frac_reward_zero_std": 0.0,
|
| 82 |
+
"grad_norm": 8.75147533416748,
|
| 83 |
+
"kl": 0.0,
|
| 84 |
+
"learning_rate": 4.849231551964771e-06,
|
| 85 |
+
"loss": 0.023432811722159386,
|
| 86 |
+
"num_tokens": 25025.0,
|
| 87 |
+
"reward": 0.25,
|
| 88 |
+
"reward_std": 0.5,
|
| 89 |
+
"rewards/reward_from_oracle/mean": 0.25,
|
| 90 |
+
"rewards/reward_from_oracle/std": 0.4629100561141968,
|
| 91 |
+
"step": 3
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"clip_ratio/high_max": 0.0,
|
| 95 |
+
"clip_ratio/high_mean": 0.0,
|
| 96 |
+
"clip_ratio/low_mean": 0.0,
|
| 97 |
+
"clip_ratio/low_min": 0.0,
|
| 98 |
+
"clip_ratio/region_mean": 0.0,
|
| 99 |
+
"completion_length": 2.125,
|
| 100 |
+
"completions/clipped_ratio": 0.0,
|
| 101 |
+
"completions/max_length": 3.0,
|
| 102 |
+
"completions/max_terminated_length": 3.0,
|
| 103 |
+
"completions/mean_length": 2.125,
|
| 104 |
+
"completions/mean_terminated_length": 2.125,
|
| 105 |
+
"completions/min_length": 2.0,
|
| 106 |
+
"completions/min_terminated_length": 2.0,
|
| 107 |
+
"epoch": 0.25,
|
| 108 |
+
"frac_reward_zero_std": 0.0,
|
| 109 |
+
"grad_norm": 10.478106498718262,
|
| 110 |
+
"kl": 0.0,
|
| 111 |
+
"learning_rate": 4.415111107797445e-06,
|
| 112 |
+
"loss": 0.013529304414987564,
|
| 113 |
+
"num_tokens": 33362.0,
|
| 114 |
+
"reward": 0.5,
|
| 115 |
+
"reward_std": 0.5773502588272095,
|
| 116 |
+
"rewards/reward_from_oracle/mean": 0.5,
|
| 117 |
+
"rewards/reward_from_oracle/std": 0.5345224738121033,
|
| 118 |
+
"step": 4
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"clip_ratio/high_max": 0.0,
|
| 122 |
+
"clip_ratio/high_mean": 0.0,
|
| 123 |
+
"clip_ratio/low_mean": 0.0,
|
| 124 |
+
"clip_ratio/low_min": 0.0,
|
| 125 |
+
"clip_ratio/region_mean": 0.0,
|
| 126 |
+
"completion_length": 2.125,
|
| 127 |
+
"completions/clipped_ratio": 0.0,
|
| 128 |
+
"completions/max_length": 3.0,
|
| 129 |
+
"completions/max_terminated_length": 3.0,
|
| 130 |
+
"completions/mean_length": 2.125,
|
| 131 |
+
"completions/mean_terminated_length": 2.125,
|
| 132 |
+
"completions/min_length": 2.0,
|
| 133 |
+
"completions/min_terminated_length": 2.0,
|
| 134 |
+
"epoch": 0.3125,
|
| 135 |
+
"frac_reward_zero_std": 0.0,
|
| 136 |
+
"grad_norm": 8.125267028808594,
|
| 137 |
+
"kl": 0.0,
|
| 138 |
+
"learning_rate": 3.7500000000000005e-06,
|
| 139 |
+
"loss": 0.013529304414987564,
|
| 140 |
+
"num_tokens": 41707.0,
|
| 141 |
+
"reward": 0.625,
|
| 142 |
+
"reward_std": 0.5386751294136047,
|
| 143 |
+
"rewards/reward_from_oracle/mean": 0.625,
|
| 144 |
+
"rewards/reward_from_oracle/std": 0.5175492167472839,
|
| 145 |
+
"step": 5
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"clip_ratio/high_max": 0.0,
|
| 149 |
+
"clip_ratio/high_mean": 0.0,
|
| 150 |
+
"clip_ratio/low_mean": 0.0,
|
| 151 |
+
"clip_ratio/low_min": 0.0,
|
| 152 |
+
"clip_ratio/region_mean": 0.0,
|
| 153 |
+
"completion_length": 2.0,
|
| 154 |
+
"completions/clipped_ratio": 0.0,
|
| 155 |
+
"completions/max_length": 2.0,
|
| 156 |
+
"completions/max_terminated_length": 2.0,
|
| 157 |
+
"completions/mean_length": 2.0,
|
| 158 |
+
"completions/mean_terminated_length": 2.0,
|
| 159 |
+
"completions/min_length": 2.0,
|
| 160 |
+
"completions/min_terminated_length": 2.0,
|
| 161 |
+
"epoch": 0.375,
|
| 162 |
+
"frac_reward_zero_std": 0.5,
|
| 163 |
+
"grad_norm": 3.183867931365967,
|
| 164 |
+
"kl": 0.0,
|
| 165 |
+
"learning_rate": 2.9341204441673267e-06,
|
| 166 |
+
"loss": 0.0,
|
| 167 |
+
"num_tokens": 50047.0,
|
| 168 |
+
"reward": 0.875,
|
| 169 |
+
"reward_std": 0.25,
|
| 170 |
+
"rewards/reward_from_oracle/mean": 0.875,
|
| 171 |
+
"rewards/reward_from_oracle/std": 0.3535533845424652,
|
| 172 |
+
"step": 6
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"clip_ratio/high_max": 0.0,
|
| 176 |
+
"clip_ratio/high_mean": 0.0,
|
| 177 |
+
"clip_ratio/low_mean": 0.0,
|
| 178 |
+
"clip_ratio/low_min": 0.0,
|
| 179 |
+
"clip_ratio/region_mean": 0.0,
|
| 180 |
+
"completion_length": 2.0,
|
| 181 |
+
"completions/clipped_ratio": 0.0,
|
| 182 |
+
"completions/max_length": 2.0,
|
| 183 |
+
"completions/max_terminated_length": 2.0,
|
| 184 |
+
"completions/mean_length": 2.0,
|
| 185 |
+
"completions/mean_terminated_length": 2.0,
|
| 186 |
+
"completions/min_length": 2.0,
|
| 187 |
+
"completions/min_terminated_length": 2.0,
|
| 188 |
+
"epoch": 0.4375,
|
| 189 |
+
"frac_reward_zero_std": 0.0,
|
| 190 |
+
"grad_norm": 7.007436275482178,
|
| 191 |
+
"kl": 0.0,
|
| 192 |
+
"learning_rate": 2.0658795558326745e-06,
|
| 193 |
+
"loss": 0.0,
|
| 194 |
+
"num_tokens": 58391.0,
|
| 195 |
+
"reward": 0.75,
|
| 196 |
+
"reward_std": 0.5,
|
| 197 |
+
"rewards/reward_from_oracle/mean": 0.75,
|
| 198 |
+
"rewards/reward_from_oracle/std": 0.4629100561141968,
|
| 199 |
+
"step": 7
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"clip_ratio/high_max": 0.0,
|
| 203 |
+
"clip_ratio/high_mean": 0.0,
|
| 204 |
+
"clip_ratio/low_mean": 0.0,
|
| 205 |
+
"clip_ratio/low_min": 0.0,
|
| 206 |
+
"clip_ratio/region_mean": 0.0,
|
| 207 |
+
"completion_length": 2.0,
|
| 208 |
+
"completions/clipped_ratio": 0.0,
|
| 209 |
+
"completions/max_length": 2.0,
|
| 210 |
+
"completions/max_terminated_length": 2.0,
|
| 211 |
+
"completions/mean_length": 2.0,
|
| 212 |
+
"completions/mean_terminated_length": 2.0,
|
| 213 |
+
"completions/min_length": 2.0,
|
| 214 |
+
"completions/min_terminated_length": 2.0,
|
| 215 |
+
"epoch": 0.5,
|
| 216 |
+
"frac_reward_zero_std": 0.5,
|
| 217 |
+
"grad_norm": 3.759775161743164,
|
| 218 |
+
"kl": 0.0,
|
| 219 |
+
"learning_rate": 1.2500000000000007e-06,
|
| 220 |
+
"loss": 1.862645149230957e-09,
|
| 221 |
+
"num_tokens": 66727.0,
|
| 222 |
+
"reward": 0.875,
|
| 223 |
+
"reward_std": 0.25,
|
| 224 |
+
"rewards/reward_from_oracle/mean": 0.875,
|
| 225 |
+
"rewards/reward_from_oracle/std": 0.3535533845424652,
|
| 226 |
+
"step": 8
|
| 227 |
+
},
|
| 228 |
+
{
|
| 229 |
+
"clip_ratio/high_max": 0.0,
|
| 230 |
+
"clip_ratio/high_mean": 0.0,
|
| 231 |
+
"clip_ratio/low_mean": 0.0,
|
| 232 |
+
"clip_ratio/low_min": 0.0,
|
| 233 |
+
"clip_ratio/region_mean": 0.0,
|
| 234 |
+
"completion_length": 2.0,
|
| 235 |
+
"completions/clipped_ratio": 0.0,
|
| 236 |
+
"completions/max_length": 2.0,
|
| 237 |
+
"completions/max_terminated_length": 2.0,
|
| 238 |
+
"completions/mean_length": 2.0,
|
| 239 |
+
"completions/mean_terminated_length": 2.0,
|
| 240 |
+
"completions/min_length": 2.0,
|
| 241 |
+
"completions/min_terminated_length": 2.0,
|
| 242 |
+
"epoch": 0.5625,
|
| 243 |
+
"frac_reward_zero_std": 0.0,
|
| 244 |
+
"grad_norm": 7.748785495758057,
|
| 245 |
+
"kl": 0.0,
|
| 246 |
+
"learning_rate": 5.848888922025553e-07,
|
| 247 |
+
"loss": 1.862645149230957e-09,
|
| 248 |
+
"num_tokens": 75063.0,
|
| 249 |
+
"reward": 0.75,
|
| 250 |
+
"reward_std": 0.5,
|
| 251 |
+
"rewards/reward_from_oracle/mean": 0.75,
|
| 252 |
+
"rewards/reward_from_oracle/std": 0.4629100561141968,
|
| 253 |
+
"step": 9
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"clip_ratio/high_max": 0.0,
|
| 257 |
+
"clip_ratio/high_mean": 0.0,
|
| 258 |
+
"clip_ratio/low_mean": 0.0,
|
| 259 |
+
"clip_ratio/low_min": 0.0,
|
| 260 |
+
"clip_ratio/region_mean": 0.0,
|
| 261 |
+
"completion_length": 2.0,
|
| 262 |
+
"completions/clipped_ratio": 0.0,
|
| 263 |
+
"completions/max_length": 2.0,
|
| 264 |
+
"completions/max_terminated_length": 2.0,
|
| 265 |
+
"completions/mean_length": 2.0,
|
| 266 |
+
"completions/mean_terminated_length": 2.0,
|
| 267 |
+
"completions/min_length": 2.0,
|
| 268 |
+
"completions/min_terminated_length": 2.0,
|
| 269 |
+
"epoch": 0.625,
|
| 270 |
+
"frac_reward_zero_std": 1.0,
|
| 271 |
+
"grad_norm": 0.0,
|
| 272 |
+
"kl": 0.0,
|
| 273 |
+
"learning_rate": 1.507684480352292e-07,
|
| 274 |
+
"loss": 0.0,
|
| 275 |
+
"num_tokens": 83403.0,
|
| 276 |
+
"reward": 1.0,
|
| 277 |
+
"reward_std": 0.0,
|
| 278 |
+
"rewards/reward_from_oracle/mean": 1.0,
|
| 279 |
+
"rewards/reward_from_oracle/std": 0.0,
|
| 280 |
+
"step": 10
|
| 281 |
+
}
|
| 282 |
+
],
|
| 283 |
+
"logging_steps": 1,
|
| 284 |
+
"max_steps": 10,
|
| 285 |
+
"num_input_tokens_seen": 83403,
|
| 286 |
+
"num_train_epochs": 1,
|
| 287 |
+
"save_steps": 10,
|
| 288 |
+
"stateful_callbacks": {
|
| 289 |
+
"TrainerControl": {
|
| 290 |
+
"args": {
|
| 291 |
+
"should_epoch_stop": false,
|
| 292 |
+
"should_evaluate": false,
|
| 293 |
+
"should_log": false,
|
| 294 |
+
"should_save": true,
|
| 295 |
+
"should_training_stop": true
|
| 296 |
+
},
|
| 297 |
+
"attributes": {}
|
| 298 |
+
}
|
| 299 |
+
},
|
| 300 |
+
"total_flos": 0.0,
|
| 301 |
+
"train_batch_size": 8,
|
| 302 |
+
"trial_name": null,
|
| 303 |
+
"trial_params": null
|
| 304 |
+
}
|
scripts/start_space.sh
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
#
# start_space.sh - container entry point: wait for the local freeciv-web
# services to answer over HTTP, then start the uvicorn server.
#
# Environment (all optional; the defaults below apply when unset or empty):
#   FREECIV_SERVER_URL         default http://127.0.0.1
#   FREECIV_USERNAME           default openenvbot
#   FREECIV_CLIENT_PORT        default 6000
#   FREECIV_TURN_TIMEOUT_S     default 120
#   FREECIV_STARTUP_TIMEOUT_S  default 180 (seconds to wait for the services)
#   ENABLE_WEB_INTERFACE       default true
#
# -e: stop on unhandled errors, -u: unset variables are errors,
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

export FREECIV_SERVER_URL="${FREECIV_SERVER_URL:-http://127.0.0.1}"
export FREECIV_USERNAME="${FREECIV_USERNAME:-openenvbot}"
export FREECIV_CLIENT_PORT="${FREECIV_CLIENT_PORT:-6000}"
export FREECIV_TURN_TIMEOUT_S="${FREECIV_TURN_TIMEOUT_S:-120}"
export FREECIV_STARTUP_TIMEOUT_S="${FREECIV_STARTUP_TIMEOUT_S:-180}"
export ENABLE_WEB_INTERFACE="${ENABLE_WEB_INTERFACE:-true}"

# Startup log; truncated on every boot so it only holds the current run.
log_file=/tmp/start_space.log
: > "$log_file"
|
| 13 |
+
|
| 14 |
+
#######################################
# Write one timestamped line to stderr and append it to the startup log.
# Globals:   log_file (read) - log path; falls back to /tmp/start_space.log
# Arguments: message words, joined with single spaces
# Outputs:   "[<ISO-8601 timestamp>] <message>" on stderr and in the log file
# Returns:   0 always; file logging is best-effort
#######################################
log() {
  local line
  line="[$(date -Iseconds)] $*"
  printf '%s\n' "$line" >&2
  # Best-effort on purpose: the script runs under `set -e`, and an unwritable
  # or full log file must not abort startup. The line already went to stderr.
  { printf '%s\n' "$line" >> "${log_file:-/tmp/start_space.log}"; } 2>/dev/null || true
}
|
| 19 |
+
|
| 20 |
+
#######################################
# Probe an HTTP endpoint once and report whether it answered successfully.
# Arguments: $1 - label used in the output
#            $2 - URL to probe
# Outputs:   "<label>=up" or "<label>=down" on stdout
# Returns:   0 in both cases, so callers can capture the output under `set -e`
#######################################
service_status() {
  local name url
  name="${1:?service_status: missing service name}"
  url="${2:?service_status: missing url}"
  # -f turns HTTP error responses into a failing exit status; --max-time caps
  # the whole probe at 2 seconds. --url keeps a value that starts with '-'
  # from being parsed as a curl option. curl's own output is discarded.
  if curl -fsS --max-time 2 --url "$url" >/dev/null 2>&1; then
    printf '%s=up\n' "$name"
  else
    printf '%s=down\n' "$name"
  fi
}
|
| 30 |
+
|
| 31 |
+
#######################################
# Poll nginx, publite2 and tomcat every 2 seconds until all three answer,
# or until the startup timeout elapses. Logs one status line per poll.
# Globals:   FREECIV_STARTUP_TIMEOUT_S (read) - seconds to wait; 180 if unset
# Arguments: none
# Outputs:   status and error lines through log()
# Returns:   0 once all services are up; 1 on timeout or an invalid timeout
#######################################
wait_for_runtime() {
  local timeout deadline status nginx publite2 tomcat
  timeout="${FREECIV_STARTUP_TIMEOUT_S:-180}"
  # The value comes from the environment and is used in arithmetic below, so
  # accept only plain digits; anything else would be evaluated as an expression.
  if [[ ! "$timeout" =~ ^[0-9]+$ ]]; then
    log "invalid FREECIV_STARTUP_TIMEOUT_S='${timeout}' (expected whole seconds)"
    return 1
  fi
  # 10# forces base 10 so values with leading zeros (e.g. 08) are not read as octal.
  deadline=$(( $(date +%s) + 10#$timeout ))
  while true; do
    nginx=$(service_status nginx http://127.0.0.1/)
    publite2=$(service_status publite2 http://127.0.0.1/pubstatus)
    tomcat=$(service_status tomcat http://127.0.0.1:8080/freeciv-web/)
    status="$nginx $publite2 $tomcat"
    log "$status"
    if [[ "$nginx" == "nginx=up" && "$publite2" == "publite2=up" && "$tomcat" == "tomcat=up" ]]; then
      return 0
    fi
    # Checked after probing, so at least one full probe round always runs.
    if (( $(date +%s) >= deadline )); then
      log "freeciv runtime failed to become ready before timeout=${timeout}s"
      return 1
    fi
    sleep 2
  done
}
|
| 50 |
+
|
| 51 |
+
# Boot sequence: announce the start, wait for the freeciv services, then
# hand the process over to uvicorn.
log "start_space.sh boot"
# Checked explicitly instead of relying on `set -e`, so a runtime that never
# becomes ready always stops the boot before uvicorn is started.
if ! wait_for_runtime; then
  exit 1
fi
log "freeciv runtime ready; starting uvicorn"
# exec replaces this shell process with uvicorn.
exec python -m uvicorn server.app:app \
  --host 0.0.0.0 \
  --port 8000 \
  --ws-ping-interval 300 \
  --ws-ping-timeout 300
|