Spaces:

qpluslab
/

OpenRA-Bench

Running

yxc20098 committed on May 21

Commit

9063f2b

1 Parent(s): aa68464

Vendor the necessary openra_rl_training + openra_env modules

Makes the bench self-contained for Python code — no external
OpenRA-RL-Training / OpenRA-RL checkouts are needed. This addresses the
other half of PR #12's concern the correct way: with faithful frozen
copies rather than stubs.

Vendored verbatim (sha-verified identical to source) — only the
modules the bench actually uses:

openra_rl_training/
scenario.py — ScenarioDefinition, VALID_ACTOR_TYPES
training/reward_funcs.py — DEFAULT_REWARD_WEIGHTS
training/rust_env_pool.py — RustEnvPool
training/minimap_renderer.py — render_minimap (terrain minimap)
openra_env/
game_data.py — RA_UNITS / RA_BUILDINGS

openra_env/__init__.py deliberately does NOT pull client/models — the
only reference to those is example code inside a docstring; the bench
never imports the legacy gRPC client at runtime.

requirements.txt: add the now-needed deps (pydantic, pyyaml, pillow,
numpy, matplotlib). VENDOR.md documents provenance + the re-vendor
procedure. The Rust engine (openra_train) still builds from OpenRA-Rust
via maturin — a compiled extension can't be vendored as source.

Verified: the vendored packages resolve to the in-repo copies (not the
sibling checkouts), and DEFAULT_REWARD_WEIGHTS.outcome == 0.5 (the real
value; PR #12's stub incorrectly used 0.2).

Files changed (10) hide show

VENDOR.md +25 -0
openra_env/__init__.py +7 -0
openra_env/game_data.py +984 -0
openra_rl_training/__init__.py +9 -0
openra_rl_training/scenario.py +401 -0
openra_rl_training/training/__init__.py +1 -0
openra_rl_training/training/minimap_renderer.py +333 -0
openra_rl_training/training/reward_funcs.py +264 -0
openra_rl_training/training/rust_env_pool.py +173 -0
requirements.txt +7 -0

VENDOR.md ADDED Viewed

	@@ -0,0 +1,25 @@

+# Vendored dependencies
+OpenRA-Bench vendors a small, faithful subset of two sibling repos so the
+benchmark runs without external source checkouts, and so the evaluation
+stack is frozen for reproducibility.
+## `openra_rl_training/` — from OpenRA-RL-Training
+- `scenario.py` — `ScenarioDefinition`, `VALID_ACTOR_TYPES`
+- `training/reward_funcs.py` — `DEFAULT_REWARD_WEIGHTS` (composite scorer)
+- `training/rust_env_pool.py` — `RustEnvPool` (wraps the engine)
+- `training/minimap_renderer.py` — `render_minimap` (terrain minimap)
+## `openra_env/` — from OpenRA-RL
+- `game_data.py` — `RA_UNITS` / `RA_BUILDINGS` (consumed by `scenario.py`)
+These are **verbatim copies** — do not hand-edit. To update: re-copy from
+the source repos and re-run the full suite (`pytest tests/`).
+## NOT vendored
+- **`openra_train`** — the Rust engine, a compiled extension. Build it
+  from the OpenRA-Rust repo: `maturin develop --release`. It cannot be
+  vendored as source.
+- **`openra_env.client` / `openra_env.models`** — the legacy gRPC client.
+  The bench never imports them at runtime (the only reference is example
+  code inside a docstring), so they are intentionally left out.

openra_env/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+"""Vendored subset of OpenRA-RL's `openra_env` — see VENDOR.md.
+Only `game_data` (RA_UNITS / RA_BUILDINGS, consumed by the vendored
+`openra_rl_training.scenario`) is vendored. The gRPC `client` / `models`
+are deliberately NOT vendored — the bench never imports them at runtime
+(the one reference is example code inside a docstring).
+"""

openra_env/game_data.py ADDED Viewed

	@@ -0,0 +1,984 @@

+"""Static Red Alert mod data for game knowledge tools.
+Provides unit stats, building stats, tech tree, and faction information
+extracted from OpenRA Red Alert mod rules. This gives an LLM agent the same
+reference knowledge a human player would have from experience.
+"""
+from typing import Optional
+# ─── Unit Data ────────────────────────────────────────────────────────────────
+RA_UNITS: dict[str, dict] = {  # unit type code -> stats record; the fields are annotated on the first entry
+    # Infantry
+    "e1": {
+        "name": "Rifle Infantry",  # display name
+        "category": "infantry",  # values used in this table: infantry, vehicle, aircraft, ship
+        "cost": 100,  # purchase price (presumably credits — TODO confirm)
+        "hp": 5000,  # hit points
+        "speed": 56,  # movement speed; the unit of measure is not stated in this file
+        "armor": "none",  # values used in this table: none, light, heavy
+        "side": "both",  # values used in this table: allied, soviet, both
+        "prerequisites": ["barr|tent"],  # building codes; "a|b" presumably means either building — TODO confirm
+        "description": "Basic infantry unit. Cheap and fast to produce.",
+    },
+    "e2": {
+        "name": "Grenadier",
+        "category": "infantry",
+        "cost": 150,
+        "hp": 5000,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Anti-structure infantry. Grenades deal area damage.",
+    },
+    "e3": {
+        "name": "Rocket Soldier",
+        "category": "infantry",
+        "cost": 300,
+        "hp": 4500,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Anti-armor and anti-air infantry.",
+    },
+    "e4": {
+        "name": "Flamethrower",
+        "category": "infantry",
+        "cost": 300,
+        "hp": 4000,
+        "speed": 56,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["barr", "ftur"],
+        "description": "Short-range anti-infantry/structure. Soviet only.",
+    },
+    "e6": {
+        "name": "Engineer",
+        "category": "infantry",
+        "cost": 400,
+        "hp": 4000,
+        "speed": 56,
+        "armor": "none",
+        "side": "both",
+        "prerequisites": ["barr|tent"],
+        "description": "Captures enemy buildings. Cannot attack.",
+    },
+    "e7": {
+        "name": "Tanya",
+        "category": "infantry",
+        "cost": 1800,
+        "hp": 10000,
+        "speed": 68,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "atek"],
+        "build_limit": 1,  # optional key; in this table only this entry has it
+        "description": "Elite commando. Destroys buildings with C4, kills infantry instantly. Allied only.",
+    },
+    "medi": {
+        "name": "Medic",
+        "category": "infantry",
+        "cost": 200,
+        "hp": 6000,
+        "speed": 49,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "description": "Heals nearby infantry. Cannot attack.",
+    },
+    "mech": {
+        "name": "Mechanic",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 8000,
+        "speed": 49,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "fix"],
+        "description": "Repairs nearby vehicles. Cannot attack.",
+    },
+    "spy": {
+        "name": "Spy",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 2500,
+        "speed": 56,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "dome"],
+        "description": "Disguises as enemy infantry. Infiltrates buildings for bonuses.",
+    },
+    "thf": {
+        "name": "Thief",
+        "category": "infantry",
+        "cost": 500,
+        "hp": 5000,
+        "speed": 68,
+        "armor": "none",
+        "side": "allied",
+        "prerequisites": ["tent", "dome"],
+        "description": "Steals credits from enemy refineries.",
+    },
+    "shok": {
+        "name": "Shock Trooper",
+        "category": "infantry",
+        "cost": 350,
+        "hp": 5000,
+        "speed": 49,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["barr", "stek", "tsla"],
+        "description": "Tesla infantry. High damage vs all targets. Soviet only.",
+    },
+    "dog": {
+        "name": "Attack Dog",
+        "category": "infantry",
+        "cost": 200,
+        "hp": 2000,
+        "speed": 99,
+        "armor": "none",
+        "side": "soviet",
+        "prerequisites": ["kenn"],
+        "description": "Fast anti-infantry unit. Kills spies. Soviet only.",
+    },
+    # Vehicles
+    "1tnk": {
+        "name": "Light Tank",
+        "category": "vehicle",
+        "cost": 700,
+        "hp": 23000,
+        "speed": 113,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap"],
+        "description": "Fast medium tank. Good all-around. Allied only.",  # NOTE(review): entry is named "Light Tank" but the text says "medium tank" — confirm upstream
+    },
+    "2tnk": {
+        "name": "Medium Tank",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 30000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "fix"],
+        "description": "Main battle tank. Balanced stats. Allied only. Requires Repair Facility.",
+    },
+    "3tnk": {
+        "name": "Heavy Tank",
+        "category": "vehicle",
+        "cost": 1150,
+        "hp": 46000,
+        "speed": 64,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "fix"],
+        "description": "Powerful main battle tank. Dual cannons. Soviet only. Requires Repair Facility.",
+    },
+    "4tnk": {
+        "name": "Mammoth Tank",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 60000,
+        "speed": 43,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "fix", "stek"],
+        "description": "Heaviest tank. Dual cannons + missiles. Self-healing. Soviet only.",
+    },
+    "v2rl": {
+        "name": "V2 Rocket Launcher",
+        "category": "vehicle",
+        "cost": 900,
+        "hp": 15000,
+        "speed": 72,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "dome"],
+        "description": "Long-range artillery. High damage, inaccurate. Soviet only.",
+    },
+    "jeep": {
+        "name": "Ranger",
+        "category": "vehicle",
+        "cost": 500,
+        "hp": 15000,
+        "speed": 164,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap"],
+        "description": "Fast scout vehicle with machine gun. Allied only.",
+    },
+    "apc": {
+        "name": "APC",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 20000,
+        "speed": 128,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "description": "Armored troop transport. Carries 5 infantry. Soviet only.",
+    },
+    "arty": {
+        "name": "Artillery",
+        "category": "vehicle",
+        "cost": 850,
+        "hp": 7500,
+        "speed": 54,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "dome"],
+        "description": "Long-range siege weapon. Allied only.",
+    },
+    "harv": {
+        "name": "Ore Truck",
+        "category": "vehicle",
+        "cost": 1100,
+        "hp": 60000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["proc"],
+        "description": "Harvests ore and delivers to refinery. Free with refinery.",
+    },
+    "mcv": {
+        "name": "MCV",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 60000,
+        "speed": 60,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["weap", "fix"],
+        "description": "Deploys into Construction Yard. Mobile base.",
+    },
+    "ftrk": {
+        "name": "Flak Truck",
+        "category": "vehicle",
+        "cost": 600,
+        "hp": 15000,
+        "speed": 113,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "description": "Mobile anti-air unit. Soviet only.",
+    },
+    "mnly": {
+        "name": "Minelayer",
+        "category": "vehicle",
+        "cost": 800,
+        "hp": 30000,
+        "speed": 113,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["weap", "fix"],
+        "description": "Lays anti-tank mines.",
+    },
+    "ttnk": {
+        "name": "Tesla Tank",
+        "category": "vehicle",
+        "cost": 1350,
+        "hp": 30000,
+        "speed": 92,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek", "tsla"],
+        "description": "Tesla weapon on tracks. Effective vs all targets. Soviet only.",
+    },
+    "ctnk": {
+        "name": "Chrono Tank",
+        "category": "vehicle",
+        "cost": 1350,
+        "hp": 20000,
+        "speed": 86,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Teleporting tank. Hit and run tactics. Allied only.",
+    },
+    "stnk": {
+        "name": "Phase Transport",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 128,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Cloaked APC. Invisible when not firing. Allied only.",
+    },
+    "qtnk": {
+        "name": "MAD Tank",
+        "category": "vehicle",
+        "cost": 2000,
+        "hp": 22000,
+        "speed": 46,
+        "armor": "heavy",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek"],
+        "description": "Deploys seismic charge, destroying self and nearby vehicles. Soviet only.",
+    },
+    "dtrk": {
+        "name": "Demolition Truck",
+        "category": "vehicle",
+        "cost": 2500,
+        "hp": 11000,
+        "speed": 113,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["weap", "stek"],
+        "description": "Suicide vehicle. Massive area nuclear explosion on death. Soviet only.",
+    },
+    "mgg": {
+        "name": "Mobile Gap Generator",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 72,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Creates mobile shroud area. Allied only.",
+    },
+    "mrj": {
+        "name": "Mobile Radar Jammer",
+        "category": "vehicle",
+        "cost": 1000,
+        "hp": 11000,
+        "speed": 68,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["weap", "atek"],
+        "description": "Jams enemy radar in area. Allied only.",
+    },
+    "truk": {
+        "name": "Supply Truck",
+        "category": "vehicle",
+        "cost": 500,
+        "hp": 11000,
+        "speed": 113,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["weap"],
+        "description": "Delivers cash when reaching allied structures.",
+    },
+    # Aircraft
+    "heli": {
+        "name": "Longbow",
+        "category": "aircraft",
+        "cost": 2000,
+        "hp": 12000,
+        "speed": 149,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["hpad"],
+        "description": "Anti-armor helicopter with missiles. Allied only.",
+    },
+    "hind": {
+        "name": "Hind",
+        "category": "aircraft",
+        "cost": 1500,
+        "hp": 12000,
+        "speed": 112,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld"],
+        "description": "Anti-ground attack helicopter. Soviet only.",
+    },
+    "mh60": {
+        "name": "Black Hawk",
+        "category": "aircraft",
+        "cost": 1500,
+        "hp": 12000,
+        "speed": 112,
+        "armor": "light",
+        "side": "allied",
+        "prerequisites": ["hpad"],
+        "description": "Transport/attack helicopter. Allied only.",
+    },
+    "tran": {
+        "name": "Chinook",
+        "category": "aircraft",
+        "cost": 900,
+        "hp": 14000,
+        "speed": 128,
+        "armor": "light",
+        "side": "both",
+        "prerequisites": ["hpad|afld"],
+        "description": "Transport helicopter. Carries 5 infantry.",
+    },
+    "yak": {
+        "name": "Yak",
+        "category": "aircraft",
+        "cost": 1350,
+        "hp": 6000,
+        "speed": 178,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld"],
+        "description": "Fast anti-infantry attack plane. Soviet only.",
+    },
+    "mig": {
+        "name": "MiG",
+        "category": "aircraft",
+        "cost": 2000,
+        "hp": 8000,
+        "speed": 223,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["afld", "stek"],
+        "description": "Anti-structure/armor attack plane with missiles. Soviet only.",
+    },
+    # Ships
+    "ss": {
+        "name": "Submarine",
+        "category": "ship",
+        "cost": 950,
+        "hp": 25000,
+        "speed": 78,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["spen"],
+        "description": "Invisible anti-ship unit. Soviet only.",
+    },
+    "dd": {
+        "name": "Destroyer",
+        "category": "ship",
+        "cost": 1000,
+        "hp": 40000,
+        "speed": 92,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["syrd", "dome"],
+        "description": "Multi-role warship. Anti-sub, anti-air, anti-surface. Allied only.",
+    },
+    "ca": {
+        "name": "Cruiser",
+        "category": "ship",
+        "cost": 2400,
+        "hp": 80000,
+        "speed": 44,
+        "armor": "heavy",
+        "side": "allied",
+        "prerequisites": ["syrd", "atek"],
+        "description": "Heavy bombardment ship. Long range. Allied only.",
+    },
+    "pt": {
+        "name": "Gunboat",
+        "category": "ship",
+        "cost": 500,
+        "hp": 20000,
+        "speed": 142,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["syrd|spen"],
+        "description": "Fast patrol boat.",
+    },
+    "lst": {
+        "name": "Transport",
+        "category": "ship",
+        "cost": 500,
+        "hp": 40000,
+        "speed": 115,
+        "armor": "heavy",
+        "side": "both",
+        "prerequisites": ["syrd|spen"],
+        "description": "Naval transport. Carries vehicles and infantry.",
+    },
+    "msub": {
+        "name": "Missile Submarine",
+        "category": "ship",
+        "cost": 2000,
+        "hp": 40000,
+        "speed": 44,
+        "armor": "light",
+        "side": "soviet",
+        "prerequisites": ["spen", "stek"],
+        "description": "Long-range missile submarine. Soviet only.",
+    },
+}
+# ─── Building Data ────────────────────────────────────────────────────────────
+RA_BUILDINGS: dict[str, dict] = {  # building type code -> stats record; the fields are annotated on the first entry
+    "fact": {
+        "name": "Construction Yard",  # display name
+        "cost": 2000,  # purchase price (presumably credits — TODO confirm)
+        "hp": 150000,  # hit points
+        "power": 0,  # signed power value: positive on powr/apwr, zero or negative on every other entry here
+        "side": "both",  # values used in this table: allied, soviet, both
+        "prerequisites": [],  # building codes; an empty list appears on fact, powr only
+        "produces": ["Building", "Defense"],  # production categories listed for this building; [] on most entries
+        "description": "Primary base structure. Required to build other structures.",
+    },
+    "powr": {
+        "name": "Power Plant",
+        "cost": 300,
+        "hp": 40000,
+        "power": 100,
+        "side": "both",
+        "prerequisites": [],
+        "produces": [],
+        "description": "Basic power supply. Most structures need power to function.",
+    },
+    "apwr": {
+        "name": "Advanced Power Plant",
+        "cost": 500,
+        "hp": 70000,
+        "power": 200,
+        "side": "both",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Double power output. Requires radar dome tech.",
+    },
+    "barr": {
+        "name": "Soviet Barracks",
+        "cost": 500,
+        "hp": 60000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Soviet infantry production. Required for all Soviet infantry.",
+    },
+    "tent": {
+        "name": "Allied Barracks",
+        "cost": 500,
+        "hp": 60000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Allied infantry production. Required for all Allied infantry.",
+    },
+    "proc": {
+        "name": "Ore Refinery",
+        "cost": 1400,
+        "hp": 90000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["powr"],
+        "produces": [],
+        "description": "Processes ore into credits. Comes with a free Ore Truck.",
+    },
+    "weap": {
+        "name": "War Factory",
+        "cost": 2000,
+        "hp": 150000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": ["Vehicle"],
+        "description": "Vehicle production facility. Required for all vehicles.",
+    },
+    "dome": {
+        "name": "Radar Dome",
+        "cost": 1500,
+        "hp": 100000,
+        "power": -40,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": [],
+        "description": "Provides minimap radar. Unlocks advanced tech.",
+    },
+    "fix": {
+        "name": "Service Depot",
+        "cost": 1200,
+        "hp": 80000,
+        "power": -30,
+        "side": "both",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Repairs vehicles. Unlocks MCV and Minelayer.",
+    },
+    "atek": {
+        "name": "Allied Tech Center",
+        "cost": 1500,
+        "hp": 60000,
+        "power": -200,
+        "side": "allied",
+        "prerequisites": ["dome", "weap"],
+        "produces": [],
+        "description": "Unlocks advanced Allied units. GPS satellite.",
+    },
+    "stek": {
+        "name": "Soviet Tech Center",
+        "cost": 1500,
+        "hp": 80000,
+        "power": -100,
+        "side": "soviet",
+        "prerequisites": ["dome", "weap"],
+        "produces": [],
+        "description": "Unlocks advanced Soviet units.",
+    },
+    "hpad": {
+        "name": "Helipad",
+        "cost": 500,
+        "hp": 80000,
+        "power": -10,
+        "side": "allied",
+        "prerequisites": ["dome"],
+        "produces": ["Aircraft"],
+        "description": "Allied aircraft production. Rearming pad.",
+    },
+    "afld": {
+        "name": "Airfield",
+        "cost": 500,
+        "hp": 100000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["dome"],
+        "produces": ["Aircraft"],
+        "description": "Soviet aircraft production. Rearming strip.",
+    },
+    "spen": {
+        "name": "Sub Pen",
+        "cost": 800,
+        "hp": 100000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Ship"],
+        "terrain": "water",  # optional key; in this table only spen and syrd carry it
+        "description": "Soviet naval production. Repairs ships. REQUIRES WATER — cannot build on land maps.",
+    },
+    "syrd": {
+        "name": "Naval Yard",
+        "cost": 1000,
+        "hp": 100000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["powr"],
+        "produces": ["Ship"],
+        "terrain": "water",
+        "description": "Allied naval production. Repairs ships. REQUIRES WATER — cannot build on land maps.",
+    },
+    "silo": {
+        "name": "Ore Silo",
+        "cost": 150,
+        "hp": 30000,
+        "power": -10,
+        "side": "both",
+        "prerequisites": ["proc"],
+        "produces": [],
+        "description": "Additional ore storage capacity.",
+    },
+    "kenn": {
+        "name": "Kennel",
+        "cost": 200,
+        "hp": 30000,
+        "power": -10,
+        "side": "soviet",
+        "prerequisites": ["powr"],
+        "produces": ["Infantry"],
+        "description": "Produces attack dogs. Soviet only.",
+    },
+    # Defenses
+    "pbox": {
+        "name": "Pillbox",
+        "cost": 600,
+        "hp": 40000,
+        "power": 0,
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "produces": [],
+        "description": "Anti-infantry defense turret. Allied only.",
+    },
+    "hbox": {
+        "name": "Camo Pillbox",
+        "cost": 750,
+        "hp": 40000,
+        "power": 0,
+        "side": "allied",
+        "prerequisites": ["tent"],
+        "produces": [],
+        "description": "Hidden anti-infantry defense. Allied only.",
+    },
+    "gun": {
+        "name": "Turret",
+        "cost": 800,
+        "hp": 40000,
+        "power": -20,
+        "side": "allied",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Anti-armor defense turret. Allied only.",
+    },
+    "ftur": {
+        "name": "Flame Tower",
+        "cost": 600,
+        "hp": 40000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["barr"],
+        "produces": [],
+        "description": "Short-range anti-infantry defense. Soviet only.",
+    },
+    "tsla": {
+        "name": "Tesla Coil",
+        "cost": 1200,
+        "hp": 40000,
+        "power": -75,
+        "side": "soviet",
+        "prerequisites": ["weap"],
+        "produces": [],
+        "description": "Powerful anti-ground defense. High power cost. Soviet only.",
+    },
+    "agun": {
+        "name": "AA Gun",
+        "cost": 800,
+        "hp": 40000,
+        "power": -50,
+        "side": "allied",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Anti-air defense turret. Allied only.",
+    },
+    "sam": {
+        "name": "SAM Site",
+        "cost": 700,
+        "hp": 40000,
+        "power": -20,
+        "side": "soviet",
+        "prerequisites": ["dome"],
+        "produces": [],
+        "description": "Anti-air missile defense. Soviet only.",
+    },
+    "gap": {
+        "name": "Gap Generator",
+        "cost": 800,
+        "hp": 50000,
+        "power": -60,
+        "side": "allied",
+        "prerequisites": ["atek"],
+        "produces": [],
+        "description": "Creates shroud area over your base. Allied only.",
+    },
+    # Superweapons
+    "iron": {
+        "name": "Iron Curtain",
+        "cost": 2000,
+        "hp": 100000,
+        "power": -200,
+        "side": "soviet",
+        "prerequisites": ["stek"],
+        "produces": [],
+        "build_limit": 1,  # optional key; in this table only the three superweapons carry it
+        "description": "Superweapon: Makes one unit/building invulnerable temporarily.",
+    },
+    "pdox": {
+        "name": "Chronosphere",
+        "cost": 1500,
+        "hp": 100000,
+        "power": -200,
+        "side": "allied",
+        "prerequisites": ["atek"],
+        "produces": [],
+        "build_limit": 1,
+        "description": "Superweapon: Teleports units across the map.",
+    },
+    "mslo": {
+        "name": "Missile Silo",
+        "cost": 2500,
+        "hp": 100000,
+        "power": -150,
+        "side": "soviet",
+        "prerequisites": ["stek"],
+        "produces": [],
+        "build_limit": 1,
+        "description": "Superweapon: Launches nuclear missile at target location.",
+    },
+}
+# ─── Tech Tree ────────────────────────────────────────────────────────────────
+RA_TECH_TREE: dict[str, list[str]] = {  # side -> building codes, each listed after the prerequisites named in its comment
+    "soviet": [
+        "powr",     # Power Plant (base)
+        "barr",     # Barracks → infantry (requires powr)
+        "kenn",     # Kennel → dogs (requires powr)
+        "proc",     # Ore Refinery (requires powr)
+        "weap",     # War Factory (requires proc)
+        "spen",     # Sub Pen (requires powr, needs water)
+        "dome",     # Radar Dome (requires proc)
+        "fix",      # Service Depot (requires weap)
+        "afld",     # Airfield (requires dome)
+        "stek",     # Tech Center (requires dome + weap)
+        "tsla",     # Tesla Coil (requires weap)
+        "sam",      # SAM Site (requires dome)
+        "ftur",     # Flame Tower (requires barr)
+        "iron",     # Iron Curtain (requires stek)
+        "mslo",     # Missile Silo (requires stek)
+    ],  # NOTE(review): fact, apwr and silo (side "both" in RA_BUILDINGS) are not listed — confirm intentional
+    "allied": [
+        "powr",     # Power Plant (base)
+        "tent",     # Barracks → infantry (requires powr)
+        "proc",     # Ore Refinery (requires powr)
+        "weap",     # War Factory (requires proc)
+        "syrd",     # Naval Yard (requires powr, needs water)
+        "dome",     # Radar Dome (requires proc)
+        "fix",      # Service Depot (requires weap)
+        "hpad",     # Helipad (requires dome)
+        "atek",     # Tech Center (requires dome + weap)
+        "gun",      # Turret (requires weap)
+        "pbox",     # Pillbox (requires tent)
+        "agun",     # AA Gun (requires dome)
+        "gap",      # Gap Generator (requires atek)
+        "pdox",     # Chronosphere (requires atek)
+    ],  # NOTE(review): fact, apwr, silo and hbox are in RA_BUILDINGS for this side but not listed — confirm intentional
+}
+# ─── Faction Data ─────────────────────────────────────────────────────────────
+RA_FACTIONS: dict[str, dict] = {  # lower-case faction name -> faction record
+    "england": {
+        "side": "allied",  # "allied" or "soviet"; get_tech_tree uses it as the key into RA_TECH_TREE
+        "display_name": "England",
+        "unique_units": [],  # unit codes (keys of RA_UNITS) listed as specific to this faction
+        "description": "Standard Allied faction.",
+    },
+    "france": {
+        "side": "allied",
+        "display_name": "France",
+        "unique_units": ["stnk"],
+        "description": "Allied faction with Phase Transport (cloaked APC).",
+    },
+    "germany": {
+        "side": "allied",
+        "display_name": "Germany",
+        "unique_units": ["ctnk"],
+        "description": "Allied faction with Chrono Tank (teleporting tank).",
+    },
+    "russia": {
+        "side": "soviet",
+        "display_name": "Russia",
+        "unique_units": ["ttnk"],
+        "description": "Soviet faction with Tesla Tank.",
+    },
+    "ukraine": {
+        "side": "soviet",
+        "display_name": "Ukraine",
+        "unique_units": ["dtrk"],
+        "description": "Soviet faction with Demolition Truck (nuclear suicide vehicle).",
+    },
+}
+# ─── Query Functions ──────────────────────────────────────────────────────────
+def get_unit_stats(unit_type: str) -> Optional[dict]:
+    """Get stats for a unit type. Returns None if not found."""
+    return RA_UNITS.get(unit_type.lower())
+def get_building_stats(building_type: str) -> Optional[dict]:
+    """Get stats for a building type. Returns None if not found."""
+    return RA_BUILDINGS.get(building_type.lower())
+def get_tech_tree(faction: Optional[str] = None) -> dict:
+    """Get the tech tree build order.
+    Args:
+        faction: Faction name (e.g., 'russia') or side ('allied', 'soviet').
+                If None, returns both sides.
+    """
+    if faction is None:
+        return RA_TECH_TREE
+    # Map faction to side
+    side = faction.lower()
+    if side in RA_FACTIONS:
+        side = RA_FACTIONS[side]["side"]
+    if side in RA_TECH_TREE:
+        return {side: RA_TECH_TREE[side]}
+    return {}
+def get_faction_info(faction: str) -> Optional[dict]:
+    """Get faction info including available units and buildings."""
+    faction = faction.lower()
+    info = RA_FACTIONS.get(faction)
+    if info is None:
+        return None
+    side = info["side"]
+    # Collect units available to this faction
+    available_units = []
+    for unit_type, data in RA_UNITS.items():
+        unit_side = data.get("side", "")
+        if unit_side == "both" or unit_side == side:
+            available_units.append(unit_type)
+    # Add faction-unique units
+    for u in info.get("unique_units", []):
+        if u not in available_units and u in RA_UNITS:
+            available_units.append(u)
+    # Collect buildings
+    available_buildings = []
+    for bldg_type, data in RA_BUILDINGS.items():
+        bldg_side = data.get("side", "")
+        if bldg_side == "both" or bldg_side == side:
+            available_buildings.append(bldg_type)
+    return {
+        **info,
+        "faction": faction,
+        "available_units": sorted(available_units),
+        "available_buildings": sorted(available_buildings),
+    }
+def get_all_unit_types() -> list[str]:
+    """Get all available unit type names."""
+    return sorted(RA_UNITS.keys())
+def get_all_building_types() -> list[str]:
+    """Get all available building type names."""
+    return sorted(RA_BUILDINGS.keys())
def get_all_units_for_side(side: str) -> dict[str, dict]:
    """Return the full stats of every unit a side can use.

    Args:
        side: Side name ('allied' or 'soviet'); compared case-insensitively.

    Returns:
        Dict keyed by unit type name. Each value is a shallow copy of the
        unit's stats dict. Units marked side='both' are always included.
    """
    wanted = (side.lower(), "both")
    roster: dict[str, dict] = {}
    for utype, data in RA_UNITS.items():
        if data.get("side") in wanted:
            roster[utype] = dict(data)
    return roster
def get_all_buildings_for_side(side: str) -> dict[str, dict]:
    """Return the full stats of every building a side can use.

    Args:
        side: Side name ('allied' or 'soviet'); compared case-insensitively.

    Returns:
        Dict keyed by building type name. Each value is a shallow copy of the
        building's stats dict. Buildings marked side='both' are always included.
    """
    wanted = (side.lower(), "both")
    roster: dict[str, dict] = {}
    for btype, data in RA_BUILDINGS.items():
        if data.get("side") in wanted:
            roster[btype] = dict(data)
    return roster

openra_rl_training/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""Vendored subset of OpenRA-RL-Training — see VENDOR.md.
+Faithful, frozen copies of exactly the modules OpenRA-Bench needs from
+the `openra_rl_training` package, so the bench runs without an external
+OpenRA-RL-Training checkout. Do NOT hand-edit the vendored modules —
+re-vendor from source (and re-run the full suite) if they must change.
+"""
+__version__ = "0.1.0-vendored"

openra_rl_training/scenario.py ADDED Viewed

	@@ -0,0 +1,401 @@

+"""Pydantic models for scenario and curriculum YAML definitions.
+Scenarios define custom starting conditions for RL training episodes:
+units, positions, stances, factions, and termination conditions.
+"""
+from __future__ import annotations
+import logging
+from pathlib import Path
+from typing import Literal, Optional, Union
+import yaml
+from openra_env.game_data import RA_BUILDINGS, RA_UNITS
+from pydantic import BaseModel, Field, field_validator, model_validator
logger = logging.getLogger(__name__)

# Names of every actor that may be placed on a map: all units plus all buildings.
VALID_ACTOR_TYPES = {*RA_UNITS, *RA_BUILDINGS}

# Unit stance codes (named after OpenRA's UnitStance enum).
STANCE_HOLD_FIRE = 0
STANCE_RETURN_FIRE = 1
STANCE_DEFEND = 2
STANCE_ATTACK_ANYTHING = 3

# Stance code -> display name.
STANCE_NAMES = dict(
    zip(
        (STANCE_HOLD_FIRE, STANCE_RETURN_FIRE, STANCE_DEFEND, STANCE_ATTACK_ANYTHING),
        ("HoldFire", "ReturnFire", "Defend", "AttackAnything"),
    )
)
+# ── Randomization models ─────────────────────────────────────────────────────
class TypeFilter(BaseModel):
    """Type randomization by filter: choose among the units matching the criteria."""

    category: str = Field(..., description="Unit category: infantry, vehicle, aircraft, ship")
    side: str = Field(default="both", description="Faction filter: allied, soviet, both")
    max_cost: Optional[int] = Field(default=None, description="Maximum unit cost")
    min_cost: Optional[int] = Field(default=None, description="Minimum unit cost")
    armor: Optional[str] = Field(default=None, description="Armor type: none, light, heavy")

    @field_validator("category")
    @classmethod
    def validate_category(cls, v: str) -> str:
        """Lower-case the category and reject values outside the known set."""
        allowed = {"infantry", "vehicle", "aircraft", "ship"}
        v = v.lower()
        if v in allowed:
            return v
        raise ValueError(f"category must be one of {sorted(allowed)}, got '{v}'")

    @field_validator("side")
    @classmethod
    def validate_side(cls, v: str) -> str:
        """Lower-case the side and reject values outside the known set."""
        allowed = {"allied", "soviet", "both"}
        v = v.lower()
        if v in allowed:
            return v
        raise ValueError(f"side must be one of {sorted(allowed)}, got '{v}'")

    @field_validator("armor")
    @classmethod
    def validate_armor(cls, v: Optional[str]) -> Optional[str]:
        """Lower-case the armor type when given; None means no armor filter."""
        if v is None:
            return v
        allowed = {"none", "light", "heavy"}
        v = v.lower()
        if v in allowed:
            return v
        raise ValueError(f"armor must be one of {sorted(allowed)}, got '{v}'")

    def matching_types(self) -> list[str]:
        """Return the sorted RA_UNITS keys that satisfy every criterion of this filter."""

        def accepts(data: dict) -> bool:
            # Category must match exactly.
            if data.get("category") != self.category:
                return False
            # A side filter admits that side's units plus units shared by both.
            if self.side != "both" and data.get("side", "both") not in (self.side, "both"):
                return False
            cost = data.get("cost", 0)
            if self.max_cost is not None and cost > self.max_cost:
                return False
            if self.min_cost is not None and cost < self.min_cost:
                return False
            return self.armor is None or data.get("armor") == self.armor

        return sorted(utype for utype, data in RA_UNITS.items() if accepts(data))
class PositionOffset(BaseModel):
    """Position randomization by offset: a random cell within ±offset of base."""

    base: tuple[int, int] = Field(..., description="Base position [x, y]")
    offset: int = Field(..., description="Max offset in cells (applies to both x and y)")

    @field_validator("offset")
    @classmethod
    def validate_offset(cls, v: int) -> int:
        """Accept only offsets in the inclusive range 1-50."""
        if not 1 <= v <= 50:
            raise ValueError(f"offset must be 1-50, got {v}")
        return v
class HealthRange(BaseModel):
    """Health randomization by range (percentages)."""

    min: int = Field(default=1, description="Minimum health percentage")
    max: int = Field(default=100, description="Maximum health percentage")

    @model_validator(mode="after")
    def validate_range(self) -> "HealthRange":
        """Require 1 <= min <= max <= 100."""
        low, high = self.min, self.max
        if low < 1 or high > 100:
            raise ValueError(f"Health range must be 1-100, got {low}-{high}")
        if low > high:
            raise ValueError(f"min ({low}) must be <= max ({high})")
        return self
class ActorRandomization(BaseModel):
    """Per-field randomization options for an actor placement.

    Every field is optional; None means that field is not randomized.
    """

    type: Optional[Union[list[str], TypeFilter]] = Field(
        default=None, description="Type alternatives: list of names or category filter"
    )
    position: Optional[Union[list[tuple[int, int]], PositionOffset]] = Field(
        default=None, description="Position alternatives: preset list or offset from base"
    )
    stance: Optional[list[int]] = Field(default=None, description="Stance alternatives (0-3)")
    health: Optional[HealthRange] = Field(default=None, description="Health range {min, max}")
    facing: Optional[list[int]] = Field(default=None, description="Facing alternatives (0-1023)")

    @field_validator("type")
    @classmethod
    def validate_type_alternatives(cls, v: Optional[Union[list[str], TypeFilter]]):
        """Validate an explicit list of type names and normalize it to lower case.

        Names are checked case-insensitively, so they are also returned
        lower-cased. This matches ActorPlacement.validate_type and keeps a
        mixed-case alternative from failing later lookups against the
        lower-case game-data keys. A TypeFilter (or None) passes through.
        """
        if not isinstance(v, list):
            return v
        if not v:
            raise ValueError("type list must not be empty")
        normalized = []
        for t in v:
            lowered = t.lower()
            if lowered not in VALID_ACTOR_TYPES:
                raise ValueError(f"Unknown actor type in randomize.type: '{t}'")
            normalized.append(lowered)
        return normalized

    @field_validator("position")
    @classmethod
    def validate_position_alternatives(
        cls, v: Optional[Union[list[tuple[int, int]], PositionOffset]]
    ):
        """Reject an empty preset list; a PositionOffset validates itself."""
        if isinstance(v, list) and not v:
            raise ValueError("position list must not be empty")
        return v

    @field_validator("stance")
    @classmethod
    def validate_stance_alternatives(cls, v: Optional[list[int]]):
        """Require a non-empty list of stance codes, each in 0-3."""
        if v is None:
            return v
        if not v:
            raise ValueError("stance list must not be empty")
        for s in v:
            if s < 0 or s > 3:
                raise ValueError(f"Stance must be 0-3, got {s}")
        return v

    @field_validator("facing")
    @classmethod
    def validate_facing_alternatives(cls, v: Optional[list[int]]):
        """Require a non-empty list of facings, each in 0-1023."""
        if v is None:
            return v
        if not v:
            raise ValueError("facing list must not be empty")
        for f in v:
            if f < 0 or f > 1023:
                raise ValueError(f"Facing must be 0-1023, got {f}")
        return v
+# ── Core scenario models ─────────────────────────────────────────────────────
class ActorPlacement(BaseModel):
    """One unit or building (optionally several copies) spawned at game start."""

    type: str = Field(..., description="Actor type (e.g., '2tnk', 'e1', 'fact')")
    owner: Literal["agent", "enemy", "neutral"] = Field(
        default="agent", description="Which player owns this actor"
    )
    position: tuple[int, int] = Field(..., description="Cell coordinates [x, y]")
    stance: int = Field(
        default=STANCE_ATTACK_ANYTHING,
        description="0=HoldFire, 1=ReturnFire, 2=Defend, 3=AttackAnything",
    )
    health: int = Field(default=100, description="HP percentage 1-100")
    facing: int = Field(default=-1, description="-1=auto, 0-1023 WAngle")
    count: int = Field(default=1, description="Spawn N copies with auto-offset positions")
    spawn_point: Optional[int] = Field(
        default=None,
        description="Spawn point group (0-N). If set, only included when this spawn point is selected. "
                    "None = always included (enemies, neutral).",
    )
    randomize: Optional[ActorRandomization] = Field(
        default=None,
        description="Per-field randomization options (resolved before map generation)",
    )

    @field_validator("type")
    @classmethod
    def validate_type(cls, v: str) -> str:
        """Lower-case the actor type and require it to be a known unit or building."""
        v = v.lower()
        if v in VALID_ACTOR_TYPES:
            return v
        # Only the first ten names of each kind are shown to keep the message short.
        unit_sample = sorted(RA_UNITS)[:10]
        building_sample = sorted(RA_BUILDINGS)[:10]
        raise ValueError(
            f"Unknown actor type '{v}'. "
            f"Valid units: {unit_sample}... "
            f"Valid buildings: {building_sample}..."
        )

    @field_validator("stance")
    @classmethod
    def validate_stance(cls, v: int) -> int:
        """Accept only stance codes 0-3."""
        if not 0 <= v <= 3:
            raise ValueError(f"Stance must be 0-3, got {v}")
        return v

    @field_validator("health")
    @classmethod
    def validate_health(cls, v: int) -> int:
        """Accept only health percentages 1-100."""
        if not 1 <= v <= 100:
            raise ValueError(f"Health must be 1-100, got {v}")
        return v

    @field_validator("facing")
    @classmethod
    def validate_facing(cls, v: int) -> int:
        """Accept -1 (auto) or a facing in 0-1023."""
        if v == -1 or 0 <= v <= 1023:
            return v
        raise ValueError(f"Facing must be -1 (auto) or 0-1023, got {v}")

    @field_validator("count")
    @classmethod
    def validate_count(cls, v: int) -> int:
        """Accept only copy counts 1-50."""
        if not 1 <= v <= 50:
            raise ValueError(f"Count must be 1-50, got {v}")
        return v

    @property
    def is_building(self) -> bool:
        """True when the actor type is a key of RA_BUILDINGS."""
        return self.type in RA_BUILDINGS
class PlayerSetup(BaseModel):
    """Agent player configuration: faction and starting cash."""

    faction: Literal["allies", "soviet", "random"] = Field(
        default="random", description="Player faction"
    )
    cash: int = Field(default=0, description="Starting cash override")

    @field_validator("cash")
    @classmethod
    def validate_cash(cls, v: int) -> int:
        """Reject negative starting cash."""
        if v >= 0:
            return v
        raise ValueError(f"Cash must be non-negative, got {v}")
class EnemySetup(PlayerSetup):
    """Enemy player configuration: PlayerSetup plus an optional AI bot."""

    bot_type: str = Field(
        default="",
        description="AI bot type (empty = no AI, stance-only behavior)",
    )
class TerminationConfig(BaseModel):
    """Conditions that end a scenario episode."""

    max_ticks: int = Field(default=5000, description="Tick limit (0 = unlimited)")
    max_time: Optional[float] = Field(
        default=None,
        description="Time limit in seconds (overrides max_ticks). 25 ticks = 1 second.",
    )
    agent_units_killed: bool = Field(
        default=True, description="End as 'lose' when all agent units destroyed"
    )
    enemy_units_killed: bool = Field(
        default=True, description="End as 'win' when all enemy units/buildings destroyed"
    )

    @field_validator("max_ticks")
    @classmethod
    def validate_max_ticks(cls, v: int) -> int:
        """Reject a negative tick limit."""
        if v < 0:
            raise ValueError(f"max_ticks must be non-negative, got {v}")
        return v

    @model_validator(mode="after")
    def resolve_max_time(self) -> "TerminationConfig":
        """Convert max_time (seconds) to max_ticks if specified.

        Raises:
            ValueError: if max_time is negative. Without this check a negative
                max_time would overwrite max_ticks with a negative value and
                bypass validate_max_ticks.
        """
        if self.max_time is None:
            return self
        if self.max_time < 0:
            raise ValueError(f"max_time must be non-negative, got {self.max_time}")
        ticks_per_second = 25  # per the max_time field description
        self.max_ticks = int(self.max_time * ticks_per_second)
        return self
class ScenarioDefinition(BaseModel):
    """A full scenario as loaded from YAML: players, actors, termination and options."""

    name: str = Field(..., description="Scenario display name")
    description: str = Field(default="", description="Human-readable description")
    base_map: str = Field(default="singles.oramap", description="Base map filename for terrain")
    agent: PlayerSetup = Field(default_factory=PlayerSetup)
    enemy: EnemySetup = Field(default_factory=EnemySetup)
    actors: list[ActorPlacement] = Field(..., description="Units/buildings to spawn")
    termination: TerminationConfig = Field(default_factory=TerminationConfig)
    reward: dict[str, float] = Field(default_factory=dict, description="Override reward weights")
    reward_calibration: dict[str, float] = Field(
        default_factory=dict,
        description="Manual overrides for reward calibration constants (auto-computed if empty)",
    )
    tools: list[str] = Field(default_factory=list, description="Allowed tool names (empty = all)")
    interrupts: dict[str, bool] = Field(
        default_factory=dict,
        description="Override interrupt signals: signal_name → enabled/disabled. All enabled by default.",
    )
    planning: bool = Field(default=False, description="Enable pre-game planning phase")
    difficulty: int = Field(default=1, description="Difficulty level for ordering")
    tags: list[str] = Field(default_factory=list, description="Tags for filtering")

    @field_validator("tools")
    @classmethod
    def strip_internal_tools(cls, v: list[str]) -> list[str]:
        """Drop internal-only tool names that the LLM must never call directly."""
        hidden = {"get_game_state", "surrender"}
        kept = []
        for tool in v:
            if tool not in hidden:
                kept.append(tool)
        return kept

    @field_validator("actors")
    @classmethod
    def validate_actors_not_empty(cls, v: list[ActorPlacement]) -> list[ActorPlacement]:
        """Require at least one actor."""
        if len(v) == 0:
            raise ValueError("Scenario must have at least one actor")
        return v

    @model_validator(mode="after")
    def validate_has_agent_actor(self) -> "ScenarioDefinition":
        """Require at least one actor owned by the agent."""
        if not any(actor.owner == "agent" for actor in self.actors):
            raise ValueError("Scenario must have at least one agent-owned actor")
        return self

    def _owned_by(self, owner: str) -> list[ActorPlacement]:
        """Return the actors whose owner equals *owner*, in declaration order."""
        return [actor for actor in self.actors if actor.owner == owner]

    @property
    def agent_actors(self) -> list[ActorPlacement]:
        """Actors owned by the agent."""
        return self._owned_by("agent")

    @property
    def enemy_actors(self) -> list[ActorPlacement]:
        """Actors owned by the enemy."""
        return self._owned_by("enemy")

    @property
    def neutral_actors(self) -> list[ActorPlacement]:
        """Actors owned by the neutral player."""
        return self._owned_by("neutral")
def load_scenario(path: str | Path) -> ScenarioDefinition:
    """Load a scenario definition from a YAML file.

    Args:
        path: Path to the scenario YAML file.

    Returns:
        Parsed and validated ScenarioDefinition.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the file is empty or its top level is not a mapping.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Scenario file not found: {path}")
    # Decode explicitly as UTF-8 so non-ASCII text in a scenario loads the
    # same way on every platform instead of using the locale default.
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if data is None:
        raise ValueError(f"Empty scenario file: {path}")
    if not isinstance(data, dict):
        # A list or scalar document would otherwise fail on data.get() below
        # with an unhelpful AttributeError.
        raise ValueError(f"Scenario file must contain a YAML mapping: {path}")
    logger.info("Loading scenario '%s' from %s", data.get("name", "?"), path)
    return ScenarioDefinition.model_validate(data)
def load_scenario_from_string(yaml_string: str) -> ScenarioDefinition:
    """Parse and validate a scenario definition given as YAML text.

    Args:
        yaml_string: YAML content.

    Returns:
        Parsed and validated ScenarioDefinition.

    Raises:
        ValueError: if the YAML document is empty.
    """
    parsed = yaml.safe_load(yaml_string)
    if parsed is not None:
        return ScenarioDefinition.model_validate(parsed)
    raise ValueError("Empty scenario YAML")

openra_rl_training/training/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Vendored subset of openra_rl_training.training — see VENDOR.md."""

openra_rl_training/training/minimap_renderer.py ADDED Viewed

	@@ -0,0 +1,333 @@

+"""Render a vision minimap for the planning phase.
+Produces a small PNG image (~448x222, ~96 vision tokens) showing:
+- Actual terrain from the base map (map.png)
+- Visibility layers: visible (bright), fog of war (dimmed), unexplored (dark)
+- Own units (cyan circles), enemy units (red circles), enemy buildings (red squares)
+- Coordinate grid and compact legend
+The image is returned as a base64-encoded PNG for injection into the
+OpenAI-compatible vision API (SGLang/vLLM).
+"""
+from __future__ import annotations
+import base64
+import io
+import logging
+import matplotlib
+import numpy as np
+from PIL import Image
+matplotlib.use("Agg")
+# Use Figure() OO API instead of pyplot — pyplot's global figure manager is
+# NOT thread-safe, which prevents off-loading rendering from the event loop.
+from matplotlib.figure import Figure
+from matplotlib.lines import Line2D
logger = logging.getLogger(__name__)

# Actor type names drawn as buildings (square markers) rather than units.
BUILDINGS = {
    "fact", "powr", "apwr", "tent", "barr", "proc", "weap",
    "dome", "fix", "hpad", "afld", "spen", "syrd", "pbox",
    "hbox", "gun", "ftur", "tsla", "agun", "sam", "gap",
    "iron", "mslo", "atek", "stek", "kenn", "silo",
}

# Terrain brightness multipliers per visibility state.
VIS_BRIGHT = 1.0       # in current line of sight
VIS_FOG = 0.40         # explored earlier, not currently visible
VIS_UNEXPLORED = 0.08  # never seen

# Radius, in cells, of the vision disk drawn around each own unit.
VISION_RADIUS = 10

# Supersampling factor and final image width in pixels.
RENDER_SCALE = 2
TARGET_WIDTH = 448
+def _blur_2d(arr: np.ndarray, sigma: float = 1.5, size: int = 7) -> np.ndarray:
+    """Simple separable gaussian blur without scipy dependency."""
+    x = np.arange(size) - size // 2
+    k = np.exp(-x**2 / (2 * sigma**2))
+    k /= k.sum()
+    r = np.apply_along_axis(lambda row: np.convolve(row, k, mode="same"), 1, arr)
+    return np.apply_along_axis(lambda col: np.convolve(col, k, mode="same"), 0, r)
+def _parse_ascii_minimap(
+    ascii_minimap: str, map_width: int, map_height: int
+) -> np.ndarray:
+    """Parse ASCII minimap to get explored mask at full map resolution.
+    Characters: # = unexplored, everything else = explored.
+    The ASCII grid is downsampled by scale = ceil(map_width / 28).
+    Returns:
+        Boolean array (map_height, map_width) — True = explored.
+    """
+    lines = [l for l in ascii_minimap.strip().split("\n") if l.strip()]
+    # Skip header lines (e.g. "Map (28x14, 1cell=4x4):")
+    grid_lines = []
+    for line in lines:
+        stripped = line.strip()
+        if stripped and all(c in "#.@!X~$B " for c in stripped):
+            grid_lines.append(stripped)
+    if not grid_lines:
+        return np.zeros((map_height, map_width), dtype=bool)
+    grid_h = len(grid_lines)
+    grid_w = max(len(l) for l in grid_lines)
+    scale_x = max(1, map_width // grid_w) if grid_w > 0 else 1
+    scale_y = max(1, map_height // grid_h) if grid_h > 0 else 1
+    explored = np.zeros((map_height, map_width), dtype=bool)
+    for gy, line in enumerate(grid_lines):
+        for gx, ch in enumerate(line):
+            if ch != "#":
+                # Mark the corresponding map cells as explored
+                y0 = gy * scale_y
+                x0 = gx * scale_x
+                y1 = min(y0 + scale_y, map_height)
+                x1 = min(x0 + scale_x, map_width)
+                explored[y0:y1, x0:x1] = True
+    return explored
+def _compute_visible_mask(
+    own_units: list[dict], map_width: int, map_height: int
+) -> np.ndarray:
+    """Compute currently visible cells from own unit positions."""
+    visible = np.zeros((map_height, map_width), dtype=bool)
+    r = VISION_RADIUS
+    for u in own_units:
+        cx = u.get("cell_x", 0)
+        cy = u.get("cell_y", 0)
+        y_lo = max(0, cy - r)
+        y_hi = min(map_height, cy + r + 1)
+        x_lo = max(0, cx - r)
+        x_hi = min(map_width, cx + r + 1)
+        for y in range(y_lo, y_hi):
+            for x in range(x_lo, x_hi):
+                if (x - cx) ** 2 + (y - cy) ** 2 <= r * r:
+                    visible[y, x] = True
+    return visible
def render_minimap(
    terrain_png: bytes,
    map_width: int,
    map_height: int,
    bounds_x: int,
    bounds_y: int,
    own_units: list[dict],
    enemy_units: list[dict],
    ascii_minimap: str,
    output_width: int = TARGET_WIDTH,
) -> str | None:
    """Render a vision minimap and return base64-encoded PNG.

    The terrain image is dimmed by visibility (unexplored / fog / visible),
    overlaid with a coordinate grid, own and enemy markers and a legend,
    rendered at high DPI and downsampled to *output_width*.

    Args:
        terrain_png: Raw bytes of map.png from the .oramap file.
        map_width: Full map width in cells.
        map_height: Full map height in cells.
        bounds_x: Playable area X offset. Accepted for interface
            compatibility; the full map is drawn, so it is not used.
        bounds_y: Playable area Y offset. Not used, as for bounds_x.
        own_units: List of own unit dicts with cell_x, cell_y, type and an
            optional truthy ``halted_unreachable`` flag.
        enemy_units: List of visible enemy unit dicts with cell_x, cell_y, type.
        ascii_minimap: ASCII minimap string from game state.
        output_width: Target image width in pixels.

    Returns:
        Base64-encoded PNG string, or None on failure (any exception is
        logged as a warning and swallowed: rendering is best-effort).
    """
    try:
        # Load terrain
        terrain_img = Image.open(io.BytesIO(terrain_png)).convert("RGB")
        pw, ph = terrain_img.size  # terrain image pixel dimensions
        terrain_arr = np.array(terrain_img).astype(float) / 255.0

        # Compute visibility masks in cell coordinates; anything currently
        # visible also counts as explored.
        explored = _parse_ascii_minimap(ascii_minimap, map_width, map_height)
        visible = _compute_visible_mask(own_units, map_width, map_height)
        explored |= visible

        # Use full map (including borders) so terrain boundaries are visible.
        # Crop the masks when they are at least as large as the terrain image.
        explored_full = explored[:ph, :pw] if explored.shape[0] >= ph and explored.shape[1] >= pw else explored
        visible_full = visible[:ph, :pw] if visible.shape[0] >= ph and visible.shape[1] >= pw else visible

        # Resize visibility masks to match terrain image pixel dimensions
        if explored_full.shape != (ph, pw):
            explored_full = np.array(Image.fromarray(explored_full).resize((pw, ph), Image.NEAREST))
            visible_full = np.array(Image.fromarray(visible_full).resize((pw, ph), Image.NEAREST))

        # Smooth edges
        explored_s = np.clip(_blur_2d(explored_full.astype(float), sigma=1.5, size=7), 0, 1)
        visible_s = np.clip(_blur_2d(visible_full.astype(float), sigma=1.5, size=7), 0, 1)

        # Composite terrain with visibility (vectorized)
        brightness = VIS_UNEXPLORED * (1 - explored_s) + VIS_FOG * explored_s
        brightness = brightness * (1 - visible_s) + VIS_BRIGHT * visible_s

        # Keep blue-dominant pixels (blue channel > green channel, used here
        # as the water/cliff test) at least at fog brightness so terrain
        # borders stay visible.
        _is_water = terrain_arr[..., 2] > terrain_arr[..., 1]  # blue > green
        brightness = np.where(_is_water, np.maximum(brightness, VIS_FOG), brightness)
        composite = terrain_arr * brightness[..., np.newaxis]

        # Render with matplotlib at high DPI for supersampling
        render_dpi = 192 * RENDER_SCALE
        fig_w = 3.5
        fig_h = fig_w * ph / pw  # maintain aspect ratio

        # OO API (thread-safe, no global figure manager)
        fig = Figure(figsize=(fig_w, fig_h), dpi=render_dpi)
        ax = fig.add_subplot(1, 1, 1)
        bg = "#0a0a0f"
        fig.patch.set_facecolor(bg)
        ax.set_facecolor(bg)
        ax.imshow(
            composite,
            extent=[0, pw, ph, 0],
            interpolation="bilinear",
            aspect="auto",
        )

        # Grid
        for x in range(0, map_width + 1, 20):
            ax.axvline(x, color="white", alpha=0.15, linewidth=0.4)
        for y in range(0, map_height + 1, 10):
            ax.axhline(y, color="white", alpha=0.15, linewidth=0.4)

        # Plot own units — cyan circles with glow.
        # Halted-unreachable units (unit halted on a bad target — pathfinding
        # failed repeatedly) get a YELLOW X overlay so the model can spot
        # them clearly on the minimap and understand they need a new target.
        for u in own_units:
            ux, uy = u.get("cell_x", 0), u.get("cell_y", 0)
            is_halted = bool(u.get("halted_unreachable"))
            ax.plot(ux, uy, "o", color="#00b8d4", markersize=8, alpha=0.3, zorder=9)
            ax.plot(
                ux, uy, "o", color="#00e5ff", markersize=5,
                markeredgecolor="white", markeredgewidth=0.6, zorder=10,
            )
            if is_halted:
                # Yellow X overlay marking the unit as halted/unreachable.
                ax.plot(
                    ux, uy, marker="x", color="#ffe600", markersize=8,
                    markeredgewidth=1.5, zorder=11,
                )

        # Plot enemy units — red circles/squares with glow
        for u in enemy_units:
            ux, uy = u.get("cell_x", 0), u.get("cell_y", 0)
            utype = u.get("type", "").lower()
            is_bldg = utype in BUILDINGS
            marker = "s" if is_bldg else "o"
            ms = 6 if is_bldg else 5
            ax.plot(
                ux, uy, marker, color="#ff1744", markersize=ms + 3,
                alpha=0.25, zorder=9,
            )
            ax.plot(
                ux, uy, marker, color="#ff1744", markersize=ms,
                markeredgecolor="white", markeredgewidth=0.5, zorder=10,
            )

        # Show the FULL map including water/cliff borders — not just playable area.
        # This lets the model see terrain boundaries clearly.
        _x_max = map_width
        _y_max = map_height
        ax.set_xlim(0, _x_max)
        ax.set_ylim(_y_max, 0)

        # Ticks: every 20 cells on x and every 10 on y, plus the far map edge.
        _xticks = list(range(0, _x_max + 1, 20))
        if _xticks[-1] != _x_max:
            _xticks.append(_x_max)
        _yticks = list(range(0, _y_max + 1, 10))
        if _yticks[-1] != _y_max:
            _yticks.append(_y_max)
        ax.set_xticks(_xticks)
        ax.set_yticks(_yticks)
        ax.tick_params(
            axis="both", colors="#8899aa", labelsize=6,
            length=2, width=0.4, pad=1,
        )
        for spine in ax.spines.values():
            spine.set_color("#2a3a50")
            spine.set_linewidth(0.5)

        # Compact legend — units + terrain
        legend_elements = [
            Line2D(
                [0], [0], marker="o", color="w", markerfacecolor="#00e5ff",
                markersize=5, label="Own", linestyle="None",
            ),
            Line2D(
                [0], [0], marker="o", color="w", markerfacecolor="#ff1744",
                markersize=5, label="Enemy", linestyle="None",
            ),
            Line2D(
                [0], [0], marker="x", color="#ffe600", markersize=5,
                label="Halted", linestyle="None", markeredgewidth=1.5,
            ),
            Line2D(
                [0], [0], marker="s", color="w", markerfacecolor="#50a03c",
                markersize=5, label="Land", linestyle="None",
            ),
            Line2D(
                [0], [0], marker="s", color="w", markerfacecolor="#1e3c78",
                markersize=5, label="Water", linestyle="None",
            ),
            Line2D(
                [0], [0], marker="s", color="w", markerfacecolor="#6b5b3a",
                markersize=5, label="Cliff", linestyle="None",
            ),
        ]
        ax.legend(
            handles=legend_elements, loc="upper right", fontsize=5,
            framealpha=0.85, facecolor="#0a0a0f", edgecolor="#2a3a50",
            labelcolor="#ccddee", handletextpad=0.3, borderpad=0.3,
            columnspacing=0.6, ncol=6,
        )
        fig.tight_layout(pad=0.3)

        # Render to buffer
        buf = io.BytesIO()
        fig.savefig(
            buf, dpi=render_dpi, bbox_inches="tight",
            facecolor=bg, pad_inches=0.03, format="png",
        )
        # No plt.close needed — Figure is local, no global state to release

        # LANCZOS downsample to target size
        buf.seek(0)
        hi_res = Image.open(buf)
        scale = output_width / hi_res.width
        target_h = int(hi_res.height * scale)
        final = hi_res.resize((output_width, target_h), Image.LANCZOS)

        # Encode as base64
        out_buf = io.BytesIO()
        final.save(out_buf, format="PNG", optimize=True)
        b64 = base64.b64encode(out_buf.getvalue()).decode("ascii")
        logger.info(
            "Rendered minimap: %dx%d, %d bytes, ~%d vision tokens",
            final.width, final.height,
            len(out_buf.getvalue()),
            (final.width * final.height) // (32 * 32),
        )
        return b64
    except Exception as e:
        # Best-effort: the caller gets None when no image could be produced.
        logger.warning("Minimap render failed: %s", e)
        return None

openra_rl_training/training/reward_funcs.py ADDED Viewed

	@@ -0,0 +1,264 @@

+"""GRPO reward functions for OpenRA training.
+Each function receives completions (list[str]) and extra kwargs from rollout_func.
+Returns list[float] rewards, one per completion.
+Per-scenario weighting: When ``scenario_weights`` is present in kwargs
+(a list of dicts, one per completion), each function multiplies its base
+reward by the scenario-specific weight for its signal.  This lets combat-
+focused scenarios boost combat reward while economy scenarios boost economy.
+"""
+from collections import defaultdict
# Default weight of each reward signal; a scenario may override any of them.
# The non-zero weights sum to 1.0; a weight of 0.0 switches the signal off.
DEFAULT_REWARD_WEIGHTS: dict[str, float] = dict(
    outcome=0.50,
    combat=0.15,
    economy=0.10,
    tempo=0.10,
    density=0.00,
    format=0.05,
    survival=0.10,
    discovery=0.00,
    disruption=0.00,
    exploration=0.00,
)
def _apply_weights(
    base: list[float], key: str, scenario_weights: list[dict] | None,
) -> list[float]:
    """Scale each base reward by the weight of signal *key*.

    The weight for reward ``i`` comes from ``scenario_weights[i][key]`` when
    that entry exists; otherwise DEFAULT_REWARD_WEIGHTS[key] is used. This
    covers a missing or empty *scenario_weights*, a list shorter than *base*,
    and a dict without *key*.
    """
    fallback = DEFAULT_REWARD_WEIGHTS[key]
    if not scenario_weights:
        return [r * fallback for r in base]

    weighted = []
    for i, r in enumerate(base):
        if i < len(scenario_weights):
            w = scenario_weights[i].get(key, fallback)
        else:
            w = fallback
        weighted.append(r * w)
    return weighted
+def _normalize_within_group(rewards: list[float], spawn_groups: list[int]) -> list[float]:
+    """Center rewards within each spawn group (Fix A).
+    Within each group (same map), subtract the group mean so that GRPO
+    advantages measure behavioral differences, not spawn luck.
+    Groups with only 1 episode are left unchanged.
+    """
+    if not spawn_groups or len(spawn_groups) != len(rewards):
+        return rewards
+    groups: dict[int, list[tuple[int, float]]] = defaultdict(list)
+    for i, g in enumerate(spawn_groups):
+        groups[g].append((i, rewards[i]))
+    result = list(rewards)
+    for g, entries in groups.items():
+        if len(entries) < 2:
+            continue
+        vals = [v for _, v in entries]
+        gmean = sum(vals) / len(vals)
+        for idx, _ in entries:
+            result[idx] -= gmean
+    return result
+def _rank_normalize(values: list[float]) -> list[float]:
+    """Map values onto [-1, +1] by rank, averaging the rank of ties.
+    Only the ordering matters, so a single extreme value cannot compress
+    the spread of the others. With all-distinct inputs the outputs are
+    evenly spaced, the smallest at -1.0 and the largest at +1.0. Tied
+    values share the average of the ranks they span, so a tie for best
+    (or worst) lands strictly inside the interval, and an all-equal input
+    maps to all 0.0.
+    Args:
+        values: Scores to rank.
+    Returns:
+        A new list aligned with *values*; all zeros when fewer than two
+        values are given.
+    """
+    count = len(values)
+    if count < 2:
+        return [0.0] * count
+    order = sorted(range(count), key=values.__getitem__)
+    ranks = [0.0] * count
+    start = 0
+    while start < count:
+        # Extend [start, stop] over the run of equal values in sorted order.
+        stop = start
+        while stop + 1 < count and values[order[stop + 1]] == values[order[stop]]:
+            stop += 1
+        shared_rank = (start + stop) / 2.0 + 1.0  # 1-based average rank
+        for pos in order[start:stop + 1]:
+            ranks[pos] = shared_rank
+        start = stop + 1
+    span = count - 1.0
+    return [2.0 * (rank - 1.0) / span - 1.0 for rank in ranks]
+def _zscore_batch(values: list[float]) -> list[float]:
+    """Rank-normalize *values* across the whole batch.
+    The name is historical: this delegates to rank normalization rather
+    than computing a z-score, because ranks are insensitive to outliers
+    and do not depend on the shape of the score distribution.
+    """
+    ranked = _rank_normalize(values)
+    return ranked
+def _zscore_per_group(values: list[float], spawn_groups: list[int] | None) -> list[float]:
+    """Rank-normalize within each spawn group.
+    Each spawn group is a different map layout, i.e. a different "prompt".
+    Ranking per group makes the result reflect differences between
+    episodes played under the SAME conditions rather than map difficulty.
+    Args:
+        values: Per-episode scores.
+        spawn_groups: Group id for each episode, aligned with *values*.
+    Returns:
+        * *values* itself, unchanged, when it holds fewer than two entries.
+        * A global rank normalization when *spawn_groups* is missing, empty,
+          or not the same length as *values* (a misaligned list cannot be
+          trusted to assign episodes to groups).
+        * Otherwise a new list in which every group of two or more episodes
+          is rank-normalized on its own and every single-episode group is
+          set to 0.0.
+    """
+    if not values or len(values) < 2:
+        return values
+    # A length mismatch previously raised IndexError (list too long) or left
+    # the trailing episodes un-normalized (list too short); treat it like
+    # missing group information instead.
+    if not spawn_groups or len(spawn_groups) != len(values):
+        return _rank_normalize(values)
+    groups: dict[int, list[int]] = {}
+    for i, g in enumerate(spawn_groups):
+        groups.setdefault(g, []).append(i)
+    result = list(values)
+    for indices in groups.values():
+        if len(indices) < 2:
+            # Nothing to compare against within the group.
+            result[indices[0]] = 0.0
+            continue
+        ranked = _rank_normalize([values[i] for i in indices])
+        for idx, rank_val in zip(indices, ranked):
+            result[idx] = rank_val
+    return result
+def _neutralize_infra(rewards: list[float], kwargs: dict) -> list[float]:
+    """Overwrite failed episodes' rewards with the mean of the valid ones.
+    An episode counts as failed when its entry in ``kwargs["infra_failure"]``
+    or ``kwargs["tool_call_failure"]`` is truthy; episodes beyond the end of
+    either list are treated as not failed by that list. The upstream design
+    note cites DAPO-style dynamic sampling (arXiv:2503.14476 Section 3.1):
+    giving failed episodes the batch mean is intended to make their
+    advantage zero after GRPO normalization, so they add no gradient.
+    Args:
+        rewards: Per-episode rewards.
+        kwargs: Reward-function keyword arguments holding the failure masks.
+    Returns:
+        *rewards* itself when no episode failed or when every episode failed
+        (there is no valid mean to anchor on); otherwise a new list.
+    """
+    infra_flags = kwargs.get("infra_failure", [])
+    tool_flags = kwargs.get("tool_call_failure", [])
+    failed = [
+        bool(
+            (pos < len(infra_flags) and infra_flags[pos])
+            or (pos < len(tool_flags) and tool_flags[pos])
+        )
+        for pos in range(len(rewards))
+    ]
+    if True not in failed:
+        return rewards
+    healthy = [reward for reward, bad in zip(rewards, failed) if not bad]
+    if not healthy:
+        return rewards
+    anchor = sum(healthy) / len(healthy)
+    return [anchor if bad else reward for reward, bad in zip(rewards, failed)]
+def reward_outcome(completions: list[str], **kwargs) -> list[float]:
+    """Score the terminal game result of each episode.
+    Reads ``kwargs["outcome"]``: "win" maps to +1.0, "lose" to -1.0, and
+    "draw" or any other label to 0.0. When no outcomes are supplied every
+    completion starts from 0.0. The base scores are then rank-normalized
+    per spawn group, scaled by the "outcome" weight, and failed episodes
+    are neutralized.
+    """
+    outcomes = kwargs.get("outcome", [])
+    if outcomes:
+        score_for = {"win": 1.0, "lose": -1.0, "draw": 0.0}
+        base = [score_for.get(label, 0.0) for label in outcomes]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "outcome", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_combat(completions: list[str], **kwargs) -> list[float]:
+    """Combat-efficiency reward (a component of the 8-dim reward vector).
+    Reads ``kwargs["combat_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "combat" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("combat_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "combat", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_economy(completions: list[str], **kwargs) -> list[float]:
+    """Economic-performance reward (a component of the 8-dim reward vector).
+    Reads ``kwargs["economy_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "economy" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("economy_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "economy", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_tempo(completions: list[str], **kwargs) -> list[float]:
+    """Action-efficiency reward: fewer redundant actions score higher.
+    Upstream notes that tempo IS spawn-correlated (r=0.74 with discovery in
+    the Sprint scenario): closer spawns mean less travel time and better
+    tempo. Ranking within each spawn group isolates the behavioral part.
+    Reads ``kwargs["tempo_score"]`` (zeros when absent or empty), applies
+    the "tempo" weight, and neutralizes failed episodes.
+    """
+    raw = kwargs.get("tempo_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "tempo", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_density(completions: list[str], **kwargs) -> list[float]:
+    """Action-density reward: parallel use of controllable resources.
+    Per the upstream description, density measures how many distinct
+    objectives are pursued per turn relative to available units, and is
+    independent of tempo (which measures activity vs. idling):
+    3 units given 3 separate commands to 3 places -> high density;
+    3 units given 1 blob command -> low density.
+    Reads ``kwargs["density_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "density" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("density_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "density", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_format(completions: list[str], **kwargs) -> list[float]:
+    """Format-compliance reward: share of turns with valid action syntax.
+    Reads ``kwargs["format_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "format" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("format_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "format", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_survival(completions: list[str], **kwargs) -> list[float]:
+    """Unit-HP-preservation reward, meant to discourage suicide attacks.
+    Reads ``kwargs["survival_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "survival" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("survival_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "survival", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_discovery(completions: list[str], **kwargs) -> list[float]:
+    """Discovery reward from the accumulated scouting-intelligence score.
+    Per the upstream description, the game engine awards 0.05 per new enemy
+    unit sighting plus bonuses for buildings (0.2 production, 0.5 base),
+    accumulated across all ticks. Here each ``kwargs["discovery_score"]``
+    entry is clamped to [0, 1] (zeros when absent or empty), then the
+    scores are rank-normalized per spawn group, scaled by the "discovery"
+    weight, and failed episodes are neutralized.
+    """
+    raw = kwargs.get("discovery_score", [])
+    if raw:
+        base = [min(max(float(value), 0.0), 1.0) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "discovery", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_disruption(completions: list[str], **kwargs) -> list[float]:
+    """Strategic-sabotage reward: destroying enemy power, production, tech.
+    Reads ``kwargs["disruption_score"]`` (zeros when absent or empty), then
+    rank-normalizes per spawn group, applies the "disruption" weight, and
+    neutralizes failed episodes.
+    """
+    raw = kwargs.get("disruption_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "disruption", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)
+def reward_exploration(completions: list[str], **kwargs) -> list[float]:
+    """Map-exploration reward: credits clearing fog of war.
+    Reads ``kwargs["exploration_score"]`` (zeros when absent or empty),
+    then rank-normalizes per spawn group, applies the "exploration"
+    weight, and neutralizes failed episodes.
+    """
+    raw = kwargs.get("exploration_score", [])
+    if raw:
+        base = [float(value) for value in raw]
+    else:
+        base = [0.0] * len(completions)
+    ranked = _zscore_per_group(base, kwargs.get("spawn_group"))
+    scaled = _apply_weights(ranked, "exploration", kwargs.get("scenario_weights"))
+    return _neutralize_infra(scaled, kwargs)

openra_rl_training/training/rust_env_pool.py ADDED Viewed

	@@ -0,0 +1,173 @@

+"""Pool of Rust-backed OpenRA environments for fast in-process rollout.
+Mirrors the surface of `env_pool.EnvPool` but swaps the gRPC-backed
+`OpenRAEnvironment` for the native `openra_train.OpenRAEnv` (a Rust
+deterministic simulator built via maturin/PyO3).
+Key differences from the gRPC pool:
+  * No game server / port allocation. Each `OpenRAEnv` is a
+    self-contained Rust object — instantiation is microseconds.
+  * Episodes are deterministic given (scenario_path, seed). The pool
+    accepts a `seed_generator` (defaults to a monotonic counter) so
+    callers can reseed each acquire if desired.
+  * `step` accepts a list of `openra_train.Command` objects (build
+    them with `Command.move_units(...)`, `Command.attack_unit(...)`,
+    `Command.observe()`).
+The pool is process-local; for honest parallelism, fan out via
+`concurrent.futures.ProcessPoolExecutor` and have each worker own its
+own `RustEnvPool` (or just instantiate `OpenRAEnv` directly).
+Drop-in for the existing `env_pool.EnvPool`:
+  * `acquire(timeout=...) -> env`
+  * `release(env)`
+  * `update_scenario(path)` — records the new default scenario path;
+    envs already in the pool are not rebuilt and the seed counter is
+    left untouched.
+  * `shutdown()` — closes the envs and drops all references so the
+    Rust worlds can be freed.
+"""
+from __future__ import annotations
+import itertools
+import logging
+import queue
+import threading
+from typing import Any, Callable, Iterator
+logger = logging.getLogger(__name__)
+def _default_seed_generator(start: int = 0) -> Iterator[int]:
+    """Return an endless stream of seeds counting up by one from *start*."""
+    seeds = itertools.count(start)
+    return seeds
+class RustEnvHandle:
+    """Adapter around a single `openra_train.OpenRAEnv` instance.
+    Presents the `reset` / `step` / `close` trio so callers can treat it
+    like the gRPC-backed env without handling PyO3 types themselves.
+    """
+    def __init__(self, scenario_path: str, seed: int):
+        # Imported here rather than at module scope so that importing this
+        # module still succeeds on machines where the openra_train wheel
+        # has not been built.
+        import openra_train
+        self._cls_command = openra_train.Command
+        self._env = openra_train.OpenRAEnv(scenario_path, int(seed))
+        self.scenario_path = scenario_path
+        self.seed = int(seed)
+    @property
+    def Command(self):
+        """The `openra_train.Command` class, so callers can build
+        Move/Attack/Observe payloads without importing the wheel again."""
+        return self._cls_command
+    def reset(self, seed: int | None = None) -> dict[str, Any]:
+        """Reset the env and return whatever the underlying `reset()` yields.
+        Passing a *seed* that differs from the current one rebuilds the
+        underlying env first; `None` or the current seed reuses it.
+        """
+        if seed is not None:
+            requested = int(seed)
+            if requested != self.seed:
+                # Per the upstream note, the Rust env's reset() replays the
+                # seed it was built with, so a new seed needs a fresh env
+                # (described upstream as a sub-millisecond operation).
+                import openra_train
+                self._env = openra_train.OpenRAEnv(self.scenario_path, requested)
+                self.seed = requested
+        return self._env.reset()
+    def step(self, commands: list[Any]) -> tuple[dict[str, Any], float, bool, dict[str, Any]]:
+        """Forward a list of `openra_train.Command` objects to the env and
+        return its result, annotated as (obs, reward, done, info)."""
+        outcome = self._env.step(commands)
+        return outcome
+    def close(self) -> None:
+        """Drop the reference to the underlying env.
+        There is nothing external to release; clearing the reference lets
+        the Rust world be freed. The handle holds no env afterwards.
+        """
+        self._env = None
+class RustEnvPool:
+    """Thread-safe pool of Rust-backed environments.
+    Every env is created eagerly in the constructor and handed out through
+    a `queue.Queue`; the pool never creates envs afterwards.
+    Args:
+        size: Number of environment instances.
+        scenario_path: Path to the rush-hour-style scenario YAML.
+        seed_generator: Iterator yielding seeds for each new env. If
+            None, defaults to `itertools.count(0)`.
+        env_factory: Optional override; receives `(scenario_path, seed)`
+            and returns an env-like object exposing `reset(...)` /
+            `step(...)`. Useful for testing.
+    Raises:
+        ValueError: If `size` is below 1 or `scenario_path` is empty.
+    """
+    def __init__(
+        self,
+        size: int = 4,
+        scenario_path: str = "",
+        seed_generator: Iterator[int] | None = None,
+        env_factory: Callable[[str, int], Any] | None = None,
+    ):
+        if size < 1:
+            raise ValueError(f"RustEnvPool size must be >=1, got {size}")
+        if not scenario_path:
+            raise ValueError("RustEnvPool requires a non-empty scenario_path")
+        self._size = size
+        self._scenario_path = scenario_path
+        # Test against None explicitly: a caller-supplied iterator or
+        # factory that happens to be falsy (e.g. it defines __len__ and
+        # reports 0) must not be silently replaced by the default.
+        if seed_generator is None:
+            seed_generator = _default_seed_generator()
+        if env_factory is None:
+            env_factory = lambda path, seed: RustEnvHandle(path, seed)
+        self._seed_gen = seed_generator
+        self._factory = env_factory
+        self._pool: queue.Queue = queue.Queue()
+        self._envs: list = []
+        self._lock = threading.Lock()
+        for _ in range(size):
+            seed = next(self._seed_gen)
+            env = self._factory(scenario_path, seed)
+            self._envs.append(env)
+            self._pool.put(env)
+    def acquire(self, timeout: float = 30.0):
+        """Get an available environment (blocks if all busy).
+        The wait only happens when every env is checked out by other
+        threads.
+        Raises:
+            queue.Empty: If no env becomes available within *timeout*
+                seconds.
+        """
+        return self._pool.get(timeout=timeout)
+    def release(self, env) -> None:
+        """Return an environment to the pool."""
+        self._pool.put(env)
+    def update_scenario(self, scenario_path: str) -> None:
+        """Record *scenario_path* as the pool's default scenario.
+        Only the stored path (exposed via the `scenario_path` property)
+        changes. Envs already in the pool keep the scenario they were
+        built with, and the pool creates no envs after construction, so
+        callers that need the new scenario must replace envs themselves.
+        """
+        with self._lock:
+            self._scenario_path = scenario_path
+    @property
+    def scenario_path(self) -> str:
+        """The most recently recorded default scenario path."""
+        return self._scenario_path
+    @property
+    def size(self) -> int:
+        """Number of envs the pool was constructed with."""
+        return self._size
+    @property
+    def available(self) -> int:
+        """Approximate number of envs currently waiting in the queue."""
+        return self._pool.qsize()
+    def shutdown(self) -> None:
+        """Close every env, drop all references, and drain the pool."""
+        with self._lock:
+            for env in self._envs:
+                try:
+                    if hasattr(env, "close"):
+                        env.close()
+                except Exception:
+                    # Best effort: one failing env must not stop the rest
+                    # from being closed.
+                    logger.exception("Error closing Rust env")
+            self._envs.clear()
+            # get_nowait() raising Empty is the authoritative "drained"
+            # signal, so no separate empty() pre-check is needed.
+            while True:
+                try:
+                    self._pool.get_nowait()
+                except queue.Empty:
+                    break

requirements.txt CHANGED Viewed

@@ -3,3 +3,10 @@ pandas>=2.0.0
 httpx>=0.24.0
 huggingface_hub>=0.20.0
 openra-rl-util>=0.1.0

 httpx>=0.24.0
 huggingface_hub>=0.20.0
 openra-rl-util>=0.1.0
+# Used by the bench + the vendored openra_rl_training / openra_env
+# modules (see VENDOR.md).
+pydantic>=2.0
+pyyaml>=6.0
+pillow>=10.0
+numpy>=1.24
+matplotlib>=3.7