Spaces:
Running
Running
Bench: consume S9 economy obs + economy win-conditions + full toolset
Browse files
- rust_adapter: ingest economy/own_buildings/production into
EpisodeSignals (cash/power/harvesters/building types/queue).
- win_conditions: add cash_gte, harvesters_gte, power_surplus_gte,
has_building, buildings_owned_gte — contributors can now express
economy/production constraints declaratively.
- agent: tool schemas + parsing for all 14 engine commands
(build/harvest/place_building/deploy/sell/repair/stop/...).
- tests: update tool-schema + move-target assertions (engine correctly
refuses off-map pathing). 154 passed, 1 skipped.
- .DS_Store +0 -0
- openra_bench/agent.py +109 -1
- openra_bench/rust_adapter.py +25 -0
- openra_bench/scenarios/win_conditions.py +10 -0
- tests/test_agent.py +5 -5
- tests/test_rust_integration.py +5 -1
.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
openra_bench/agent.py
CHANGED
|
@@ -73,8 +73,101 @@ _TOOL_SCHEMAS: dict[str, dict] = {
|
|
| 73 |
},
|
| 74 |
},
|
| 75 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# Aliases tolerated from models trained on slightly different names.
|
| 77 |
-
_TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "
|
| 78 |
|
| 79 |
|
| 80 |
def _tool_schemas(allowed: list[str] | None) -> list[dict]:
|
|
@@ -150,6 +243,21 @@ def _to_commands(tool_calls: list[dict], Command: Any) -> list:
|
|
| 150 |
cmds.append(Command.attack_unit(ids, str(args["target_id"])))
|
| 151 |
elif name == "observe":
|
| 152 |
cmds.append(Command.observe())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
except (KeyError, TypeError, ValueError) as e:
|
| 154 |
logger.debug("dropping malformed tool call %s: %s", call, e)
|
| 155 |
return cmds
|
|
|
|
| 73 |
},
|
| 74 |
},
|
| 75 |
}
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _units_xy(name: str, desc: str) -> dict:
|
| 79 |
+
return {
|
| 80 |
+
"type": "function",
|
| 81 |
+
"function": {
|
| 82 |
+
"name": name,
|
| 83 |
+
"description": desc,
|
| 84 |
+
"parameters": {
|
| 85 |
+
"type": "object",
|
| 86 |
+
"properties": {
|
| 87 |
+
"unit_ids": {"type": "array", "items": {"type": "integer"}},
|
| 88 |
+
"target_x": {"type": "integer"},
|
| 89 |
+
"target_y": {"type": "integer"},
|
| 90 |
+
},
|
| 91 |
+
"required": ["unit_ids", "target_x", "target_y"],
|
| 92 |
+
},
|
| 93 |
+
},
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def _units_only(name: str, desc: str) -> dict:
|
| 98 |
+
return {
|
| 99 |
+
"type": "function",
|
| 100 |
+
"function": {
|
| 101 |
+
"name": name,
|
| 102 |
+
"description": desc,
|
| 103 |
+
"parameters": {
|
| 104 |
+
"type": "object",
|
| 105 |
+
"properties": {
|
| 106 |
+
"unit_ids": {"type": "array", "items": {"type": "integer"}}
|
| 107 |
+
},
|
| 108 |
+
"required": ["unit_ids"],
|
| 109 |
+
},
|
| 110 |
+
},
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _item_only(name: str, desc: str) -> dict:
|
| 115 |
+
return {
|
| 116 |
+
"type": "function",
|
| 117 |
+
"function": {
|
| 118 |
+
"name": name,
|
| 119 |
+
"description": desc,
|
| 120 |
+
"parameters": {
|
| 121 |
+
"type": "object",
|
| 122 |
+
"properties": {"item": {"type": "string"}},
|
| 123 |
+
"required": ["item"],
|
| 124 |
+
},
|
| 125 |
+
},
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
_TOOL_SCHEMAS.update(
|
| 130 |
+
{
|
| 131 |
+
"attack_move": _units_xy(
|
| 132 |
+
"attack_move", "Move toward a cell, engaging hostiles encountered."
|
| 133 |
+
),
|
| 134 |
+
"harvest": _units_xy(
|
| 135 |
+
"harvest", "Send harvesters to collect ore at a resource cell."
|
| 136 |
+
),
|
| 137 |
+
"set_rally_point": _units_xy(
|
| 138 |
+
"set_rally_point", "Set a production building's unit rally cell."
|
| 139 |
+
),
|
| 140 |
+
"stop": _units_only("stop", "Cancel the units' current orders (go idle)."),
|
| 141 |
+
"deploy": _units_only("deploy", "Transform an MCV into a construction yard."),
|
| 142 |
+
"sell": _units_only("sell", "Sell a building for a partial refund."),
|
| 143 |
+
"repair": _units_only("repair", "Toggle repair on a damaged building."),
|
| 144 |
+
"power_down": _units_only("power_down", "Toggle a building's power."),
|
| 145 |
+
"build": _item_only(
|
| 146 |
+
"build", "Queue production of a unit/building by type (e.g. 'e1')."
|
| 147 |
+
),
|
| 148 |
+
"cancel_production": _item_only(
|
| 149 |
+
"cancel_production", "Cancel the last queued item of this type (refund)."
|
| 150 |
+
),
|
| 151 |
+
"place_building": {
|
| 152 |
+
"type": "function",
|
| 153 |
+
"function": {
|
| 154 |
+
"name": "place_building",
|
| 155 |
+
"description": "Place a completed building at a cell.",
|
| 156 |
+
"parameters": {
|
| 157 |
+
"type": "object",
|
| 158 |
+
"properties": {
|
| 159 |
+
"item": {"type": "string"},
|
| 160 |
+
"target_x": {"type": "integer"},
|
| 161 |
+
"target_y": {"type": "integer"},
|
| 162 |
+
},
|
| 163 |
+
"required": ["item", "target_x", "target_y"],
|
| 164 |
+
},
|
| 165 |
+
},
|
| 166 |
+
},
|
| 167 |
+
}
|
| 168 |
+
)
|
| 169 |
# Aliases tolerated from models trained on slightly different names.
|
| 170 |
+
_TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "stop"}
|
| 171 |
|
| 172 |
|
| 173 |
def _tool_schemas(allowed: list[str] | None) -> list[dict]:
|
|
|
|
| 243 |
cmds.append(Command.attack_unit(ids, str(args["target_id"])))
|
| 244 |
elif name == "observe":
|
| 245 |
cmds.append(Command.observe())
|
| 246 |
+
elif name in ("attack_move", "harvest", "set_rally_point"):
|
| 247 |
+
ids = [str(i) for i in args["unit_ids"]]
|
| 248 |
+
fn = getattr(Command, name)
|
| 249 |
+
cmds.append(fn(ids, int(args["target_x"]), int(args["target_y"])))
|
| 250 |
+
elif name in ("stop", "deploy", "sell", "repair", "power_down"):
|
| 251 |
+
ids = [str(i) for i in args["unit_ids"]]
|
| 252 |
+
cmds.append(getattr(Command, name)(ids))
|
| 253 |
+
elif name in ("build", "cancel_production"):
|
| 254 |
+
cmds.append(getattr(Command, name)(str(args["item"])))
|
| 255 |
+
elif name == "place_building":
|
| 256 |
+
cmds.append(
|
| 257 |
+
Command.place_building(
|
| 258 |
+
str(args["item"]), int(args["target_x"]), int(args["target_y"])
|
| 259 |
+
)
|
| 260 |
+
)
|
| 261 |
except (KeyError, TypeError, ValueError) as e:
|
| 262 |
logger.debug("dropping malformed tool call %s: %s", call, e)
|
| 263 |
return cmds
|
openra_bench/rust_adapter.py
CHANGED
|
@@ -87,6 +87,13 @@ class EpisodeSignals:
|
|
| 87 |
new_buildings_this_step: int = 0
|
| 88 |
game_tick: int = 0
|
| 89 |
done: bool = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# Outcome is synthesized (Rust has no result field): a scenario is
|
| 91 |
# "won" when all enemy buildings have been discovered AND/OR all
|
| 92 |
# enemy units neutralized — refined per-scenario in Phase 2 rubrics.
|
|
@@ -103,6 +110,9 @@ class EpisodeSignals:
|
|
| 103 |
"outcome": self.outcome,
|
| 104 |
"game_tick": self.game_tick,
|
| 105 |
"done": self.done,
|
|
|
|
|
|
|
|
|
|
| 106 |
}
|
| 107 |
|
| 108 |
|
|
@@ -160,6 +170,21 @@ class RustObsAdapter:
|
|
| 160 |
s.enemy_buildings_seen_ids.add(str(b["id"]))
|
| 161 |
s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
|
| 164 |
s.done = bool(done)
|
| 165 |
|
|
|
|
| 87 |
new_buildings_this_step: int = 0
|
| 88 |
game_tick: int = 0
|
| 89 |
done: bool = False
|
| 90 |
+
# S9 economy/production (0/empty until the engine grounds them).
|
| 91 |
+
cash: int = 0
|
| 92 |
+
power_provided: int = 0
|
| 93 |
+
power_drained: int = 0
|
| 94 |
+
harvesters: int = 0
|
| 95 |
+
own_building_types: set[str] = field(default_factory=set)
|
| 96 |
+
production_items: list[str] = field(default_factory=list)
|
| 97 |
# Outcome is synthesized (Rust has no result field): a scenario is
|
| 98 |
# "won" when all enemy buildings have been discovered AND/OR all
|
| 99 |
# enemy units neutralized — refined per-scenario in Phase 2 rubrics.
|
|
|
|
| 110 |
"outcome": self.outcome,
|
| 111 |
"game_tick": self.game_tick,
|
| 112 |
"done": self.done,
|
| 113 |
+
"cash": self.cash,
|
| 114 |
+
"harvesters": self.harvesters,
|
| 115 |
+
"buildings_owned": len(self.own_building_types),
|
| 116 |
}
|
| 117 |
|
| 118 |
|
|
|
|
| 170 |
s.enemy_buildings_seen_ids.add(str(b["id"]))
|
| 171 |
s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
|
| 172 |
|
| 173 |
+
econ = self._raw.get("economy") or {}
|
| 174 |
+
if isinstance(econ, dict):
|
| 175 |
+
s.cash = int(econ.get("cash", s.cash) or 0)
|
| 176 |
+
s.power_provided = int(econ.get("power_provided", 0) or 0)
|
| 177 |
+
s.power_drained = int(econ.get("power_drained", 0) or 0)
|
| 178 |
+
s.harvesters = int(econ.get("harvesters", 0) or 0)
|
| 179 |
+
for b in self._raw.get("own_buildings", []) or []:
|
| 180 |
+
if isinstance(b, dict) and b.get("type"):
|
| 181 |
+
s.own_building_types.add(str(b["type"]).lower())
|
| 182 |
+
s.production_items = [
|
| 183 |
+
str(p.get("item", "")).lower()
|
| 184 |
+
for p in (self._raw.get("production", []) or [])
|
| 185 |
+
if isinstance(p, dict)
|
| 186 |
+
]
|
| 187 |
+
|
| 188 |
s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
|
| 189 |
s.done = bool(done)
|
| 190 |
|
openra_bench/scenarios/win_conditions.py
CHANGED
|
@@ -61,6 +61,16 @@ _PREDICATES: dict[str, Callable[[WinContext, Any], bool]] = {
|
|
| 61 |
"all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
|
| 62 |
and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
|
| 63 |
== len(_agent_units(c)),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
|
| 66 |
LEAF_KEYS = frozenset(_PREDICATES)
|
|
|
|
| 61 |
"all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
|
| 62 |
and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
|
| 63 |
== len(_agent_units(c)),
|
| 64 |
+
# S9 economy / production constraints (require the engine economy
|
| 65 |
+
# subsystem; 0/empty on movement-only scenarios).
|
| 66 |
+
"cash_gte": lambda c, v: c.signals.cash >= int(v),
|
| 67 |
+
"harvesters_gte": lambda c, v: c.signals.harvesters >= int(v),
|
| 68 |
+
"power_surplus_gte": lambda c, v: (
|
| 69 |
+
c.signals.power_provided - c.signals.power_drained
|
| 70 |
+
)
|
| 71 |
+
>= int(v),
|
| 72 |
+
"has_building": lambda c, v: str(v).lower() in c.signals.own_building_types,
|
| 73 |
+
"buildings_owned_gte": lambda c, v: len(c.signals.own_building_types) >= int(v),
|
| 74 |
}
|
| 75 |
|
| 76 |
LEAF_KEYS = frozenset(_PREDICATES)
|
tests/test_agent.py
CHANGED
|
@@ -45,11 +45,11 @@ def test_tool_schema_filtering():
|
|
| 45 |
assert "move_units" in names
|
| 46 |
assert "attack_unit" not in names
|
| 47 |
assert "observe" in names, "a no-op must always be offered"
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
}
|
| 53 |
|
| 54 |
|
| 55 |
def test_build_briefing_format():
|
|
|
|
| 45 |
assert "move_units" in names
|
| 46 |
assert "attack_unit" not in names
|
| 47 |
assert "observe" in names, "a no-op must always be offered"
|
| 48 |
+
full = {t["function"]["name"] for t in _tool_schemas(None)}
|
| 49 |
+
# Core movement/combat always present.
|
| 50 |
+
assert {"move_units", "attack_unit", "observe"} <= full
|
| 51 |
+
# Economy/production/structure commands are exposed too (S9 wiring).
|
| 52 |
+
assert {"build", "harvest", "place_building", "stop", "deploy"} <= full
|
| 53 |
|
| 54 |
|
| 55 |
def test_build_briefing_format():
|
tests/test_rust_integration.py
CHANGED
|
@@ -122,7 +122,11 @@ class TestRustEngineTools:
|
|
| 122 |
env = ot.OpenRAEnv(RUSH_HOUR, 7)
|
| 123 |
obs = env.reset()
|
| 124 |
uid, start = _first_unit(obs)
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
last = start
|
| 127 |
for _ in range(15):
|
| 128 |
obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])
|
|
|
|
| 122 |
env = ot.OpenRAEnv(RUSH_HOUR, 7)
|
| 123 |
obs = env.reset()
|
| 124 |
uid, start = _first_unit(obs)
|
| 125 |
+
# Target an in-bounds interior cell: rush-hour is ~128x40 and the
|
| 126 |
+
# first unit may spawn at the east edge (x~120), so a blind +25
|
| 127 |
+
# would be off-map (the engine correctly refuses to path off-map).
|
| 128 |
+
tx = start[0] - 25 if start[0] > 64 else start[0] + 25
|
| 129 |
+
target = (tx, min(max(start[1], 3), 36))
|
| 130 |
last = start
|
| 131 |
for _ in range(15):
|
| 132 |
obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])
|