yxc20098 committed on
Commit
5a1cf72
·
1 Parent(s): 5b68a55

Bench: consume S9 economy obs + economy win-conditions + full toolset

Browse files

- rust_adapter: ingest economy/own_buildings/production into
EpisodeSignals (cash/power/harvesters/building types/queue).
- win_conditions: add cash_gte, harvesters_gte, power_surplus_gte,
has_building, buildings_owned_gte — contributors can now express
economy/production constraints declaratively.
- agent: tool schemas + parsing for all 14 engine commands
(build/harvest/place_building/deploy/sell/repair/stop/...).
- tests: update tool-schema + move-target assertions (engine correctly
refuses off-map pathing). 154 passed, 1 skipped.

.DS_Store ADDED
Binary file (8.2 kB). View file
 
openra_bench/agent.py CHANGED
@@ -73,8 +73,101 @@ _TOOL_SCHEMAS: dict[str, dict] = {
73
  },
74
  },
75
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # Aliases tolerated from models trained on slightly different names.
77
- _TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "observe"}
78
 
79
 
80
  def _tool_schemas(allowed: list[str] | None) -> list[dict]:
@@ -150,6 +243,21 @@ def _to_commands(tool_calls: list[dict], Command: Any) -> list:
150
  cmds.append(Command.attack_unit(ids, str(args["target_id"])))
151
  elif name == "observe":
152
  cmds.append(Command.observe())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  except (KeyError, TypeError, ValueError) as e:
154
  logger.debug("dropping malformed tool call %s: %s", call, e)
155
  return cmds
 
73
  },
74
  },
75
  }
76
+
77
+
78
+ def _units_xy(name: str, desc: str) -> dict:
79
+ return {
80
+ "type": "function",
81
+ "function": {
82
+ "name": name,
83
+ "description": desc,
84
+ "parameters": {
85
+ "type": "object",
86
+ "properties": {
87
+ "unit_ids": {"type": "array", "items": {"type": "integer"}},
88
+ "target_x": {"type": "integer"},
89
+ "target_y": {"type": "integer"},
90
+ },
91
+ "required": ["unit_ids", "target_x", "target_y"],
92
+ },
93
+ },
94
+ }
95
+
96
+
97
+ def _units_only(name: str, desc: str) -> dict:
98
+ return {
99
+ "type": "function",
100
+ "function": {
101
+ "name": name,
102
+ "description": desc,
103
+ "parameters": {
104
+ "type": "object",
105
+ "properties": {
106
+ "unit_ids": {"type": "array", "items": {"type": "integer"}}
107
+ },
108
+ "required": ["unit_ids"],
109
+ },
110
+ },
111
+ }
112
+
113
+
114
+ def _item_only(name: str, desc: str) -> dict:
115
+ return {
116
+ "type": "function",
117
+ "function": {
118
+ "name": name,
119
+ "description": desc,
120
+ "parameters": {
121
+ "type": "object",
122
+ "properties": {"item": {"type": "string"}},
123
+ "required": ["item"],
124
+ },
125
+ },
126
+ }
127
+
128
+
129
+ _TOOL_SCHEMAS.update(
130
+ {
131
+ "attack_move": _units_xy(
132
+ "attack_move", "Move toward a cell, engaging hostiles encountered."
133
+ ),
134
+ "harvest": _units_xy(
135
+ "harvest", "Send harvesters to collect ore at a resource cell."
136
+ ),
137
+ "set_rally_point": _units_xy(
138
+ "set_rally_point", "Set a production building's unit rally cell."
139
+ ),
140
+ "stop": _units_only("stop", "Cancel the units' current orders (go idle)."),
141
+ "deploy": _units_only("deploy", "Transform an MCV into a construction yard."),
142
+ "sell": _units_only("sell", "Sell a building for a partial refund."),
143
+ "repair": _units_only("repair", "Toggle repair on a damaged building."),
144
+ "power_down": _units_only("power_down", "Toggle a building's power."),
145
+ "build": _item_only(
146
+ "build", "Queue production of a unit/building by type (e.g. 'e1')."
147
+ ),
148
+ "cancel_production": _item_only(
149
+ "cancel_production", "Cancel the last queued item of this type (refund)."
150
+ ),
151
+ "place_building": {
152
+ "type": "function",
153
+ "function": {
154
+ "name": "place_building",
155
+ "description": "Place a completed building at a cell.",
156
+ "parameters": {
157
+ "type": "object",
158
+ "properties": {
159
+ "item": {"type": "string"},
160
+ "target_x": {"type": "integer"},
161
+ "target_y": {"type": "integer"},
162
+ },
163
+ "required": ["item", "target_x", "target_y"],
164
+ },
165
+ },
166
+ },
167
+ }
168
+ )
169
  # Aliases tolerated from models trained on slightly different names.
170
+ _TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "stop"}
171
 
172
 
173
  def _tool_schemas(allowed: list[str] | None) -> list[dict]:
 
243
  cmds.append(Command.attack_unit(ids, str(args["target_id"])))
244
  elif name == "observe":
245
  cmds.append(Command.observe())
246
+ elif name in ("attack_move", "harvest", "set_rally_point"):
247
+ ids = [str(i) for i in args["unit_ids"]]
248
+ fn = getattr(Command, name)
249
+ cmds.append(fn(ids, int(args["target_x"]), int(args["target_y"])))
250
+ elif name in ("stop", "deploy", "sell", "repair", "power_down"):
251
+ ids = [str(i) for i in args["unit_ids"]]
252
+ cmds.append(getattr(Command, name)(ids))
253
+ elif name in ("build", "cancel_production"):
254
+ cmds.append(getattr(Command, name)(str(args["item"])))
255
+ elif name == "place_building":
256
+ cmds.append(
257
+ Command.place_building(
258
+ str(args["item"]), int(args["target_x"]), int(args["target_y"])
259
+ )
260
+ )
261
  except (KeyError, TypeError, ValueError) as e:
262
  logger.debug("dropping malformed tool call %s: %s", call, e)
263
  return cmds
openra_bench/rust_adapter.py CHANGED
@@ -87,6 +87,13 @@ class EpisodeSignals:
87
  new_buildings_this_step: int = 0
88
  game_tick: int = 0
89
  done: bool = False
 
 
 
 
 
 
 
90
  # Outcome is synthesized (Rust has no result field): a scenario is
91
  # "won" when all enemy buildings have been discovered AND/OR all
92
  # enemy units neutralized — refined per-scenario in Phase 2 rubrics.
@@ -103,6 +110,9 @@ class EpisodeSignals:
103
  "outcome": self.outcome,
104
  "game_tick": self.game_tick,
105
  "done": self.done,
 
 
 
106
  }
107
 
108
 
@@ -160,6 +170,21 @@ class RustObsAdapter:
160
  s.enemy_buildings_seen_ids.add(str(b["id"]))
161
  s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
164
  s.done = bool(done)
165
 
 
87
  new_buildings_this_step: int = 0
88
  game_tick: int = 0
89
  done: bool = False
90
+ # S9 economy/production (0/empty until the engine grounds them).
91
+ cash: int = 0
92
+ power_provided: int = 0
93
+ power_drained: int = 0
94
+ harvesters: int = 0
95
+ own_building_types: set[str] = field(default_factory=set)
96
+ production_items: list[str] = field(default_factory=list)
97
  # Outcome is synthesized (Rust has no result field): a scenario is
98
  # "won" when all enemy buildings have been discovered AND/OR all
99
  # enemy units neutralized — refined per-scenario in Phase 2 rubrics.
 
110
  "outcome": self.outcome,
111
  "game_tick": self.game_tick,
112
  "done": self.done,
113
+ "cash": self.cash,
114
+ "harvesters": self.harvesters,
115
+ "buildings_owned": len(self.own_building_types),
116
  }
117
 
118
 
 
170
  s.enemy_buildings_seen_ids.add(str(b["id"]))
171
  s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
172
 
173
+ econ = self._raw.get("economy") or {}
174
+ if isinstance(econ, dict):
175
+ s.cash = int(econ.get("cash", s.cash) or 0)
176
+ s.power_provided = int(econ.get("power_provided", 0) or 0)
177
+ s.power_drained = int(econ.get("power_drained", 0) or 0)
178
+ s.harvesters = int(econ.get("harvesters", 0) or 0)
179
+ for b in self._raw.get("own_buildings", []) or []:
180
+ if isinstance(b, dict) and b.get("type"):
181
+ s.own_building_types.add(str(b["type"]).lower())
182
+ s.production_items = [
183
+ str(p.get("item", "")).lower()
184
+ for p in (self._raw.get("production", []) or [])
185
+ if isinstance(p, dict)
186
+ ]
187
+
188
  s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
189
  s.done = bool(done)
190
 
openra_bench/scenarios/win_conditions.py CHANGED
@@ -61,6 +61,16 @@ _PREDICATES: dict[str, Callable[[WinContext, Any], bool]] = {
61
  "all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
62
  and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
63
  == len(_agent_units(c)),
 
 
 
 
 
 
 
 
 
 
64
  }
65
 
66
  LEAF_KEYS = frozenset(_PREDICATES)
 
61
  "all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
62
  and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
63
  == len(_agent_units(c)),
64
+ # S9 economy / production constraints (require the engine economy
65
+ # subsystem; 0/empty on movement-only scenarios).
66
+ "cash_gte": lambda c, v: c.signals.cash >= int(v),
67
+ "harvesters_gte": lambda c, v: c.signals.harvesters >= int(v),
68
+ "power_surplus_gte": lambda c, v: (
69
+ c.signals.power_provided - c.signals.power_drained
70
+ )
71
+ >= int(v),
72
+ "has_building": lambda c, v: str(v).lower() in c.signals.own_building_types,
73
+ "buildings_owned_gte": lambda c, v: len(c.signals.own_building_types) >= int(v),
74
  }
75
 
76
  LEAF_KEYS = frozenset(_PREDICATES)
tests/test_agent.py CHANGED
@@ -45,11 +45,11 @@ def test_tool_schema_filtering():
45
  assert "move_units" in names
46
  assert "attack_unit" not in names
47
  assert "observe" in names, "a no-op must always be offered"
48
- assert {t["function"]["name"] for t in _tool_schemas(None)} == {
49
- "move_units",
50
- "attack_unit",
51
- "observe",
52
- }
53
 
54
 
55
  def test_build_briefing_format():
 
45
  assert "move_units" in names
46
  assert "attack_unit" not in names
47
  assert "observe" in names, "a no-op must always be offered"
48
+ full = {t["function"]["name"] for t in _tool_schemas(None)}
49
+ # Core movement/combat always present.
50
+ assert {"move_units", "attack_unit", "observe"} <= full
51
+ # Economy/production/structure commands are exposed too (S9 wiring).
52
+ assert {"build", "harvest", "place_building", "stop", "deploy"} <= full
53
 
54
 
55
  def test_build_briefing_format():
tests/test_rust_integration.py CHANGED
@@ -122,7 +122,11 @@ class TestRustEngineTools:
122
  env = ot.OpenRAEnv(RUSH_HOUR, 7)
123
  obs = env.reset()
124
  uid, start = _first_unit(obs)
125
- target = (start[0] + 25, start[1])
 
 
 
 
126
  last = start
127
  for _ in range(15):
128
  obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])
 
122
  env = ot.OpenRAEnv(RUSH_HOUR, 7)
123
  obs = env.reset()
124
  uid, start = _first_unit(obs)
125
+ # Target an in-bounds interior cell: rush-hour is ~128x40 and the
126
+ # first unit may spawn at the east edge (x~120), so a blind +25
127
+ # would be off-map (the engine correctly refuses to path off-map).
128
+ tx = start[0] - 25 if start[0] > 64 else start[0] + 25
129
+ target = (tx, min(max(start[1], 3), 36))
130
  last = start
131
  for _ in range(15):
132
  obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])