Spaces:

qpluslab
/

OpenRA-Bench

Running

yxc20098 committed on May 17

Commit

5a1cf72

1 Parent(s): 5b68a55

Bench: consume S9 economy obs + economy win-conditions + full toolset

- rust_adapter: ingest economy/own_buildings/production into
EpisodeSignals (cash/power/harvesters/building types/queue).
- win_conditions: +cash_gte, harvesters_gte, power_surplus_gte,
has_building, buildings_owned_gte — contributors can now declare
economy/production constraints declaratively.
- agent: tool schemas + parsing for all 14 engine commands
(build/harvest/place_building/deploy/sell/repair/stop/...).
- tests: update tool-schema + move-target assertions (engine correctly
refuses off-map pathing). 154 passed, 1 skipped.

Files changed (6) hide show

.DS_Store +0 -0
openra_bench/agent.py +109 -1
openra_bench/rust_adapter.py +25 -0
openra_bench/scenarios/win_conditions.py +10 -0
tests/test_agent.py +5 -5
tests/test_rust_integration.py +5 -1

.DS_Store ADDED Viewed

Binary file (8.2 kB). View file

openra_bench/agent.py CHANGED Viewed

@@ -73,8 +73,101 @@ _TOOL_SCHEMAS: dict[str, dict] = {
         },
     },
 }
 # Aliases tolerated from models trained on slightly different names.
-_TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "observe"}
 def _tool_schemas(allowed: list[str] | None) -> list[dict]:
@@ -150,6 +243,21 @@ def _to_commands(tool_calls: list[dict], Command: Any) -> list:
                 cmds.append(Command.attack_unit(ids, str(args["target_id"])))
             elif name == "observe":
                 cmds.append(Command.observe())
         except (KeyError, TypeError, ValueError) as e:
             logger.debug("dropping malformed tool call %s: %s", call, e)
     return cmds

         },
     },
 }
+def _units_xy(name: str, desc: str) -> dict:
+    return {
+        "type": "function",
+        "function": {
+            "name": name,
+            "description": desc,
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "unit_ids": {"type": "array", "items": {"type": "integer"}},
+                    "target_x": {"type": "integer"},
+                    "target_y": {"type": "integer"},
+                },
+                "required": ["unit_ids", "target_x", "target_y"],
+            },
+        },
+    }
+def _units_only(name: str, desc: str) -> dict:
+    return {
+        "type": "function",
+        "function": {
+            "name": name,
+            "description": desc,
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "unit_ids": {"type": "array", "items": {"type": "integer"}}
+                },
+                "required": ["unit_ids"],
+            },
+        },
+    }
+def _item_only(name: str, desc: str) -> dict:
+    return {
+        "type": "function",
+        "function": {
+            "name": name,
+            "description": desc,
+            "parameters": {
+                "type": "object",
+                "properties": {"item": {"type": "string"}},
+                "required": ["item"],
+            },
+        },
+    }
+_TOOL_SCHEMAS.update(
+    {
+        "attack_move": _units_xy(
+            "attack_move", "Move toward a cell, engaging hostiles encountered."
+        ),
+        "harvest": _units_xy(
+            "harvest", "Send harvesters to collect ore at a resource cell."
+        ),
+        "set_rally_point": _units_xy(
+            "set_rally_point", "Set a production building's unit rally cell."
+        ),
+        "stop": _units_only("stop", "Cancel the units' current orders (go idle)."),
+        "deploy": _units_only("deploy", "Transform an MCV into a construction yard."),
+        "sell": _units_only("sell", "Sell a building for a partial refund."),
+        "repair": _units_only("repair", "Toggle repair on a damaged building."),
+        "power_down": _units_only("power_down", "Toggle a building's power."),
+        "build": _item_only(
+            "build", "Queue production of a unit/building by type (e.g. 'e1')."
+        ),
+        "cancel_production": _item_only(
+            "cancel_production", "Cancel the last queued item of this type (refund)."
+        ),
+        "place_building": {
+            "type": "function",
+            "function": {
+                "name": "place_building",
+                "description": "Place a completed building at a cell.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "item": {"type": "string"},
+                        "target_x": {"type": "integer"},
+                        "target_y": {"type": "integer"},
+                    },
+                    "required": ["item", "target_x", "target_y"],
+                },
+            },
+        },
+    }
+)
 # Aliases tolerated from models trained on slightly different names.
+_TOOL_ALIASES = {"attack_target": "attack_unit", "stop_units": "stop"}
 def _tool_schemas(allowed: list[str] | None) -> list[dict]:
                 cmds.append(Command.attack_unit(ids, str(args["target_id"])))
             elif name == "observe":
                 cmds.append(Command.observe())
+            elif name in ("attack_move", "harvest", "set_rally_point"):
+                ids = [str(i) for i in args["unit_ids"]]
+                fn = getattr(Command, name)
+                cmds.append(fn(ids, int(args["target_x"]), int(args["target_y"])))
+            elif name in ("stop", "deploy", "sell", "repair", "power_down"):
+                ids = [str(i) for i in args["unit_ids"]]
+                cmds.append(getattr(Command, name)(ids))
+            elif name in ("build", "cancel_production"):
+                cmds.append(getattr(Command, name)(str(args["item"])))
+            elif name == "place_building":
+                cmds.append(
+                    Command.place_building(
+                        str(args["item"]), int(args["target_x"]), int(args["target_y"])
+                    )
+                )
         except (KeyError, TypeError, ValueError) as e:
             logger.debug("dropping malformed tool call %s: %s", call, e)
     return cmds

openra_bench/rust_adapter.py CHANGED Viewed

@@ -87,6 +87,13 @@ class EpisodeSignals:
     new_buildings_this_step: int = 0
     game_tick: int = 0
     done: bool = False
     # Outcome is synthesized (Rust has no result field): a scenario is
     # "won" when all enemy buildings have been discovered AND/OR all
     # enemy units neutralized — refined per-scenario in Phase 2 rubrics.
@@ -103,6 +110,9 @@ class EpisodeSignals:
             "outcome": self.outcome,
             "game_tick": self.game_tick,
             "done": self.done,
         }
@@ -160,6 +170,21 @@ class RustObsAdapter:
                 s.enemy_buildings_seen_ids.add(str(b["id"]))
         s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
         s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
         s.done = bool(done)

     new_buildings_this_step: int = 0
     game_tick: int = 0
     done: bool = False
+    # S9 economy/production (0/empty until the engine grounds them).
+    cash: int = 0
+    power_provided: int = 0
+    power_drained: int = 0
+    harvesters: int = 0
+    own_building_types: set[str] = field(default_factory=set)
+    production_items: list[str] = field(default_factory=list)
     # Outcome is synthesized (Rust has no result field): a scenario is
     # "won" when all enemy buildings have been discovered AND/OR all
     # enemy units neutralized — refined per-scenario in Phase 2 rubrics.
             "outcome": self.outcome,
             "game_tick": self.game_tick,
             "done": self.done,
+            "cash": self.cash,
+            "harvesters": self.harvesters,
+            "buildings_owned": len(self.own_building_types),
         }
                 s.enemy_buildings_seen_ids.add(str(b["id"]))
         s.new_buildings_this_step = len(s.enemy_buildings_seen_ids) - before_b
+        econ = self._raw.get("economy") or {}
+        if isinstance(econ, dict):
+            s.cash = int(econ.get("cash", s.cash) or 0)
+            s.power_provided = int(econ.get("power_provided", 0) or 0)
+            s.power_drained = int(econ.get("power_drained", 0) or 0)
+            s.harvesters = int(econ.get("harvesters", 0) or 0)
+        for b in self._raw.get("own_buildings", []) or []:
+            if isinstance(b, dict) and b.get("type"):
+                s.own_building_types.add(str(b["type"]).lower())
+        s.production_items = [
+            str(p.get("item", "")).lower()
+            for p in (self._raw.get("production", []) or [])
+            if isinstance(p, dict)
+        ]
         s.game_tick = int(self._raw.get("game_tick", s.game_tick) or 0)
         s.done = bool(done)

openra_bench/scenarios/win_conditions.py CHANGED Viewed

@@ -61,6 +61,16 @@ _PREDICATES: dict[str, Callable[[WinContext, Any], bool]] = {
     "all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
     and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
     == len(_agent_units(c)),
 }
 LEAF_KEYS = frozenset(_PREDICATES)

     "all_units_in_region": lambda c, v: len(_agent_units(c)) > 0
     and _in_radius(_agent_units(c), int(v["x"]), int(v["y"]), float(v.get("radius", 3)))
     == len(_agent_units(c)),
+    # S9 economy / production constraints (require the engine economy
+    # subsystem; 0/empty on movement-only scenarios).
+    "cash_gte": lambda c, v: c.signals.cash >= int(v),
+    "harvesters_gte": lambda c, v: c.signals.harvesters >= int(v),
+    "power_surplus_gte": lambda c, v: (
+        c.signals.power_provided - c.signals.power_drained
+    )
+    >= int(v),
+    "has_building": lambda c, v: str(v).lower() in c.signals.own_building_types,
+    "buildings_owned_gte": lambda c, v: len(c.signals.own_building_types) >= int(v),
 }
 LEAF_KEYS = frozenset(_PREDICATES)

tests/test_agent.py CHANGED Viewed

@@ -45,11 +45,11 @@ def test_tool_schema_filtering():
     assert "move_units" in names
     assert "attack_unit" not in names
     assert "observe" in names, "a no-op must always be offered"
-    assert {t["function"]["name"] for t in _tool_schemas(None)} == {
-        "move_units",
-        "attack_unit",
-        "observe",
-    }
 def test_build_briefing_format():

     assert "move_units" in names
     assert "attack_unit" not in names
     assert "observe" in names, "a no-op must always be offered"
+    full = {t["function"]["name"] for t in _tool_schemas(None)}
+    # Core movement/combat always present.
+    assert {"move_units", "attack_unit", "observe"} <= full
+    # Economy/production/structure commands are exposed too (S9 wiring).
+    assert {"build", "harvest", "place_building", "stop", "deploy"} <= full
 def test_build_briefing_format():

tests/test_rust_integration.py CHANGED Viewed

@@ -122,7 +122,11 @@ class TestRustEngineTools:
         env = ot.OpenRAEnv(RUSH_HOUR, 7)
         obs = env.reset()
         uid, start = _first_unit(obs)
-        target = (start[0] + 25, start[1])
         last = start
         for _ in range(15):
             obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])

         env = ot.OpenRAEnv(RUSH_HOUR, 7)
         obs = env.reset()
         uid, start = _first_unit(obs)
+        # Target an in-bounds interior cell: rush-hour is ~128x40 and the
+        # first unit may spawn at the east edge (x~120), so a blind +25
+        # would be off-map (the engine correctly refuses to path off-map).
+        tx = start[0] - 25 if start[0] > 64 else start[0] + 25
+        target = (tx, min(max(start[1], 3), 36))
         last = start
         for _ in range(15):
             obs, *_ = env.step([ot.Command.move_units([uid], target[0], target[1])])