Spaces:

retroam
/

vendsim-vb2

Sleeping

App Files Files Community

retroam Amp committed on Mar 8

Commit

00b2ea2

1 Parent(s): 821b942

Add VendSim VB2 environment

Browse files

Amp-Thread-ID: https://ampcode.com/threads/T-019cce9e-be2b-718e-880f-eeb8e81cf219
Co-authored-by: Amp <amp@ampcode.com>

Files changed (22) hide show

Dockerfile +12 -0
README.md +120 -10
pyproject.toml +25 -0
vendsim_vb2/__init__.py +8 -0
vendsim_vb2/billing.py +13 -0
vendsim_vb2/client.py +238 -0
vendsim_vb2/compat.py +36 -0
vendsim_vb2/config.py +21 -0
vendsim_vb2/customer_service.py +41 -0
vendsim_vb2/demand.py +171 -0
vendsim_vb2/environment.py +395 -0
vendsim_vb2/mcp_env.py +205 -0
vendsim_vb2/prompts.py +6 -0
vendsim_vb2/rewards.py +9 -0
vendsim_vb2/server/__init__.py +1 -0
vendsim_vb2/server/app.py +21 -0
vendsim_vb2/state.py +63 -0
vendsim_vb2/subagent.py +62 -0
vendsim_vb2/suppliers.py +180 -0
vendsim_vb2/tools/__init__.py +1 -0
vendsim_vb2/tools/main_agent_tools.py +34 -0
vendsim_vb2/tools/memory_tools.py +25 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,12 @@

+FROM python:3.12-slim
+WORKDIR /app
+COPY pyproject.toml README.md ./
+COPY vendsim_vb2/ vendsim_vb2/
+RUN pip install --no-cache-dir ".[server]"
+EXPOSE 7860
+CMD ["uvicorn", "vendsim_vb2.server.app:create_app", "--factory", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,120 @@
----
-title: Vendsim Vb2
-emoji: 👁
-colorFrom: yellow
-colorTo: purple
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# vendsim-vb2
+`vendsim-vb2` is an OpenEnv 0.2.1-compatible implementation of a Vending-Bench 2 style environment.
+The agent runs a vending machine business over a 365-day horizon. It sets prices, manages storage and machine inventory, negotiates with adversarial suppliers, delegates physical actions to a sub-agent, tracks notes/reminders, and is scored by final bank balance.
+## Environment Summary
+- Starting balance: `$500`
+- Episode length: `365` simulated days
+- Daily machine fee: `$2`
+- Bankruptcy rule: `10` consecutive negative-balance days
+- Weekly token billing: `$100 / 1M output tokens`
+- Machine layout: `4` rows x `3` slots per row
+  (`2` small-item rows and `2` large-item rows)
+- Restock travel time: `75` minutes
+- Reward:
+  Default benchmark reward is sparse terminal reward equal to final bank balance.
+  Dense shaping is available behind a training flag.
+## MCP Tool Surface
+Main-agent tools:
+- `set_price`
+- `send_email`
+- `check_balance`
+- `check_storage_inventory`
+- `wait_for_next_day`
+- `run_sub_agent`
+- `chat_with_sub_agent`
+- `request_supplier_quote`
+- `negotiate_supplier`
+- `place_supplier_order`
+- `check_delivery`
+- `get_status`
+Memory tools:
+- `write_scratchpad`
+- `read_scratchpad`
+- `search_notes`
+- `set_reminder`
+Sub-agent tools exposed through `run_sub_agent`:
+- `restock_machine`
+- `collect_cash`
+- `get_machine_inventory`
+## Repository Artifacts
+Code:
+- Environment server: [vendsim_vb2/server/app.py](./vendsim_vb2/server/app.py)
+- MCP wrapper: [vendsim_vb2/mcp_env.py](./vendsim_vb2/mcp_env.py)
+- Core simulation: [vendsim_vb2/environment.py](./vendsim_vb2/environment.py)
+Notebooks:
+- Setup verification: [00_setup_verification.ipynb](../notebooks/00_setup_verification.ipynb)
+- Training notebook: [01_vb2_training_grpo.ipynb](../notebooks/01_vb2_training_grpo.ipynb)
+- Final benchmark run: [02_vb2_final_run.ipynb](../notebooks/02_vb2_final_run.ipynb)
+Tests:
+- Test suite: [tests](./tests)
+## Local Setup
+From the repository root:
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -e "./vendsim_vb2[server,dev]"
+```
+Run the tests:
+```bash
+PYTHONPATH=vendsim_vb2 pytest vendsim_vb2/tests -q
+```
+## Run Locally
+Start the OpenEnv-compatible server:
+```bash
+PYTHONPATH=vendsim_vb2 python -m uvicorn vendsim_vb2.server.app:create_app --factory --host 0.0.0.0 --port 8000
+```
+Then connect with `VB2Client` or use the notebooks.
+## Hugging Face Spaces Deployment
+Build and verify locally first:
+```bash
+cd vendsim_vb2
+docker build -t vendsim-vb2 .
+```
+Then deploy with OpenEnv tooling from the repo root after configuring your Hugging Face credentials:
+```bash
+openenv push
+```
+Submission artifact placeholders:
+- HF Space URL: `TODO`
+- Installable package / repo URL: `TODO`
+- Demo video URL: `TODO`
+## Training Artifact
+A minimal Colab training notebook (using Unsloth or HF TRL) is included:
+- [01_vb2_training_grpo.ipynb](../notebooks/01_vb2_training_grpo.ipynb)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,25 @@

+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "vendsim-vb2"
+version = "0.1.0"
+description = "OpenEnv-compatible Vending-Bench 2 simulation environment"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "openenv-core==0.2.1",
+    "fastmcp",
+]
+[project.optional-dependencies]
+server = ["fastapi>=0.115", "uvicorn>=0.34"]
+dev = ["pytest>=8.0", "ruff>=0.11"]
+[tool.setuptools]
+package-dir = {"" = "."}
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["vendsim_vb2*"]

vendsim_vb2/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+"""Vending-Bench 2 environment package."""
+from vendsim_vb2.client import VB2Client
+from vendsim_vb2.config import VB2Config
+from vendsim_vb2.environment import VendingBench2Environment
+from vendsim_vb2.mcp_env import VB2MCPEnvironment
+__all__ = ["VB2Client", "VB2Config", "VendingBench2Environment", "VB2MCPEnvironment"]

vendsim_vb2/billing.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from __future__ import annotations
+def apply_weekly_costs(
+    cash_balance: float,
+    weekly_output_tokens: int,
+    token_cost_per_million: float,
+    daily_fee: float,
+    days_in_week: int,
+) -> float:
+    token_cost = (weekly_output_tokens / 1_000_000) * token_cost_per_million
+    total_cost = token_cost + (daily_fee * days_in_week)
+    return round(cash_balance - total_cost, 2)

vendsim_vb2/client.py ADDED Viewed

	@@ -0,0 +1,238 @@

+"""VB2 environment client for agents and training scripts."""
+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+from openenv.core.env_server.mcp_types import (
+    CallToolAction,
+    CallToolObservation,
+    ListToolsAction,
+    ListToolsObservation,
+    Observation,
+    Tool,
+    ToolError,
+)
+from openenv.core.env_client import EnvClient, StepResult
+from openenv.core.mcp_client import State
+class VB2Client(EnvClient[Any, Observation, State]):
+    """
+    Client for the Vending-Bench 2 MCP environment.
+    Provides typed convenience methods for every VB2 tool, plus the full
+    ``step()`` / ``reset()`` API inherited from :class:`EnvClient`.
+    Example::
+        with VB2Client(base_url="http://localhost:8000") as env:
+            env.reset()
+            balance = env.check_balance()
+            env.set_price("soda", 1.75)
+            quote = env.request_supplier_quote("chips", 20)
+            sales = env.wait_for_next_day()
+    """
+    def __init__(
+        self,
+        base_url: str,
+        connect_timeout_s: float = 10.0,
+        message_timeout_s: float = 60.0,
+        provider: Optional[Any] = None,
+    ) -> None:
+        """Forward the connection settings to ``EnvClient`` and start with an empty tool cache."""
+        super().__init__(
+            base_url=base_url,
+            connect_timeout_s=connect_timeout_s,
+            message_timeout_s=message_timeout_s,
+            provider=provider,
+        )
+        # Filled lazily by list_tools(); None means "not fetched yet".
+        self._tools_cache: Optional[List[Tool]] = None
+    # ------------------------------------------------------------------
+    # Abstract method implementations
+    # ------------------------------------------------------------------
+    def _step_payload(self, action: Any) -> Dict[str, Any]:
+        """Convert an action object into the dict payload sent for a step.
+
+        ``ListToolsAction`` and ``CallToolAction`` map to explicit ``type``-tagged
+        dicts; any other object exposing ``model_dump`` is dumped as-is; anything
+        else is wrapped as ``{"action": str(action)}``.
+        """
+        if isinstance(action, ListToolsAction):
+            return {"type": "list_tools"}
+        if isinstance(action, CallToolAction):
+            return {
+                "type": "call_tool",
+                "tool_name": action.tool_name,
+                "arguments": action.arguments,
+            }
+        if hasattr(action, "model_dump"):
+            return action.model_dump()
+        return {"action": str(action)}
+    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[Observation]:
+        """Build a ``StepResult`` from a response payload.
+
+        The observation type is chosen from the keys of ``payload["observation"]``:
+        a ``"tools"`` key yields a ``ListToolsObservation``, a ``"tool_name"`` key
+        yields a ``CallToolObservation``, and anything else a plain ``Observation``.
+        ``done`` and ``reward`` are read from the top level of ``payload``.
+        """
+        obs_data = payload.get("observation", {})
+        if "tools" in obs_data:
+            tools = [
+                Tool(
+                    name=t.get("name", ""),
+                    description=t.get("description", ""),
+                    # Accept either snake_case or camelCase for the schema key.
+                    input_schema=t.get("input_schema", t.get("inputSchema", {})),
+                )
+                for t in obs_data.get("tools", [])
+            ]
+            observation: Observation = ListToolsObservation(
+                tools=tools,
+                done=payload.get("done", False),
+                reward=payload.get("reward"),
+                metadata=obs_data.get("metadata", {}),
+            )
+        elif "tool_name" in obs_data:
+            error = None
+            # A missing or falsy "error" entry means the tool call succeeded.
+            if obs_data.get("error"):
+                error = ToolError(**obs_data["error"])
+            observation = CallToolObservation(
+                tool_name=obs_data.get("tool_name", ""),
+                result=obs_data.get("result"),
+                error=error,
+                done=payload.get("done", False),
+                reward=payload.get("reward"),
+                metadata=obs_data.get("metadata", {}),
+            )
+        else:
+            observation = Observation(
+                done=payload.get("done", False),
+                reward=payload.get("reward"),
+                metadata=obs_data.get("metadata", {}),
+            )
+        return StepResult(
+            observation=observation,
+            reward=payload.get("reward"),
+            done=payload.get("done", False),
+        )
+    def _parse_state(self, payload: Dict[str, Any]) -> State:
+        """Build a ``State`` from ``payload``; ``step_count`` defaults to 0 when absent."""
+        return State(
+            episode_id=payload.get("episode_id"),
+            step_count=payload.get("step_count", 0),
+        )
+    # ------------------------------------------------------------------
+    # Helper: call a tool and return its result
+    # ------------------------------------------------------------------
+    def _call_tool(self, tool_name: str, **kwargs: Any) -> Any:
+        """Call a tool by name and return its result (or raise on error)."""
+        result = self.call_tool_step(tool_name, **kwargs)
+        obs = result.observation
+        if isinstance(obs, CallToolObservation) and obs.error is not None:
+            raise RuntimeError(
+                f"Tool '{tool_name}' failed: {obs.error.message} "
+                f"(type: {obs.error.error_type.value})"
+            )
+        if isinstance(obs, CallToolObservation):
+            res = obs.result
+            # Unwrap a "data" attribute or key when the result carries one;
+            # otherwise hand the result back unchanged.
+            if hasattr(res, "data"):
+                return res.data
+            if isinstance(res, dict) and "data" in res:
+                return res["data"]
+            return res
+        # Not a tool-call observation: return the observation itself.
+        return obs
+    def call_tool_step(self, tool_name: str, **kwargs: Any) -> StepResult[Observation]:
+        """Call a tool and return the full StepResult with reward/done metadata."""
+        action = CallToolAction(tool_name=tool_name, arguments=kwargs)
+        return self.step(action)
+    # ------------------------------------------------------------------
+    # Convenience methods
+    # ------------------------------------------------------------------
+    def list_tools(self, use_cache: bool = True) -> List[Tool]:
+        """Discover available tools from the environment."""
+        if use_cache and self._tools_cache is not None:
+            return self._tools_cache
+        result = self.step(ListToolsAction())
+        if isinstance(result.observation, ListToolsObservation):
+            self._tools_cache = result.observation.tools
+            return self._tools_cache
+        # Unexpected observation type: report no tools and leave the cache as-is.
+        return []
+    def set_price(self, product: str, price: float) -> Any:
+        """Update the price of a product in the vending machine."""
+        return self._call_tool("set_price", product=product, price=price)
+    def check_balance(self) -> Any:
+        """Review current bank balance."""
+        return self._call_tool("check_balance")
+    def check_storage_inventory(self) -> Any:
+        """Inspect the storage inventory."""
+        return self._call_tool("check_storage_inventory")
+    def wait_for_next_day(self, output_tokens: int = 0) -> Any:
+        """Advance simulation to the next business day."""
+        return self._call_tool("wait_for_next_day", output_tokens=output_tokens)
+    def send_email(self, recipient: str, subject: str, body: str) -> Any:
+        """Send an email to a supplier or service provider."""
+        return self._call_tool(
+            "send_email", recipient=recipient, subject=subject, body=body
+        )
+    def restock_machine(self, product: str, qty: int) -> Any:
+        """Delegate to sub-agent: restock the vending machine from storage."""
+        return self._call_tool(
+            "run_sub_agent",
+            tool_name="restock_machine",
+            arguments={"product": product, "qty": qty},
+        )
+    def collect_cash(self) -> Any:
+        """Delegate to sub-agent: collect cash from the vending machine."""
+        return self._call_tool("run_sub_agent", tool_name="collect_cash", arguments={})
+    def get_machine_inventory(self) -> Any:
+        """Delegate to sub-agent: get current machine inventory."""
+        return self._call_tool(
+            "run_sub_agent",
+            tool_name="get_machine_inventory",
+            arguments={},
+        )
+    def chat_with_sub_agent(self, message: str) -> Any:
+        """Message the sub-agent without taking action."""
+        return self._call_tool("chat_with_sub_agent", message=message)
+    def write_scratchpad(self, note: str) -> Any:
+        """Append a note to working memory."""
+        return self._call_tool("write_scratchpad", note=note)
+    def read_scratchpad(self) -> Any:
+        """Read the working-memory scratchpad."""
+        return self._call_tool("read_scratchpad")
+    def search_notes(self, query: str) -> Any:
+        """Search saved notes for a keyword."""
+        return self._call_tool("search_notes", query=query)
+    def set_reminder(self, day: int, message: str) -> Any:
+        """Schedule a future reminder."""
+        return self._call_tool("set_reminder", day=day, message=message)
+    def request_supplier_quote(self, product: str, qty: int) -> Any:
+        """Request a price quote from a supplier for a product."""
+        return self._call_tool("request_supplier_quote", product=product, qty=qty)
+    def negotiate_supplier(self, quote_id: str, proposed_unit_price: float) -> Any:
+        """Negotiate a supplier quote with a proposed unit price."""
+        return self._call_tool("negotiate_supplier", quote_id=quote_id, proposed_unit_price=proposed_unit_price)
+    def place_supplier_order(self, product: str, qty: int) -> Any:
+        """Place a confirmed order with a supplier."""
+        return self._call_tool("place_supplier_order", product=product, qty=qty)
+    def check_delivery(self, order_id: str) -> Any:
+        """Check the delivery status of a supplier order."""
+        return self._call_tool("check_delivery", order_id=order_id)

vendsim_vb2/compat.py ADDED Viewed

	@@ -0,0 +1,36 @@

+"""Compatibility shims for optional third-party dependencies."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Callable
+@dataclass(slots=True)
+class Route:
+    """One endpoint registered on the fallback ``FastAPI`` shim."""
+    # HTTP verb; the shim registers "GET" and "POST".
+    method: str
+    # URL path passed to the registering decorator.
+    path: str
+    # The decorated callable, stored unmodified.
+    endpoint: Callable[..., Any]
+class FastAPI:
+    """Small subset of FastAPI used for local smoke tests when FastAPI is absent."""
+    def __init__(self, *, title: str) -> None:
+        self.title = title
+        self.routes: list[Route] = []
+    def get(self, path: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+        return self._register("GET", path)
+    def post(self, path: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+        return self._register("POST", path)
+    def _register(
+        self, method: str, path: str
+    ) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+        def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+            self.routes.append(Route(method=method, path=path, endpoint=func))
+            return func
+        return decorator

vendsim_vb2/config.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(slots=True)
+class VB2Config:
+    """Tunable parameters for a Vending-Bench 2 episode."""
+    # --- Economy ---
+    # Opening bank balance, in dollars.
+    starting_balance: float = 500.0
+    # Fee charged per simulated day for the machine, in dollars.
+    daily_machine_fee: float = 2.0
+    # Episode horizon in simulated days.
+    episode_days: int = 365
+    # Consecutive negative-balance days that trigger bankruptcy.
+    bankruptcy_consecutive_negative_days: int = 10
+    # Dollars billed per one million output tokens.
+    output_token_cost_per_million: float = 100.0
+    # --- Locations ---
+    storage_address: str = "1680 Mission St, San Francisco"
+    machine_address: str = "1421 Bay St, San Francisco"
+    # --- Simulated time costs, in minutes ---
+    restock_travel_time_minutes: int = 75
+    supplier_message_time_minutes: int = 10
+    delivery_check_time_minutes: int = 5
+    minutes_per_day: int = 24 * 60
+    # --- Machine layout ---
+    machine_small_rows: int = 2
+    machine_large_rows: int = 2
+    machine_slots_per_row: int = 3

vendsim_vb2/customer_service.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from random import Random
+@dataclass(slots=True)
+class ComplaintTicket:
+    """A customer complaint produced by ``CustomerServiceEngine``."""
+    # Sequential identifier of the form "ticket-<n>".
+    ticket_id: str
+    # Complaint category; the engine emits "refund_request".
+    type: str
+    # Day value the engine was called with when the ticket was created.
+    day: int
+    # Amount attached to the ticket (the engine draws it from [1.5, 5.5)).
+    amount: float
+    # Human-readable explanation.
+    reason: str
+class CustomerServiceEngine:
+    def __init__(self, seed: int | None = None) -> None:
+        self._rng = Random(seed)
+        self._ticket_counter = 0
+    def maybe_create_complaint(
+        self, day: int, sales: dict[str, int]
+    ) -> ComplaintTicket | None:
+        total_units = sum(sales.values())
+        if total_units <= 0:
+            return None
+        complaint_probability = min(0.35, total_units / 150)
+        if self._rng.random() >= complaint_probability:
+            return None
+        self._ticket_counter += 1
+        amount = round(1.5 + self._rng.random() * 4.0, 2)
+        return ComplaintTicket(
+            ticket_id=f"ticket-{self._ticket_counter}",
+            type="refund_request",
+            day=day,
+            amount=amount,
+            reason="Customer reported a vending issue.",
+        )
+    def process_refund(self, cash_balance: float, amount: float) -> float:
+        return round(cash_balance - amount, 2)

vendsim_vb2/demand.py ADDED Viewed

	@@ -0,0 +1,171 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from random import Random
+# Product catalog keyed by product name. Each entry carries the slot size, the
+# baseline units demanded per day, the price at which demand is unscaled, the
+# wholesale price, and which kind of weather gives the product a demand bonus.
+PRODUCTS: dict[str, dict[str, float | str]] = {
+    "soda": {
+        "size": "small",
+        "base_daily_demand": 7.0,
+        "ideal_price": 1.50,
+        "wholesale_price": 0.58,
+        "weather_bias": "hot",
+    },
+    "water": {
+        "size": "small",
+        "base_daily_demand": 6.0,
+        "ideal_price": 1.25,
+        "wholesale_price": 0.42,
+        "weather_bias": "hot",
+    },
+    "candy": {
+        "size": "small",
+        "base_daily_demand": 4.0,
+        "ideal_price": 1.25,
+        "wholesale_price": 0.35,
+        "weather_bias": "neutral",
+    },
+    "chips": {
+        "size": "large",
+        "base_daily_demand": 5.0,
+        "ideal_price": 2.00,
+        "wholesale_price": 0.72,
+        "weather_bias": "neutral",
+    },
+    "sandwich": {
+        "size": "large",
+        "base_daily_demand": 2.0,
+        "ideal_price": 4.50,
+        "wholesale_price": 2.20,
+        "weather_bias": "cold",
+    },
+}
+# Demand multiplier applied to every product for the current season.
+SEASON_MULTIPLIERS = {
+    "winter": 0.9,
+    "spring": 1.0,
+    "summer": 1.15,
+    "autumn": 1.0,
+}
+# Demand multiplier applied to every product for the day of the week.
+DAY_OF_WEEK_MULTIPLIERS = {
+    "monday": 0.95,
+    "tuesday": 1.0,
+    "wednesday": 1.0,
+    "thursday": 1.0,
+    "friday": 1.1,
+    "saturday": 1.2,
+    "sunday": 0.85,
+}
+# Demand multiplier applied to every product for the day's weather.
+# "heatwave" is priced here but does not appear in WEATHER_SEQUENCE.
+WEATHER_MULTIPLIERS = {
+    "sunny": 1.15,
+    "cloudy": 1.0,
+    "rainy": 0.85,
+    "foggy": 0.9,
+    "heatwave": 1.25,
+}
+# Fixed repeating weather cycle, indexed by day in weather_for_day().
+WEATHER_SEQUENCE = ["sunny", "cloudy", "rainy", "sunny", "foggy", "cloudy", "sunny"]
+# Weekday names in cycle order; day 1 maps to "monday" in day_of_week_for_day().
+DAY_NAMES = [
+    "monday",
+    "tuesday",
+    "wednesday",
+    "thursday",
+    "friday",
+    "saturday",
+    "sunday",
+]
+@dataclass(slots=True)
+class DailySalesResult:
+    """Outcome of one simulated sales day, as returned by ``compute_daily_sales``."""
+    # Units sold per product name.
+    units_sold: dict[str, int]
+    # Total revenue for the day, rounded to cents.
+    revenue: float
+    # The shared (non-product-specific) demand multipliers, rounded to 3 places.
+    debug: dict[str, float]
+def season_for_day(day_index: int) -> str:
+    day_of_year = ((day_index - 1) % 365) + 1
+    if day_of_year <= 79:
+        return "winter"
+    if day_of_year <= 171:
+        return "spring"
+    if day_of_year <= 265:
+        return "summer"
+    if day_of_year <= 354:
+        return "autumn"
+    return "winter"
+def day_of_week_for_day(day_index: int) -> str:
+    return DAY_NAMES[(day_index - 1) % len(DAY_NAMES)]
+def weather_for_day(day_index: int) -> str:
+    return WEATHER_SEQUENCE[(day_index - 1) % len(WEATHER_SEQUENCE)]
+def _weather_bias_multiplier(product: str, weather: str) -> float:
+    bias = str(PRODUCTS.get(product, {}).get("weather_bias", "neutral"))
+    if bias == "hot" and weather in {"sunny", "heatwave"}:
+        return 1.1
+    if bias == "cold" and weather in {"rainy", "foggy"}:
+        return 1.08
+    return 1.0
+def compute_daily_sales(
+    products: list[str],
+    prices: dict[str, float],
+    weather: str,
+    season: str,
+    day_of_week: str,
+    inventory: dict[str, int] | None = None,
+    seed: int | None = None,
+) -> DailySalesResult:
+    rng = Random(seed)
+    choice_multiplier = 1.0 + min(max(len(products) - 1, 0), 5) * 0.05
+    weather_multiplier = WEATHER_MULTIPLIERS.get(weather, 1.0)
+    season_multiplier = SEASON_MULTIPLIERS.get(season, 1.0)
+    dow_multiplier = DAY_OF_WEEK_MULTIPLIERS.get(day_of_week, 1.0)
+    inventory = inventory or {}
+    units_sold: dict[str, int] = {}
+    revenue = 0.0
+    for product in products:
+        catalog = PRODUCTS.get(product, {})
+        base_demand = float(catalog.get("base_daily_demand", 1.0))
+        ideal_price = float(
+            catalog.get("ideal_price", max(prices.get(product, 1.0), 0.01))
+        )
+        price = float(prices.get(product, ideal_price))
+        price_multiplier = max(
+            0.15, 1.0 - ((price - ideal_price) / max(ideal_price, 0.01)) * 0.45
+        )
+        noise_multiplier = 0.9 + (rng.random() * 0.2)
+        expected_units = (
+            base_demand
+            * choice_multiplier
+            * weather_multiplier
+            * season_multiplier
+            * dow_multiplier
+            * price_multiplier
+            * _weather_bias_multiplier(product, weather)
+            * noise_multiplier
+        )
+        sold = max(0, int(round(expected_units)))
+        if product in inventory:
+            sold = min(sold, inventory[product])
+        units_sold[product] = sold
+        revenue += sold * price
+    debug = {
+        "choice_multiplier": round(choice_multiplier, 3),
+        "weather_multiplier": round(weather_multiplier, 3),
+        "season_multiplier": round(season_multiplier, 3),
+        "day_of_week_multiplier": round(dow_multiplier, 3),
+    }
+    return DailySalesResult(
+        units_sold=units_sold, revenue=round(revenue, 2), debug=debug
+    )

vendsim_vb2/environment.py ADDED Viewed

	@@ -0,0 +1,395 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from random import Random
+from typing import Any
+from vendsim_vb2.billing import apply_weekly_costs
+from vendsim_vb2.config import VB2Config
+from vendsim_vb2.customer_service import CustomerServiceEngine
+from vendsim_vb2.demand import (
+    PRODUCTS,
+    compute_daily_sales,
+    day_of_week_for_day,
+    season_for_day,
+    weather_for_day,
+)
+from vendsim_vb2.state import SimulationState
+from vendsim_vb2.subagent import SubAgent
+from vendsim_vb2.suppliers import SupplierEngine
+from vendsim_vb2.tools.main_agent_tools import get_main_tool_specs
+from vendsim_vb2.tools.memory_tools import get_memory_tool_specs
+@dataclass(slots=True)
+class ToolCallResult:
+    """Uniform return value of the environment's tool methods."""
+    # Outcome label, e.g. "ok" or "rejected"; supplier tools pass through
+    # the status reported by the supplier engine.
+    status: str
+    # Tool-specific result fields.
+    payload: dict[str, Any]
+class VendingBench2Environment:
+    def __init__(
+        self,
+        config: VB2Config | None = None,
+        seed: int | None = None,
+        use_dense_rewards: bool = False,
+    ) -> None:
+        """Create the environment and start a fresh episode.
+
+        Args:
+            config: Simulation parameters; a default ``VB2Config`` is used when omitted.
+            seed: Seed shared by the environment RNG and the supplier and
+                customer-service engines.
+            use_dense_rewards: Stored on the instance; not read in this method.
+        """
+        self.config = config or VB2Config()
+        self._seed = seed
+        self._rng = Random(seed)
+        self.use_dense_rewards = use_dense_rewards
+        self.suppliers = SupplierEngine(seed=seed)
+        self.customer_service = CustomerServiceEngine(seed=seed)
+        self.subagent = SubAgent(config=self.config)
+        # reset() rebuilds the RNG and collaborators above with the same seed
+        # and creates the episode state.
+        self.state = self.reset()
+    def reset(self) -> SimulationState:
+        self._rng = Random(self._seed)
+        self.suppliers = SupplierEngine(seed=self._seed)
+        self.customer_service = CustomerServiceEngine(seed=self._seed)
+        self.subagent = SubAgent(config=self.config)
+        self.state = SimulationState.new_episode(self.config)
+        self.state.prices = {
+            product: float(spec["ideal_price"]) for product, spec in PRODUCTS.items()
+        }
+        return self.state
+    def tool_registry(self) -> dict[str, list[str]]:
+        return {
+            "main": [spec.name for spec in get_main_tool_specs()],
+            "memory": [spec.name for spec in get_memory_tool_specs()],
+            "subagent": list(self.subagent.specs()["tools"]),
+        }
+    def _log_email(
+        self,
+        *,
+        sender: str,
+        recipient: str,
+        subject: str,
+        body: str,
+        category: str = "email",
+    ) -> None:
+        self.state.email_log.append(
+            {
+                "day": self.state.day_index,
+                "minute_of_day": self.state.minute_of_day,
+                "sender": sender,
+                "recipient": recipient,
+                "subject": subject,
+                "body": body,
+                "category": category,
+            }
+        )
+    def resolve_delivery(self, order_id: str) -> ToolCallResult:
+        """Check delivery status; on success, add items to storage and charge cost.
+
+        Returns a ``ToolCallResult`` whose status is the delivery status and
+        whose payload holds the order id, delivered quantity, days late and
+        final unit price.
+        """
+        delivery = self.suppliers.simulate_delivery(order_id)
+        # NOTE(review): reads SupplierEngine's private ``_orders`` directly;
+        # presumably a dict keyed by order id, so an unknown id would raise
+        # KeyError here unless simulate_delivery already raised — confirm.
+        order = self.suppliers._orders[order_id]
+        # Any positive delivered quantity is stocked and paid for at the
+        # final unit price, whether the delivery was on time, late or partial.
+        # NOTE(review): this runs on every call; whether simulate_delivery
+        # reports delivered_qty > 0 only once per order is not visible here —
+        # confirm that repeated checks cannot double-stock and double-charge.
+        if delivery.status in {"delivered", "delayed", "partial"} and delivery.delivered_qty > 0:
+            product = order.product
+            self.state.storage_inventory[product] = (
+                self.state.storage_inventory.get(product, 0) + delivery.delivered_qty
+            )
+            cost = round(delivery.final_unit_price * delivery.delivered_qty, 2)
+            self.state.cash_balance = round(self.state.cash_balance - cost, 2)
+        # Time advances before logging, so the supplier's update email is
+        # stamped after the check.
+        self.state.advance_minutes(self.config.delivery_check_time_minutes)
+        self._log_email(
+            sender=order.supplier_name,
+            recipient="charles.paxton",
+            subject=f"Delivery update for {order.order_id}",
+            body=(
+                f"Status={delivery.status}; delivered_qty={delivery.delivered_qty}; "
+                f"days_late={delivery.days_late}; final_unit_price={delivery.final_unit_price}"
+            ),
+            category="supplier_delivery",
+        )
+        return ToolCallResult(
+            delivery.status,
+            {
+                "order_id": delivery.order_id,
+                "delivered_qty": delivery.delivered_qty,
+                "days_late": delivery.days_late,
+                "final_unit_price": delivery.final_unit_price,
+            },
+        )
+    def set_price(self, product: str, price: float) -> ToolCallResult:
+        self.state.prices[product] = round(price, 2)
+        self.state.advance_minutes(5)
+        return ToolCallResult(
+            "ok", {"product": product, "price": self.state.prices[product]}
+        )
+    def send_email(self, recipient: str, subject: str, body: str) -> ToolCallResult:
+        self._log_email(
+            sender="charles.paxton",
+            recipient=recipient,
+            subject=subject,
+            body=body,
+            category="manual_email",
+        )
+        self.state.advance_minutes(self.config.supplier_message_time_minutes)
+        return ToolCallResult("ok", {"recipient": recipient, "queued": True})
+    def check_balance(self) -> ToolCallResult:
+        self.state.advance_minutes(1)
+        return ToolCallResult("ok", {"cash_balance": round(self.state.cash_balance, 2)})
+    def check_storage_inventory(self) -> ToolCallResult:
+        self.state.advance_minutes(2)
+        return ToolCallResult(
+            "ok", {"storage_inventory": dict(self.state.storage_inventory)}
+        )
+    def chat_with_sub_agent(self, message: str) -> ToolCallResult:
+        self.state.subagent_chat_log.append(message)
+        self.state.advance_minutes(5)
+        return ToolCallResult("ok", {"message": message})
+    def request_supplier_quote(self, product: str, qty: int) -> ToolCallResult:
+        """Ask the supplier engine for a quote and record the email exchange.
+
+        Returns an "ok" ``ToolCallResult`` with the quote id, product,
+        quantity, quoted unit price and supplier name.
+        """
+        quote = self.suppliers.request_quote(product, qty)
+        subject = f"Quote request for {qty} units of {product}"
+        # The outgoing request is logged before time advances and the reply
+        # after, so the two log entries carry different timestamps.
+        self._log_email(
+            sender="charles.paxton",
+            recipient=quote.supplier_name,
+            subject=subject,
+            body=f"Please quote {qty} units of {product}.",
+            category="supplier_quote_request",
+        )
+        self.state.advance_minutes(self.config.supplier_message_time_minutes)
+        # The logged reply includes fair_unit_price, which the returned
+        # payload below deliberately omits.
+        self._log_email(
+            sender=quote.supplier_name,
+            recipient="charles.paxton",
+            subject=f"Quote response for {product}",
+            body=(
+                f"quote_id={quote.quote_id}; qty={quote.qty}; "
+                f"unit_price={quote.unit_price}; fair_unit_price={quote.fair_unit_price}"
+            ),
+            category="supplier_quote_response",
+        )
+        return ToolCallResult(
+            "ok",
+            {
+                "quote_id": quote.quote_id,
+                "product": quote.product,
+                "qty": quote.qty,
+                "unit_price": quote.unit_price,
+                "supplier_name": quote.supplier_name,
+            },
+        )
+    def negotiate_supplier(
+        self, quote_id: str, proposed_unit_price: float
+    ) -> ToolCallResult:
+        """Send a counteroffer on an existing quote and record the email exchange.
+
+        Returns a ``ToolCallResult`` whose status is the supplier's response
+        status and whose payload holds the quote id, resulting unit price and
+        the supplier's message.
+        """
+        # NOTE(review): reads SupplierEngine's private ``_quotes`` directly;
+        # presumably a dict keyed by quote id, so an unknown id would raise
+        # KeyError before negotiate() is reached — confirm.
+        quote = self.suppliers._quotes[quote_id]
+        response = self.suppliers.negotiate(quote_id, proposed_unit_price)
+        # The counteroffer is logged before time advances and the reply after,
+        # so the two log entries carry different timestamps.
+        self._log_email(
+            sender="charles.paxton",
+            recipient=quote.supplier_name,
+            subject=f"Counteroffer for {quote.product}",
+            body=(
+                f"quote_id={quote_id}; proposed_unit_price={round(proposed_unit_price, 2)}"
+            ),
+            category="supplier_negotiation_request",
+        )
+        self.state.advance_minutes(self.config.supplier_message_time_minutes)
+        self._log_email(
+            sender=quote.supplier_name,
+            recipient="charles.paxton",
+            subject=f"Negotiation response for {quote.product}",
+            body=(
+                f"quote_id={response.quote_id}; status={response.status}; "
+                f"unit_price={response.unit_price}; message={response.message}"
+            ),
+            category="supplier_negotiation_response",
+        )
+        return ToolCallResult(
+            response.status,
+            {
+                "quote_id": response.quote_id,
+                "unit_price": response.unit_price,
+                "message": response.message,
+            },
+        )
+    def place_supplier_order(self, product: str, qty: int) -> ToolCallResult:
+        """Place an order with the supplier engine and record the email exchange.
+
+        Returns a ``ToolCallResult`` whose status is the order's status and
+        whose payload holds the order id, product, quantity, unit price and
+        supplier name. No cash is deducted in this method.
+        """
+        order = self.suppliers.place_email_confirmed_order(product, qty)
+        # The purchase order is logged before time advances and the
+        # confirmation after, so the two log entries carry different timestamps.
+        self._log_email(
+            sender="charles.paxton",
+            recipient=order.supplier_name,
+            subject=f"Purchase order for {product}",
+            body=f"Please ship {qty} units of {product}.",
+            category="supplier_order_request",
+        )
+        self.state.advance_minutes(self.config.supplier_message_time_minutes)
+        # The logged confirmation includes may_bait_and_switch, which the
+        # returned payload below omits.
+        self._log_email(
+            sender=order.supplier_name,
+            recipient="charles.paxton",
+            subject=f"Order confirmation for {product}",
+            body=(
+                f"order_id={order.order_id}; qty={order.qty}; unit_price={order.unit_price}; "
+                f"may_bait_and_switch={order.may_bait_and_switch}"
+            ),
+            category="supplier_order_confirmation",
+        )
+        return ToolCallResult(
+            order.status,
+            {
+                "order_id": order.order_id,
+                "product": order.product,
+                "qty": order.qty,
+                "unit_price": order.unit_price,
+                "supplier_name": order.supplier_name,
+            },
+        )
+    def run_sub_agent(self, tool_name: str, **kwargs: Any) -> ToolCallResult:
+        if tool_name == "restock_machine":
+            product = str(kwargs["product"])
+            qty = int(kwargs["qty"])
+            available = self.state.storage_inventory.get(product, 0)
+            if available < qty:
+                return ToolCallResult(
+                    "rejected",
+                    {
+                        "message": f"insufficient storage inventory for {product}",
+                        "available": available,
+                    },
+                )
+            result = self.subagent.restock_machine(product, qty)
+            if result.get("status") == "ok":
+                self.state.storage_inventory[product] = available - qty
+                if self.state.storage_inventory[product] == 0:
+                    del self.state.storage_inventory[product]
+                self.state.machine_inventory = dict(self.subagent.machine_inventory)
+                self.state.advance_minutes(int(result["time_cost_minutes"]))
+            return ToolCallResult(str(result["status"]), dict(result))
+        if tool_name == "collect_cash":
+            self.subagent.machine_cash = self.state.machine_cash
+            result = self.subagent.collect_cash()
+            self.state.machine_cash = self.subagent.machine_cash
+            self.state.cash_balance = round(
+                self.state.cash_balance + float(result["amount_collected"]), 2
+            )
+            self.state.advance_minutes(int(result["time_cost_minutes"]))
+            return ToolCallResult("ok", dict(result))
+        if tool_name == "get_machine_inventory":
+            return ToolCallResult(
+                "ok", {"machine_inventory": self.subagent.get_machine_inventory()}
+            )
+        raise KeyError(f"unknown sub-agent tool: {tool_name}")
+    def write_scratchpad(self, note: str) -> ToolCallResult:
+        self.state.scratchpad.append(note)
+        self.state.notes.append(note)
+        return ToolCallResult("ok", {"note_count": len(self.state.scratchpad)})
+    def read_scratchpad(self) -> ToolCallResult:
+        return ToolCallResult("ok", {"scratchpad": list(self.state.scratchpad)})
+    def search_notes(self, query: str) -> ToolCallResult:
+        query_lower = query.lower()
+        matches = [note for note in self.state.notes if query_lower in note.lower()]
+        return ToolCallResult("ok", {"matches": matches})
+    def set_reminder(self, day: int, message: str) -> ToolCallResult:
+        self.state.add_reminder(day, message)
+        return ToolCallResult("ok", {"day": day, "message": message})
+    def record_output_tokens(self, count: int) -> None:
+        self.state.weekly_output_tokens += count
+    def wait_for_next_day(self, output_tokens: int = 0) -> ToolCallResult:
+        """Close out the current day and move the clock to the next one.
+
+        In order: records ``output_tokens``, simulates the day's sales, removes
+        sold units from the machine, adds revenue to the machine cash box,
+        processes at most one customer refund, charges the daily machine fee,
+        updates the consecutive-negative-balance counter, applies weekly costs
+        on every seventh day, ticks supplier health, appends a history record
+        and finally increments ``day_index`` (resetting ``minute_of_day``).
+
+        Returns an ``"ok"`` ``ToolCallResult`` with the day's ``sales``,
+        ``revenue``, ``weather`` and ``refund_amount``.
+        """
+        self.record_output_tokens(output_tokens)
+        # Conditions are derived from the day being closed, not the next one.
+        weather = weather_for_day(self.state.day_index)
+        season = season_for_day(self.state.day_index)
+        day_of_week = day_of_week_for_day(self.state.day_index)
+        sales_result = compute_daily_sales(
+            products=list(self.state.machine_inventory),
+            prices=self.state.prices,
+            weather=weather,
+            season=season,
+            day_of_week=day_of_week,
+            inventory=self.state.machine_inventory,
+            # A fresh seed is drawn from the environment RNG each day.
+            seed=self._rng.randint(0, 1_000_000),
+        )
+        # The sub-agent's inventory is the one that gets decremented; products
+        # that sell out are removed rather than kept at zero.
+        for product, sold in sales_result.units_sold.items():
+            if product in self.subagent.machine_inventory:
+                remaining = self.subagent.machine_inventory[product] - sold
+                if remaining > 0:
+                    self.subagent.machine_inventory[product] = remaining
+                else:
+                    del self.subagent.machine_inventory[product]
+        # Mirror the sub-agent's inventory back into the state copy.
+        self.state.machine_inventory = dict(self.subagent.machine_inventory)
+        # Revenue goes into the machine coin box only — collected via collect_cash
+        self.state.machine_cash = round(
+            self.state.machine_cash + sales_result.revenue, 2
+        )
+        complaint = self.customer_service.maybe_create_complaint(
+            self.state.day_index, sales_result.units_sold
+        )
+        refund_amount = 0.0
+        if complaint is not None:
+            refund_amount = complaint.amount
+            # process_refund returns the new bank balance, which is stored.
+            self.state.cash_balance = self.customer_service.process_refund(
+                self.state.cash_balance, complaint.amount
+            )
+        self.state.cash_balance = round(
+            self.state.cash_balance - self.config.daily_machine_fee, 2
+        )
+        # The bankruptcy counter is evaluated after the daily fee but before
+        # any weekly costs applied below.
+        if self.state.cash_balance < 0:
+            self.state.consecutive_negative_days += 1
+        else:
+            self.state.consecutive_negative_days = 0
+        if self.state.day_index % 7 == 0:
+            # Seven daily fees are added back before calling apply_weekly_costs.
+            # NOTE(review): presumably apply_weekly_costs itself charges
+            # daily_fee * days_in_week, so this avoids double-charging the fee
+            # already deducted each day — confirm against billing.py.
+            self.state.cash_balance = apply_weekly_costs(
+                cash_balance=self.state.cash_balance
+                + (self.config.daily_machine_fee * 7),
+                weekly_output_tokens=self.state.weekly_output_tokens,
+                token_cost_per_million=self.config.output_token_cost_per_million,
+                daily_fee=self.config.daily_machine_fee,
+                days_in_week=7,
+            )
+            # Token usage is billed weekly, so the counter starts over.
+            self.state.weekly_output_tokens = 0
+        self.suppliers.tick_supplier_health(days=1)
+        # The history record is keyed by the day that just ended.
+        self.state.daily_sales_history.append(
+            {
+                "day": self.state.day_index,
+                "weather": weather,
+                "season": season,
+                "day_of_week": day_of_week,
+                "sales": dict(sales_result.units_sold),
+                "revenue": sales_result.revenue,
+                "refund_amount": refund_amount,
+                "debug": dict(sales_result.debug),
+            }
+        )
+        # Start the next day at minute zero regardless of time spent today.
+        self.state.day_index += 1
+        self.state.minute_of_day = 0
+        return ToolCallResult(
+            "ok",
+            {
+                "sales": dict(sales_result.units_sold),
+                "revenue": sales_result.revenue,
+                "weather": weather,
+                "refund_amount": refund_amount,
+            },
+        )
+    def final_score(self) -> float:
+        """Score is final bank balance only (per spec)."""
+        return round(self.state.cash_balance, 2)
+    def is_done(self) -> bool:
+        return (
+            self.state.day_index > self.config.episode_days
+            or self.state.consecutive_negative_days
+            >= self.config.bankruptcy_consecutive_negative_days
+        )
+    def snapshot(self) -> dict[str, Any]:
+        data = self.state.snapshot()
+        data["tools"] = self.tool_registry()
+        data["done"] = self.is_done()
+        return data

vendsim_vb2/mcp_env.py ADDED Viewed

	@@ -0,0 +1,205 @@

+from __future__ import annotations
+from typing import Any, Optional
+from fastmcp import FastMCP
+from openenv.core.env_server.mcp_environment import MCPEnvironment
+from openenv.core.env_server.mcp_types import CallToolAction, CallToolObservation
+from openenv.core.env_server.types import Action, Observation
+from vendsim_vb2.config import VB2Config
+from vendsim_vb2.environment import VendingBench2Environment
+class VB2MCPEnvironment(MCPEnvironment):
+    """OpenEnv MCP wrapper around VendingBench2Environment."""
+    def __init__(
+        self,
+        config: VB2Config | None = None,
+        seed: int | None = None,
+        use_dense_rewards: bool = False,
+    ) -> None:
+        self._config = config or VB2Config()
+        self._seed = seed
+        self._use_dense_rewards = use_dense_rewards
+        self._inner_env: VendingBench2Environment | None = None
+        self._prev_score: float = 0.0
+        mcp = FastMCP("vending-bench-2")
+        self._register_tools(mcp)
+        super().__init__(mcp)
+    # ------------------------------------------------------------------
+    # Tool registration
+    # ------------------------------------------------------------------
+    def _register_tools(self, mcp: FastMCP) -> None:
+        env_ref = self
+        @mcp.tool()
+        def set_price(product: str, price: float) -> dict:
+            """Update the price of a product in the vending machine."""
+            r = env_ref._inner_env.set_price(product, price)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def send_email(recipient: str, subject: str, body: str) -> dict:
+            """Send an email to a supplier or service provider."""
+            r = env_ref._inner_env.send_email(recipient, subject, body)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def check_balance() -> dict:
+            """Review current bank balance."""
+            r = env_ref._inner_env.check_balance()
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def check_storage_inventory() -> dict:
+            """Inspect the storage inventory."""
+            r = env_ref._inner_env.check_storage_inventory()
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def wait_for_next_day(output_tokens: int = 0) -> dict:
+            """Advance simulation to the next business day."""
+            r = env_ref._inner_env.wait_for_next_day(output_tokens)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def run_sub_agent(tool_name: str, arguments: dict[str, Any] | None = None) -> dict:
+            """Delegate a physical-world action to the sub-agent."""
+            r = env_ref._inner_env.run_sub_agent(tool_name, **(arguments or {}))
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def chat_with_sub_agent(message: str) -> dict:
+            """Message the sub-agent without taking action."""
+            r = env_ref._inner_env.chat_with_sub_agent(message)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def write_scratchpad(note: str) -> dict:
+            """Append a note to working memory."""
+            r = env_ref._inner_env.write_scratchpad(note)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def read_scratchpad() -> dict:
+            """Read the working-memory scratchpad."""
+            r = env_ref._inner_env.read_scratchpad()
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def search_notes(query: str) -> dict:
+            """Search saved notes for a keyword."""
+            r = env_ref._inner_env.search_notes(query)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def set_reminder(day: int, message: str) -> dict:
+            """Schedule a future reminder."""
+            r = env_ref._inner_env.set_reminder(day, message)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def request_supplier_quote(product: str, qty: int) -> dict:
+            """Request a price quote from a supplier for a product."""
+            r = env_ref._inner_env.request_supplier_quote(product, qty)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def negotiate_supplier(quote_id: str, proposed_unit_price: float) -> dict:
+            """Negotiate a supplier quote with a proposed unit price."""
+            r = env_ref._inner_env.negotiate_supplier(quote_id, proposed_unit_price)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def place_supplier_order(product: str, qty: int) -> dict:
+            """Place a confirmed order with a supplier."""
+            r = env_ref._inner_env.place_supplier_order(product, qty)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def check_delivery(order_id: str) -> dict:
+            """Check delivery status. On success, items are added to storage and cost is charged."""
+            r = env_ref._inner_env.resolve_delivery(order_id)
+            return {"status": r.status, **r.payload}
+        @mcp.tool()
+        def get_status() -> dict:
+            """Return a full snapshot of the current environment state."""
+            return env_ref._inner_env.snapshot()
+    # ------------------------------------------------------------------
+    # MCPEnvironment interface
+    # ------------------------------------------------------------------
+    def step(
+        self,
+        action: Action,
+        timeout_s: Optional[float] = None,
+        **kwargs: Any,
+    ) -> Observation:
+        """Override step to propagate reward/done on the Observation object."""
+        obs = super().step(action, timeout_s=timeout_s, **kwargs)
+        if not isinstance(obs, CallToolObservation) or self._inner_env is None:
+            return obs
+        done = self._inner_env.is_done()
+        obs.done = done
+        if isinstance(action, CallToolAction) and action.tool_name == "wait_for_next_day":
+            if self._use_dense_rewards:
+                # Dense: per-day delta of bank balance
+                new_score = self._inner_env.final_score()
+                obs.reward = round(new_score - self._prev_score, 2)
+                self._prev_score = new_score
+            elif done:
+                # Sparse default: final bank balance at terminal step only
+                obs.reward = self._inner_env.final_score()
+            else:
+                obs.reward = 0.0
+        else:
+            obs.reward = 0.0
+        return obs
+    def reset(
+        self,
+        seed: int | None = None,
+        episode_id: str | None = None,
+        **kwargs: Any,
+    ) -> CallToolObservation:
+        effective_seed = seed if seed is not None else self._seed
+        self._inner_env = VendingBench2Environment(
+            config=self._config,
+            seed=effective_seed,
+            use_dense_rewards=self._use_dense_rewards,
+        )
+        self._prev_score = self._inner_env.final_score()
+        snapshot = self._inner_env.snapshot()
+        snapshot["reward"] = 0.0
+        snapshot["done"] = False
+        return CallToolObservation(
+            tool_name="reset",
+            result=snapshot,
+            reward=0.0,
+            done=False,
+        )
+    def _step_impl(
+        self,
+        action: Action,
+        timeout_s: float | None = None,
+        **kwargs: Any,
+    ) -> Observation:
+        raise NotImplementedError("All actions are routed through MCP tools.")
+    @property
+    def state(self) -> dict[str, Any]:
+        if self._inner_env is None:
+            return {}
+        return self._inner_env.snapshot()

vendsim_vb2/prompts.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# System prompt that sets the agent's persona and objective.
+# NOTE(review): the "$100 per million output tokens" figure is hard-coded in
+# this text; confirm it matches VB2Config.output_token_cost_per_million.
+SYSTEM_PROMPT = """You are Charles Paxton, an autonomous AI agent running a vending machine business.
+There is no user in this environment.
+You have full agency to manage pricing, inventory, supplier negotiations, and reminders.
+Your objective is to maximize final bank balance over a one-year operating horizon.
+Weekly output token usage is billed at $100 per million output tokens.
+"""

vendsim_vb2/rewards.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from __future__ import annotations
+def compute_reward(
+    final_bank_balance: float, dense_components: list[float], use_dense: bool
+) -> float:
+    if not use_dense:
+        return final_bank_balance
+    return final_bank_balance + sum(dense_components)

vendsim_vb2/server/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Server package for the Vending-Bench 2 app factory."""

vendsim_vb2/server/app.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from __future__ import annotations
+from fastapi import FastAPI
+from openenv.core.env_server.http_server import HTTPEnvServer
+from openenv.core.env_server.mcp_types import CallToolAction, CallToolObservation
+from vendsim_vb2.mcp_env import VB2MCPEnvironment
+def create_app() -> FastAPI:
+    app = FastAPI(title="Vending-Bench 2 Environment")
+    server = HTTPEnvServer(
+        env=VB2MCPEnvironment,
+        action_cls=CallToolAction,
+        observation_cls=CallToolObservation,
+    )
+    server.register_routes(app)
+    return app
+# Application instance built once when this module is imported.
+# NOTE(review): presumably kept for servers that import an app object instead
+# of calling the create_app factory — confirm it is still needed.
+app = create_app()

vendsim_vb2/state.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from __future__ import annotations
+from dataclasses import dataclass, field
+from vendsim_vb2.config import VB2Config
+@dataclass(slots=True)
+class Reminder:
+    """A message the agent scheduled for a given day."""
+    # Target day; presumably compared against SimulationState.day_index — verify against caller.
+    day: int
+    # Free-form reminder text.
+    message: str
+@dataclass(slots=True)
+class SimulationState:
+    day_index: int
+    minute_of_day: int
+    cash_balance: float
+    storage_inventory: dict[str, int] = field(default_factory=dict)
+    machine_inventory: dict[str, int] = field(default_factory=dict)
+    machine_cash: float = 0.0
+    weekly_output_tokens: int = 0
+    consecutive_negative_days: int = 0
+    scratchpad: list[str] = field(default_factory=list)
+    reminders: list[Reminder] = field(default_factory=list)
+    notes: list[str] = field(default_factory=list)
+    email_log: list[dict[str, object]] = field(default_factory=list)
+    subagent_chat_log: list[str] = field(default_factory=list)
+    daily_sales_history: list[dict[str, object]] = field(default_factory=list)
+    prices: dict[str, float] = field(default_factory=dict)
+    @classmethod
+    def new_episode(cls, config: VB2Config | None = None) -> "SimulationState":
+        cfg = config or VB2Config()
+        return cls(day_index=1, minute_of_day=0, cash_balance=cfg.starting_balance)
+    def advance_minutes(self, minutes: int) -> None:
+        if minutes < 0:
+            raise ValueError("minutes must be non-negative")
+        total = self.minute_of_day + minutes
+        self.day_index += total // (24 * 60)
+        self.minute_of_day = total % (24 * 60)
+    def add_reminder(self, day: int, message: str) -> None:
+        self.reminders.append(Reminder(day=day, message=message))
+    def snapshot(self) -> dict[str, object]:
+        return {
+            "day_index": self.day_index,
+            "minute_of_day": self.minute_of_day,
+            "cash_balance": round(self.cash_balance, 2),
+            "storage_inventory": dict(self.storage_inventory),
+            "machine_inventory": dict(self.machine_inventory),
+            "machine_cash": round(self.machine_cash, 2),
+            "weekly_output_tokens": self.weekly_output_tokens,
+            "consecutive_negative_days": self.consecutive_negative_days,
+            "scratchpad": list(self.scratchpad),
+            "reminders": [{"day": r.day, "message": r.message} for r in self.reminders],
+            "notes": list(self.notes),
+            "email_log": [dict(entry) for entry in self.email_log],
+            "subagent_chat_log": list(self.subagent_chat_log),
+            "prices": dict(self.prices),
+        }

vendsim_vb2/subagent.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from __future__ import annotations
+from dataclasses import dataclass, field
+from vendsim_vb2.config import VB2Config
+from vendsim_vb2.demand import PRODUCTS
+# Physical layout of the vending machine: two rows for small products and two
+# for large ones, three slots per row (12 slots in total).
+MACHINE_LAYOUT = {
+    "small_rows": 2,
+    "large_rows": 2,
+    "slots_per_row": 3,
+    "total_slots": 12,
+}
+# NOTE(review): SubAgent reads its travel time from
+# config.restock_travel_time_minutes, not from this constant — confirm whether
+# this module-level value is still used anywhere.
+RESTOCK_TRAVEL_TIME_MINUTES = 75
+@dataclass(slots=True)
+class SubAgent:
+    config: VB2Config = field(default_factory=VB2Config)
+    machine_inventory: dict[str, int] = field(default_factory=dict)
+    machine_cash: float = 0.0
+    def specs(self) -> dict[str, object]:
+        return {
+            "name": "physical-ops-sub-agent",
+            "tools": ["restock_machine", "collect_cash", "get_machine_inventory"],
+        }
+    def machine_layout(self) -> dict[str, int]:
+        return dict(MACHINE_LAYOUT)
+    def restock_machine(self, product: str, qty: int) -> dict[str, object]:
+        if qty <= 0:
+            return {"status": "rejected", "message": "qty must be positive"}
+        size = str(PRODUCTS.get(product, {}).get("size", "small"))
+        max_slots = MACHINE_LAYOUT[f"{size}_rows"] * MACHINE_LAYOUT["slots_per_row"]
+        current = sum(
+            units
+            for stocked_product, units in self.machine_inventory.items()
+            if str(PRODUCTS.get(stocked_product, {}).get("size", "small")) == size
+        )
+        if current + qty > max_slots:
+            return {"status": "rejected", "message": f"{size} slots full"}
+        self.machine_inventory[product] = self.machine_inventory.get(product, 0) + qty
+        return {
+            "status": "ok",
+            "time_cost_minutes": self.config.restock_travel_time_minutes,
+            "machine_inventory": dict(self.machine_inventory),
+        }
+    def collect_cash(self) -> dict[str, object]:
+        collected = round(self.machine_cash, 2)
+        self.machine_cash = 0.0
+        return {
+            "status": "ok",
+            "amount_collected": collected,
+            "time_cost_minutes": self.config.restock_travel_time_minutes,
+        }
+    def get_machine_inventory(self) -> dict[str, int]:
+        return dict(self.machine_inventory)

vendsim_vb2/suppliers.py ADDED Viewed

	@@ -0,0 +1,180 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from random import Random
+from vendsim_vb2.demand import PRODUCTS
+@dataclass(slots=True)
+class Quote:
+    """A supplier's price offer for a quantity of one product."""
+    # Identifier of the form "quote-<n>", assigned by SupplierEngine.
+    quote_id: str
+    product: str
+    qty: int
+    # Price actually quoted to the agent (fair price times a random markup).
+    unit_price: float
+    # Rounded wholesale price; SupplierEngine.negotiate derives its floor from it.
+    fair_unit_price: float
+    supplier_name: str
+@dataclass(slots=True)
+class NegotiationResponse:
+    """Outcome of one negotiation round on a quote."""
+    quote_id: str
+    # SupplierEngine.negotiate produces "accepted", "countered" or "rejected".
+    status: str
+    # Agreed price, counteroffer price, or the original quoted price on rejection.
+    unit_price: float
+    message: str
+@dataclass(slots=True)
+class SupplierOrder:
+    """An order placed with a supplier, before delivery is resolved."""
+    # Identifier of the form "order-<n>", assigned by SupplierEngine.
+    order_id: str
+    product: str
+    qty: int
+    unit_price: float
+    supplier_name: str
+    # When true, simulate_delivery may raise the final unit price.
+    may_bait_and_switch: bool
+    status: str = "confirmed"
+@dataclass(slots=True)
+class DeliveryTimeline:
+    """Resolved outcome of a supplier order."""
+    order_id: str
+    # SupplierEngine.simulate_delivery produces "delivered", "delayed",
+    # "partial" or "failed".
+    status: str
+    # Units that arrived; 0 for a failed delivery.
+    delivered_qty: int
+    days_late: int
+    # Unit price after any bait-and-switch increase.
+    final_unit_price: float
+class SupplierEngine:
+    """Seeded simulation of adversarial suppliers.
+
+    Hands out quotes, answers negotiations, records orders and resolves
+    deliveries. All randomness comes from one ``Random`` instance, so the
+    order of calls determines the outcomes for a given seed.
+    """
+    def __init__(self, seed: int | None = None) -> None:
+        """Create an engine with its own RNG and empty quote/order books."""
+        self._rng = Random(seed)
+        self._quotes: dict[str, Quote] = {}
+        self._orders: dict[str, SupplierOrder] = {}
+        # Cache of delivery outcomes so repeated checks return the same result.
+        self._resolved_deliveries: dict[str, DeliveryTimeline] = {}
+        self._quote_counter = 0
+        self._order_counter = 0
+        # "active" until tick_supplier_health flips it to "out_of_business".
+        self._health = "active"
+    def request_quote(self, product: str, qty: int) -> Quote:
+        """Create, store and return a quote for ``qty`` units of ``product``.
+
+        The quoted price is the product's wholesale price (1.0 if the product
+        is unknown) times a random markup factor in the range 0.85 to 2.25.
+        """
+        fair_price = float(PRODUCTS.get(product, {}).get("wholesale_price", 1.0))
+        markup = 0.85 + (self._rng.random() * 1.4)
+        quoted_price = round(fair_price * markup, 2)
+        self._quote_counter += 1
+        quote = Quote(
+            quote_id=f"quote-{self._quote_counter}",
+            product=product,
+            qty=qty,
+            unit_price=quoted_price,
+            fair_unit_price=round(fair_price, 2),
+            supplier_name=f"supplier-{self._rng.randint(1, 5)}",
+        )
+        self._quotes[quote.quote_id] = quote
+        return quote
+    def negotiate(
+        self, quote_id: str, proposed_unit_price: float
+    ) -> NegotiationResponse:
+        """Answer a counteroffer on a stored quote.
+
+        Offers at or above the quoted price are accepted at the offered price.
+        Offers between the floor (90% of the fair price) and the quoted price
+        are accepted with probability 0.55, otherwise countered at the
+        midpoint. Offers below the floor are rejected. The stored quote itself
+        is not modified.
+
+        Raises:
+            KeyError: if ``quote_id`` is unknown.
+        """
+        quote = self._quotes[quote_id]
+        floor_price = round(quote.fair_unit_price * 0.9, 2)
+        if proposed_unit_price >= quote.unit_price:
+            return NegotiationResponse(
+                quote_id=quote_id,
+                status="accepted",
+                unit_price=round(proposed_unit_price, 2),
+                message="Accepted at your proposed price.",
+            )
+        if proposed_unit_price >= floor_price:
+            # One RNG draw decides between acceptance and a counteroffer.
+            if self._rng.random() < 0.55:
+                return NegotiationResponse(
+                    quote_id=quote_id,
+                    status="accepted",
+                    unit_price=round(proposed_unit_price, 2),
+                    message="Accepted after negotiation.",
+                )
+            # Counter halfway between the offer and the quoted price.
+            counter_price = round((proposed_unit_price + quote.unit_price) / 2, 2)
+            return NegotiationResponse(
+                quote_id=quote_id,
+                status="countered",
+                unit_price=counter_price,
+                message="Counteroffer issued.",
+            )
+        return NegotiationResponse(
+            quote_id=quote_id,
+            status="rejected",
+            unit_price=quote.unit_price,
+            message="Offer too low.",
+        )
+    def place_email_confirmed_order(self, product: str, qty: int) -> SupplierOrder:
+        """Create, store and return an order for ``qty`` units of ``product``.
+
+        The order price is drawn here (wholesale price times a factor in the
+        range 0.95 to 1.45); stored quotes are not consulted. Each order has a
+        0.35 probability of being flagged for a possible bait-and-switch.
+        """
+        fair_price = float(PRODUCTS.get(product, {}).get("wholesale_price", 1.0))
+        unit_price = round(fair_price * (0.95 + self._rng.random() * 0.5), 2)
+        self._order_counter += 1
+        order = SupplierOrder(
+            order_id=f"order-{self._order_counter}",
+            product=product,
+            qty=qty,
+            unit_price=unit_price,
+            supplier_name=f"supplier-{self._rng.randint(1, 5)}",
+            may_bait_and_switch=self._rng.random() < 0.35,
+        )
+        self._orders[order.order_id] = order
+        return order
+    def simulate_delivery(self, order_id: str) -> DeliveryTimeline:
+        """Resolve an order's delivery once and cache the outcome.
+
+        Outcome probabilities while the supplier is active: 55% delivered,
+        25% delayed (1-7 days late), 12% partial (40-80% of the quantity, at
+        least one unit, 0-5 days late), 8% failed. If the supplier is out of
+        business the delivery always fails.
+
+        Raises:
+            KeyError: if ``order_id`` is unknown and not already resolved.
+        """
+        # Return cached result if already resolved (idempotent)
+        if order_id in self._resolved_deliveries:
+            return self._resolved_deliveries[order_id]
+        order = self._orders[order_id]
+        if self._health == "out_of_business":
+            result = DeliveryTimeline(
+                order_id=order_id,
+                status="failed",
+                delivered_qty=0,
+                days_late=0,
+                final_unit_price=order.unit_price,
+            )
+            self._resolved_deliveries[order_id] = result
+            return result
+        # A single roll selects the outcome bucket via cumulative thresholds.
+        roll = self._rng.random()
+        if roll < 0.55:
+            status = "delivered"
+            delivered_qty = order.qty
+            days_late = 0
+        elif roll < 0.8:
+            status = "delayed"
+            delivered_qty = order.qty
+            days_late = self._rng.randint(1, 7)
+        elif roll < 0.92:
+            status = "partial"
+            # At least one unit arrives, even for very small orders.
+            delivered_qty = max(1, int(order.qty * (0.4 + self._rng.random() * 0.4)))
+            days_late = self._rng.randint(0, 5)
+        else:
+            status = "failed"
+            delivered_qty = 0
+            days_late = 0
+        final_unit_price = order.unit_price
+        # Flagged orders that deliver anything have a 50% chance of a price
+        # increase by a factor between 1.05 and 1.30.
+        if (
+            order.may_bait_and_switch
+            and status in {"delivered", "delayed", "partial"}
+            and self._rng.random() < 0.5
+        ):
+            final_unit_price = round(
+                order.unit_price * (1.05 + self._rng.random() * 0.25), 2
+            )
+        result = DeliveryTimeline(
+            order_id=order_id,
+            status=status,
+            delivered_qty=delivered_qty,
+            days_late=days_late,
+            final_unit_price=final_unit_price,
+        )
+        self._resolved_deliveries[order_id] = result
+        return result
+    def tick_supplier_health(self, days: int = 1) -> str:
+        """Advance supplier health by ``days`` and return the current health.
+
+        Each call fails the supplier with probability
+        ``min(0.45, days / 365 * 0.7)``. Once out of business, the supplier
+        stays that way and no further RNG draw is made.
+        """
+        if self._health == "out_of_business":
+            return self._health
+        failure_risk = min(0.45, days / 365 * 0.7)
+        if self._rng.random() < failure_risk:
+            self._health = "out_of_business"
+        return self._health

vendsim_vb2/tools/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Tool registries for the Vending-Bench 2 environment."""

vendsim_vb2/tools/main_agent_tools.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True, slots=True)
+class ToolSpec:
+    """Immutable description of one main-agent tool."""
+    name: str
+    description: str
+    # Time cost in minutes; presumably charged to the simulation clock when
+    # the tool runs — verify against the environment.
+    time_cost_minutes: int
+# Catalogue of main-agent tools as (name, description, time cost in minutes).
+MAIN_TOOL_SPECS: tuple[ToolSpec, ...] = (
+    ToolSpec("set_price", "Update the price of a product in the vending machine.", 5),
+    ToolSpec("send_email", "Send an email to a supplier or service provider.", 10),
+    ToolSpec("check_balance", "Review current bank balance.", 1),
+    ToolSpec("check_storage_inventory", "Inspect the storage inventory.", 2),
+    ToolSpec("wait_for_next_day", "Advance simulation to the next business day.", 0),
+    ToolSpec("run_sub_agent", "Delegate a physical-world action to the sub-agent.", 0),
+    ToolSpec("chat_with_sub_agent", "Message the sub-agent without taking action.", 5),
+    ToolSpec("request_supplier_quote", "Request a quote from a supplier.", 10),
+    ToolSpec("negotiate_supplier", "Negotiate pricing with a supplier.", 10),
+    ToolSpec("place_supplier_order", "Place a supplier order after email confirmation.", 10),
+    ToolSpec("check_delivery", "Check the delivery status of a supplier order.", 5),
+    ToolSpec("get_status", "Return a full environment snapshot.", 0),
+)
+def list_main_tools() -> list[str]:
+    return [spec.name for spec in MAIN_TOOL_SPECS]
+def get_main_tool_specs() -> tuple[ToolSpec, ...]:
+    """Return the immutable tuple of main-agent tool specs."""
+    return MAIN_TOOL_SPECS

vendsim_vb2/tools/memory_tools.py ADDED Viewed

	@@ -0,0 +1,25 @@

+from __future__ import annotations
+from dataclasses import dataclass
+@dataclass(frozen=True, slots=True)
+class MemoryToolSpec:
+    """Immutable description of one memory tool (name and description only)."""
+    name: str
+    description: str
+# Catalogue of memory tools as (name, description).
+MEMORY_TOOL_SPECS: tuple[MemoryToolSpec, ...] = (
+    MemoryToolSpec("write_scratchpad", "Append a note to working memory."),
+    MemoryToolSpec("read_scratchpad", "Read the working-memory scratchpad."),
+    MemoryToolSpec("search_notes", "Search saved notes for a keyword."),
+    MemoryToolSpec("set_reminder", "Schedule a future reminder."),
+)
+def list_memory_tools() -> list[str]:
+    return [spec.name for spec in MEMORY_TOOL_SPECS]
+def get_memory_tool_specs() -> tuple[MemoryToolSpec, ...]:
+    """Return the immutable tuple of memory tool specs."""
+    return MEMORY_TOOL_SPECS