"""Minimal counter environment wired into an app built by ``create_app``."""

from __future__ import annotations

from typing import Literal
from uuid import uuid4

from fastapi.responses import RedirectResponse
from openenv.core.env_server import create_app
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field


class MinimalAction(Action):
    """Action accepted by the demo environment."""

    # Which operation to perform on this step; defaults to doing nothing.
    action_type: Literal["noop", "increment", "finish"] = "noop"
    # Increment size; validated to lie in 1..3 and only read for "increment".
    amount: int = Field(default=1, ge=1, le=3)


class MinimalObservation(Observation):
    """Observation returned from ``reset`` and ``step``."""

    # Short status tag: "ready", "ok", "finished" or "done".
    status: str
    # Counter value at the time the observation was built.
    counter: int
    # Human-readable description of the current state.
    summary: str
    reward: float = 0.0
    done: bool = False


class MinimalState(State):
    """Episode state: the inherited fields plus the running counter."""

    counter: int = 0


class MinimalEnvironment(Environment[MinimalAction, MinimalObservation, MinimalState]):
    """Counter environment that ends on ``finish`` or after eight steps."""

    SUPPORTS_CONCURRENT_SESSIONS = False

    def __init__(self):
        """Create the environment with a fresh, not-yet-finished episode."""
        super().__init__()
        self._done = False
        self._state = MinimalState(
            episode_id=str(uuid4()),
            step_count=0,
            counter=0,
        )

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> MinimalObservation:
        """Start a new episode and return its initial observation.

        Args:
            seed: Accepted for interface compatibility; unused.
            episode_id: Identifier for the new episode. A random UUID is
                generated when this is ``None`` or empty.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            An observation with status ``"ready"``, zero reward, not done.
        """
        del seed, kwargs  # intentionally ignored
        new_id = episode_id or str(uuid4())
        self._done = False
        self._state = MinimalState(episode_id=new_id, step_count=0, counter=0)
        return self._observation(status="ready", reward=0.0, done=False)

    def step(
        self,
        action: MinimalAction,
        timeout_s: float | None = None,
        **kwargs,
    ) -> MinimalObservation:
        """Apply ``action`` and return the resulting observation.

        Args:
            action: The action to apply.
            timeout_s: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The observation after the action. Once the episode has ended,
            every further call returns status ``"done"`` with zero reward
            and leaves the state untouched.
        """
        del timeout_s, kwargs  # intentionally ignored

        # A finished episode is frozen: no step counting, no reward.
        if self._done:
            return self._observation(status="done", reward=0.0, done=True)

        state = self._state
        state.step_count += 1

        kind = action.action_type
        earned = 0.0
        if kind == "increment":
            state.counter += action.amount
            earned = float(action.amount)

        # The episode ends on an explicit finish or when the step cap is hit.
        if kind == "finish" or state.step_count >= 8:
            self._done = True

        return self._observation(
            status="finished" if self._done else "ok",
            reward=earned,
            done=self._done,
        )

    @property
    def state(self) -> MinimalState:
        """Return the current episode state object."""
        return self._state

    def close(self) -> None:
        """Release resources; this environment holds none."""
        return None

    def _observation(self, *, status: str, reward: float, done: bool) -> MinimalObservation:
        """Build an observation snapshot from the current state."""
        current = self._state
        text = (
            f"Minimal demo environment. Counter={current.counter}. "
            f"Step={current.step_count}. "
            f"Choose noop, increment, or finish."
        )
        return MinimalObservation(
            status=status,
            counter=current.counter,
            summary=text,
            reward=reward,
            done=done,
        )


app = create_app(
    MinimalEnvironment,
    MinimalAction,
    MinimalObservation,
    env_name="minimal_space",
)


@app.get("/", include_in_schema=False)
def root() -> RedirectResponse:
    """Redirect the bare root URL to ``/web``."""
    return RedirectResponse(url="/web")