# Source: mini-rl-env / grid_env / Server / warehouse_env.py
# Commit 97ac6b2 by sohambose98: "base inference added"
"""
Server-side environment service for the warehouse fulfillment project.
"""
from __future__ import annotations
from threading import Lock
from typing import Any, Dict, Optional
from grid_env.env import WarehouseFulfillmentEnv, available_tasks
from grid_env.graders import grade_episode
from grid_env.models import WarehouseAction, model_to_dict
class WarehouseEnvService:
    """
    Thin stateful wrapper around the environment for HTTP usage.

    Owns one ``WarehouseFulfillmentEnv`` instance plus a lock. Every method
    that touches the environment does so while holding that lock, and every
    method returns a plain dictionary.
    """

    def __init__(self, default_task_id: str = "easy_single_pick", default_seed: int = 7) -> None:
        """
        Create the environment and reset it once with the defaults.

        Args:
            default_task_id: Task id used at construction and whenever
                ``reset`` receives a falsy task id (``None`` or ``""``).
            default_seed: Seed used at construction and whenever ``reset``
                receives ``None`` as the seed.
        """
        self._lock = Lock()
        self._default_task_id = default_task_id
        self._default_seed = default_seed
        self._env = WarehouseFulfillmentEnv(task_id=default_task_id, seed=default_seed)
        self._env.reset(task_id=default_task_id, seed=default_seed)

    def health(self) -> Dict[str, Any]:
        """
        Return a status payload for health checks.

        Returns:
            A dict with ``"status"`` fixed to ``"ok"`` and the ``task_id`` and
            ``episode_id`` taken from the environment's current state.
        """
        # Hold the lock for the state read, like every other accessor, so the
        # snapshot is not taken while reset()/step() is running on another
        # thread.
        with self._lock:
            state = self._env.state()
        return {
            "status": "ok",
            "task_id": state.task_id,
            "episode_id": state.episode_id,
        }

    def tasks(self) -> Dict[str, Any]:
        """Return ``{"tasks": available_tasks()}``; does not touch the environment."""
        return {"tasks": available_tasks()}

    def reset(self, task_id: Optional[str] = None, seed: Optional[int] = None) -> Dict[str, Any]:
        """
        Reset the environment and return the first observation.

        Args:
            task_id: Task to load; a falsy value selects the default task id.
            seed: Seed to use; ``None`` selects the default seed (``0`` is
                honoured as an explicit seed).

        Returns:
            A dict with the serialized ``"observation"`` from the reset and
            the serialized environment ``"state"`` read right after it.
        """
        with self._lock:
            observation = self._env.reset(
                task_id=task_id or self._default_task_id,
                seed=seed if seed is not None else self._default_seed,
            )
            return {
                "observation": model_to_dict(observation),
                "state": model_to_dict(self._env.state()),
            }

    def step(self, command: str) -> Dict[str, Any]:
        """
        Apply one command to the environment.

        Args:
            command: Passed to the environment as ``WarehouseAction(command=command)``.

        Returns:
            A dict with the serialized ``"observation"``, ``"reward"`` and
            ``"state"``, the ``"done"`` flag, and ``"info"``. ``"info"`` is the
            environment's info mapping with its ``"score"`` entry replaced as
            described in the body.
        """
        with self._lock:
            observation, reward, done, info = self._env.step(WarehouseAction(command=command))
            state = self._env.state()
            return {
                "observation": model_to_dict(observation),
                "reward": model_to_dict(reward),
                "done": done,
                "info": {
                    **info,
                    # Once the state reports done, the graded score overrides
                    # whatever the environment put in info; before that the
                    # environment's own score (or None) is passed through.
                    "score": grade_episode(state) if state.done else info.get("score"),
                },
                "state": model_to_dict(state),
            }

    def state(self) -> Dict[str, Any]:
        """Return the environment's current state, serialized with ``model_to_dict``."""
        with self._lock:
            return model_to_dict(self._env.state())