"""FastAPI application for the SimMart environment.""" from fastapi.responses import HTMLResponse try: from openenv.core.env_server.http_server import create_app except Exception as e: raise ImportError( "openenv-core is required. pip install openenv-core" ) from e try: from ..models import SimMartAction, SimMartObservation from .environment import SimMartEnvironment except (ImportError, ModuleNotFoundError): from models import SimMartAction, SimMartObservation from server.environment import SimMartEnvironment app = create_app( SimMartEnvironment, SimMartAction, SimMartObservation, env_name="simmart", max_concurrent_envs=10, ) _HUB = "https://huggingface.co/spaces/Viani/SimMart/blob/main" _INDEX_HTML = f"""
A 1.5B model running a 30-store, 8-week tier-2 Indian retail chain.
An LLM CEO opens a weekly inbox of 12–18 proposals from four department agents
(Supply Chain, Store Ops, Finance, Growth). Each week the CEO emits an
approve / reject / flag_suspicious verdict
per proposal, plus a free-form Founder's Journal. Two of the proposals each
quarter are deliberately rogue — inflated POs, kickback contracts,
fictitious refunds. Reward is dense: KPI deltas (EBITDA + NPS + stockout +
cash) + rogue catch + terminal P&L + journal coherence.
Trained with SFT then 110 GRPO steps on Qwen2.5-1.5B + LoRA. Held-out reward +0.84 — within 0.37 of Claude Haiku 4.5, 2× the reward of Claude Sonnet 4.6, at 1/800 the parameter count. See BLOG.md for the full results.
POST /reset | Start a new episode. Body: {{"seed": int}} |
POST /step | Take a CEO action. Body: {{"env_id": str, "action": SimMartAction}} |
GET /state | Current observation without stepping |
GET /docs | Interactive Swagger UI (full schema) |
curl -X POST https://Viani-SimMart.hf.space/reset \\
-H 'Content-Type: application/json' \\
-d '{{"seed": 42}}'