File size: 4,669 Bytes
5c35138
 
bee987f
 
5c35138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bee987f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1906186
 
 
 
bee987f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c35138
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""FastAPI application for the SimMart environment."""

from fastapi.responses import HTMLResponse

# `create_app` from openenv-core is a hard requirement of this module.
# Any exception raised while importing it is re-raised as ImportError with
# an install hint, chained (`from e`) so the original cause stays visible.
try:
    from openenv.core.env_server.http_server import create_app
except Exception as e:
    raise ImportError(
        "openenv-core is required.  pip install openenv-core"
    ) from e

# Try package-relative imports first; if they fail, fall back to absolute
# imports of `models` and `server.environment`.
# NOTE(review): presumably the fallback covers running with the project root
# on sys.path instead of as an installed package -- confirm against the
# deployment entry point.
try:
    from ..models import SimMartAction, SimMartObservation
    from .environment import SimMartEnvironment
# ModuleNotFoundError is a subclass of ImportError, so listing both is
# redundant but harmless.
except (ImportError, ModuleNotFoundError):
    from models import SimMartAction, SimMartObservation
    from server.environment import SimMartEnvironment

# Module-level ASGI application, built by openenv's `create_app` from the
# SimMart environment class and its action/observation models. The landing
# page route below is attached to this same object.
# NOTE(review): `max_concurrent_envs=10` presumably caps simultaneously live
# environment instances -- confirm against the openenv-core documentation.
app = create_app(
    SimMartEnvironment,
    SimMartAction,
    SimMartObservation,
    env_name="simmart",
    max_concurrent_envs=10,
)


_HUB = "https://huggingface.co/spaces/Viani/SimMart/blob/main"

_INDEX_HTML = f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>SimMart β€” OpenEnv retail-CEO simulation</title>
  <style>
    body {{ font: 15px/1.55 -apple-system, system-ui, "Segoe UI", Roboto, sans-serif;
            max-width: 760px; margin: 36px auto; padding: 0 22px; color: #1a1a1a; }}
    h1 {{ font-size: 28px; margin: 0 0 4px 0; }}
    .tag {{ color: #555; font-size: 14px; margin: 0 0 22px 0; }}
    h3 {{ margin-top: 26px; font-size: 16px; }}
    p {{ margin: 10px 0; }}
    code {{ background: #f4f4f5; padding: 1px 5px; border-radius: 3px;
            font: 13px/1.4 ui-monospace, SFMono-Regular, Menlo, monospace; }}
    pre {{ background: #f4f4f5; padding: 12px 14px; border-radius: 6px;
           overflow-x: auto; font: 13px/1.5 ui-monospace, SFMono-Regular, Menlo, monospace;
           border: 1px solid #ececec; }}
    a {{ color: #c2410c; text-decoration: none; }}
    a:hover {{ text-decoration: underline; }}
    table {{ border-collapse: collapse; margin: 6px 0 14px 0; }}
    table td {{ padding: 4px 16px 4px 0; vertical-align: top; font-size: 14px; }}
    .links a {{ display: inline-block; margin: 4px 16px 4px 0; }}
    .pill {{ display: inline-block; padding: 2px 8px; border-radius: 11px;
             background: #fff7ed; color: #c2410c; font-size: 12px; font-weight: 600; }}
  </style>
</head>
<body>
  <h1>πŸ›’ SimMart <span class="pill">OpenEnv</span></h1>
  <p class="tag">A 1.5B model running a 30-store, 8-week tier-2 Indian retail chain.</p>

  <p>An LLM CEO opens a weekly inbox of 12–18 proposals from four department agents
  (Supply Chain, Store Ops, Finance, Growth). Each week the CEO emits an
  <code>approve</code> / <code>reject</code> / <code>flag_suspicious</code> verdict
  per proposal, plus a free-form Founder's Journal. Two of the proposals each
  quarter are deliberately <em>rogue</em> β€” inflated POs, kickback contracts,
  fictitious refunds. Reward is dense: KPI deltas (EBITDA + NPS + stockout +
  cash) + rogue catch + terminal P&amp;L + journal coherence.</p>

  <p>Trained with SFT then 110 GRPO steps on Qwen2.5-1.5B + LoRA. Held-out
  reward <strong>+0.84</strong> &mdash; within <strong>0.37</strong> of Claude
  Haiku 4.5, <strong>2&times; the reward of Claude Sonnet 4.6</strong>, at
  1/800 the parameter count. See
  <a href="{_HUB}/BLOG.md" target="_top">BLOG.md</a> for the full results.</p>

  <h3>API endpoints</h3>
  <table>
    <tr><td><code>POST /reset</code></td><td>Start a new episode. Body: <code>{{"seed": int}}</code></td></tr>
    <tr><td><code>POST /step</code></td><td>Take a CEO action. Body: <code>{{"env_id": str, "action": SimMartAction}}</code></td></tr>
    <tr><td><code>GET&nbsp;&nbsp;/state</code></td><td>Current observation without stepping</td></tr>
    <tr><td><code>GET&nbsp;&nbsp;<a href="/docs">/docs</a></code></td><td>Interactive Swagger UI (full schema)</td></tr>
  </table>

  <h3>Try it (curl)</h3>
<pre>curl -X POST https://Viani-SimMart.hf.space/reset \\
  -H 'Content-Type: application/json' \\
  -d '{{"seed": 42}}'</pre>

  <h3>Materials</h3>
  <p class="links">
    <a href="{_HUB}/README.md" target="_top">README</a>
    <a href="{_HUB}/BLOG.md" target="_top">Mini-blog</a>
    <a href="{_HUB}/notebooks/hackathon_grpo_single_gpu.ipynb" target="_top">Training notebook</a>
    <a href="{_HUB}/assets/training_curve_4dept.png" target="_top">Training curve</a>
    <a href="/docs">API docs</a>
  </p>
</body>
</html>
"""


@app.get("/", response_class=HTMLResponse, include_in_schema=False)
def index() -> HTMLResponse:
    """Serve the static landing page at the site root.

    This is the page shown inside the HF Space iframe. It is not part of the
    OpenEnv API, and the route is registered with ``include_in_schema=False``.

    Returns:
        An ``HTMLResponse`` whose body is the pre-rendered ``_INDEX_HTML``.
    """
    landing_page = HTMLResponse(content=_INDEX_HTML)
    return landing_page


def main(host: str = "0.0.0.0", port: int = 7860):
    """Run the module-level ``app`` under uvicorn.

    Args:
        host: Address to bind the server to.
        port: TCP port to listen on.
    """
    # Imported at call time, so merely importing this module does not pull
    # in uvicorn.
    from uvicorn import run as serve

    serve(app, host=host, port=port)


# Script entry point: start the server with main()'s default host and port
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()