ademarteau committed on
Commit
b413222
·
1 Parent(s): 355b2d5

feat: replace Gradio with React UI — GRPO tab, 730-day sim, 200-entry memory bank

Browse files
Dockerfile CHANGED
@@ -1,5 +1,13 @@
1
- FROM python:3.13-slim
 
 
 
 
 
 
2
 
 
 
3
  WORKDIR /app
4
 
5
  RUN apt-get update && apt-get install -y build-essential git && rm -rf /var/lib/apt/lists/*
@@ -14,10 +22,13 @@ COPY server/ ./server/
14
  COPY agent/ ./agent/
15
  COPY client/ ./client/
16
 
 
 
 
17
  RUN useradd -m user
18
  USER user
19
- ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH GRADIO_SERVER_NAME=0.0.0.0 GRADIO_SERVER_PORT=7860
20
 
21
  EXPOSE 7860
22
 
23
- CMD ["python", "app.py"]
 
1
+ # ── Stage 1: Build React frontend ────────────────────────────────────────────
2
+ FROM node:20-slim AS frontend-build
3
+ WORKDIR /frontend
4
+ COPY frontend/package*.json ./
5
+ RUN npm ci
6
+ COPY frontend/ ./
7
+ RUN npm run build
8
 
9
+ # ── Stage 2: Python app ───────────────────────────────────────────────────────
10
+ FROM python:3.13-slim
11
  WORKDIR /app
12
 
13
  RUN apt-get update && apt-get install -y build-essential git && rm -rf /var/lib/apt/lists/*
 
22
  COPY agent/ ./agent/
23
  COPY client/ ./client/
24
 
25
+ # Copy built React app
26
+ COPY --from=frontend-build /frontend/dist ./static/
27
+
28
  RUN useradd -m user
29
  USER user
30
+ ENV HOME=/home/user PATH=/home/user/.local/bin:$PATH
31
 
32
  EXPOSE 7860
33
 
34
+ CMD ["uvicorn", "server.inventory_env:app", "--host", "0.0.0.0", "--port", "7860"]
frontend/index.html ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Stock Oracle — Inventory Optimization Agent</title>
7
+ <style>
8
+ * { box-sizing: border-box; margin: 0; padding: 0; }
9
+ body { background: #07090f; }
10
+ </style>
11
+ </head>
12
+ <body>
13
+ <div id="root"></div>
14
+ <script type="module" src="/src/main.jsx"></script>
15
+ </body>
16
+ </html>
frontend/package.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "stock-oracle",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build",
8
+ "preview": "vite preview"
9
+ },
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1",
13
+ "recharts": "^2.12.7"
14
+ },
15
+ "devDependencies": {
16
+ "@vitejs/plugin-react": "^4.3.1",
17
+ "vite": "^5.4.2"
18
+ }
19
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,852 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { useState, useRef, useCallback, useEffect } from "react";
import {
  LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer,
  ReferenceLine, AreaChart, Area, BarChart, Bar, Legend, ComposedChart,
} from "recharts";
6
+
7
+ // ─── DESIGN TOKENS ────────────────────────────────────────────────────────────
8
// Colour palette (hex strings) shared by the inline styles and chart props in this file.
const C = {
  // Surfaces and borders.
  bg: "#07090f", panel: "#0d1117", border: "#161d2a", border2: "#1e2d40",
  // Text shades, from brightest to dimmest.
  text: "#c9d5e0", muted: "#3a5060", dim: "#1a2535",
  // Accent colours used for series, badges and status.
  green: "#34d399", blue: "#38bdf8", amber: "#fbbf24",
  red: "#f87171", purple: "#a78bfa", teal: "#2dd4bf",
};
14
+
15
+ // ─── CONFIG (mirrors config.py) ───────────────────────────────────────────────
16
// Simulation constants shared by the baseline simulation and the LLM agent loop.
const CFG = {
  // Days between placing an order and its arrival.
  LEAD_TIME: 3,
  // NOTE(review): not referenced by the code visible in this section; presumably the target service level — confirm.
  DEFAULT_SL: 0.95,
  // Fraction of on-hand inventory written off on each write-off day.
  WRITE_OFF_RATE: 0.00143,
  // A write-off is applied on days where day % WRITE_OFF_FREQ === 0.
  WRITE_OFF_FREQ: 7,
  // Maximum length of the demand-history window; also the first day on which the LLM agent decides.
  HISTO_DAYS: 365,
  // Total number of simulated days in an agent run.
  SIM_DAYS: 730,
  // The LLM agent is queried on days divisible by this interval.
  DECISION_INTERVAL: 5,
  // Maximum number of entries kept in the agent's memory bank.
  MEMORY_SIZE: 200,
};
26
+
27
+ // ─── MATH HELPERS ─────────────────────────────────────────────────────────────
28
/**
 * Draw one sample from the standard normal distribution using the
 * Box–Muller transform.
 *
 * @returns {number} A normally distributed value with mean 0 and variance 1.
 */
function normalRandom() {
  // Math.random() may return exactly 0; redraw so Math.log never receives 0.
  let radial = Math.random();
  while (radial === 0) radial = Math.random();
  let angular = Math.random();
  while (angular === 0) angular = Math.random();

  const radius = Math.sqrt(-2 * Math.log(radial));
  const angle = 2 * Math.PI * angular;
  return radius * Math.cos(angle);
}
34
/**
 * Draw one sample from a Gamma(shape, scale) distribution using the
 * Marsaglia–Tsang squeeze/rejection method.
 *
 * @param {number} shape - Shape parameter (> 0).
 * @param {number} scale - Scale parameter; the result is multiplied by it.
 * @returns {number} A gamma-distributed sample.
 */
function gammaRandom(shape, scale) {
  if (shape < 1) {
    // Boost: sample with shape + 1, then scale down by U^(1/shape).
    const boosted = gammaRandom(1 + shape, scale);
    return boosted * Math.pow(Math.random(), 1 / shape);
  }

  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  for (;;) {
    // Redraw the normal variate until the cubed term will be positive.
    let x = normalRandom();
    let v = 1 + c * x;
    while (v <= 0) {
      x = normalRandom();
      v = 1 + c * x;
    }
    v = v * v * v;

    const u = Math.random();
    // Cheap squeeze test first; the exact log test runs only if it fails.
    const squeezeOk = u < 1 - 0.0331 * x * x * x * x;
    if (squeezeOk || Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) {
      return d * v * scale;
    }
  }
}
46
/**
 * Draw one sample from a Poisson(lambda) distribution by multiplying
 * uniform draws until the running product falls to exp(-lambda) or below.
 *
 * @param {number} lambda - Expected value of the distribution.
 * @returns {number} A non-negative integer count.
 */
function poissonRandom(lambda) {
  const threshold = Math.exp(-lambda);
  let count = 0;
  let product = Math.random();
  while (product > threshold) {
    count += 1;
    product *= Math.random();
  }
  return count;
}
51
/**
 * Draw one sample from an exponential distribution.
 *
 * @param {number} rate - Rate parameter; the mean of the distribution is 1 / rate.
 * @returns {number} A finite, non-negative sample.
 */
function expRandom(rate) {
  // Math.random() can return exactly 0, and -Math.log(0) is Infinity; redraw
  // in that case (same guard as normalRandom) so callers never see Infinity.
  let u = 0;
  while (!u) u = Math.random();
  return -Math.log(u) / rate;
}
52
/**
 * Arithmetic mean of a numeric array.
 *
 * @param {number[]} a - Values to average.
 * @returns {number} The mean, or 0 for an empty array.
 */
function arr_mean(a) {
  if (!a.length) return 0;
  let total = 0;
  a.forEach((value) => {
    total += value;
  });
  return total / a.length;
}
53
/**
 * Sample standard deviation (n - 1 denominator) of a numeric array.
 *
 * @param {number[]} a - Values to measure.
 * @returns {number} The standard deviation, or 0 when fewer than two values are given.
 */
function arr_std(a) {
  const count = a.length;
  if (count < 2) return 0;

  const centre = arr_mean(a);
  let squaredDeviations = 0;
  a.forEach((value) => {
    squaredDeviations += (value - centre) ** 2;
  });
  return Math.sqrt(squaredDeviations / (count - 1));
}
58
/**
 * Pick the q-quantile of an already sorted array using the element at
 * index floor(length * q).
 *
 * @param {number[]} sorted - Values in ascending order.
 * @param {number} q - Quantile in [0, 1]; values outside are clamped to the ends.
 * @returns {number|undefined} The selected element, or undefined for an empty array.
 */
function quantile(sorted, q) {
  if (sorted.length === 0) return undefined;
  // floor(length * q) equals length when q === 1, which used to index past the
  // end and return undefined; clamp the index into the valid range.
  const index = Math.min(sorted.length - 1, Math.max(0, Math.floor(sorted.length * q)));
  return sorted[index];
}
59
+
60
+ // ─── DEMAND ENVIRONMENTS ──────────────────────────────────────────────────────
61
// Demand environments selectable in the UI. Each entry provides:
//   label / tag / color / desc - display metadata,
//   sample()                   - draws one day's demand (a non-negative number),
//   demMean / demStd           - demand statistics handed to the "forecast" baseline
//                                and used to seed the agent's initial reorder point.
const ENVS = {
  gamma_poisson: {
    label: "Gamma–Poisson", tag: "MODERATE", color: C.green,
    desc: "90% Gamma(7,16) + 10% Poisson(80). Stable with rare spikes.",
    sample: () => Math.random() < 0.9 ? Math.max(0, Math.round(gammaRandom(7, 16))) : poissonRandom(80),
    demMean: 112, demStd: 38,
  },
  bimodal_hv: {
    label: "Bimodal High-Var", tag: "HARD", color: C.amber,
    desc: "50% low-mean Gamma + 50% high-mean Gamma. Extremely unpredictable.",
    sample: () => Math.random() < 0.5
      ? Math.max(0, Math.round(gammaRandom(7, 3)))
      : Math.max(0, Math.round(gammaRandom(7, 29))),
    demMean: 112, demStd: 95,
  },
  spiking: {
    label: "Sporadic Spiking", tag: "EXTREME", color: C.red,
    desc: "95% zero demand, 5% large Exponential bursts.",
    sample: () => Math.random() < 0.95 ? 0 : Math.max(0, Math.round(expRandom(0.05))),
    // NOTE(review): the sampler's unconditional daily mean is about 0.05 × 20 = 1;
    // demMean: 20 matches the mean burst size, not mean daily demand — confirm which is intended.
    demMean: 20, demStd: 55,
  },
  gamma_stable: {
    label: "Stable Gamma", tag: "EASY", color: C.blue,
    desc: "Single Gamma(7,16), low variance. Baseline environment.",
    sample: () => Math.max(0, Math.round(gammaRandom(7, 16))),
    demMean: 112, demStd: 35,
  },
};
89
+
90
+ // ─── BASELINE AGENTS ──────────────────────────────────────────────────────────
91
// Baseline reorder-point policies. Each compute(h, dm, ds) receives the demand
// history window plus the environment's stated mean/std, and returns a reorder point.
const BASELINES = {
  base: {
    label: "Base", color: C.muted,
    // Expected demand over the lead time, no safety buffer.
    compute(h) {
      return arr_mean(h) * CFG.LEAD_TIME;
    },
  },
  safety_stock: {
    label: "Safety Stock", color: C.blue,
    // Lead-time demand plus 1.645 standard deviations (one-sided 95% normal quantile).
    compute(h) {
      const cycleStock = arr_mean(h) * CFG.LEAD_TIME;
      const buffer = 1.645 * arr_std(h) * Math.sqrt(CFG.LEAD_TIME);
      return cycleStock + buffer;
    },
  },
  forecast: {
    label: "Oracle Forecast", color: C.green,
    // Same formula as safety_stock, but driven by the supplied mean/std instead of history.
    compute(h, dm, ds) {
      const cycleStock = dm * CFG.LEAD_TIME;
      const buffer = 1.645 * ds * Math.sqrt(CFG.LEAD_TIME);
      return cycleStock + buffer;
    },
  },
  monte_carlo: {
    label: "Monte Carlo", color: C.purple,
    // Bootstrap 500 lead-time demand totals from history (each draw jittered by
    // a factor in [0.8, 1.2)) and take the 0.95 quantile.
    compute(h) {
      const totals = Array.from({ length: 500 }, () => {
        let leadTimeDemand = 0;
        for (let step = 0; step < CFG.LEAD_TIME; step++) {
          const pick = h[Math.floor(Math.random() * h.length)];
          const jitter = 0.8 + Math.random() * 0.4;
          leadTimeDemand += pick * jitter;
        }
        return leadTimeDemand;
      });
      totals.sort((a, b) => a - b);
      return quantile(totals, 0.95);
    },
  },
};
116
+
117
+ // ─── SIMULATION ENGINE ────────────────────────────────────────────────────────
118
/**
 * Generate a random daily demand series for one environment.
 *
 * @param {string} envKey - Key into ENVS selecting the demand sampler.
 * @param {number} n - Number of days to generate.
 * @returns {number[]} n demand values, each drawn from ENVS[envKey].sample().
 */
function buildDemandSeries(envKey, n) {
  const env = ENVS[envKey];
  const drawDay = () => env.sample();
  return Array.from({ length: n }, drawDay);
}
121
+
122
/**
 * Simulate one inventory run for a baseline reorder-point policy.
 *
 * Per day, in order: receive orders due today, serve demand from stock,
 * possibly place a replenishment order, apply the periodic write-off, then
 * record the day in the timeline.
 *
 * @param {Function} computeROP - Called as computeROP(hist, env.demMean, env.demStd); returns the reorder point.
 * @param {number[]} demandSeries - Daily demand; its length is the number of simulated days.
 * @param {string} envKey - Key into ENVS; supplies demMean / demStd for computeROP.
 * @returns {{timeline: Object[], metrics: Object}} Per-day records plus aggregate
 *   fill rate, stockout days, lost sales, write-offs and demand totals.
 */
function runOneSimulation(computeROP, demandSeries, envKey) {
  const env = ENVS[envKey];
  const n = demandSeries.length;
  let inventory = 0;
  const orders = []; // pending orders as { arr: arrival day, qty }
  let totDemand = 0, totFulfilled = 0, totWriteOff = 0, stockOuts = 0, lostSales = 0;
  const timeline = [];

  for (let day = 0; day < n; day++) {
    const demand = demandSeries[day];
    // History window excludes today: at most the previous HISTO_DAYS days.
    const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day);
    // Receive every order scheduled to arrive today, then drop delivered orders from the queue.
    const arrivals = orders.filter(o => o.arr === day);
    const delivered = arrivals.reduce((s, o) => s + o.qty, 0);
    inventory += delivered;
    orders.splice(0, orders.length, ...orders.filter(o => o.arr > day));
    const preInv = inventory; // stock available before today's demand is served
    const fulfilled = Math.min(demand, inventory);
    inventory = Math.max(0, inventory - demand);
    const lost = Math.max(0, demand - fulfilled);
    if (lost > 0) stockOuts++; // counts days with any unmet demand
    lostSales += lost;
    let rop = 0, ordered = 0;
    // Order only once 5+ days of history exist and the order could still arrive before the run ends.
    if (hist.length >= 5 && day < n - CFG.LEAD_TIME) {
      rop = Math.max(0, computeROP(hist, env.demMean, env.demStd));
      // The trigger compares on-hand stock only; orders already in transit are not counted.
      if (inventory <= rop) {
        const qty = Math.ceil(rop - inventory + arr_mean(hist) * CFG.LEAD_TIME);
        orders.push({ arr: day + CFG.LEAD_TIME, qty });
        ordered = qty;
      }
    }
    let wo = 0;
    // Periodic write-off of a fixed fraction of the remaining stock (includes day 0).
    if (day % CFG.WRITE_OFF_FREQ === 0) {
      wo = Math.floor(inventory * CFG.WRITE_OFF_RATE);
      inventory -= wo;
      totWriteOff += wo;
    }
    totDemand += demand;
    totFulfilled += fulfilled;
    const fillRateCum = totDemand > 0 ? totFulfilled / totDemand : 0;
    // `inventory` is the pre-demand level; `inventoryAfter` is the end-of-day level after write-off.
    timeline.push({ day, demand, inventory: preInv, inventoryAfter: inventory, fulfilled, lost, rop: Math.round(rop), ordered, wo, delivered, fillRateCum });
  }
  return {
    timeline,
    metrics: { fillRate: totDemand > 0 ? totFulfilled / totDemand : 0, stockOuts, lostSales, totWriteOff, totDemand, totFulfilled },
  };
}
168
+
169
+ // ─── HF INFERENCE API ─────────────────────────────────────────────────────────
170
/**
 * Send a chat-completion request to the Hugging Face Inference API.
 *
 * @param {Array<{role: string, content: string}>} messages - Chat messages to send.
 * @param {string} modelId - Model identifier; used both in the URL and the request body.
 * @param {string} [hfToken] - Optional bearer token; the Authorization header is omitted when falsy.
 * @returns {Promise<string>} Content of the first choice, or "" when the response has none.
 * @throws {Error} When the HTTP response status is not OK; the message includes status and body text.
 */
async function callQwen(messages, modelId, hfToken) {
  const endpoint = `https://api-inference.huggingface.co/models/${modelId}/v1/chat/completions`;

  const headers = { "Content-Type": "application/json" };
  if (hfToken) headers.Authorization = `Bearer ${hfToken}`;

  const payload = { model: modelId, messages, max_tokens: 600, temperature: 0.7 };
  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
  });

  if (!resp.ok) {
    const detail = await resp.text();
    throw new Error(`API error ${resp.status}: ${detail}`);
  }

  const data = await resp.json();
  const content = data.choices?.[0]?.message?.content;
  return content || "";
}
184
+
185
+ // ─── SYSTEM PROMPT ────────────────────────────────────────────────────────────
186
// Instruction text for the LLM agent: objective, environment rules, the four
// required reasoning sections, and the JSON output schema (including the
// numeric `reorder_point` field that the agent loop reads from the reply).
// NOTE(review): the P&L reward terms listed below (revenue, holding cost, ...)
// are not computed by the simulation code visible in this section — confirm
// they match the real environment.
const SYSTEM_PROMPT = `You are an expert inventory optimization agent in a stochastic supply-chain simulation.

YOUR OBJECTIVE:
Maximize profit while maintaining fill rate >= 95% over a 365-day decision horizon (days 365–730 of the simulation, after a 365-day warm-up).

ENVIRONMENT RULES:
- Orders arrive exactly 3 days after placement (LEAD_TIME = 3)
- An order fires whenever inventory <= your reorder_point
- Order quantity = reorder_point - current_inventory + mean_demand * LEAD_TIME
- Every 7 days, ~0.14% of inventory is written off (spoilage/expiry)
- Reward = daily P&L: revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost

REASONING REQUIREMENTS — all 4:
1. SUBGOAL DECOMPOSITION: Break into subgoals (e.g., "rebuild buffer", "reduce overstock")
2. STATE ANALYSIS: Interpret inventory level, demand trend, pending orders, fill rate trajectory
3. DECISION: Output a specific numeric reorder_point with clear justification
4. RECOVERY PLAN: If fill rate < 95% or recent stockouts, state recovery strategy

Think 3+ days ahead — your ROP today only shows effect after lead time.

OUTPUT FORMAT — valid JSON only, no markdown fences:
{"subgoals":["...","..."],"state_analysis":"...","recovery_plan":"...","reorder_point":<number>,"confidence":"high|medium|low","reasoning_depth":"..."}`;
208
+
209
+ // ─── BUILD SNAPSHOT FOR LLM ───────────────────────────────────────────────────
210
/**
 * Build the state snapshot that is serialized into the LLM prompt.
 *
 * All figures are taken from days strictly before `day`: the 30-day demand
 * window, the last five timeline entries and the entry at index day - 1.
 * NOTE(review): the caller has already pushed the entry for `day` itself
 * before calling this; confirm that reporting the previous day's state is intended.
 *
 * @param {number[]} demandSeries - Full daily demand series.
 * @param {Object[]} timeline - Per-day simulation records, indexed by day.
 * @param {number} day - Current simulation day.
 * @param {Object[]} memory - Memory-bank entries; the last CFG.MEMORY_SIZE are included.
 * @returns {Object} Plain JSON-serializable snapshot.
 */
function buildSnapshot(demandSeries, timeline, day, memory) {
  const roundToTenth = (x) => Math.round(x * 10) / 10;
  // The 30-day window feeds both the mean and the std, so slice it once.
  const recentDemand = demandSeries.slice(Math.max(0, day - 30), day);
  const last5 = timeline.slice(Math.max(0, day - 5), day);
  const previous = timeline[day - 1];

  return {
    day,
    days_remaining: CFG.SIM_DAYS - day,
    current_inventory: Math.round(previous?.inventoryAfter ?? 0),
    demand_mean_30d: roundToTenth(arr_mean(recentDemand)),
    demand_std_30d: roundToTenth(arr_std(recentDemand)),
    // A missing or zero cumulative fill rate is reported as "N/A".
    fill_rate_so_far: previous?.fillRateCum
      ? `${(previous.fillRateCum * 100).toFixed(1)}%` : "N/A",
    recent_stockouts: last5.filter(d => d.lost > 0).length,
    recent_lost_sales: last5.reduce((s, d) => s + d.lost, 0),
    last_5_days: last5.map(d => ({
      day: d.day, demand: d.demand, inv: d.inventoryAfter, lost: d.lost, rop: d.rop,
    })),
    memory_bank: memory.slice(-CFG.MEMORY_SIZE),
  };
}
230
+
231
+ // ─── SHARED SIMULATION RUNNER ─────────────────────────────────────────────────
232
/**
 * Run the full simulation with the LLM agent setting the reorder point.
 *
 * The day step mirrors runOneSimulation (deliveries, demand, ordering,
 * write-off). From day CFG.HISTO_DAYS onward, every CFG.DECISION_INTERVAL days
 * the model is sent the system prompt, the last six conversation messages and
 * a fresh snapshot; its `reorder_point` replaces the current one.
 *
 * @param {Object} opts
 * @param {string} opts.envKey - Key into ENVS selecting the demand environment.
 * @param {string} opts.modelId - Model identifier passed to callQwen.
 * @param {string} [opts.hfToken] - Optional API token passed to callQwen.
 * @param {Function} opts.onDay - Called as onDay(day, timelineCopy) after every simulated day.
 * @param {Function} opts.onDecision - Called with { day, snapshot, decision, rop, fillRateCum, memory } after each model query (also on API errors).
 * @param {Function} opts.onStatus - Called with a human-readable status string.
 * @param {{current: boolean}} opts.abortRef - The loop stops before the next day once `current` is truthy.
 * @returns {Promise<{timeline: Object[], metrics: Object, memory: Object[]}>}
 */
async function runAgentLoop({ envKey, modelId, hfToken, onDay, onDecision, onStatus, abortRef }) {
  const demandSeries = buildDemandSeries(envKey, CFG.SIM_DAYS);
  const env = ENVS[envKey];
  let inventory = 0;
  const orders = [];
  let totDemand = 0, totFulfilled = 0, totWriteOff = 0, stockOuts = 0, lostSales = 0;
  const timeline = [];
  let currentROP = env.demMean * CFG.LEAD_TIME;
  let memory = [];
  let convo = [];
  // The instructions and output schema must accompany every request; without
  // them the model is never told to answer with the expected JSON.
  const systemMsg = { role: "system", content: SYSTEM_PROMPT };

  for (let day = 0; day < CFG.SIM_DAYS; day++) {
    if (abortRef.current) break;
    const demand = demandSeries[day];
    const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day);
    // Receive orders due today and drop them from the pending queue.
    const arrivals = orders.filter(o => o.arr === day);
    const delivered = arrivals.reduce((s, o) => s + o.qty, 0);
    inventory += delivered;
    orders.splice(0, orders.length, ...orders.filter(o => o.arr > day));
    const preInv = inventory;
    const fulfilled = Math.min(demand, inventory);
    inventory = Math.max(0, inventory - demand);
    const lost = Math.max(0, demand - fulfilled);
    if (lost > 0) stockOuts++;
    lostSales += lost;
    let ordered = 0;
    if (hist.length >= 5 && day < CFG.SIM_DAYS - CFG.LEAD_TIME && inventory <= currentROP) {
      const qty = Math.ceil(currentROP - inventory + arr_mean(hist) * CFG.LEAD_TIME);
      orders.push({ arr: day + CFG.LEAD_TIME, qty });
      ordered = qty;
    }
    let wo = 0;
    if (day % CFG.WRITE_OFF_FREQ === 0) {
      wo = Math.floor(inventory * CFG.WRITE_OFF_RATE);
      inventory -= wo;
      totWriteOff += wo;
    }
    totDemand += demand;
    totFulfilled += fulfilled;
    const fillRateCum = totDemand > 0 ? totFulfilled / totDemand : 0;
    const entry = { day, demand, inventory: preInv, inventoryAfter: inventory, fulfilled, lost, rop: Math.round(currentROP), ordered, wo, delivered, fillRateCum };
    timeline.push(entry);
    onDay(day, [...timeline]);

    if (day >= CFG.HISTO_DAYS && day % CFG.DECISION_INTERVAL === 0 && day < CFG.SIM_DAYS - CFG.LEAD_TIME) {
      onStatus(`Day ${day}/${CFG.SIM_DAYS}: agent reasoning...`);
      const snapshot = buildSnapshot(demandSeries, timeline, day, memory);
      const userMsg = {
        role: "user",
        content: `SNAPSHOT Day ${day}/${CFG.SIM_DAYS}\n${JSON.stringify(snapshot)}\n\nSet reorder_point for next ${CFG.DECISION_INTERVAL} days.`,
      };
      const msgs = [systemMsg, ...convo.slice(-6), userMsg];
      try {
        const raw = await callQwen(msgs, modelId, hfToken);
        let decision = null;
        try {
          decision = JSON.parse(raw.replace(/```json|```/g, "").trim());
        } catch {
          decision = null;
        }
        // Invalid JSON and valid-but-non-object JSON (null, a bare number, an
        // array, ...) are both treated as a parse failure.
        if (decision === null || typeof decision !== "object" || Array.isArray(decision)) {
          const m = raw.match(/"reorder_point"\s*:\s*(\d+\.?\d*)/);
          decision = { subgoals: ["parse error"], state_analysis: raw.slice(0, 200), recovery_plan: "N/A", reorder_point: m ? parseFloat(m[1]) : currentROP, confidence: "low", reasoning_depth: "parse failed" };
        }
        // Accept only a finite number (or numeric string). This keeps an
        // explicit 0, and stops NaN from silently disabling ordering, since
        // `inventory <= NaN` is always false.
        const proposed = decision.reorder_point;
        const numericROP = typeof proposed === "number"
          ? proposed
          : (typeof proposed === "string" && proposed.trim() !== "" ? Number(proposed) : NaN);
        if (Number.isFinite(numericROP)) currentROP = Math.max(0, numericROP);
        convo = [...convo, userMsg, { role: "assistant", content: raw }];
        memory = [...memory, {
          day,
          rop: Math.round(currentROP),
          confidence: decision.confidence,
          fill_rate: `${(fillRateCum * 100).toFixed(1)}%`,
          inventory: Math.round(preInv),
          demand_mean: Math.round(arr_mean(demandSeries.slice(Math.max(0, day - 30), day))),
          stockouts_cumulative: stockOuts,
          lost_sales_cumulative: Math.round(lostSales),
          key_insight: decision.state_analysis?.slice(0, 100),
        }].slice(-CFG.MEMORY_SIZE);
        onDecision({ day, snapshot, decision, rop: currentROP, fillRateCum, memory: [...memory] });
      } catch (e) {
        // Keep the simulation running with the previous reorder point on any failure.
        onStatus(`Day ${day}: API error — ${e.message}`);
        onDecision({ day, snapshot, decision: { subgoals: [], state_analysis: `API error: ${e.message}`, recovery_plan: "N/A", reorder_point: currentROP, confidence: "low", reasoning_depth: "error" }, rop: currentROP, fillRateCum, memory: [...memory] });
      }
      // Short pause between model calls.
      await new Promise(r => setTimeout(r, 150));
    }
  }
  return {
    timeline,
    metrics: { fillRate: totDemand > 0 ? totFulfilled / totDemand : 0, stockOuts, lostSales, totWriteOff, totDemand, totFulfilled },
    memory,
  };
}
320
+
321
+ // ─── SHARED UI COMPONENTS ─────────────────────────────────────────────────────
322
+ function Panel({ title, children, style = {} }) {
323
+ return (
324
+ <div style={{ background: C.panel, border: `1px solid ${C.border}`, borderRadius: 10, padding: "16px 18px", ...style }}>
325
+ {title && <div style={{ fontSize: 9, letterSpacing: 4, color: C.muted, marginBottom: 12, textTransform: "uppercase" }}>{title}</div>}
326
+ {children}
327
+ </div>
328
+ );
329
+ }
330
+
331
+ function FillBadge({ rate }) {
332
+ const color = rate >= 0.95 ? C.green : rate >= 0.85 ? C.amber : C.red;
333
+ return <span style={{ color, fontWeight: 700 }}>{rate ? `${(rate * 100).toFixed(1)}%` : "—"}</span>;
334
+ }
335
+
336
+ function MetricBox({ label, value, highlight, color }) {
337
+ return (
338
+ <div style={{ background: highlight ? "#0d1f18" : C.panel, border: `1px solid ${(color || C.green) + (highlight ? "30" : "15")}`, borderRadius: 8, padding: "10px 16px", textAlign: "center" }}>
339
+ <div style={{ fontSize: 9, letterSpacing: 3, color: C.muted, marginBottom: 3 }}>{label}</div>
340
+ <div style={{ fontSize: 20, fontWeight: 600 }}>{value}</div>
341
+ </div>
342
+ );
343
+ }
344
+
345
+ function SimTabs({ tabs, active, onSelect }) {
346
+ return (
347
+ <div style={{ display: "flex", gap: 6, marginBottom: 14, flexWrap: "wrap" }}>
348
+ {tabs.map(({ id, label }) => {
349
+ const isActive = active === id;
350
+ return (
351
+ <button key={id} onClick={() => onSelect(id)} style={{
352
+ background: isActive ? C.border2 : "transparent",
353
+ border: `1px solid ${isActive ? C.border2 : "transparent"}`,
354
+ borderRadius: 6, padding: "7px 14px",
355
+ color: isActive ? C.text : C.muted, fontFamily: "inherit",
356
+ fontSize: 11, cursor: "pointer", letterSpacing: 1,
357
+ }}>{label}</button>
358
+ );
359
+ })}
360
+ </div>
361
+ );
362
+ }
363
+
364
+ function LiveSimCharts({ timeline }) {
365
+ return (
366
+ <div style={{ display: "flex", flexDirection: "column", gap: 14 }}>
367
+ <Panel title="Inventory · Demand · Reorder Point">
368
+ <ResponsiveContainer width="100%" height={200}>
369
+ <AreaChart data={timeline} margin={{ top: 4, right: 4, bottom: 0, left: 0 }}>
370
+ <defs>
371
+ <linearGradient id="ig" x1="0" y1="0" x2="0" y2="1">
372
+ <stop offset="5%" stopColor={C.blue} stopOpacity={0.25} />
373
+ <stop offset="95%" stopColor={C.blue} stopOpacity={0} />
374
+ </linearGradient>
375
+ </defs>
376
+ <XAxis dataKey="day" tick={{ fontSize: 9, fill: C.muted }} />
377
+ <YAxis tick={{ fontSize: 9, fill: C.muted }} width={45} />
378
+ <Tooltip contentStyle={{ background: "#0a0f18", border: `1px solid ${C.border2}`, fontSize: 10, borderRadius: 6 }} labelFormatter={d => `Day ${d}`} />
379
+ <Area type="monotone" dataKey="inventory" stroke={C.blue} strokeWidth={1.5} fill="url(#ig)" dot={false} name="Inventory" />
380
+ <Line type="monotone" dataKey="demand" stroke={C.red} strokeWidth={1} dot={false} name="Demand" />
381
+ <Line type="monotone" dataKey="rop" stroke={C.amber} strokeWidth={1} strokeDasharray="5 3" dot={false} name="ROP" />
382
+ </AreaChart>
383
+ </ResponsiveContainer>
384
+ </Panel>
385
+ <div style={{ display: "grid", gridTemplateColumns: "1fr 1fr", gap: 14 }}>
386
+ <Panel title="Cumulative Fill Rate">
387
+ <ResponsiveContainer width="100%" height={130}>
388
+ <LineChart data={timeline} margin={{ top: 4, right: 4, bottom: 0, left: 0 }}>
389
+ <XAxis dataKey="day" tick={{ fontSize: 9, fill: C.muted }} />
390
+ <YAxis domain={[0, 1]} tickFormatter={v => `${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={38} />
391
+ <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="4 3" />
392
+ <Tooltip contentStyle={{ background: "#0a0f18", border: `1px solid ${C.border2}`, fontSize: 10 }} formatter={v => `${(v * 100).toFixed(1)}%`} />
393
+ <Line type="monotone" dataKey="fillRateCum" stroke={C.teal} strokeWidth={2} dot={false} name="Fill Rate" />
394
+ </LineChart>
395
+ </ResponsiveContainer>
396
+ </Panel>
397
+ <Panel title="Lost Sales Per Day">
398
+ <ResponsiveContainer width="100%" height={130}>
399
+ <BarChart data={timeline} barSize={2} margin={{ top: 4, right: 4, bottom: 0, left: 0 }}>
400
+ <XAxis dataKey="day" tick={{ fontSize: 9, fill: C.muted }} />
401
+ <YAxis tick={{ fontSize: 9, fill: C.muted }} width={38} />
402
+ <Tooltip contentStyle={{ background: "#0a0f18", border: `1px solid ${C.border2}`, fontSize: 10 }} />
403
+ <Bar dataKey="lost" fill={C.red} opacity={0.8} name="Lost Sales" />
404
+ </BarChart>
405
+ </ResponsiveContainer>
406
+ </Panel>
407
+ </div>
408
+ </div>
409
+ );
410
+ }
411
+
412
/**
 * Scrollable list of the agent's decisions, one card per log entry.
 *
 * Each card shows the day, the reorder point, the cumulative fill rate, a
 * confidence badge, and — when present — subgoals, state analysis, recovery
 * plan and reasoning depth. The most recent entry is highlighted.
 *
 * Props: `log` (array of { day, decision, rop, fillRateCum }),
 * `logEndRef` (ref attached to an empty element after the last card).
 */
function ReasoningLog({ log, logEndRef }) {
  return (
    <div style={{ display: "flex", flexDirection: "column", gap: 10, maxHeight: "72vh", overflowY: "auto", paddingRight: 4 }}>
      {log.length === 0 && <div style={{ color: C.muted, fontSize: 12, padding: 20, textAlign: "center" }}>Waiting for first LLM decision (after day {CFG.HISTO_DAYS})…</div>}
      {log.map((entry, i) => {
        const d = entry.decision;
        const isLatest = i === log.length - 1;
        return (
          <div key={i} style={{ background: isLatest ? "#0c1a24" : C.panel, border: `1px solid ${isLatest ? C.teal + "40" : C.border}`, borderRadius: 10, padding: "14px 16px", borderLeft: `3px solid ${isLatest ? C.teal : C.border2}` }}>
            <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 10, flexWrap: "wrap", gap: 6 }}>
              <div style={{ fontSize: 11, color: C.teal, fontWeight: 600 }}>Day {entry.day} — Decision #{i + 1}</div>
              <div style={{ display: "flex", gap: 8 }}>
                <span style={{ fontSize: 10, color: C.muted }}>ROP: <span style={{ color: C.amber, fontWeight: 600 }}>{Math.round(entry.rop)}</span></span>
                <span style={{ fontSize: 10, color: C.muted }}>Fill: <FillBadge rate={entry.fillRateCum} /></span>
                <span style={{ fontSize: 9, padding: "2px 7px", borderRadius: 3, background: d.confidence === "high" ? "#0d1f18" : d.confidence === "medium" ? "#1f1a0d" : "#1f0d0d", color: d.confidence === "high" ? C.green : d.confidence === "medium" ? C.amber : C.red, border: "1px solid currentColor", opacity: 0.8 }}>{(d.confidence || "?").toUpperCase()}</span>
              </div>
            </div>
            {d.subgoals?.length > 0 && (
              <div style={{ marginBottom: 10 }}>
                <div style={{ fontSize: 9, letterSpacing: 3, color: C.muted, marginBottom: 6 }}>SUBGOAL DECOMPOSITION</div>
                <div style={{ display: "flex", gap: 6, flexWrap: "wrap" }}>
                  {d.subgoals.map((sg, j) => (
                    <div key={j} style={{ fontSize: 10, background: C.dim, border: `1px solid ${C.border2}`, borderRadius: 4, padding: "4px 9px", color: C.blue }}>{j + 1}. {sg}</div>
                  ))}
                </div>
              </div>
            )}
            <div style={{ marginBottom: 8 }}>
              <div style={{ fontSize: 9, letterSpacing: 3, color: C.muted, marginBottom: 5 }}>STATE ANALYSIS</div>
              <div style={{ fontSize: 11, color: C.text, lineHeight: 1.7, background: C.dim, borderRadius: 6, padding: "8px 10px" }}>{d.state_analysis}</div>
            </div>
            {d.recovery_plan && d.recovery_plan !== "N/A" && (
              <div style={{ marginBottom: 8 }}>
                <div style={{ fontSize: 9, letterSpacing: 3, color: C.muted, marginBottom: 5 }}>RECOVERY PLAN</div>
                <div style={{ fontSize: 11, color: C.amber, lineHeight: 1.6, background: "#1a1400", borderRadius: 6, padding: "8px 10px", border: `1px solid ${C.amber}20` }}>{d.recovery_plan}</div>
              </div>
            )}
            {d.reasoning_depth && <div style={{ fontSize: 10, color: C.muted }}><span style={{ color: C.purple }}>Reasoning: </span>{d.reasoning_depth}</div>}
          </div>
        );
      })}
      <div ref={logEndRef} />
    </div>
  );
}
457
+
458
/**
 * Side-by-side comparison of the LLM agent against the baseline policies.
 *
 * Renders one summary card for the agent (when `agentMetrics` is set) and one
 * per entry of `baselineResults`, followed by a chart of cumulative fill rate
 * over time for all of them with a 95% target line.
 *
 * Props:
 *   agentMetrics    - metrics object from the agent run, or a falsy value to hide the agent card.
 *   agentLog        - accepted but not used in this component.
 *   simTimeline     - the agent's per-day timeline (reads `day` and `fillRateCum`).
 *   baselineResults - object keyed by BASELINES key, each value { timeline, metrics }.
 */
function ComparePanel({ agentMetrics, agentLog, simTimeline, baselineResults }) {
  const agentFillRates = simTimeline.map(t => ({ day: t.day, agent: t.fillRateCum }));
  return (
    <div style={{ display: "flex", flexDirection: "column", gap: 14 }}>
      <div style={{ display: "grid", gridTemplateColumns: "repeat(5,1fr)", gap: 10 }}>
        {agentMetrics && (
          <div style={{ background: "#0a1e18", border: `2px solid ${C.teal}40`, borderRadius: 10, padding: 14 }}>
            <div style={{ fontSize: 9, color: C.teal, letterSpacing: 3, marginBottom: 8 }}>🤖 LLM AGENT</div>
            {[["Fill Rate", <FillBadge rate={agentMetrics.fillRate} />], ["Stockouts", agentMetrics.stockOuts], ["Lost Sales", agentMetrics.lostSales.toLocaleString()], ["Write-Offs", agentMetrics.totWriteOff.toLocaleString()]].map(([l, v]) => (
              <div key={l} style={{ display: "flex", justifyContent: "space-between", fontSize: 11, marginBottom: 5 }}>
                <span style={{ color: C.muted }}>{l}</span><span style={{ fontWeight: 600 }}>{v}</span>
              </div>
            ))}
          </div>
        )}
        {Object.entries(baselineResults).map(([bk, br]) => (
          <div key={bk} style={{ background: C.panel, border: `1px solid ${BASELINES[bk].color}30`, borderRadius: 10, padding: 14 }}>
            <div style={{ fontSize: 9, color: BASELINES[bk].color, letterSpacing: 3, marginBottom: 8 }}>{BASELINES[bk].label.toUpperCase()}</div>
            {[["Fill Rate", <FillBadge rate={br.metrics.fillRate} />], ["Stockouts", br.metrics.stockOuts], ["Lost Sales", br.metrics.lostSales.toLocaleString()], ["Write-Offs", br.metrics.totWriteOff.toLocaleString()]].map(([l, v]) => (
              <div key={l} style={{ display: "flex", justifyContent: "space-between", fontSize: 11, marginBottom: 5 }}>
                <span style={{ color: C.muted }}>{l}</span><span style={{ fontWeight: 600 }}>{v}</span>
              </div>
            ))}
          </div>
        ))}
      </div>
      {Object.keys(baselineResults).length > 0 && (
        <Panel title="Fill Rate Convergence — Agent vs All Baselines">
          <ResponsiveContainer width="100%" height={220}>
            <LineChart margin={{ top: 4, right: 8, bottom: 0, left: 0 }}>
              <XAxis dataKey="day" type="number" domain={[0, CFG.SIM_DAYS]} tick={{ fontSize: 9, fill: C.muted }} />
              <YAxis domain={[0, 1]} tickFormatter={v => `${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={40} />
              <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="5 3" label={{ value: "95% target", fontSize: 9, fill: C.amber }} />
              <Tooltip contentStyle={{ background: "#0a0f18", border: `1px solid ${C.border2}`, fontSize: 10 }} formatter={v => `${(v * 100).toFixed(1)}%`} />
              <Legend wrapperStyle={{ fontSize: 10 }} />
              <Line data={agentFillRates} type="monotone" dataKey="agent" stroke={C.teal} strokeWidth={2.5} dot={false} name="LLM Agent" />
              {Object.entries(baselineResults).map(([bk, br]) => (
                <Line key={bk} data={br.timeline.map(t => ({ day: t.day, fillRate: t.fillRateCum }))} type="monotone" dataKey="fillRate" stroke={BASELINES[bk].color} strokeWidth={1} strokeDasharray="3 2" dot={false} name={BASELINES[bk].label} />
              ))}
            </LineChart>
          </ResponsiveContainer>
        </Panel>
      )}
    </div>
  );
}
504
+
505
/**
 * Grid of cards, one per memory-bank entry, preceded by a short explanation.
 *
 * Each card lists the entry's day, reorder point, confidence, fill rate,
 * inventory, demand mean and cumulative stockouts / lost sales, plus the
 * `key_insight` text when present. A placeholder is shown while the bank is empty.
 *
 * Props: `memory` — array of memory entries as produced by the agent loop.
 */
function MemoryBankPanel({ memory }) {
  return (
    <div style={{ display: "flex", flexDirection: "column", gap: 10 }}>
      <Panel>
        <div style={{ fontSize: 11, color: C.muted, lineHeight: 1.8, marginBottom: 12 }}>
          The memory bank stores the last {CFG.MEMORY_SIZE} decisions with full context — inventory level, demand signal, fill rate, and cumulative losses. This enables the agent to reason across the full {CFG.SIM_DAYS - CFG.HISTO_DAYS}-day horizon beyond the LLM's context window.
        </div>
        <div style={{ display: "grid", gridTemplateColumns: "repeat(auto-fill,minmax(220px,1fr))", gap: 8 }}>
          {memory.map((m, i) => (
            <div key={i} style={{ background: C.dim, border: `1px solid ${C.border}`, borderRadius: 7, padding: "10px 12px" }}>
              <div style={{ fontSize: 10, color: C.teal, fontWeight: 600, marginBottom: 6 }}>Day {m.day}</div>
              {[["ROP Set", m.rop], ["Confidence", m.confidence], ["Fill Rate", m.fill_rate || "—"], ["Inventory", m.inventory], ["Demand Mean", m.demand_mean], ["Stockouts ∑", m.stockouts_cumulative], ["Lost Sales ∑", m.lost_sales_cumulative]].map(([l, v]) => (
                <div key={l} style={{ display: "flex", justifyContent: "space-between", fontSize: 10, marginBottom: 3 }}>
                  <span style={{ color: C.muted }}>{l}</span>
                  <span style={{ color: C.text }}>{v}</span>
                </div>
              ))}
              {m.key_insight && <div style={{ fontSize: 9, color: C.muted, marginTop: 6, lineHeight: 1.5, borderTop: `1px solid ${C.border}`, paddingTop: 5 }}>{m.key_insight}</div>}
            </div>
          ))}
          {memory.length === 0 && <div style={{ color: C.muted, fontSize: 11 }}>Memory builds as agent makes decisions…</div>}
        </div>
      </Panel>
    </div>
  );
}
531
+
532
+ // ─── SINGLE-AGENT SIMULATION VIEW ─────────────────────────────────────────────
533
+ function AgentSimView({ label, accentColor, modelId, hfToken, envKey, baselineResults }) {
534
+ const [phase, setPhase] = useState("idle"); // idle | running | done
535
+ const [timeline, setTimeline] = useState([]);
536
+ const [log, setLog] = useState([]);
537
+ const [memory, setMemory] = useState([]);
538
+ const [metrics, setMetrics] = useState(null);
539
+ const [activeTab, setActiveTab] = useState("live");
540
+ const [status, setStatus] = useState("");
541
+ const [runningDay, setRunningDay] = useState(0);
542
+ const abortRef = useRef(false);
543
+ const logEndRef = useRef(null);
544
+ useEffect(() => { if (logEndRef.current) logEndRef.current.scrollIntoView({ behavior: "smooth" }); }, [log]);
545
+
546
+ const start = useCallback(async () => {
547
+ abortRef.current = false;
548
+ setPhase("running"); setTimeline([]); setLog([]); setMemory([]); setMetrics(null); setRunningDay(0);
549
+ try {
550
+ const result = await runAgentLoop({
551
+ envKey, modelId, hfToken, abortRef,
552
+ onDay: (day, tl) => { setTimeline(tl); setRunningDay(day); },
553
+ onDecision: ({ day, snapshot, decision, rop, fillRateCum, memory: mem }) => {
554
+ setLog(prev => [...prev, { day, snapshot, decision, rop, fillRateCum }]);
555
+ setMemory(mem);
556
+ },
557
+ onStatus: setStatus,
558
+ });
559
+ setMetrics(result.metrics);
560
+ setMemory(result.memory);
561
+ } catch (e) {
562
+ setStatus(`Error: ${e.message}`);
563
+ }
564
+ setPhase("done");
565
+ }, [envKey, modelId, hfToken]);
566
+
567
+ const stop = () => { abortRef.current = true; setPhase("done"); setStatus("Stopped."); };
568
+ const reset = () => { setPhase("idle"); setTimeline([]); setLog([]); setMemory([]); setMetrics(null); };
569
+
570
+ const tabs = [
571
+ { id: "live", label: "LIVE SIM" },
572
+ { id: "reasoning", label: `REASONING (${log.length})` },
573
+ { id: "compare", label: "COMPARE" },
574
+ { id: "memory", label: `MEMORY (${memory.length})` },
575
+ ];
576
+
577
+ return (
578
+ <div>
579
+ {phase === "idle" && (
580
+ <button onClick={start} style={{ background: "#0d1f18", border: `1px solid ${accentColor}60`, borderRadius: 7, padding: "12px 24px", color: accentColor, fontFamily: "inherit", fontSize: 13, cursor: "pointer", letterSpacing: 2, fontWeight: 600 }}>
581
+ ▶ RUN {label.toUpperCase()}
582
+ </button>
583
+ )}
584
+ {(phase === "running" || phase === "done") && (
585
+ <>
586
+ <div style={{ display: "flex", justifyContent: "space-between", alignItems: "center", marginBottom: 12, flexWrap: "wrap", gap: 8 }}>
587
+ <div style={{ display: "flex", gap: 8, alignItems: "center", fontSize: 11 }}>
588
+ {phase === "running" && <span style={{ color: C.amber }}>●</span>}
589
+ <span style={{ color: C.muted }}>{status}</span>
590
+ {phase === "running" && (
591
+ <div style={{ width: 160, height: 4, background: C.border, borderRadius: 2, overflow: "hidden" }}>
592
+ <div style={{ height: "100%", width: `${(runningDay / CFG.SIM_DAYS) * 100}%`, background: accentColor, transition: "width 0.3s", borderRadius: 2 }} />
593
+ </div>
594
+ )}
595
+ </div>
596
+ <div style={{ display: "flex", gap: 8 }}>
597
+ {phase === "running" && <button onClick={stop} style={{ background: "#2a0f0f", border: `1px solid ${C.red}40`, borderRadius: 6, padding: "6px 14px", color: C.red, fontFamily: "inherit", fontSize: 11, cursor: "pointer" }}>■ STOP</button>}
598
+ <button onClick={reset} style={{ background: C.panel, border: `1px solid ${C.border}`, borderRadius: 6, padding: "6px 14px", color: C.muted, fontFamily: "inherit", fontSize: 11, cursor: "pointer" }}>↺ RESET</button>
599
+ </div>
600
+ </div>
601
+ {metrics && (
602
+ <div style={{ display: "flex", gap: 10, marginBottom: 14, flexWrap: "wrap" }}>
603
+ <MetricBox label="FILL RATE" value={<FillBadge rate={metrics.fillRate} />} highlight color={accentColor} />
604
+ <MetricBox label="STOCKOUTS" value={metrics.stockOuts} />
605
+ <MetricBox label="LOST SALES" value={metrics.lostSales.toLocaleString()} />
606
+ <MetricBox label="WRITE-OFFS" value={metrics.totWriteOff.toLocaleString()} />
607
+ <MetricBox label="DECISIONS" value={log.length} />
608
+ </div>
609
+ )}
610
+ <SimTabs tabs={tabs} active={activeTab} onSelect={setActiveTab} />
611
+ {activeTab === "live" && <LiveSimCharts timeline={timeline} />}
612
+ {activeTab === "reasoning" && <ReasoningLog log={log} logEndRef={logEndRef} />}
613
+ {activeTab === "compare" && <ComparePanel agentMetrics={metrics} agentLog={log} simTimeline={timeline} baselineResults={baselineResults} />}
614
+ {activeTab === "memory" && <MemoryBankPanel memory={memory} />}
615
+ </>
616
+ )}
617
+ </div>
618
+ );
619
+ }
620
+
621
+ // ─── MAIN APP ─────────────────────────────────────────────────────────────────
622
+ export default function StockOracle() {
623
+ const [envKey, setEnvKey] = useState("gamma_poisson");
624
+ const [hfToken, setHfToken] = useState("");
625
+ const [grpoModelId, setGrpoModelId] = useState("");
626
+ const [activeTopTab, setActiveTopTab] = useState("llm");
627
+ const [baselineResults, setBaselineResults] = useState({});
628
+ const [baselinesReady, setBaselinesReady] = useState(false);
629
+ const env = ENVS[envKey];
630
+
631
// Baseline results only describe the environment they were simulated in. Drop them
// whenever the selected environment changes; otherwise the Compare tab (which
// receives baselineResults) keeps comparing against the previous environment.
useEffect(() => {
  // Keep the same empty object when there is nothing to clear, to avoid a re-render.
  setBaselineResults(prev => (Object.keys(prev).length ? {} : prev));
  setBaselinesReady(false);
}, [envKey]);

// Simulate every rule-based baseline against one freshly built demand series for the
// current environment and publish the results keyed by baseline id.
const runBaselines = useCallback(() => {
  setBaselinesReady(false);
  const demand = buildDemandSeries(envKey, CFG.SIM_DAYS);
  const results = Object.fromEntries(
    Object.entries(BASELINES).map(([k, ag]) => [
      k,
      runOneSimulation((h, dm, ds) => ag.compute(h, dm, ds), demand, envKey),
    ])
  );
  setBaselineResults(results);
  setBaselinesReady(true);
}, [envKey]);
641
+
642
+ const topTabs = [
643
+ { id: "llm", label: "QWEN BASE AGENT" },
644
+ { id: "grpo", label: "GRPO FINE-TUNED ★" },
645
+ { id: "baselines", label: "BASELINES" },
646
+ ];
647
+
648
+ return (
649
+ <div style={{ minHeight: "100vh", background: C.bg, fontFamily: "'JetBrains Mono',monospace", color: C.text, padding: "24px 16px" }}>
650
+ <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600&display=swap" rel="stylesheet" />
651
+
652
+ {/* HEADER */}
653
+ <div style={{ maxWidth: 1280, margin: "0 auto" }}>
654
+ <div style={{ marginBottom: 28 }}>
655
+ <div style={{ fontSize: 9, letterSpacing: 5, color: C.muted, marginBottom: 6 }}>HACKATHON · LONG-HORIZON REASONING ENVIRONMENT</div>
656
+ <h1 style={{ margin: 0, fontSize: "clamp(32px,5vw,52px)", fontWeight: 700, letterSpacing: -1, background: `linear-gradient(120deg,${C.teal},${C.blue},${C.purple})`, WebkitBackgroundClip: "text", WebkitTextFillColor: "transparent", lineHeight: 1.1, fontFamily: "inherit" }}>
657
+ STOCK ORACLE
658
+ </h1>
659
+ <div style={{ fontSize: 10, color: C.muted, marginTop: 5, letterSpacing: 2 }}>
660
+ LLM AGENT · GRPO RL TRAINING · INVENTORY OPTIMIZATION · LONG-HORIZON PLANNING
661
+ </div>
662
+ </div>
663
+
664
+ {/* GLOBAL CONFIG */}
665
+ <div style={{ display: "grid", gridTemplateColumns: "1fr 1fr 1fr", gap: 16, marginBottom: 24 }}>
666
+ {/* Env selector */}
667
+ <Panel title="Demand Environment">
668
+ {Object.entries(ENVS).map(([k, e]) => (
669
+ <button key={k} onClick={() => { setEnvKey(k); setBaselinesReady(false); }} style={{ display: "block", width: "100%", textAlign: "left", background: envKey === k ? "#0f1e2e" : "transparent", border: `1px solid ${envKey === k ? e.color + "50" : C.border}`, borderRadius: 6, padding: "9px 12px", marginBottom: 6, cursor: "pointer", fontFamily: "inherit", transition: "all 0.15s" }}>
670
+ <div style={{ display: "flex", justifyContent: "space-between" }}>
671
+ <span style={{ fontSize: 12, color: envKey === k ? e.color : C.muted, fontWeight: 500 }}>{e.label}</span>
672
+ <span style={{ fontSize: 9, color: e.color, border: `1px solid ${e.color}40`, borderRadius: 3, padding: "2px 6px" }}>{e.tag}</span>
673
+ </div>
674
+ <div style={{ fontSize: 10, color: C.dim, marginTop: 3, lineHeight: 1.5 }}>{e.desc}</div>
675
+ </button>
676
+ ))}
677
+ </Panel>
678
+
679
+ {/* HF Token */}
680
+ <Panel title="HuggingFace Token">
681
+ <div style={{ fontSize: 11, color: C.muted, lineHeight: 1.7, marginBottom: 12 }}>
682
+ Required for Qwen2.5-72B inference via HF Inference API.<br />
683
+ Get one at <span style={{ color: C.blue }}>huggingface.co/settings/tokens</span>
684
+ </div>
685
+ <input
686
+ type="password"
687
+ placeholder="hf_..."
688
+ value={hfToken}
689
+ onChange={e => setHfToken(e.target.value)}
690
+ style={{ width: "100%", background: C.dim, border: `1px solid ${C.border2}`, borderRadius: 6, padding: "9px 12px", color: C.text, fontFamily: "inherit", fontSize: 12, outline: "none", marginBottom: 10 }}
691
+ />
692
+ <div style={{ fontSize: 11, color: C.muted, lineHeight: 1.7, marginBottom: 8, marginTop: 8 }}>
693
+ <span style={{ color: C.purple, fontWeight: 600 }}>GRPO Fine-tuned Model ID</span><br />
694
+ <span style={{ fontSize: 10 }}>HF model ID of the trained adapter (e.g. ademarteau/qwen-inventory-grpo-iter4). Leave blank while training.</span>
695
+ </div>
696
+ <input
697
+ type="text"
698
+ placeholder="ademarteau/qwen-inventory-grpo-iter4"
699
+ value={grpoModelId}
700
+ onChange={e => setGrpoModelId(e.target.value)}
701
+ style={{ width: "100%", background: C.dim, border: `1px solid ${C.purple}40`, borderRadius: 6, padding: "9px 12px", color: C.text, fontFamily: "inherit", fontSize: 12, outline: "none" }}
702
+ />
703
+ </Panel>
704
+
705
+ {/* Baselines */}
706
+ <Panel title="Baseline Agents">
707
+ <div style={{ fontSize: 11, color: C.muted, lineHeight: 1.7, marginBottom: 12 }}>
708
+ Pre-compute all 4 rule-based baselines for comparison in the Compare tab.
709
+ </div>
710
+ <button onClick={runBaselines} style={{ width: "100%", background: C.dim, border: `1px solid ${C.border2}`, borderRadius: 6, padding: "10px", color: C.text, fontFamily: "inherit", fontSize: 11, cursor: "pointer", letterSpacing: 1, marginBottom: 10 }}>
711
+ ▶ RUN BASELINES
712
+ </button>
713
+ {baselinesReady && Object.entries(baselineResults).map(([k, r]) => (
714
+ <div key={k} style={{ display: "flex", justifyContent: "space-between", fontSize: 11, marginBottom: 5 }}>
715
+ <span style={{ color: BASELINES[k].color }}>{BASELINES[k].label}</span>
716
+ <FillBadge rate={r.metrics.fillRate} />
717
+ </div>
718
+ ))}
719
+ </Panel>
720
+ </div>
721
+
722
+ {/* TOP TABS */}
723
+ <SimTabs tabs={topTabs} active={activeTopTab} onSelect={setActiveTopTab} />
724
+
725
+ {/* QWEN BASE TAB */}
726
+ {activeTopTab === "llm" && (
727
+ <div>
728
+ <div style={{ fontSize: 11, color: C.muted, marginBottom: 14 }}>
729
+ <span style={{ color: C.blue, fontWeight: 600 }}>Qwen2.5-72B-Instruct</span> via HF Inference API · decisions every {CFG.DECISION_INTERVAL} days · {CFG.SIM_DAYS - CFG.HISTO_DAYS} decision steps · memory bank up to {CFG.MEMORY_SIZE} entries
730
+ </div>
731
+ <AgentSimView
732
+ label="Qwen Base"
733
+ accentColor={C.teal}
734
+ modelId="Qwen/Qwen2.5-72B-Instruct"
735
+ hfToken={hfToken}
736
+ envKey={envKey}
737
+ baselineResults={baselineResults}
738
+ />
739
+ </div>
740
+ )}
741
+
742
+ {/* GRPO TAB */}
743
+ {activeTopTab === "grpo" && (
744
+ <div>
745
+ {/* Training status banner */}
746
+ <div style={{ background: "#0d1a0d", border: `1px solid ${C.green}30`, borderRadius: 10, padding: "16px 20px", marginBottom: 18 }}>
747
+ <div style={{ display: "flex", alignItems: "center", gap: 10, marginBottom: 8 }}>
748
+ <span style={{ color: C.green, fontSize: 11 }}>● TRAINING IN PROGRESS</span>
749
+ <span style={{ color: C.muted, fontSize: 10 }}>Northflank · 16 vCPU / 196 GB · Qwen2.5-3B-Instruct + LoRA</span>
750
+ </div>
751
+ <div style={{ display: "grid", gridTemplateColumns: "repeat(4,1fr)", gap: 12, fontSize: 10, color: C.muted }}>
752
+ {[["Algorithm", "GRPO (Group Relative Policy Optimization)"], ["Reward", "Analytical P&L simulation — 30-day lookahead"], ["Base Model", "Qwen/Qwen2.5-3B-Instruct via Unsloth"], ["Status", "Iteration 1/5 · Rollout collection in progress"]].map(([l, v]) => (
753
+ <div key={l}>
754
+ <div style={{ color: C.muted, letterSpacing: 2, marginBottom: 3, fontSize: 9 }}>{l.toUpperCase()}</div>
755
+ <div style={{ color: C.text }}>{v}</div>
756
+ </div>
757
+ ))}
758
+ </div>
759
+ </div>
760
+
761
+ <div style={{ fontSize: 11, color: C.muted, marginBottom: 14 }}>
762
+ {grpoModelId
763
+ ? <><span style={{ color: C.purple, fontWeight: 600 }}>Fine-tuned model:</span> {grpoModelId}</>
764
+ : <span style={{ color: C.amber }}>⚠ Enter the GRPO model ID above once training completes to run inference.</span>}
765
+ </div>
766
+
767
+ {grpoModelId ? (
768
+ <AgentSimView
769
+ label="GRPO Fine-tuned"
770
+ accentColor={C.purple}
771
+ modelId={grpoModelId}
772
+ hfToken={hfToken}
773
+ envKey={envKey}
774
+ baselineResults={baselineResults}
775
+ />
776
+ ) : (
777
+ <Panel>
778
+ <div style={{ fontSize: 11, color: C.muted, lineHeight: 1.8 }}>
779
+ {[
780
+ ["What is GRPO?", "Group Relative Policy Optimization — reinforcement learning applied to the LLM. The model generates candidate reorder points, receives P&L rewards from the simulation, and updates weights to favor profitable decisions."],
781
+ ["Reward signal", "Analytical 30-day forward simulation from current state: revenue − holding_cost − stockout_penalty − order_cost − writeoff_cost, normalized by baseline profit. 60% P&L weight + 40% fill rate vs 95% target."],
782
+ ["vs Base Qwen", "The base model reasons generically. After GRPO training, the model should internalize inventory-specific heuristics: lead-time-aware ordering, demand volatility buffers, write-off avoidance at high inventory levels."],
783
+ ["Memory (200 entries)", "Unlike base Qwen (limited by context window), the GRPO-trained model was trained with full 200-entry memory banks, enabling true long-horizon reasoning across the 365-day decision horizon."],
784
+ ].map(([t, d]) => (
785
+ <div key={t} style={{ marginBottom: 12 }}>
786
+ <span style={{ color: C.purple, fontWeight: 600 }}>{t}: </span>
787
+ <span>{d}</span>
788
+ </div>
789
+ ))}
790
+ </div>
791
+ </Panel>
792
+ )}
793
+ </div>
794
+ )}
795
+
796
+ {/* BASELINES TAB */}
797
+ {activeTopTab === "baselines" && (
798
+ <div>
799
+ {!baselinesReady ? (
800
+ <div style={{ color: C.muted, fontSize: 12, padding: 20 }}>Run baselines from the config panel above first.</div>
801
+ ) : (
802
+ <div style={{ display: "flex", flexDirection: "column", gap: 14 }}>
803
+ <div style={{ display: "grid", gridTemplateColumns: "repeat(4,1fr)", gap: 10 }}>
804
+ {Object.entries(baselineResults).map(([k, r]) => (
805
+ <div key={k} style={{ background: C.panel, border: `1px solid ${BASELINES[k].color}30`, borderRadius: 10, padding: 16 }}>
806
+ <div style={{ fontSize: 9, color: BASELINES[k].color, letterSpacing: 3, marginBottom: 10 }}>{BASELINES[k].label.toUpperCase()}</div>
807
+ {[["Fill Rate", <FillBadge rate={r.metrics.fillRate} />], ["Stockouts", r.metrics.stockOuts], ["Lost Sales", r.metrics.lostSales.toLocaleString()], ["Write-Offs", r.metrics.totWriteOff.toLocaleString()]].map(([l, v]) => (
808
+ <div key={l} style={{ display: "flex", justifyContent: "space-between", fontSize: 11, marginBottom: 6 }}>
809
+ <span style={{ color: C.muted }}>{l}</span><span style={{ fontWeight: 600 }}>{v}</span>
810
+ </div>
811
+ ))}
812
+ </div>
813
+ ))}
814
+ </div>
815
+ <Panel title="Fill Rate Convergence — All Baselines">
816
+ <ResponsiveContainer width="100%" height={240}>
817
+ <LineChart margin={{ top: 4, right: 8, bottom: 0, left: 0 }}>
818
+ <XAxis dataKey="day" type="number" domain={[0, CFG.SIM_DAYS]} tick={{ fontSize: 9, fill: C.muted }} />
819
+ <YAxis domain={[0, 1]} tickFormatter={v => `${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={40} />
820
+ <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="5 3" />
821
+ <Tooltip contentStyle={{ background: "#0a0f18", border: `1px solid ${C.border2}`, fontSize: 10 }} formatter={v => `${(v * 100).toFixed(1)}%`} />
822
+ <Legend wrapperStyle={{ fontSize: 10 }} />
823
+ {Object.entries(baselineResults).map(([k, r]) => (
824
+ <Line key={k} data={r.timeline.map(t => ({ day: t.day, fillRate: t.fillRateCum }))} type="monotone" dataKey="fillRate" stroke={BASELINES[k].color} strokeWidth={1.5} dot={false} name={BASELINES[k].label} />
825
+ ))}
826
+ </LineChart>
827
+ </ResponsiveContainer>
828
+ </Panel>
829
+ </div>
830
+ )}
831
+ </div>
832
+ )}
833
+
834
+ {/* FOOTER */}
835
+ <div style={{ marginTop: 32, paddingTop: 16, borderTop: `1px solid ${C.border}`, display: "grid", gridTemplateColumns: "repeat(4,1fr)", gap: 12, fontSize: 10, color: C.dim }}>
836
+ {[
837
+ ["Environment", `Stochastic inventory simulation · ${CFG.SIM_DAYS}-day horizon · 4 demand regimes · lead time ${CFG.LEAD_TIME} days · spoilage ${(CFG.WRITE_OFF_RATE * 100).toFixed(2)}%/day`],
838
+ ["Agent Architecture", `Qwen2.5-72B via HF Inference API · decisions every ${CFG.DECISION_INTERVAL} days · rolling 6-turn conversation · ${CFG.MEMORY_SIZE}-entry memory bank`],
839
+ ["GRPO Training", "Qwen2.5-3B-Instruct fine-tuned with GRPO · analytical P&L reward · 30-day lookahead simulation · LoRA r=16 · currently training on Northflank"],
840
+ ["Benchmarking", "LLM agent vs 4 rule-based baselines: Base, Safety Stock, Oracle Forecast, Monte Carlo · same demand series · identical simulation engine"],
841
+ ].map(([t, d]) => (
842
+ <div key={t}>
843
+ <div style={{ color: C.muted, fontWeight: 600, marginBottom: 4, fontSize: 9, letterSpacing: 2 }}>{t.toUpperCase()}</div>
844
+ <div style={{ lineHeight: 1.7 }}>{d}</div>
845
+ </div>
846
+ ))}
847
+ </div>
848
+ </div>
849
+ <style>{`@keyframes pulse{0%,100%{opacity:1}50%{opacity:0.3}} input:focus{border-color:${C.teal}!important;} input::placeholder{color:${C.muted}}`}</style>
850
+ </div>
851
+ );
852
+ }
frontend/src/main.jsx ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import ReactDOM from "react-dom/client";
3
+ import App from "./App.jsx";
4
+
5
+ ReactDOM.createRoot(document.getElementById("root")).render(
6
+ <React.StrictMode>
7
+ <App />
8
+ </React.StrictMode>
9
+ );
frontend/vite.config.js ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from "vite";
2
+ import react from "@vitejs/plugin-react";
3
+
4
// Build output goes to dist/ (the directory the Dockerfile copies into the Python
// image) and is emptied before every build.
const buildOptions = {
  outDir: "dist",
  emptyOutDir: true,
};

export default defineConfig({
  plugins: [react()],
  build: buildOptions,
});
server/inventory_env.py CHANGED
@@ -6,6 +6,8 @@ from dataclasses import dataclass, asdict
6
  from typing import List, Optional
7
  import numpy as np
8
  from fastapi import FastAPI, HTTPException
 
 
9
  from pydantic import BaseModel
10
 
11
  from config import (
@@ -256,3 +258,16 @@ def state():
256
  stockouts=episode.stockouts,
257
  lost_sales=episode.lost_sales,
258
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from typing import List, Optional
7
  import numpy as np
8
  from fastapi import FastAPI, HTTPException
9
+ from fastapi.staticfiles import StaticFiles
10
+ from fastapi.responses import FileResponse
11
  from pydantic import BaseModel
12
 
13
  from config import (
 
258
  stockouts=episode.stockouts,
259
  lost_sales=episode.lost_sales,
260
  )
261
+
262
+
263
# ── Serve React frontend (static files built by Dockerfile) ──────────────────
_static_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static")


def resolve_static_path(static_dir: str, full_path: str) -> str:
    """Return the file inside *static_dir* that should answer a request for *full_path*.

    An existing regular file located inside *static_dir* is returned as-is, so
    root-level build output (favicon, manifest, ...) is served with its own content.
    Everything else — the empty path, unknown client-side routes, directories, and
    any path that resolves outside *static_dir* — falls back to ``index.html`` so the
    single-page app can handle the route itself.
    """
    root = os.path.realpath(static_dir)
    index = os.path.join(root, "index.html")
    if not full_path:
        return index
    try:
        candidate = os.path.realpath(os.path.join(root, full_path))
    except ValueError:
        # e.g. an embedded NUL byte in the requested path
        return index
    # Resolve symlinks and ".." before the containment check so a crafted path
    # cannot escape the static directory.
    if candidate.startswith(root + os.sep) and os.path.isfile(candidate):
        return candidate
    return index


# Only wire up the frontend when a build is actually present; without index.html the
# catch-all route could do nothing but fail on every request.
if os.path.isfile(os.path.join(_static_dir, "index.html")):
    _assets_dir = os.path.join(_static_dir, "assets")
    # StaticFiles raises at mount time if its directory is missing, so check first.
    if os.path.isdir(_assets_dir):
        app.mount("/assets", StaticFiles(directory=_assets_dir), name="assets")

    @app.get("/", include_in_schema=False)
    @app.get("/{full_path:path}", include_in_schema=False)
    async def serve_spa(full_path: str = ""):
        # API routes are registered above and therefore match first; everything
        # else is answered from the React build.
        return FileResponse(resolve_static_path(_static_dir, full_path))