import { useState, useRef, useCallback, useEffect } from "react"; import { LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer, ReferenceLine, AreaChart, Area, BarChart, Bar, Legend, } from "recharts"; // ─── DESIGN TOKENS ──────────────────────────────────────────────────────────── const C = { bg: "#07090f", panel: "#0d1117", border: "#161d2a", border2: "#1e2d40", text: "#c9d5e0", muted: "#3a5060", dim: "#1a2535", green: "#34d399", blue: "#38bdf8", amber: "#fbbf24", red: "#f87171", purple: "#a78bfa", teal: "#2dd4bf", }; // ─── CONFIG (mirrors config.py) ─────────────────────────────────────────────── const CFG = { LEAD_TIME: 3, DEFAULT_SL: 0.95, WRITE_OFF_RATE: 0.00143, WRITE_OFF_FREQ: 7, HISTO_DAYS: 365, SIM_DAYS: 730, DECISION_INTERVAL: 5, MEMORY_SIZE: 200, SELLING_PRICE: 25.0, UNIT_COST: 10.0, FIXED_ORDER_COST: 150.0, HOLDING_RATE: 0.02, }; // ─── MATH HELPERS ───────────────────────────────────────────────────────────── function normalRandom() { let u = 0, v = 0; while (!u) u = Math.random(); while (!v) v = Math.random(); return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v); } function gammaRandom(shape, scale) { if (shape < 1) return gammaRandom(1 + shape, scale) * Math.pow(Math.random(), 1 / shape); const d = shape - 1 / 3, c = 1 / Math.sqrt(9 * d); while (true) { let x, v; do { x = normalRandom(); v = 1 + c * x; } while (v <= 0); v = v * v * v; const u = Math.random(); if (u < 1 - 0.0331 * x * x * x * x) return d * v * scale; if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v * scale; } } function poissonRandom(lambda) { let L = Math.exp(-lambda), k = 0, p = 1; do { k++; p *= Math.random(); } while (p > L); return k - 1; } function expRandom(rate) { return -Math.log(Math.random()) / rate; } function arr_mean(a) { return a.length ? a.reduce((s, x) => s + x, 0) / a.length : 0; } function arr_std(a) { if (a.length < 2) return 0; const m = arr_mean(a); return Math.sqrt(a.reduce((s, x) => s + (x - m) ** 2, 0) / (a.length - 1)); } function quantile(sorted, q) { return sorted[Math.floor(sorted.length * q)]; } // ─── DEMAND ENVIRONMENTS ────────────────────────────────────────────────────── const ENVS = { gamma_poisson: { label: "Gamma–Poisson", tag: "MODERATE", color: C.green, desc: "90% Gamma(7,16) + 10% Poisson(80). Stable with rare spikes.", sample: () => Math.random() < 0.9 ? Math.max(0, Math.round(gammaRandom(7, 16))) : poissonRandom(80), demMean: 112, demStd: 38, }, bimodal_hv: { label: "Bimodal High-Var", tag: "HARD", color: C.amber, desc: "50% low-mean Gamma + 50% high-mean Gamma. Extremely unpredictable.", sample: () => Math.random() < 0.5 ? Math.max(0, Math.round(gammaRandom(7, 3))) : Math.max(0, Math.round(gammaRandom(7, 29))), demMean: 112, demStd: 95, }, spiking: { label: "Sporadic Spiking", tag: "EXTREME", color: C.red, desc: "95% zero demand, 5% large Exponential bursts.", sample: () => Math.random() < 0.95 ? 0 : Math.max(0, Math.round(expRandom(0.05))), demMean: 20, demStd: 55, }, gamma_stable: { label: "Stable Gamma", tag: "EASY", color: C.blue, desc: "Single Gamma(7,16), low variance. Baseline environment.", sample: () => Math.max(0, Math.round(gammaRandom(7, 16))), demMean: 112, demStd: 35, }, }; // ─── BASELINE AGENTS ────────────────────────────────────────────────────────── const BASELINES = { base: { label: "Base", color: C.muted, compute: (h) => arr_mean(h) * CFG.LEAD_TIME }, safety_stock: { label: "Safety Stock", color: C.blue, compute: (h) => arr_mean(h) * CFG.LEAD_TIME + 1.645 * arr_std(h) * Math.sqrt(CFG.LEAD_TIME), }, forecast: { label: "Oracle Forecast", color: C.green, compute: (h, dm, ds) => dm * CFG.LEAD_TIME + 1.645 * ds * Math.sqrt(CFG.LEAD_TIME), }, monte_carlo: { label: "Monte Carlo", color: C.purple, compute: (h) => { const s = []; for (let i = 0; i < 500; i++) { let t = 0; for (let j = 0; j < CFG.LEAD_TIME; j++) t += h[Math.floor(Math.random() * h.length)] * (0.8 + Math.random() * 0.4); s.push(t); } s.sort((a, b) => a - b); return quantile(s, 0.95); }, }, }; // ─── SIMULATION ENGINE ──────────────────────────────────────────────────────── function buildDemandSeries(envKey, n) { return Array.from({ length: n }, () => ENVS[envKey].sample()); } function runOneSimulation(computeROP, demandSeries, envKey) { const env = ENVS[envKey]; const n = demandSeries.length; let inventory = 0; const orders = []; let totDemand = 0, totFulfilled = 0, totWriteOff = 0, stockOuts = 0, lostSales = 0, totProfit = 0, servicedays = 0; const timeline = []; for (let day = 0; day < n; day++) { const demand = demandSeries[day]; const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day); const arrivals = orders.filter(o => o.arr === day); const delivered = arrivals.reduce((s, o) => s + o.qty, 0); inventory += delivered; orders.splice(0, orders.length, ...orders.filter(o => o.arr > day)); const preInv = inventory; const fulfilled = Math.min(demand, inventory); inventory = Math.max(0, inventory - demand); const lost = Math.max(0, demand - fulfilled); if (lost > 0) stockOuts++; else servicedays++; lostSales += lost; let rop = 0, ordered = 0; if (hist.length >= 5 && day < n - CFG.LEAD_TIME) { rop = Math.max(0, computeROP(hist, env.demMean, env.demStd)); if (inventory <= rop) { const qty = Math.ceil(rop - inventory + arr_mean(hist) * CFG.LEAD_TIME); orders.push({ arr: day + CFG.LEAD_TIME, qty }); ordered = qty; } } let wo = 0; if (day % CFG.WRITE_OFF_FREQ === 0) { wo = Math.floor(inventory * CFG.WRITE_OFF_RATE); inventory -= wo; totWriteOff += wo; } totDemand += demand; totFulfilled += fulfilled; const grossMargin = fulfilled * (CFG.SELLING_PRICE - CFG.UNIT_COST); const holdingCost = inventory * CFG.UNIT_COST * CFG.HOLDING_RATE; const stockoutPenalty = lost * (CFG.SELLING_PRICE - CFG.UNIT_COST); const orderCost = ordered > 0 ? CFG.FIXED_ORDER_COST : 0; const writeoffCost = wo * CFG.UNIT_COST; totProfit += grossMargin - holdingCost - stockoutPenalty - orderCost - writeoffCost; const fillRateCum = totDemand > 0 ? totFulfilled / totDemand : 0; timeline.push({ day, demand, inventory: preInv, inventoryAfter: inventory, fulfilled, lost, rop: Math.round(rop), ordered, wo, delivered, fillRateCum }); } const daysElapsed = n; return { timeline, metrics: { fillRate: totDemand > 0 ? totFulfilled / totDemand : 0, stockOuts, lostSales, totWriteOff, totDemand, totFulfilled, profit: totProfit, serviceLevel: daysElapsed > 0 ? servicedays / daysElapsed : 0 }, }; } // ─── HF INFERENCE API (proxied through FastAPI to avoid CSP on HF Spaces) ──── async function callQwen(messages, modelId, hfToken) { const resp = await fetch("/api/qwen", { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: modelId, messages, max_tokens: 600, temperature: 0.7, hf_token: hfToken || "" }), }); if (!resp.ok) throw new Error(`API error ${resp.status}: ${await resp.text()}`); const data = await resp.json(); return data.choices?.[0]?.message?.content || ""; } // ─── SYSTEM PROMPT ──────────────────────────────────────────────────────────── const SYSTEM_PROMPT = `You are an expert inventory optimization agent in a stochastic supply-chain simulation. YOUR OBJECTIVE: Maximize profit while maintaining fill rate >= 95% over a 365-day decision horizon (days 365–730 of the simulation, after a 365-day warm-up). ENVIRONMENT RULES: - Orders arrive exactly 3 days after placement (LEAD_TIME = 3) - An order fires whenever inventory <= your reorder_point - Order quantity = reorder_point - current_inventory + mean_demand * LEAD_TIME - Every 7 days, ~0.14% of inventory is written off (spoilage/expiry) - Reward = daily P&L: revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost REASONING REQUIREMENTS — all 4: 1. SUBGOAL DECOMPOSITION: Break into subgoals (e.g., "rebuild buffer", "reduce overstock") 2. STATE ANALYSIS: Interpret inventory level, demand trend, pending orders, fill rate trajectory 3. DECISION: Output a specific numeric reorder_point with clear justification 4. RECOVERY PLAN: If fill rate < 95% or recent stockouts, state recovery strategy Think 3+ days ahead — your ROP today only shows effect after lead time. OUTPUT FORMAT — valid JSON only, no markdown fences: {"subgoals":["...","..."],"state_analysis":"...","recovery_plan":"...","reorder_point":,"confidence":"high|medium|low","reasoning_depth":"..."}`; // ─── BUILD SNAPSHOT FOR LLM ─────────────────────────────────────────────────── function buildSnapshot(demandSeries, timeline, day, memory) { const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day); const last5 = timeline.slice(Math.max(0, day - 5), day); const curInv = timeline[day - 1]?.inventoryAfter ?? 0; return { day, days_remaining: CFG.SIM_DAYS - day, current_inventory: Math.round(curInv), demand_mean_30d: Math.round(arr_mean(demandSeries.slice(Math.max(0, day - 30), day)) * 10) / 10, demand_std_30d: Math.round(arr_std(demandSeries.slice(Math.max(0, day - 30), day)) * 10) / 10, fill_rate_so_far: timeline[day - 1]?.fillRateCum ? `${(timeline[day - 1].fillRateCum * 100).toFixed(1)}%` : "N/A", recent_stockouts: last5.filter(d => d.lost > 0).length, recent_lost_sales: last5.reduce((s, d) => s + d.lost, 0), last_5_days: last5.map(d => ({ day: d.day, demand: d.demand, inv: d.inventoryAfter, lost: d.lost, rop: d.rop, })), memory_bank: memory.slice(-CFG.MEMORY_SIZE), }; } // ─── SHARED SIMULATION RUNNER ───────────────────────────────────────────────── async function runAgentLoop({ envKey, modelId, hfToken, onDay, onDecision, onStatus, abortRef }) { const demandSeries = buildDemandSeries(envKey, CFG.SIM_DAYS); const env = ENVS[envKey]; let inventory = 0; const orders = []; let totDemand = 0, totFulfilled = 0, totWriteOff = 0, stockOuts = 0, lostSales = 0, totProfit = 0, servicedays = 0; const timeline = []; let currentROP = env.demMean * CFG.LEAD_TIME; let memory = []; let convo = []; for (let day = 0; day < CFG.SIM_DAYS; day++) { if (abortRef.current) break; const demand = demandSeries[day]; const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day); const arrivals = orders.filter(o => o.arr === day); const delivered = arrivals.reduce((s, o) => s + o.qty, 0); inventory += delivered; orders.splice(0, orders.length, ...orders.filter(o => o.arr > day)); const preInv = inventory; const fulfilled = Math.min(demand, inventory); inventory = Math.max(0, inventory - demand); const lost = Math.max(0, demand - fulfilled); if (lost > 0) stockOuts++; else servicedays++; lostSales += lost; let ordered = 0; if (hist.length >= 5 && day < CFG.SIM_DAYS - CFG.LEAD_TIME && inventory <= currentROP) { const qty = Math.ceil(currentROP - inventory + arr_mean(hist) * CFG.LEAD_TIME); orders.push({ arr: day + CFG.LEAD_TIME, qty }); ordered = qty; } let wo = 0; if (day % CFG.WRITE_OFF_FREQ === 0) { wo = Math.floor(inventory * CFG.WRITE_OFF_RATE); inventory -= wo; totWriteOff += wo; } totDemand += demand; totFulfilled += fulfilled; const grossMargin = fulfilled * (CFG.SELLING_PRICE - CFG.UNIT_COST); const holdingCost = inventory * CFG.UNIT_COST * CFG.HOLDING_RATE; const stockoutPenalty = lost * (CFG.SELLING_PRICE - CFG.UNIT_COST); const orderCost = ordered > 0 ? CFG.FIXED_ORDER_COST : 0; const writeoffCost = wo * CFG.UNIT_COST; totProfit += grossMargin - holdingCost - stockoutPenalty - orderCost - writeoffCost; const fillRateCum = totDemand > 0 ? totFulfilled / totDemand : 0; const entry = { day, demand, inventory: preInv, inventoryAfter: inventory, fulfilled, lost, rop: Math.round(currentROP), ordered, wo, delivered, fillRateCum }; timeline.push(entry); onDay(day, [...timeline]); if (day >= CFG.HISTO_DAYS && day % CFG.DECISION_INTERVAL === 0 && day < CFG.SIM_DAYS - CFG.LEAD_TIME) { onStatus(`Day ${day}/${CFG.SIM_DAYS}: agent reasoning...`); const snapshot = buildSnapshot(demandSeries, timeline, day, memory); const userMsg = { role: "user", content: `SNAPSHOT Day ${day}/${CFG.SIM_DAYS}\n${JSON.stringify(snapshot)}\n\nSet reorder_point for next ${CFG.DECISION_INTERVAL} days.`, }; const msgs = [...convo.slice(-6), userMsg]; try { const raw = await callQwen(msgs, modelId, hfToken); let decision; try { decision = JSON.parse(raw.replace(/```json|```/g, "").trim()); } catch { const m = raw.match(/"reorder_point"\s*:\s*(\d+\.?\d*)/); decision = { subgoals: ["parse error"], state_analysis: raw.slice(0, 200), recovery_plan: "N/A", reorder_point: m ? parseFloat(m[1]) : currentROP, confidence: "low", reasoning_depth: "parse failed" }; } currentROP = Math.max(0, decision.reorder_point || currentROP); convo = [...convo, userMsg, { role: "assistant", content: raw }]; memory = [...memory, { day, rop: Math.round(currentROP), confidence: decision.confidence, fill_rate: `${(fillRateCum * 100).toFixed(1)}%`, inventory: Math.round(preInv), demand_mean: Math.round(arr_mean(demandSeries.slice(Math.max(0, day - 30), day))), stockouts_cumulative: stockOuts, lost_sales_cumulative: Math.round(lostSales), key_insight: decision.state_analysis?.slice(0, 100), }].slice(-CFG.MEMORY_SIZE); onDecision({ day, snapshot, decision, rop: currentROP, fillRateCum, memory: [...memory] }); } catch (e) { onStatus(`Day ${day}: API error — ${e.message}`); onDecision({ day, snapshot, decision: { subgoals: [], state_analysis: `API error: ${e.message}`, recovery_plan: "N/A", reorder_point: currentROP, confidence: "low", reasoning_depth: "error" }, rop: currentROP, fillRateCum, memory: [...memory] }); } await new Promise(r => setTimeout(r, 150)); } } return { timeline, metrics: { fillRate: totDemand > 0 ? totFulfilled / totDemand : 0, stockOuts, lostSales, totWriteOff, totDemand, totFulfilled, profit: totProfit, serviceLevel: CFG.SIM_DAYS > 0 ? servicedays / CFG.SIM_DAYS : 0 }, memory, }; } // ─── SHARED UI COMPONENTS ───────────────────────────────────────────────────── function Panel({ title, children, style = {} }) { return (
{title &&
{title}
} {children}
); } function FillBadge({ rate }) { const color = rate >= 0.95 ? C.green : rate >= 0.85 ? C.amber : C.red; return {rate ? `${(rate * 100).toFixed(1)}%` : "—"}; } function MetricBox({ label, value, highlight, color }) { return (
{label}
{value}
); } function SimTabs({ tabs, active, onSelect }) { return (
{tabs.map(({ id, label }) => { const isActive = active === id; return ( ); })}
); } function LiveSimCharts({ timeline }) { return (
`Day ${d}`} />
`${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={38} /> `${(v * 100).toFixed(1)}%`} />
); } function ReasoningLog({ log, logEndRef }) { return (
{log.length === 0 &&
Waiting for first LLM decision (after day {CFG.HISTO_DAYS})…
} {log.map((entry, i) => { const d = entry.decision; const isLatest = i === log.length - 1; return (
Day {entry.day} — Decision #{i + 1}
ROP: {Math.round(entry.rop)} Fill: {(d.confidence || "?").toUpperCase()}
{d.subgoals?.length > 0 && (
SUBGOAL DECOMPOSITION
{d.subgoals.map((sg, j) => (
{j + 1}. {sg}
))}
)}
STATE ANALYSIS
{d.state_analysis}
{d.recovery_plan && d.recovery_plan !== "N/A" && (
RECOVERY PLAN
{d.recovery_plan}
)} {d.reasoning_depth &&
Reasoning: {d.reasoning_depth}
}
); })}
); } function ComparePanel({ agentMetrics, agentLog, simTimeline, baselineResults }) { const agentFillRates = simTimeline.map(t => ({ day: t.day, agent: t.fillRateCum })); return (
{agentMetrics && (
🤖 LLM AGENT
{[["Profit", `$${Math.round(agentMetrics.profit).toLocaleString()}`], ["Service Level", ], ["Fill Rate", ], ["Stockouts", agentMetrics.stockOuts]].map(([l, v]) => (
{l}{v}
))}
)} {Object.entries(baselineResults).map(([bk, br]) => (
{BASELINES[bk].label.toUpperCase()}
{[["Profit", `$${Math.round(br.metrics.profit).toLocaleString()}`], ["Service Level", ], ["Fill Rate", ], ["Stockouts", br.metrics.stockOuts]].map(([l, v]) => (
{l}{v}
))}
))}
{Object.keys(baselineResults).length > 0 && ( `${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={40} /> `${(v * 100).toFixed(1)}%`} /> {Object.entries(baselineResults).map(([bk, br]) => ( ({ day: t.day, fillRate: t.fillRateCum }))} type="monotone" dataKey="fillRate" stroke={BASELINES[bk].color} strokeWidth={1} strokeDasharray="3 2" dot={false} name={BASELINES[bk].label} /> ))} )}
); } function MemoryBankPanel({ memory }) { return (
The memory bank stores the last {CFG.MEMORY_SIZE} decisions with full context — inventory level, demand signal, fill rate, and cumulative losses. This enables the agent to reason across the full {CFG.SIM_DAYS - CFG.HISTO_DAYS}-day horizon beyond the LLM's context window.
{memory.map((m, i) => (
Day {m.day}
{[["ROP Set", m.rop], ["Confidence", m.confidence], ["Fill Rate", m.fill_rate || "—"], ["Inventory", m.inventory], ["Demand Mean", m.demand_mean], ["Stockouts ∑", m.stockouts_cumulative], ["Lost Sales ∑", m.lost_sales_cumulative]].map(([l, v]) => (
{l} {v}
))} {m.key_insight &&
{m.key_insight}
}
))} {memory.length === 0 &&
Memory builds as agent makes decisions…
}
); } // ─── SINGLE-AGENT SIMULATION VIEW ───────────────────────────────────────────── function AgentSimView({ label, accentColor, modelId, hfToken, envKey, baselineResults }) { const [phase, setPhase] = useState("idle"); // idle | running | done const [timeline, setTimeline] = useState([]); const [log, setLog] = useState([]); const [memory, setMemory] = useState([]); const [metrics, setMetrics] = useState(null); const [activeTab, setActiveTab] = useState("live"); const [status, setStatus] = useState(""); const [runningDay, setRunningDay] = useState(0); const abortRef = useRef(false); const logEndRef = useRef(null); useEffect(() => { if (logEndRef.current) logEndRef.current.scrollIntoView({ behavior: "smooth" }); }, [log]); const start = useCallback(async () => { abortRef.current = false; setPhase("running"); setTimeline([]); setLog([]); setMemory([]); setMetrics(null); setRunningDay(0); try { const result = await runAgentLoop({ envKey, modelId, hfToken, abortRef, onDay: (day, tl) => { setTimeline(tl); setRunningDay(day); }, onDecision: ({ day, snapshot, decision, rop, fillRateCum, memory: mem }) => { setLog(prev => [...prev, { day, snapshot, decision, rop, fillRateCum }]); setMemory(mem); }, onStatus: setStatus, }); setMetrics(result.metrics); setMemory(result.memory); } catch (e) { setStatus(`Error: ${e.message}`); } setPhase("done"); }, [envKey, modelId, hfToken]); const stop = () => { abortRef.current = true; setPhase("done"); setStatus("Stopped."); }; const reset = () => { setPhase("idle"); setTimeline([]); setLog([]); setMemory([]); setMetrics(null); }; const tabs = [ { id: "live", label: "LIVE SIM" }, { id: "reasoning", label: `REASONING (${log.length})` }, { id: "compare", label: "COMPARE" }, { id: "memory", label: `MEMORY (${memory.length})` }, ]; return (
{phase === "idle" && ( )} {(phase === "running" || phase === "done") && ( <>
{phase === "running" && } {status} {phase === "running" && (
)}
{phase === "running" && }
{metrics && (
} /> } />
)} {activeTab === "live" && } {activeTab === "reasoning" && } {activeTab === "compare" && } {activeTab === "memory" && } )}
); } // ─── MAIN APP ───────────────────────────────────────────────────────────────── export default function StockOracle() { const [envKey, setEnvKey] = useState("gamma_poisson"); const [hfToken, setHfToken] = useState(""); const [grpoModelId, setGrpoModelId] = useState(""); const [activeTopTab, setActiveTopTab] = useState("llm"); const [baselineResults, setBaselineResults] = useState({}); const [baselinesReady, setBaselinesReady] = useState(false); const env = ENVS[envKey]; const runBaselines = useCallback(() => { setBaselinesReady(false); const demand = buildDemandSeries(envKey, CFG.SIM_DAYS); const results = {}; Object.entries(BASELINES).forEach(([k, ag]) => { results[k] = runOneSimulation((h, dm, ds) => ag.compute(h, dm, ds), demand, envKey); }); setBaselineResults(results); setBaselinesReady(true); }, [envKey]); const topTabs = [ { id: "llm", label: "QWEN BASE AGENT" }, { id: "grpo", label: "GRPO FINE-TUNED ★" }, { id: "baselines", label: "BASELINES" }, ]; return (
{/* HEADER */}
HACKATHON · LONG-HORIZON REASONING ENVIRONMENT

STOCK ORACLE

LLM AGENT · GRPO RL TRAINING · INVENTORY OPTIMIZATION · LONG-HORIZON PLANNING
{/* GLOBAL CONFIG */}
{/* Env selector */} {Object.entries(ENVS).map(([k, e]) => ( ))} {/* HF Token */}
Required for Qwen2.5-72B inference via HF Inference API.
Get one at huggingface.co/settings/tokens
setHfToken(e.target.value)} style={{ width: "100%", background: C.dim, border: `1px solid ${C.border2}`, borderRadius: 6, padding: "9px 12px", color: C.text, fontFamily: "inherit", fontSize: 12, outline: "none", marginBottom: 10 }} />
GRPO Fine-tuned Model ID
HF model ID of the trained adapter (e.g. ademarteau/qwen-inventory-grpo-iter4). Leave blank while training.
setGrpoModelId(e.target.value)} style={{ width: "100%", background: C.dim, border: `1px solid ${C.purple}40`, borderRadius: 6, padding: "9px 12px", color: C.text, fontFamily: "inherit", fontSize: 12, outline: "none" }} />
{/* Baselines */}
Pre-compute all 4 rule-based baselines for comparison in the Compare tab.
{baselinesReady && Object.entries(baselineResults).map(([k, r]) => (
{BASELINES[k].label}
))}
{/* TOP TABS */} {/* QWEN BASE TAB */} {activeTopTab === "llm" && (
Qwen2.5-72B-Instruct via HF Inference API · decisions every {CFG.DECISION_INTERVAL} days · {CFG.SIM_DAYS - CFG.HISTO_DAYS} decision steps · memory bank up to {CFG.MEMORY_SIZE} entries
)} {/* GRPO TAB */} {activeTopTab === "grpo" && (
{/* Training status banner */}
● TRAINING IN PROGRESS Northflank · 16 vCPU / 196 GB · Qwen2.5-3B-Instruct + LoRA
{[["Algorithm", "GRPO (Group Relative Policy Optimization)"], ["Reward", "Analytical P&L simulation — 30-day lookahead"], ["Base Model", "Qwen/Qwen2.5-3B-Instruct via Unsloth"], ["Status", "Iteration 1/5 · Rollout collection in progress"]].map(([l, v]) => (
{l.toUpperCase()}
{v}
))}
{grpoModelId ? <>Fine-tuned model: {grpoModelId} : ⚠ Enter the GRPO model ID above once training completes to run inference.}
{grpoModelId ? ( ) : (
{[ ["What is GRPO?", "Group Relative Policy Optimization — reinforcement learning applied to the LLM. The model generates candidate reorder points, receives P&L rewards from the simulation, and updates weights to favor profitable decisions."], ["Reward signal", "Analytical 30-day forward simulation from current state: revenue − holding_cost − stockout_penalty − order_cost − writeoff_cost, normalized by baseline profit. 60% P&L weight + 40% fill rate vs 95% target."], ["vs Base Qwen", "The base model reasons generically. After GRPO training, the model should internalize inventory-specific heuristics: lead-time-aware ordering, demand volatility buffers, write-off avoidance at high inventory levels."], ["Memory (200 entries)", "Unlike base Qwen (limited by context window), the GRPO-trained model was trained with full 200-entry memory banks, enabling true long-horizon reasoning across the 365-day decision horizon."], ].map(([t, d]) => (
{t}: {d}
))}
)}
)} {/* BASELINES TAB */} {activeTopTab === "baselines" && (
{!baselinesReady ? (
Run baselines from the config panel above first.
) : (
{Object.entries(baselineResults).map(([k, r]) => (
{BASELINES[k].label.toUpperCase()}
{[["Profit", `$${Math.round(r.metrics.profit).toLocaleString()}`], ["Service Level", ], ["Fill Rate", ], ["Stockouts", r.metrics.stockOuts]].map(([l, v]) => (
{l}{v}
))}
))}
`${(v * 100).toFixed(0)}%`} tick={{ fontSize: 9, fill: C.muted }} width={40} /> `${(v * 100).toFixed(1)}%`} /> {Object.entries(baselineResults).map(([k, r]) => ( ({ day: t.day, fillRate: t.fillRateCum }))} type="monotone" dataKey="fillRate" stroke={BASELINES[k].color} strokeWidth={1.5} dot={false} name={BASELINES[k].label} /> ))}
)}
)} {/* FOOTER */}
{[ ["Environment", `Stochastic inventory simulation · ${CFG.SIM_DAYS}-day horizon · 4 demand regimes · lead time ${CFG.LEAD_TIME} days · spoilage ${(CFG.WRITE_OFF_RATE * 100).toFixed(2)}%/day`], ["Agent Architecture", `Qwen2.5-72B via HF Inference API · decisions every ${CFG.DECISION_INTERVAL} days · rolling 6-turn conversation · ${CFG.MEMORY_SIZE}-entry memory bank`], ["GRPO Training", "Qwen2.5-3B-Instruct fine-tuned with GRPO · analytical P&L reward · 30-day lookahead simulation · LoRA r=16 · currently training on Northflank"], ["Benchmarking", "LLM agent vs 4 rule-based baselines: Base, Safety Stock, Oracle Forecast, Monte Carlo · same demand series · identical simulation engine"], ].map(([t, d]) => (
{t.toUpperCase()}
{d}
))}
); }