Spaces:

ademarteau
/

RL-Inventory-Simulations

Runtime error

App Files Files Community

ademarteau committed on Mar 7

Commit

9a68962

1 Parent(s): cb82053

simulator jsx from claude

Browse files

Files changed (1) hide show

rl_simulator.jsx +826 -0

rl_simulator.jsx ADDED Viewed

	@@ -0,0 +1,826 @@

+import { useState, useRef, useCallback, useEffect } from "react";
+import { LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer, ReferenceLine, AreaChart, Area, BarChart, Bar, Legend } from "recharts";
+// ─── DESIGN TOKENS ────────────────────────────────────────────────────────────
+const C = { // Colour palette (hex strings) used by the inline styles and chart props in this file.
+  bg:       "#07090f", // root page background
+  panel:    "#0d1117", // Panel card background
+  border:   "#161d2a", // default 1px borders
+  border2:  "#1e2d40", // stronger border; also the background of the active Tab
+  text:     "#c9d5e0", // primary text
+  muted:    "#3a5060", // captions, secondary labels and chart axis ticks
+  dim:      "#1a2535", // darkest accent (environment descriptions, chip and text-box backgrounds)
+  green:    "#34d399",
+  blue:     "#38bdf8",
+  amber:    "#fbbf24",
+  red:      "#f87171",
+  purple:   "#a78bfa",
+  teal:     "#2dd4bf",
+};
+// ─── CONFIG (mirrors config.py exactly) ───────────────────────────────────────
+const CFG = { // Simulation parameters shared by the baseline runs and the LLM-driven run.
+  LEAD_TIME:           3, // days between placing an order and its arrival
+  BASE_STOCK:          0, // not referenced in the visible part of this file
+  DEFAULT_SL:          0.95, // service-level target reported to the agent in the snapshot
+  WRITE_OFF_RATE:      0.01, // fraction of on-hand inventory removed on each write-off day
+  WRITE_OFF_FREQ:      7, // write-off happens on days where day % WRITE_OFF_FREQ === 0
+  HISTO_DAYS:          30, // length of the trailing demand window used as history
+  SIM_DAYS:            120, // number of simulated days
+};
+// ─── MATH HELPERS ─────────────────────────────────────────────────────────────
/**
 * Draws one standard-normal sample (mean 0, variance 1) with the Box–Muller transform.
 * Consumes at least two values from Math.random(): first the radial uniform, then the angular one.
 * @returns {number} A standard-normal variate.
 */
function normalRandom() {
  // Math.random() may return exactly 0; redraw so Math.log never receives it.
  let radial = 0;
  while (!radial) {
    radial = Math.random();
  }
  let angular = 0;
  while (!angular) {
    angular = Math.random();
  }
  const magnitude = Math.sqrt(-2 * Math.log(radial));
  return magnitude * Math.cos(2 * Math.PI * angular);
}
/**
 * Draws one Gamma(shape, scale) sample with the Marsaglia–Tsang rejection method.
 * @param {number} shape - Shape parameter (> 0).
 * @param {number} scale - Scale parameter; the result is multiplied by it.
 * @returns {number} A gamma-distributed variate.
 */
function gammaRandom(shape, scale) {
  if (shape < 1) {
    // Boosting step for shape < 1: sample at shape + 1 and shrink by U^(1/shape).
    // The recursive draw is taken before the uniform, which fixes the order of random draws.
    const boosted = gammaRandom(1 + shape, scale);
    return boosted * Math.pow(Math.random(), 1 / shape);
  }

  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);

  for (;;) {
    // Redraw the normal until the cubed term's base is positive.
    let z = normalRandom();
    let cube = 1 + c * z;
    while (cube <= 0) {
      z = normalRandom();
      cube = 1 + c * z;
    }
    cube = cube * cube * cube;

    const u = Math.random();
    // Cheap squeeze test first; the logarithmic test only runs when the squeeze fails.
    const accepted =
      u < 1 - 0.0331 * z * z * z * z ||
      Math.log(u) < 0.5 * z * z + d * (1 - cube + Math.log(cube));
    if (accepted) {
      return d * cube * scale;
    }
  }
}
/**
 * Draws one Poisson(lambda) sample by multiplying uniforms until the running
 * product drops to exp(-lambda) or below (Knuth's method).
 * @param {number} lambda - Expected value of the distribution.
 * @returns {number} A non-negative integer count.
 */
function poissonRandom(lambda) {
  const threshold = Math.exp(-lambda);
  let product = Math.random();
  let count = 0;
  // Each additional uniform needed to get under the threshold counts as one event.
  while (product > threshold) {
    product *= Math.random();
    count += 1;
  }
  return count;
}
/**
 * Draws one sample from an exponential distribution by inverse-transform sampling.
 * @param {number} rate - Rate parameter; the distribution mean is 1 / rate.
 * @returns {number} A non-negative sample, finite for any rate > 0.
 */
function expRandom(rate) {
  // Math.random() can return exactly 0 and -Math.log(0) is Infinity, which would
  // become an infinite demand value downstream. Redraw until the uniform is non-zero.
  let u = 0;
  while (!u) {
    u = Math.random();
  }
  return -Math.log(u) / rate;
}
/**
 * Arithmetic mean of a numeric array.
 * @param {number[]} a - Values to average.
 * @returns {number} The mean, or 0 for an empty array.
 */
function arr_mean(a) {
  if (!a.length) {
    return 0;
  }
  let total = 0;
  a.forEach((value) => {
    total += value;
  });
  return total / a.length;
}
/**
 * Sample standard deviation (n - 1 denominator) of a numeric array.
 * @param {number[]} a - Values to measure.
 * @returns {number} The standard deviation, or 0 when fewer than two values are given.
 */
function arr_std(a) {
  const count = a.length;
  if (count < 2) {
    return 0;
  }
  const center = arr_mean(a);
  let squaredDeviations = 0;
  a.forEach((value) => {
    squaredDeviations += (value - center) ** 2;
  });
  return Math.sqrt(squaredDeviations / (count - 1));
}
/**
 * Looks up the q-quantile of an array that is already sorted ascending.
 * @param {number[]} sorted - Values in ascending order.
 * @param {number} q - Quantile as a fraction; the resulting index is clamped to the array bounds.
 * @returns {number|undefined} The element at floor(length * q), clamped to a valid index;
 *   undefined for an empty array.
 */
function quantile(sorted, q) {
  if (sorted.length === 0) {
    return undefined;
  }
  // floor(length * q) equals length when q is 1, which is one past the last element.
  const lastIndex = sorted.length - 1;
  const index = Math.min(lastIndex, Math.max(0, Math.floor(sorted.length * q)));
  return sorted[index];
}
+// ─── DEMAND ENVIRONMENTS (mirrors demand_environment.py) ──────────────────────
+const ENVS = { // Demand environments by key: UI metadata (label, tag, color, desc), sample() drawing one day's demand, and demMean/demStd.
+  gamma_poisson:{
+    label:"Gamma–Poisson",tag:"MODERATE",color:C.green,
+    desc:"90% Gamma(7,16) + 10% Poisson(80). Stable with rare spikes.",
+    sample:()=>Math.random()<0.9?Math.max(0,Math.round(gammaRandom(7,16))):poissonRandom(80), // gammaRandom arguments are (shape, scale)
+    demMean:112,demStd:38, // passed by runOneSimulation to each baseline's compute() as dMean/dStd
+  },
+  bimodal_hv:{
+    label:"Bimodal High-Var",tag:"HARD",color:C.amber,
+    desc:"50% Gamma(low mean) + 50% Gamma(high mean). Extremely unpredictable.",
+    sample:()=>Math.random()<0.5?Math.max(0,Math.round(gammaRandom(7,3))):Math.max(0,Math.round(gammaRandom(7,29))), // component means are 7*3 = 21 and 7*29 = 203
+    demMean:112,demStd:95,
+  },
+  spiking:{
+    label:"Sporadic Spiking",tag:"EXTREME",color:C.red,
+    desc:"95% zero demand, 5% large Exponential bursts. Hardest to plan.",
+    sample:()=>Math.random()<0.95?0:Math.max(0,Math.round(expRandom(0.05))), // expRandom(0.05) has mean 1/0.05 = 20, drawn on about 5% of days
+    demMean:20,demStd:55, // NOTE(review): the per-day mean of sample() is about 0.05*20 = 1, so 20 looks like the spike mean rather than the daily mean — confirm against demand_environment.py
+  },
+  gamma_stable:{
+    label:"Stable Gamma",tag:"EASY",color:C.blue,
+    desc:"Single Gamma(7,16), low variance. Baseline environment.",
+    sample:()=>Math.max(0,Math.round(gammaRandom(7,16))), // rounded to an integer and floored at 0
+    demMean:112,demStd:35,
+  },
+};
+// ─── BASELINE AGENTS (mirrors agent_environment.py) ───────────────────────────
/**
 * Baseline reorder-point policies, keyed by id. Each entry carries a display label,
 * a colour, and compute(hist, dMean, dStd), which returns the reorder point for the
 * given demand history; dMean/dStd are the environment's stated mean and deviation.
 */
const BASELINES = {
  base: {
    label: "Base",
    color: C.muted,
    // Average demand over the lead time, with no safety margin.
    compute(hist) {
      return arr_mean(hist) * CFG.LEAD_TIME;
    },
  },
  safety_stock: {
    label: "Safety Stock",
    color: C.blue,
    // Lead-time demand plus 1.645 standard deviations estimated from the history.
    compute(hist) {
      const meanDemand = arr_mean(hist);
      const stdDemand = arr_std(hist);
      return meanDemand * CFG.LEAD_TIME + 1.645 * stdDemand * Math.sqrt(CFG.LEAD_TIME);
    },
  },
  forecast: {
    label: "Oracle Forecast",
    color: C.green,
    // Same formula as safety_stock, but fed the environment's stated mean and deviation.
    compute(hist, dMean, dStd) {
      return dMean * CFG.LEAD_TIME + 1.645 * dStd * Math.sqrt(CFG.LEAD_TIME);
    },
  },
  monte_carlo: {
    label: "Monte Carlo",
    color: C.purple,
    // 95th percentile of 500 resampled lead-time demand totals; every resampled day is
    // scaled by a random factor between 0.8 and 1.2.
    compute(hist) {
      const totals = Array.from({ length: 500 }, () => {
        let leadTimeDemand = 0;
        for (let step = 0; step < CFG.LEAD_TIME; step++) {
          const picked = hist[Math.floor(Math.random() * hist.length)];
          leadTimeDemand += picked * (0.8 + Math.random() * 0.4);
        }
        return leadTimeDemand;
      });
      totals.sort((lo, hi) => lo - hi);
      return quantile(totals, 0.95);
    },
  },
};
+// ─── SIMULATION ENGINE ────────────────────────────────────────────────────────
/**
 * Generates a demand series by sampling the chosen environment once per day.
 * @param {string} envKey - Key into ENVS.
 * @param {number} n - Number of days to generate.
 * @returns {number[]} One demand value per day.
 */
function buildDemandSeries(envKey, n) {
  const drawOneDay = () => ENVS[envKey].sample();
  return Array.from({ length: n }).map(drawOneDay);
}
/**
 * Simulates one inventory run over a fixed demand series with a reorder-point policy.
 *
 * Each day, in order: orders due today arrive, demand is served from stock (the
 * shortfall is lost), the policy may place an order, and on write-off days a
 * fraction of the remaining stock is removed.
 *
 * @param {(hist: number[], dMean: number, dStd: number) => number} computeROP - Policy
 *   returning the reorder point for the trailing demand history.
 * @param {number[]} demandSeries - Demand per day.
 * @param {string} envKey - Key into ENVS; supplies demMean/demStd for the policy.
 * @returns {{timeline: object[], metrics: object}} Per-day records and run totals.
 */
function runOneSimulation(computeROP, demandSeries, envKey) {
  const environment = ENVS[envKey];
  const horizon = demandSeries.length;
  const inbound = new Map(); // arrival day -> quantity due that day
  const timeline = [];
  const totals = { demand: 0, fulfilled: 0, writeOff: 0, stockOutDays: 0, lostUnits: 0 };
  let onHand = 0;

  for (const [day, demand] of demandSeries.entries()) {
    const history = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day);

    // Receive whatever is due today.
    const delivered = inbound.get(day) ?? 0;
    inbound.delete(day);
    onHand += delivered;
    const openingStock = onHand;

    // Serve demand; anything not covered is lost.
    const fulfilled = Math.min(demand, onHand);
    const lost = Math.max(0, demand - fulfilled);
    onHand = Math.max(0, onHand - demand);
    if (lost > 0) {
      totals.stockOutDays += 1;
    }
    totals.lostUnits += lost;

    // Ordering needs at least 5 days of history and stops once an order could no
    // longer arrive before the end of the series.
    let rop = 0;
    let ordered = 0;
    const mayOrder = history.length >= 5 && day < horizon - CFG.LEAD_TIME;
    if (mayOrder) {
      rop = Math.max(0, computeROP(history, environment.demMean, environment.demStd));
      if (onHand <= rop) {
        ordered = Math.ceil(rop - onHand + arr_mean(history) * CFG.LEAD_TIME);
        const arrivalDay = day + CFG.LEAD_TIME;
        inbound.set(arrivalDay, (inbound.get(arrivalDay) ?? 0) + ordered);
      }
    }

    // Periodic write-off of a fraction of the remaining stock.
    let wo = 0;
    if (day % CFG.WRITE_OFF_FREQ === 0) {
      wo = Math.floor(onHand * CFG.WRITE_OFF_RATE);
      onHand -= wo;
      totals.writeOff += wo;
    }

    totals.demand += demand;
    totals.fulfilled += fulfilled;
    timeline.push({
      day,
      demand,
      inventory: openingStock,
      inventoryAfter: onHand,
      fulfilled,
      lost,
      rop: Math.round(rop),
      ordered,
      wo,
      delivered,
      fillRateCum: totals.demand > 0 ? totals.fulfilled / totals.demand : 0,
    });
  }

  return {
    timeline,
    metrics: {
      fillRate: totals.demand > 0 ? totals.fulfilled / totals.demand : 0,
      stockOuts: totals.stockOutDays,
      lostSales: totals.lostUnits,
      totWriteOff: totals.writeOff,
      totDemand: totals.demand,
      totFulfilled: totals.fulfilled,
    },
  };
}
+// ─── BUILD ENVIRONMENT SNAPSHOT FOR LLM ───────────────────────────────────────
/**
 * Builds the JSON-serialisable state summary that is sent to the LLM.
 *
 * Everything is taken "as of before `day`": demand windows end at day - 1 and the
 * inventory and fill rate come from the timeline entry at index day - 1.
 *
 * @param {number[]} demandSeries - Full demand series of the run.
 * @param {object[]} timeline - Per-day simulation records produced so far.
 * @param {number} day - Day the snapshot is built for.
 * @returns {object} Snapshot with inventory, configuration values, recent demand
 *   statistics, the cumulative fill rate as a percentage string ("N/A" when there is
 *   no earlier timeline entry) and a summary of the previous five days.
 */
function buildEnvSnapshot(demandSeries, timeline, day) {
  const recent = demandSeries.slice(Math.max(0, day - 10), day);
  const hist = demandSeries.slice(Math.max(0, day - CFG.HISTO_DAYS), day);
  const last5 = timeline.slice(Math.max(0, day - 5), day);
  const previous = timeline[day - 1];
  const curInv = previous?.inventoryAfter ?? 0;
  const fillSoFar = previous?.fillRateCum ?? null;
  return {
    day,
    current_inventory: curInv,
    lead_time: CFG.LEAD_TIME,
    write_off_rate: CFG.WRITE_OFF_RATE,
    service_level_target: CFG.DEFAULT_SL,
    sim_days_total: CFG.SIM_DAYS,
    days_remaining: CFG.SIM_DAYS - day,
    recent_demand_10d: recent,
    demand_mean_30d: Math.round(arr_mean(hist) * 10) / 10,
    demand_std_30d: Math.round(arr_std(hist) * 10) / 10,
    // Compare against null, not truthiness: a fill rate of 0 is a real value and must be
    // reported as "0%", not as missing.
    fill_rate_so_far: fillSoFar === null ? "N/A" : `${Math.round(fillSoFar * 1000) / 10}%`,
    last_5_days: last5.map((d) => ({
      day: d.day,
      demand: d.demand,
      inv: d.inventoryAfter,
      lost: d.lost,
      rop: d.rop,
      ordered: d.ordered,
    })),
    recent_stockouts: last5.filter((d) => d.lost > 0).length,
    recent_lost_sales: last5.reduce((sum, d) => sum + d.lost, 0),
  };
}
+// ─── LLM CALL ─────────────────────────────────────────────────────────────────
/**
 * Sends one request to the Anthropic Messages endpoint and returns the reply text.
 *
 * NOTE(review): the request carries no authentication or API-version headers, so it
 * presumably relies on the hosting environment to supply them — confirm before
 * running this outside that environment.
 *
 * @param {{role: string, content: string}[]} messages - Conversation turns to send.
 * @param {string} systemPrompt - System prompt for the request.
 * @returns {Promise<string>} Text of the first "text" content block, or "" if the
 *   response contains none.
 * @throws {Error} When the HTTP response status is not in the 2xx range.
 */
async function callClaude(messages, systemPrompt) {
  const resp = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "claude-sonnet-4-20250514",
      max_tokens: 1000,
      system: systemPrompt,
      messages,
    }),
  });

  // Without this check an error response (which has no `content`) would be returned
  // as an empty string and show up downstream as a model reply that failed to parse.
  if (!resp.ok) {
    let detail = "";
    try {
      detail = (await resp.json())?.error?.message ?? "";
    } catch {
      // The error body was not JSON; the status code alone is reported.
    }
    throw new Error(`Anthropic API request failed: HTTP ${resp.status}${detail ? ` — ${detail}` : ""}`);
  }

  const data = await resp.json();
  return data.content?.find((block) => block.type === "text")?.text || "";
}
+// ─── SYSTEM PROMPT ────────────────────────────────────────────────────────────
// System prompt for every LLM call. The lead time, write-off cadence and rate, and the
// fill-rate target are interpolated from CFG so the rules described to the agent cannot
// drift from the rules the simulation applies. Percentages are rounded to two decimals
// to keep floating-point noise out of the text.
const SYSTEM_PROMPT = `You are an expert inventory optimization agent embedded in a stochastic simulation environment.
YOUR ROLE:
You receive a JSON snapshot of the current simulation state and must decide the REORDER POINT (ROP) — the inventory threshold that triggers a new order.
ENVIRONMENT RULES:
- Orders arrive exactly LEAD_TIME=${CFG.LEAD_TIME} days after placement
- You place an order whenever inventory <= your ROP
- Order quantity = ROP - current_inventory + mean_demand * LEAD_TIME (already handled)
- Every ${CFG.WRITE_OFF_FREQ} days, ${Number((CFG.WRITE_OFF_RATE * 100).toFixed(2))}% of inventory is written off (waste/expiry)
- Reward = fill_rate at end of simulation (target: >=${Number((CFG.DEFAULT_SL * 100).toFixed(2))}%)
- Reward is SPARSE: fill rate only stabilizes after ~50 days
REASONING REQUIREMENTS - you MUST do all 4:
1. SUBGOAL DECOMPOSITION: Break the problem into explicit subgoals (e.g., "build buffer", "survive spike risk", "minimize waste")
2. STATE ANALYSIS: Interpret current inventory, demand trend, stockout risk, fill rate trajectory
3. DECISION: Output a specific numeric ROP with clear justification
4. RECOVERY PLAN: If fill rate < ${Number((CFG.DEFAULT_SL * 100).toFixed(2))}% or recent stockouts occurred, state your recovery strategy
CRITICAL: You must reason BEYOND the next step. Consider that your ROP today affects inventory ${CFG.LEAD_TIME}+ days from now.
For spiking demand: ROP must account for rare but catastrophic spikes.
For high-variance: wider safety buffers needed.
For stable demand: tighter ROP to avoid write-offs.
OUTPUT FORMAT — respond with this exact JSON (no markdown fences):
{
  "subgoals": ["subgoal 1", "subgoal 2", "subgoal 3"],
  "state_analysis": "2-3 sentence analysis of current state and risks",
  "recovery_plan": "what you're doing to recover or maintain performance",
  "reorder_point": <number>,
  "confidence": "high|medium|low",
  "reasoning_depth": "brief note on what makes this decision non-trivial"
}`;
+// ─── MAIN COMPONENT ───────────────────────────────────────────────────────────
+export default function StockOracleAgent() {
+  const [envKey, setEnvKey]           = useState("gamma_poisson"); // key into ENVS for the selected demand environment
+  const [phase, setPhase]             = useState("config"); // config | running | done
+  const [agentLog, setAgentLog]       = useState([]);       // [{day, snapshot, decision, rop, fillRateCum}]
+  const [simTimeline, setSimTimeline] = useState([]); // per-day records of the LLM-driven run; data source for the charts
+  const [baselineResults, setBaselineResults] = useState({}); // baseline key -> result of runOneSimulation
+  const [agentMetrics, setAgentMetrics]       = useState(null); // totals of the LLM-driven run; null until the run ends
+  const [runningDay, setRunningDay]   = useState(0); // most recently simulated day; drives the progress bar
+  const [statusMsg, setStatusMsg]     = useState(""); // text shown in the status bar
+  const [memoryBank, setMemoryBank]   = useState([]);       // persistent cross-turn memory
+  const [conversationHistory, setConversationHistory] = useState([]); // every user/assistant message exchanged in the current run
+  const [activeTab, setActiveTab]     = useState("live");   // live | reasoning | compare | memory
+  const abortRef = useRef(false); // set to true by stopSim; the simulation loop checks it at the start of each day
+  const logEndRef = useRef(null); // attached to an empty element rendered after the last reasoning entry
+  useEffect(()=>{if(logEndRef.current)logEndRef.current.scrollIntoView({behavior:"smooth"});},[agentLog]); // scroll that element into view whenever agentLog changes
+  // ── Run baselines (instant, no API) ──
+  const runBaselines = useCallback((demandSeries) => {
+    const results = {};
+    Object.entries(BASELINES).forEach(([k,ag])=>{
+      results[k]=runOneSimulation((h,dm,ds)=>ag.compute(h,dm,ds), demandSeries, envKey);
+    });
+    setBaselineResults(results);
+    return results;
+  },[envKey]);
+  // ── Build persistent memory summary ──
+  function updateMemory(prevMemory, decision, day, metrics){
+    const entry = {
+      day,
+      rop: decision.reorder_point,
+      confidence: decision.confidence,
+      fill_rate: metrics?.fillRate ? Math.round(metrics.fillRate*1000)/10 : null,
+      stockouts_in_window: metrics?.stockOuts??0,
+      key_insight: decision.state_analysis?.slice(0,80)+"...",
+    };
+    // Keep last 15 memory entries as compressed state
+    const newMem = [...prevMemory.slice(-14), entry];
+    return newMem;
+  }
+  // ── Main simulation loop ──
+  const runAgentSimulation = useCallback(async () => {
+    abortRef.current = false;
+    setPhase("running");
+    setAgentLog([]);
+    setSimTimeline([]);
+    setAgentMetrics(null);
+    setMemoryBank([]);
+    setConversationHistory([]);
+    setRunningDay(0);
+    const demandSeries = buildDemandSeries(envKey, CFG.SIM_DAYS);
+    // Run baselines in background
+    setStatusMsg("Computing baseline agents...");
+    runBaselines(demandSeries);
+    // Agent-driven simulation
+    // We step through the sim, calling Claude every DECISION_INTERVAL days
+    const DECISION_INTERVAL = 5; // Claude decides ROP every 5 days
+    let inventory = 0;
+    const orders = [];
+    let totDemand=0, totFulfilled=0, totWriteOff=0, stockOuts=0, lostSales=0;
+    const timeline = [];
+    let currentROP = arr_mean(demandSeries.slice(0,CFG.HISTO_DAYS)) * CFG.LEAD_TIME; // initial ROP
+    let localMemory = [];
+    let localConvo = [];
+    let localLog = [];
+    for(let day=0; day<CFG.SIM_DAYS; day++){
+      if(abortRef.current) break;
+      const demand = demandSeries[day];
+      const hist   = demandSeries.slice(Math.max(0,day-CFG.HISTO_DAYS), day);
+      // Deliver orders
+      const arrivals = orders.filter(o=>o.arr===day);
+      const delivered = arrivals.reduce((s,o)=>s+o.qty,0);
+      inventory += delivered;
+      orders.splice(0,orders.length,...orders.filter(o=>o.arr>day));
+      const preInv = inventory;
+      // Fulfill demand
+      const fulfilled = Math.min(demand, inventory);
+      inventory = Math.max(0, inventory-demand);
+      const lost = Math.max(0, demand-fulfilled);
+      if(lost>0) stockOuts++;
+      lostSales += lost;
+      // Reorder check using current ROP
+      let ordered=0;
+      if(hist.length>=5 && day<CFG.SIM_DAYS-CFG.LEAD_TIME){
+        if(inventory<=currentROP){
+          const qty=Math.ceil(currentROP-inventory+arr_mean(hist)*CFG.LEAD_TIME);
+          orders.push({arr:day+CFG.LEAD_TIME,qty});
+          ordered=qty;
+        }
+      }
+      // Write-off
+      let wo=0;
+      if(day%CFG.WRITE_OFF_FREQ===0){wo=Math.floor(inventory*CFG.WRITE_OFF_RATE);inventory-=wo;totWriteOff+=wo;}
+      totDemand+=demand; totFulfilled+=fulfilled;
+      const fillRateCum = totDemand>0?totFulfilled/totDemand:0;
+      const tEntry = {day,demand,inventory:preInv,inventoryAfter:inventory,fulfilled,lost,rop:Math.round(currentROP),ordered,wo,delivered,fillRateCum};
+      timeline.push(tEntry);
+      setSimTimeline([...timeline]);
+      setRunningDay(day);
+      // ── LLM Decision every DECISION_INTERVAL days ──
+      if(day>=CFG.HISTO_DAYS && day%DECISION_INTERVAL===0 && day<CFG.SIM_DAYS-CFG.LEAD_TIME){
+        setStatusMsg(`Day ${day}: Agent reasoning...`);
+        const snapshot = buildEnvSnapshot(demandSeries, timeline, day);
+        // Build memory context
+        const memoryContext = localMemory.length>0
+          ? `\nYOUR MEMORY FROM PREVIOUS DECISIONS:\n${JSON.stringify(localMemory.slice(-8),null,2)}`
+          : "";
+        const userMsg = {
+          role:"user",
+          content: `ENVIRONMENT SNAPSHOT — Day ${day}/${CFG.SIM_DAYS}\n${JSON.stringify(snapshot,null,2)}${memoryContext}\n\nDecide your reorder_point for the next ${DECISION_INTERVAL} days.`
+        };
+        // Maintain rolling conversation (last 6 turns to stay in context)
+        const trimmedConvo = localConvo.slice(-6);
+        const fullMessages = [...trimmedConvo, userMsg];
+        try {
+          const rawResp = await callClaude(fullMessages, SYSTEM_PROMPT);
+          let decision;
+          try {
+            const cleaned = rawResp.replace(/```json|```/g,"").trim();
+            decision = JSON.parse(cleaned);
+          } catch {
+            // Fallback: extract reorder_point with regex
+            const match = rawResp.match(/"reorder_point"\s*:\s*(\d+\.?\d*)/);
+            decision = {
+              subgoals:["parse error — fallback"],
+              state_analysis: rawResp.slice(0,200),
+              recovery_plan:"N/A",
+              reorder_point: match ? parseFloat(match[1]) : currentROP,
+              confidence:"low",
+              reasoning_depth:"parse failed",
+            };
+          }
+          currentROP = Math.max(0, decision.reorder_point||currentROP);
+          // Update conversation history
+          const assistantMsg = {role:"assistant", content:rawResp};
+          localConvo = [...localConvo, userMsg, assistantMsg];
+          setConversationHistory([...localConvo]);
+          // Update memory bank
+          localMemory = updateMemory(localMemory, decision, day, {fillRate:fillRateCum, stockOuts});
+          setMemoryBank([...localMemory]);
+          // Add to agent log
+          const logEntry = {day, snapshot, decision, rop:currentROP, fillRateCum};
+          localLog = [...localLog, logEntry];
+          setAgentLog([...localLog]);
+        } catch(e) {
+          setStatusMsg(`Day ${day}: API error — ${e.message}`);
+        }
+        // Small pause to not slam API
+        await new Promise(r=>setTimeout(r,200));
+      }
+    }
+    // Final metrics
+    const finalMetrics = {
+      fillRate:totDemand>0?totFulfilled/totDemand:0,
+      stockOuts, lostSales, totWriteOff, totDemand, totFulfilled
+    };
+    setAgentMetrics(finalMetrics);
+    setSimTimeline([...timeline]);
+    setPhase("done");
+    setStatusMsg("Simulation complete.");
+    setActiveTab("compare");
+  }, [envKey, runBaselines]);
+  const stopSim = () => { abortRef.current=true; setStatusMsg("Stopped by user."); setPhase("done"); };
+  // ── Render helpers ──
+  const env = ENVS[envKey]; // definition of the currently selected demand environment
+  const latestLog = agentLog[agentLog.length-1]; // most recent decision entry; undefined while agentLog is empty
+  function FillBadge({rate}){
+    const c=rate>=0.95?C.green:rate>=0.85?C.amber:C.red;
+    return <span style={{color:c,fontWeight:700}}>{rate?(rate*100).toFixed(1)+"%":"—"}</span>;
+  }
+  function Panel({title,children,style={}}){
+    return(
+      <div style={{background:C.panel,border:`1px solid ${C.border}`,borderRadius:10,padding:"16px 18px",...style}}>
+        {title&&<div style={{fontSize:9,letterSpacing:4,color:C.muted,marginBottom:12,textTransform:"uppercase"}}>{title}</div>}
+        {children}
+      </div>
+    );
+  }
+  function Tab({id,label}){
+    const active=activeTab===id;
+    return(
+      <button onClick={()=>setActiveTab(id)} style={{
+        background:active?C.border2:"transparent",
+        border:`1px solid ${active?C.border2:"transparent"}`,
+        borderRadius:6,padding:"7px 14px",
+        color:active?C.text:C.muted,fontFamily:"inherit",
+        fontSize:11,cursor:"pointer",letterSpacing:1,transition:"all 0.15s",
+      }}>{label}</button>
+    );
+  }
+  const agentTimelineFillRates = simTimeline.map(t=>({day:t.day,agent:t.fillRateCum})); // {day, agent} pairs: the agent run's cumulative fill rate per simulated day
+  return(
+    <div style={{minHeight:"100vh",background:C.bg,fontFamily:"'JetBrains Mono',monospace",color:C.text,padding:"24px 16px"}}>
+      <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600&family=Clash+Display:wght@600;700&display=swap" rel="stylesheet"/>
+      {/* ── HEADER ── */}
+      <div style={{maxWidth:1200,margin:"0 auto"}}>
+        <div style={{display:"flex",justifyContent:"space-between",alignItems:"flex-start",marginBottom:28,flexWrap:"wrap",gap:12}}>
+          <div>
+            <div style={{fontSize:9,letterSpacing:5,color:C.muted,marginBottom:6}}>HACKATHON · LONG-HORIZON REASONING ENVIRONMENT</div>
+            <h1 style={{margin:0,fontSize:"clamp(32px,5vw,52px)",fontWeight:700,letterSpacing:-1,
+              background:`linear-gradient(120deg,${C.teal},${C.blue},${C.purple})`,
+              WebkitBackgroundClip:"text",WebkitTextFillColor:"transparent",lineHeight:1.1,
+              fontFamily:"'JetBrains Mono',monospace",
+            }}>STOCK ORACLE</h1>
+            <div style={{fontSize:10,color:C.muted,marginTop:5,letterSpacing:2}}>
+              LLM AGENT · INVENTORY OPTIMIZATION · SPARSE REWARD · MULTI-STEP PLANNING
+            </div>
+          </div>
+          {phase==="done"&&agentMetrics&&(
+            <div style={{display:"flex",gap:10,flexWrap:"wrap"}}>
+              {[
+                {label:"AGENT FILL RATE",val:<FillBadge rate={agentMetrics.fillRate}/>,highlight:true},
+                {label:"STOCKOUTS",val:agentMetrics.stockOuts},
+                {label:"LOST SALES",val:agentMetrics.lostSales.toLocaleString()},
+                {label:"LLM DECISIONS",val:agentLog.length},
+              ].map(({label,val,highlight})=>(
+                <div key={label} style={{background:highlight?"#0d1f18":C.panel,border:`1px solid ${highlight?C.green+"30":C.border}`,borderRadius:8,padding:"10px 16px",textAlign:"center"}}>
+                  <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:3}}>{label}</div>
+                  <div style={{fontSize:22,fontWeight:600,letterSpacing:1}}>{val}</div>
+                </div>
+              ))}
+            </div>
+          )}
+        </div>
+        {/* ── CONFIG ── */}
+        {phase==="config"&&(
+          <div style={{display:"grid",gridTemplateColumns:"1fr 1fr",gap:16,marginBottom:20,maxWidth:800}}>
+            <Panel title="Demand Environment">
+              {Object.entries(ENVS).map(([k,e])=>(
+                <button key={k} onClick={()=>setEnvKey(k)} style={{
+                  display:"block",width:"100%",textAlign:"left",
+                  background:envKey===k?"#0f1e2e":"transparent",
+                  border:`1px solid ${envKey===k?e.color+"50":C.border}`,
+                  borderRadius:6,padding:"10px 12px",marginBottom:6,cursor:"pointer",fontFamily:"inherit",
+                  transition:"all 0.15s",
+                }}>
+                  <div style={{display:"flex",justifyContent:"space-between",alignItems:"center"}}>
+                    <span style={{fontSize:12,color:envKey===k?e.color:C.muted,fontWeight:500}}>{e.label}</span>
+                    <span style={{fontSize:9,color:e.color,border:`1px solid ${e.color}40`,borderRadius:3,padding:"2px 6px"}}>{e.tag}</span>
+                  </div>
+                  <div style={{fontSize:10,color:C.dim,marginTop:4,lineHeight:1.5}}>{e.desc}</div>
+                </button>
+              ))}
+            </Panel>
+            <Panel title="About This Environment">
+              <div style={{fontSize:11,color:C.muted,lineHeight:1.8}}>
+                {[
+                  ["Sparse Reward","Fill rate only converges after 50+ days. No reward signal per individual decision."],
+                  ["Multi-Step Planning","Each ROP decision affects inventory 3 days forward (lead time). Cascading errors are common."],
+                  ["State Tracking","Agent maintains memory across 120 days: inventory levels, order pipeline, demand patterns."],
+                  ["Error Recovery","Post-stockout, agent must over-order to rebuild buffer without triggering write-off waste."],
+                  ["Extended Horizon","120 decisions × 5-day intervals. LLM conversation history managed via rolling window + memory bank."],
+                ].map(([t,d])=>(
+                  <div key={t} style={{marginBottom:10}}>
+                    <span style={{color:C.teal,fontWeight:600}}>{t}: </span>
+                    <span style={{color:C.muted}}>{d}</span>
+                  </div>
+                ))}
+              </div>
+              <button onClick={runAgentSimulation} style={{
+                width:"100%",marginTop:16,
+                background:"#0d1f18",border:`1px solid ${C.green}60`,
+                borderRadius:7,padding:"14px",color:C.green,
+                fontFamily:"inherit",fontSize:13,cursor:"pointer",
+                letterSpacing:2,fontWeight:600,transition:"all 0.2s",
+              }}>
+                ▶ LAUNCH AGENT SIMULATION
+              </button>
+            </Panel>
+          </div>
+        )}
+        {/* ── RUNNING / DONE ── */}
+        {(phase==="running"||phase==="done")&&(
+          <>
+            {/* Status bar */}
+            <div style={{display:"flex",justifyContent:"space-between",alignItems:"center",marginBottom:16,flexWrap:"wrap",gap:8}}>
+              <div style={{display:"flex",gap:8,alignItems:"center",fontSize:11}}>
+                {phase==="running"&&<span style={{color:C.amber,animation:"pulse 1s infinite"}}
+                  >●</span>}
+                <span style={{color:C.muted}}>{statusMsg}</span>
+                {phase==="running"&&(
+                  <div style={{width:200,height:4,background:C.border,borderRadius:2,overflow:"hidden"}}>
+                    <div style={{height:"100%",width:`${(runningDay/CFG.SIM_DAYS)*100}%`,background:C.teal,transition:"width 0.3s",borderRadius:2}}/>
+                  </div>
+                )}
+              </div>
+              <div style={{display:"flex",gap:8}}>
+                {phase==="running"&&<button onClick={stopSim} style={{background:"#2a0f0f",border:`1px solid ${C.red}40`,borderRadius:6,padding:"6px 14px",color:C.red,fontFamily:"inherit",fontSize:11,cursor:"pointer"}}>■ STOP</button>}
+                <button onClick={()=>{setPhase("config");setAgentLog([]);setSimTimeline([]);setBaselineResults({});setAgentMetrics(null);}} style={{background:C.panel,border:`1px solid ${C.border}`,borderRadius:6,padding:"6px 14px",color:C.muted,fontFamily:"inherit",fontSize:11,cursor:"pointer"}}>↺ RESET</button>
+              </div>
+            </div>
+            {/* Tabs */}
+            <div style={{display:"flex",gap:6,marginBottom:14,flexWrap:"wrap"}}>
+              <Tab id="live"      label="LIVE SIM"/>
+              <Tab id="reasoning" label={`AGENT REASONING (${agentLog.length})`}/>
+              <Tab id="compare"   label="COMPARE AGENTS"/>
+              <Tab id="memory"    label={`MEMORY BANK (${memoryBank.length})`}/>
+            </div>
+            {/* ── TAB: LIVE SIM ── */}
+            {activeTab==="live"&&(
+              <div style={{display:"flex",flexDirection:"column",gap:14}}>
+                <Panel title="Inventory · Demand · Reorder Point">
+                  <ResponsiveContainer width="100%" height={200}>
+                    <AreaChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
+                      <defs>
+                        <linearGradient id="ig" x1="0" y1="0" x2="0" y2="1">
+                          <stop offset="5%" stopColor={C.blue} stopOpacity={0.25}/>
+                          <stop offset="95%" stopColor={C.blue} stopOpacity={0}/>
+                        </linearGradient>
+                      </defs>
+                      <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
+                      <YAxis tick={{fontSize:9,fill:C.muted}} width={45}/>
+                      <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10,borderRadius:6}} labelFormatter={d=>`Day ${d}`}/>
+                      <Area type="monotone" dataKey="inventory" stroke={C.blue} strokeWidth={1.5} fill="url(#ig)" dot={false} name="Inventory"/>
+                      <Line type="monotone" dataKey="demand" stroke={C.red} strokeWidth={1} dot={false} name="Demand"/>
+                      <Line type="monotone" dataKey="rop" stroke={C.amber} strokeWidth={1} strokeDasharray="5 3" dot={false} name="Agent ROP"/>
+                    </AreaChart>
+                  </ResponsiveContainer>
+                </Panel>
+                <div style={{display:"grid",gridTemplateColumns:"1fr 1fr",gap:14}}>
+                  <Panel title="Cumulative Fill Rate">
+                    <ResponsiveContainer width="100%" height={130}>
+                      <LineChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
+                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
+                        <YAxis domain={[0,1]} tickFormatter={v=>`${(v*100).toFixed(0)}%`} tick={{fontSize:9,fill:C.muted}} width={38}/>
+                        <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="4 3"/>
+                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}} formatter={v=>`${(v*100).toFixed(1)}%`}/>
+                        <Line type="monotone" dataKey="fillRateCum" stroke={C.teal} strokeWidth={2} dot={false} name="Fill Rate"/>
+                      </LineChart>
+                    </ResponsiveContainer>
+                  </Panel>
+                  <Panel title="Lost Sales Per Day">
+                    <ResponsiveContainer width="100%" height={130}>
+                      <BarChart data={simTimeline} barSize={2} margin={{top:4,right:4,bottom:0,left:0}}>
+                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
+                        <YAxis tick={{fontSize:9,fill:C.muted}} width={38}/>
+                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}}/>
+                        <Bar dataKey="lost" fill={C.red} opacity={0.8} name="Lost Sales"/>
+                      </BarChart>
+                    </ResponsiveContainer>
+                  </Panel>
+                </div>
+              </div>
+            )}
+            {/* ── TAB: AGENT REASONING ── */}
+            {activeTab==="reasoning"&&(
+              <div style={{display:"flex",flexDirection:"column",gap:10,maxHeight:"72vh",overflowY:"auto",paddingRight:4}}>
+                {agentLog.length===0&&<div style={{color:C.muted,fontSize:12,padding:20,textAlign:"center"}}>Waiting for first LLM decision (after day {CFG.HISTO_DAYS})...</div>}
+                {agentLog.map((entry,i)=>{
+                  const d=entry.decision;
+                  const isLatest=i===agentLog.length-1;
+                  return(
+                    <div key={i} style={{
+                      background:isLatest?"#0c1a24":C.panel,
+                      border:`1px solid ${isLatest?C.teal+"40":C.border}`,
+                      borderRadius:10,padding:"14px 16px",
+                      borderLeft:`3px solid ${isLatest?C.teal:C.border2}`,
+                    }}>
+                      <div style={{display:"flex",justifyContent:"space-between",alignItems:"center",marginBottom:10,flexWrap:"wrap",gap:6}}>
+                        <div style={{fontSize:11,color:C.teal,fontWeight:600}}>Day {entry.day} — Decision #{i+1}</div>
+                        <div style={{display:"flex",gap:8,flexWrap:"wrap"}}>
+                          <span style={{fontSize:10,color:C.muted}}>ROP: <span style={{color:C.amber,fontWeight:600}}>{Math.round(entry.rop)}</span></span>
+                          <span style={{fontSize:10,color:C.muted}}>Fill: <FillBadge rate={entry.fillRateCum}/></span>
+                          <span style={{fontSize:9,padding:"2px 7px",borderRadius:3,
+                            background:d.confidence==="high"?"#0d1f18":d.confidence==="medium"?"#1f1a0d":"#1f0d0d",
+                            color:d.confidence==="high"?C.green:d.confidence==="medium"?C.amber:C.red,
+                            border:`1px solid currentColor`,opacity:0.8,
+                          }}>{d.confidence?.toUpperCase()||"?"}</span>
+                        </div>
+                      </div>
+                      {/* Subgoals */}
+                      {d.subgoals?.length>0&&(
+                        <div style={{marginBottom:10}}>
+                          <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:6}}>SUBGOAL DECOMPOSITION</div>
+                          <div style={{display:"flex",gap:6,flexWrap:"wrap"}}>
+                            {d.subgoals.map((sg,j)=>(
+                              <div key={j} style={{fontSize:10,background:C.dim,border:`1px solid ${C.border2}`,borderRadius:4,padding:"4px 9px",color:C.blue}}>
+                                {j+1}. {sg}
+                              </div>
+                            ))}
+                          </div>
+                        </div>
+                      )}
+                      {/* State analysis */}
+                      <div style={{marginBottom:8}}>
+                        <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:5}}>STATE ANALYSIS</div>
+                        <div style={{fontSize:11,color:C.text,lineHeight:1.7,background:C.dim,borderRadius:6,padding:"8px 10px"}}>{d.state_analysis}</div>
+                      </div>
+                      {/* Recovery */}
+                      {d.recovery_plan&&d.recovery_plan!=="N/A"&&(
+                        <div style={{marginBottom:8}}>
+                          <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:5}}>RECOVERY PLAN</div>
+                          <div style={{fontSize:11,color:C.amber,lineHeight:1.6,background:"#1a1400",borderRadius:6,padding:"8px 10px",border:`1px solid ${C.amber}20`}}>{d.recovery_plan}</div>
+                        </div>
+                      )}
+                      {/* Reasoning depth */}
+                      {d.reasoning_depth&&(
+                        <div style={{fontSize:10,color:C.muted,marginTop:6}}>
+                          <span style={{color:C.purple}}>Reasoning: </span>{d.reasoning_depth}
+                        </div>
+                      )}
+                    </div>
+                  );
+                })}
+                <div ref={logEndRef}/>
+              </div>
+            )}
+            {/* ── TAB: COMPARE ── */}
+            {activeTab==="compare"&&(
+              <div style={{display:"flex",flexDirection:"column",gap:14}}>
+                {/* Scorecard */}
+                <div style={{display:"grid",gridTemplateColumns:"repeat(5,1fr)",gap:10}}>
+                  {/* Agent */}
+                  {agentMetrics&&(
+                    <div style={{background:"#0a1e18",border:`2px solid ${C.teal}40`,borderRadius:10,padding:"14px",gridColumn:"1"}}>
+                      <div style={{fontSize:9,color:C.teal,letterSpacing:3,marginBottom:8}}>🤖 LLM AGENT</div>
+                      {[["Fill Rate",<FillBadge rate={agentMetrics.fillRate}/>],["Stockouts",agentMetrics.stockOuts],["Lost Sales",agentMetrics.lostSales.toLocaleString()],["Write-Offs",agentMetrics.totWriteOff.toLocaleString()]].map(([l,v])=>(
+                        <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:11,marginBottom:5}}>
+                          <span style={{color:C.muted}}>{l}</span><span style={{fontWeight:600}}>{v}</span>
+                        </div>
+                      ))}
+                    </div>
+                  )}
+                  {/* Baselines */}
+                  {Object.entries(baselineResults).map(([bk,br])=>(
+                    <div key={bk} style={{background:C.panel,border:`1px solid ${BASELINES[bk].color}30`,borderRadius:10,padding:"14px"}}>
+                      <div style={{fontSize:9,color:BASELINES[bk].color,letterSpacing:3,marginBottom:8}}>{BASELINES[bk].label.toUpperCase()}</div>
+                      {[["Fill Rate",<FillBadge rate={br.metrics.fillRate}/>],["Stockouts",br.metrics.stockOuts],["Lost Sales",br.metrics.lostSales.toLocaleString()],["Write-Offs",br.metrics.totWriteOff.toLocaleString()]].map(([l,v])=>(
+                        <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:11,marginBottom:5}}>
+                          <span style={{color:C.muted}}>{l}</span><span style={{fontWeight:600}}>{v}</span>
+                        </div>
+                      ))}
+                    </div>
+                  ))}
+                </div>
+                {/* Fill rate comparison chart */}
+                {Object.keys(baselineResults).length>0&&(
+                  <Panel title="Fill Rate Convergence — Agent vs All Baselines">
+                    <div style={{fontSize:10,color:C.muted,marginBottom:10}}>
+                      Dashed line = 95% target. The LLM agent ({C.teal}) must beat baselines through structured reasoning, not hard-coded rules.
+                    </div>
+                    <ResponsiveContainer width="100%" height={220}>
+                      <LineChart margin={{top:4,right:8,bottom:0,left:0}}>
+                        <XAxis dataKey="day" type="number" domain={[0,CFG.SIM_DAYS]} tick={{fontSize:9,fill:C.muted}}/>
+                        <YAxis domain={[0,1]} tickFormatter={v=>`${(v*100).toFixed(0)}%`} tick={{fontSize:9,fill:C.muted}} width={40}/>
+                        <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="5 3" label={{value:"95% target",fontSize:9,fill:C.amber}}/>
+                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}} formatter={v=>`${(v*100).toFixed(1)}%`}/>
+                        <Legend wrapperStyle={{fontSize:10}}/>
+                        {/* Agent line */}
+                        <Line data={agentTimelineFillRates} type="monotone" dataKey="agent" stroke={C.teal} strokeWidth={2.5} dot={false} name="LLM Agent"/>
+                        {/* Baselines */}
+                        {Object.entries(baselineResults).map(([bk,br])=>(
+                          <Line key={bk} data={br.timeline.map(t=>({day:t.day,fillRate:t.fillRateCum}))}
+                            type="monotone" dataKey="fillRate" stroke={BASELINES[bk].color} strokeWidth={1}
+                            strokeDasharray="3 2" dot={false} name={BASELINES[bk].label}/>
+                        ))}
+                      </LineChart>
+                    </ResponsiveContainer>
+                  </Panel>
+                )}
+                {/* ROP decisions overlay */}
+                {agentLog.length>0&&(
+                  <Panel title="Agent Reorder Point Over Time vs Demand Distribution">
+                    <ResponsiveContainer width="100%" height={160}>
+                      <AreaChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
+                        <defs>
+                          <linearGradient id="dg" x1="0" y1="0" x2="0" y2="1">
+                            <stop offset="5%" stopColor={C.red} stopOpacity={0.15}/>
+                            <stop offset="95%" stopColor={C.red} stopOpacity={0}/>
+                          </linearGradient>
+                        </defs>
+                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
+                        <YAxis tick={{fontSize:9,fill:C.muted}} width={45}/>
+                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}}/>
+                        <Area type="monotone" dataKey="demand" stroke={C.red} strokeWidth={1} fill="url(#dg)" dot={false} name="Demand"/>
+                        <Line type="monotone" dataKey="rop" stroke={C.amber} strokeWidth={2} dot={false} name="Agent ROP"/>
+                      </AreaChart>
+                    </ResponsiveContainer>
+                  </Panel>
+                )}
+              </div>
+            )}
+            {/* ── TAB: MEMORY BANK ── */}
+            {activeTab==="memory"&&(
+              <div style={{display:"flex",flexDirection:"column",gap:10}}>
+                <Panel>
+                  <div style={{fontSize:11,color:C.muted,lineHeight:1.8,marginBottom:12}}>
+                    The memory bank is a compressed rolling state passed to the LLM on every decision turn. It enables the agent to reason beyond its context window — tracking performance trends, past ROP decisions, and emerging patterns across the full 120-day horizon.
+                  </div>
+                  <div style={{display:"grid",gridTemplateColumns:"repeat(auto-fill,minmax(200px,1fr))",gap:8}}>
+                    {memoryBank.map((m,i)=>(
+                      <div key={i} style={{background:C.dim,border:`1px solid ${C.border}`,borderRadius:7,padding:"10px 12px"}}>
+                        <div style={{fontSize:10,color:C.teal,fontWeight:600,marginBottom:6}}>Day {m.day}</div>
+                        {[
+                          ["ROP Set",m.rop],
+                          ["Confidence",m.confidence],
+                          ["Fill Rate",m.fill_rate?(m.fill_rate+"%"):"—"],
+                          ["Stockouts",m.stockouts_in_window],
+                        ].map(([l,v])=>(
+                          <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:10,marginBottom:4}}>
+                            <span style={{color:C.muted}}>{l}</span>
+                            <span style={{color:C.text}}>{v}</span>
+                          </div>
+                        ))}
+                        <div style={{fontSize:9,color:C.muted,marginTop:6,lineHeight:1.5,borderTop:`1px solid ${C.border}`,paddingTop:5}}>
+                          {m.key_insight}
+                        </div>
+                      </div>
+                    ))}
+                    {memoryBank.length===0&&<div style={{color:C.muted,fontSize:11}}>Memory builds as agent makes decisions...</div>}
+                  </div>
+                </Panel>
+              </div>
+            )}
+          </>
+        )}
+        {/* ── FOOTER ── */}
+        <div style={{marginTop:28,paddingTop:16,borderTop:`1px solid ${C.border}`,display:"grid",gridTemplateColumns:"repeat(4,1fr)",gap:12,fontSize:10,color:C.dim}}>
+          {[
+            ["Environment","Stochastic inventory simulation with 4 demand regimes (Gamma-Poisson, Bimodal HV, Spiking, Stable Gamma). Mirrors real supply-chain uncertainty."],
+            ["Agent Architecture","Claude Sonnet 4 called every 5 simulation days. Rolling 6-turn conversation + compressed memory bank enables reasoning beyond context window."],
+            ["Reward Structure","Sparse: fill rate signal only meaningful after 50+ days. Agent must plan across 120-day horizon with no per-step guidance."],
+            ["Benchmarking","LLM agent compared against 4 rule-based baselines: Base, Safety Stock, Oracle Forecast, Monte Carlo — all from the original Python codebase."],
+          ].map(([t,d])=>(
+            <div key={t}>
+              <div style={{color:C.muted,fontWeight:600,marginBottom:4,fontSize:9,letterSpacing:2}}>{t.toUpperCase()}</div>
+              <div style={{lineHeight:1.7}}>{d}</div>
+            </div>
+          ))}
+        </div>
+      </div>
+      <style>{`@keyframes pulse{0%,100%{opacity:1}50%{opacity:0.3}}`}</style>
+    </div>
+  );
+}