Spaces:

ademarteau
/

RL-Inventory-Simulations

Runtime error

File size: 42,701 Bytes

9a68962

import { useState, useRef, useCallback, useEffect } from "react";
import { LineChart, Line, XAxis, YAxis, Tooltip, ResponsiveContainer, ReferenceLine, AreaChart, Area, BarChart, Bar, Legend } from "recharts";

// ─── DESIGN TOKENS ────────────────────────────────────────────────────────────
const C = {
  bg:       "#07090f",
  panel:    "#0d1117",
  border:   "#161d2a",
  border2:  "#1e2d40",
  text:     "#c9d5e0",
  muted:    "#3a5060",
  dim:      "#1a2535",
  green:    "#34d399",
  blue:     "#38bdf8",
  amber:    "#fbbf24",
  red:      "#f87171",
  purple:   "#a78bfa",
  teal:     "#2dd4bf",
};

// ─── CONFIG (mirrors config.py exactly) ───────────────────────────────────────
const CFG = {
  LEAD_TIME:           3,
  BASE_STOCK:          0,
  DEFAULT_SL:          0.95,
  WRITE_OFF_RATE:      0.01,
  WRITE_OFF_FREQ:      7,
  HISTO_DAYS:          30,
  SIM_DAYS:            120,
};

// ─── MATH HELPERS ─────────────────────────────────────────────────────────────
function normalRandom() {
  let u=0,v=0; while(!u)u=Math.random(); while(!v)v=Math.random();
  return Math.sqrt(-2*Math.log(u))*Math.cos(2*Math.PI*v);
}
function gammaRandom(shape,scale){
  if(shape<1)return gammaRandom(1+shape,scale)*Math.pow(Math.random(),1/shape);
  const d=shape-1/3,c=1/Math.sqrt(9*d);
  while(true){let x,v;do{x=normalRandom();v=1+c*x;}while(v<=0);v=v*v*v;const u=Math.random();
    if(u<1-0.0331*x*x*x*x)return d*v*scale;
    if(Math.log(u)<0.5*x*x+d*(1-v+Math.log(v)))return d*v*scale;}
}
function poissonRandom(lambda){let L=Math.exp(-lambda),k=0,p=1;do{k++;p*=Math.random();}while(p>L);return k-1;}
function expRandom(rate){return-Math.log(Math.random())/rate;}
function arr_mean(a){return a.length?a.reduce((s,x)=>s+x,0)/a.length:0;}
function arr_std(a){if(a.length<2)return 0;const m=arr_mean(a);return Math.sqrt(a.reduce((s,x)=>s+(x-m)**2,0)/(a.length-1));}
function quantile(sorted,q){return sorted[Math.floor(sorted.length*q)];}

// ─── DEMAND ENVIRONMENTS (mirrors demand_environment.py) ──────────────────────
const ENVS = {
  gamma_poisson:{
    label:"Gamma–Poisson",tag:"MODERATE",color:C.green,
    desc:"90% Gamma(7,16) + 10% Poisson(80). Stable with rare spikes.",
    sample:()=>Math.random()<0.9?Math.max(0,Math.round(gammaRandom(7,16))):poissonRandom(80),
    demMean:112,demStd:38,
  },
  bimodal_hv:{
    label:"Bimodal High-Var",tag:"HARD",color:C.amber,
    desc:"50% Gamma(low mean) + 50% Gamma(high mean). Extremely unpredictable.",
    sample:()=>Math.random()<0.5?Math.max(0,Math.round(gammaRandom(7,3))):Math.max(0,Math.round(gammaRandom(7,29))),
    demMean:112,demStd:95,
  },
  spiking:{
    label:"Sporadic Spiking",tag:"EXTREME",color:C.red,
    desc:"95% zero demand, 5% large Exponential bursts. Hardest to plan.",
    sample:()=>Math.random()<0.95?0:Math.max(0,Math.round(expRandom(0.05))),
    demMean:20,demStd:55,
  },
  gamma_stable:{
    label:"Stable Gamma",tag:"EASY",color:C.blue,
    desc:"Single Gamma(7,16), low variance. Baseline environment.",
    sample:()=>Math.max(0,Math.round(gammaRandom(7,16))),
    demMean:112,demStd:35,
  },
};

// ─── BASELINE AGENTS (mirrors agent_environment.py) ───────────────────────────
const BASELINES = {
  base:{
    label:"Base",color:C.muted,
    compute:(hist)=>arr_mean(hist)*CFG.LEAD_TIME,
  },
  safety_stock:{
    label:"Safety Stock",color:C.blue,
    compute:(hist)=>{
      const m=arr_mean(hist),s=arr_std(hist);
      return m*CFG.LEAD_TIME+1.645*s*Math.sqrt(CFG.LEAD_TIME);
    },
  },
  forecast:{
    label:"Oracle Forecast",color:C.green,
    compute:(hist,dMean,dStd)=>dMean*CFG.LEAD_TIME+1.645*dStd*Math.sqrt(CFG.LEAD_TIME),
  },
  monte_carlo:{
    label:"Monte Carlo",color:C.purple,
    compute:(hist)=>{
      const s=[];
      for(let i=0;i<500;i++){
        let t=0;for(let j=0;j<CFG.LEAD_TIME;j++)t+=hist[Math.floor(Math.random()*hist.length)]*(0.8+Math.random()*0.4);
        s.push(t);
      }
      s.sort((a,b)=>a-b);return quantile(s,0.95);
    },
  },
};

// ─── SIMULATION ENGINE ────────────────────────────────────────────────────────
function buildDemandSeries(envKey, n){
  return Array.from({length:n},()=>ENVS[envKey].sample());
}

function runOneSimulation(computeROP, demandSeries, envKey){
  const env=ENVS[envKey];
  const n=demandSeries.length;
  let inventory=0;
  const orders=[];
  let totDemand=0,totFulfilled=0,totWriteOff=0,stockOuts=0,lostSales=0;
  const timeline=[];

  for(let day=0;day<n;day++){
    const demand=demandSeries[day];
    const hist=demandSeries.slice(Math.max(0,day-CFG.HISTO_DAYS),day);

    // Deliver orders
    const arrivals=orders.filter(o=>o.arr===day);
    const delivered=arrivals.reduce((s,o)=>s+o.qty,0);
    inventory+=delivered;
    orders.splice(0,orders.length,...orders.filter(o=>o.arr>day));

    const preInv=inventory;

    // Fulfill demand
    const fulfilled=Math.min(demand,inventory);
    inventory=Math.max(0,inventory-demand);
    const lost=Math.max(0,demand-fulfilled);
    if(lost>0)stockOuts++;
    lostSales+=lost;

    // Reorder
    let rop=0,ordered=0;
    if(hist.length>=5&&day<n-CFG.LEAD_TIME){
      rop=Math.max(0,computeROP(hist,env.demMean,env.demStd));
      if(inventory<=rop){
        const qty=Math.ceil(rop-inventory+arr_mean(hist)*CFG.LEAD_TIME);
        orders.push({arr:day+CFG.LEAD_TIME,qty});
        ordered=qty;
      }
    }

    // Write-off
    let wo=0;
    if(day%CFG.WRITE_OFF_FREQ===0){wo=Math.floor(inventory*CFG.WRITE_OFF_RATE);inventory-=wo;totWriteOff+=wo;}

    totDemand+=demand;totFulfilled+=fulfilled;
    const fillRateCum=totDemand>0?totFulfilled/totDemand:0;
    timeline.push({day,demand,inventory:preInv,inventoryAfter:inventory,fulfilled,lost,rop:Math.round(rop),ordered,wo,delivered,fillRateCum});
  }
  return{timeline,metrics:{fillRate:totDemand>0?totFulfilled/totDemand:0,stockOuts,lostSales,totWriteOff,totDemand,totFulfilled}};
}

// ─── BUILD ENVIRONMENT SNAPSHOT FOR LLM ───────────────────────────────────────
function buildEnvSnapshot(demandSeries, timeline, day){
  const recent=demandSeries.slice(Math.max(0,day-10),day);
  const hist=demandSeries.slice(Math.max(0,day-CFG.HISTO_DAYS),day);
  const last5=timeline.slice(Math.max(0,day-5),day);
  const curInv=timeline[day-1]?.inventoryAfter??0;
  const pendingOrders=[];
  // Reconstruct pending from timeline (simplified)
  const fillSoFar=timeline[day-1]?.fillRateCum??null;

  return {
    day,
    current_inventory: curInv,
    lead_time: CFG.LEAD_TIME,
    write_off_rate: CFG.WRITE_OFF_RATE,
    service_level_target: CFG.DEFAULT_SL,
    sim_days_total: CFG.SIM_DAYS,
    days_remaining: CFG.SIM_DAYS-day,
    recent_demand_10d: recent,
    demand_mean_30d: Math.round(arr_mean(hist)*10)/10,
    demand_std_30d:  Math.round(arr_std(hist)*10)/10,
    fill_rate_so_far: fillSoFar ? Math.round(fillSoFar*1000)/10+"%" : "N/A",
    last_5_days: last5.map(d=>({day:d.day,demand:d.demand,inv:d.inventoryAfter,lost:d.lost,rop:d.rop,ordered:d.ordered})),
    recent_stockouts: last5.filter(d=>d.lost>0).length,
    recent_lost_sales: last5.reduce((s,d)=>s+d.lost,0),
  };
}

// ─── LLM CALL ─────────────────────────────────────────────────────────────────
async function callClaude(messages, systemPrompt){
  const resp=await fetch("https://api.anthropic.com/v1/messages",{
    method:"POST",
    headers:{"Content-Type":"application/json"},
    body:JSON.stringify({
      model:"claude-sonnet-4-20250514",
      max_tokens:1000,
      system:systemPrompt,
      messages,
    }),
  });
  const data=await resp.json();
  const text=data.content?.find(b=>b.type==="text")?.text||"";
  return text;
}

// ─── SYSTEM PROMPT ────────────────────────────────────────────────────────────
const SYSTEM_PROMPT = `You are an expert inventory optimization agent embedded in a stochastic simulation environment.

YOUR ROLE:
You receive a JSON snapshot of the current simulation state and must decide the REORDER POINT (ROP) — the inventory threshold that triggers a new order.

ENVIRONMENT RULES:
- Orders arrive exactly LEAD_TIME=3 days after placement
- You place an order whenever inventory <= your ROP
- Order quantity = ROP - current_inventory + mean_demand * LEAD_TIME (already handled)
- Every 7 days, 1% of inventory is written off (waste/expiry)
- Reward = fill_rate at end of simulation (target: >=95%)
- Reward is SPARSE: fill rate only stabilizes after ~50 days

REASONING REQUIREMENTS - you MUST do all 4:
1. SUBGOAL DECOMPOSITION: Break the problem into explicit subgoals (e.g., "build buffer", "survive spike risk", "minimize waste")
2. STATE ANALYSIS: Interpret current inventory, demand trend, stockout risk, fill rate trajectory
3. DECISION: Output a specific numeric ROP with clear justification
4. RECOVERY PLAN: If fill rate < 95% or recent stockouts occurred, state your recovery strategy

CRITICAL: You must reason BEYOND the next step. Consider that your ROP today affects inventory 3+ days from now.
For spiking demand: ROP must account for rare but catastrophic spikes.
For high-variance: wider safety buffers needed.
For stable demand: tighter ROP to avoid write-offs.

OUTPUT FORMAT — respond with this exact JSON (no markdown fences):
{
  "subgoals": ["subgoal 1", "subgoal 2", "subgoal 3"],
  "state_analysis": "2-3 sentence analysis of current state and risks",
  "recovery_plan": "what you're doing to recover or maintain performance",
  "reorder_point": <number>,
  "confidence": "high|medium|low",
  "reasoning_depth": "brief note on what makes this decision non-trivial"
}`;

// ─── MAIN COMPONENT ───────────────────────────────────────────────────────────
export default function StockOracleAgent() {
  const [envKey, setEnvKey]           = useState("gamma_poisson");
  const [phase, setPhase]             = useState("config"); // config | running | done
  const [agentLog, setAgentLog]       = useState([]);       // [{day, snapshot, decision, rop}]
  const [simTimeline, setSimTimeline] = useState([]);
  const [baselineResults, setBaselineResults] = useState({});
  const [agentMetrics, setAgentMetrics]       = useState(null);
  const [runningDay, setRunningDay]   = useState(0);
  const [statusMsg, setStatusMsg]     = useState("");
  const [memoryBank, setMemoryBank]   = useState([]);       // persistent cross-turn memory
  const [conversationHistory, setConversationHistory] = useState([]);
  const [activeTab, setActiveTab]     = useState("live");   // live | reasoning | compare | memory
  const abortRef = useRef(false);
  const logEndRef = useRef(null);

  useEffect(()=>{if(logEndRef.current)logEndRef.current.scrollIntoView({behavior:"smooth"});},[agentLog]);

  // ── Run baselines (instant, no API) ──
  const runBaselines = useCallback((demandSeries) => {
    const results = {};
    Object.entries(BASELINES).forEach(([k,ag])=>{
      results[k]=runOneSimulation((h,dm,ds)=>ag.compute(h,dm,ds), demandSeries, envKey);
    });
    setBaselineResults(results);
    return results;
  },[envKey]);

  // ── Build persistent memory summary ──
  function updateMemory(prevMemory, decision, day, metrics){
    const entry = {
      day,
      rop: decision.reorder_point,
      confidence: decision.confidence,
      fill_rate: metrics?.fillRate ? Math.round(metrics.fillRate*1000)/10 : null,
      stockouts_in_window: metrics?.stockOuts??0,
      key_insight: decision.state_analysis?.slice(0,80)+"...",
    };
    // Keep last 15 memory entries as compressed state
    const newMem = [...prevMemory.slice(-14), entry];
    return newMem;
  }

  // ── Main simulation loop ──
  const runAgentSimulation = useCallback(async () => {
    abortRef.current = false;
    setPhase("running");
    setAgentLog([]);
    setSimTimeline([]);
    setAgentMetrics(null);
    setMemoryBank([]);
    setConversationHistory([]);
    setRunningDay(0);

    const demandSeries = buildDemandSeries(envKey, CFG.SIM_DAYS);

    // Run baselines in background
    setStatusMsg("Computing baseline agents...");
    runBaselines(demandSeries);

    // Agent-driven simulation
    // We step through the sim, calling Claude every DECISION_INTERVAL days
    const DECISION_INTERVAL = 5; // Claude decides ROP every 5 days
    let inventory = 0;
    const orders = [];
    let totDemand=0, totFulfilled=0, totWriteOff=0, stockOuts=0, lostSales=0;
    const timeline = [];
    let currentROP = arr_mean(demandSeries.slice(0,CFG.HISTO_DAYS)) * CFG.LEAD_TIME; // initial ROP
    let localMemory = [];
    let localConvo = [];
    let localLog = [];

    for(let day=0; day<CFG.SIM_DAYS; day++){
      if(abortRef.current) break;

      const demand = demandSeries[day];
      const hist   = demandSeries.slice(Math.max(0,day-CFG.HISTO_DAYS), day);

      // Deliver orders
      const arrivals = orders.filter(o=>o.arr===day);
      const delivered = arrivals.reduce((s,o)=>s+o.qty,0);
      inventory += delivered;
      orders.splice(0,orders.length,...orders.filter(o=>o.arr>day));

      const preInv = inventory;

      // Fulfill demand
      const fulfilled = Math.min(demand, inventory);
      inventory = Math.max(0, inventory-demand);
      const lost = Math.max(0, demand-fulfilled);
      if(lost>0) stockOuts++;
      lostSales += lost;

      // Reorder check using current ROP
      let ordered=0;
      if(hist.length>=5 && day<CFG.SIM_DAYS-CFG.LEAD_TIME){
        if(inventory<=currentROP){
          const qty=Math.ceil(currentROP-inventory+arr_mean(hist)*CFG.LEAD_TIME);
          orders.push({arr:day+CFG.LEAD_TIME,qty});
          ordered=qty;
        }
      }

      // Write-off
      let wo=0;
      if(day%CFG.WRITE_OFF_FREQ===0){wo=Math.floor(inventory*CFG.WRITE_OFF_RATE);inventory-=wo;totWriteOff+=wo;}

      totDemand+=demand; totFulfilled+=fulfilled;
      const fillRateCum = totDemand>0?totFulfilled/totDemand:0;
      const tEntry = {day,demand,inventory:preInv,inventoryAfter:inventory,fulfilled,lost,rop:Math.round(currentROP),ordered,wo,delivered,fillRateCum};
      timeline.push(tEntry);

      setSimTimeline([...timeline]);
      setRunningDay(day);

      // ── LLM Decision every DECISION_INTERVAL days ──
      if(day>=CFG.HISTO_DAYS && day%DECISION_INTERVAL===0 && day<CFG.SIM_DAYS-CFG.LEAD_TIME){
        setStatusMsg(`Day ${day}: Agent reasoning...`);

        const snapshot = buildEnvSnapshot(demandSeries, timeline, day);

        // Build memory context
        const memoryContext = localMemory.length>0
          ? `\nYOUR MEMORY FROM PREVIOUS DECISIONS:\n${JSON.stringify(localMemory.slice(-8),null,2)}`
          : "";

        const userMsg = {
          role:"user",
          content: `ENVIRONMENT SNAPSHOT — Day ${day}/${CFG.SIM_DAYS}\n${JSON.stringify(snapshot,null,2)}${memoryContext}\n\nDecide your reorder_point for the next ${DECISION_INTERVAL} days.`
        };

        // Maintain rolling conversation (last 6 turns to stay in context)
        const trimmedConvo = localConvo.slice(-6);
        const fullMessages = [...trimmedConvo, userMsg];

        try {
          const rawResp = await callClaude(fullMessages, SYSTEM_PROMPT);
          let decision;
          try {
            const cleaned = rawResp.replace(/```json|```/g,"").trim();
            decision = JSON.parse(cleaned);
          } catch {
            // Fallback: extract reorder_point with regex
            const match = rawResp.match(/"reorder_point"\s*:\s*(\d+\.?\d*)/);
            decision = {
              subgoals:["parse error — fallback"],
              state_analysis: rawResp.slice(0,200),
              recovery_plan:"N/A",
              reorder_point: match ? parseFloat(match[1]) : currentROP,
              confidence:"low",
              reasoning_depth:"parse failed",
            };
          }

          currentROP = Math.max(0, decision.reorder_point||currentROP);

          // Update conversation history
          const assistantMsg = {role:"assistant", content:rawResp};
          localConvo = [...localConvo, userMsg, assistantMsg];
          setConversationHistory([...localConvo]);

          // Update memory bank
          localMemory = updateMemory(localMemory, decision, day, {fillRate:fillRateCum, stockOuts});
          setMemoryBank([...localMemory]);

          // Add to agent log
          const logEntry = {day, snapshot, decision, rop:currentROP, fillRateCum};
          localLog = [...localLog, logEntry];
          setAgentLog([...localLog]);

        } catch(e) {
          setStatusMsg(`Day ${day}: API error — ${e.message}`);
        }

        // Small pause to not slam API
        await new Promise(r=>setTimeout(r,200));
      }
    }

    // Final metrics
    const finalMetrics = {
      fillRate:totDemand>0?totFulfilled/totDemand:0,
      stockOuts, lostSales, totWriteOff, totDemand, totFulfilled
    };
    setAgentMetrics(finalMetrics);
    setSimTimeline([...timeline]);
    setPhase("done");
    setStatusMsg("Simulation complete.");
    setActiveTab("compare");
  }, [envKey, runBaselines]);

  const stopSim = () => { abortRef.current=true; setStatusMsg("Stopped by user."); setPhase("done"); };

  // ── Render helpers ──
  const env = ENVS[envKey];
  const latestLog = agentLog[agentLog.length-1];

  function FillBadge({rate}){
    const c=rate>=0.95?C.green:rate>=0.85?C.amber:C.red;
    return <span style={{color:c,fontWeight:700}}>{rate?(rate*100).toFixed(1)+"%":"—"}</span>;
  }

  function Panel({title,children,style={}}){
    return(
      <div style={{background:C.panel,border:`1px solid ${C.border}`,borderRadius:10,padding:"16px 18px",...style}}>
        {title&&<div style={{fontSize:9,letterSpacing:4,color:C.muted,marginBottom:12,textTransform:"uppercase"}}>{title}</div>}
        {children}
      </div>
    );
  }

  function Tab({id,label}){
    const active=activeTab===id;
    return(
      <button onClick={()=>setActiveTab(id)} style={{
        background:active?C.border2:"transparent",
        border:`1px solid ${active?C.border2:"transparent"}`,
        borderRadius:6,padding:"7px 14px",
        color:active?C.text:C.muted,fontFamily:"inherit",
        fontSize:11,cursor:"pointer",letterSpacing:1,transition:"all 0.15s",
      }}>{label}</button>
    );
  }

  const agentTimelineFillRates = simTimeline.map(t=>({day:t.day,agent:t.fillRateCum}));

  return(
    <div style={{minHeight:"100vh",background:C.bg,fontFamily:"'JetBrains Mono',monospace",color:C.text,padding:"24px 16px"}}>
      <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600&family=Clash+Display:wght@600;700&display=swap" rel="stylesheet"/>

      {/* ── HEADER ── */}
      <div style={{maxWidth:1200,margin:"0 auto"}}>
        <div style={{display:"flex",justifyContent:"space-between",alignItems:"flex-start",marginBottom:28,flexWrap:"wrap",gap:12}}>
          <div>
            <div style={{fontSize:9,letterSpacing:5,color:C.muted,marginBottom:6}}>HACKATHON · LONG-HORIZON REASONING ENVIRONMENT</div>
            <h1 style={{margin:0,fontSize:"clamp(32px,5vw,52px)",fontWeight:700,letterSpacing:-1,
              background:`linear-gradient(120deg,${C.teal},${C.blue},${C.purple})`,
              WebkitBackgroundClip:"text",WebkitTextFillColor:"transparent",lineHeight:1.1,
              fontFamily:"'JetBrains Mono',monospace",
            }}>STOCK ORACLE</h1>
            <div style={{fontSize:10,color:C.muted,marginTop:5,letterSpacing:2}}>
              LLM AGENT · INVENTORY OPTIMIZATION · SPARSE REWARD · MULTI-STEP PLANNING
            </div>
          </div>
          {phase==="done"&&agentMetrics&&(
            <div style={{display:"flex",gap:10,flexWrap:"wrap"}}>
              {[
                {label:"AGENT FILL RATE",val:<FillBadge rate={agentMetrics.fillRate}/>,highlight:true},
                {label:"STOCKOUTS",val:agentMetrics.stockOuts},
                {label:"LOST SALES",val:agentMetrics.lostSales.toLocaleString()},
                {label:"LLM DECISIONS",val:agentLog.length},
              ].map(({label,val,highlight})=>(
                <div key={label} style={{background:highlight?"#0d1f18":C.panel,border:`1px solid ${highlight?C.green+"30":C.border}`,borderRadius:8,padding:"10px 16px",textAlign:"center"}}>
                  <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:3}}>{label}</div>
                  <div style={{fontSize:22,fontWeight:600,letterSpacing:1}}>{val}</div>
                </div>
              ))}
            </div>
          )}
        </div>

        {/* ── CONFIG ── */}
        {phase==="config"&&(
          <div style={{display:"grid",gridTemplateColumns:"1fr 1fr",gap:16,marginBottom:20,maxWidth:800}}>
            <Panel title="Demand Environment">
              {Object.entries(ENVS).map(([k,e])=>(
                <button key={k} onClick={()=>setEnvKey(k)} style={{
                  display:"block",width:"100%",textAlign:"left",
                  background:envKey===k?"#0f1e2e":"transparent",
                  border:`1px solid ${envKey===k?e.color+"50":C.border}`,
                  borderRadius:6,padding:"10px 12px",marginBottom:6,cursor:"pointer",fontFamily:"inherit",
                  transition:"all 0.15s",
                }}>
                  <div style={{display:"flex",justifyContent:"space-between",alignItems:"center"}}>
                    <span style={{fontSize:12,color:envKey===k?e.color:C.muted,fontWeight:500}}>{e.label}</span>
                    <span style={{fontSize:9,color:e.color,border:`1px solid ${e.color}40`,borderRadius:3,padding:"2px 6px"}}>{e.tag}</span>
                  </div>
                  <div style={{fontSize:10,color:C.dim,marginTop:4,lineHeight:1.5}}>{e.desc}</div>
                </button>
              ))}
            </Panel>
            <Panel title="About This Environment">
              <div style={{fontSize:11,color:C.muted,lineHeight:1.8}}>
                {[
                  ["Sparse Reward","Fill rate only converges after 50+ days. No reward signal per individual decision."],
                  ["Multi-Step Planning","Each ROP decision affects inventory 3 days forward (lead time). Cascading errors are common."],
                  ["State Tracking","Agent maintains memory across 120 days: inventory levels, order pipeline, demand patterns."],
                  ["Error Recovery","Post-stockout, agent must over-order to rebuild buffer without triggering write-off waste."],
                  ["Extended Horizon","120 decisions × 5-day intervals. LLM conversation history managed via rolling window + memory bank."],
                ].map(([t,d])=>(
                  <div key={t} style={{marginBottom:10}}>
                    <span style={{color:C.teal,fontWeight:600}}>{t}: </span>
                    <span style={{color:C.muted}}>{d}</span>
                  </div>
                ))}
              </div>
              <button onClick={runAgentSimulation} style={{
                width:"100%",marginTop:16,
                background:"#0d1f18",border:`1px solid ${C.green}60`,
                borderRadius:7,padding:"14px",color:C.green,
                fontFamily:"inherit",fontSize:13,cursor:"pointer",
                letterSpacing:2,fontWeight:600,transition:"all 0.2s",
              }}>
                ▶ LAUNCH AGENT SIMULATION
              </button>
            </Panel>
          </div>
        )}

        {/* ── RUNNING / DONE ── */}
        {(phase==="running"||phase==="done")&&(
          <>
            {/* Status bar */}
            <div style={{display:"flex",justifyContent:"space-between",alignItems:"center",marginBottom:16,flexWrap:"wrap",gap:8}}>
              <div style={{display:"flex",gap:8,alignItems:"center",fontSize:11}}>
                {phase==="running"&&<span style={{color:C.amber,animation:"pulse 1s infinite"}}
                  >●</span>}
                <span style={{color:C.muted}}>{statusMsg}</span>
                {phase==="running"&&(
                  <div style={{width:200,height:4,background:C.border,borderRadius:2,overflow:"hidden"}}>
                    <div style={{height:"100%",width:`${(runningDay/CFG.SIM_DAYS)*100}%`,background:C.teal,transition:"width 0.3s",borderRadius:2}}/>
                  </div>
                )}
              </div>
              <div style={{display:"flex",gap:8}}>
                {phase==="running"&&<button onClick={stopSim} style={{background:"#2a0f0f",border:`1px solid ${C.red}40`,borderRadius:6,padding:"6px 14px",color:C.red,fontFamily:"inherit",fontSize:11,cursor:"pointer"}}>■ STOP</button>}
                <button onClick={()=>{setPhase("config");setAgentLog([]);setSimTimeline([]);setBaselineResults({});setAgentMetrics(null);}} style={{background:C.panel,border:`1px solid ${C.border}`,borderRadius:6,padding:"6px 14px",color:C.muted,fontFamily:"inherit",fontSize:11,cursor:"pointer"}}>↺ RESET</button>
              </div>
            </div>

            {/* Tabs */}
            <div style={{display:"flex",gap:6,marginBottom:14,flexWrap:"wrap"}}>
              <Tab id="live"      label="LIVE SIM"/>
              <Tab id="reasoning" label={`AGENT REASONING (${agentLog.length})`}/>
              <Tab id="compare"   label="COMPARE AGENTS"/>
              <Tab id="memory"    label={`MEMORY BANK (${memoryBank.length})`}/>
            </div>

            {/* ── TAB: LIVE SIM ── */}
            {activeTab==="live"&&(
              <div style={{display:"flex",flexDirection:"column",gap:14}}>
                <Panel title="Inventory · Demand · Reorder Point">
                  <ResponsiveContainer width="100%" height={200}>
                    <AreaChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
                      <defs>
                        <linearGradient id="ig" x1="0" y1="0" x2="0" y2="1">
                          <stop offset="5%" stopColor={C.blue} stopOpacity={0.25}/>
                          <stop offset="95%" stopColor={C.blue} stopOpacity={0}/>
                        </linearGradient>
                      </defs>
                      <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
                      <YAxis tick={{fontSize:9,fill:C.muted}} width={45}/>
                      <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10,borderRadius:6}} labelFormatter={d=>`Day ${d}`}/>
                      <Area type="monotone" dataKey="inventory" stroke={C.blue} strokeWidth={1.5} fill="url(#ig)" dot={false} name="Inventory"/>
                      <Line type="monotone" dataKey="demand" stroke={C.red} strokeWidth={1} dot={false} name="Demand"/>
                      <Line type="monotone" dataKey="rop" stroke={C.amber} strokeWidth={1} strokeDasharray="5 3" dot={false} name="Agent ROP"/>
                    </AreaChart>
                  </ResponsiveContainer>
                </Panel>
                <div style={{display:"grid",gridTemplateColumns:"1fr 1fr",gap:14}}>
                  <Panel title="Cumulative Fill Rate">
                    <ResponsiveContainer width="100%" height={130}>
                      <LineChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
                        <YAxis domain={[0,1]} tickFormatter={v=>`${(v*100).toFixed(0)}%`} tick={{fontSize:9,fill:C.muted}} width={38}/>
                        <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="4 3"/>
                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}} formatter={v=>`${(v*100).toFixed(1)}%`}/>
                        <Line type="monotone" dataKey="fillRateCum" stroke={C.teal} strokeWidth={2} dot={false} name="Fill Rate"/>
                      </LineChart>
                    </ResponsiveContainer>
                  </Panel>
                  <Panel title="Lost Sales Per Day">
                    <ResponsiveContainer width="100%" height={130}>
                      <BarChart data={simTimeline} barSize={2} margin={{top:4,right:4,bottom:0,left:0}}>
                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
                        <YAxis tick={{fontSize:9,fill:C.muted}} width={38}/>
                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}}/>
                        <Bar dataKey="lost" fill={C.red} opacity={0.8} name="Lost Sales"/>
                      </BarChart>
                    </ResponsiveContainer>
                  </Panel>
                </div>
              </div>
            )}

            {/* ── TAB: AGENT REASONING ── */}
            {activeTab==="reasoning"&&(
              <div style={{display:"flex",flexDirection:"column",gap:10,maxHeight:"72vh",overflowY:"auto",paddingRight:4}}>
                {agentLog.length===0&&<div style={{color:C.muted,fontSize:12,padding:20,textAlign:"center"}}>Waiting for first LLM decision (after day {CFG.HISTO_DAYS})...</div>}
                {agentLog.map((entry,i)=>{
                  const d=entry.decision;
                  const isLatest=i===agentLog.length-1;
                  return(
                    <div key={i} style={{
                      background:isLatest?"#0c1a24":C.panel,
                      border:`1px solid ${isLatest?C.teal+"40":C.border}`,
                      borderRadius:10,padding:"14px 16px",
                      borderLeft:`3px solid ${isLatest?C.teal:C.border2}`,
                    }}>
                      <div style={{display:"flex",justifyContent:"space-between",alignItems:"center",marginBottom:10,flexWrap:"wrap",gap:6}}>
                        <div style={{fontSize:11,color:C.teal,fontWeight:600}}>Day {entry.day} — Decision #{i+1}</div>
                        <div style={{display:"flex",gap:8,flexWrap:"wrap"}}>
                          <span style={{fontSize:10,color:C.muted}}>ROP: <span style={{color:C.amber,fontWeight:600}}>{Math.round(entry.rop)}</span></span>
                          <span style={{fontSize:10,color:C.muted}}>Fill: <FillBadge rate={entry.fillRateCum}/></span>
                          <span style={{fontSize:9,padding:"2px 7px",borderRadius:3,
                            background:d.confidence==="high"?"#0d1f18":d.confidence==="medium"?"#1f1a0d":"#1f0d0d",
                            color:d.confidence==="high"?C.green:d.confidence==="medium"?C.amber:C.red,
                            border:`1px solid currentColor`,opacity:0.8,
                          }}>{d.confidence?.toUpperCase()||"?"}</span>
                        </div>
                      </div>

                      {/* Subgoals */}
                      {d.subgoals?.length>0&&(
                        <div style={{marginBottom:10}}>
                          <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:6}}>SUBGOAL DECOMPOSITION</div>
                          <div style={{display:"flex",gap:6,flexWrap:"wrap"}}>
                            {d.subgoals.map((sg,j)=>(
                              <div key={j} style={{fontSize:10,background:C.dim,border:`1px solid ${C.border2}`,borderRadius:4,padding:"4px 9px",color:C.blue}}>
                                {j+1}. {sg}
                              </div>
                            ))}
                          </div>
                        </div>
                      )}

                      {/* State analysis */}
                      <div style={{marginBottom:8}}>
                        <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:5}}>STATE ANALYSIS</div>
                        <div style={{fontSize:11,color:C.text,lineHeight:1.7,background:C.dim,borderRadius:6,padding:"8px 10px"}}>{d.state_analysis}</div>
                      </div>

                      {/* Recovery */}
                      {d.recovery_plan&&d.recovery_plan!=="N/A"&&(
                        <div style={{marginBottom:8}}>
                          <div style={{fontSize:9,letterSpacing:3,color:C.muted,marginBottom:5}}>RECOVERY PLAN</div>
                          <div style={{fontSize:11,color:C.amber,lineHeight:1.6,background:"#1a1400",borderRadius:6,padding:"8px 10px",border:`1px solid ${C.amber}20`}}>{d.recovery_plan}</div>
                        </div>
                      )}

                      {/* Reasoning depth */}
                      {d.reasoning_depth&&(
                        <div style={{fontSize:10,color:C.muted,marginTop:6}}>
                          <span style={{color:C.purple}}>Reasoning: </span>{d.reasoning_depth}
                        </div>
                      )}
                    </div>
                  );
                })}
                <div ref={logEndRef}/>
              </div>
            )}

            {/* ── TAB: COMPARE ── */}
            {activeTab==="compare"&&(
              <div style={{display:"flex",flexDirection:"column",gap:14}}>
                {/* Scorecard */}
                <div style={{display:"grid",gridTemplateColumns:"repeat(5,1fr)",gap:10}}>
                  {/* Agent */}
                  {agentMetrics&&(
                    <div style={{background:"#0a1e18",border:`2px solid ${C.teal}40`,borderRadius:10,padding:"14px",gridColumn:"1"}}>
                      <div style={{fontSize:9,color:C.teal,letterSpacing:3,marginBottom:8}}>🤖 LLM AGENT</div>
                      {[["Fill Rate",<FillBadge rate={agentMetrics.fillRate}/>],["Stockouts",agentMetrics.stockOuts],["Lost Sales",agentMetrics.lostSales.toLocaleString()],["Write-Offs",agentMetrics.totWriteOff.toLocaleString()]].map(([l,v])=>(
                        <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:11,marginBottom:5}}>
                          <span style={{color:C.muted}}>{l}</span><span style={{fontWeight:600}}>{v}</span>
                        </div>
                      ))}
                    </div>
                  )}
                  {/* Baselines */}
                  {Object.entries(baselineResults).map(([bk,br])=>(
                    <div key={bk} style={{background:C.panel,border:`1px solid ${BASELINES[bk].color}30`,borderRadius:10,padding:"14px"}}>
                      <div style={{fontSize:9,color:BASELINES[bk].color,letterSpacing:3,marginBottom:8}}>{BASELINES[bk].label.toUpperCase()}</div>
                      {[["Fill Rate",<FillBadge rate={br.metrics.fillRate}/>],["Stockouts",br.metrics.stockOuts],["Lost Sales",br.metrics.lostSales.toLocaleString()],["Write-Offs",br.metrics.totWriteOff.toLocaleString()]].map(([l,v])=>(
                        <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:11,marginBottom:5}}>
                          <span style={{color:C.muted}}>{l}</span><span style={{fontWeight:600}}>{v}</span>
                        </div>
                      ))}
                    </div>
                  ))}
                </div>

                {/* Fill rate comparison chart */}
                {Object.keys(baselineResults).length>0&&(
                  <Panel title="Fill Rate Convergence — Agent vs All Baselines">
                    <div style={{fontSize:10,color:C.muted,marginBottom:10}}>
                      Dashed line = 95% target. The LLM agent ({C.teal}) must beat baselines through structured reasoning, not hard-coded rules.
                    </div>
                    <ResponsiveContainer width="100%" height={220}>
                      <LineChart margin={{top:4,right:8,bottom:0,left:0}}>
                        <XAxis dataKey="day" type="number" domain={[0,CFG.SIM_DAYS]} tick={{fontSize:9,fill:C.muted}}/>
                        <YAxis domain={[0,1]} tickFormatter={v=>`${(v*100).toFixed(0)}%`} tick={{fontSize:9,fill:C.muted}} width={40}/>
                        <ReferenceLine y={0.95} stroke={C.amber} strokeDasharray="5 3" label={{value:"95% target",fontSize:9,fill:C.amber}}/>
                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}} formatter={v=>`${(v*100).toFixed(1)}%`}/>
                        <Legend wrapperStyle={{fontSize:10}}/>
                        {/* Agent line */}
                        <Line data={agentTimelineFillRates} type="monotone" dataKey="agent" stroke={C.teal} strokeWidth={2.5} dot={false} name="LLM Agent"/>
                        {/* Baselines */}
                        {Object.entries(baselineResults).map(([bk,br])=>(
                          <Line key={bk} data={br.timeline.map(t=>({day:t.day,fillRate:t.fillRateCum}))}
                            type="monotone" dataKey="fillRate" stroke={BASELINES[bk].color} strokeWidth={1}
                            strokeDasharray="3 2" dot={false} name={BASELINES[bk].label}/>
                        ))}
                      </LineChart>
                    </ResponsiveContainer>
                  </Panel>
                )}

                {/* ROP decisions overlay */}
                {agentLog.length>0&&(
                  <Panel title="Agent Reorder Point Over Time vs Demand Distribution">
                    <ResponsiveContainer width="100%" height={160}>
                      <AreaChart data={simTimeline} margin={{top:4,right:4,bottom:0,left:0}}>
                        <defs>
                          <linearGradient id="dg" x1="0" y1="0" x2="0" y2="1">
                            <stop offset="5%" stopColor={C.red} stopOpacity={0.15}/>
                            <stop offset="95%" stopColor={C.red} stopOpacity={0}/>
                          </linearGradient>
                        </defs>
                        <XAxis dataKey="day" tick={{fontSize:9,fill:C.muted}}/>
                        <YAxis tick={{fontSize:9,fill:C.muted}} width={45}/>
                        <Tooltip contentStyle={{background:"#0a0f18",border:`1px solid ${C.border2}`,fontSize:10}}/>
                        <Area type="monotone" dataKey="demand" stroke={C.red} strokeWidth={1} fill="url(#dg)" dot={false} name="Demand"/>
                        <Line type="monotone" dataKey="rop" stroke={C.amber} strokeWidth={2} dot={false} name="Agent ROP"/>
                      </AreaChart>
                    </ResponsiveContainer>
                  </Panel>
                )}
              </div>
            )}

            {/* ── TAB: MEMORY BANK ── */}
            {activeTab==="memory"&&(
              <div style={{display:"flex",flexDirection:"column",gap:10}}>
                <Panel>
                  <div style={{fontSize:11,color:C.muted,lineHeight:1.8,marginBottom:12}}>
                    The memory bank is a compressed rolling state passed to the LLM on every decision turn. It enables the agent to reason beyond its context window — tracking performance trends, past ROP decisions, and emerging patterns across the full 120-day horizon.
                  </div>
                  <div style={{display:"grid",gridTemplateColumns:"repeat(auto-fill,minmax(200px,1fr))",gap:8}}>
                    {memoryBank.map((m,i)=>(
                      <div key={i} style={{background:C.dim,border:`1px solid ${C.border}`,borderRadius:7,padding:"10px 12px"}}>
                        <div style={{fontSize:10,color:C.teal,fontWeight:600,marginBottom:6}}>Day {m.day}</div>
                        {[
                          ["ROP Set",m.rop],
                          ["Confidence",m.confidence],
                          ["Fill Rate",m.fill_rate?(m.fill_rate+"%"):"—"],
                          ["Stockouts",m.stockouts_in_window],
                        ].map(([l,v])=>(
                          <div key={l} style={{display:"flex",justifyContent:"space-between",fontSize:10,marginBottom:4}}>
                            <span style={{color:C.muted}}>{l}</span>
                            <span style={{color:C.text}}>{v}</span>
                          </div>
                        ))}
                        <div style={{fontSize:9,color:C.muted,marginTop:6,lineHeight:1.5,borderTop:`1px solid ${C.border}`,paddingTop:5}}>
                          {m.key_insight}
                        </div>
                      </div>
                    ))}
                    {memoryBank.length===0&&<div style={{color:C.muted,fontSize:11}}>Memory builds as agent makes decisions...</div>}
                  </div>
                </Panel>
              </div>
            )}
          </>
        )}

        {/* ── FOOTER ── */}
        <div style={{marginTop:28,paddingTop:16,borderTop:`1px solid ${C.border}`,display:"grid",gridTemplateColumns:"repeat(4,1fr)",gap:12,fontSize:10,color:C.dim}}>
          {[
            ["Environment","Stochastic inventory simulation with 4 demand regimes (Gamma-Poisson, Bimodal HV, Spiking, Stable Gamma). Mirrors real supply-chain uncertainty."],
            ["Agent Architecture","Claude Sonnet 4 called every 5 simulation days. Rolling 6-turn conversation + compressed memory bank enables reasoning beyond context window."],
            ["Reward Structure","Sparse: fill rate signal only meaningful after 50+ days. Agent must plan across 120-day horizon with no per-step guidance."],
            ["Benchmarking","LLM agent compared against 4 rule-based baselines: Base, Safety Stock, Oracle Forecast, Monte Carlo — all from the original Python codebase."],
          ].map(([t,d])=>(
            <div key={t}>
              <div style={{color:C.muted,fontWeight:600,marginBottom:4,fontSize:9,letterSpacing:2}}>{t.toUpperCase()}</div>
              <div style={{lineHeight:1.7}}>{d}</div>
            </div>
          ))}
        </div>
      </div>
      <style>{`@keyframes pulse{0%,100%{opacity:1}50%{opacity:0.3}}`}</style>
    </div>
  );
}