# Source: Hugging Face Space "ademarteau/RL-Inventory-Simulations"
# (Space status when captured: Runtime error; file size: 18,006 bytes)

import json
import os
import re

import gradio as gr
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import InferenceClient

from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
from agent_environment import BaseAgent, SafetyStockAgent, ForecastAgent, MonteCarloAgent
from demand_environment import GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance
from demand_calculator import DemandCalculator
from order_processor import OrderProcessor
from inventory_manager import InventoryManager
from performance_tracker import PerformanceTracker

# Maps the label shown in the "Demand Environment" dropdowns to the demand
# environment class used for a run (each class is instantiated with SIM_DAYS).
ENV_MAP = {
    "GammaPoisson (90/10 mixture)": GammaPoisson,
    "GammaGamma High Variance (bimodal)": GammaGammaHighVariance,
    "Spiking Demand": SpikingDemand,
    "Single Gamma Low Variance": SingleGammaLowVariance,
}

# Number of simulated days between two consecutive LLM reorder-point decisions.
DECISION_INTERVAL = 5

# System prompt sent with every LLM request; it defines the JSON schema that
# _parse_decision later reads the "reorder_point" (and other fields) from.
# NOTE(review): the text hard-codes LEAD_TIME=3 and a "1% every 7 days"
# write-off, while LEAD_TIME and WRITE_OFF_RATE are imported from config —
# confirm the prompt still matches the configured values.
LLM_SYSTEM_PROMPT = """You are an expert inventory optimization agent in a stochastic simulation.

Decide the REORDER POINT (ROP) — the inventory threshold that triggers a new order.

RULES:
- Orders arrive LEAD_TIME=3 days after placement
- Every 7 days, 1% of inventory is written off
- Goal: fill rate >= 95% at end of episode

OUTPUT — respond with this exact JSON (no markdown fences):
{
  "subgoals": ["subgoal 1", "subgoal 2"],
  "state_analysis": "2-3 sentence analysis",
  "recovery_plan": "recovery strategy if fill rate < 95%",
  "reorder_point": <number>,
  "confidence": "high|medium|low"
}"""


# ── Shared chart builder ───────────────────────────────────────────────────────

def build_chart(daily_inventory, running_fill_rate, rop_markers, title, daily_pnl=None):
    """Render the simulation history as a stacked matplotlib figure.

    Panels share the x axis (evaluation day, starting at 0):
      1. inventory level, plus orange markers where a reorder point was set;
      2. cumulative fill rate against a dashed 95% target line;
      3. daily P&L breakdown -- only drawn when ``daily_pnl`` is non-empty.

    The figure is built from ``matplotlib.figure.Figure`` instead of
    ``plt.subplots``. Figures created through pyplot stay registered in
    pyplot's global state until explicitly closed, so a caller that redraws
    repeatedly (the live LLM tab does) would accumulate them; a standalone
    ``Figure`` is released as soon as nothing references it.

    Args:
        daily_inventory: Inventory level per evaluation day.
        running_fill_rate: Cumulative fill rate per evaluation day; plotted
            against the same x values as ``daily_inventory``.
        rop_markers: Sequence of ``(day, rop)`` pairs. ``day`` is shifted by
            ``HISTO_DAYS`` before plotting. May be empty.
        title: Title of the top panel.
        daily_pnl: Optional list of per-day dicts with the keys ``revenue``,
            ``holding_cost``, ``stockout_penalty``, ``order_cost``,
            ``writeoff_cost`` and ``daily_profit``.

    Returns:
        The populated ``matplotlib.figure.Figure``.
    """
    # Function-scope import keeps this block self-contained.
    from matplotlib.figure import Figure

    n_rows = 3 if daily_pnl else 2
    fig = Figure(figsize=(10, 4 + 2.5 * n_rows))
    axes = fig.subplots(n_rows, 1, sharex=True)
    ax1, ax2 = axes[0], axes[1]
    days = list(range(len(daily_inventory)))

    # Panel 1: inventory level and the points where the ROP was (re)set.
    ax1.plot(days, daily_inventory, color="steelblue", linewidth=0.8)
    if rop_markers:
        rop_days, rop_vals = zip(*rop_markers)
        ax1.scatter([d - HISTO_DAYS for d in rop_days], rop_vals,
                    color="orange", s=20, zorder=5, label="ROP set")
        ax1.legend(fontsize=8)
    ax1.set_ylabel("Inventory Level")
    ax1.set_title(title)

    # Panel 2: cumulative fill rate versus the 95% target.
    ax2.plot(days, running_fill_rate, color="seagreen", linewidth=0.8)
    ax2.axhline(y=0.95, color="red", linestyle="--", linewidth=0.6, label="95% target")
    ax2.set_ylabel("Cumulative Fill Rate")
    ax2.set_ylim(0, 1)
    ax2.legend(fontsize=8)

    if daily_pnl:
        # Panel 3: revenue above zero, each cost component mirrored below it.
        ax3 = axes[2]
        revenues = [r["revenue"] for r in daily_pnl]
        net_profits = [r["daily_profit"] for r in daily_pnl]
        ax3.fill_between(days, revenues, alpha=0.25, color="green", label="Revenue")
        ax3.plot(days, net_profits, color="black", linewidth=0.9, label="Net profit")

        cost_layers = (
            ("holding_cost", 0.3, "royalblue", "Holding cost"),
            ("stockout_penalty", 0.3, "crimson", "Stockout penalty"),
            ("order_cost", 0.25, "darkorange", "Order cost"),
            ("writeoff_cost", 0.25, "purple", "Write-off cost"),
        )
        for key, alpha, color, label in cost_layers:
            ax3.fill_between(days, [-r[key] for r in daily_pnl],
                             alpha=alpha, color=color, label=label)

        ax3.axhline(y=0, color="grey", linewidth=0.5)
        ax3.set_ylabel("Daily P&L ($)")
        ax3.set_xlabel("Evaluation Day")
        ax3.legend(fontsize=7, ncol=3)
    else:
        ax2.set_xlabel("Evaluation Day")

    # Lay out this figure explicitly rather than pyplot's "current" figure.
    fig.tight_layout()
    return fig


# ── Tab 1: Baseline agents ─────────────────────────────────────────────────────

def run_simulation(agent_name, env_name):
    """Run one rule-based agent over the evaluation window and summarise it.

    Args:
        agent_name: One of "Base (Historical Mean)", "Safety Stock",
            "Forecast" or "Monte Carlo".
        env_name: Key of ``ENV_MAP`` selecting the demand environment.

    Returns:
        ``(fig, metrics)`` -- the chart produced by ``build_chart`` and a
        Markdown string with profit, service level, fill rate, stockouts,
        lost sales and write-offs.

    Raises:
        KeyError: If ``env_name`` or ``agent_name`` is not a known label.
    """
    env_class = ENV_MAP[env_name]
    environment = env_class(SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Walk every day once up front; dc.daily_demand_distribution is read right
    # below, so this pass presumably populates it (confirm in DemandCalculator).
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)
    demand_mean = [d.demand_mean for d in dc.daily_demand_distribution]
    demand_std = [d.demand_std for d in dc.daily_demand_distribution]

    # Factories rather than instances: only the selected agent is constructed,
    # instead of building all four (including Monte Carlo) and discarding three.
    agent_factories = {
        "Base (Historical Mean)": lambda: BaseAgent(dc),
        "Safety Stock": lambda: SafetyStockAgent(dc),
        "Forecast": lambda: ForecastAgent(dc, demand_mean, demand_std),
        "Monte Carlo": lambda: MonteCarloAgent(dc),
    }
    agent = agent_factories[agent_name]()

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(order_processor=order_processor, agent=agent)
    daily_inventory, running_fill_rate, daily_pnl = [], [], []
    total_demand, total_fulfilled = 0, 0

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        # Stock on hand before today's demand is applied caps what can be served.
        base_inv = inventory_manager.inventory
        inventory_manager.inventory_update(demand_qty)

        # Anything appended to the queue by reorder() is today's new order.
        q_before = len(order_processor.order_queue)
        # Reordering stops LEAD_TIME days before the end of the horizon.
        if day < SIM_DAYS - LEAD_TIME:
            inventory_manager.reorder(day)
        new_orders = order_processor.order_queue[q_before:]
        ordered_qty = sum(o.quantity for o in new_orders)

        inventory_manager.process_deliveries(day)
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)
        total_demand += demand_qty
        total_fulfilled += fulfilled
        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        running_fill_rate.append(total_fulfilled / total_demand if total_demand > 0 else 0)

        # Daily P&L: revenue minus holding, stockout, ordering and write-off costs.
        lost = max(0, demand_qty - fulfilled)
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        # The fixed cost is charged once per day on which anything was ordered.
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

    summary = performance_tracker.performance_summary()
    total_profit = sum(d["daily_profit"] for d in daily_pnl)
    days_elapsed = len(daily_pnl)
    # Service level = share of evaluated days without a stockout.
    service_level = (days_elapsed - summary['stock_out_count']) / days_elapsed if days_elapsed > 0 else 0.0
    fig = build_chart(daily_inventory, running_fill_rate, [], f"{agent_name}  |  {env_name}", daily_pnl)
    metrics = (
        f"**Total Profit:** ${total_profit:,.0f}  \n"
        f"**Service Level:** {service_level:.2%}  \n"
        f"**Fill Rate:** {summary['fill_rate']:.2%}  \n"
        f"**Stockouts:** {summary['stock_out_count']}  \n"
        f"**Lost Sales:** {summary['total_lost_sales']:.0f}  \n"
        f"**Write-offs:** {summary['write_offs']:.0f}"
    )
    return fig, metrics


# ── Tab 2: LLM agent (live) ────────────────────────────────────────────────────

def _parse_decision(raw: str, fallback_rop: float) -> dict:
    try:
        cleaned = re.sub(r"```json|```", "", raw).strip()
        return json.loads(cleaned)
    except (json.JSONDecodeError, ValueError):
        match = re.search(r'"reorder_point"\s*:\s*(\d+\.?\d*)', raw)
        return {
            "subgoals": ["parse error"],
            "state_analysis": raw[:150],
            "recovery_plan": "N/A",
            "reorder_point": float(match.group(1)) if match else fallback_rop,
            "confidence": "low",
        }


def _llm_metrics_markdown(summary, daily_pnl, n_decisions):
    """Format the Markdown metrics panel shown next to the live LLM chart."""
    total_profit = sum(d["daily_profit"] for d in daily_pnl)
    days_elapsed = len(daily_pnl)
    # Service level = share of evaluated days without a stockout.
    service_level = (
        (days_elapsed - summary['stock_out_count']) / days_elapsed
        if days_elapsed > 0 else 0.0
    )
    return (
        f"**Total Profit:** ${total_profit:,.0f}  \n"
        f"**Service Level:** {service_level:.2%}  \n"
        f"**Fill Rate:** {summary['fill_rate']:.2%}  \n"
        f"**Stockouts:** {summary['stock_out_count']}  \n"
        f"**Lost Sales:** {summary['total_lost_sales']:.0f}  \n"
        f"**Write-offs:** {summary['write_offs']:.0f}  \n"
        f"**Decisions:** {n_decisions}"
    )


def _usable_rop(decision):
    """Return the decision's reorder point as a finite float, else ``None``."""
    import math  # function-scope: the file does not import math at the top

    if not isinstance(decision, dict):
        return None
    value = decision.get("reorder_point")
    if isinstance(value, bool):
        return None
    try:
        value = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return value if math.isfinite(value) else None


def run_llm_simulation(env_name, hf_token):
    """Simulate the evaluation window with an LLM choosing the reorder point.

    Every ``DECISION_INTERVAL`` days the model is sent a state snapshot and
    asked for a new reorder point (ROP). Between decisions, an order is placed
    whenever inventory is at or below the current ROP. If the API call fails
    or the reply has no usable ROP, the previous ROP is kept and the problem
    is recorded in the decision log; the simulation itself never aborts.

    This is a generator: it yields after every decision step, and once more
    when the run is complete.

    Args:
        env_name: Key of ``ENV_MAP`` selecting the demand environment.
        hf_token: Hugging Face token; when empty, the ``HF_TOKEN``
            environment variable is used instead.

    Yields:
        ``(fig, metrics_md, log_md)`` -- the chart from ``build_chart``, a
        Markdown metrics summary, and the Markdown decision log (last 20
        entries for intermediate updates, the full log at the end).

    Raises:
        KeyError: If ``env_name`` is not a key of ``ENV_MAP``.
    """
    env_class = ENV_MAP[env_name]
    environment = env_class(SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Walk every day once up front; dc.daily_demand_distribution is indexed
    # below, so this pass presumably populates it (confirm in DemandCalculator).
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(
        order_processor=order_processor,
        agent=BaseAgent(dc),  # placeholder; we override ROP manually
    )

    client = InferenceClient(token=hf_token or os.environ.get("HF_TOKEN"))
    convo_history = []
    memory_bank = []
    # Initial ROP: expected demand over one lead time.
    current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME
    daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
    total_demand, total_fulfilled = 0, 0
    decision_log = []

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        # Stock on hand before today's demand is applied caps what can be served.
        base_inv = inventory_manager.inventory

        inventory_manager.inventory_update(demand_qty)

        # Manual reorder using current_rop
        ordered_qty = 0
        if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
            hist = [dc.daily_demand_distribution[d].actual_demand
                    for d in range(max(0, day - 30), day)]
            mean_d = sum(hist) / len(hist) if hist else current_rop / LEAD_TIME
            # Truncate before testing and costing, so the quantity charged in
            # the P&L is exactly the quantity ordered and a fractional
            # shortfall does not place (and pay for) a zero-unit order.
            qty = int(max(0, current_rop - inventory_manager.inventory + mean_d * LEAD_TIME))
            if qty > 0:
                order_processor.place_order(day, qty)
                ordered_qty = qty

        inventory_manager.process_deliveries(day)
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)
        total_demand += demand_qty
        total_fulfilled += fulfilled
        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        fr = total_fulfilled / total_demand if total_demand > 0 else 0
        running_fill_rate.append(fr)

        # Daily P&L: revenue minus holding, stockout, ordering and write-off costs.
        lost = max(0, demand_qty - fulfilled)
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

        # LLM decision every DECISION_INTERVAL days
        if (day - HISTO_DAYS) % DECISION_INTERVAL != 0 or day >= SIM_DAYS - LEAD_TIME:
            continue

        hist30 = [dc.daily_demand_distribution[d].actual_demand
                  for d in range(max(0, day - 30), day)]
        snapshot = {
            "day": day, "days_remaining": SIM_DAYS - day,
            "current_inventory": round(inventory_manager.inventory, 1),
            "demand_mean_30d": round(sum(hist30) / len(hist30), 1) if hist30 else 0,
            "fill_rate_so_far": f"{fr*100:.1f}%",
            "recent_stockouts": performance_tracker.stock_out_count,
            "lead_time": LEAD_TIME,
        }
        if memory_bank:
            snapshot["memory"] = memory_bank[-6:]

        user_msg = (
            f"Day {day}/{SIM_DAYS}\n{json.dumps(snapshot, indent=2)}\n\n"
            f"Set reorder_point for the next {DECISION_INTERVAL} days."
        )
        messages = [
            {"role": "system", "content": LLM_SYSTEM_PROMPT},
            *convo_history[-6:],
            {"role": "user", "content": user_msg},
        ]

        # Only the remote call sits in the try, so real API failures are the
        # only thing reported as "API error".
        raw, failure = None, None
        try:
            resp = client.chat.completions.create(
                model="Qwen/Qwen2.5-72B-Instruct",
                messages=messages,
                max_tokens=600,
            )
            raw = resp.choices[0].message.content
        except Exception as e:  # best effort: a failed call must not end the run
            failure = f"API error: {str(e)[:60]}"

        decision, new_rop = None, None
        if failure is None:
            if isinstance(raw, str):
                decision = _parse_decision(raw, current_rop)
            new_rop = _usable_rop(decision)
            if new_rop is None:
                failure = "Invalid LLM decision (no usable reorder_point); keeping previous ROP"

        if failure is not None:
            decision_log.append(f"**Day {day}** | {failure}")
        else:
            current_rop = max(0.0, new_rop)
            # Keep whole user/assistant pairs; the request above sends the
            # last six messages, i.e. the three most recent exchanges.
            convo_history = [*convo_history[-4:],
                             {"role": "user", "content": user_msg},
                             {"role": "assistant", "content": raw}]
            conf = decision.get("confidence", "?")
            memory_bank = [*memory_bank[-7:], {
                "day": day, "rop": round(current_rop, 1),
                "fill_rate": f"{fr*100:.1f}%",
                "confidence": conf,
            }]
            rop_markers.append((day, current_rop))
            # str() guards against a non-string analysis (e.g. null or a list).
            analysis = str(decision.get("state_analysis", ""))[:80]
            decision_log.append(
                f"**Day {day}** | ROP={current_rop:.0f} | Fill={fr*100:.1f}% "
                f"| [{conf}] {analysis}"
            )

        # Yield live update
        fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                          f"Qwen2.5-72B  |  {env_name}  |  Day {day}/{SIM_DAYS}", daily_pnl)
        metrics = _llm_metrics_markdown(
            performance_tracker.performance_summary(), daily_pnl, len(decision_log))
        log_md = "\n\n".join(decision_log[-20:])
        try:
            yield fig, metrics, log_md
        finally:
            # Unregister the figure from pyplot once the consumer is done with
            # this update, so figures do not pile up across refreshes. The
            # Figure object stays usable by anyone still holding a reference.
            plt.close(fig)

    # Final yield
    fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                      f"Qwen2.5-72B  |  {env_name}  |  COMPLETE", daily_pnl)
    metrics = _llm_metrics_markdown(
        performance_tracker.performance_summary(), daily_pnl, len(decision_log))
    try:
        yield fig, metrics, "\n\n".join(decision_log)
    finally:
        plt.close(fig)


# ── UI ─────────────────────────────────────────────────────────────────────────

# Two-tab Gradio app: tab 1 runs a rule-based agent in one shot, tab 2 streams
# the LLM-driven simulation (run_llm_simulation is a generator, so its outputs
# are refreshed on every yield).
with gr.Blocks(title="Inventory Simulation") as demo:
    gr.Markdown("# Inventory Optimization: Agent Comparison")

    with gr.Tabs():

        with gr.Tab("Baseline Agents"):
            # NOTE(review): "365-day" is hard-coded here while the horizon
            # comes from SIM_DAYS in config — confirm they agree.
            gr.Markdown("Run one of the 4 rule-based agents through a full 365-day simulation.")
            with gr.Row():
                # Choices must match the agent labels accepted by run_simulation.
                agent_dd = gr.Dropdown(
                    choices=["Base (Historical Mean)", "Safety Stock", "Forecast", "Monte Carlo"],
                    value="Safety Stock", label="Agent",
                )
                env_dd = gr.Dropdown(
                    choices=list(ENV_MAP.keys()),
                    value="GammaPoisson (90/10 mixture)", label="Demand Environment",
                )
            run_btn = gr.Button("Run Simulation", variant="primary")
            with gr.Row():
                chart = gr.Plot(label="Results")
                metrics_md = gr.Markdown(label="Metrics")
            run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])

        with gr.Tab("LLM Agent — Live"):
            # The interval is taken from DECISION_INTERVAL so the description
            # cannot drift from what the simulation loop actually does.
            gr.Markdown(
                f"Qwen2.5-72B makes a reorder decision every {DECISION_INTERVAL} days. "
                "Chart and log update in real-time as the simulation runs."
            )
            with gr.Row():
                llm_env_dd = gr.Dropdown(
                    choices=list(ENV_MAP.keys()),
                    value="GammaPoisson (90/10 mixture)", label="Demand Environment",
                )
                hf_token_box = gr.Textbox(
                    label="HF Token (optional if HF_TOKEN env var is set)",
                    type="password", placeholder="hf_...",
                )
            llm_run_btn = gr.Button("Run LLM Simulation", variant="primary")
            with gr.Row():
                llm_chart = gr.Plot(label="Live Simulation")
                with gr.Column():
                    llm_metrics = gr.Markdown(label="Metrics")
                    llm_log = gr.Markdown(label="Decision Log")
            llm_run_btn.click(
                run_llm_simulation,
                inputs=[llm_env_dd, hf_token_box],
                outputs=[llm_chart, llm_metrics, llm_log],
            )

# Binds to localhost unless GRADIO_SERVER_NAME overrides the host.
demo.launch(server_name=os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1"))