Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| import re | |
| import gradio as gr | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| from huggingface_hub import InferenceClient | |
| from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE | |
| from agent_environment import BaseAgent, SafetyStockAgent, ForecastAgent, MonteCarloAgent | |
| from demand_environment import GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance | |
| from demand_calculator import DemandCalculator | |
| from order_processor import OrderProcessor | |
| from inventory_manager import InventoryManager | |
| from performance_tracker import PerformanceTracker | |
# Maps the label shown in the "Demand Environment" dropdowns to the
# demand-environment class that is instantiated for a run.
ENV_MAP = {
    "GammaPoisson (90/10 mixture)": GammaPoisson,
    "GammaGamma High Variance (bimodal)": GammaGammaHighVariance,
    "Spiking Demand": SpikingDemand,
    "Single Gamma Low Variance": SingleGammaLowVariance,
}

# Number of simulated days between two consecutive LLM reorder-point decisions.
DECISION_INTERVAL = 5

# System prompt sent with every LLM request.
# The lead time is substituted from config so the prompt can never contradict
# the "lead_time" value sent in each state snapshot. A plain placeholder is
# used instead of an f-string so the JSON braces below need no escaping.
# NOTE(review): the write-off cadence (7 days) and rate (1%) are still
# hard-coded here; WRITE_OFF_RATE is imported from config — confirm they agree.
LLM_SYSTEM_PROMPT = """You are an expert inventory optimization agent in a stochastic simulation.
Decide the REORDER POINT (ROP) — the inventory threshold that triggers a new order.
RULES:
- Orders arrive LEAD_TIME=__LEAD_TIME__ days after placement
- Every 7 days, 1% of inventory is written off
- Goal: fill rate >= 95% at end of episode
OUTPUT — respond with this exact JSON (no markdown fences):
{
"subgoals": ["subgoal 1", "subgoal 2"],
"state_analysis": "2-3 sentence analysis",
"recovery_plan": "recovery strategy if fill rate < 95%",
"reorder_point": <number>,
"confidence": "high|medium|low"
}""".replace("__LEAD_TIME__", str(LEAD_TIME))
# ── Shared chart builder ──────────────────────────────────────────────────────
def build_chart(daily_inventory, running_fill_rate, rop_markers, title, daily_pnl=None):
    """Build the result figure for one simulation run.

    The figure has an inventory panel, a cumulative fill-rate panel and, when
    ``daily_pnl`` is given and non-empty, a third daily P&L panel.

    Args:
        daily_inventory: Inventory level per plotted day.
        running_fill_rate: Cumulative fill rate per plotted day (same length
            as ``daily_inventory``); plotted on a fixed 0-1 axis.
        rop_markers: Sequence of ``(day, reorder_point)`` pairs. ``day`` is
            shifted by ``HISTO_DAYS`` so the marker lines up with the x axis,
            which starts at 0.
        title: Title placed on the top panel.
        daily_pnl: Optional list of per-day dicts with the keys ``revenue``,
            ``holding_cost``, ``stockout_penalty``, ``order_cost``,
            ``writeoff_cost`` and ``daily_profit``.

    Returns:
        The populated ``matplotlib.figure.Figure``.
    """
    # Build the figure directly instead of through pyplot: pyplot registers
    # every figure in a process-wide table (never released unless closed) and
    # its "current figure" state is shared between concurrent requests.
    from matplotlib.figure import Figure

    n_rows = 3 if daily_pnl else 2
    fig = Figure(figsize=(10, 4 + 2.5 * n_rows))
    axes = fig.subplots(n_rows, 1, sharex=True)
    ax1, ax2 = axes[0], axes[1]
    days = list(range(len(daily_inventory)))

    # Panel 1: inventory level with the reorder points that were set.
    ax1.plot(days, daily_inventory, color="steelblue", linewidth=0.8)
    if rop_markers:
        rop_days, rop_vals = zip(*rop_markers)
        ax1.scatter([d - HISTO_DAYS for d in rop_days], rop_vals,
                    color="orange", s=20, zorder=5, label="ROP set")
        ax1.legend(fontsize=8)
    ax1.set_ylabel("Inventory Level")
    ax1.set_title(title)

    # Panel 2: cumulative fill rate against the 95% target line.
    ax2.plot(days, running_fill_rate, color="seagreen", linewidth=0.8)
    ax2.axhline(y=0.95, color="red", linestyle="--", linewidth=0.6, label="95% target")
    ax2.set_ylabel("Cumulative Fill Rate")
    ax2.set_ylim(0, 1)
    ax2.legend(fontsize=8)

    if daily_pnl:
        # Panel 3: revenue above zero, each cost component mirrored below zero.
        ax3 = axes[2]
        revenues = [r["revenue"] for r in daily_pnl]
        holding_costs = [r["holding_cost"] for r in daily_pnl]
        stockout_pens = [r["stockout_penalty"] for r in daily_pnl]
        order_costs = [r["order_cost"] for r in daily_pnl]
        writeoff_costs = [r["writeoff_cost"] for r in daily_pnl]
        net_profits = [r["daily_profit"] for r in daily_pnl]
        ax3.fill_between(days, revenues, alpha=0.25, color="green", label="Revenue")
        ax3.plot(days, net_profits, color="black", linewidth=0.9, label="Net profit")
        ax3.fill_between(days, [-h for h in holding_costs], alpha=0.3, color="royalblue", label="Holding cost")
        ax3.fill_between(days, [-s for s in stockout_pens], alpha=0.3, color="crimson", label="Stockout penalty")
        ax3.fill_between(days, [-o for o in order_costs], alpha=0.25, color="darkorange", label="Order cost")
        ax3.fill_between(days, [-w for w in writeoff_costs], alpha=0.25, color="purple", label="Write-off cost")
        ax3.axhline(y=0, color="grey", linewidth=0.5)
        ax3.set_ylabel("Daily P&L ($)")
        ax3.set_xlabel("Evaluation Day")
        ax3.legend(fontsize=7, ncol=3)
    else:
        ax2.set_xlabel("Evaluation Day")

    fig.tight_layout()
    return fig
# ── Tab 1: Baseline agents ────────────────────────────────────────────────────
def run_simulation(agent_name, env_name):
    """Run one rule-based agent through the evaluation period.

    Args:
        agent_name: Key of the agent to run; one of the labels offered by the
            "Agent" dropdown.
        env_name: Key into ``ENV_MAP`` selecting the demand environment.

    Returns:
        ``(figure, metrics_markdown)``: the chart from ``build_chart`` and a
        Markdown summary of profit, service level, fill rate, stockouts, lost
        sales and write-offs.

    Raises:
        KeyError: If ``agent_name`` or ``env_name`` is not a known label.
    """
    environment = ENV_MAP[env_name](SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Generate demand for every day up front so that
    # dc.daily_demand_distribution is populated before an agent reads it.
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)

    def _make_forecast_agent():
        distribution = dc.daily_demand_distribution
        return ForecastAgent(
            dc,
            [d.demand_mean for d in distribution],
            [d.demand_std for d in distribution],
        )

    # Factories rather than instances: only the selected agent is built,
    # instead of constructing all four on every run.
    agent_factories = {
        "Base (Historical Mean)": lambda: BaseAgent(dc),
        "Safety Stock": lambda: SafetyStockAgent(dc),
        "Forecast": _make_forecast_agent,
        "Monte Carlo": lambda: MonteCarloAgent(dc),
    }
    agent = agent_factories[agent_name]()

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(order_processor=order_processor, agent=agent)

    daily_inventory, running_fill_rate, daily_pnl = [], [], []
    total_demand, total_fulfilled = 0, 0

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        base_inv = inventory_manager.inventory  # stock available before today's demand
        inventory_manager.inventory_update(demand_qty)

        # Orders placed today are whatever the agent appended to the queue.
        q_before = len(order_processor.order_queue)
        if day < SIM_DAYS - LEAD_TIME:
            inventory_manager.reorder(day)
        new_orders = order_processor.order_queue[q_before:]
        ordered_qty = sum(o.quantity for o in new_orders)

        inventory_manager.process_deliveries(day)
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)

        total_demand += demand_qty
        total_fulfilled += fulfilled
        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        running_fill_rate.append(total_fulfilled / total_demand if total_demand > 0 else 0)

        # Daily P&L components.
        lost = max(0, demand_qty - fulfilled)
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

    summary = performance_tracker.performance_summary()
    total_profit = sum(d["daily_profit"] for d in daily_pnl)
    days_elapsed = len(daily_pnl)
    # Service level = share of evaluated days without a stockout.
    service_level = (days_elapsed - summary['stock_out_count']) / days_elapsed if days_elapsed > 0 else 0.0

    fig = build_chart(daily_inventory, running_fill_rate, [], f"{agent_name} | {env_name}", daily_pnl)
    metrics = (
        f"**Total Profit:** ${total_profit:,.0f} \n"
        f"**Service Level:** {service_level:.2%} \n"
        f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
        f"**Stockouts:** {summary['stock_out_count']} \n"
        f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
        f"**Write-offs:** {summary['write_offs']:.0f}"
    )
    return fig, metrics
# ── Tab 2: LLM agent (live) ───────────────────────────────────────────────────
| def _parse_decision(raw: str, fallback_rop: float) -> dict: | |
| try: | |
| cleaned = re.sub(r"```json|```", "", raw).strip() | |
| return json.loads(cleaned) | |
| except (json.JSONDecodeError, ValueError): | |
| match = re.search(r'"reorder_point"\s*:\s*(\d+\.?\d*)', raw) | |
| return { | |
| "subgoals": ["parse error"], | |
| "state_analysis": raw[:150], | |
| "recovery_plan": "N/A", | |
| "reorder_point": float(match.group(1)) if match else fallback_rop, | |
| "confidence": "low", | |
| } | |
def run_llm_simulation(env_name, hf_token):
    """Stream a simulation in which an LLM periodically resets the reorder point.

    Every ``DECISION_INTERVAL`` days the current state is sent to
    ``Qwen/Qwen2.5-72B-Instruct`` and the returned ``reorder_point`` replaces
    the current one. Between decisions an order is placed whenever inventory is
    at or below the current reorder point.

    Args:
        env_name: Key into ``ENV_MAP`` selecting the demand environment.
        hf_token: Hugging Face token; when empty the ``HF_TOKEN`` environment
            variable is used instead.

    Yields:
        ``(figure, metrics_markdown, decision_log_markdown)`` after each LLM
        decision step (log limited to the last 20 entries) and once more, with
        the full log, when the run completes.

    Raises:
        KeyError: If ``env_name`` is not a key of ``ENV_MAP``.
    """
    import math  # local import: only needed to validate the model's number

    environment = ENV_MAP[env_name](SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Generate demand for every day up front so that
    # dc.daily_demand_distribution is populated before it is read below.
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(
        order_processor=order_processor,
        agent=BaseAgent(dc),  # placeholder; the ROP is applied manually below
    )
    client = InferenceClient(token=hf_token or os.environ.get("HF_TOKEN"))

    convo_history = []
    memory_bank = []
    current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME
    daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
    total_demand, total_fulfilled = 0, 0
    decision_log = []

    def _recent_demand(before_day):
        """Actual demand of the (up to) 30 days preceding ``before_day``."""
        return [dc.daily_demand_distribution[d].actual_demand
                for d in range(max(0, before_day - 30), before_day)]

    def _metrics_markdown():
        """Format the running KPIs as the Markdown shown next to the chart."""
        summary = performance_tracker.performance_summary()
        total_profit = sum(d["daily_profit"] for d in daily_pnl)
        days_elapsed = len(daily_pnl)
        # Service level = share of evaluated days without a stockout.
        service_level = (days_elapsed - summary['stock_out_count']) / days_elapsed if days_elapsed > 0 else 0.0
        return (
            f"**Total Profit:** ${total_profit:,.0f} \n"
            f"**Service Level:** {service_level:.2%} \n"
            f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
            f"**Stockouts:** {summary['stock_out_count']} \n"
            f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
            f"**Write-offs:** {summary['write_offs']:.0f} \n"
            f"**Decisions:** {len(decision_log)}"
        )

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        base_inv = inventory_manager.inventory  # stock available before today's demand
        inventory_manager.inventory_update(demand_qty)

        # Manual reorder using current_rop.
        ordered_qty = 0
        if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
            hist = _recent_demand(day)
            mean_d = sum(hist) / len(hist) if hist else current_rop / LEAD_TIME
            # Truncate once so the quantity that is ordered is also the
            # quantity that is billed, and a sub-unit shortfall neither places
            # an empty order nor incurs the fixed order cost.
            qty = int(max(0, current_rop - inventory_manager.inventory + mean_d * LEAD_TIME))
            if qty > 0:
                order_processor.place_order(day, qty)
                ordered_qty = qty

        inventory_manager.process_deliveries(day)
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)

        total_demand += demand_qty
        total_fulfilled += fulfilled
        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        fr = total_fulfilled / total_demand if total_demand > 0 else 0
        running_fill_rate.append(fr)

        # Daily P&L components.
        lost = max(0, demand_qty - fulfilled)
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

        # LLM decision every DECISION_INTERVAL days.
        if (day - HISTO_DAYS) % DECISION_INTERVAL == 0 and day < SIM_DAYS - LEAD_TIME:
            hist30 = _recent_demand(day)
            snapshot = {
                "day": day, "days_remaining": SIM_DAYS - day,
                "current_inventory": round(inventory_manager.inventory, 1),
                "demand_mean_30d": round(sum(hist30) / len(hist30), 1) if hist30 else 0,
                "fill_rate_so_far": f"{fr*100:.1f}%",
                # NOTE(review): this is the tracker's running stock_out_count,
                # which looks cumulative rather than "recent" — confirm.
                "recent_stockouts": performance_tracker.stock_out_count,
                "lead_time": LEAD_TIME,
            }
            if memory_bank:
                snapshot["memory"] = memory_bank[-6:]
            user_msg = (
                f"Day {day}/{SIM_DAYS}\n{json.dumps(snapshot, indent=2)}\n\n"
                f"Set reorder_point for the next {DECISION_INTERVAL} days."
            )
            messages = [
                {"role": "system", "content": LLM_SYSTEM_PROMPT},
                *convo_history[-6:],
                {"role": "user", "content": user_msg},
            ]

            try:
                resp = client.chat.completions.create(
                    model="Qwen/Qwen2.5-72B-Instruct",
                    messages=messages,
                    max_tokens=600,
                )
                raw = resp.choices[0].message.content
            except Exception as e:  # boundary: network/auth/model failures must not abort the run
                decision_log.append(f"**Day {day}** | API error: {str(e)[:60]}")
            else:
                try:
                    decision = _parse_decision(raw, current_rop)
                    new_rop = float(decision["reorder_point"])
                    if not math.isfinite(new_rop):
                        raise ValueError("reorder_point is not a finite number")
                except (KeyError, TypeError, ValueError) as e:
                    # The model answered, but not with a usable decision:
                    # keep the previous ROP and say so instead of blaming the API.
                    decision_log.append(
                        f"**Day {day}** | Invalid decision, ROP kept at "
                        f"{current_rop:.0f}: {str(e)[:60]}"
                    )
                else:
                    current_rop = max(0.0, new_rop)
                    # Keep whole user/assistant pairs (4 old + 2 new = 6) so the
                    # replayed window always starts with a user turn.
                    convo_history = [*convo_history[-4:],
                                     {"role": "user", "content": user_msg},
                                     {"role": "assistant", "content": raw}]
                    conf = decision.get("confidence", "?")
                    memory_bank = [*memory_bank[-7:], {
                        "day": day, "rop": round(current_rop, 1),
                        "fill_rate": f"{fr*100:.1f}%",
                        "confidence": conf,
                    }]
                    rop_markers.append((day, current_rop))
                    analysis = str(decision.get("state_analysis", ""))[:80]
                    decision_log.append(
                        f"**Day {day}** | ROP={current_rop:.0f} | Fill={fr*100:.1f}% "
                        f"| [{conf}] {analysis}"
                    )

            # Yield live update.
            fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                              f"Qwen2.5-72B | {env_name} | Day {day}/{SIM_DAYS}", daily_pnl)
            yield fig, _metrics_markdown(), "\n\n".join(decision_log[-20:])
            # Execution resumes here once the consumer asks for the next
            # update; release the figure so pyplot does not accumulate one per
            # step (a no-op for figures pyplot does not manage).
            plt.close(fig)

    # Final yield.
    fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                      f"Qwen2.5-72B | {env_name} | COMPLETE", daily_pnl)
    yield fig, _metrics_markdown(), "\n\n".join(decision_log)
    plt.close(fig)
# ── UI ────────────────────────────────────────────────────────────────────────
# Two-tab Gradio app: rule-based baselines (single result) and the LLM agent
# (streamed results from the run_llm_simulation generator).
with gr.Blocks(title="Inventory Simulation") as demo:
    gr.Markdown("# Inventory Optimization: Agent Comparison")
    with gr.Tabs():
        with gr.Tab("Baseline Agents"):
            gr.Markdown("Run one of the 4 rule-based agents through a full 365-day simulation.")
            with gr.Row():
                agent_dd = gr.Dropdown(
                    choices=["Base (Historical Mean)", "Safety Stock", "Forecast", "Monte Carlo"],
                    value="Safety Stock", label="Agent",
                )
                env_dd = gr.Dropdown(
                    choices=list(ENV_MAP.keys()),
                    value="GammaPoisson (90/10 mixture)", label="Demand Environment",
                )
            run_btn = gr.Button("Run Simulation", variant="primary")
            with gr.Row():
                chart = gr.Plot(label="Results")
                metrics_md = gr.Markdown(label="Metrics")
            run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])
        with gr.Tab("LLM Agent — Live"):
            gr.Markdown(
                # Interval comes from the constant so the text cannot drift
                # from the actual decision cadence.
                f"Qwen2.5-72B makes a reorder decision every {DECISION_INTERVAL} days. "
                "Chart and log update in real-time as the simulation runs."
            )
            with gr.Row():
                llm_env_dd = gr.Dropdown(
                    choices=list(ENV_MAP.keys()),
                    value="GammaPoisson (90/10 mixture)", label="Demand Environment",
                )
                hf_token_box = gr.Textbox(
                    label="HF Token (optional if HF_TOKEN env var is set)",
                    type="password", placeholder="hf_...",
                )
            llm_run_btn = gr.Button("Run LLM Simulation", variant="primary")
            with gr.Row():
                llm_chart = gr.Plot(label="Live Simulation")
                with gr.Column():
                    llm_metrics = gr.Markdown(label="Metrics")
                    llm_log = gr.Markdown(label="Decision Log")
            llm_run_btn.click(
                run_llm_simulation,
                inputs=[llm_env_dd, hf_token_box],
                outputs=[llm_chart, llm_metrics, llm_log],
            )

# Bind to localhost unless the hosting environment sets GRADIO_SERVER_NAME.
demo.launch(server_name=os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1"))