Spaces:

ademarteau
/

RL-Inventory-Simulations

Runtime error

ademarteau

metrics: profit first, then service level, then fill rate

39193b5 3 days ago

18 kB

	import json
	import os
	import re

	import gradio as gr
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import numpy as np
	from huggingface_hub import InferenceClient

	from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
	from agent_environment import BaseAgent, SafetyStockAgent, ForecastAgent, MonteCarloAgent
	from demand_environment import GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance
	from demand_calculator import DemandCalculator
	from order_processor import OrderProcessor
	from inventory_manager import InventoryManager
	from performance_tracker import PerformanceTracker

	# Maps the label shown in the "Demand Environment" dropdowns to the demand
	# environment class that is instantiated for a run (looked up as ENV_MAP[env_name]).
	ENV_MAP = {
	"GammaPoisson (90/10 mixture)": GammaPoisson,
	"GammaGamma High Variance (bimodal)": GammaGammaHighVariance,
	"Spiking Demand": SpikingDemand,
	"Single Gamma Low Variance": SingleGammaLowVariance,
	}

	# Number of simulated days between two consecutive LLM reorder-point decisions
	# in run_llm_simulation.
	DECISION_INTERVAL = 5

	# System prompt sent with every LLM request. It fixes the JSON schema that
	# _parse_decision reads from the reply.
	# NOTE(review): the lead time (3 days) and the write-off rule (1% every 7 days)
	# are spelled out literally here; confirm they match the values in config.
	LLM_SYSTEM_PROMPT = """You are an expert inventory optimization agent in a stochastic simulation.

	Decide the REORDER POINT (ROP) — the inventory threshold that triggers a new order.

	RULES:
	- Orders arrive LEAD_TIME=3 days after placement
	- Every 7 days, 1% of inventory is written off
	- Goal: fill rate >= 95% at end of episode

	OUTPUT — respond with this exact JSON (no markdown fences):
	{
	"subgoals": ["subgoal 1", "subgoal 2"],
	"state_analysis": "2-3 sentence analysis",
	"recovery_plan": "recovery strategy if fill rate < 95%",
	"reorder_point": <number>,
	"confidence": "high|medium|low"
	}"""


	# ── Shared chart builder ───────────────────────────────────────────────────────

	def build_chart(daily_inventory, running_fill_rate, rop_markers, title, daily_pnl=None):
	    """Render inventory, cumulative fill rate and (optionally) daily P&L as one figure.

	    Args:
	        daily_inventory: Inventory level per evaluation day (x axis is 0..len-1).
	        running_fill_rate: Cumulative fill rate per evaluation day, same length.
	        rop_markers: Iterable of ``(day, rop)`` pairs; ``day`` is shifted by
	            ``HISTO_DAYS`` so markers line up with the evaluation-day axis.
	            May be empty.
	        title: Title drawn above the inventory panel.
	        daily_pnl: Optional list of per-day dicts with the keys ``revenue``,
	            ``holding_cost``, ``stockout_penalty``, ``order_cost``,
	            ``writeoff_cost`` and ``daily_profit``. When truthy, a third panel
	            with the P&L breakdown is added.

	    Returns:
	        The ``matplotlib.figure.Figure`` holding the 2 or 3 stacked panels.
	    """
	    # Build the figure directly instead of through pyplot: pyplot keeps a global
	    # reference to every figure it creates, and this function is called once per
	    # simulated day by the live LLM tab, so unclosed pyplot figures would pile up.
	    from matplotlib.figure import Figure

	    n_rows = 3 if daily_pnl else 2
	    fig = Figure(figsize=(10, 4 + 2.5 * n_rows))
	    axes = fig.subplots(n_rows, 1, sharex=True)
	    ax1, ax2 = axes[0], axes[1]
	    days = list(range(len(daily_inventory)))

	    # Panel 1: inventory level, with the reorder points chosen along the way.
	    ax1.plot(days, daily_inventory, color="steelblue", linewidth=0.8)
	    if rop_markers:
	        rop_days, rop_vals = zip(*rop_markers)
	        ax1.scatter([d - HISTO_DAYS for d in rop_days], rop_vals,
	                    color="orange", s=20, zorder=5, label="ROP set")
	        ax1.legend(fontsize=8)
	    ax1.set_ylabel("Inventory Level")
	    ax1.set_title(title)

	    # Panel 2: cumulative fill rate against the 95% target line.
	    ax2.plot(days, running_fill_rate, color="seagreen", linewidth=0.8)
	    ax2.axhline(y=0.95, color="red", linestyle="--", linewidth=0.6, label="95% target")
	    ax2.set_ylabel("Cumulative Fill Rate")
	    ax2.set_ylim(0, 1)
	    ax2.legend(fontsize=8)

	    if daily_pnl:
	        # Panel 3: revenue above zero, each cost component mirrored below zero.
	        ax3 = axes[2]
	        revenues = [r["revenue"] for r in daily_pnl]
	        net_profits = [r["daily_profit"] for r in daily_pnl]
	        ax3.fill_between(days, revenues, alpha=0.25, color="green", label="Revenue")
	        ax3.plot(days, net_profits, color="black", linewidth=0.9, label="Net profit")
	        cost_layers = (
	            ("holding_cost", 0.3, "royalblue", "Holding cost"),
	            ("stockout_penalty", 0.3, "crimson", "Stockout penalty"),
	            ("order_cost", 0.25, "darkorange", "Order cost"),
	            ("writeoff_cost", 0.25, "purple", "Write-off cost"),
	        )
	        for key, alpha, color, label in cost_layers:
	            ax3.fill_between(days, [-r[key] for r in daily_pnl],
	                             alpha=alpha, color=color, label=label)
	        ax3.axhline(y=0, color="grey", linewidth=0.5)
	        ax3.set_ylabel("Daily P&L ($)")
	        ax3.set_xlabel("Evaluation Day")
	        ax3.legend(fontsize=7, ncol=3)
	    else:
	        ax2.set_xlabel("Evaluation Day")

	    fig.tight_layout()
	    return fig


	# ── Tab 1: Baseline agents ─────────────────────────────────────────────────────

	def run_simulation(agent_name, env_name):
	    """Run one rule-based agent through the evaluation period and summarise it.

	    Args:
	        agent_name: Key selecting the agent; one of "Base (Historical Mean)",
	            "Safety Stock", "Forecast" or "Monte Carlo".
	        env_name: Key of ``ENV_MAP`` selecting the demand environment.

	    Returns:
	        ``(fig, metrics)``: the chart from ``build_chart`` and a Markdown string
	        listing profit, service level, fill rate, stockouts, lost sales and
	        write-offs.

	    Raises:
	        KeyError: If ``agent_name`` or ``env_name`` is not a known key.
	    """
	    env_class = ENV_MAP[env_name]
	    environment = env_class(SIM_DAYS)
	    dc = DemandCalculator(SIM_DAYS)
	    dc.set_environment(environment)
	    # Query every day up front; the per-day distribution is read right below.
	    # NOTE(review): presumably this is what fills dc.daily_demand_distribution.
	    for i in range(SIM_DAYS):
	        dc.get_daily_demand(i)
	    demand_mean = [d.demand_mean for d in dc.daily_demand_distribution]
	    demand_std = [d.demand_std for d in dc.daily_demand_distribution]
	    agent_map = {
	        "Base (Historical Mean)": BaseAgent(dc),
	        "Safety Stock": SafetyStockAgent(dc),
	        "Forecast": ForecastAgent(dc, demand_mean, demand_std),
	        "Monte Carlo": MonteCarloAgent(dc),
	    }
	    agent = agent_map[agent_name]
	    order_processor = OrderProcessor()
	    performance_tracker = PerformanceTracker()
	    inventory_manager = InventoryManager(order_processor=order_processor, agent=agent)
	    daily_inventory, running_fill_rate, daily_pnl = [], [], []
	    total_demand, total_fulfilled = 0, 0
	    for day in range(HISTO_DAYS, SIM_DAYS):
	        demand_qty = dc.get_daily_demand(day)
	        # Stock on hand before today's demand; it caps what can be fulfilled.
	        base_inv = inventory_manager.inventory
	        inventory_manager.inventory_update(demand_qty)
	        # Remember the queue length so orders placed today can be sliced out.
	        q_before = len(order_processor.order_queue)
	        # No reordering in the last LEAD_TIME days of the horizon.
	        if day < SIM_DAYS - LEAD_TIME:
	            inventory_manager.reorder(day)
	        new_orders = order_processor.order_queue[q_before:]
	        ordered_qty = sum(o.quantity for o in new_orders)
	        inventory_manager.process_deliveries(day)
	        fulfilled = min(demand_qty, base_inv)
	        daily_writeoff = inventory_manager.apply_writeoff(day)
	        total_demand += demand_qty
	        total_fulfilled += fulfilled
	        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
	        daily_inventory.append(inventory_manager.inventory)
	        running_fill_rate.append(total_fulfilled / total_demand if total_demand > 0 else 0)

	        # Daily P&L: revenue minus holding, stockout, ordering and write-off costs.
	        lost = max(0, demand_qty - fulfilled)
	        revenue = fulfilled * SELLING_PRICE
	        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
	        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
	        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
	        writeoff_cost = daily_writeoff * UNIT_COST
	        daily_pnl.append({
	            "revenue": revenue,
	            "holding_cost": holding_cost,
	            "stockout_penalty": stockout_penalty,
	            "order_cost": order_cost,
	            "writeoff_cost": writeoff_cost,
	            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
	        })

	    summary = performance_tracker.performance_summary()
	    total_profit = sum(d["daily_profit"] for d in daily_pnl)
	    days_elapsed = len(daily_pnl)
	    # Service level = share of evaluation days without a stockout.
	    service_level = (days_elapsed - summary['stock_out_count']) / days_elapsed if days_elapsed > 0 else 0.0
	    fig = build_chart(daily_inventory, running_fill_rate, [], f"{agent_name} | {env_name}", daily_pnl)
	    # Two trailing spaces before each newline are a Markdown hard line break;
	    # with a single space the whole panel renders as one run-on line.
	    metrics = (
	        f"Total Profit: ${total_profit:,.0f}  \n"
	        f"Service Level: {service_level:.2%}  \n"
	        f"Fill Rate: {summary['fill_rate']:.2%}  \n"
	        f"Stockouts: {summary['stock_out_count']}  \n"
	        f"Lost Sales: {summary['total_lost_sales']:.0f}  \n"
	        f"Write-offs: {summary['write_offs']:.0f}"
	    )
	    return fig, metrics


	# ── Tab 2: LLM agent (live) ────────────────────────────────────────────────────

	def _parse_decision(raw: str, fallback_rop: float) -> dict:
	try:
	cleaned = re.sub(r"```json\|```", "", raw).strip()
	return json.loads(cleaned)
	except (json.JSONDecodeError, ValueError):
	match = re.search(r'"reorder_point"\s:\s(\d+\.?\d*)', raw)
	return {
	"subgoals": ["parse error"],
	"state_analysis": raw[:150],
	"recovery_plan": "N/A",
	"reorder_point": float(match.group(1)) if match else fallback_rop,
	"confidence": "low",
	}


	def _format_llm_metrics(summary, daily_pnl, n_decisions):
	    """Build the Markdown metrics panel for the LLM tab: profit, service level, fill rate, then counts."""
	    total_profit = sum(d["daily_profit"] for d in daily_pnl)
	    days_elapsed = len(daily_pnl)
	    # Service level = share of evaluation days without a stockout.
	    service_level = (days_elapsed - summary['stock_out_count']) / days_elapsed if days_elapsed > 0 else 0.0
	    # Two trailing spaces before each newline are a Markdown hard line break.
	    return (
	        f"Total Profit: ${total_profit:,.0f}  \n"
	        f"Service Level: {service_level:.2%}  \n"
	        f"Fill Rate: {summary['fill_rate']:.2%}  \n"
	        f"Stockouts: {summary['stock_out_count']}  \n"
	        f"Lost Sales: {summary['total_lost_sales']:.0f}  \n"
	        f"Write-offs: {summary['write_offs']:.0f}  \n"
	        f"Decisions: {n_decisions}"
	    )


	def run_llm_simulation(env_name, hf_token):
	    """Run the simulation with an LLM choosing the reorder point, streaming progress.

	    Every ``DECISION_INTERVAL`` days the model is sent a state snapshot and asked
	    for a new reorder point (ROP). Between decisions the current ROP drives a
	    manual reorder rule. A failed API call or an unusable reply is logged and
	    the previous ROP is kept, so the run always continues.

	    Args:
	        env_name: Key of ``ENV_MAP`` selecting the demand environment.
	        hf_token: Hugging Face token; when empty, the ``HF_TOKEN`` environment
	            variable is used instead.

	    Yields:
	        ``(fig, metrics, log_md)`` once per simulated day and once more at the
	        end: the chart, the Markdown metrics panel and the decision log (last
	        20 entries while running, the full log in the final yield).

	    Raises:
	        KeyError: If ``env_name`` is not a key of ``ENV_MAP``.
	    """
	    env_class = ENV_MAP[env_name]
	    environment = env_class(SIM_DAYS)
	    dc = DemandCalculator(SIM_DAYS)
	    dc.set_environment(environment)
	    # Query every day up front; dc.daily_demand_distribution is read below.
	    for i in range(SIM_DAYS):
	        dc.get_daily_demand(i)

	    order_processor = OrderProcessor()
	    performance_tracker = PerformanceTracker()
	    inventory_manager = InventoryManager(
	        order_processor=order_processor,
	        agent=BaseAgent(dc),  # placeholder; the ROP is applied manually below
	    )

	    client = InferenceClient(token=hf_token or os.environ.get("HF_TOKEN"))
	    convo_history = []
	    memory_bank = []
	    # Initial ROP: expected demand over one lead time.
	    current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME
	    daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
	    total_demand, total_fulfilled = 0, 0
	    decision_log = []

	    for day in range(HISTO_DAYS, SIM_DAYS):
	        demand_qty = dc.get_daily_demand(day)
	        # Stock on hand before today's demand; it caps what can be fulfilled.
	        base_inv = inventory_manager.inventory

	        inventory_manager.inventory_update(demand_qty)

	        # Manual reorder using current_rop (no orders in the last LEAD_TIME days).
	        ordered_qty = 0
	        if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
	            hist = [dc.daily_demand_distribution[d].actual_demand
	                    for d in range(max(0, day - 30), day)]
	            mean_d = sum(hist) / len(hist) if hist else current_rop / LEAD_TIME
	            # Truncate once so the quantity that is ordered is also the quantity
	            # that is costed, and so a sub-unit gap never triggers an empty order.
	            qty = int(max(0, current_rop - inventory_manager.inventory + mean_d * LEAD_TIME))
	            if qty > 0:
	                order_processor.place_order(day, qty)
	                ordered_qty = qty

	        inventory_manager.process_deliveries(day)
	        fulfilled = min(demand_qty, base_inv)
	        daily_writeoff = inventory_manager.apply_writeoff(day)
	        total_demand += demand_qty
	        total_fulfilled += fulfilled
	        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
	        daily_inventory.append(inventory_manager.inventory)
	        fr = total_fulfilled / total_demand if total_demand > 0 else 0
	        running_fill_rate.append(fr)

	        # Daily P&L: revenue minus holding, stockout, ordering and write-off costs.
	        lost = max(0, demand_qty - fulfilled)
	        revenue = fulfilled * SELLING_PRICE
	        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
	        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
	        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
	        writeoff_cost = daily_writeoff * UNIT_COST
	        daily_pnl.append({
	            "revenue": revenue,
	            "holding_cost": holding_cost,
	            "stockout_penalty": stockout_penalty,
	            "order_cost": order_cost,
	            "writeoff_cost": writeoff_cost,
	            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
	        })

	        # LLM decision every DECISION_INTERVAL days
	        if (day - HISTO_DAYS) % DECISION_INTERVAL == 0 and day < SIM_DAYS - LEAD_TIME:
	            hist30 = [dc.daily_demand_distribution[d].actual_demand
	                      for d in range(max(0, day - 30), day)]
	            snapshot = {
	                "day": day, "days_remaining": SIM_DAYS - day,
	                "current_inventory": round(inventory_manager.inventory, 1),
	                "demand_mean_30d": round(sum(hist30) / len(hist30), 1) if hist30 else 0,
	                "fill_rate_so_far": f"{fr*100:.1f}%",
	                "recent_stockouts": performance_tracker.stock_out_count,
	                "lead_time": LEAD_TIME,
	            }
	            if memory_bank:
	                snapshot["memory"] = memory_bank[-6:]

	            user_msg = (
	                f"Day {day}/{SIM_DAYS}\n{json.dumps(snapshot, indent=2)}\n\n"
	                f"Set reorder_point for the next {DECISION_INTERVAL} days."
	            )
	            messages = [
	                {"role": "system", "content": LLM_SYSTEM_PROMPT},
	                *convo_history[-6:],
	                {"role": "user", "content": user_msg},
	            ]
	            try:
	                resp = client.chat.completions.create(
	                    model="Qwen/Qwen2.5-72B-Instruct",
	                    messages=messages,
	                    max_tokens=600,
	                )
	                raw = resp.choices[0].message.content or ""
	            except Exception as e:
	                # Deliberately broad: any client/network failure is logged and the
	                # simulation carries on with the previous ROP.
	                decision_log.append(f"Day {day} | API error: {str(e)[:60]}")
	            else:
	                decision = _parse_decision(raw, current_rop)
	                if not isinstance(decision, dict):
	                    decision = {}
	                try:
	                    new_rop = float(decision.get("reorder_point", current_rop))
	                    # Rejects NaN and +inf, which would break the order arithmetic.
	                    if not new_rop < float("inf"):
	                        raise ValueError("reorder_point is not finite")
	                except (TypeError, ValueError):
	                    decision_log.append(
	                        f"Day {day} | Unusable reorder_point in reply; keeping ROP={current_rop:.0f}"
	                    )
	                else:
	                    current_rop = max(0.0, new_rop)
	                    convo_history = [*convo_history[-5:],
	                                     {"role": "user", "content": user_msg},
	                                     {"role": "assistant", "content": raw}]
	                    conf = decision.get("confidence", "?")
	                    memory_bank = [*memory_bank[-7:], {
	                        "day": day, "rop": round(current_rop, 1),
	                        "fill_rate": f"{fr*100:.1f}%",
	                        "confidence": conf,
	                    }]
	                    rop_markers.append((day, current_rop))
	                    analysis = str(decision.get("state_analysis", ""))[:80]
	                    decision_log.append(
	                        f"Day {day} | ROP={current_rop:.0f} | Fill={fr*100:.1f}% "
	                        f"| [{conf}] {analysis}"
	                    )

	        # Yield live update
	        fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
	                          f"Qwen2.5-72B | {env_name} | Day {day}/{SIM_DAYS}", daily_pnl)
	        metrics = _format_llm_metrics(
	            performance_tracker.performance_summary(), daily_pnl, len(decision_log))
	        log_md = "\n\n".join(decision_log[-20:])
	        try:
	            yield fig, metrics, log_md
	        finally:
	            # One figure is built per simulated day; release it once the consumer
	            # asks for the next update (or abandons the generator) so they do not
	            # accumulate in pyplot's figure registry.
	            plt.close(fig)

	    # Final yield
	    fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
	                      f"Qwen2.5-72B | {env_name} | COMPLETE", daily_pnl)
	    metrics = _format_llm_metrics(
	        performance_tracker.performance_summary(), daily_pnl, len(decision_log))
	    try:
	        yield fig, metrics, "\n\n".join(decision_log)
	    finally:
	        plt.close(fig)


	# ── UI ─────────────────────────────────────────────────────────────────────────

	# Label preselected in both "Demand Environment" dropdowns; must be a key of ENV_MAP.
	_DEFAULT_ENV_LABEL = "GammaPoisson (90/10 mixture)"

	# Two-tab Gradio app: rule-based baselines (single run) and the live LLM agent
	# (streamed, because run_llm_simulation is a generator).
	with gr.Blocks(title="Inventory Simulation") as demo:
	    gr.Markdown("# Inventory Optimization: Agent Comparison")

	    with gr.Tabs():

	        with gr.Tab("Baseline Agents"):
	            gr.Markdown("Run one of the 4 rule-based agents through a full 365-day simulation.")
	            with gr.Row():
	                # Choices must match the agent names that run_simulation looks up.
	                agent_dd = gr.Dropdown(
	                    choices=["Base (Historical Mean)", "Safety Stock", "Forecast", "Monte Carlo"],
	                    value="Safety Stock", label="Agent",
	                )
	                env_dd = gr.Dropdown(
	                    choices=list(ENV_MAP.keys()),
	                    value=_DEFAULT_ENV_LABEL, label="Demand Environment",
	                )
	            run_btn = gr.Button("Run Simulation", variant="primary")
	            with gr.Row():
	                chart = gr.Plot(label="Results")
	                metrics_md = gr.Markdown(label="Metrics")
	            run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])

	        with gr.Tab("LLM Agent — Live"):
	            # The cadence is taken from DECISION_INTERVAL so the text cannot drift
	            # from what run_llm_simulation actually does.
	            gr.Markdown(
	                f"Qwen2.5-72B makes a reorder decision every {DECISION_INTERVAL} days. "
	                "Chart and log update in real-time as the simulation runs."
	            )
	            with gr.Row():
	                llm_env_dd = gr.Dropdown(
	                    choices=list(ENV_MAP.keys()),
	                    value=_DEFAULT_ENV_LABEL, label="Demand Environment",
	                )
	                hf_token_box = gr.Textbox(
	                    label="HF Token (optional if HF_TOKEN env var is set)",
	                    type="password", placeholder="hf_...",
	                )
	            llm_run_btn = gr.Button("Run LLM Simulation", variant="primary")
	            with gr.Row():
	                llm_chart = gr.Plot(label="Live Simulation")
	                with gr.Column():
	                    llm_metrics = gr.Markdown(label="Metrics")
	                    llm_log = gr.Markdown(label="Decision Log")
	            llm_run_btn.click(
	                run_llm_simulation,
	                inputs=[llm_env_dd, hf_token_box],
	                outputs=[llm_chart, llm_metrics, llm_log],
	            )

	# Bind address comes from GRADIO_SERVER_NAME, defaulting to localhost only.
	demo.launch(server_name=os.environ.get("GRADIO_SERVER_NAME", "127.0.0.1"))