ademarteau committed on
Commit
3cad082
Β·
1 Parent(s): 288043f

Added trained PPO model + app.py UI changes for HF Spaces

Browse files
agent/rl_agent.py CHANGED
@@ -53,6 +53,7 @@ class InventoryGymEnv(gym.Env):
53
  self._base_url = base_url
54
  self._env_type = env_type
55
 
 
56
  self._http_client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
57
  self._inv_client = InventoryEnvClient(base_url)
58
  self._inv_client._client = self._http_client
@@ -75,11 +76,11 @@ class InventoryGymEnv(gym.Env):
75
 
76
  def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
77
  super().reset(seed=seed)
78
- obs = asyncio.run(self._inv_client.reset(env_type=self._env_type))
79
  return self._obs_to_array(obs), {}
80
 
81
  def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
82
- result = asyncio.run(
83
  self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
84
  )
85
  return (
@@ -91,7 +92,8 @@ class InventoryGymEnv(gym.Env):
91
  )
92
 
93
  def close(self) -> None:
94
- asyncio.run(self._http_client.aclose())
 
95
 
96
  # ------------------------------------------------------------------
97
  # Helper
 
53
  self._base_url = base_url
54
  self._env_type = env_type
55
 
56
+ self._loop = asyncio.new_event_loop()
57
  self._http_client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
58
  self._inv_client = InventoryEnvClient(base_url)
59
  self._inv_client._client = self._http_client
 
76
 
77
  def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
78
  super().reset(seed=seed)
79
+ obs = self._loop.run_until_complete(self._inv_client.reset(env_type=self._env_type))
80
  return self._obs_to_array(obs), {}
81
 
82
  def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
83
+ result = self._loop.run_until_complete(
84
  self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
85
  )
86
  return (
 
92
  )
93
 
94
  def close(self) -> None:
95
+ self._loop.run_until_complete(self._http_client.aclose())
96
+ self._loop.close()
97
 
98
  # ------------------------------------------------------------------
99
  # Helper
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
 
8
  from huggingface_hub import InferenceClient
9
 
10
  from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
@@ -321,6 +322,135 @@ def run_llm_simulation(env_name, hf_token):
321
  yield fig, metrics, "\n\n".join(decision_log)
322
 
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  # ── UI ─────────────────────────────────────────────────────────────────────────
325
 
326
  with gr.Blocks(title="Inventory Simulation") as demo:
@@ -345,6 +475,31 @@ with gr.Blocks(title="Inventory Simulation") as demo:
345
  metrics_md = gr.Markdown(label="Metrics")
346
  run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  with gr.Tab("LLM Agent β€” Live"):
349
  gr.Markdown(
350
  "Qwen2.5-72B makes a reorder decision every 5 days. "
 
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
8
+ import numpy as np
9
  from huggingface_hub import InferenceClient
10
 
11
  from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
 
322
  yield fig, metrics, "\n\n".join(decision_log)
323
 
324
 
325
# ── Tab 3: PPO RL agent (live) ─────────────────────────────────────────────────

def run_ppo_simulation(env_name: str, model_path: str | None):
    """Replay the full simulation with a trained PPO policy, yielding live UI updates.

    Generator for the Gradio "PPO Agent — Live" tab: each simulated day it
    yields ``(fig, metrics_markdown, decision_log_markdown)`` so the chart,
    metrics, and log stream while the run progresses.

    Args:
        env_name: key into ``ENV_MAP`` selecting the demand environment.
        model_path: path to the SB3 model (without ``.zip``); falls back to
            ``"ppo_inventory"`` when empty/None.

    Yields:
        (matplotlib Figure | None, metrics markdown str, decision-log str).
    """
    model_path = (model_path or "ppo_inventory").strip()
    try:
        # Imported lazily so the rest of the app works without stable-baselines3
        # installed or without a trained model present.
        from stable_baselines3 import PPO
        model = PPO.load(model_path)
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the error in
        # the tab instead of crashing the Space.
        yield None, f"**Error loading model:** {e}", ""
        return

    env_class = ENV_MAP[env_name]
    environment = env_class(SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Pre-draw the whole demand trace so the simulation below is deterministic
    # with respect to the calculator's internal state.
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(order_processor=order_processor, agent=BaseAgent(dc))

    # Per-day traces for charting, plus running aggregates for the fill rate.
    daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
    total_demand, total_fulfilled = 0, 0
    decision_log = []
    demand_history: list[float] = []
    recent_stockout_days = 0      # cumulative count of stockout days (never decays)
    recent_lost_sales = 0.0       # exponentially decayed (factor 0.9) lost-sales signal
    # Initial ROP heuristic before the model's first action: mean demand over lead time.
    current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        demand_history.append(float(demand_qty))
        base_inv = inventory_manager.inventory  # inventory before today's demand is applied

        inventory_manager.inventory_update(demand_qty)

        # Build 22-float observation matching rl_agent.py layout
        # (2 scalars + 5 recent demands + 5 stats + 5 pending-order pairs = 22).
        # NOTE(review): this layout must stay bit-compatible with the
        # observation the model was trained on in rl_agent.py — confirm there.
        demand_last_5 = (demand_history[-5:] + [0.0] * 5)[:5]
        hist30 = demand_history[-30:]
        demand_mean_30d = float(np.mean(hist30)) if hist30 else 0.0
        demand_std_30d = float(np.std(hist30)) if hist30 else 0.0
        fr = total_fulfilled / total_demand if total_demand > 0 else 0.0

        # Flatten up to 5 pending orders as (arrival_day, quantity) pairs,
        # zero-padded to keep the observation length fixed.
        pending = list(order_processor.order_queue)
        pending_flat: list[float] = []
        for slot in range(5):
            if slot < len(pending):
                pending_flat.extend([float(pending[slot].arrival_day), float(pending[slot].quantity)])
            else:
                pending_flat.extend([0.0, 0.0])

        obs = np.array(
            [float(day), float(base_inv)]
            + [float(d) for d in demand_last_5]
            + [demand_mean_30d, demand_std_30d, fr,
               float(recent_stockout_days), float(recent_lost_sales)]
            + pending_flat,
            dtype=np.float32,
        )

        # Deterministic policy: action[0] is the reorder point, clamped at 0.
        action, _ = model.predict(obs, deterministic=True)
        current_rop = max(0.0, float(action[0]))

        # Reorder if below ROP
        ordered_qty = 0
        if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
            # Order up to ROP plus expected demand over the lead time.
            qty = max(0, int(current_rop - inventory_manager.inventory + demand_mean_30d * LEAD_TIME))
            if qty > 0:
                order_processor.place_order(day, qty)
                ordered_qty = qty

        inventory_manager.process_deliveries(day)
        # Fulfillment is capped by the stock available at the start of the day.
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)
        total_demand += demand_qty
        total_fulfilled += fulfilled

        lost = max(0, demand_qty - fulfilled)
        recent_lost_sales = recent_lost_sales * 0.9 + lost
        recent_stockout_days = recent_stockout_days + (1 if lost > 0 else 0)

        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        fr = total_fulfilled / total_demand if total_demand > 0 else 0.0
        running_fill_rate.append(fr)
        rop_markers.append((day, current_rop))

        # Daily P&L components; holding cost assumes 0.5% of unit cost per
        # day held — presumably mirrors the other tabs' economics; verify.
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

        # Log one decision entry every 5 days to keep the markdown readable.
        if (day - HISTO_DAYS) % 5 == 0:
            decision_log.append(
                f"**Day {day}** | ROP={current_rop:.0f} | Fill={fr*100:.1f}% | inv={base_inv:.0f}"
            )
        # Yield every day so the Gradio tab updates live (only last 20 log entries).
        fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                          f"PPO Agent | {env_name} | Day {day}/{SIM_DAYS}", daily_pnl)
        summary = performance_tracker.performance_summary()
        metrics = (
            f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
            f"**Stockouts:** {summary['stock_out_count']} \n"
            f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
            f"**Write-offs:** {summary['write_offs']:.0f}"
        )
        yield fig, metrics, "\n\n".join(decision_log[-20:])

    # Final frame: full chart, final metrics, and the complete decision log.
    fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                      f"PPO Agent | {env_name} | COMPLETE", daily_pnl)
    summary = performance_tracker.performance_summary()
    metrics = (
        f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
        f"**Stockouts:** {summary['stock_out_count']} \n"
        f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
        f"**Write-offs:** {summary['write_offs']:.0f}"
    )
    yield fig, metrics, "\n\n".join(decision_log)
453
+
454
  # ── UI ─────────────────────────────────────────────────────────────────────────
455
 
456
  with gr.Blocks(title="Inventory Simulation") as demo:
 
475
  metrics_md = gr.Markdown(label="Metrics")
476
  run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])
477
 
478
    # PPO tab: streams run_ppo_simulation (a generator) into chart/metrics/log.
    # NOTE(review): nesting reconstructed from the diff — confirm Row/Column
    # grouping against the rendered Space layout.
    with gr.Tab("PPO Agent — Live"):
        gr.Markdown(
            "Trained PPO (stable-baselines3) agent runs the full 365-day simulation. "
            "Requires `ppo_inventory.zip` in the repo root (train first with `rl_agent.py`)."
        )
        with gr.Row():
            # Demand environment selector; keys come from the shared ENV_MAP.
            ppo_env_dd = gr.Dropdown(
                choices=list(ENV_MAP.keys()),
                value="GammaPoisson (90/10 mixture)", label="Demand Environment",
            )
            # Model path without the .zip suffix (PPO.load appends it).
            ppo_model_box = gr.Textbox(
                label="Model path (no .zip)", value="ppo_inventory", placeholder="ppo_inventory"
            )
        ppo_run_btn = gr.Button("Run PPO Simulation", variant="primary")
        with gr.Row():
            ppo_chart = gr.Plot(label="Live Simulation")
            with gr.Column():
                ppo_metrics = gr.Markdown(label="Metrics")
                ppo_log = gr.Markdown(label="Decision Log")
        # Generator outputs stream live into the three components.
        ppo_run_btn.click(
            run_ppo_simulation,
            inputs=[ppo_env_dd, ppo_model_box],
            outputs=[ppo_chart, ppo_metrics, ppo_log],
        )
+
503
  with gr.Tab("LLM Agent β€” Live"):
504
  gr.Markdown(
505
  "Qwen2.5-72B makes a reorder decision every 5 days. "
llm_agent_runner.py CHANGED
@@ -28,7 +28,7 @@ from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME
28
  # ── Server ─────────────────────────────────────────────────────────────────────
29
 
30
  BASE_URL = "http://127.0.0.1:7861"
31
- DECISION_INTERVAL = 5 # Claude decides every N days
32
 
33
  ENV_NAMES = {
34
  0: "GammaPoisson",
 
28
  # ── Server ─────────────────────────────────────────────────────────────────────
29
 
30
  BASE_URL = "http://127.0.0.1:7861"
31
+ DECISION_INTERVAL = 5 # Qwen decides every N days
32
 
33
  ENV_NAMES = {
34
  0: "GammaPoisson",
server/__pycache__/inventory_env.cpython-313.pyc CHANGED
Binary files a/server/__pycache__/inventory_env.cpython-313.pyc and b/server/__pycache__/inventory_env.cpython-313.pyc differ