ademarteau committed on
Commit
3cad082
Β·
1 Parent(s): 288043f

Added trained PPO model + app.py UI changes for HF Spaces

Browse files
agent/rl_agent.py CHANGED
@@ -53,6 +53,7 @@ class InventoryGymEnv(gym.Env):
53
  self._base_url = base_url
54
  self._env_type = env_type
55
 
 
56
  self._http_client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
57
  self._inv_client = InventoryEnvClient(base_url)
58
  self._inv_client._client = self._http_client
@@ -75,11 +76,11 @@ class InventoryGymEnv(gym.Env):
75
 
76
  def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
77
  super().reset(seed=seed)
78
- obs = asyncio.run(self._inv_client.reset(env_type=self._env_type))
79
  return self._obs_to_array(obs), {}
80
 
81
  def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
82
- result = asyncio.run(
83
  self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
84
  )
85
  return (
@@ -91,7 +92,8 @@ class InventoryGymEnv(gym.Env):
91
  )
92
 
93
  def close(self) -> None:
94
- asyncio.run(self._http_client.aclose())
 
95
 
96
  # ------------------------------------------------------------------
97
  # Helper
 
53
  self._base_url = base_url
54
  self._env_type = env_type
55
 
56
+ self._loop = asyncio.new_event_loop()
57
  self._http_client = httpx.AsyncClient(base_url=base_url, timeout=30.0)
58
  self._inv_client = InventoryEnvClient(base_url)
59
  self._inv_client._client = self._http_client
 
76
 
77
  def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
78
  super().reset(seed=seed)
79
+ obs = self._loop.run_until_complete(self._inv_client.reset(env_type=self._env_type))
80
  return self._obs_to_array(obs), {}
81
 
82
  def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
83
+ result = self._loop.run_until_complete(
84
  self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
85
  )
86
  return (
 
92
  )
93
 
94
  def close(self) -> None:
95
+ self._loop.run_until_complete(self._http_client.aclose())
96
+ self._loop.close()
97
 
98
  # ------------------------------------------------------------------
99
  # Helper
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
 
8
  from huggingface_hub import InferenceClient
9
 
10
  from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
@@ -321,6 +322,135 @@ def run_llm_simulation(env_name, hf_token):
321
  yield fig, metrics, "\n\n".join(decision_log)
322
 
323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  # ── UI ─────────────────────────────────────────────────────────────────────────
325
 
326
  with gr.Blocks(title="Inventory Simulation") as demo:
@@ -345,6 +475,31 @@ with gr.Blocks(title="Inventory Simulation") as demo:
345
  metrics_md = gr.Markdown(label="Metrics")
346
  run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  with gr.Tab("LLM Agent β€” Live"):
349
  gr.Markdown(
350
  "Qwen2.5-72B makes a reorder decision every 5 days. "
 
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
8
+ import numpy as np
9
  from huggingface_hub import InferenceClient
10
 
11
  from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
 
322
  yield fig, metrics, "\n\n".join(decision_log)
323
 
324
 
325
# ── Tab 3: PPO RL agent (live) ─────────────────────────────────────────────────

def run_ppo_simulation(env_name: str, model_path: str | None):
    """Replay the full simulation with a trained PPO policy, yielding live UI updates.

    Generator for the Gradio "PPO Agent — Live" tab: each simulated day it
    yields ``(fig, metrics_markdown, decision_log_markdown)`` so the chart,
    metrics, and log stream while the run progresses.

    Args:
        env_name: key into ``ENV_MAP`` selecting the demand environment.
        model_path: path to the SB3 model (without ``.zip``); falls back to
            ``"ppo_inventory"`` when empty/None.

    Yields:
        (matplotlib Figure | None, metrics markdown str, decision-log str).
    """
    model_path = (model_path or "ppo_inventory").strip()
    try:
        # Imported lazily so the rest of the app works without stable-baselines3
        # installed or without a trained model present.
        from stable_baselines3 import PPO
        model = PPO.load(model_path)
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface the error in
        # the tab instead of crashing the Space.
        yield None, f"**Error loading model:** {e}", ""
        return

    env_class = ENV_MAP[env_name]
    environment = env_class(SIM_DAYS)
    dc = DemandCalculator(SIM_DAYS)
    dc.set_environment(environment)
    # Pre-draw the whole demand trace so the simulation below is deterministic
    # with respect to the calculator's internal state.
    for i in range(SIM_DAYS):
        dc.get_daily_demand(i)

    order_processor = OrderProcessor()
    performance_tracker = PerformanceTracker()
    inventory_manager = InventoryManager(order_processor=order_processor, agent=BaseAgent(dc))

    # Per-day traces for charting, plus running aggregates for the fill rate.
    daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
    total_demand, total_fulfilled = 0, 0
    decision_log = []
    demand_history: list[float] = []
    recent_stockout_days = 0      # cumulative count of stockout days (never decays)
    recent_lost_sales = 0.0       # exponentially decayed (factor 0.9) lost-sales signal
    # Initial ROP heuristic before the model's first action: mean demand over lead time.
    current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME

    for day in range(HISTO_DAYS, SIM_DAYS):
        demand_qty = dc.get_daily_demand(day)
        demand_history.append(float(demand_qty))
        base_inv = inventory_manager.inventory  # inventory before today's demand is applied

        inventory_manager.inventory_update(demand_qty)

        # Build 22-float observation matching rl_agent.py layout
        # (2 scalars + 5 recent demands + 5 stats + 5 pending-order pairs = 22).
        # NOTE(review): this layout must stay bit-compatible with the
        # observation the model was trained on in rl_agent.py — confirm there.
        demand_last_5 = (demand_history[-5:] + [0.0] * 5)[:5]
        hist30 = demand_history[-30:]
        demand_mean_30d = float(np.mean(hist30)) if hist30 else 0.0
        demand_std_30d = float(np.std(hist30)) if hist30 else 0.0
        fr = total_fulfilled / total_demand if total_demand > 0 else 0.0

        # Flatten up to 5 pending orders as (arrival_day, quantity) pairs,
        # zero-padded to keep the observation length fixed.
        pending = list(order_processor.order_queue)
        pending_flat: list[float] = []
        for slot in range(5):
            if slot < len(pending):
                pending_flat.extend([float(pending[slot].arrival_day), float(pending[slot].quantity)])
            else:
                pending_flat.extend([0.0, 0.0])

        obs = np.array(
            [float(day), float(base_inv)]
            + [float(d) for d in demand_last_5]
            + [demand_mean_30d, demand_std_30d, fr,
               float(recent_stockout_days), float(recent_lost_sales)]
            + pending_flat,
            dtype=np.float32,
        )

        # Deterministic policy: action[0] is the reorder point, clamped at 0.
        action, _ = model.predict(obs, deterministic=True)
        current_rop = max(0.0, float(action[0]))

        # Reorder if below ROP
        ordered_qty = 0
        if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
            # Order up to ROP plus expected demand over the lead time.
            qty = max(0, int(current_rop - inventory_manager.inventory + demand_mean_30d * LEAD_TIME))
            if qty > 0:
                order_processor.place_order(day, qty)
                ordered_qty = qty

        inventory_manager.process_deliveries(day)
        # Fulfillment is capped by the stock available at the start of the day.
        fulfilled = min(demand_qty, base_inv)
        daily_writeoff = inventory_manager.apply_writeoff(day)
        total_demand += demand_qty
        total_fulfilled += fulfilled

        lost = max(0, demand_qty - fulfilled)
        recent_lost_sales = recent_lost_sales * 0.9 + lost
        recent_stockout_days = recent_stockout_days + (1 if lost > 0 else 0)

        performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
        daily_inventory.append(inventory_manager.inventory)
        fr = total_fulfilled / total_demand if total_demand > 0 else 0.0
        running_fill_rate.append(fr)
        rop_markers.append((day, current_rop))

        # Daily P&L components; holding cost assumes 0.5% of unit cost per
        # day held — presumably mirrors the other tabs' economics; verify.
        revenue = fulfilled * SELLING_PRICE
        holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
        stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
        order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
        writeoff_cost = daily_writeoff * UNIT_COST
        daily_pnl.append({
            "revenue": revenue,
            "holding_cost": holding_cost,
            "stockout_penalty": stockout_penalty,
            "order_cost": order_cost,
            "writeoff_cost": writeoff_cost,
            "daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
        })

        # Log one decision entry every 5 days to keep the markdown readable.
        if (day - HISTO_DAYS) % 5 == 0:
            decision_log.append(
                f"**Day {day}** | ROP={current_rop:.0f} | Fill={fr*100:.1f}% | inv={base_inv:.0f}"
            )
        # Yield every day so the Gradio tab updates live (only last 20 log entries).
        fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                          f"PPO Agent | {env_name} | Day {day}/{SIM_DAYS}", daily_pnl)
        summary = performance_tracker.performance_summary()
        metrics = (
            f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
            f"**Stockouts:** {summary['stock_out_count']} \n"
            f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
            f"**Write-offs:** {summary['write_offs']:.0f}"
        )
        yield fig, metrics, "\n\n".join(decision_log[-20:])

    # Final frame: full chart, final metrics, and the complete decision log.
    fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
                      f"PPO Agent | {env_name} | COMPLETE", daily_pnl)
    summary = performance_tracker.performance_summary()
    metrics = (
        f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
        f"**Stockouts:** {summary['stock_out_count']} \n"
        f"**Lost Sales:** {summary['total_lost_sales']:.0f} \n"
        f"**Write-offs:** {summary['write_offs']:.0f}"
    )
    yield fig, metrics, "\n\n".join(decision_log)
453
+
454
  # ── UI ─────────────────────────────────────────────────────────────────────────
455
 
456
  with gr.Blocks(title="Inventory Simulation") as demo:
 
475
  metrics_md = gr.Markdown(label="Metrics")
476
  run_btn.click(run_simulation, inputs=[agent_dd, env_dd], outputs=[chart, metrics_md])
477
 
478
    # PPO tab: streams run_ppo_simulation (a generator) into chart/metrics/log.
    # NOTE(review): nesting reconstructed from the diff — confirm Row/Column
    # grouping against the rendered Space layout.
    with gr.Tab("PPO Agent — Live"):
        gr.Markdown(
            "Trained PPO (stable-baselines3) agent runs the full 365-day simulation. "
            "Requires `ppo_inventory.zip` in the repo root (train first with `rl_agent.py`)."
        )
        with gr.Row():
            # Demand environment selector; keys come from the shared ENV_MAP.
            ppo_env_dd = gr.Dropdown(
                choices=list(ENV_MAP.keys()),
                value="GammaPoisson (90/10 mixture)", label="Demand Environment",
            )
            # Model path without the .zip suffix (PPO.load appends it).
            ppo_model_box = gr.Textbox(
                label="Model path (no .zip)", value="ppo_inventory", placeholder="ppo_inventory"
            )
        ppo_run_btn = gr.Button("Run PPO Simulation", variant="primary")
        with gr.Row():
            ppo_chart = gr.Plot(label="Live Simulation")
            with gr.Column():
                ppo_metrics = gr.Markdown(label="Metrics")
                ppo_log = gr.Markdown(label="Decision Log")
        # Generator outputs stream live into the three components.
        ppo_run_btn.click(
            run_ppo_simulation,
            inputs=[ppo_env_dd, ppo_model_box],
            outputs=[ppo_chart, ppo_metrics, ppo_log],
        )
+
503
  with gr.Tab("LLM Agent β€” Live"):
504
  gr.Markdown(
505
  "Qwen2.5-72B makes a reorder decision every 5 days. "
llm_agent_runner.py CHANGED
@@ -28,7 +28,7 @@ from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME
28
  # ── Server ─────────────────────────────────────────────────────────────────────
29
 
30
  BASE_URL = "http://127.0.0.1:7861"
31
- DECISION_INTERVAL = 5 # Claude decides every N days
32
 
33
  ENV_NAMES = {
34
  0: "GammaPoisson",
 
28
  # ── Server ─────────────────────────────────────────────────────────────────────
29
 
30
  BASE_URL = "http://127.0.0.1:7861"
31
+ DECISION_INTERVAL = 5 # Qwen decides every N days
32
 
33
  ENV_NAMES = {
34
  0: "GammaPoisson",
server/__pycache__/inventory_env.cpython-313.pyc CHANGED
Binary files a/server/__pycache__/inventory_env.cpython-313.pyc and b/server/__pycache__/inventory_env.cpython-313.pyc differ