Spaces:
Runtime error
Runtime error
RishbhaJain Claude Sonnet 4.6 committed on
Commit ·
c10dcd0
1
Parent(s): 4d42a14
fix: pipeline-aware ordering, YoY demand signal, reward rebalancing
Browse files
- Fix bullwhip/overshoot: order against inventory position (on-hand +
pipeline) in both _simulate_rop and inventory_env.py /step endpoint.
Prevents 3 overlapping orders stacking during stockout recovery.
- Add demand_last_year_7d (7-day window same period last year) to
observation — surfaced from pre-generated 730-day demand series.
Propagated through server model, client dataclass, and train_grpo prompts.
- Shorten LOOKAHEAD_DAYS 365 → 30: matches adaptive policy horizon,
removes bias toward inflated constant-policy ROPs.
- Raise HOLDING_RATE 0.005 → 0.02: reduces 300:1 stockout/holding
asymmetry that incentivised excessive overstocking.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- agent/train_grpo.py +8 -3
- client/inventory_client.py +2 -0
- reward.py +1 -1
- server/inventory_env.py +11 -2
agent/train_grpo.py
CHANGED
|
@@ -107,6 +107,7 @@ def format_prompt(obs_dict: dict[str, Any], memory_bank: list[dict[str, Any]]) -
|
|
| 107 |
"recent_stockouts": obs_dict["recent_stockouts"],
|
| 108 |
"recent_lost_sales": round(obs_dict["recent_lost_sales"], 2),
|
| 109 |
"pending_orders": obs_dict.get("pending_orders", []),
|
|
|
|
| 110 |
"memory_bank": memory_bank[-MEMORY_SIZE:],
|
| 111 |
}
|
| 112 |
user_content = json.dumps(snapshot, separators=(",", ":"))
|
|
@@ -207,6 +208,7 @@ async def _run_episode_async(
|
|
| 207 |
{"arrival_day": o.arrival_day, "quantity": o.quantity}
|
| 208 |
for o in obs.pending_orders
|
| 209 |
],
|
|
|
|
| 210 |
}
|
| 211 |
|
| 212 |
messages = format_prompt(obs_dict, memory_bank)
|
|
@@ -312,7 +314,7 @@ FIXED_ORDER_COST = 150.0
|
|
| 312 |
HOLDING_RATE = 0.005
|
| 313 |
WRITE_OFF_RATE = 0.00143
|
| 314 |
LEAD_TIME = 3
|
| 315 |
-
LOOKAHEAD_DAYS = 365
|
| 316 |
TARGET_FILL_RATE = 0.95
|
| 317 |
FILL_RATE_WEIGHT = 0.4
|
| 318 |
|
|
@@ -362,8 +364,10 @@ def _simulate_rop(obs: dict[str, Any], rop: float) -> float:
|
|
| 362 |
total_demand += demand
|
| 363 |
|
| 364 |
order_qty = 0.0
|
| 365 |
-
|
| 366 |
-
|
|
|
|
|
|
|
| 367 |
pending.append((day + LEAD_TIME, order_qty))
|
| 368 |
|
| 369 |
revenue = sold * SELLING_PRICE
|
|
@@ -616,6 +620,7 @@ async def _eval_episode_async(
|
|
| 616 |
{"arrival_day": o.arrival_day, "quantity": o.quantity}
|
| 617 |
for o in obs.pending_orders
|
| 618 |
],
|
|
|
|
| 619 |
}
|
| 620 |
|
| 621 |
messages = format_prompt(obs_dict, memory_bank)
|
|
|
|
| 107 |
"recent_stockouts": obs_dict["recent_stockouts"],
|
| 108 |
"recent_lost_sales": round(obs_dict["recent_lost_sales"], 2),
|
| 109 |
"pending_orders": obs_dict.get("pending_orders", []),
|
| 110 |
+
"demand_last_year_7d": [round(d, 2) for d in obs_dict.get("demand_last_year_7d", [])],
|
| 111 |
"memory_bank": memory_bank[-MEMORY_SIZE:],
|
| 112 |
}
|
| 113 |
user_content = json.dumps(snapshot, separators=(",", ":"))
|
|
|
|
| 208 |
{"arrival_day": o.arrival_day, "quantity": o.quantity}
|
| 209 |
for o in obs.pending_orders
|
| 210 |
],
|
| 211 |
+
"demand_last_year_7d": [round(d, 2) for d in obs.demand_last_year_7d],
|
| 212 |
}
|
| 213 |
|
| 214 |
messages = format_prompt(obs_dict, memory_bank)
|
|
|
|
| 314 |
HOLDING_RATE = 0.005
|
| 315 |
WRITE_OFF_RATE = 0.00143
|
| 316 |
LEAD_TIME = 3
|
| 317 |
+
LOOKAHEAD_DAYS = 30
|
| 318 |
TARGET_FILL_RATE = 0.95
|
| 319 |
FILL_RATE_WEIGHT = 0.4
|
| 320 |
|
|
|
|
| 364 |
total_demand += demand
|
| 365 |
|
| 366 |
order_qty = 0.0
|
| 367 |
+
pipeline = sum(qty for arr, qty in pending)
|
| 368 |
+
inv_position = inv + pipeline
|
| 369 |
+
if inv_position <= rop:
|
| 370 |
+
order_qty = max(0.0, rop - inv_position + mean_d * LEAD_TIME)
|
| 371 |
pending.append((day + LEAD_TIME, order_qty))
|
| 372 |
|
| 373 |
revenue = sold * SELLING_PRICE
|
|
|
|
| 620 |
{"arrival_day": o.arrival_day, "quantity": o.quantity}
|
| 621 |
for o in obs.pending_orders
|
| 622 |
],
|
| 623 |
+
"demand_last_year_7d": [round(d, 2) for d in obs.demand_last_year_7d],
|
| 624 |
}
|
| 625 |
|
| 626 |
messages = format_prompt(obs_dict, memory_bank)
|
client/inventory_client.py
CHANGED
|
@@ -51,6 +51,7 @@ class InventoryObservation:
|
|
| 51 |
recent_lost_sales: float
|
| 52 |
days_remaining: int
|
| 53 |
pending_orders: List[PendingOrder]
|
|
|
|
| 54 |
|
| 55 |
@classmethod
|
| 56 |
def from_dict(cls, d: dict) -> "InventoryObservation":
|
|
@@ -65,6 +66,7 @@ class InventoryObservation:
|
|
| 65 |
recent_lost_sales=d["recent_lost_sales"],
|
| 66 |
days_remaining=d["days_remaining"],
|
| 67 |
pending_orders=[PendingOrder(**o) for o in d["pending_orders"]],
|
|
|
|
| 68 |
)
|
| 69 |
|
| 70 |
|
|
|
|
| 51 |
recent_lost_sales: float
|
| 52 |
days_remaining: int
|
| 53 |
pending_orders: List[PendingOrder]
|
| 54 |
+
demand_last_year_7d: List[float]
|
| 55 |
|
| 56 |
@classmethod
|
| 57 |
def from_dict(cls, d: dict) -> "InventoryObservation":
|
|
|
|
| 66 |
recent_lost_sales=d["recent_lost_sales"],
|
| 67 |
days_remaining=d["days_remaining"],
|
| 68 |
pending_orders=[PendingOrder(**o) for o in d["pending_orders"]],
|
| 69 |
+
demand_last_year_7d=d.get("demand_last_year_7d", []),
|
| 70 |
)
|
| 71 |
|
| 72 |
|
reward.py
CHANGED
|
@@ -5,7 +5,7 @@ from config import (
|
|
| 5 |
)
|
| 6 |
|
| 7 |
# Holding cost rate (fraction of unit cost per day)
|
| 8 |
-
HOLDING_RATE = 0.005
|
| 9 |
|
| 10 |
|
| 11 |
# ── Core P&L computation ───────────────────────────────────────────────────────
|
|
|
|
| 5 |
)
|
| 6 |
|
| 7 |
# Holding cost rate (fraction of unit cost per day)
|
| 8 |
+
HOLDING_RATE = 0.02
|
| 9 |
|
| 10 |
|
| 11 |
# ── Core P&L computation ───────────────────────────────────────────────────────
|
server/inventory_env.py
CHANGED
|
@@ -53,6 +53,7 @@ class InventoryObservation(BaseModel):
|
|
| 53 |
recent_lost_sales: float
|
| 54 |
days_remaining: int
|
| 55 |
pending_orders: List[PendingOrder]
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
class StepResult(BaseModel):
|
|
@@ -101,6 +102,11 @@ class EpisodeState:
|
|
| 101 |
for o in self.order_processor.order_queue[:5]
|
| 102 |
]
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
return InventoryObservation(
|
| 105 |
day=self.day,
|
| 106 |
current_inventory=self.inventory,
|
|
@@ -115,6 +121,7 @@ class EpisodeState:
|
|
| 115 |
recent_lost_sales=self.lost_sales,
|
| 116 |
days_remaining=SIM_DAYS - self.day,
|
| 117 |
pending_orders=pending,
|
|
|
|
| 118 |
)
|
| 119 |
|
| 120 |
|
|
@@ -183,8 +190,10 @@ def step(action: InventoryAction):
|
|
| 183 |
qty = 0
|
| 184 |
hist = episode.demand_series[max(0, day - 30):day]
|
| 185 |
mean_demand = float(np.mean(hist)) if hist else 0.0
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
| 188 |
if qty > 0:
|
| 189 |
episode.order_processor.place_order(day, int(qty))
|
| 190 |
|
|
|
|
| 53 |
recent_lost_sales: float
|
| 54 |
days_remaining: int
|
| 55 |
pending_orders: List[PendingOrder]
|
| 56 |
+
demand_last_year_7d: List[float]
|
| 57 |
|
| 58 |
|
| 59 |
class StepResult(BaseModel):
|
|
|
|
| 102 |
for o in self.order_processor.order_queue[:5]
|
| 103 |
]
|
| 104 |
|
| 105 |
+
ly_anchor = self.day - 365
|
| 106 |
+
ly_start = max(0, ly_anchor - 3)
|
| 107 |
+
ly_end = min(len(self.demand_series), ly_anchor + 4)
|
| 108 |
+
demand_last_year_7d = [float(d) for d in self.demand_series[ly_start:ly_end]]
|
| 109 |
+
|
| 110 |
return InventoryObservation(
|
| 111 |
day=self.day,
|
| 112 |
current_inventory=self.inventory,
|
|
|
|
| 121 |
recent_lost_sales=self.lost_sales,
|
| 122 |
days_remaining=SIM_DAYS - self.day,
|
| 123 |
pending_orders=pending,
|
| 124 |
+
demand_last_year_7d=demand_last_year_7d,
|
| 125 |
)
|
| 126 |
|
| 127 |
|
|
|
|
| 190 |
qty = 0
|
| 191 |
hist = episode.demand_series[max(0, day - 30):day]
|
| 192 |
mean_demand = float(np.mean(hist)) if hist else 0.0
|
| 193 |
+
pipeline = sum(o.quantity for o in episode.order_processor.order_queue)
|
| 194 |
+
inv_position = episode.inventory + pipeline
|
| 195 |
+
if day < SIM_DAYS - LEAD_TIME and inv_position <= rop:
|
| 196 |
+
qty = max(0.0, rop - inv_position + mean_demand * LEAD_TIME)
|
| 197 |
if qty > 0:
|
| 198 |
episode.order_processor.place_order(day, int(qty))
|
| 199 |
|