Spaces:
Runtime error
Runtime error
Merge RJ into main — use RJ app.py with P&L reward visualization
Browse files
- agent/llm_agent.py +34 -26
- agent/rl_agent.py +2 -15
- app.py +71 -9
- config.py +8 -2
- order_processor.py +4 -2
- server/inventory_env.py +48 -14
agent/llm_agent.py
CHANGED
|
@@ -18,7 +18,7 @@ import re
|
|
| 18 |
import sys
|
| 19 |
from typing import Any
|
| 20 |
|
| 21 |
-
import
|
| 22 |
|
| 23 |
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 24 |
|
|
@@ -28,27 +28,35 @@ SYSTEM_PROMPT = """\
|
|
| 28 |
You are an expert inventory optimization agent operating inside a stochastic supply-chain simulation.
|
| 29 |
|
| 30 |
YOUR OBJECTIVE:
|
| 31 |
-
Maximize
|
| 32 |
-
365-day episode. The episode ends at day 730 (after 365 days of decisions following a 365-day warm-up).
|
| 33 |
|
| 34 |
ENVIRONMENT RULES:
|
| 35 |
-
-
|
| 36 |
-
- An order
|
| 37 |
-
- Order quantity = reorder_point - current_inventory + mean_demand *
|
| 38 |
-
-
|
| 39 |
-
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
YOUR ACTION EACH STEP:
|
| 43 |
-
Set `reorder_point` — the inventory
|
| 44 |
-
A higher ROP builds safety buffer but risks write-offs. A lower ROP conserves stock but risks stockouts.
|
| 45 |
|
| 46 |
REASONING GUIDANCE:
|
| 47 |
-
-
|
| 48 |
-
-
|
| 49 |
-
-
|
| 50 |
-
-
|
| 51 |
-
- Think 3
|
| 52 |
|
| 53 |
RESPONSE FORMAT — reply with ONLY a valid JSON object, no markdown fences:
|
| 54 |
{"reorder_point": <float>, "reasoning": "<concise explanation>", "confidence": <float 0-1>}
|
|
@@ -56,14 +64,14 @@ RESPONSE FORMAT — reply with ONLY a valid JSON object, no markdown fences:
|
|
| 56 |
|
| 57 |
|
| 58 |
class ClaudeInventoryAgent:
|
| 59 |
-
"""Inventory optimization agent backed by
|
| 60 |
|
| 61 |
MEMORY_SIZE = 15
|
| 62 |
HISTORY_TURNS = 6
|
| 63 |
-
MODEL = "
|
| 64 |
|
| 65 |
def __init__(self, api_key: str) -> None:
|
| 66 |
-
self._client =
|
| 67 |
self._memory_bank: list[dict[str, Any]] = []
|
| 68 |
self._conversation: list[dict[str, str]] = []
|
| 69 |
|
|
@@ -127,13 +135,13 @@ class ClaudeInventoryAgent:
|
|
| 127 |
confidence: float
|
| 128 |
|
| 129 |
try:
|
| 130 |
-
|
|
|
|
| 131 |
model=self.MODEL,
|
|
|
|
| 132 |
max_tokens=512,
|
| 133 |
-
system=SYSTEM_PROMPT,
|
| 134 |
-
messages=messages,
|
| 135 |
)
|
| 136 |
-
raw_text: str = response.
|
| 137 |
|
| 138 |
try:
|
| 139 |
parsed = self._parse_response(raw_text)
|
|
@@ -253,8 +261,8 @@ def _parse_args() -> argparse.Namespace:
|
|
| 253 |
parser.add_argument(
|
| 254 |
"--api-key",
|
| 255 |
type=str,
|
| 256 |
-
default=os.environ.get("
|
| 257 |
-
help="
|
| 258 |
)
|
| 259 |
return parser.parse_args()
|
| 260 |
|
|
@@ -263,7 +271,7 @@ if __name__ == "__main__":
|
|
| 263 |
args = _parse_args()
|
| 264 |
|
| 265 |
if not args.api_key:
|
| 266 |
-
print("Error: no
|
| 267 |
sys.exit(1)
|
| 268 |
|
| 269 |
asyncio.run(
|
|
|
|
| 18 |
import sys
|
| 19 |
from typing import Any
|
| 20 |
|
| 21 |
+
from huggingface_hub import InferenceClient
|
| 22 |
|
| 23 |
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 24 |
|
|
|
|
| 28 |
You are an expert inventory optimization agent operating inside a stochastic supply-chain simulation.
|
| 29 |
|
| 30 |
YOUR OBJECTIVE:
|
| 31 |
+
Maximize daily profit and fill rate over a 365-day episode (days 365–730 after a warm-up period).
|
|
|
|
| 32 |
|
| 33 |
ENVIRONMENT RULES:
|
| 34 |
+
- Lead time: 3 days ± 1 day (stochastic — orders may arrive in 2, 3, or 4 days)
|
| 35 |
+
- An order fires automatically whenever inventory <= your reorder_point
|
| 36 |
+
- Order quantity = reorder_point - current_inventory + mean_demand * lead_time (handled by env)
|
| 37 |
+
- Spoilage: 0.143% of on-hand inventory is lost every day (~1% per week)
|
| 38 |
+
- unit_cost = $10, selling_price = $25, fixed_order_cost = $150 per order
|
| 39 |
+
|
| 40 |
+
DAILY REWARD FORMULA:
|
| 41 |
+
revenue = units_sold * 25
|
| 42 |
+
holding_cost = inventory * 10 * 0.005
|
| 43 |
+
stockout_penalty = lost_units * 15 (lost margin per unit)
|
| 44 |
+
order_cost = 150 (if ordered) + qty * 10
|
| 45 |
+
writeoff_cost = spoilage * 10
|
| 46 |
+
daily_reward = (revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost) / baseline
|
| 47 |
+
|
| 48 |
+
END-OF-EPISODE BONUS (day 730 only):
|
| 49 |
+
bonus = fill_rate * 0.5 + profit_ratio * 0.5
|
| 50 |
|
| 51 |
YOUR ACTION EACH STEP:
|
| 52 |
+
Set `reorder_point` — the inventory threshold that triggers a replenishment order.
|
|
|
|
| 53 |
|
| 54 |
REASONING GUIDANCE:
|
| 55 |
+
- Stockouts are expensive ($15/unit lost margin) — keep enough buffer for lead time uncertainty
|
| 56 |
+
- Excess inventory bleeds holding cost ($0.05/unit/day) and spoilage — don't over-order
|
| 57 |
+
- $150 fixed order cost: batch orders rather than ordering tiny amounts every day
|
| 58 |
+
- Account for pending orders in the pipeline before deciding to order more
|
| 59 |
+
- Think 3–4 days ahead due to stochastic lead times
|
| 60 |
|
| 61 |
RESPONSE FORMAT — reply with ONLY a valid JSON object, no markdown fences:
|
| 62 |
{"reorder_point": <float>, "reasoning": "<concise explanation>", "confidence": <float 0-1>}
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
class ClaudeInventoryAgent:
|
| 67 |
+
"""Inventory optimization agent backed by Qwen2.5-72B via HuggingFace Inference API."""
|
| 68 |
|
| 69 |
MEMORY_SIZE = 15
|
| 70 |
HISTORY_TURNS = 6
|
| 71 |
+
MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 72 |
|
| 73 |
def __init__(self, api_key: str) -> None:
|
| 74 |
+
self._client = InferenceClient(api_key=api_key)
|
| 75 |
self._memory_bank: list[dict[str, Any]] = []
|
| 76 |
self._conversation: list[dict[str, str]] = []
|
| 77 |
|
|
|
|
| 135 |
confidence: float
|
| 136 |
|
| 137 |
try:
|
| 138 |
+
hf_messages = [{"role": "system", "content": SYSTEM_PROMPT}] + messages
|
| 139 |
+
response = self._client.chat.completions.create(
|
| 140 |
model=self.MODEL,
|
| 141 |
+
messages=hf_messages,
|
| 142 |
max_tokens=512,
|
|
|
|
|
|
|
| 143 |
)
|
| 144 |
+
raw_text: str = response.choices[0].message.content
|
| 145 |
|
| 146 |
try:
|
| 147 |
parsed = self._parse_response(raw_text)
|
|
|
|
| 261 |
parser.add_argument(
|
| 262 |
"--api-key",
|
| 263 |
type=str,
|
| 264 |
+
default=os.environ.get("HF_TOKEN", ""),
|
| 265 |
+
help="HuggingFace token (defaults to HF_TOKEN env var).",
|
| 266 |
)
|
| 267 |
return parser.parse_args()
|
| 268 |
|
|
|
|
| 271 |
args = _parse_args()
|
| 272 |
|
| 273 |
if not args.api_key:
|
| 274 |
+
print("Error: no HuggingFace token provided. Set HF_TOKEN or use --api-key.")
|
| 275 |
sys.exit(1)
|
| 276 |
|
| 277 |
asyncio.run(
|
agent/rl_agent.py
CHANGED
|
@@ -57,8 +57,6 @@ class InventoryGymEnv(gym.Env):
|
|
| 57 |
self._inv_client = InventoryEnvClient(base_url)
|
| 58 |
self._inv_client._client = self._http_client
|
| 59 |
|
| 60 |
-
self._last_fill_rate: float = 0.0
|
| 61 |
-
|
| 62 |
self.observation_space = spaces.Box(
|
| 63 |
low=0.0,
|
| 64 |
high=np.inf,
|
|
@@ -78,26 +76,15 @@ class InventoryGymEnv(gym.Env):
|
|
| 78 |
def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
|
| 79 |
super().reset(seed=seed)
|
| 80 |
obs = asyncio.run(self._inv_client.reset(env_type=self._env_type))
|
| 81 |
-
self._last_fill_rate = 0.0
|
| 82 |
return self._obs_to_array(obs), {}
|
| 83 |
|
| 84 |
def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
|
| 85 |
result = asyncio.run(
|
| 86 |
self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
|
| 87 |
)
|
| 88 |
-
obs = result.observation
|
| 89 |
-
|
| 90 |
-
fill_rate_delta = obs.fill_rate_so_far - self._last_fill_rate
|
| 91 |
-
shaped_reward = (
|
| 92 |
-
fill_rate_delta * 10.0
|
| 93 |
-
- obs.recent_lost_sales * 0.01
|
| 94 |
-
- obs.current_inventory * 0.0001
|
| 95 |
-
)
|
| 96 |
-
self._last_fill_rate = obs.fill_rate_so_far
|
| 97 |
-
|
| 98 |
return (
|
| 99 |
-
self._obs_to_array(
|
| 100 |
-
float(
|
| 101 |
result.done,
|
| 102 |
False,
|
| 103 |
result.info,
|
|
|
|
| 57 |
self._inv_client = InventoryEnvClient(base_url)
|
| 58 |
self._inv_client._client = self._http_client
|
| 59 |
|
|
|
|
|
|
|
| 60 |
self.observation_space = spaces.Box(
|
| 61 |
low=0.0,
|
| 62 |
high=np.inf,
|
|
|
|
| 76 |
def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None) -> tuple[np.ndarray, dict]:
|
| 77 |
super().reset(seed=seed)
|
| 78 |
obs = asyncio.run(self._inv_client.reset(env_type=self._env_type))
|
|
|
|
| 79 |
return self._obs_to_array(obs), {}
|
| 80 |
|
| 81 |
def step(self, action: np.ndarray) -> tuple[np.ndarray, float, bool, bool, dict]:
|
| 82 |
result = asyncio.run(
|
| 83 |
self._inv_client.step(InventoryAction(reorder_point=float(action[0])))
|
| 84 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
return (
|
| 86 |
+
self._obs_to_array(result.observation),
|
| 87 |
+
float(result.reward),
|
| 88 |
result.done,
|
| 89 |
False,
|
| 90 |
result.info,
|
app.py
CHANGED
|
@@ -7,7 +7,7 @@ matplotlib.use("Agg")
|
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
|
| 10 |
-
from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME
|
| 11 |
from agent_environment import BaseAgent, SafetyStockAgent, ForecastAgent, MonteCarloAgent
|
| 12 |
from demand_environment import GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance
|
| 13 |
from demand_calculator import DemandCalculator
|
|
@@ -45,9 +45,12 @@ OUTPUT — respond with this exact JSON (no markdown fences):
|
|
| 45 |
|
| 46 |
# ── Shared chart builder ───────────────────────────────────────────────────────
|
| 47 |
|
| 48 |
-
def build_chart(daily_inventory, running_fill_rate, rop_markers, title):
|
| 49 |
-
|
|
|
|
|
|
|
| 50 |
days = list(range(len(daily_inventory)))
|
|
|
|
| 51 |
ax1.plot(days, daily_inventory, color="steelblue", linewidth=0.8)
|
| 52 |
if rop_markers:
|
| 53 |
rop_days, rop_vals = zip(*rop_markers)
|
|
@@ -56,12 +59,35 @@ def build_chart(daily_inventory, running_fill_rate, rop_markers, title):
|
|
| 56 |
ax1.legend(fontsize=8)
|
| 57 |
ax1.set_ylabel("Inventory Level")
|
| 58 |
ax1.set_title(title)
|
|
|
|
| 59 |
ax2.plot(days, running_fill_rate, color="seagreen", linewidth=0.8)
|
| 60 |
ax2.axhline(y=0.95, color="red", linestyle="--", linewidth=0.6, label="95% target")
|
| 61 |
ax2.set_ylabel("Cumulative Fill Rate")
|
| 62 |
-
ax2.set_xlabel("Evaluation Day")
|
| 63 |
ax2.set_ylim(0, 1)
|
| 64 |
ax2.legend(fontsize=8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
plt.tight_layout()
|
| 66 |
return fig
|
| 67 |
|
|
@@ -87,14 +113,17 @@ def run_simulation(agent_name, env_name):
|
|
| 87 |
order_processor = OrderProcessor()
|
| 88 |
performance_tracker = PerformanceTracker()
|
| 89 |
inventory_manager = InventoryManager(order_processor=order_processor, agent=agent)
|
| 90 |
-
daily_inventory, running_fill_rate = [], []
|
| 91 |
total_demand, total_fulfilled = 0, 0
|
| 92 |
for day in range(HISTO_DAYS, SIM_DAYS):
|
| 93 |
demand_qty = dc.get_daily_demand(day)
|
| 94 |
base_inv = inventory_manager.inventory
|
| 95 |
inventory_manager.inventory_update(demand_qty)
|
|
|
|
| 96 |
if day < SIM_DAYS - LEAD_TIME:
|
| 97 |
inventory_manager.reorder(day)
|
|
|
|
|
|
|
| 98 |
inventory_manager.process_deliveries(day)
|
| 99 |
fulfilled = min(demand_qty, base_inv)
|
| 100 |
daily_writeoff = inventory_manager.apply_writeoff(day)
|
|
@@ -103,8 +132,24 @@ def run_simulation(agent_name, env_name):
|
|
| 103 |
performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
|
| 104 |
daily_inventory.append(inventory_manager.inventory)
|
| 105 |
running_fill_rate.append(total_fulfilled / total_demand if total_demand > 0 else 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
summary = performance_tracker.performance_summary()
|
| 107 |
-
fig = build_chart(daily_inventory, running_fill_rate, [], f"{agent_name} | {env_name}")
|
| 108 |
metrics = (
|
| 109 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
| 110 |
f"**Stockouts:** {summary['stock_out_count']} \n"
|
|
@@ -151,7 +196,7 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 151 |
convo_history = []
|
| 152 |
memory_bank = []
|
| 153 |
current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME
|
| 154 |
-
daily_inventory, running_fill_rate, rop_markers = [], [], []
|
| 155 |
total_demand, total_fulfilled = 0, 0
|
| 156 |
decision_log = []
|
| 157 |
|
|
@@ -162,6 +207,7 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 162 |
inventory_manager.inventory_update(demand_qty)
|
| 163 |
|
| 164 |
# Manual reorder using current_rop
|
|
|
|
| 165 |
if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
|
| 166 |
hist = [dc.daily_demand_distribution[d].actual_demand
|
| 167 |
for d in range(max(0, day - 30), day)]
|
|
@@ -169,6 +215,7 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 169 |
qty = max(0, current_rop - inventory_manager.inventory + mean_d * LEAD_TIME)
|
| 170 |
if qty > 0:
|
| 171 |
order_processor.place_order(day, int(qty))
|
|
|
|
| 172 |
|
| 173 |
inventory_manager.process_deliveries(day)
|
| 174 |
fulfilled = min(demand_qty, base_inv)
|
|
@@ -180,6 +227,21 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 180 |
fr = total_fulfilled / total_demand if total_demand > 0 else 0
|
| 181 |
running_fill_rate.append(fr)
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
# LLM decision every DECISION_INTERVAL days
|
| 184 |
if (day - HISTO_DAYS) % DECISION_INTERVAL == 0 and day < SIM_DAYS - LEAD_TIME:
|
| 185 |
hist30 = [dc.daily_demand_distribution[d].actual_demand
|
|
@@ -233,7 +295,7 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 233 |
|
| 234 |
# Yield live update
|
| 235 |
fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
|
| 236 |
-
f"Qwen2.5-72B | {env_name} | Day {day}/{SIM_DAYS}")
|
| 237 |
summary = performance_tracker.performance_summary()
|
| 238 |
metrics = (
|
| 239 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
|
@@ -247,7 +309,7 @@ def run_llm_simulation(env_name, hf_token):
|
|
| 247 |
|
| 248 |
# Final yield
|
| 249 |
fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
|
| 250 |
-
f"Qwen2.5-72B | {env_name} | COMPLETE")
|
| 251 |
summary = performance_tracker.performance_summary()
|
| 252 |
metrics = (
|
| 253 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
|
|
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
|
| 10 |
+
from config import SIM_DAYS, HISTO_DAYS, LEAD_TIME, UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST, WRITE_OFF_RATE
|
| 11 |
from agent_environment import BaseAgent, SafetyStockAgent, ForecastAgent, MonteCarloAgent
|
| 12 |
from demand_environment import GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance
|
| 13 |
from demand_calculator import DemandCalculator
|
|
|
|
| 45 |
|
| 46 |
# ── Shared chart builder ───────────────────────────────────────────────────────
|
| 47 |
|
| 48 |
+
def build_chart(daily_inventory, running_fill_rate, rop_markers, title, daily_pnl=None):
|
| 49 |
+
n_rows = 3 if daily_pnl else 2
|
| 50 |
+
fig, axes = plt.subplots(n_rows, 1, figsize=(10, 4 + 2.5 * n_rows), sharex=True)
|
| 51 |
+
ax1, ax2 = axes[0], axes[1]
|
| 52 |
days = list(range(len(daily_inventory)))
|
| 53 |
+
|
| 54 |
ax1.plot(days, daily_inventory, color="steelblue", linewidth=0.8)
|
| 55 |
if rop_markers:
|
| 56 |
rop_days, rop_vals = zip(*rop_markers)
|
|
|
|
| 59 |
ax1.legend(fontsize=8)
|
| 60 |
ax1.set_ylabel("Inventory Level")
|
| 61 |
ax1.set_title(title)
|
| 62 |
+
|
| 63 |
ax2.plot(days, running_fill_rate, color="seagreen", linewidth=0.8)
|
| 64 |
ax2.axhline(y=0.95, color="red", linestyle="--", linewidth=0.6, label="95% target")
|
| 65 |
ax2.set_ylabel("Cumulative Fill Rate")
|
|
|
|
| 66 |
ax2.set_ylim(0, 1)
|
| 67 |
ax2.legend(fontsize=8)
|
| 68 |
+
|
| 69 |
+
if daily_pnl:
|
| 70 |
+
ax3 = axes[2]
|
| 71 |
+
revenues = [r["revenue"] for r in daily_pnl]
|
| 72 |
+
holding_costs = [r["holding_cost"] for r in daily_pnl]
|
| 73 |
+
stockout_pens = [r["stockout_penalty"] for r in daily_pnl]
|
| 74 |
+
order_costs = [r["order_cost"] for r in daily_pnl]
|
| 75 |
+
writeoff_costs = [r["writeoff_cost"] for r in daily_pnl]
|
| 76 |
+
net_profits = [r["daily_profit"] for r in daily_pnl]
|
| 77 |
+
|
| 78 |
+
ax3.fill_between(days, revenues, alpha=0.25, color="green", label="Revenue")
|
| 79 |
+
ax3.plot(days, net_profits, color="black", linewidth=0.9, label="Net profit")
|
| 80 |
+
ax3.fill_between(days, [-h for h in holding_costs], alpha=0.3, color="royalblue", label="Holding cost")
|
| 81 |
+
ax3.fill_between(days, [-s for s in stockout_pens], alpha=0.3, color="crimson", label="Stockout penalty")
|
| 82 |
+
ax3.fill_between(days, [-o for o in order_costs], alpha=0.25, color="darkorange", label="Order cost")
|
| 83 |
+
ax3.fill_between(days, [-w for w in writeoff_costs], alpha=0.25, color="purple", label="Write-off cost")
|
| 84 |
+
ax3.axhline(y=0, color="grey", linewidth=0.5)
|
| 85 |
+
ax3.set_ylabel("Daily P&L ($)")
|
| 86 |
+
ax3.set_xlabel("Evaluation Day")
|
| 87 |
+
ax3.legend(fontsize=7, ncol=3)
|
| 88 |
+
else:
|
| 89 |
+
ax2.set_xlabel("Evaluation Day")
|
| 90 |
+
|
| 91 |
plt.tight_layout()
|
| 92 |
return fig
|
| 93 |
|
|
|
|
| 113 |
order_processor = OrderProcessor()
|
| 114 |
performance_tracker = PerformanceTracker()
|
| 115 |
inventory_manager = InventoryManager(order_processor=order_processor, agent=agent)
|
| 116 |
+
daily_inventory, running_fill_rate, daily_pnl = [], [], []
|
| 117 |
total_demand, total_fulfilled = 0, 0
|
| 118 |
for day in range(HISTO_DAYS, SIM_DAYS):
|
| 119 |
demand_qty = dc.get_daily_demand(day)
|
| 120 |
base_inv = inventory_manager.inventory
|
| 121 |
inventory_manager.inventory_update(demand_qty)
|
| 122 |
+
q_before = len(order_processor.order_queue)
|
| 123 |
if day < SIM_DAYS - LEAD_TIME:
|
| 124 |
inventory_manager.reorder(day)
|
| 125 |
+
new_orders = order_processor.order_queue[q_before:]
|
| 126 |
+
ordered_qty = sum(o.quantity for o in new_orders)
|
| 127 |
inventory_manager.process_deliveries(day)
|
| 128 |
fulfilled = min(demand_qty, base_inv)
|
| 129 |
daily_writeoff = inventory_manager.apply_writeoff(day)
|
|
|
|
| 132 |
performance_tracker.daily_performance(demand_qty, int(fulfilled), daily_writeoff)
|
| 133 |
daily_inventory.append(inventory_manager.inventory)
|
| 134 |
running_fill_rate.append(total_fulfilled / total_demand if total_demand > 0 else 0)
|
| 135 |
+
|
| 136 |
+
lost = max(0, demand_qty - fulfilled)
|
| 137 |
+
revenue = fulfilled * SELLING_PRICE
|
| 138 |
+
holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
|
| 139 |
+
stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
|
| 140 |
+
order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
|
| 141 |
+
writeoff_cost = daily_writeoff * UNIT_COST
|
| 142 |
+
daily_pnl.append({
|
| 143 |
+
"revenue": revenue,
|
| 144 |
+
"holding_cost": holding_cost,
|
| 145 |
+
"stockout_penalty": stockout_penalty,
|
| 146 |
+
"order_cost": order_cost,
|
| 147 |
+
"writeoff_cost": writeoff_cost,
|
| 148 |
+
"daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
|
| 149 |
+
})
|
| 150 |
+
|
| 151 |
summary = performance_tracker.performance_summary()
|
| 152 |
+
fig = build_chart(daily_inventory, running_fill_rate, [], f"{agent_name} | {env_name}", daily_pnl)
|
| 153 |
metrics = (
|
| 154 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
| 155 |
f"**Stockouts:** {summary['stock_out_count']} \n"
|
|
|
|
| 196 |
convo_history = []
|
| 197 |
memory_bank = []
|
| 198 |
current_rop = dc.daily_demand_distribution[HISTO_DAYS].demand_mean * LEAD_TIME
|
| 199 |
+
daily_inventory, running_fill_rate, rop_markers, daily_pnl = [], [], [], []
|
| 200 |
total_demand, total_fulfilled = 0, 0
|
| 201 |
decision_log = []
|
| 202 |
|
|
|
|
| 207 |
inventory_manager.inventory_update(demand_qty)
|
| 208 |
|
| 209 |
# Manual reorder using current_rop
|
| 210 |
+
ordered_qty = 0
|
| 211 |
if day < SIM_DAYS - LEAD_TIME and inventory_manager.inventory <= current_rop:
|
| 212 |
hist = [dc.daily_demand_distribution[d].actual_demand
|
| 213 |
for d in range(max(0, day - 30), day)]
|
|
|
|
| 215 |
qty = max(0, current_rop - inventory_manager.inventory + mean_d * LEAD_TIME)
|
| 216 |
if qty > 0:
|
| 217 |
order_processor.place_order(day, int(qty))
|
| 218 |
+
ordered_qty = qty
|
| 219 |
|
| 220 |
inventory_manager.process_deliveries(day)
|
| 221 |
fulfilled = min(demand_qty, base_inv)
|
|
|
|
| 227 |
fr = total_fulfilled / total_demand if total_demand > 0 else 0
|
| 228 |
running_fill_rate.append(fr)
|
| 229 |
|
| 230 |
+
lost = max(0, demand_qty - fulfilled)
|
| 231 |
+
revenue = fulfilled * SELLING_PRICE
|
| 232 |
+
holding_cost = inventory_manager.inventory * UNIT_COST * 0.005
|
| 233 |
+
stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
|
| 234 |
+
order_cost = (FIXED_ORDER_COST if ordered_qty > 0 else 0.0) + ordered_qty * UNIT_COST
|
| 235 |
+
writeoff_cost = daily_writeoff * UNIT_COST
|
| 236 |
+
daily_pnl.append({
|
| 237 |
+
"revenue": revenue,
|
| 238 |
+
"holding_cost": holding_cost,
|
| 239 |
+
"stockout_penalty": stockout_penalty,
|
| 240 |
+
"order_cost": order_cost,
|
| 241 |
+
"writeoff_cost": writeoff_cost,
|
| 242 |
+
"daily_profit": revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost,
|
| 243 |
+
})
|
| 244 |
+
|
| 245 |
# LLM decision every DECISION_INTERVAL days
|
| 246 |
if (day - HISTO_DAYS) % DECISION_INTERVAL == 0 and day < SIM_DAYS - LEAD_TIME:
|
| 247 |
hist30 = [dc.daily_demand_distribution[d].actual_demand
|
|
|
|
| 295 |
|
| 296 |
# Yield live update
|
| 297 |
fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
|
| 298 |
+
f"Qwen2.5-72B | {env_name} | Day {day}/{SIM_DAYS}", daily_pnl)
|
| 299 |
summary = performance_tracker.performance_summary()
|
| 300 |
metrics = (
|
| 301 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
|
|
|
| 309 |
|
| 310 |
# Final yield
|
| 311 |
fig = build_chart(daily_inventory, running_fill_rate, rop_markers,
|
| 312 |
+
f"Qwen2.5-72B | {env_name} | COMPLETE", daily_pnl)
|
| 313 |
summary = performance_tracker.performance_summary()
|
| 314 |
metrics = (
|
| 315 |
f"**Fill Rate:** {summary['fill_rate']:.2%} \n"
|
config.py
CHANGED
|
@@ -8,11 +8,17 @@ N_SIMULATIONS = 100
|
|
| 8 |
MC_SIMS = 1000
|
| 9 |
|
| 10 |
# Replenishment constraints & constants
|
| 11 |
-
WRITE_OFF_RATE = 0.
|
| 12 |
-
WRITE_OFF_FREQUENCY =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Stock constraints
|
| 15 |
LEAD_TIME = 3
|
|
|
|
| 16 |
BASE_STOCK = 0
|
| 17 |
DEFAULT_SERVICE_LEVEL = 0.95
|
| 18 |
|
|
|
|
| 8 |
MC_SIMS = 1000
|
| 9 |
|
| 10 |
# Replenishment constraints & constants
|
| 11 |
+
WRITE_OFF_RATE = 0.00143 # ~0.143% daily spoilage (≈ 1% per week)
|
| 12 |
+
WRITE_OFF_FREQUENCY = 1 # applied every day
|
| 13 |
+
|
| 14 |
+
# Economic parameters
|
| 15 |
+
UNIT_COST = 10.0 # purchase cost per unit
|
| 16 |
+
SELLING_PRICE = 25.0 # revenue per unit sold
|
| 17 |
+
FIXED_ORDER_COST = 150.0 # fixed cost per order placed
|
| 18 |
|
| 19 |
# Stock constraints
|
| 20 |
LEAD_TIME = 3
|
| 21 |
+
LEAD_TIME_JITTER = 1 # ±1 day randomness on lead time
|
| 22 |
BASE_STOCK = 0
|
| 23 |
DEFAULT_SERVICE_LEVEL = 0.95
|
| 24 |
|
order_processor.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from dataclasses import dataclass
|
| 2 |
from typing import List
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
@dataclass
|
| 6 |
class Order:
|
|
@@ -12,7 +13,8 @@ class OrderProcessor:
|
|
| 12 |
self.order_queue: List[Order] = [] # self.order_queue stores Order objects
|
| 13 |
|
| 14 |
def place_order(self, time_period: int, quantity: int):
|
| 15 |
-
|
|
|
|
| 16 |
self.order_queue.append(Order(arrival_day=arrival_day, quantity=quantity))
|
| 17 |
|
| 18 |
def manage_order(self, time_period: int) -> int:
|
|
|
|
| 1 |
from dataclasses import dataclass
|
| 2 |
from typing import List
|
| 3 |
+
import numpy as np
|
| 4 |
+
from config import LEAD_TIME, LEAD_TIME_JITTER
|
| 5 |
|
| 6 |
@dataclass
|
| 7 |
class Order:
|
|
|
|
| 13 |
self.order_queue: List[Order] = [] # self.order_queue stores Order objects
|
| 14 |
|
| 15 |
def place_order(self, time_period: int, quantity: int):
|
| 16 |
+
jitter = np.random.randint(-LEAD_TIME_JITTER, LEAD_TIME_JITTER + 1)
|
| 17 |
+
arrival_day = max(time_period + 1, time_period + LEAD_TIME + jitter)
|
| 18 |
self.order_queue.append(Order(arrival_day=arrival_day, quantity=quantity))
|
| 19 |
|
| 20 |
def manage_order(self, time_period: int) -> int:
|
server/inventory_env.py
CHANGED
|
@@ -11,6 +11,7 @@ from pydantic import BaseModel
|
|
| 11 |
from config import (
|
| 12 |
SIM_DAYS, HISTO_DAYS, LEAD_TIME,
|
| 13 |
WRITE_OFF_RATE, WRITE_OFF_FREQUENCY,
|
|
|
|
| 14 |
)
|
| 15 |
from demand_environment import (
|
| 16 |
GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance,
|
|
@@ -87,6 +88,8 @@ class EpisodeState:
|
|
| 87 |
self.total_fulfilled: float = 0.0
|
| 88 |
self.stockouts: int = 0
|
| 89 |
self.lost_sales: float = 0.0
|
|
|
|
|
|
|
| 90 |
self.initialized: bool = False
|
| 91 |
|
| 92 |
def get_obs(self) -> InventoryObservation:
|
|
@@ -139,6 +142,11 @@ def reset(env_type: int = 0):
|
|
| 139 |
episode.day = HISTO_DAYS
|
| 140 |
episode.initialized = True
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
return episode.get_obs()
|
| 143 |
|
| 144 |
|
|
@@ -162,18 +170,25 @@ def step(action: InventoryAction):
|
|
| 162 |
o for o in episode.order_processor.order_queue if o.arrival_day > day
|
| 163 |
]
|
| 164 |
|
| 165 |
-
# 2.
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
episode.inventory = max(0.0, episode.inventory - demand)
|
| 168 |
-
lost = max(0.0, demand -
|
| 169 |
if lost > 0:
|
| 170 |
episode.stockouts += 1
|
| 171 |
episode.lost_sales += lost
|
| 172 |
episode.total_demand += demand
|
| 173 |
-
episode.total_fulfilled +=
|
| 174 |
|
| 175 |
-
#
|
| 176 |
rop = max(0.0, action.reorder_point)
|
|
|
|
| 177 |
if day < SIM_DAYS - LEAD_TIME and episode.inventory <= rop:
|
| 178 |
hist = episode.demand_series[max(0, day - 30):day]
|
| 179 |
mean_demand = float(np.mean(hist)) if hist else 0.0
|
|
@@ -181,27 +196,43 @@ def step(action: InventoryAction):
|
|
| 181 |
if qty > 0:
|
| 182 |
episode.order_processor.place_order(day, int(qty))
|
| 183 |
|
| 184 |
-
# 4. Weekly write-off
|
| 185 |
-
if day % WRITE_OFF_FREQUENCY == 0:
|
| 186 |
-
writeoff = int(episode.inventory * WRITE_OFF_RATE)
|
| 187 |
-
episode.inventory -= writeoff
|
| 188 |
-
episode.performance_tracker.write_offs += writeoff
|
| 189 |
-
|
| 190 |
# 5. Track performance
|
| 191 |
episode.performance_tracker.daily_performance(
|
| 192 |
demand_quantity=demand,
|
| 193 |
-
fulfilled_demand=int(
|
| 194 |
-
daily_writeoff=0,
|
| 195 |
)
|
| 196 |
|
| 197 |
episode.day += 1
|
| 198 |
done = episode.day >= SIM_DAYS
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
fill_rate = (
|
| 201 |
episode.total_fulfilled / episode.total_demand
|
| 202 |
if episode.total_demand > 0 else 0.0
|
| 203 |
)
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
return StepResult(
|
| 207 |
observation=episode.get_obs(),
|
|
@@ -211,6 +242,9 @@ def step(action: InventoryAction):
|
|
| 211 |
"fill_rate": fill_rate,
|
| 212 |
"stockouts": episode.stockouts,
|
| 213 |
"lost_sales": episode.lost_sales,
|
|
|
|
|
|
|
|
|
|
| 214 |
"reasoning_logged": action.reasoning[:200] if action.reasoning else "",
|
| 215 |
},
|
| 216 |
)
|
|
|
|
| 11 |
from config import (
|
| 12 |
SIM_DAYS, HISTO_DAYS, LEAD_TIME,
|
| 13 |
WRITE_OFF_RATE, WRITE_OFF_FREQUENCY,
|
| 14 |
+
UNIT_COST, SELLING_PRICE, FIXED_ORDER_COST,
|
| 15 |
)
|
| 16 |
from demand_environment import (
|
| 17 |
GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance,
|
|
|
|
| 88 |
self.total_fulfilled: float = 0.0
|
| 89 |
self.stockouts: int = 0
|
| 90 |
self.lost_sales: float = 0.0
|
| 91 |
+
self.cumulative_profit: float = 0.0
|
| 92 |
+
self.baseline_profit: float = 0.0
|
| 93 |
self.initialized: bool = False
|
| 94 |
|
| 95 |
def get_obs(self) -> InventoryObservation:
|
|
|
|
| 142 |
episode.day = HISTO_DAYS
|
| 143 |
episode.initialized = True
|
| 144 |
|
| 145 |
+
# Compute baseline profit: expected daily profit at full service (no stockouts)
|
| 146 |
+
episode_demand = episode.demand_series[HISTO_DAYS:]
|
| 147 |
+
mean_demand = float(np.mean(episode_demand)) if episode_demand else 0.0
|
| 148 |
+
episode.baseline_profit = mean_demand * (SELLING_PRICE - UNIT_COST)
|
| 149 |
+
|
| 150 |
return episode.get_obs()
|
| 151 |
|
| 152 |
|
|
|
|
| 170 |
o for o in episode.order_processor.order_queue if o.arrival_day > day
|
| 171 |
]
|
| 172 |
|
| 173 |
+
# 2. Daily spoilage (0.143% per day)
|
| 174 |
+
spoilage = episode.inventory * WRITE_OFF_RATE
|
| 175 |
+
writeoff_cost = spoilage * UNIT_COST
|
| 176 |
+
episode.inventory = max(0.0, episode.inventory - spoilage)
|
| 177 |
+
episode.performance_tracker.write_offs += spoilage
|
| 178 |
+
|
| 179 |
+
# 3. Fulfill demand
|
| 180 |
+
units_sold = min(demand, episode.inventory)
|
| 181 |
episode.inventory = max(0.0, episode.inventory - demand)
|
| 182 |
+
lost = max(0.0, demand - units_sold)
|
| 183 |
if lost > 0:
|
| 184 |
episode.stockouts += 1
|
| 185 |
episode.lost_sales += lost
|
| 186 |
episode.total_demand += demand
|
| 187 |
+
episode.total_fulfilled += units_sold
|
| 188 |
|
| 189 |
+
# 4. Reorder if inventory at or below ROP
|
| 190 |
rop = max(0.0, action.reorder_point)
|
| 191 |
+
qty = 0
|
| 192 |
if day < SIM_DAYS - LEAD_TIME and episode.inventory <= rop:
|
| 193 |
hist = episode.demand_series[max(0, day - 30):day]
|
| 194 |
mean_demand = float(np.mean(hist)) if hist else 0.0
|
|
|
|
| 196 |
if qty > 0:
|
| 197 |
episode.order_processor.place_order(day, int(qty))
|
| 198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
# 5. Track performance
|
| 200 |
episode.performance_tracker.daily_performance(
|
| 201 |
demand_quantity=demand,
|
| 202 |
+
fulfilled_demand=int(units_sold),
|
| 203 |
+
daily_writeoff=0,
|
| 204 |
)
|
| 205 |
|
| 206 |
episode.day += 1
|
| 207 |
done = episode.day >= SIM_DAYS
|
| 208 |
|
| 209 |
+
# 6. Compute dense daily P&L reward
|
| 210 |
+
revenue = units_sold * SELLING_PRICE
|
| 211 |
+
holding_cost = episode.inventory * UNIT_COST * 0.005
|
| 212 |
+
stockout_penalty = lost * (SELLING_PRICE - UNIT_COST)
|
| 213 |
+
order_cost = (FIXED_ORDER_COST if qty > 0 else 0.0) + qty * UNIT_COST
|
| 214 |
+
|
| 215 |
+
daily_profit = revenue - holding_cost - stockout_penalty - order_cost - writeoff_cost
|
| 216 |
+
episode.cumulative_profit += daily_profit
|
| 217 |
+
|
| 218 |
+
baseline = episode.baseline_profit
|
| 219 |
+
daily_reward = daily_profit / baseline if baseline > 0 else 0.0
|
| 220 |
+
|
| 221 |
+
# 7. Sparse episode bonus at end
|
| 222 |
fill_rate = (
|
| 223 |
episode.total_fulfilled / episode.total_demand
|
| 224 |
if episode.total_demand > 0 else 0.0
|
| 225 |
)
|
| 226 |
+
if done:
|
| 227 |
+
episode_length = SIM_DAYS - HISTO_DAYS
|
| 228 |
+
profit_ratio = (
|
| 229 |
+
episode.cumulative_profit / (baseline * episode_length)
|
| 230 |
+
if baseline > 0 else 0.0
|
| 231 |
+
)
|
| 232 |
+
episode_bonus = fill_rate * 0.5 + profit_ratio * 0.5
|
| 233 |
+
reward = daily_reward + episode_bonus
|
| 234 |
+
else:
|
| 235 |
+
reward = daily_reward
|
| 236 |
|
| 237 |
return StepResult(
|
| 238 |
observation=episode.get_obs(),
|
|
|
|
| 242 |
"fill_rate": fill_rate,
|
| 243 |
"stockouts": episode.stockouts,
|
| 244 |
"lost_sales": episode.lost_sales,
|
| 245 |
+
"inventory_in": delivered,
|
| 246 |
+
"units_sold": units_sold,
|
| 247 |
+
"daily_profit": daily_profit,
|
| 248 |
"reasoning_logged": action.reasoning[:200] if action.reasoning else "",
|
| 249 |
},
|
| 250 |
)
|