Spaces:

ademarteau
/

RL-Inventory-Simulations

Runtime error

RL-Inventory-Simulations / server /inventory_env.py

ademarteau

fix: use correct HF router URL /hf-inference/v1/ (not /models/{id}/v1/)

9a9473a 2 days ago

9.95 kB

	import sys
	import os
	sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

	from dataclasses import dataclass, asdict
	from typing import List, Optional
	import numpy as np
	import httpx
	from fastapi import FastAPI, HTTPException
	from fastapi.staticfiles import StaticFiles
	from fastapi.responses import FileResponse
	from pydantic import BaseModel

	from config import (
	SIM_DAYS, HISTO_DAYS, LEAD_TIME,
	WRITE_OFF_RATE, WRITE_OFF_FREQUENCY,
	)
	from reward import compute_daily_pnl
	from demand_environment import (
	GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance,
	)
	from demand_calculator import DemandCalculator
	from order_processor import OrderProcessor
	from performance_tracker import PerformanceTracker

	# The FastAPI application object; every route in this module is registered on it.
	app = FastAPI(title="Inventory Reasoning Environment")

	# Integer `env_type` codes accepted by /reset, mapped to the demand-environment
	# class that is instantiated for the episode. Codes are consecutive from 0.
	ENV_TYPES = dict(enumerate((
	    GammaPoisson,
	    GammaGammaHighVariance,
	    SpikingDemand,
	    SingleGammaLowVariance,
	)))


	# ── Pydantic models (request/response) ───────────────────────────────────────

	class InventoryAction(BaseModel):
	    # Request body of POST /step.
	    # `#` comments are used instead of a docstring so the generated schema
	    # description stays exactly as it was.

	    # Reorder point chosen by the agent; /step clamps negative values to 0.
	    reorder_point: float
	    # Optional free-text rationale; /step echoes back at most its first 200 chars.
	    reasoning: str = ""


	class PendingOrder(BaseModel):
	    # One in-flight replenishment order as exposed inside an observation.

	    # Simulation day on which the order is added to inventory.
	    arrival_day: int
	    # Number of units in the order.
	    quantity: int


	class InventoryObservation(BaseModel):
	    # What the agent sees before choosing its next action.
	    # Instances are built by EpisodeState.get_obs().

	    # Index of the simulation day about to be played.
	    day: int
	    # Units currently on hand.
	    current_inventory: float
	    # Up to five most recent daily demands preceding `day`.
	    demand_last_5: List[float]
	    # Mean / standard deviation (np.mean / np.std) of up to 30 days preceding `day`.
	    demand_mean_30d: float
	    demand_std_30d: float
	    # Fulfilled / total demand so far in the episode (0.0 before any demand).
	    fill_rate_so_far: float
	    # Despite the "recent_" prefix, both are filled from episode-wide running totals.
	    recent_stockouts: int
	    recent_lost_sales: float
	    # SIM_DAYS minus `day`.
	    days_remaining: int
	    # At most the first five entries of the order queue.
	    pending_orders: List[PendingOrder]
	    # Demand values from a short window centred 365 days before `day`.
	    demand_last_year_7d: List[float]


	class StepResult(BaseModel):
	    # Response body of POST /step.

	    # Observation for the day that follows the one just simulated.
	    observation: InventoryObservation
	    # The "daily_reward" entry returned by compute_daily_pnl for the simulated day.
	    reward: float
	    # True once the episode day counter has reached SIM_DAYS.
	    done: bool
	    # Diagnostics: fill rate, stockouts, lost sales, deliveries, sales, PnL, reasoning.
	    info: dict


	class StateResponse(BaseModel):
	    # Response body of GET /state: a summary of the running episode.

	    # Current simulation day index.
	    day: int
	    # total_fulfilled / total_demand, or 0.0 while total_demand is 0.
	    fill_rate: float
	    # True once `day` has reached SIM_DAYS.
	    done: bool
	    # Running episode totals.
	    total_demand: float
	    total_fulfilled: float
	    # Number of days on which some demand went unfilled.
	    stockouts: int
	    # Cumulative unfilled demand in units.
	    lost_sales: float


	# ── Episode state (single global episode for simplicity) ─────────────────────

	class EpisodeState:
	    """Mutable bookkeeping for one simulation episode.

	    Holds the day counter, on-hand inventory, the pre-generated demand
	    series, the order/performance helpers and the running service totals.
	    """

	    def __init__(self):
	        self.reset_state()

	    def reset_state(self):
	        """Restore every field to its initial value.

	        Creates fresh OrderProcessor and PerformanceTracker instances and
	        marks the episode as not initialised.
	        """
	        self.day: int = 0
	        self.inventory: float = 0.0
	        self.demand_series: List[int] = []
	        self.order_processor = OrderProcessor()
	        self.performance_tracker = PerformanceTracker()
	        self.total_demand: float = 0.0
	        self.total_fulfilled: float = 0.0
	        self.stockouts: int = 0
	        self.lost_sales: float = 0.0
	        self.initialized: bool = False

	    @staticmethod
	    def _last_year_window(demand_series, day):
	        """Return demand for days [day-365-3, day-365+3] as floats.

	        The window is clipped to the bounds of ``demand_series``. When it
	        lies entirely before day 0 the result is an empty list.
	        """
	        anchor = day - 365
	        start = max(0, anchor - 3)
	        # Clamp the stop index at 0: a negative stop would be interpreted by
	        # Python slicing as "count from the end" and would return most of the
	        # series, including days that have not been played yet.
	        stop = max(0, min(len(demand_series), anchor + 4))
	        return [float(d) for d in demand_series[start:stop]]

	    def get_obs(self) -> "InventoryObservation":
	        """Build the observation for the current day from past demand only."""
	        day = self.day
	        series = self.demand_series
	        last5 = series[max(0, day - 5):day]
	        hist30 = series[max(0, day - 30):day]

	        # Only the first five queued orders are exposed to the agent.
	        pending = [
	            PendingOrder(arrival_day=o.arrival_day, quantity=o.quantity)
	            for o in self.order_processor.order_queue[:5]
	        ]

	        return InventoryObservation(
	            day=day,
	            current_inventory=self.inventory,
	            demand_last_5=[float(d) for d in last5],
	            demand_mean_30d=float(np.mean(hist30)) if hist30 else 0.0,
	            demand_std_30d=float(np.std(hist30)) if len(hist30) > 1 else 0.0,
	            fill_rate_so_far=(
	                self.total_fulfilled / self.total_demand
	                if self.total_demand > 0 else 0.0
	            ),
	            recent_stockouts=self.stockouts,
	            recent_lost_sales=self.lost_sales,
	            days_remaining=SIM_DAYS - day,
	            pending_orders=pending,
	            demand_last_year_7d=self._last_year_window(series, day),
	        )


	# Module-level episode object read and mutated by the /reset, /step and
	# /state handlers; there is exactly one episode per server process.
	episode = EpisodeState()


	# ── Endpoints ─────────────────────────────────────────────────────────────────

	@app.post("/reset", response_model=InventoryObservation)
	def reset(env_type: int = 0):
	    """Start a new episode and return its first observation.

	    `env_type` selects the demand environment via ENV_TYPES; an unknown
	    code is rejected with HTTP 400. The full demand series for SIM_DAYS
	    days is generated up front, and the day counter starts at HISTO_DAYS.
	    """
	    env_class = ENV_TYPES.get(env_type)
	    if env_class is None:
	        raise HTTPException(status_code=400, detail=f"env_type must be 0-{len(ENV_TYPES)-1}")

	    episode.reset_state()

	    # Build the environment first, then the calculator (same order as before).
	    demand_env = env_class(SIM_DAYS)
	    calculator = DemandCalculator(SIM_DAYS)
	    calculator.set_environment(demand_env)
	    episode.demand_series = list(map(calculator.get_daily_demand, range(SIM_DAYS)))

	    # Skip ahead so HISTO_DAYS of demand history exist before the first action.
	    episode.initialized = True
	    episode.day = HISTO_DAYS

	    return episode.get_obs()


	@app.post("/step", response_model=StepResult)
	def step(action: InventoryAction):
	    """Simulate one day of the global episode using the agent's reorder point.

	    Order of operations: deliver arrived orders, apply spoilage, serve
	    demand, optionally place a replenishment order, record performance,
	    advance the day and compute the reward.

	    Raises:
	        HTTPException(400): /reset has not been called, the episode is
	            already finished, or `reorder_point` is NaN / infinite.
	    """
	    if not episode.initialized:
	        raise HTTPException(status_code=400, detail="Call /reset before /step")
	    if episode.day >= SIM_DAYS:
	        raise HTTPException(status_code=400, detail="Episode already done. Call /reset.")
	    # Reject non-finite input before touching any state: an infinite reorder
	    # point would otherwise fail at int() below, after inventory and totals
	    # had already been modified for the day.
	    if not np.isfinite(action.reorder_point):
	        raise HTTPException(status_code=400, detail="reorder_point must be a finite number")

	    day = episode.day
	    demand = episode.demand_series[day]

	    # 1. Deliver pending orders. Anything due on or before today is received,
	    #    so an order whose arrival day has already passed is not silently
	    #    discarded when the queue is filtered.
	    queue = episode.order_processor.order_queue
	    delivered = sum(o.quantity for o in queue if o.arrival_day <= day)
	    episode.inventory += delivered
	    episode.order_processor.order_queue = [o for o in queue if o.arrival_day > day]

	    # 2. Daily spoilage: a WRITE_OFF_RATE fraction of on-hand stock is written off.
	    spoilage = episode.inventory * WRITE_OFF_RATE
	    episode.inventory = max(0.0, episode.inventory - spoilage)
	    episode.performance_tracker.write_offs += spoilage

	    # 3. Fulfill demand; whatever cannot be served is counted as lost sales.
	    units_sold = min(demand, episode.inventory)
	    episode.inventory = max(0.0, episode.inventory - demand)
	    lost = max(0.0, demand - units_sold)
	    if lost > 0:
	        episode.stockouts += 1
	        episode.lost_sales += lost
	    episode.total_demand += demand
	    episode.total_fulfilled += units_sold

	    # 4. Reorder if the inventory position (on hand + in transit) is at or
	    #    below the reorder point. No orders are placed in the last LEAD_TIME days.
	    rop = max(0.0, action.reorder_point)
	    qty = 0
	    hist = episode.demand_series[max(0, day - 30):day]
	    mean_demand = float(np.mean(hist)) if hist else 0.0
	    pipeline = sum(o.quantity for o in episode.order_processor.order_queue)
	    inv_position = episode.inventory + pipeline
	    if day < SIM_DAYS - LEAD_TIME and inv_position <= rop:
	        # Truncate to whole units once, so the quantity that is ordered is
	        # also the quantity reported to the PnL calculation, and so that a
	        # fractional quantity below one unit does not create an empty order.
	        qty = int(max(0.0, rop - inv_position + mean_demand * LEAD_TIME))
	        if qty > 0:
	            episode.order_processor.place_order(day, qty)

	    # 5. Track performance. Spoilage was already added to write_offs in
	    #    step 2, so 0 is passed here (presumably to avoid counting it twice;
	    #    confirm against PerformanceTracker).
	    episode.performance_tracker.daily_performance(
	        demand_quantity=demand,
	        fulfilled_demand=int(units_sold),
	        daily_writeoff=0,
	    )

	    episode.day += 1
	    done = episode.day >= SIM_DAYS

	    fill_rate = (
	        episode.total_fulfilled / episode.total_demand
	        if episode.total_demand > 0 else 0.0
	    )

	    pnl = compute_daily_pnl(
	        units_sold=units_sold,
	        lost=lost,
	        inventory_after=episode.inventory,
	        ordered_qty=qty,
	        spoilage=spoilage,
	        mean_demand=mean_demand,
	    )
	    reward = pnl["daily_reward"]

	    return StepResult(
	        observation=episode.get_obs(),
	        reward=reward,
	        done=done,
	        info={
	            "fill_rate": fill_rate,
	            "stockouts": episode.stockouts,
	            "lost_sales": episode.lost_sales,
	            "inventory_in": delivered,
	            "units_sold": units_sold,
	            "daily_profit": pnl["daily_profit"],
	            "daily_reward": pnl["daily_reward"],
	            "reasoning_logged": action.reasoning[:200] if action.reasoning else "",
	        },
	    )


	@app.get("/state", response_model=StateResponse)
	def state():
	    """Report progress and service totals of the running episode.

	    Responds with HTTP 400 when no episode has been started via /reset.
	    """
	    if not episode.initialized:
	        raise HTTPException(status_code=400, detail="Call /reset first")

	    demand_total = episode.total_demand
	    served_total = episode.total_fulfilled
	    # Guard the division: no demand recorded yet means a fill rate of 0.0.
	    if demand_total > 0:
	        ratio = served_total / demand_total
	    else:
	        ratio = 0.0

	    return StateResponse(
	        day=episode.day,
	        fill_rate=ratio,
	        done=episode.day >= SIM_DAYS,
	        total_demand=demand_total,
	        total_fulfilled=served_total,
	        stockouts=episode.stockouts,
	        lost_sales=episode.lost_sales,
	    )


	# ── HF Inference API proxy (avoids browser CSP restrictions on HF Spaces) ────

	class QwenRequest(BaseModel):
	    # Request body of POST /api/qwen; forwarded to the chat-completions endpoint.

	    # Model identifier passed through unchanged as the "model" field.
	    model: str
	    # Chat messages passed through unchanged as the "messages" field.
	    messages: list
	    # Generation settings passed through unchanged.
	    max_tokens: int = 600
	    temperature: float = 0.7
	    # Optional caller-supplied token; when empty the HF_TOKEN env var is used.
	    hf_token: str = ""

	@app.post("/api/qwen", include_in_schema=False)
	async def qwen_proxy(req: QwenRequest):
	    """Forward a chat-completion request to the Hugging Face router.

	    The bearer token is taken from the request body, falling back to the
	    HF_TOKEN environment variable; with neither, no Authorization header
	    is sent.

	    Returns:
	        The upstream JSON body, unchanged, when the upstream answers 200.

	    Raises:
	        HTTPException: with the upstream status code and body text for any
	            non-200 answer; 504 when the upstream call times out; 502 for
	            other transport failures or a 200 answer that is not JSON.
	    """
	    token = req.hf_token or os.environ.get("HF_TOKEN", "")
	    headers = {"Content-Type": "application/json"}
	    if token:
	        headers["Authorization"] = f"Bearer {token}"
	    url = "https://router.huggingface.co/hf-inference/v1/chat/completions"
	    payload = {"model": req.model, "messages": req.messages, "max_tokens": req.max_tokens, "temperature": req.temperature}

	    # Translate transport-level failures into gateway errors instead of
	    # letting them surface as unhandled 500s. TimeoutException is a subclass
	    # of RequestError, so it must be caught first.
	    try:
	        async with httpx.AsyncClient(timeout=60.0) as client:
	            resp = await client.post(url, json=payload, headers=headers)
	    except httpx.TimeoutException as exc:
	        raise HTTPException(status_code=504, detail=f"Upstream inference request timed out: {exc}") from exc
	    except httpx.RequestError as exc:
	        raise HTTPException(status_code=502, detail=f"Upstream inference request failed: {exc}") from exc

	    if resp.status_code != 200:
	        raise HTTPException(status_code=resp.status_code, detail=resp.text)

	    try:
	        return resp.json()
	    except ValueError as exc:
	        # Covers json.JSONDecodeError, which subclasses ValueError.
	        raise HTTPException(status_code=502, detail="Upstream returned a non-JSON response") from exc


	# ── Serve React frontend (static files built by Dockerfile) ──────────────────
	_static_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static")
	if os.path.isdir(_static_dir):
	    # Mount /assets only when the directory exists: StaticFiles validates its
	    # directory when constructed, and a failure here would abort the import
	    # of this module and take the API routes down with it.
	    _assets_dir = os.path.join(_static_dir, "assets")
	    if os.path.isdir(_assets_dir):
	        app.mount("/assets", StaticFiles(directory=_assets_dir), name="assets")

	    # NOTE(review): the SPA routes are registered only when a static build is
	    # present; the original nesting could not be recovered from the flattened
	    # source, so confirm this matches the intended behaviour.
	    @app.get("/", include_in_schema=False)
	    @app.get("/{full_path:path}", include_in_schema=False)
	    async def serve_spa(full_path: str = ""):
	        """Serve the frontend entry page for any GET path not matched earlier.

	        The response carries no-cache headers. A missing index.html yields
	        HTTP 404.
	        """
	        # API routes are handled above; everything else serves the React app
	        index = os.path.join(_static_dir, "index.html")
	        if not os.path.isfile(index):
	            raise HTTPException(status_code=404, detail="Frontend build not found")
	        return FileResponse(index, headers={"Cache-Control": "no-store, no-cache, must-revalidate"})