import sys import os sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) from dataclasses import dataclass, asdict from typing import List, Optional import numpy as np import httpx from fastapi import FastAPI, HTTPException from fastapi.staticfiles import StaticFiles from fastapi.responses import FileResponse from pydantic import BaseModel from config import ( SIM_DAYS, HISTO_DAYS, LEAD_TIME, WRITE_OFF_RATE, WRITE_OFF_FREQUENCY, ) from reward import compute_daily_pnl from demand_environment import ( GammaPoisson, GammaGammaHighVariance, SpikingDemand, SingleGammaLowVariance, ) from demand_calculator import DemandCalculator from order_processor import OrderProcessor from performance_tracker import PerformanceTracker app = FastAPI(title="Inventory Reasoning Environment") ENV_TYPES = { 0: GammaPoisson, 1: GammaGammaHighVariance, 2: SpikingDemand, 3: SingleGammaLowVariance, } # ── Pydantic models (request/response) ─────────────────────────────────────── class InventoryAction(BaseModel): reorder_point: float reasoning: str = "" class PendingOrder(BaseModel): arrival_day: int quantity: int class InventoryObservation(BaseModel): day: int current_inventory: float demand_last_5: List[float] demand_mean_30d: float demand_std_30d: float fill_rate_so_far: float recent_stockouts: int recent_lost_sales: float days_remaining: int pending_orders: List[PendingOrder] demand_last_year_7d: List[float] class StepResult(BaseModel): observation: InventoryObservation reward: float done: bool info: dict class StateResponse(BaseModel): day: int fill_rate: float done: bool total_demand: float total_fulfilled: float stockouts: int lost_sales: float # ── Episode state (single global episode for simplicity) ───────────────────── class EpisodeState: def __init__(self): self.reset_state() def reset_state(self): self.day: int = 0 self.inventory: float = 0.0 self.demand_series: List[int] = [] self.order_processor = OrderProcessor() self.performance_tracker = PerformanceTracker() self.total_demand: float = 0.0 self.total_fulfilled: float = 0.0 self.stockouts: int = 0 self.lost_sales: float = 0.0 self.initialized: bool = False def get_obs(self) -> InventoryObservation: hist_start = max(0, self.day - HISTO_DAYS) hist = self.demand_series[hist_start:self.day] last5 = self.demand_series[max(0, self.day - 5):self.day] hist30 = self.demand_series[max(0, self.day - 30):self.day] pending = [ PendingOrder(arrival_day=o.arrival_day, quantity=o.quantity) for o in self.order_processor.order_queue[:5] ] ly_anchor = self.day - 365 ly_start = max(0, ly_anchor - 3) ly_end = min(len(self.demand_series), ly_anchor + 4) demand_last_year_7d = [float(d) for d in self.demand_series[ly_start:ly_end]] return InventoryObservation( day=self.day, current_inventory=self.inventory, demand_last_5=[float(d) for d in last5], demand_mean_30d=float(np.mean(hist30)) if hist30 else 0.0, demand_std_30d=float(np.std(hist30)) if len(hist30) > 1 else 0.0, fill_rate_so_far=( self.total_fulfilled / self.total_demand if self.total_demand > 0 else 0.0 ), recent_stockouts=self.stockouts, recent_lost_sales=self.lost_sales, days_remaining=SIM_DAYS - self.day, pending_orders=pending, demand_last_year_7d=demand_last_year_7d, ) episode = EpisodeState() # ── Endpoints ───────────────────────────────────────────────────────────────── @app.post("/reset", response_model=InventoryObservation) def reset(env_type: int = 0): if env_type not in ENV_TYPES: raise HTTPException(status_code=400, detail=f"env_type must be 0-{len(ENV_TYPES)-1}") episode.reset_state() env_class = ENV_TYPES[env_type] environment = env_class(SIM_DAYS) dc = DemandCalculator(SIM_DAYS) dc.set_environment(environment) episode.demand_series = [dc.get_daily_demand(i) for i in range(SIM_DAYS)] # Warm up history (agents use HISTO_DAYS of history before acting) episode.day = HISTO_DAYS episode.initialized = True return episode.get_obs() @app.post("/step", response_model=StepResult) def step(action: InventoryAction): if not episode.initialized: raise HTTPException(status_code=400, detail="Call /reset before /step") if episode.day >= SIM_DAYS: raise HTTPException(status_code=400, detail="Episode already done. Call /reset.") day = episode.day demand = episode.demand_series[day] # 1. Deliver pending orders delivered = sum( o.quantity for o in episode.order_processor.order_queue if o.arrival_day == day ) episode.inventory += delivered episode.order_processor.order_queue = [ o for o in episode.order_processor.order_queue if o.arrival_day > day ] # 2. Daily spoilage (0.143% per day) spoilage = episode.inventory * WRITE_OFF_RATE episode.inventory = max(0.0, episode.inventory - spoilage) episode.performance_tracker.write_offs += spoilage # 3. Fulfill demand units_sold = min(demand, episode.inventory) episode.inventory = max(0.0, episode.inventory - demand) lost = max(0.0, demand - units_sold) if lost > 0: episode.stockouts += 1 episode.lost_sales += lost episode.total_demand += demand episode.total_fulfilled += units_sold # 4. Reorder if inventory at or below ROP rop = max(0.0, action.reorder_point) qty = 0 hist = episode.demand_series[max(0, day - 30):day] mean_demand = float(np.mean(hist)) if hist else 0.0 pipeline = sum(o.quantity for o in episode.order_processor.order_queue) inv_position = episode.inventory + pipeline if day < SIM_DAYS - LEAD_TIME and inv_position <= rop: qty = max(0.0, rop - inv_position + mean_demand * LEAD_TIME) if qty > 0: episode.order_processor.place_order(day, int(qty)) # 5. Track performance episode.performance_tracker.daily_performance( demand_quantity=demand, fulfilled_demand=int(units_sold), daily_writeoff=0, ) episode.day += 1 done = episode.day >= SIM_DAYS fill_rate = ( episode.total_fulfilled / episode.total_demand if episode.total_demand > 0 else 0.0 ) pnl = compute_daily_pnl( units_sold=units_sold, lost=lost, inventory_after=episode.inventory, ordered_qty=qty, spoilage=spoilage, mean_demand=mean_demand, ) reward = pnl["daily_reward"] return StepResult( observation=episode.get_obs(), reward=reward, done=done, info={ "fill_rate": fill_rate, "stockouts": episode.stockouts, "lost_sales": episode.lost_sales, "inventory_in": delivered, "units_sold": units_sold, "daily_profit": pnl["daily_profit"], "daily_reward": pnl["daily_reward"], "reasoning_logged": action.reasoning[:200] if action.reasoning else "", }, ) @app.get("/state", response_model=StateResponse) def state(): if not episode.initialized: raise HTTPException(status_code=400, detail="Call /reset first") fill_rate = ( episode.total_fulfilled / episode.total_demand if episode.total_demand > 0 else 0.0 ) return StateResponse( day=episode.day, fill_rate=fill_rate, done=episode.day >= SIM_DAYS, total_demand=episode.total_demand, total_fulfilled=episode.total_fulfilled, stockouts=episode.stockouts, lost_sales=episode.lost_sales, ) # ── HF Inference API proxy (avoids browser CSP restrictions on HF Spaces) ──── class QwenRequest(BaseModel): model: str messages: list max_tokens: int = 600 temperature: float = 0.7 hf_token: str = "" @app.post("/api/qwen", include_in_schema=False) async def qwen_proxy(req: QwenRequest): token = req.hf_token or os.environ.get("HF_TOKEN", "") headers = {"Content-Type": "application/json"} if token: headers["Authorization"] = f"Bearer {token}" url = "https://router.huggingface.co/hf-inference/v1/chat/completions" payload = {"model": req.model, "messages": req.messages, "max_tokens": req.max_tokens, "temperature": req.temperature} async with httpx.AsyncClient(timeout=60.0) as client: resp = await client.post(url, json=payload, headers=headers) if resp.status_code != 200: raise HTTPException(status_code=resp.status_code, detail=resp.text) return resp.json() # ── Serve React frontend (static files built by Dockerfile) ────────────────── _static_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static") if os.path.isdir(_static_dir): app.mount("/assets", StaticFiles(directory=os.path.join(_static_dir, "assets")), name="assets") @app.get("/", include_in_schema=False) @app.get("/{full_path:path}", include_in_schema=False) async def serve_spa(full_path: str = ""): # API routes are handled above; everything else serves the React app index = os.path.join(_static_dir, "index.html") return FileResponse(index, headers={"Cache-Control": "no-store, no-cache, must-revalidate"})