"""HTTP front end (FastAPI) for the Smart Delivery RL environment."""

import sys
import os
import uvicorn

# Prepend the parent of this file's directory to the import path. This has to
# run before the `models` / `environment` imports below -- presumably those
# project-local modules live one directory up (confirm against repo layout).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_THIS_DIR))

from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from pydantic import BaseModel
from typing import Optional
from models import DeliveryObservation, DeliveryAction
from environment import DeliveryDecisionEnvironment, TASKS

# NOTE: route handlers and the request model below are documented with `#`
# comments rather than docstrings / return annotations on purpose: FastAPI and
# Pydantic publish those in the generated OpenAPI schema, which would change
# what the service emits.

# Task used whenever a caller does not name one.
_DEFAULT_TASK = "task_1"

# Action names advertised by /tasks; the baseline policy picks from these.
_ACTION_NAMES = (
    "retry",
    "safe_drop",
    "locker_drop",
    "neighbor_handoff",
    "contact_support",
    "return",
)

# Upper bound on the number of steps the /baseline rollout will take.
_BASELINE_STEP_CAP = 10

app = FastAPI(title="Smart Delivery RL Environment", version="1.0.0")

# One long-lived environment per task id, shared by every request. All of them
# are constructed first and only then reset, in TASKS iteration order.
envs = {t: DeliveryDecisionEnvironment(task_id=t) for t in TASKS}
for t in TASKS:
    envs[t].reset()


# Body of POST /step: the action to apply and (optionally) the target task.
class StepRequest(BaseModel):
    action_type: str
    task_id: Optional[str] = _DEFAULT_TASK


# GET / -- send browsers straight to the interactive API docs.
@app.get("/", include_in_schema=False)
def root():
    return RedirectResponse(url="/docs")


# GET /health -- static status payload plus the list of known task ids.
@app.get("/health")
def health():
    return {
        "status": "healthy",
        "environment": "smart-delivery-env",
        "version": "1.0.0",
        "tasks": list(TASKS),
    }


# GET /tasks -- describe every task together with the accepted action schema.
@app.get("/tasks")
def get_tasks():
    catalogue = []
    for key, spec in TASKS.items():
        catalogue.append(
            {
                "task_id": key,
                "name": spec["name"],
                "difficulty": spec["difficulty"],
                "description": spec["description"],
                "action_schema": {
                    "action_type": "string",
                    "valid_values": list(_ACTION_NAMES),
                },
            }
        )
    return {"tasks": catalogue}


# POST /reset -- reset the shared environment of a task and return the
# resulting observation as a dict. Unknown ids yield an error payload.
@app.post("/reset")
def reset(task_id: str = _DEFAULT_TASK):
    env = envs.get(task_id)
    if env is None:
        return {"error": "Unknown task."}
    observation = env.reset()
    return observation.dict()


# POST /step -- apply `action_type` to the task's shared environment and
# return whatever `step` produces, as a dict.
@app.post("/step")
def step(req: StepRequest):
    # An explicit null/empty task_id in the body falls back to the default.
    task_id = req.task_id or _DEFAULT_TASK
    env = envs.get(task_id)
    if env is None:
        return {"error": "Unknown task_id"}
    result = env.step(req.action_type)
    return result.dict()


# GET /state -- dump the current state object of a task's shared environment.
@app.get("/state")
def state(task_id: str = _DEFAULT_TASK):
    env = envs.get(task_id)
    if env is None:
        return {"error": "Unknown task_id"}
    return env.state.dict()


# POST /grader -- grade the task's shared environment and report the score
# along with the outcome and step count read from its state.
@app.post("/grader")
def grader(task_id: str = _DEFAULT_TASK):
    env = envs.get(task_id)
    if env is None:
        return {"error": "Unknown task_id"}
    # grade() is invoked before the state is read, as in the original flow.
    score = env.grade()
    return {
        "task_id": task_id,
        "score": score,
        "outcome": env.state.outcome,
        "steps": env.state.step_count,
    }


def _choose_baseline_action(obs) -> str:
    """Pick the baseline policy's action for one observation.

    The rules are tried in priority order and the first match wins, so later
    observation fields are only read when earlier rules did not fire.
    """
    if obs.locker_nearby > 0.5:
        return "locker_drop"
    if obs.alternate_available > 0.5:
        return "neighbor_handoff"
    drop_is_ok = (
        obs.location_safety > 0.5
        and obs.weather_risk < 0.6
        and obs.package_fragile < 0.5
    )
    if drop_is_ok:
        return "safe_drop"
    if obs.otp_attempts < 0.7:
        return "retry"
    if obs.recipient_available < 0.5:
        return "contact_support"
    return "return"


# POST /baseline -- roll out the rule-based policy on a brand-new environment
# (the shared `envs` entry is not used) and return the trajectory plus grade.
@app.post("/baseline")
def baseline(task_id: str = _DEFAULT_TASK):
    if task_id not in TASKS:
        return {"error": "Unknown task_id"}

    env = DeliveryDecisionEnvironment(task_id=task_id)
    obs = env.reset()
    trajectory = []
    while not obs.done and env.state.step_count < _BASELINE_STEP_CAP:
        chosen = _choose_baseline_action(obs)
        # Record the message of the observation the decision was based on.
        trajectory.append({"action": chosen, "message": obs.message})
        obs = env.step(chosen)

    return {
        "task_id": task_id,
        "trajectory": trajectory,
        "final_score": env.grade(),
        "outcome": env.state.outcome,
        "total_steps": env.state.step_count,
    }


def main():
    """Serve the app with uvicorn on all interfaces, port 7860."""
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    main()