"""Drive a delivery-agent episode over HTTP, choosing each action with an LLM.

For every task the script resets the environment served at ``SPACE_URL``,
repeatedly asks the chat model for an action and posts it to ``/step``, then
asks ``/grader`` for the final score.  Progress is reported on stdout as
``[START]`` / ``[STEP]`` / ``[END]`` / ``[SUMMARY]`` lines.

Configuration is read from the environment: ``API_BASE_URL``, ``MODEL_NAME``,
``HF_TOKEN`` (required) and ``SPACE_URL``.
"""
import os
import sys

import requests
from openai import OpenAI

API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4.1-mini")
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is required")
SPACE_URL = os.getenv("SPACE_URL", "https://ravigohelai-smart-delivery-env.hf.space")

client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)

# Actions accepted from the model; anything else is replaced by "retry".
_VALID_ACTIONS = (
    "retry",
    "safe_drop",
    "locker_drop",
    "neighbor_handoff",
    "contact_support",
    "return",
)

# Observation fields shown to the model, in prompt order.
_OBSERVATION_FIELDS = (
    "otp_attempts",
    "recipient_available",
    "alternate_available",
    "package_value",
    "package_fragile",
    "time_remaining",
    "weather_risk",
    "location_safety",
    "locker_nearby",
)

# Characters models commonly wrap around a one-word answer (quotes, backticks,
# markdown emphasis, trailing punctuation).  No valid action starts or ends
# with any of them, so stripping never alters an exact match.
_REPLY_PADDING = " \t\r\n\"'`*.,:;!"

_REQUEST_TIMEOUT = 30  # seconds, applied to every environment request
_MAX_STEPS = 10  # default cap on actions per episode


def _as_float(value, default: float = 0.0) -> float:
    """Return ``float(value)``, or *default* when *value* is None or not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _heuristic_action(observation: dict) -> str:
    """Pick an action from fixed thresholds; used when the LLM call fails."""
    if _as_float(observation.get("locker_nearby")) > 0.5:
        return "locker_drop"
    if _as_float(observation.get("alternate_available")) > 0.5:
        return "neighbor_handoff"
    if _as_float(observation.get("otp_attempts")) < 0.7:
        return "retry"
    return "contact_support"


def get_llm_action(observation: dict) -> str:
    """Ask the chat model which action to take for *observation*.

    Args:
        observation: Mapping of observation fields to numeric values.  Missing,
            ``None`` or non-numeric fields are treated as 0.

    Returns:
        One of the valid action names.  A reply that is not a valid action
        (after trimming surrounding quotes/punctuation) yields ``"retry"``.
        If the model call itself fails, a threshold-based heuristic chooses
        the action instead.
    """
    # Coerce every field before formatting so a null value in the observation
    # cannot raise while the prompt is being built.
    situation = ", ".join(
        f"{field}={_as_float(observation.get(field)):.2f}"
        for field in _OBSERVATION_FIELDS
    )
    prompt = (
        "You are a delivery agent. Choose the best action. "
        f"Situation: {situation} "
        f"Valid actions: {', '.join(_VALID_ACTIONS)} "
        "Reply with ONLY the action name."
    )
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
        )
        # A None content raises AttributeError here and is handled below.
        reply = response.choices[0].message.content.strip().lower()
        action = reply.strip(_REPLY_PADDING)
        return action if action in _VALID_ACTIONS else "retry"
    except Exception as exc:
        # Best-effort: keep the episode going, but say so on stderr so stdout
        # keeps only the structured progress lines.  Unlike a bare ``except``,
        # this lets KeyboardInterrupt / SystemExit propagate.
        print(
            f"[WARN] LLM call failed ({type(exc).__name__}); using heuristic fallback",
            file=sys.stderr,
            flush=True,
        )
        return _heuristic_action(observation)


def run_inference(task_id: str = "task_1", max_steps: int = _MAX_STEPS) -> dict:
    """Run one episode of *task_id* and return its graded result.

    Args:
        task_id: Task identifier sent to the ``/reset``, ``/step`` and
            ``/grader`` endpoints.
        max_steps: Maximum number of actions to send before stopping.

    Returns:
        ``{"task_id", "score", "outcome", "steps"}``.  ``score`` falls back to
        0.5 and ``outcome`` to ``"unknown"`` when the grader omits them.

    Exceptions raised by ``requests`` (connection errors, timeouts) and by
    ``Response.json()`` on a non-JSON body are not caught here.
    """
    print(f"[START] task={task_id}", flush=True)
    r = requests.post(
        f"{SPACE_URL}/reset",
        params={"task_id": task_id},
        timeout=_REQUEST_TIMEOUT,
    )
    obs = r.json()
    step = 0
    while not obs.get("done", False) and step < max_steps:
        step += 1
        action = get_llm_action(obs)
        r = requests.post(
            f"{SPACE_URL}/step",
            json={"action_type": action, "task_id": task_id},
            timeout=_REQUEST_TIMEOUT,
        )
        obs = r.json()
        # Log the reward from the observation returned for THIS action, not
        # the one the action was chosen from.
        reward = _as_float(obs.get("reward"))
        print(f"[STEP] step={step} action={action} reward={reward:.3f}", flush=True)

    r = requests.post(
        f"{SPACE_URL}/grader",
        params={"task_id": task_id},
        timeout=_REQUEST_TIMEOUT,
    )
    grader = r.json()
    score = _as_float(grader.get("score"), 0.5)
    outcome = grader.get("outcome", "unknown")
    print(
        f"[END] task={task_id} score={score:.3f} outcome={outcome} steps={step}",
        flush=True,
    )
    return {"task_id": task_id, "score": score, "outcome": outcome, "steps": step}


def main():
    """Run every task once, print a summary line and return ``{task_id: score}``."""
    results = {}
    for task_id in ["task_1", "task_2", "task_3"]:
        result = run_inference(task_id)
        results[task_id] = result["score"]
    avg = sum(results.values()) / len(results)
    print(f"[SUMMARY] scores={results} average={avg:.3f}", flush=True)
    return results


if __name__ == "__main__":
    main()