# smart-delivery-env / inference.py
# Author: RaviGohelAI
# Last commit: "fix: scores strictly 0.05-0.95 never 0.0 or 1.0"
# Commit: 23c53d1
import os, requests
from openai import OpenAI
# --- Runtime configuration, read once from the environment at import time ---

# Base URL and model name used for the chat-completions calls.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4.1-mini")

# The token is mandatory: importing this module fails when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is required")

# Root URL of the environment server that exposes /reset, /step and /grader.
SPACE_URL = os.environ.get(
    "SPACE_URL",
    "https://ravigohelai-smart-delivery-env.hf.space",
)

# Shared client; HF_TOKEN is passed as the API key for API_BASE_URL.
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
# Actions accepted as a model reply; any other reply is replaced by "retry".
_VALID_ACTIONS = (
    "retry",
    "safe_drop",
    "locker_drop",
    "neighbor_handoff",
    "contact_support",
    "return",
)

# Observation fields shown to the model, in the order they appear in the prompt.
_PROMPT_FIELDS = (
    "otp_attempts",
    "recipient_available",
    "alternate_available",
    "package_value",
    "package_fragile",
    "time_remaining",
    "weather_risk",
    "location_safety",
    "locker_nearby",
)


def _feature(observation: dict, name: str) -> float:
    """Return ``observation[name]`` as a float; 0.0 if missing or non-numeric."""
    try:
        return float(observation.get(name, 0))
    except (TypeError, ValueError, OverflowError):
        # None, strings that are not numbers, nested objects, etc.
        return 0.0


def _heuristic_action(observation: dict) -> str:
    """Choose an action from fixed thresholds, without calling the model."""
    if _feature(observation, "locker_nearby") > 0.5:
        return "locker_drop"
    if _feature(observation, "alternate_available") > 0.5:
        return "neighbor_handoff"
    if _feature(observation, "otp_attempts") < 0.7:
        return "retry"
    return "contact_support"


def get_llm_action(observation: dict) -> str:
    """Ask the chat model for the next delivery action.

    The observation's numeric fields are formatted into a short prompt and
    sent through the module-level ``client`` using ``MODEL_NAME``.

    Args:
        observation: Mapping of observation fields. Missing, ``None`` or
            non-numeric values are treated as 0.0 instead of raising.

    Returns:
        One of the names in ``_VALID_ACTIONS``. A reply that is not a valid
        action yields ``"retry"``. If the model call itself fails, the
        threshold heuristic in ``_heuristic_action`` decides instead.
    """
    import sys  # local import: only needed for the failure diagnostic below

    situation = ", ".join(
        f"{name}={_feature(observation, name):.2f}" for name in _PROMPT_FIELDS
    )
    prompt = (
        "You are a delivery agent. Choose the best action.\n"
        f"Situation: {situation}\n"
        f"Valid actions: {', '.join(_VALID_ACTIONS)}\n"
        "Reply with ONLY the action name."
    )

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = response.choices[0].message.content
        # Models often wrap the answer in quotes/backticks or add a trailing
        # period; peel those off before validating the action name.
        action = reply.strip().strip("\"'`.").strip().lower()
    except Exception as exc:
        # Best-effort by design: any failure of the model call falls back to
        # the heuristic. Unlike a bare ``except``, KeyboardInterrupt and
        # SystemExit still propagate. The reason goes to stderr so the
        # [START]/[STEP]/[END] lines on stdout stay untouched.
        print(
            f"[WARN] LLM call failed ({type(exc).__name__}: {exc}); "
            "using heuristic fallback",
            file=sys.stderr,
            flush=True,
        )
        return _heuristic_action(observation)

    return action if action in _VALID_ACTIONS else "retry"
def run_inference(task_id: str = "task_1", max_steps: int = 10) -> dict:
    """Run one episode of a task against the environment server and grade it.

    Posts to ``{SPACE_URL}/reset``, then repeatedly asks ``get_llm_action``
    for an action and posts it to ``{SPACE_URL}/step`` until the observation
    reports ``done`` or ``max_steps`` actions have been sent. Finally posts to
    ``{SPACE_URL}/grader`` and reports the result.

    Progress is printed to stdout as ``[START]``, ``[STEP]`` and ``[END]``
    lines.

    Args:
        task_id: Task identifier forwarded to every endpoint.
        max_steps: Upper bound on the number of actions sent (default 10).

    Returns:
        A dict with keys ``task_id``, ``score``, ``outcome`` and ``steps``.
        ``score`` defaults to 0.5 and ``outcome`` to ``"unknown"`` when the
        grader response omits them.
    """
    print(f"[START] task={task_id}", flush=True)

    r = requests.post(f"{SPACE_URL}/reset", params={"task_id": task_id}, timeout=30)
    obs = r.json()
    step = 0

    while not obs.get("done", False) and step < max_steps:
        step += 1
        action = get_llm_action(obs)
        r = requests.post(
            f"{SPACE_URL}/step",
            json={"action_type": action, "task_id": task_id},
            timeout=30,
        )
        obs = r.json()
        # Read the reward from the observation returned by THIS step. Reading
        # it before sending the action would log the previous step's reward
        # and never report the final one.
        reward = float(obs.get("reward") or 0.0)
        print(f"[STEP] step={step} action={action} reward={reward:.3f}", flush=True)

    r = requests.post(f"{SPACE_URL}/grader", params={"task_id": task_id}, timeout=30)
    grader = r.json()

    # Treat an explicit null score like a missing one; a genuine 0.0 is kept.
    raw_score = grader.get("score", 0.5)
    score = 0.5 if raw_score is None else float(raw_score)
    outcome = grader.get("outcome", "unknown")

    print(
        f"[END] task={task_id} score={score:.3f} outcome={outcome} steps={step}",
        flush=True,
    )
    return {"task_id": task_id, "score": score, "outcome": outcome, "steps": step}
def main():
    """Run the three tasks in order and print a score summary.

    Returns:
        Dict mapping each task id to the score reported by ``run_inference``.
    """
    task_ids = ("task_1", "task_2", "task_3")

    # Episodes run sequentially, in the order listed above.
    scores = {name: run_inference(name)["score"] for name in task_ids}

    mean_score = sum(scores.values()) / len(scores)
    print(f"[SUMMARY] scores={scores} average={mean_score:.3f}", flush=True)
    return scores
# Run all tasks only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()