vikash-nuvai
fix: clamp inference score between 0.0001 and 0.9999 to pass strictly bounded validation
6a5b308 | #!/usr/bin/env python3 | |
| # Copyright (c) 2026 CtrlAltWin Team | |
| """ | |
| Tiffin Packer — OpenEnv Inference Script. | |
| Runs an LLM agent against the tiffin packing environment using the | |
| OpenAI Client API with environment variables: | |
| API_BASE_URL — The API endpoint for the LLM | |
| MODEL_NAME — The model identifier for inference | |
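| HF_TOKEN — Hugging Face / API key | |
| ENV_URL — Base URL of the tiffin packing environment server (optional; defaults to http://localhost:7860) | |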
| Usage: | |
| API_BASE_URL=https://api.openai.com/v1 \ | |
| MODEL_NAME=gpt-4o \ | |
| HF_TOKEN=your-key \ | |
| python inference.py | |
| """ | |
| import json | |
| import os | |
| import sys | |
| import time | |
| import traceback | |
| import requests | |
| from openai import OpenAI | |
| # --------------------------------------------------------------------------- | |
| # Required environment variables | |
| # --------------------------------------------------------------------------- | |
# Endpoint, model, credentials and environment URL are read from the process
# environment; each one falls back to a default so the script can start
# without any configuration.
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o")
HF_TOKEN = os.getenv("HF_TOKEN", "")
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")

# Warn early instead of aborting: the run continues even without a token.
if HF_TOKEN == "":
    print("WARNING: HF_TOKEN not set. LLM calls will fail.", flush=True)

# Single shared client used by every episode; HF_TOKEN is passed as the API key.
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
| # --------------------------------------------------------------------------- | |
| # System prompt | |
| # --------------------------------------------------------------------------- | |
# System message placed first in every episode's conversation (see
# run_episode). It lists the five JSON commands the agent may emit, the
# packing rules, and the suggested observe -> identify -> plan -> pick/place
# strategy, and it requires each reply to be a single JSON object.
| SYSTEM_PROMPT = """You are a tiffin packing assistant that controls a robotic arm. | |
| Your goal: pack Indian meal items into the correct tiffin containers. | |
| COMMANDS — respond with ONLY a JSON object, no other text: | |
| {"command": "observe"} — See the full scene | |
| {"command": "identify", "target_id": N} — Classify food item N using VLM | |
| {"command": "pick", "target_id": N} — Pick up food item N | |
| {"command": "place", "target_id": N} — Place held item into container N | |
| {"command": "pour", "target_id": N} — Pour held liquid into container N | |
| PACKING RULES: | |
| 1. ALWAYS identify items before packing (you cannot see food properties otherwise) | |
| 2. Liquids (sambar, dal, rasam, curry) → sealed containers only | |
| 3. Solids (rice, chapati, idli) → any container type | |
| 4. Semi-solids (curd, pickle, chutney) → sealed containers preferred | |
| 5. FRAGILE items (papad=0.9, chapati=0.7) → don't crush under heavy items | |
| 6. HOT and COLD food must NOT share a container | |
| 7. Don't overflow containers — check volume math! | |
| 8. Strong-flavor items (pickle, chutney) should be isolated | |
| STRATEGY: | |
| 1. First: observe the scene | |
| 2. Then: identify ALL food items (one by one) | |
| 3. Then: plan which food goes where based on constraints | |
| 4. Finally: pick and place/pour each item | |
| Respond with ONLY valid JSON. No explanation, no markdown, no extra text.""" | |
def parse_action(text: str) -> dict:
    """Parse raw LLM output into an action dict.

    Markdown code-fence lines are dropped first, then the whole text is tried
    as JSON. If that does not yield a JSON object with a ``"command"`` key,
    every ``{`` in the text is tried as the start of an embedded JSON object
    and the first one that contains ``"command"`` is returned.

    Args:
        text: Raw completion text from the model. ``None`` is treated as
            empty text.

    Returns:
        The decoded action dict, or ``{"command": "observe"}`` when no JSON
        object with a ``"command"`` key can be found.
    """
    text = (text or "").strip()

    # Models often wrap JSON in a fenced block; drop the fence lines.
    if text.startswith("```"):
        text = "\n".join(
            line for line in text.split("\n") if not line.startswith("```")
        ).strip()

    def is_action(value) -> bool:
        # json can decode to a list, string or number as well; only a JSON
        # object can be an action, and `"command" in 42` would raise.
        return isinstance(value, dict) and "command" in value

    # Fast path: the whole reply is the JSON object.
    try:
        action = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        if is_action(action):
            return action

    # Slow path: look for an object embedded in surrounding prose.
    # raw_decode parses exactly one JSON value starting at `start`, so each
    # candidate "{" costs a single parse instead of one per closing brace.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            action, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if is_action(action):
                return action
        start = text.find("{", start + 1)

    # Fallback: a harmless action keeps the episode moving.
    print(f" [WARN] Could not parse action: {text[:100]}", flush=True)
    return {"command": "observe"}
def _clamp_score(raw_score) -> float:
    """Coerce a reported score to a float within [0.0001, 0.9999].

    The validator requires the score to be strictly between 0 and 1. Missing,
    non-numeric or NaN values map to the minimum instead of raising (or, for
    NaN, slipping through ``max``/``min`` as the maximum).
    """
    try:
        score = float(raw_score)
    except (TypeError, ValueError):
        return 0.0001
    if score != score:  # NaN is the only float that is not equal to itself
        return 0.0001
    return max(0.0001, min(0.9999, score))


def _failed_episode(task_id: str, steps: int, error: Exception) -> dict:
    """Print the mandatory [END] line for a failed episode and build its result."""
    print(f"[END] task={task_id} score=0.0001 steps={steps}", flush=True)
    return {
        "task_id": task_id,
        "total_reward": 0.0,
        "reward": 0.0,
        "score": 0.0001,
        "steps": steps,
        "error": str(error),
    }


def _step_prompt(step: int, reward, obs: dict) -> str:
    """Render the observation returned by a step as the next user message."""
    held = obs.get("held_item")
    held_str = f"Holding: {held.get('name', 'unknown')}" if held else "Arm: idle"
    items_status = [
        f"[{i['id']}] {i.get('name', '?')} ({i['status']})"
        for i in obs.get("food_items", [])
    ]
    containers_status = [
        f"[{c['id']}] {c['name']} {c.get('fill_percentage',0):.0f}% full"
        for c in obs.get("containers", [])
    ]
    vlm_result = obs.get("vlm_result")
    vlm_line = "VLM Result: " + json.dumps(vlm_result) if vlm_result else ""
    return (
        f"Step {step} result (reward={reward:+.2f}):\n"
        f"Feedback: {obs.get('step_feedback') or ''}\n\n"
        f"{held_str}\n"
        f"Items: {', '.join(items_status)}\n"
        f"Containers: {', '.join(containers_status)}\n"
        f"Available: {obs.get('available_commands', [])}\n\n"
        f"{vlm_line}\n\n"
        f"Next action? JSON only."
    )


def run_episode(task_id: str) -> dict:
    """Run one episode of the tiffin packing task.

    Resets the environment at ``ENV_URL``, then alternates between asking the
    LLM for a JSON action and posting it to ``/step`` until the observation
    reports ``done``, a step request fails, or 35 steps have been taken.
    ``[START]``, ``[STEP]`` and ``[END]`` lines are printed for the validator;
    ``[END]`` is emitted on every exit path.

    Args:
        task_id: Task name sent to the environment's ``/reset`` endpoint.

    Returns:
        On success, a dict with ``task_id``, ``steps``, ``total_reward``,
        ``score`` and ``grade_breakdown``. On failure, a dict with
        ``task_id``, ``total_reward``, ``reward``, ``score`` (0.0001),
        ``steps`` and ``error``.
    """
    # Emit [START] structured output for the validator
    print(f"[START] task={task_id}", flush=True)
    step = 0
    try:
        print(f"\n{'='*60}", flush=True)
        print(f" TASK: {task_id.upper()}", flush=True)
        print(f"{'='*60}", flush=True)

        # Reset the environment
        try:
            resp = requests.post(
                f"{ENV_URL}/reset",
                json={"task_id": task_id, "seed": 42},
                timeout=30,
            )
            resp.raise_for_status()
            result = resp.json()
            # The observation may be nested or be the response body itself.
            obs = result.get("observation", result)
        except Exception as e:
            print(f" ERROR: Failed to reset environment: {e}", flush=True)
            return _failed_episode(task_id, 0, e)

        # Initialize conversation
        init_scene = obs.get("scene_description", "")
        init_feedback = obs.get("step_feedback", "")
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    f"Task: {task_id}\n\n"
                    f"{init_feedback}\n\n"
                    f"Scene:\n{init_scene}\n\n"
                    f"Available commands: {obs.get('available_commands', [])}\n\n"
                    f"What is your first action? Respond with JSON only."
                ),
            },
        ]

        total_reward = 0.0
        max_steps = 35  # safety limit
        while not obs.get("done", False) and step < max_steps:
            step += 1

            # Get LLM decision; any failure (including an empty completion)
            # degrades to a harmless "observe" instead of ending the episode.
            try:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=0.0,
                    max_tokens=200,
                )
                action_text = response.choices[0].message.content.strip()
            except Exception as e:
                print(f" [Step {step}] LLM error: {e}", flush=True)
                action_text = '{"command": "observe"}'

            action = parse_action(action_text)
            print(f" [Step {step}] Action: {json.dumps(action)}", flush=True)

            # Execute step
            try:
                resp = requests.post(
                    f"{ENV_URL}/step",
                    json={"action": action},
                    timeout=30,
                )
                resp.raise_for_status()
                result = resp.json()
                obs = result.get("observation", result)
                reward = result.get("reward", obs.get("reward", 0.0))
                # A JSON null reward would crash the ":+.2f" formatting
                # below and abort the whole episode; treat it as zero.
                if reward is None:
                    reward = 0.0
                total_reward += reward
                # Emit [STEP] structured output for the validator
                print(f"[STEP] step={step} reward={reward}", flush=True)
            except Exception as e:
                print(f" [Step {step}] Step error: {e}", flush=True)
                break

            # Print feedback (truncated for the console only)
            feedback = (obs.get("step_feedback") or "")[:200]
            print(f" Reward: {reward:+.2f} | Feedback: {feedback}", flush=True)

            # Update conversation with assistant response and new observation
            messages.append({"role": "assistant", "content": action_text})
            messages.append({"role": "user", "content": _step_prompt(step, reward, obs)})

        # Extract final score; "metadata" may be absent or null.
        metadata = obs.get("metadata") or {}
        # Ensure score is strictly between 0 and 1 (exclusive) for the validator
        final_score = _clamp_score(metadata.get("final_score", 0.0))
        grade_breakdown = metadata.get("grade_breakdown") or {}

        print(f"\n {'─'*40}", flush=True)
        print(f" Steps taken: {step}", flush=True)
        print(f" Total reward: {total_reward:+.2f}", flush=True)
        print(f" Final score: {final_score:.4f}", flush=True)
        if grade_breakdown:
            print(" Breakdown:", flush=True)
            for label, key, weight in (
                ("Validity", "validity", "0.4"),
                ("Efficiency", "efficiency", "0.3"),
                ("Constraints", "constraints", "0.2"),
                ("Neatness", "neatness", "0.1"),
            ):
                print(f" {label}: {grade_breakdown.get(key, 0):.4f} (x{weight})", flush=True)

        # Emit [END] structured output for the validator
        print(f"[END] task={task_id} score={final_score} steps={step}", flush=True)
        return {
            "task_id": task_id,
            "steps": step,
            "total_reward": round(total_reward, 4),
            "score": final_score,
            "grade_breakdown": grade_breakdown,
        }
    except Exception as e:
        # Catch-all: ensure [END] is ALWAYS emitted even on unexpected errors
        print(f" FATAL ERROR in episode {task_id}: {e}", flush=True)
        traceback.print_exc()
        return _failed_episode(task_id, step, e)
def main():
    """Evaluate the agent on the easy, medium and hard tasks and report.

    Prints a run header, runs one episode per task, prints a per-task and
    average-score summary, and writes the results to
    ``outputs/evals/results.json``.
    """
    rule = "=" * 60
    header = (
        rule,
        " TIFFIN PACKER — INFERENCE SCRIPT",
        f" Model: {MODEL_NAME}",
        f" API: {API_BASE_URL}",
        f" Env: {ENV_URL}",
        rule,
    )
    for line in header:
        print(line, flush=True)

    # Episodes run sequentially, in this fixed order.
    started = time.time()
    results = {name: run_episode(name) for name in ("easy", "medium", "hard")}
    elapsed = time.time() - started

    # Summary
    print("\n" + rule, flush=True)
    print(" FINAL RESULTS", flush=True)
    print(rule, flush=True)
    for name, outcome in results.items():
        score = outcome["score"]
        reward = outcome["total_reward"]
        steps = outcome.get("steps", "?")
        print(f" {name:8s}: score={score:.4f} reward={reward:+.2f} steps={steps}", flush=True)

    scores = [outcome["score"] for outcome in results.values()]
    avg_score = sum(scores) / max(len(scores), 1)
    print(f"\n Average score: {avg_score:.4f}", flush=True)
    print(f" Total time: {elapsed:.1f}s", flush=True)

    # Save results
    report = {
        "model": MODEL_NAME,
        "api_base_url": API_BASE_URL,
        "results": results,
        "average_score": avg_score,
        "elapsed_seconds": round(elapsed, 1),
    }
    os.makedirs("outputs/evals", exist_ok=True)
    with open("outputs/evals/results.json", "w") as f:
        json.dump(report, f, indent=2)
    print("\n Results saved to outputs/evals/results.json", flush=True)
# Run the full evaluation only when this file is executed as a script,
# not when it is imported as a module.
| if __name__ == "__main__": | |
| main() | |