#!/usr/bin/env python3
# Copyright (c) 2026 CtrlAltWin Team
"""
Tiffin Packer — OpenEnv Inference Script.

Runs an LLM agent against the tiffin packing environment using
the OpenAI Client API with environment variables:

    API_BASE_URL — The API endpoint for the LLM
    MODEL_NAME   — The model identifier for inference
    HF_TOKEN     — Hugging Face / API key
    ENV_URL      — Base URL of the environment server (default http://localhost:7860)

Usage:
    API_BASE_URL=https://api.openai.com/v1 \
    MODEL_NAME=gpt-4o \
    HF_TOKEN=your-key \
    python inference.py
"""

import json
import math
import os
import sys
import time
import traceback

import requests
from openai import OpenAI

# ---------------------------------------------------------------------------
# Required environment variables
# ---------------------------------------------------------------------------
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")

if not HF_TOKEN:
    print("WARNING: HF_TOKEN not set. LLM calls will fail.", flush=True)

client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)

# The validator requires scores strictly inside (0, 1); these are the clamps.
SCORE_FLOOR = 0.0001
SCORE_CEILING = 0.9999

# Tasks run by main(), in order.
TASK_IDS = ("easy", "medium", "hard")

# Action used whenever the LLM output cannot be obtained or parsed.
FALLBACK_ACTION_TEXT = '{"command": "observe"}'

# ---------------------------------------------------------------------------
# System prompt
# ---------------------------------------------------------------------------
SYSTEM_PROMPT = """You are a tiffin packing assistant that controls a robotic arm.
Your goal: pack Indian meal items into the correct tiffin containers.

COMMANDS — respond with ONLY a JSON object, no other text:
{"command": "observe"} — See the full scene
{"command": "identify", "target_id": N} — Classify food item N using VLM
{"command": "pick", "target_id": N} — Pick up food item N
{"command": "place", "target_id": N} — Place held item into container N
{"command": "pour", "target_id": N} — Pour held liquid into container N

PACKING RULES:
1. ALWAYS identify items before packing (you cannot see food properties otherwise)
2. Liquids (sambar, dal, rasam, curry) → sealed containers only
3. Solids (rice, chapati, idli) → any container type
4. Semi-solids (curd, pickle, chutney) → sealed containers preferred
5. FRAGILE items (papad=0.9, chapati=0.7) → don't crush under heavy items
6. HOT and COLD food must NOT share a container
7. Don't overflow containers — check volume math!
8. Strong-flavor items (pickle, chutney) should be isolated

STRATEGY:
1. First: observe the scene
2. Then: identify ALL food items (one by one)
3. Then: plan which food goes where based on constraints
4. Finally: pick and place/pour each item

Respond with ONLY valid JSON. No explanation, no markdown, no extra text."""


def _is_action(candidate) -> bool:
    """Return True when *candidate* is a JSON object carrying a "command" key."""
    # The isinstance guard matters: `"command" in 42` raises TypeError, and a
    # list/str containing "command" must not be mistaken for an action dict.
    return isinstance(candidate, dict) and "command" in candidate


def parse_action(text: str) -> dict:
    """Parse LLM output into an action dict.

    Accepts bare JSON, JSON wrapped in a markdown code fence, or JSON embedded
    in surrounding prose. Only JSON *objects* with a "command" key count as
    actions.

    Args:
        text: Raw model output (``None`` is treated as empty).

    Returns:
        The parsed action dict, or ``{"command": "observe"}`` when no valid
        action can be extracted (a warning is printed in that case).
    """
    text = (text or "").strip()

    # Drop markdown fence lines (``` / ```json) so only the payload remains.
    if text.startswith("```"):
        payload = [line for line in text.split("\n") if not line.startswith("```")]
        text = "\n".join(payload).strip()

    # Fast path: the whole text is the JSON action.
    try:
        action = json.loads(text)
    except json.JSONDecodeError:
        action = None
    if _is_action(action):
        return action

    # Slow path: try to decode a JSON value starting at each "{". raw_decode
    # stops at the end of the value, so trailing prose is ignored and each
    # brace costs a single parse.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if _is_action(candidate):
            return candidate
        start = text.find("{", start + 1)

    # Fallback
    print(f" [WARN] Could not parse action: {text[:100]}", flush=True)
    return {"command": "observe"}


def _coerce_reward(value):
    """Return *value* as a number usable in arithmetic and ``:+.2f`` formatting.

    Real numbers pass through unchanged so the ``[STEP]`` line prints exactly
    what the environment sent; ``None`` and other non-numeric values become 0.0.
    """
    if isinstance(value, bool):
        return float(value)
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _clamp_score(raw) -> float:
    """Clamp *raw* into [SCORE_FLOOR, SCORE_CEILING]; junk and NaN map to the floor."""
    try:
        score = float(raw)
    except (TypeError, ValueError):
        score = 0.0
    # min(0.9999, nan) returns 0.9999, so NaN must be handled explicitly or a
    # broken score would be reported as near-perfect.
    if math.isnan(score):
        score = 0.0
    return max(SCORE_FLOOR, min(SCORE_CEILING, score))


def _failed_episode(task_id: str, steps: int, error: Exception) -> dict:
    """Emit the mandatory ``[END]`` line for a failed episode and build its result."""
    print(f"[END] task={task_id} score={SCORE_FLOOR} steps={steps}", flush=True)
    return {
        "task_id": task_id,
        "total_reward": 0.0,
        "reward": 0.0,
        "score": SCORE_FLOOR,
        "steps": steps,
        "error": str(error),
    }


def _build_step_message(step: int, reward, obs: dict) -> str:
    """Build the concise per-step observation summary sent back to the LLM."""
    held = obs.get("held_item")
    held_str = f"Holding: {held.get('name', 'unknown')}" if held else "Arm: idle"

    items_status = [
        f"[{item.get('id', '?')}] {item.get('name', '?')} ({item.get('status', '?')})"
        for item in obs.get("food_items") or []
    ]
    containers_status = [
        f"[{c.get('id', '?')}] {c.get('name', '?')} "
        f"{(c.get('fill_percentage') or 0):.0f}% full"
        for c in obs.get("containers") or []
    ]

    vlm_result = obs.get("vlm_result")
    vlm_str = "VLM Result: " + json.dumps(vlm_result) if vlm_result else ""

    return (
        f"Step {step} result (reward={reward:+.2f}):\n"
        f"Feedback: {obs.get('step_feedback', '')}\n\n"
        f"{held_str}\n"
        f"Items: {', '.join(items_status)}\n"
        f"Containers: {', '.join(containers_status)}\n"
        f"Available: {obs.get('available_commands', [])}\n\n"
        f"{vlm_str}\n\n"
        f"Next action? JSON only."
    )


def run_episode(task_id: str, max_steps: int = 35, seed: int = 42) -> dict:
    """Run one episode of the tiffin packing task.

    Resets the environment at ``ENV_URL``, then alternates between asking the
    LLM for an action and posting it to ``/step`` until the observation reports
    ``done`` or *max_steps* is reached. ``[START]``, ``[STEP]`` and ``[END]``
    lines are printed for the validator; ``[END]`` is emitted on every path.

    Args:
        task_id: Task identifier sent to the environment's ``/reset``.
        max_steps: Safety limit on the number of agent steps.
        seed: Seed sent to the environment's ``/reset``.

    Returns:
        On success: ``task_id``, ``steps``, ``total_reward``, ``score`` and
        ``grade_breakdown``. On failure: ``task_id``, ``total_reward``,
        ``reward``, ``score`` (the floor value), ``steps`` and ``error``.
    """
    # Emit [START] structured output for the validator
    print(f"[START] task={task_id}", flush=True)
    step = 0

    try:
        print(f"\n{'='*60}", flush=True)
        print(f" TASK: {task_id.upper()}", flush=True)
        print(f"{'='*60}", flush=True)

        # Reset the environment
        try:
            resp = requests.post(
                f"{ENV_URL}/reset",
                json={"task_id": task_id, "seed": seed},
                timeout=30,
            )
            resp.raise_for_status()
            result = resp.json()
            # Some servers wrap the observation, others return it directly.
            obs = result.get("observation", result)
        except Exception as e:
            print(f" ERROR: Failed to reset environment: {e}", flush=True)
            return _failed_episode(task_id, 0, e)

        # Initialize conversation
        init_scene = obs.get("scene_description", "")
        init_feedback = obs.get("step_feedback", "")
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    f"Task: {task_id}\n\n"
                    f"{init_feedback}\n\n"
                    f"Scene:\n{init_scene}\n\n"
                    f"Available commands: {obs.get('available_commands', [])}\n\n"
                    f"What is your first action? Respond with JSON only."
                ),
            },
        ]

        total_reward = 0.0

        while not obs.get("done", False) and step < max_steps:
            step += 1

            # Get LLM decision; any failure degrades to a harmless "observe".
            try:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=messages,
                    temperature=0.0,
                    max_tokens=200,
                )
                action_text = response.choices[0].message.content.strip()
            except Exception as e:
                print(f" [Step {step}] LLM error: {e}", flush=True)
                action_text = FALLBACK_ACTION_TEXT

            action = parse_action(action_text)
            print(f" [Step {step}] Action: {json.dumps(action)}", flush=True)

            # Execute step
            try:
                resp = requests.post(
                    f"{ENV_URL}/step",
                    json={"action": action},
                    timeout=30,
                )
                resp.raise_for_status()
                result = resp.json()
                obs = result.get("observation", result)
                # The reward may be absent or null; normalise it once so both
                # the running total and the ":+.2f" formatting below are safe.
                reward = _coerce_reward(result.get("reward", obs.get("reward", 0.0)))
                total_reward += reward

                # Emit [STEP] structured output for the validator
                print(f"[STEP] step={step} reward={reward}", flush=True)
            except Exception as e:
                # A failed step ends the episode; scoring uses the last good obs.
                print(f" [Step {step}] Step error: {e}", flush=True)
                break

            # Print feedback
            feedback = (obs.get("step_feedback") or "")[:200]
            print(f" Reward: {reward:+.2f} | Feedback: {feedback}", flush=True)

            # Update conversation with assistant response and new observation
            messages.append({"role": "assistant", "content": action_text})
            messages.append(
                {"role": "user", "content": _build_step_message(step, reward, obs)}
            )

        # Extract final score
        metadata = obs.get("metadata") or {}
        # Ensure score is strictly between 0 and 1 (exclusive) for the validator
        final_score = _clamp_score(metadata.get("final_score", 0.0))
        grade_breakdown = metadata.get("grade_breakdown") or {}

        print(f"\n {'─'*40}", flush=True)
        print(f" Steps taken: {step}", flush=True)
        print(f" Total reward: {total_reward:+.2f}", flush=True)
        print(f" Final score: {final_score:.4f}", flush=True)
        if grade_breakdown:
            print(" Breakdown:", flush=True)
            print(f" Validity: {grade_breakdown.get('validity', 0):.4f} (x0.4)", flush=True)
            print(f" Efficiency: {grade_breakdown.get('efficiency', 0):.4f} (x0.3)", flush=True)
            print(f" Constraints: {grade_breakdown.get('constraints', 0):.4f} (x0.2)", flush=True)
            print(f" Neatness: {grade_breakdown.get('neatness', 0):.4f} (x0.1)", flush=True)

        # Emit [END] structured output for the validator
        print(f"[END] task={task_id} score={final_score} steps={step}", flush=True)

        return {
            "task_id": task_id,
            "steps": step,
            "total_reward": round(total_reward, 4),
            "score": final_score,
            "grade_breakdown": grade_breakdown,
        }

    except Exception as e:
        # Catch-all: ensure [END] is ALWAYS emitted even on unexpected errors
        print(f" FATAL ERROR in episode {task_id}: {e}", flush=True)
        traceback.print_exc()
        return _failed_episode(task_id, step, e)


def main():
    """Run all 3 tasks, print a summary, and save it to outputs/evals/results.json."""
    print("=" * 60, flush=True)
    print(" TIFFIN PACKER — INFERENCE SCRIPT", flush=True)
    print(f" Model: {MODEL_NAME}", flush=True)
    print(f" API: {API_BASE_URL}", flush=True)
    print(f" Env: {ENV_URL}", flush=True)
    print("=" * 60, flush=True)

    start_time = time.time()
    results = {task_id: run_episode(task_id) for task_id in TASK_IDS}
    elapsed = time.time() - start_time

    # Summary
    print("\n" + "=" * 60, flush=True)
    print(" FINAL RESULTS", flush=True)
    print("=" * 60, flush=True)
    for task_id, r in results.items():
        print(
            f" {task_id:8s}: score={r['score']:.4f} "
            f"reward={r['total_reward']:+.2f} steps={r.get('steps', '?')}",
            flush=True,
        )

    avg_score = sum(r["score"] for r in results.values()) / max(len(results), 1)
    print(f"\n Average score: {avg_score:.4f}", flush=True)
    print(f" Total time: {elapsed:.1f}s", flush=True)

    # Save results
    os.makedirs("outputs/evals", exist_ok=True)
    with open("outputs/evals/results.json", "w", encoding="utf-8") as f:
        json.dump(
            {
                "model": MODEL_NAME,
                "api_base_url": API_BASE_URL,
                "results": results,
                "average_score": avg_score,
                "elapsed_seconds": round(elapsed, 1),
            },
            f,
            indent=2,
        )
    print("\n Results saved to outputs/evals/results.json", flush=True)


if __name__ == "__main__":
    main()