Spaces:

codecrypt112
/

openenv-hackathon-ctrlaltwin-tiffenpacker

Sleeping

App Files Files Community

vikash-nuvai committed on Apr 8

Commit

5d20aef

1 Parent(s): 12d9f1b

fix: add structured output markers for validator

Browse files

Files changed (1) hide show

inference.py +165 -137

inference.py CHANGED Viewed

@@ -20,9 +20,9 @@ import json
 import os
 import sys
 import time
 import requests
-from openai import OpenAI
 # ---------------------------------------------------------------------------
 # Required environment variables
@@ -32,11 +32,6 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o")
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
-if not HF_TOKEN:
-    print("WARNING: HF_TOKEN not set. LLM calls will fail.")
-client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
 # ---------------------------------------------------------------------------
 # System prompt
 # ---------------------------------------------------------------------------
@@ -69,6 +64,16 @@ STRATEGY:
 Respond with ONLY valid JSON. No explanation, no markdown, no extra text."""
 def parse_action(text: str) -> dict:
     """Parse LLM output into an action dict."""
     text = text.strip()
@@ -101,175 +106,198 @@ def parse_action(text: str) -> dict:
                         continue
     # Fallback
-    print(f"  [WARN] Could not parse action: {text[:100]}")
     return {"command": "observe"}
-def run_episode(task_id: str) -> dict:
     """Run one episode of the tiffin packing task."""
-    print(f"\n{'='*60}")
-    print(f"  TASK: {task_id.upper()}")
-    print(f"{'='*60}")
-    # Reset the environment
-    try:
-        resp = requests.post(
-            f"{ENV_URL}/reset",
-            json={"task_id": task_id, "seed": 42},
-            timeout=30,
-        )
-        resp.raise_for_status()
-        result = resp.json()
-        obs = result.get("observation", result)
-    except Exception as e:
-        print(f"  ERROR: Failed to reset environment: {e}")
-        return {"task_id": task_id, "reward": 0.0, "score": 0.0, "error": str(e)}
-    # Initialize conversation
-    init_scene = obs.get("scene_description", "")
-    init_feedback = obs.get("step_feedback", "")
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {
-            "role": "user",
-            "content": (
-                f"Task: {task_id}\n\n"
-                f"{init_feedback}\n\n"
-                f"Scene:\n{init_scene}\n\n"
-                f"Available commands: {obs.get('available_commands', [])}\n\n"
-                f"What is your first action? Respond with JSON only."
-            ),
-        },
-    ]
-    total_reward = 0.0
     step = 0
-    max_steps = 35  # safety limit
-    while not obs.get("done", False) and step < max_steps:
-        step += 1
-        # Get LLM decision
-        try:
-            response = client.chat.completions.create(
-                model=MODEL_NAME,
-                messages=messages,
-                temperature=0.0,
-                max_tokens=200,
-            )
-            action_text = response.choices[0].message.content.strip()
-        except Exception as e:
-            print(f"  [Step {step}] LLM error: {e}")
-            action_text = '{"command": "observe"}'
-        action = parse_action(action_text)
-        print(f"  [Step {step}] Action: {json.dumps(action)}")
-        # Execute step
         try:
             resp = requests.post(
-                f"{ENV_URL}/step",
-                json={"action": action},
                 timeout=30,
             )
             resp.raise_for_status()
             result = resp.json()
             obs = result.get("observation", result)
-            reward = result.get("reward", obs.get("reward", 0.0))
-            total_reward += reward or 0
         except Exception as e:
-            print(f"  [Step {step}] Step error: {e}")
-            break
-        # Print feedback
-        feedback = obs.get("step_feedback", "")[:200]
-        print(f"           Reward: {reward:+.2f} | Feedback: {feedback}")
-        # Update conversation with assistant response and new observation
-        messages.append({"role": "assistant", "content": action_text})
-        # Build concise next observation for LLM
-        held = obs.get("held_item")
-        held_str = (
-            f"Holding: {held.get('name', 'unknown')}" if held else "Arm: idle"
-        )
-        items_status = [
-            f"[{i['id']}] {i.get('name', '?')} ({i['status']})"
-            for i in obs.get("food_items", [])
-        ]
-        containers_status = [
-            f"[{c['id']}] {c['name']} {c.get('fill_percentage',0):.0f}% full"
-            for c in obs.get("containers", [])
-        ]
-        messages.append(
             {
                 "role": "user",
                 "content": (
-                    f"Step {step} result (reward={reward:+.2f}):\n"
-                    f"Feedback: {obs.get('step_feedback', '')}\n\n"
-                    f"{held_str}\n"
-                    f"Items: {', '.join(items_status)}\n"
-                    f"Containers: {', '.join(containers_status)}\n"
-                    f"Available: {obs.get('available_commands', [])}\n\n"
-                    f"{'VLM Result: ' + json.dumps(obs.get('vlm_result')) if obs.get('vlm_result') else ''}\n\n"
-                    f"Next action? JSON only."
                 ),
             },
-        )
-    # Extract final score
-    final_score = obs.get("metadata", {}).get("final_score", 0.0)
-    grade_breakdown = obs.get("metadata", {}).get("grade_breakdown", {})
-    print(f"\n  {'─'*40}")
-    print(f"  Steps taken:  {step}")
-    print(f"  Total reward: {total_reward:+.2f}")
-    print(f"  Final score:  {final_score:.4f}")
-    if grade_breakdown:
-        print(f"  Breakdown:")
-        print(f"    Validity:    {grade_breakdown.get('validity', 0):.4f} (x0.4)")
-        print(f"    Efficiency:  {grade_breakdown.get('efficiency', 0):.4f} (x0.3)")
-        print(f"    Constraints: {grade_breakdown.get('constraints', 0):.4f} (x0.2)")
-        print(f"    Neatness:    {grade_breakdown.get('neatness', 0):.4f} (x0.1)")
-    return {
-        "task_id": task_id,
-        "steps": step,
-        "total_reward": round(total_reward, 4),
-        "score": final_score,
-        "grade_breakdown": grade_breakdown,
-    }
 def main():
     """Run all 3 tasks and report results."""
-    print("=" * 60)
-    print("  TIFFIN PACKER — INFERENCE SCRIPT")
-    print(f"  Model: {MODEL_NAME}")
-    print(f"  API:   {API_BASE_URL}")
-    print(f"  Env:   {ENV_URL}")
-    print("=" * 60)
     start_time = time.time()
     results = {}
     for task_id in ["easy", "medium", "hard"]:
-        result = run_episode(task_id)
         results[task_id] = result
     elapsed = time.time() - start_time
     # Summary
-    print("\n" + "=" * 60)
-    print("  FINAL RESULTS")
-    print("=" * 60)
     for task_id, r in results.items():
-        print(f"  {task_id:8s}: score={r['score']:.4f}  reward={r['total_reward']:+.2f}  steps={r.get('steps', '?')}")
     avg_score = sum(r["score"] for r in results.values()) / max(len(results), 1)
-    print(f"\n  Average score: {avg_score:.4f}")
-    print(f"  Total time:    {elapsed:.1f}s")
     # Save results
     os.makedirs("outputs/evals", exist_ok=True)
@@ -285,7 +313,7 @@ def main():
             f,
             indent=2,
         )
-    print(f"\n  Results saved to outputs/evals/results.json")
 if __name__ == "__main__":

 import os
 import sys
 import time
+import traceback
 import requests
 # ---------------------------------------------------------------------------
 # Required environment variables
 HF_TOKEN = os.environ.get("HF_TOKEN", "")
 ENV_URL = os.environ.get("ENV_URL", "http://localhost:7860")
 # ---------------------------------------------------------------------------
 # System prompt
 # ---------------------------------------------------------------------------
 Respond with ONLY valid JSON. No explanation, no markdown, no extra text."""
+def get_client():
+    """Lazily create an OpenAI client. Returns None if openai is unavailable."""
+    try:
+        from openai import OpenAI
+        return OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN or "dummy")
+    except Exception as e:
+        print(f"WARNING: Could not create OpenAI client: {e}", flush=True)
+        return None
 def parse_action(text: str) -> dict:
     """Parse LLM output into an action dict."""
     text = text.strip()
                         continue
     # Fallback
+    print(f"  [WARN] Could not parse action: {text[:100]}", flush=True)
     return {"command": "observe"}
+def run_episode(task_id: str, client) -> dict:
     """Run one episode of the tiffin packing task."""
+    # Emit [START] structured output for the validator
+    print(f"[START] task={task_id}", flush=True)
     step = 0
+    try:
+        print(f"\n{'='*60}", flush=True)
+        print(f"  TASK: {task_id.upper()}", flush=True)
+        print(f"{'='*60}", flush=True)
+        # Reset the environment
         try:
             resp = requests.post(
+                f"{ENV_URL}/reset",
+                json={"task_id": task_id, "seed": 42},
                 timeout=30,
             )
             resp.raise_for_status()
             result = resp.json()
             obs = result.get("observation", result)
         except Exception as e:
+            print(f"  ERROR: Failed to reset environment: {e}", flush=True)
+            print(f"[END] task={task_id} score=0.0 steps=0", flush=True)
+            return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0, "steps": 0, "error": str(e)}
+        # Initialize conversation
+        init_scene = obs.get("scene_description", "")
+        init_feedback = obs.get("step_feedback", "")
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
             {
                 "role": "user",
                 "content": (
+                    f"Task: {task_id}\n\n"
+                    f"{init_feedback}\n\n"
+                    f"Scene:\n{init_scene}\n\n"
+                    f"Available commands: {obs.get('available_commands', [])}\n\n"
+                    f"What is your first action? Respond with JSON only."
                 ),
             },
+        ]
+        total_reward = 0.0
+        max_steps = 35  # safety limit
+        while not obs.get("done", False) and step < max_steps:
+            step += 1
+            # Get LLM decision
+            try:
+                if client is None:
+                    raise RuntimeError("No OpenAI client available")
+                response = client.chat.completions.create(
+                    model=MODEL_NAME,
+                    messages=messages,
+                    temperature=0.0,
+                    max_tokens=200,
+                )
+                action_text = response.choices[0].message.content.strip()
+            except Exception as e:
+                print(f"  [Step {step}] LLM error: {e}", flush=True)
+                action_text = '{"command": "observe"}'
+            action = parse_action(action_text)
+            print(f"  [Step {step}] Action: {json.dumps(action)}", flush=True)
+            # Execute step
+            try:
+                resp = requests.post(
+                    f"{ENV_URL}/step",
+                    json={"action": action},
+                    timeout=30,
+                )
+                resp.raise_for_status()
+                result = resp.json()
+                obs = result.get("observation", result)
+                reward = result.get("reward", obs.get("reward", 0.0))
+                total_reward += reward or 0
+                # Emit [STEP] structured output for the validator
+                print(f"[STEP] step={step} reward={reward}", flush=True)
+            except Exception as e:
+                print(f"  [Step {step}] Step error: {e}", flush=True)
+                break
+            # Print feedback
+            feedback = obs.get("step_feedback", "")[:200]
+            print(f"           Reward: {reward:+.2f} | Feedback: {feedback}", flush=True)
+            # Update conversation with assistant response and new observation
+            messages.append({"role": "assistant", "content": action_text})
+            # Build concise next observation for LLM
+            held = obs.get("held_item")
+            held_str = (
+                f"Holding: {held.get('name', 'unknown')}" if held else "Arm: idle"
+            )
+            items_status = [
+                f"[{i['id']}] {i.get('name', '?')} ({i['status']})"
+                for i in obs.get("food_items", [])
+            ]
+            containers_status = [
+                f"[{c['id']}] {c['name']} {c.get('fill_percentage',0):.0f}% full"
+                for c in obs.get("containers", [])
+            ]
+            messages.append(
+                {
+                    "role": "user",
+                    "content": (
+                        f"Step {step} result (reward={reward:+.2f}):\n"
+                        f"Feedback: {obs.get('step_feedback', '')}\n\n"
+                        f"{held_str}\n"
+                        f"Items: {', '.join(items_status)}\n"
+                        f"Containers: {', '.join(containers_status)}\n"
+                        f"Available: {obs.get('available_commands', [])}\n\n"
+                        f"{'VLM Result: ' + json.dumps(obs.get('vlm_result')) if obs.get('vlm_result') else ''}\n\n"
+                        f"Next action? JSON only."
+                    ),
+                },
+            )
+        # Extract final score
+        final_score = obs.get("metadata", {}).get("final_score", 0.0)
+        grade_breakdown = obs.get("metadata", {}).get("grade_breakdown", {})
+        print(f"\n  {'─'*40}", flush=True)
+        print(f"  Steps taken:  {step}", flush=True)
+        print(f"  Total reward: {total_reward:+.2f}", flush=True)
+        print(f"  Final score:  {final_score:.4f}", flush=True)
+        if grade_breakdown:
+            print(f"  Breakdown:", flush=True)
+            print(f"    Validity:    {grade_breakdown.get('validity', 0):.4f} (x0.4)", flush=True)
+            print(f"    Efficiency:  {grade_breakdown.get('efficiency', 0):.4f} (x0.3)", flush=True)
+            print(f"    Constraints: {grade_breakdown.get('constraints', 0):.4f} (x0.2)", flush=True)
+            print(f"    Neatness:    {grade_breakdown.get('neatness', 0):.4f} (x0.1)", flush=True)
+        # Emit [END] structured output for the validator
+        print(f"[END] task={task_id} score={final_score} steps={step}", flush=True)
+        return {
+            "task_id": task_id,
+            "steps": step,
+            "total_reward": round(total_reward, 4),
+            "score": final_score,
+            "grade_breakdown": grade_breakdown,
+        }
+    except Exception as e:
+        # Catch-all: ensure [END] is ALWAYS emitted even on unexpected errors
+        print(f"  FATAL ERROR in episode {task_id}: {e}", flush=True)
+        traceback.print_exc()
+        print(f"[END] task={task_id} score=0.0 steps={step}", flush=True)
+        return {"task_id": task_id, "total_reward": 0.0, "reward": 0.0, "score": 0.0, "steps": step, "error": str(e)}
 def main():
     """Run all 3 tasks and report results."""
+    print("=" * 60, flush=True)
+    print("  TIFFIN PACKER — INFERENCE SCRIPT", flush=True)
+    print(f"  Model: {MODEL_NAME}", flush=True)
+    print(f"  API:   {API_BASE_URL}", flush=True)
+    print(f"  Env:   {ENV_URL}", flush=True)
+    print("=" * 60, flush=True)
+    # Create client lazily — don't crash on import
+    client = get_client()
     start_time = time.time()
     results = {}
     for task_id in ["easy", "medium", "hard"]:
+        result = run_episode(task_id, client)
         results[task_id] = result
     elapsed = time.time() - start_time
     # Summary
+    print("\n" + "=" * 60, flush=True)
+    print("  FINAL RESULTS", flush=True)
+    print("=" * 60, flush=True)
     for task_id, r in results.items():
+        print(f"  {task_id:8s}: score={r['score']:.4f}  reward={r['total_reward']:+.2f}  steps={r.get('steps', '?')}", flush=True)
     avg_score = sum(r["score"] for r in results.values()) / max(len(results), 1)
+    print(f"\n  Average score: {avg_score:.4f}", flush=True)
+    print(f"  Total time:    {elapsed:.1f}s", flush=True)
     # Save results
     os.makedirs("outputs/evals", exist_ok=True)
             f,
             indent=2,
         )
+    print(f"\n  Results saved to outputs/evals/results.json", flush=True)
 if __name__ == "__main__":