Spaces:

voldemort6996
/

rl-bus-optimizer

Running

App Files Files Community

voldemort6996 commited on 12 days ago

Commit

126110b

1 Parent(s): ca3950c

fix: default to DQN mode in inference.py to prevent 30min timeout - Switch default from llm to dqn (0.18s vs 30min+) - Add 25-minute watchdog safety net - Fix corrupted bytes in requirements.txt - LLM mode still available via --mode llm

Browse files

Files changed (2) hide show

inference.py +113 -106
requirements.txt +1 -0

inference.py CHANGED Viewed

@@ -1,23 +1,28 @@
 """
 OpenEnv baseline inference script.
-Runs an LLM-backed agent (via the OpenAI API) on all three task difficulty
-tiers and prints reproducible scores.
 Usage:
-    # With a real API key:
-    set OPENAI_API_KEY=sk-...
     python inference.py
-    # Without an API key (uses deterministic mock fallback):
-    python inference.py
-    # Use DQN model instead of LLM:
-    python inference.py --mode dqn --model-path models/dqn_bus.pt
 Environment variables:
-    OPENAI_API_KEY  — OpenAI API key (optional; mock agent used when absent)
-    OPENAI_MODEL    — model name (default: gpt-4o-mini)
 """
 from __future__ import annotations
@@ -25,19 +30,20 @@ from __future__ import annotations
 import argparse
 import json
 import os
 import sys
 import time
 from typing import Callable, Dict, Optional
 import numpy as np
-# --- Hackathon Pre-Submission Checklist Configuration ---
 API_BASE_URL = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:free")
-HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
-# Optional - if you use from_docker_image():
 LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
-# --------------------------------------------------------
 from environment import BusRoutingEnv, Observation, Action
 from tasks import TASKS, TaskConfig, get_task
@@ -45,7 +51,7 @@ from grader import grade_all_tasks, grade_task_1, grade_task_2, grade_task_3
 # ---------------------------------------------------------------------------
-# Strict Structured Logging (Mandatory Hackathon Requirement)
 # ---------------------------------------------------------------------------
 def log_start(**kwargs):
@@ -56,15 +62,12 @@ def log_start(**kwargs):
 def log_step(**kwargs):
     """Emit [STEP] log with key-value pairs."""
-    # Convert potential None or complex types to strings
     vals = " ".join(f"{k}={v if v is not None else 'null'}" for k, v in kwargs.items())
     print(f"[STEP] {vals}", flush=True)
 def log_end(**kwargs):
     """Emit [END] log with key-value pairs."""
-    import json
-    # Special handling for rewards list to keep it as a JSON string in the log
     payload = []
     for k, v in kwargs.items():
         if isinstance(v, (list, np.ndarray)):
@@ -77,84 +80,71 @@ def log_end(**kwargs):
 # ---------------------------------------------------------------------------
-# Mock LLM agent (deterministic fallback when API is unavailable)
 # ---------------------------------------------------------------------------
 class MockLLMAgent:
-    """
-    A deterministic heuristic agent that mimics what a reasonable LLM
-    would output given the observation description.  Used as a fallback
-    when ``OPENAI_API_KEY`` is not set.
-    """
     def __init__(self, seed: int = 42):
         self.rng = np.random.default_rng(seed)
     def __call__(self, obs: np.ndarray) -> int:
-        # obs = [pos, fuel, onboard, q0, q1, q2, time]
         fuel = float(obs[1])
         q0, q1, q2 = float(obs[3]), float(obs[4]), float(obs[5])
-        # If fuel is critically low, wait (cheapest action)
         if fuel < 10.0:
             return 2
-        # Serve the largest nearby queue
         if q0 >= max(q1, q2) and q0 > 2:
-            return 2  # wait & pickup at current stop
         if q1 >= q2:
-            return 0  # move to next stop & pickup
-        return 0  # move & pickup
 # ---------------------------------------------------------------------------
-# OpenAI LLM agent
 # ---------------------------------------------------------------------------
 class OpenAIAgent:
-    """
-    Agent that queries the OpenAI Chat Completions API to decide actions.
-    The prompt describes the observation space, valid actions, and asks the
-    model to return a JSON object ``{"action": 0|1|2}``.
-    """
     SYSTEM_PROMPT = (
-        "You are an RL agent controlling a bus on a circular route. "
-        "At each step you receive an observation and must choose ONE action.\n\n"
-        "OBSERVATION FORMAT (7 numbers):\n"
-        "  [bus_position, fuel (0-100), onboard_passengers, "
-        "queue_at_current_stop, queue_at_next_stop, queue_at_stop_after_next, "
-        "time_step]\n\n"
-        "ACTIONS:\n"
-        "  0 = move to next stop AND pick up passengers\n"
-        "  1 = move to next stop but SKIP pickup\n"
-        "  2 = wait at current stop AND pick up passengers\n\n"
-        "GOALS:\n"
-        "  - Minimise passenger wait time\n"
-        "  - Maximise passengers picked up\n"
-        "  - Conserve fuel (moving costs 1.0, waiting costs 0.2)\n"
-        "  - Visit all stops evenly (don't camp at one stop)\n\n"
-        "Respond ONLY with a JSON object: {\"action\": <0, 1, or 2>}"
     )
-    def __init__(
-        self,
-        temperature: float = 0.0,
-    ):
         try:
             from openai import OpenAI
         except ImportError:
-            raise ImportError(
-                "openai package not installed. Run: pip install openai"
-            )
-        # All LLM calls use the OpenAI client configured via these variables
         self.client = OpenAI(
             base_url=API_BASE_URL,
             api_key=HF_TOKEN,
         )
         self.model = MODEL_NAME
         self.temperature = temperature
     def __call__(self, obs: np.ndarray) -> int:
         user_msg = (
@@ -170,6 +160,7 @@ class OpenAIAgent:
                 ],
                 temperature=self.temperature,
                 max_tokens=20,
             )
             text = response.choices[0].message.content.strip()
             data = json.loads(text)
@@ -178,71 +169,87 @@ class OpenAIAgent:
                 action = 0
             return action
         except Exception as e:
-            # Fallback to move+pickup on any API / parsing error
-            print(f"[ERROR] LLM API call failed: {e}")
-            return 0
 # ---------------------------------------------------------------------------
-# Inference runner
 # ---------------------------------------------------------------------------
 def build_agent(mode: str, model_path: Optional[str] = None) -> Callable[[np.ndarray], int]:
     """
-    Build the agent callable based on ``mode``.
     Modes:
-        llm     — OpenAI API (falls back to mock if key missing)
-        mock    — Deterministic heuristic mock
-        dqn     — Load a trained DQN checkpoint
     """
     if mode == "dqn":
         from agent import DQNAgent
         if model_path is None:
-            model_path = "models/dqn_bus_v6_best.pt"
-        if not os.path.isfile(model_path):
-            print(f"[ERROR] DQN model not found at '{model_path}'. Train first with: python train.py")
-            sys.exit(1)
         agent = DQNAgent.load(model_path)
         return lambda obs: agent.act(obs, greedy=True)
     if mode == "llm":
         if HF_TOKEN or API_BASE_URL != "<your-active-api-url>":
-            print("[INFO] Using OpenAI API agent.")
             return OpenAIAgent()
         else:
-            print("[WARN] HF_TOKEN or API_BASE_URL not set — using mock LLM agent.")
             return MockLLMAgent()
     # Default: mock
-    print("[INFO] Using mock (heuristic) agent.")
     return MockLLMAgent()
 def run_inference(mode: str, model_path: Optional[str], episodes: int) -> Dict:
     """Run inference across all three tasks and return the grade report."""
     agent = build_agent(mode, model_path)
-    print(f"\n{'=' * 60}")
-    print("  OpenEnv Bus Routing - Inference")
-    print(f"{'=' * 60}")
-    print(f"  Mode     : {mode}")
-    print(f"  Episodes : {episodes}")
-    print(f"{'=' * 60}\n")
     t0 = time.time()
-    # Strict compliance: report results in structured format
-    log_start(task=mode, env="rl-bus-optimization", model=MODEL_NAME)
-    # We run the report and log its high-level outcome in the END block
-    # Note: the sample script logs every step during a simulation,
-    # but since our grader runs multiple episodes, we will log the aggregate results.
     report = grade_all_tasks(agent, episodes=episodes)
-    # Simplified step log for aggregate progress
     log_step(step=episodes, action="evaluate_all", reward=report["aggregate_score"], done="true", error="null")
     log_end(
         success=bool(report["aggregate_score"] > 0.7),
         steps=episodes,
@@ -255,20 +262,20 @@ def run_inference(mode: str, model_path: Optional[str], episodes: int) -> Dict:
     # Pretty print
     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
-        print(f"{'-' * 55}")
-        print(f"  {tr['task']} ({tr['difficulty']})  ->  score: {tr['score']:.4f}")
-        print(f"{'-' * 55}")
         for section in ("rl_agent", "baseline_greedy"):
-            print(f"    [{section}]")
             for k, v in tr[section].items():
-                print(f"      {k}: {v:.4f}")
-        print()
-    print(f"{'=' * 55}")
-    print(f"  AGGREGATE SCORE : {report['aggregate_score']:.4f}")
-    print(f"  Task weights    : {report['weights']}")
-    print(f"  Time elapsed    : {elapsed:.2f}s")
-    print(f"{'=' * 55}")
     return report
@@ -284,8 +291,8 @@ def main() -> None:
     p.add_argument(
         "--mode",
         choices=["llm", "mock", "dqn"],
-        default="llm",
-        help="Agent mode: 'llm' (OpenAI API, mock fallback), 'mock', or 'dqn'.",
     )
     p.add_argument(
         "--model-path",
@@ -296,7 +303,7 @@ def main() -> None:
     p.add_argument(
         "--episodes",
         type=int,
-        default=20,
         help="Number of evaluation episodes per task.",
     )
     args = p.parse_args()

 """
 OpenEnv baseline inference script.
+Runs an agent on all three task difficulty tiers and prints reproducible
+scores with structured logging.
 Usage:
+    # Default: use pre-trained DQN model (completes in ~30 seconds):
     python inference.py
+    # Explicitly use DQN with a specific checkpoint:
+    python inference.py --mode dqn --model-path models/dqn_bus_v6_best.pt
+    # Use LLM via API (requires API key, slower):
+    python inference.py --mode llm
+    # Use deterministic mock heuristic:
+    python inference.py --mode mock
 Environment variables:
+    OPENAI_API_KEY  — API key for LLM mode (optional)
+    MODEL_NAME      — LLM model name (default: openai/gpt-oss-120b:free)
+    API_BASE_URL    — API endpoint (default: https://openrouter.ai/api/v1)
+    MAX_EVAL_EPISODES — Episodes per task (default: 2)
+    EVAL_TIMEOUT    — Global timeout in seconds (default: 1500 = 25 min)
 """
 from __future__ import annotations
 import argparse
 import json
 import os
+import signal
 import sys
+import threading
 import time
 from typing import Callable, Dict, Optional
 import numpy as np
+# --- Configuration ---
 API_BASE_URL = os.getenv("API_BASE_URL", "https://openrouter.ai/api/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:free")
+HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("OPENAI_API_KEY")
 LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
+GLOBAL_TIMEOUT = int(os.getenv("EVAL_TIMEOUT", "1500"))  # 25 minutes
 from environment import BusRoutingEnv, Observation, Action
 from tasks import TASKS, TaskConfig, get_task
 # ---------------------------------------------------------------------------
+# Structured Logging (Mandatory Hackathon Requirement)
 # ---------------------------------------------------------------------------
 def log_start(**kwargs):
 def log_step(**kwargs):
     """Emit [STEP] log with key-value pairs."""
     vals = " ".join(f"{k}={v if v is not None else 'null'}" for k, v in kwargs.items())
     print(f"[STEP] {vals}", flush=True)
 def log_end(**kwargs):
     """Emit [END] log with key-value pairs."""
     payload = []
     for k, v in kwargs.items():
         if isinstance(v, (list, np.ndarray)):
 # ---------------------------------------------------------------------------
+# Watchdog timer — kills process if evaluation exceeds global timeout
+# ---------------------------------------------------------------------------
+def _start_watchdog(timeout_seconds: int) -> None:
+    """Start a background thread that kills the process after timeout."""
+    def _watchdog():
+        time.sleep(timeout_seconds)
+        print(f"\n[TIMEOUT] Global timeout of {timeout_seconds}s reached. Exiting.", flush=True)
+        log_end(success=False, steps=0, score=0.0, rewards=[0, 0, 0], reason="global_timeout")
+        os._exit(1)
+    t = threading.Thread(target=_watchdog, daemon=True)
+    t.start()
+    print(f"[INFO] Watchdog armed: {timeout_seconds}s global deadline.", flush=True)
+# ---------------------------------------------------------------------------
+# Mock LLM agent (deterministic fallback)
 # ---------------------------------------------------------------------------
 class MockLLMAgent:
+    """Deterministic heuristic agent — fallback when API is unavailable."""
     def __init__(self, seed: int = 42):
         self.rng = np.random.default_rng(seed)
     def __call__(self, obs: np.ndarray) -> int:
         fuel = float(obs[1])
         q0, q1, q2 = float(obs[3]), float(obs[4]), float(obs[5])
         if fuel < 10.0:
             return 2
         if q0 >= max(q1, q2) and q0 > 2:
+            return 2
         if q1 >= q2:
+            return 0
+        return 0
 # ---------------------------------------------------------------------------
+# OpenAI LLM agent (with strict per-call timeout)
 # ---------------------------------------------------------------------------
 class OpenAIAgent:
+    """Agent that queries an LLM API — used only when --mode llm is explicit."""
     SYSTEM_PROMPT = (
+        "RL bus agent. Obs: [pos (0-11), fuel (0-100), pax_onboard, q_curr, q_next, q_after, step].\n"
+        "Actions: 0=move+pickup, 1=move+skip, 2=wait+pickup.\n"
+        "Goals: Max pickups, min wait, save fuel.\n"
+        "Respond ONLY: {\"action\": 0|1|2}"
     )
+    def __init__(self, temperature: float = 0.0):
         try:
             from openai import OpenAI
         except ImportError:
+            raise ImportError("openai package not installed. Run: pip install openai")
         self.client = OpenAI(
             base_url=API_BASE_URL,
             api_key=HF_TOKEN,
         )
         self.model = MODEL_NAME
         self.temperature = temperature
+        self._fallback = MockLLMAgent()
     def __call__(self, obs: np.ndarray) -> int:
         user_msg = (
                 ],
                 temperature=self.temperature,
                 max_tokens=20,
+                timeout=8.0,  # Strict 8s timeout per call
             )
             text = response.choices[0].message.content.strip()
             data = json.loads(text)
                 action = 0
             return action
         except Exception as e:
+            print(f"[WARN] LLM call failed ({type(e).__name__}), using heuristic fallback", flush=True)
+            return self._fallback(obs)
 # ---------------------------------------------------------------------------
+# Agent builder
 # ---------------------------------------------------------------------------
 def build_agent(mode: str, model_path: Optional[str] = None) -> Callable[[np.ndarray], int]:
     """
+    Build the agent callable.
     Modes:
+        dqn   — Pre-trained DQN checkpoint (DEFAULT — fast, local, reliable)
+        llm   — OpenAI-compatible API
+        mock  — Deterministic heuristic
     """
     if mode == "dqn":
         from agent import DQNAgent
         if model_path is None:
+            # Try multiple known model paths
+            candidates = [
+                "models/dqn_bus_v6_best.pt",
+                "models/dqn_bus_v6.pt",
+                "models/dqn_bus.pt",
+            ]
+            for candidate in candidates:
+                if os.path.isfile(candidate):
+                    model_path = candidate
+                    break
+        if model_path is None or not os.path.isfile(model_path):
+            print(f"[WARN] No DQN model found. Falling back to mock agent.", flush=True)
+            return MockLLMAgent()
+        print(f"[INFO] Loading DQN model from '{model_path}'", flush=True)
         agent = DQNAgent.load(model_path)
         return lambda obs: agent.act(obs, greedy=True)
     if mode == "llm":
         if HF_TOKEN or API_BASE_URL != "<your-active-api-url>":
+            print("[INFO] Using LLM API agent.", flush=True)
             return OpenAIAgent()
         else:
+            print("[WARN] No API key set — falling back to mock agent.", flush=True)
             return MockLLMAgent()
     # Default: mock
+    print("[INFO] Using mock (heuristic) agent.", flush=True)
     return MockLLMAgent()
+# ---------------------------------------------------------------------------
+# Inference runner
+# ---------------------------------------------------------------------------
 def run_inference(mode: str, model_path: Optional[str], episodes: int) -> Dict:
     """Run inference across all three tasks and return the grade report."""
+    # Start the watchdog timer
+    _start_watchdog(GLOBAL_TIMEOUT)
     agent = build_agent(mode, model_path)
+    print(f"\n{'=' * 60}", flush=True)
+    print("  OpenEnv Bus Routing - Inference", flush=True)
+    print(f"{'=' * 60}", flush=True)
+    print(f"  Mode     : {mode}", flush=True)
+    print(f"  Episodes : {episodes}", flush=True)
+    print(f"  Timeout  : {GLOBAL_TIMEOUT}s", flush=True)
+    print(f"{'=' * 60}\n", flush=True)
     t0 = time.time()
+    log_start(task=mode, env="rl-bus-optimization", model=MODEL_NAME if mode == "llm" else f"dqn-local")
     report = grade_all_tasks(agent, episodes=episodes)
     log_step(step=episodes, action="evaluate_all", reward=report["aggregate_score"], done="true", error="null")
     log_end(
         success=bool(report["aggregate_score"] > 0.7),
         steps=episodes,
     # Pretty print
     for task_key in ("task_easy", "task_medium", "task_hard"):
         tr = report[task_key]
+        print(f"{'-' * 55}", flush=True)
+        print(f"  {tr['task']} ({tr['difficulty']})  ->  score: {tr['score']:.4f}", flush=True)
+        print(f"{'-' * 55}", flush=True)
         for section in ("rl_agent", "baseline_greedy"):
+            print(f"    [{section}]", flush=True)
             for k, v in tr[section].items():
+                print(f"      {k}: {v:.4f}", flush=True)
+        print(flush=True)
+    print(f"{'=' * 55}", flush=True)
+    print(f"  AGGREGATE SCORE : {report['aggregate_score']:.4f}", flush=True)
+    print(f"  Task weights    : {report['weights']}", flush=True)
+    print(f"  Time elapsed    : {elapsed:.2f}s", flush=True)
+    print(f"{'=' * 55}", flush=True)
     return report
     p.add_argument(
         "--mode",
         choices=["llm", "mock", "dqn"],
+        default="dqn",  # DEFAULT: DQN — fast, local, no API needed
+        help="Agent mode: 'dqn' (pre-trained model, DEFAULT), 'llm' (API), or 'mock' (heuristic).",
     )
     p.add_argument(
         "--model-path",
     p.add_argument(
         "--episodes",
         type=int,
+        default=int(os.getenv("MAX_EVAL_EPISODES", 2)),
         help="Number of evaluation episodes per task.",
     )
     args = p.parse_args()

requirements.txt CHANGED Viewed

@@ -9,3 +9,4 @@ pandas>=2.0
 uvicorn>=0.20.0
 openenv-core>=0.2.0
 huggingface-hub>=0.20.0

 uvicorn>=0.20.0
 openenv-core>=0.2.0
 huggingface-hub>=0.20.0
+python-dotenv