Spaces:

arrow072
/

meta_env

Sleeping

App Files Files Community

arrow072 committed on Apr 12

Commit

22fc9d3

verified ·

1 Parent(s): 7b14fc3

Update inference.py

Browse files

Files changed (1) hide show

inference.py +126 -148

inference.py CHANGED Viewed

@@ -1,152 +1,130 @@
-import os
-from openai import OpenAI
 from env import TrafficEnv
-# Environment configurations for the three task modes (easy, medium, hard).
-# Each dict is passed unchanged to TrafficEnv(config) by run_task. All three
-# share max_steps, max_queue, discharge_rate and switch_penalty; they differ in
-# arrival_rate, emergency_prob, burst_prob and burst_multiplier, and the hard
-# mode also uses a lower starvation_threshold.
-EASY_CONFIG = {
-    "max_steps": 20,
-    "max_queue": 20,
-    "arrival_rate": (0, 2),
-    "discharge_rate": (3, 5),
-    "emergency_prob": 0.01,
-    "switch_penalty": 0.2,
-    "starvation_threshold": 10,
-    "burst_prob": 0.0,
-    "burst_multiplier": 1.0,
-}
-# Medium mode: higher arrival_rate and emergency_prob than easy, bursts enabled.
-MEDIUM_CONFIG = {
-    "max_steps": 20,
-    "max_queue": 20,
-    "arrival_rate": (1, 3),
-    "discharge_rate": (3, 5),
-    "emergency_prob": 0.03,
-    "switch_penalty": 0.2,
-    "starvation_threshold": 10,
-    "burst_prob": 0.2,
-    "burst_multiplier": 1.5,
-}
-# Hard mode: highest arrival_rate, emergency_prob, burst_prob and
-# burst_multiplier of the three, with starvation_threshold lowered to 8.
-HARD_CONFIG = {
-    "max_steps": 20,
-    "max_queue": 20,
-    "arrival_rate": (2, 4),
-    "discharge_rate": (3, 5),
-    "emergency_prob": 0.05,
-    "switch_penalty": 0.2,
-    "starvation_threshold": 8,
-    "burst_prob": 0.35,
-    "burst_multiplier": 2.0,
-}
-def strict_score(x: float) -> float:
-    """Rescale ``x`` with ``(x + 1) / 2`` and clamp the result to [0.001, 0.999]."""
-    # Map RL reward from [-1, 1] to [0, 1], then clamp strictly inside (0,1)
-    x = (float(x) + 1.0) / 2.0
-    return max(0.001, min(0.999, x))
-def build_client():
-    """Build an OpenAI client from environment variables, if they are set.
-
-    Reads ``API_BASE_URL``, ``API_KEY`` and ``MODEL_NAME`` (default
-    ``"gpt-4o-mini"``).
-
-    Returns:
-        A ``(client, model_name, use_llm)`` tuple. ``client`` is ``None`` and
-        ``use_llm`` is ``False`` when ``API_BASE_URL`` or ``API_KEY`` is
-        missing or empty.
-    """
-    api_base_url = os.environ.get("API_BASE_URL")
-    api_key = os.environ.get("API_KEY")
-    model_name = os.environ.get("MODEL_NAME", "gpt-4o-mini")
-    if api_base_url and api_key:
-        client = OpenAI(
-            base_url=api_base_url,
-            api_key=api_key,
-        )
-        return client, model_name, True
-    # Fallback for environments where these vars are not present
-    return None, model_name, False
-def choose_action(client, model_name, state):
-    """Ask the chat model for a signal action and return it as 0 or 1.
-
-    Args:
-        client: Object exposing ``chat.completions.create``.
-        model_name: Model identifier forwarded to the request.
-        state: Current environment state, interpolated into the prompt.
-
-    Returns:
-        The integer the model replied with when it is 0 or 1, otherwise 0.
-    """
-    prompt = f"""
-You are controlling a traffic signal at a 4-way intersection.
-Current state:
-{state}
-Available actions:
-0 = keep current signal phase
-1 = switch signal phase
-Reply with only one number: 0 or 1
-""".strip()
-    response = client.chat.completions.create(
-        model=model_name,
-        messages=[
-            {
-                "role": "system",
-                "content": "You are a traffic signal controller. Reply with only 0 or 1."
-            },
-            {
-                "role": "user",
-                "content": prompt
-            }
-        ],
-        temperature=0,
-    )
-    # NOTE(review): the request and this .strip() sit outside the try below,
-    # so an API error or a missing message content propagates to the caller.
-    content = response.choices[0].message.content.strip()
-    try:
-        action = int(content)
-        # Integers other than 0 and 1 are treated as "keep" (0).
-        if action not in (0, 1):
-            action = 0
-    except Exception:
-        # Replies that are not a bare integer also default to 0.
-        action = 0
-    return action
-def run_task(task_name, config, client, model_name, use_llm):
-    """Run one episode of ``TrafficEnv(config)`` and print per-step and final scores.
-
-    Args:
-        task_name: Label included in the ``[STEP]`` and ``[END]`` lines.
-        config: Configuration passed to ``TrafficEnv``.
-        client: Chat client forwarded to ``choose_action`` when ``use_llm`` is true.
-        model_name: Model identifier forwarded to ``choose_action``.
-        use_llm: When false, action 0 is taken at every step.
-
-    Returns:
-        ``strict_score`` of the mean per-step reward.
-    """
-    env = TrafficEnv(config)
     state = env.reset()
-    print("[START]", flush=True)
-    done = False
-    step_idx = 0
-    total_reward = 0.0
-    while not done:
-        if use_llm:
-            action = choose_action(client, model_name, state)
-        else:
-            # Safe fallback so the script never crashes outside validator
-            action = 0
-        state, reward, done, info = env.step(action)
-        # The printed per-step score is the rescaled reward; the raw reward is
-        # what gets accumulated for the final average.
-        step_score = strict_score(reward)
-        print(
-            f"[STEP] task={task_name}, step={step_idx}, score={step_score:.3f}, done={done}",
-            flush=True,
-        )
-        total_reward += reward
-        step_idx += 1
-    # max(1, ...) keeps the division defined if the loop body never ran.
-    avg_reward = total_reward / max(1, step_idx)
-    final_score = strict_score(avg_reward)
-    print(f"[END] task={task_name}, score={final_score:.3f}", flush=True)
-    return final_score
 if __name__ == "__main__":
-    # Build the client once and reuse it for all three task modes.
-    client, model_name, use_llm = build_client()
-    # (task label, environment config) pairs, run in this order.
-    tasks = [
-        ("easy", EASY_CONFIG),
-        ("medium", MEDIUM_CONFIG),
-        ("hard", HARD_CONFIG),
-    ]
-    for task_name, config in tasks:
-        run_task(task_name, config, client, model_name, use_llm)

+from fastapi import FastAPI
+from fastapi.responses import HTMLResponse
+from pydantic import BaseModel
 from env import TrafficEnv
+from tasks import get_config
+from baseline_agent import RuleBasedAgent
+import os
+import openai
+class LLMAgent:
+    """Traffic-signal agent that asks a chat model for an action.
+
+    The OpenAI-compatible client is configured from the ``API_BASE_URL`` and
+    ``API_KEY`` environment variables, and the model name from ``MODEL_NAME``
+    (default ``"gpt-3.5-turbo"``). Whenever the client is unavailable or a
+    request fails, the action chosen by a ``RuleBasedAgent`` is used instead.
+    """
+    DEFAULT_MODEL = "gpt-3.5-turbo"
+    def __init__(self):
+        """Build the API client (best effort) and the fallback agent."""
+        base_url = os.environ.get("API_BASE_URL")
+        api_key = os.environ.get("API_KEY")
+        self.model = os.environ.get("MODEL_NAME") or self.DEFAULT_MODEL
+        self.client = None
+        if base_url and api_key:
+            try:
+                self.client = openai.OpenAI(base_url=base_url, api_key=api_key)
+            except Exception:
+                # Best effort: if the client cannot be built, every decision
+                # comes from the fallback agent.
+                self.client = None
+        self.fallback = RuleBasedAgent()
+    def select_action(self, state):
+        """Return 1 (switch phase) or 0 (keep phase) for ``state``.
+
+        Args:
+            state: Current traffic state; interpolated into the prompt and
+                passed to the fallback agent.
+
+        Returns:
+            The first ``0``/``1`` digit found in the model's reply, ``0`` when
+            the reply contains neither, or the fallback agent's action when no
+            client is configured, the request fails, or the reply is empty.
+        """
+        # The fallback is consulted exactly once per step, whatever happens
+        # below, so any per-step bookkeeping it keeps stays in sync.
+        fallback_action = self.fallback.select_action(state)
+        if self.client is None:
+            return fallback_action
+        prompt = f"Traffic state: {state}. Reply with 1 to switch phase, 0 to keep phase. Output only 1 or 0."
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {"role": "system", "content": "You are a traffic signal controller."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=5,
+                temperature=0.0
+            )
+            content = response.choices[0].message.content
+        except Exception:
+            # Any API or transport failure degrades to the rule-based decision.
+            return fallback_action
+        if content is None:
+            return fallback_action
+        # Decide on the first 0/1 digit so that a reply such as "0, not 1"
+        # is read as 0 rather than matching the later "1".
+        for char in content:
+            if char in "01":
+                return int(char)
+        return 0
+    def reset(self):
+        """Reset the fallback agent's state for a new episode."""
+        self.fallback.reset()
+# Module-level objects used by the request handlers in this file: the FastAPI
+# application, one environment built from the "medium" task config, and one
+# LLM-backed agent.
+app = FastAPI()
+env = TrafficEnv(get_config("medium"))
+agent = LLMAgent()
+# Request body accepted by the /step endpoint: a single integer action.
+class Action(BaseModel):
+    action: int
+# Landing page: returns the text of index.html (opened relative to the
+# current working directory) as an HTML response.
+@app.get("/", response_class=HTMLResponse)
+def root():
+    with open("index.html", mode="r", encoding="utf-8") as page:
+        html = page.read()
+    return html
+@app.post("/reset")
+def reset():
+    """Reset the shared environment and agent and return the initial state.
+
+    Returns:
+        ``{"state": ...}``; the state is converted with ``tolist()`` when the
+        environment returns an object that provides that method.
+    """
     state = env.reset()
+    try:
+        state = state.tolist()
+    except AttributeError:
+        # This state object has no ``tolist``: return it unchanged.
+        pass
+    agent.reset()
+    return {"state": state}
+@app.post("/step")
+def step(data: Action):
+    """Apply the caller-supplied action to the shared environment.
+
+    Args:
+        data: Request body carrying the integer ``action``.
+
+    Returns:
+        A dict with the ``state``, ``reward``, ``done`` and ``info`` values
+        returned by ``env.step``; the state is converted with ``tolist()``
+        when it provides that method.
+    """
+    state, reward, done, info = env.step(data.action)
+    try:
+        state = state.tolist()
+    except AttributeError:
+        # This state object has no ``tolist``: return it unchanged.
+        pass
+    return {
+        "state": state,
+        "reward": reward,
+        "done": done,
+        "info": info,
+    }
+@app.post("/auto_step")
+def auto_step():
+    """Let the agent pick an action for the current state and apply it.
+
+    Returns:
+        A dict with the ``state``, ``reward``, ``done`` and ``info`` values
+        returned by ``env.step`` plus ``action_taken``; the state is converted
+        with ``tolist()`` when it provides that method.
+    """
+    state_dict = env.get_state()
+    action = agent.select_action(state_dict)
+    state, reward, done, info = env.step(action)
+    try:
+        state = state.tolist()
+    except AttributeError:
+        # This state object has no ``tolist``: return it unchanged.
+        pass
+    return {
+        "state": state,
+        "reward": reward,
+        "done": done,
+        "info": info,
+        "action_taken": action,
+    }
 if __name__ == "__main__":
+    # Script mode: run one episode per selected task and print the
+    # [START] / [STEP] / [END] marker lines.
+    import sys
+    tasks_to_run = ["easy", "medium", "hard"]
+    cli_args = sys.argv[1:]
+    if cli_args:
+        # Accepted forms: "easy", "--task=easy" and "--task easy".
+        if cli_args[0] == "--task" and len(cli_args) > 1:
+            task_arg = cli_args[1]
+        else:
+            task_arg = cli_args[0].replace("--task=", "").replace("--task", "")
+        # An unrecognised task name is ignored and every task runs.
+        if task_arg in tasks_to_run:
+            tasks_to_run = [task_arg]
+    for task_name in tasks_to_run:
+        eval_env = TrafficEnv(get_config(task_name))
+        eval_agent = LLMAgent()
+        state = eval_env.reset()
+        eval_agent.reset()
+        print("[START]", flush=True)
+        done = False
+        step_idx = 0
+        while not done:
+            action = eval_agent.select_action(state)
+            state, reward, done, _ = eval_env.step(action)
+            print(f"[STEP] step={step_idx}, reward={reward}, done={done}", flush=True)
+            step_idx += 1
+        print("[END]", flush=True)