Spaces:

arrow072
/

meta_env

Sleeping

App Files Files Community

arrow072 committed on Apr 12

Commit

7b14fc3

verified ·

1 Parent(s): 94ed1c9

Update inference.py

Browse files

Files changed (1) hide show

inference.py +148 -126

inference.py CHANGED Viewed

@@ -1,130 +1,152 @@
-from fastapi import FastAPI
-from fastapi.responses import HTMLResponse
-from pydantic import BaseModel
-from env import TrafficEnv
-from tasks import get_config
-from baseline_agent import RuleBasedAgent
 import os
-import openai
-class LLMAgent:
-    def __init__(self):
-        try:
-            self.client = openai.OpenAI(
-                base_url=os.environ["API_BASE_URL"],
-                api_key=os.environ["API_KEY"]
-            )
-        except Exception:
-            self.client = None
-        self.fallback = RuleBasedAgent()
-    def select_action(self, state):
-        prompt = f"Traffic state: {state}. Reply with 1 to switch phase, 0 to keep phase. Output only 1 or 0."
-        try:
-            response = self.client.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[
-                    {"role": "system", "content": "You are a traffic signal controller."},
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=5,
-                temperature=0.0
-            )
-            content = response.choices[0].message.content.strip()
-            # Still call fallback to maintain its internal step counter
-            self.fallback.select_action(state)
-            if "1" in content:
-                return 1
-            else:
-                return 0
-        except Exception as e:
-            return self.fallback.select_action(state)
-    def reset(self):
-        self.fallback.reset()
-app = FastAPI()
-env = TrafficEnv(get_config("medium"))
-agent = LLMAgent()
-class Action(BaseModel):
-    action: int
-@app.get("/", response_class=HTMLResponse)
-def root():
-    with open("index.html", "r", encoding="utf-8") as f:
-        return f.read()
-@app.post("/reset")
-def reset():
-    state = env.reset()
-    try:
-        state = state.tolist()
-    except:
-        pass
-    agent.reset()
-    return {"state":state}
-@app.post("/step")
-def step(data:Action):
-    state,reward,done,info = env.step(data.action)
-    try:
-        state = state.tolist()
-    except:
-        pass
-    return {
-        "state":state,
-        "reward":reward,
-        "done":done,
-        "info":info
-    }
-@app.post("/auto_step")
-def auto_step():
-    state_dict = env.get_state()
-    action = agent.select_action(state_dict)
-    state,reward,done,info = env.step(action)
     try:
-        state = state.tolist()
-    except:
-        pass
-    return {
-        "state":state,
-        "reward":reward,
-        "done":done,
-        "info":info,
-        "action_taken": action
-    }
 if __name__ == "__main__":
-    import sys
-    tasks_to_run = ["easy", "medium", "hard"]
-    if len(sys.argv) > 1:
-        # e.g., if validator optionally passes a task name as argument
-        task_arg = sys.argv[1].replace("--task=", "").replace("--task", "")
-        if task_arg in tasks_to_run:
-            tasks_to_run = [task_arg]
-    for task_name in tasks_to_run:
-        config = get_config(task_name)
-        eval_env = TrafficEnv(config)
-        eval_agent = LLMAgent()
-        state = eval_env.reset()
-        eval_agent.reset()
-        print("[START]", flush=True)
-        done = False
-        step_idx = 0
-        total_reward = 0.0
-        while not done:
-            action = eval_agent.select_action(state)
-            state, reward, done, info = eval_env.step(action)
-            print(f"[STEP] step={step_idx}, reward={reward}, done={done}", flush=True)
-            step_idx += 1
-            total_reward += reward
-        print("[END]", flush=True)

 import os
+from openai import OpenAI
+from env import TrafficEnv
+# Task presets for the three difficulty modes (easy / medium / hard); each dict is handed to TrafficEnv(config) by run_task.
+EASY_CONFIG = {  # easy preset: smallest arrival_rate and emergency_prob, burst_prob 0.0
+    "max_steps": 20,  # identical in all three presets
+    "max_queue": 20,  # identical in all three presets
+    "arrival_rate": (0, 2),
+    "discharge_rate": (3, 5),  # identical in all three presets
+    "emergency_prob": 0.01,
+    "switch_penalty": 0.2,  # identical in all three presets
+    "starvation_threshold": 10,
+    "burst_prob": 0.0,
+    "burst_multiplier": 1.0,
+}
+MEDIUM_CONFIG = {  # medium preset: values sit between the easy and hard presets
+    "max_steps": 20,
+    "max_queue": 20,
+    "arrival_rate": (1, 3),
+    "discharge_rate": (3, 5),
+    "emergency_prob": 0.03,
+    "switch_penalty": 0.2,
+    "starvation_threshold": 10,
+    "burst_prob": 0.2,
+    "burst_multiplier": 1.5,
+}
+HARD_CONFIG = {  # hard preset: largest arrival_rate, emergency_prob, burst_prob and burst_multiplier
+    "max_steps": 20,
+    "max_queue": 20,
+    "arrival_rate": (2, 4),
+    "discharge_rate": (3, 5),
+    "emergency_prob": 0.05,
+    "switch_penalty": 0.2,
+    "starvation_threshold": 8,  # the only preset with a threshold below 10
+    "burst_prob": 0.35,
+    "burst_multiplier": 2.0,
+}
+def strict_score(x: float) -> float:
+    # Map RL reward from [-1, 1] to [0, 1], then clamp strictly inside (0,1)
+    x = (float(x) + 1.0) / 2.0
+    return max(0.001, min(0.999, x))
+def build_client():
+    api_base_url = os.environ.get("API_BASE_URL")
+    api_key = os.environ.get("API_KEY")
+    model_name = os.environ.get("MODEL_NAME", "gpt-4o-mini")
+    if api_base_url and api_key:
+        client = OpenAI(
+            base_url=api_base_url,
+            api_key=api_key,
+        )
+        return client, model_name, True
+    # Fallback for environments where these vars are not present
+    return None, model_name, False
+def choose_action(client, model_name, state):
+    prompt = f"""
+You are controlling a traffic signal at a 4-way intersection.
+Current state:
+{state}
+Available actions:
+0 = keep current signal phase
+1 = switch signal phase
+Reply with only one number: 0 or 1
+""".strip()
+    response = client.chat.completions.create(
+        model=model_name,
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a traffic signal controller. Reply with only 0 or 1."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+        temperature=0,
+    )
+    content = response.choices[0].message.content.strip()
     try:
+        action = int(content)
+        if action not in (0, 1):
+            action = 0
+    except Exception:
+        action = 0
+    return action
+def run_task(task_name, config, client, model_name, use_llm):
+    env = TrafficEnv(config)
+    state = env.reset()
+    print("[START]", flush=True)
+    done = False
+    step_idx = 0
+    total_reward = 0.0
+    while not done:
+        if use_llm:
+            action = choose_action(client, model_name, state)
+        else:
+            # Safe fallback so the script never crashes outside validator
+            action = 0
+        state, reward, done, info = env.step(action)
+        step_score = strict_score(reward)
+        print(
+            f"[STEP] task={task_name}, step={step_idx}, score={step_score:.3f}, done={done}",
+            flush=True,
+        )
+        total_reward += reward
+        step_idx += 1
+    avg_reward = total_reward / max(1, step_idx)
+    final_score = strict_score(avg_reward)
+    print(f"[END] task={task_name}, score={final_score:.3f}", flush=True)
+    return final_score
 if __name__ == "__main__":
+    client, model_name, use_llm = build_client()
+    tasks = [
+        ("easy", EASY_CONFIG),
+        ("medium", MEDIUM_CONFIG),
+        ("hard", HARD_CONFIG),
+    ]
+    for task_name, config in tasks:
+        run_task(task_name, config, client, model_name, use_llm)