Spaces:

arrow072
/

open_env_traffic_system

Sleeping

App Files Community

arrow072 committed on Apr 12

Commit

aa49a5f

verified ·

1 Parent(s): e22b3e1

Update inference.py

Browse files

Files changed (1) hide show

inference.py +64 -139

inference.py CHANGED Viewed

@@ -1,153 +1,78 @@
 import os
 from openai import OpenAI
 from env import TrafficEnv
-# Try importing task configs from tasks.py
-# If they are missing, use safe fallback configs
-try:
-    from tasks import EASY_CONFIG, MEDIUM_CONFIG, HARD_CONFIG
-except Exception:
-    EASY_CONFIG = {
-        "max_steps": 20,
-        "max_queue": 20,
-        "arrival_rate": (0, 2),
-        "discharge_rate": (3, 5),
-        "emergency_prob": 0.01,
-        "switch_penalty": 0.2,
-        "starvation_threshold": 10,
-        "burst_prob": 0.0,
-        "burst_multiplier": 1.0,
-    }
-    MEDIUM_CONFIG = {
-        "max_steps": 20,
-        "max_queue": 20,
-        "arrival_rate": (1, 3),
-        "discharge_rate": (3, 5),
-        "emergency_prob": 0.03,
-        "switch_penalty": 0.2,
-        "starvation_threshold": 10,
-        "burst_prob": 0.2,
-        "burst_multiplier": 1.5,
-    }
-    HARD_CONFIG = {
-        "max_steps": 20,
-        "max_queue": 20,
-        "arrival_rate": (2, 4),
-        "discharge_rate": (3, 5),
-        "emergency_prob": 0.05,
-        "switch_penalty": 0.2,
-        "starvation_threshold": 8,
-        "burst_prob": 0.35,
-        "burst_multiplier": 2.0,
-    }
-def strict_score(x: float) -> float:
-    """
-    Convert any raw value into a score strictly inside (0, 1).
-    This avoids validator failures for exact 0.0 or 1.0.
-    """
-    # If x is RL reward in [-1, 1], map to [0, 1]
     x = (float(x) + 1.0) / 2.0
-    # Clamp strictly inside (0, 1)
     return max(0.001, min(0.999, x))
-class LLMAgent:
-    def __init__(self):
-        self.client = OpenAI(
-            base_url=os.environ["API_BASE_URL"],
-            api_key=os.environ["API_KEY"],
-        )
-        self.model_name = os.environ["MODEL_NAME"]
-    def reset(self):
-        pass
-    def select_action(self, state: dict) -> int:
-        prompt = f"""
-You are a traffic control agent for a 4-way intersection.
-Current state:
-{state}
-Available actions:
-0 = keep current signal phase
-1 = switch signal phase
-Reply with only one number: 0 or 1
-""".strip()
-        response = self.client.chat.completions.create(
-            model=self.model_name,
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a traffic signal controller. Reply with only 0 or 1.",
-                },
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ],
-            temperature=0,
-        )
-        content = response.choices[0].message.content.strip()
-        try:
-            action = int(content)
-            if action not in (0, 1):
-                action = 0
-        except Exception:
             action = 0
-        return action
-def run_task(task_name: str, config: dict) -> float:
-    env = TrafficEnv(config)
-    agent = LLMAgent()
-    state = env.reset()
-    agent.reset()
-    print("[START]", flush=True)
-    done = False
-    step_idx = 0
-    total_reward = 0.0
-    while not done:
-        action = agent.select_action(state)
-        state, reward, done, info = env.step(action)
-        step_score = strict_score(reward)
-        print(
-            f"[STEP] task={task_name}, step={step_idx}, action={action}, score={step_score:.3f}, done={done}",
-            flush=True,
-        )
-        total_reward += reward
-        step_idx += 1
-    avg_reward = total_reward / max(1, step_idx)
-    final_score = strict_score(avg_reward)
-    print(f"[END] task={task_name}, score={final_score:.3f}", flush=True)
-    return final_score
-if __name__ == "__main__":
-    tasks = [
-        ("easy", EASY_CONFIG),
-        ("medium", MEDIUM_CONFIG),
-        ("hard", HARD_CONFIG),
-    ]
-    for task_name, config in tasks:
-        run_task(task_name, config)

 import os
 from openai import OpenAI
 from env import TrafficEnv
+# Minimal config (no tasks.py dependency)
+CONFIG = {
+    "max_steps": 20,
+    "max_queue": 20,
+    "arrival_rate": (0, 2),
+    "discharge_rate": (3, 5),
+    "emergency_prob": 0.02,
+    "switch_penalty": 0.2,
+    "starvation_threshold": 10,
+    "burst_prob": 0.1,
+    "burst_multiplier": 1.2,
+}
+def strict_score(x):
+    # Convert [-1,1] → (0,1)
     x = (float(x) + 1.0) / 2.0
     return max(0.001, min(0.999, x))
+# LLM client (IMPORTANT)
+client = OpenAI(
+    base_url=os.environ["API_BASE_URL"],
+    api_key=os.environ["API_KEY"]
+)
+MODEL_NAME = os.environ["MODEL_NAME"]
+env = TrafficEnv(CONFIG)
+print("[START]", flush=True)
+state = env.reset()
+done = False
+step_count = 0
+total_reward = 0.0
+while not done:
+    prompt = f"""
+State: {state}
+Choose action:
+0 = keep
+1 = switch
+Reply only 0 or 1
+"""
+    response = client.chat.completions.create(
+        model=MODEL_NAME,
+        messages=[
+            {"role": "system", "content": "Reply only 0 or 1."},
+            {"role": "user", "content": prompt}
+        ],
+        temperature=0
+    )
+    try:
+        action = int(response.choices[0].message.content.strip())
+        if action not in [0, 1]:
             action = 0
+    except:
+        action = 0
+    state, reward, done, info = env.step(action)
+    score = strict_score(reward)
+    print(f"[STEP] step={step_count}, score={score:.3f}, done={done}", flush=True)
+    total_reward += reward
+    step_count += 1
+# Final score (IMPORTANT)
+final_score = total_reward / max(1, step_count)
+final_score = strict_score(final_score)
+print(f"[END] score={final_score:.3f}", flush=True)