# Spaces: Sleeping, Sleeping  (page-status text captured with this listing; not program code)
import math
import os

from openai import OpenAI

from env import TrafficEnv
# Environment settings handed unchanged to TrafficEnv for every task.
# NOTE(review): the meaning and units of each key are defined by TrafficEnv
# (env module), which is not visible here. The two-element tuples are
# presumably (low, high) ranges -- confirm against the environment before
# tuning any of these values.
CONFIG = {
    "max_steps": 20,
    "max_queue": 20,
    "arrival_rate": (0, 2),
    "discharge_rate": (3, 5),
    "emergency_prob": 0.02,
    "switch_penalty": 0.2,
    "starvation_threshold": 10,
    "burst_prob": 0.1,
    "burst_multiplier": 1.2,
}
def strict_score(x):
    """Map a reward onto a score clamped to the range [0.001, 0.999].

    The input is converted with ``float`` and rescaled as ``(x + 1) / 2``,
    so -1 maps to 0 and +1 maps to 1 before clamping. The clamp keeps the
    result strictly between 0 and 1.

    Args:
        x: Any value accepted by ``float()``.

    Returns:
        A float in [0.001, 0.999]. A NaN input yields 0.001.

    Raises:
        TypeError, ValueError: If ``float(x)`` cannot convert the input.
    """
    scaled = (float(x) + 1.0) / 2.0
    # NaN compares False against everything, so min()/max() would let it
    # through as 0.999 (a near-perfect score). Treat it as the worst score.
    if math.isnan(scaled):
        return 0.001
    return max(0.001, min(0.999, scaled))
def build_client():
    """Create the OpenAI client from environment variables, if configured.

    Reads ``API_BASE_URL``, ``API_KEY`` and ``MODEL_NAME`` from the process
    environment. ``MODEL_NAME`` defaults to ``"gpt-4o-mini"`` when it is
    unset or empty.

    Returns:
        A ``(client, model_name, use_llm)`` tuple. ``client`` is an
        ``OpenAI`` instance and ``use_llm`` is True only when both
        ``API_BASE_URL`` and ``API_KEY`` are non-empty; otherwise the tuple
        is ``(None, model_name, False)``.
    """
    api_base_url = os.environ.get("API_BASE_URL")
    api_key = os.environ.get("API_KEY")
    # `or` (rather than a .get default) also covers a variable that is
    # exported but empty, which would otherwise be sent as model "".
    model_name = os.environ.get("MODEL_NAME") or "gpt-4o-mini"

    if not (api_base_url and api_key):
        return None, model_name, False

    client = OpenAI(base_url=api_base_url, api_key=api_key)
    return client, model_name, True
def choose_action(client, model_name, state):
    """Ask the chat model whether to keep (0) or switch (1) the signal phase.

    Args:
        client: Object exposing ``chat.completions.create`` (the OpenAI
            client returned by ``build_client``).
        model_name: Model identifier passed through as ``model``.
        state: Current environment state; it is interpolated into the
            prompt using its string form.

    Returns:
        0 or 1. A reply that is missing, empty, not an integer, or outside
        {0, 1} falls back to 0 (keep the current phase).

    Errors raised by the API call itself are not caught here and propagate
    to the caller.
    """
    prompt = "\n".join(
        (
            "You are controlling a traffic signal at a 4-way intersection.",
            "Current state:",
            f"{state}",
            "Available actions:",
            "0 = keep current signal phase",
            "1 = switch signal phase",
            "Reply with only one number: 0 or 1",
        )
    )

    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "Reply with only 0 or 1."},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )

    # The reply text is optional in the API response, and the choices list
    # may be empty; both cases use the same safe default as an unparsable
    # reply instead of raising.
    choices = getattr(response, "choices", None) or []
    content = choices[0].message.content if choices else None
    if content is None:
        return 0

    try:
        action = int(content.strip())
    except ValueError:
        return 0
    return action if action in (0, 1) else 0
def run_task(task_name, config, client, model_name, use_llm):
    """Run one episode in a fresh TrafficEnv and print its progress lines.

    Prints one ``[START]`` line, one ``[STEP]`` line per environment step,
    and a final ``[END]`` line carrying the score of the mean step reward.

    Args:
        task_name: Label included in the ``[STEP]`` and ``[END]`` lines.
        config: Settings passed to the ``TrafficEnv`` constructor.
        client: Chat client forwarded to ``choose_action``; only used when
            ``use_llm`` is true.
        model_name: Model identifier forwarded to ``choose_action``.
        use_llm: When false, every step uses action 0 and no model call is
            made.

    Returns:
        None. Results are reported on standard output only.
    """
    env = TrafficEnv(config)
    state = env.reset()
    print("[START]", flush=True)

    done = False
    step_idx = 0
    total_reward = 0.0
    while not done:
        # Without an LLM the fallback policy is always action 0.
        action = choose_action(client, model_name, state) if use_llm else 0
        # The fourth element returned by step() is not used by this runner.
        state, reward, done, _info = env.step(action)
        step_score = strict_score(reward)
        print(
            f"[STEP] task={task_name}, step={step_idx}, action={action}, score={step_score:.3f}, done={done}",
            flush=True,
        )
        total_reward += reward
        step_idx += 1

    # The score is computed from the mean raw reward, not the mean of the
    # per-step scores; max(1, ...) keeps the division safe.
    avg_reward = total_reward / max(1, step_idx)
    final_score = strict_score(avg_reward)
    print(f"[END] task={task_name}, score={final_score:.3f}", flush=True)
if __name__ == "__main__":
    # Build the client once and reuse it for every task.
    client, model_name, use_llm = build_client()
    # NOTE(review): all three task labels currently share the same CONFIG,
    # so the "easy"/"medium"/"hard" runs differ only by name -- confirm
    # whether per-difficulty settings were intended.
    tasks = [
        ("easy", CONFIG),
        ("medium", CONFIG),
        ("hard", CONFIG),
    ]
    for task_name, config in tasks:
        run_task(task_name, config, client, model_name, use_llm)