Spaces:
Sleeping
Sleeping
| import asyncio | |
| import os | |
| import textwrap | |
| from typing import List, Optional | |
| from openai import OpenAI | |
| from client import SmartGridEnv | |
| from models import SmartGridAction | |
| IMAGE_NAME = os.getenv("IMAGE_NAME") | |
| API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") | |
| API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1" | |
| MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct" | |
| TASK_NAME = os.getenv("TASK", "balanced_grid_easy") | |
| BENCHMARK = os.getenv("BENCHMARK", "smart_grid") | |
| MAX_STEPS = 24 | |
| TEMPERATURE = 0.1 | |
| MAX_TOKENS = 50 | |
| SUCCESS_SCORE_THRESHOLD = 0.6 # normalized score in [0, 1] | |
| _MAX_REWARD_PER_STEP = 0.1 | |
| MAX_TOTAL_REWARD = MAX_STEPS * _MAX_REWARD_PER_STEP | |
| SYSTEM_PROMPT = textwrap.dedent( | |
| """ | |
| You are controlling a smart grid environment. | |
| You MUST output EXACTLY FOUR numbers separated by commas. | |
| supply_r1, supply_r2, supply_r3, charge_battery | |
| Even if charge_battery is zero, you must include it in the output. Do not omit any value. | |
| STRICT RULES: | |
| - Output ONLY numbers | |
| - NO text, NO explanation | |
| - EXACTLY 4 values | |
| - Example: 10.5,20.0,15.0,-5.0 | |
| At each step: | |
| 1. You receive demand for three regions | |
| 2. You receive how much solar and wind power has been generated | |
| 3. You have a battery that can be charged or discharged (up to its capacity) | |
| Your goal: | |
| - Minimize unmet demand | |
| - Avoid wasting energy | |
| - Use battery with brains | |
| Respond only with 4 numbers separated by commas, with no extra text: | |
| supply_r1, supply_r2, supply_r3, charge_battery | |
| If you do not follow format, the system will FAIL. | |
| Rules: | |
| - supply values are supposed to be non-negative and represents how much energy you allocate to a region | |
| - charge_battery can be positive (to charge) or negative (to discharge) the battery within limits | |
| - Do not output anything else | |
| Example: | |
| Demand: 20,30,25 | |
| Output: 20,30,25,0 | |
| """ | |
| ).strip() | |
| def log_start(task: str, env: str, model: str) -> None: | |
| print(f"[START] task={task} env={env} model={model}", flush=True) | |
| def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None: | |
| error_val = error if error else "null" | |
| done_val = str(done).lower() | |
| print( | |
| f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", | |
| flush=True, | |
| ) | |
| def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None: | |
| rewards_str = ",".join(f"{r:.2f}" for r in rewards) | |
| print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True) | |
| def build_user_prompt(obs) -> str: | |
| return textwrap.dedent( | |
| f""" | |
| Hour: {obs.hour} | |
| Demand: | |
| - R1={obs.demand_r1} | |
| - R2={obs.demand_r2} | |
| - R3={obs.demand_r3} | |
| Generation: | |
| - Solar={obs.solar_generation} | |
| - Wind={obs.wind_generation} | |
| Battery: | |
| - Level={obs.battery_level} | |
| - Capacity={obs.battery_capacity} | |
| What action do you take? How much do you supply and what should be the battery action? | |
| """ | |
| ).strip() | |
| def get_action(client: OpenAI, obs) -> SmartGridAction: | |
| user_prompt = build_user_prompt(obs) | |
| try: | |
| completion = client.chat.completions.create( | |
| model=MODEL_NAME, | |
| messages=[ | |
| {"role": "system", "content": SYSTEM_PROMPT}, | |
| {"role": "user", "content": user_prompt}, | |
| ], | |
| temperature=TEMPERATURE, | |
| max_tokens=MAX_TOKENS, | |
| stream=False, | |
| ) | |
| text = (completion.choices[0].message.content or "").strip() | |
| parts = text.replace("\n", "").split(",") | |
| # I see the model doesn't write the 4th value when it's zero, so default to 0 if not mentioned. | |
| if len(parts) < 3: | |
| raise ValueError(f"Expected 4 comma-separated values, got {len(parts)}\nResponse: {text}") | |
| if len(parts) == 3: | |
| parts.append("0.0") | |
| r1, r2, r3, bt = map(float, parts) | |
| return SmartGridAction( | |
| supply_r1=r1, | |
| supply_r2=r2, | |
| supply_r3=r3, | |
| charge_battery=bt | |
| ) | |
| except Exception as exc: | |
| print(f"[DEBUG] Model request failed: {exc}", flush=True) | |
| return SmartGridAction(supply_r1=0.0, supply_r2=0.0, supply_r3=0.0, charge_battery=0.0) | |
| async def main() -> None: | |
| client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) | |
| env = await SmartGridEnv.from_docker_image(IMAGE_NAME) | |
| rewards = [] | |
| steps_taken = 0 | |
| score = 0.0 | |
| success = False | |
| log_start(TASK_NAME, BENCHMARK, MODEL_NAME) | |
| try: | |
| result = await env.reset() # OpenENV.reset() | |
| obs = result.observation | |
| for step in range(1, MAX_STEPS + 1): | |
| if result.done: | |
| break | |
| action = get_action(client, obs) | |
| result = await env.step(action) | |
| obs = result.observation | |
| reward = result.reward or 0.0 | |
| done = result.done | |
| rewards.append(reward) | |
| steps_taken = step | |
| log_step(step, str(action), reward, done, None) | |
| if done: | |
| break | |
| score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0 | |
| score = min(max(score, 0.0), 1.0) | |
| success = score >= SUCCESS_SCORE_THRESHOLD | |
| finally: | |
| try: | |
| await env.close() | |
| except Exception as e: | |
| print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True) | |
| log_end(success, steps_taken, score, rewards) | |
| if __name__ == "__main__": | |
| asyncio.run(main()) |