File size: 16,409 Bytes
4c68ece 9cdb062 4c68ece 8cd3fa7 89d39f7 6aa8acb 8cd3fa7 4c68ece 511f04a 4c68ece 8cd3fa7 4c68ece 899c12a 09a9c72 4c68ece 79fb14b 9c3ced0 79fb14b 4c68ece 8cd3fa7 4c68ece 9cdb062 4c68ece 9cdb062 4c68ece 9cdb062 4c68ece 8cd3fa7 4c68ece 89d39f7 4c68ece 6aa8acb 511f04a 8cd3fa7 4c68ece 8cd3fa7 4c68ece 8cd3fa7 4c68ece 8cd3fa7 511f04a 4c68ece 8cd3fa7 4c68ece 6aa8acb 4c68ece 511f04a 4c68ece 511f04a 89d39f7 292424c 511f04a 4c68ece 511f04a 4c68ece 511f04a 4c68ece 511f04a 4c68ece 511f04a 4c68ece 511f04a 4c68ece 511f04a 4c68ece 89d39f7 4c68ece 8cd3fa7 4c68ece 511f04a 4c68ece 8cd3fa7 933baa6 4c68ece 933baa6 8cd3fa7 933baa6 4c68ece 933baa6 8cd3fa7 933baa6 4c68ece 933baa6 4c68ece 9cdb062 4c68ece 9cdb062 899c12a 9cdb062 899c12a 9cdb062 899c12a 9cdb062 899c12a 9cdb062 6aa8acb 4c68ece 6aa8acb 4c68ece b4f91f0 4c68ece 899c12a 4c68ece 899c12a 4c68ece 899c12a 4c68ece 9cdb062 4c68ece 9cdb062 4c68ece 9cdb062 09a9c72 9cdb062 6a19dc6 9cdb062 47a298a 9cdb062 4c68ece 9cdb062 4c68ece 9cdb062 4c68ece 511f04a 8cd3fa7 4c68ece | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 | """
PolicyEvolverEnv — Hackathon Inference Script
=============================================
MANDATORY ENV VARS:
API_BASE_URL The API endpoint for the LLM.
MODEL_NAME The model identifier to use for inference.
HF_TOKEN Your Hugging Face / API key.
IMAGE_NAME The Docker image name (set by validator).
STDOUT FORMAT:
[START] task=<task_name> env=policy_evolver_env model=<model_name>
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
[END] task=<task_name> success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
"""
import asyncio
import os
import sys
import json
from typing import Dict, List, Optional
from openai import OpenAI
from client import PolicyEvolverEnv
from models import Action
# --- Environment variables (hackathon mandatory) ---
IMAGE_NAME = os.environ.get("IMAGE_NAME")
API_KEY = os.getenv("API_KEY") or os.getenv("HF_TOKEN")
API_BASE_URL = os.getenv("API_BASE_URL")
MODEL_NAME = os.getenv("MODEL_NAME")
BENCHMARK = "policy_evolver_env"

# --- Auto-discover a model when MODEL_NAME is not set ---
# Asks `<API_BASE_URL>/models` for its model list and adopts the first id that
# is not a wildcard entry. Any failure is only reported, so the hard-coded
# default further down still applies.
if API_BASE_URL and API_KEY and not MODEL_NAME:
    try:
        import httpx

        resp = httpx.get(
            API_BASE_URL.rstrip("/") + "/models",
            headers={"Authorization": "Bearer " + API_KEY},
            timeout=10,
        )
        if resp.status_code == 200:
            models_data = resp.json().get("data", [])
            for m in models_data:
                mid = m.get("id", "")
                # Skip empty ids and wildcard placeholders.
                if not mid or mid == "*" or mid.startswith("*"):
                    continue
                MODEL_NAME = mid
                print(f"[DEBUG] Auto-discovered model: {MODEL_NAME}", flush=True)
                break
    except Exception as e:
        print(f"[DEBUG] Model discovery failed: {e}", flush=True)

# Last resort when neither the environment nor discovery produced a model.
if not MODEL_NAME:
    MODEL_NAME = "gpt-4o-mini"
    print(f"[DEBUG] Using default MODEL_NAME: {MODEL_NAME}", flush=True)

# --- Episode tuning ---
MAX_STEPS = 5  # upper bound on agent steps per task
TEMPERATURE = 0.0  # sampling temperature passed to the chat completion call
SUCCESS_THRESHOLD = 0.70  # final reward at or above this marks the task successful
# --- Logging Helpers (Hackathon Mandatory Format) ---
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` record that opens an episode on stdout."""
    fields = ["[START]", f"task={task}", f"env={env}", f"model={model}"]
    print(" ".join(fields), flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Print one ``[STEP]`` record on stdout.

    Args:
        step: 1-based step number within the episode.
        action: Short action label written after ``action=``.
        reward: Reward for the step, rendered with two decimals.
        done: Whether the episode ended on this step (rendered ``true``/``false``).
        error: Error text, or a falsy value to emit ``error=null``.
    """
    if error:
        # Validation and transport errors are often multi-line; collapse all
        # whitespace runs so the record always stays on a single line.
        flat_error = " ".join(str(error).split())
        error_val = f'"{flat_error}"'
    else:
        error_val = "null"
    done_val = str(done).lower()
    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
def log_end(task: str, success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the closing ``[END]`` record for an episode on stdout.

    Rewards are rendered with two decimals and joined by commas; the score is
    rendered with three decimals.
    """
    rewards_str = ",".join(map("{:.2f}".format, rewards))
    success_val = str(success).lower()
    line = f"[END] task={task} success={success_val} steps={steps} score={score:.3f} rewards={rewards_str}"
    print(line, flush=True)
# --- LLM Agent ---
class PolicyEvolverAgent:
    """Strategic Policy Agent — maximizes governance scores via in-context adaptation.

    The agent records the actions it produced and the rewards they earned so
    that the next prompt can carry diagnostic feedback and a short history.
    """

    SYSTEM_PROMPT = (
        "You are a Strategic Policy Engineer. Your goal is to maximize governance outcomes through verifiable "
        "precision. STYLISTIC RULES:\n"
        "1. NO VAGUENESS: Never use words like 'maybe', 'perhaps', 'sometimes', 'usually'.\n"
        "2. COMMAND LANGUAGE: Use 'must', 'shall', 'prohibited', 'required', 'mandatory'.\n"
        "3. MEASURABLE CRITERIA: Define terms with 'if-then' and metrics.\n"
        "4. ANALYTICAL COT: Your 'think' field MUST be 150-250 words and include terms: 'tradeoff', 'precision', "
        "'recall', 'threshold', 'impact', 'evidence'.\n"
        "5. JSON ONLY: Output ONLY the JSON object. No preamble.\n"
        "6. INCREMENTALISM: If your previous score was high (>0.80), focus on surgical precision rather than holistic rewriting. "
        "DO NOT add words that create ambiguity."
    )

    # Words flagged as vague in a task_easy definition. Matching is by
    # substring on the lower-cased text.
    # NOTE(review): substring matching also hits e.g. "often" inside "soften";
    # presumably this mirrors the environment's grader - confirm before tightening.
    _VAGUE_WORDS = (
        "might", "could", "perhaps", "sometimes", "often",
        "generally", "usually", "typically", "may", "possibly",
    )
    # Tokens counted as evidence of measurable criteria (substring match).
    _MEASURABLE_TOKENS = (
        "threshold", "verify", "days", "$", "%", "reports",
        "hours", "within", "exceed", "minimum", "must", "shall",
    )

    def __init__(self, model: str):
        self.model = model
        self.action_history: list = []  # actions returned by get_action, appended by the caller
        self.score_history: list = []  # rewards earned by those actions, appended by the caller

    @staticmethod
    def _parse_json_object(raw: str) -> dict:
        """Extract a JSON object from raw model output.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON, the span from the first ``{`` to the last ``}`` is tried.

        Raises:
            json.JSONDecodeError: If no parseable JSON is found.
            ValueError: If the parsed value is not a JSON object.
        """
        text = raw.strip()
        # Strip markdown fences
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0].strip()
        elif "```" in text:
            text = text.split("```")[1].split("```")[0].strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            start = text.find("{")
            end = text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start : end + 1])
        if not isinstance(parsed, dict):
            # Callers treat the result as a mapping; a list or scalar would
            # only fail later with a less helpful error.
            raise ValueError(f"Expected a JSON object from the model, got {type(parsed).__name__}")
        return parsed

    def _call_llm(self, client: "OpenAI", prompt: str) -> Optional[dict]:
        """Call the LLM and robustly parse the JSON response.

        Any failure (transport, empty content, unparseable or non-object JSON)
        is reported on stderr together with the raw content when available,
        then re-raised to the caller.
        """
        raw = None
        try:
            resp = client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self.SYSTEM_PROMPT},
                    {"role": "user", "content": prompt},
                ],
                max_tokens=1024,
                temperature=TEMPERATURE,
                seed=42,
            )
            raw = resp.choices[0].message.content
            if raw is None:
                raise ValueError("LLM response contained no message content")
            return self._parse_json_object(raw)
        except Exception as e:
            print(f"[DEBUG] LLM Call Error: {e}", file=sys.stderr)
            if raw is not None:
                print(f"[DEBUG] Raw content: {raw}", file=sys.stderr)
            raise

    def _build_feedback(self, step: int, last_score: float, last_action: dict, task_id: str) -> str:
        """Build diagnostic feedback from previous step for in-context learning.

        Returns an empty string on the first step or when there is no usable
        previous action. For scores of 0.80 and above a fixed "surgical
        refinement" instruction block is returned; otherwise task-specific
        failure diagnostics, a short score history, and a target score.
        """
        if step == 0 or not last_action or not isinstance(last_action, dict):
            return ""
        if last_score is None:
            last_score = 0.0

        # Surgical Refinement Guard: high scores get a fixed instruction block
        # and none of the diagnostics below.
        if last_score >= 0.80:
            return "\n".join([
                f"\n=== SURGICAL REFINEMENT (Step {step}) ===",
                f"Current Score: {last_score:.3f} — EXCELLENT.",
                "CRITICAL: Do NOT rewrite the policy. Only perform 'surgical' removals or additions.",
                "1. CHECK: Remove 'might', 'could', 'perhaps', 'sometimes', 'often' if present.",
                "2. CHECK: Ensure words count >= 12. Add one more specific metric (%, hours, $) if needed.",
                "Do NOT add any words that could be seen as vague. Aim for 0.95+.",
            ])

        lines = [
            f"\n=== STRATEGIC FEEDBACK (Step {step}) ===",
            f"Previous score: {last_score:.3f} / 1.000",
        ]
        if task_id == "task_easy":
            # `or ""` tolerates an explicit null coming back from the model.
            defn = str(last_action.get("suggested_definition") or "")
            lowered = defn.lower()
            found = [w for w in self._VAGUE_WORDS if w in lowered]
            mfound = [w for w in self._MEASURABLE_TOKENS if w in lowered]
            if found:
                lines.append(f"FAILURE: Vague words detected: {found}. Remove them entirely.")
            if len(mfound) < 2:
                lines.append("FAILURE: Missing measurable criteria. Add numbers, hours, percentages.")
            if len(defn.split()) < 15:
                lines.append("FAILURE: Definition too short. Minimum 15 words.")
        elif task_id == "task_medium":
            if not str(last_action.get("rule_domain") or "").strip():
                lines.append("FAILURE: rule_domain was empty.")
            if len(str(last_action.get("new_rule") or "").split()) < 10:
                lines.append("FAILURE: New rule too short.")
        elif task_id == "task_hard":
            outcomes = last_action.get("expected_outcomes")
            if isinstance(outcomes, dict) and len(outcomes) >= 2:
                vals = [v for v in outcomes.values() if isinstance(v, (int, float))]
                # Values above 1 are treated as percentages and rescaled to 0..1.
                vals = [v / 100 if v > 1 else v for v in vals]
                if vals and all(v > 0.70 for v in vals):
                    lines.append("FAILURE: Unrealistic tradeoff — all metrics > 0.70. Model friction.")
            mods = last_action.get("policy_modifications")
            mod_count = len(mods) if isinstance(mods, (list, tuple)) else 0
            if mod_count < 2:
                lines.append("FAILURE: Need >= 2 policy_modifications.")

        # Append history summaries. Pair entries by position before taking the
        # tail so an action is never shown next to another step's score.
        paired = list(zip(self.action_history, self.score_history))[-3:]
        for act, sc in paired:
            label = act.get("action_type", "?") if isinstance(act, dict) else "?"
            lines.append(f" [{sc:.2f}] {label}")

        target = min(last_score + 0.20, 0.95)
        lines.append(f"\nYour next proposal MUST score above {target:.2f}. Be more specific.")
        return "\n".join(lines)

    def get_action(self, client: "OpenAI", task_id: str, obs: dict) -> dict:
        """Generate the next strategic action for the given task.

        Builds a task-specific prompt from the observation (plus feedback on
        the previous step) and returns the JSON object produced by the LLM.
        Any task id other than ``task_easy``/``task_medium`` uses the policy
        evolution prompt. Errors from the LLM call propagate to the caller.
        """
        info = obs.get("info") or {}  # tolerate a missing or null info block
        step = obs.get("step_count", 0)
        last_score = info.get("last_reward", 0.0)
        last_action = info.get("last_action", {})
        feedback = self._build_feedback(step, last_score, last_action, task_id)

        if task_id == "task_easy":
            prompt = (
                f"POLICIES: {obs.get('current_policies', [])}\n"
                f"DATA: {(obs.get('data_corpus') or [])[:5]}\n{feedback}\n"
                "TASK: Propose clarification for an ambiguous term with a measurable definition.\n"
                "JSON: {\"action_type\": \"propose_clarification\", \"ambiguous_term\": \"...\", "
                "\"suggested_definition\": \"...\", \"affected_policy_ids\": [\"str\"], "
                "\"justification\": \"...\", \"think\": \"...\"}"
            )
        elif task_id == "task_medium":
            prompt = (
                f"POLICIES: {obs.get('current_policies', [])}\n"
                f"DATA: {obs.get('data_corpus', [])}\n{feedback}\n"
                "TASK: Propose a new rule for a coverage gap. Use mandatory language.\n"
                "JSON: {\"action_type\": \"propose_new_rule\", \"rule_domain\": \"...\", "
                "\"new_rule\": \"...\", \"scope\": [\"str\"], \"integration_points\": [\"str\"], "
                "\"justification\": \"...\", \"think\": \"...\"}"
            )
        else:
            prompt = (
                f"METRICS: {obs.get('system_metrics', {})}\n"
                f"ISSUES: {obs.get('identified_issues', [])}\n{feedback}\n"
                "TASK: Evolve policies with realistic tradeoffs.\n"
                "JSON: {\"action_type\": \"evolve_policy\", \"policy_modifications\": "
                "[{\"policy_id\": \"...\", \"change_type\": \"enhance|restrict|add|remove\", "
                "\"new_text\": \"...\", \"reason\": \"...\"}], \"expected_outcomes\": "
                "{\"fraud_rate\": 0.8, \"revenue_velocity\": 0.4}, "
                "\"rollback_conditions\": [\"...\"], \"justification\": \"...\", \"think\": \"...\"}"
            )
        return self._call_llm(client, prompt)
# --- Episode Runner ---
async def run_episode(client: Optional[OpenAI], env: Optional[PolicyEvolverEnv], task_id: str, setup_error: Optional[Exception] = None) -> dict:
    """Run a single task episode following the hackathon format.

    Emits one ``[START]`` record, one ``[STEP]`` record per step, and exactly
    one ``[END]`` record on stdout.

    Args:
        client: OpenAI client used by the agent, or None if setup failed.
        env: Connected environment client, or None if setup failed.
        task_id: Task identifier passed to ``env.reset``.
        setup_error: Error captured during setup, if any.

    Returns:
        A summary dict with keys ``task``, ``success``, ``steps``, ``score``
        and ``rewards`` when the episode runs to completion.

    Raises:
        SystemExit: With code 1 after logging a failed ``[END]`` record when
            setup failed, the client/environment is missing, or an unexpected
            runtime error occurs outside the per-step handlers.
    """
    log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)

    if setup_error:
        print(f"[FATAL] Setup Error: {setup_error}", file=sys.stderr)
        log_step(step=1, action="setup", reward=0.0, done=True, error=str(setup_error))
        log_end(task=task_id, success=False, steps=0, score=0.0, rewards=[])
        sys.exit(1)
    if not client or not env:
        print("[FATAL] Client or Environment not initialized", file=sys.stderr)
        log_step(step=1, action="setup", reward=0.0, done=True, error="Client or Environment not initialized")
        log_end(task=task_id, success=False, steps=0, score=0.0, rewards=[])
        sys.exit(1)

    agent = PolicyEvolverAgent(MODEL_NAME)
    rewards: List[float] = []
    steps_taken = 0

    try:
        result = await env.reset(task_id=task_id)
        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break

            # Get observation as dict
            obs_dict = result.observation
            if hasattr(obs_dict, "model_dump"):
                obs_dict = obs_dict.model_dump()
            elif not isinstance(obs_dict, dict):
                obs_dict = dict(obs_dict)

            # Agent decides action (graceful failure per step)
            try:
                action_dict = agent.get_action(client, task_id, obs_dict)
            except Exception as e:
                # LLM call failed: log the error for this step and end the episode
                # so the caller can move on to the next task.
                print(f"[DEBUG] LLM error on step {step}: {e}", file=sys.stderr)
                log_step(step=step, action="llm_error", reward=0.0, done=True, error=str(e))
                rewards.append(0.0)
                steps_taken = step
                break
            agent.action_history.append(action_dict)

            # Validate and step; a rejected action ends the episode with reward 0.
            error = None
            try:
                action_obj = Action.model_validate(action_dict)
                result = await env.step(action_obj)
                reward = result.reward or 0.0
                done = result.done
            except Exception as e:
                reward = 0.0
                done = True
                error = str(e)

            rewards.append(reward)
            agent.score_history.append(reward)
            steps_taken = step
            # The agent may hand back something that is not a mapping; the step
            # record is still written rather than aborting the whole run.
            if isinstance(action_dict, dict):
                act_name = action_dict.get("action_type", "unknown")
            else:
                act_name = "unknown"
            log_step(step=step, action=act_name, reward=reward, done=done, error=error)
            if done:
                break
    except Exception as e:
        print(f"[FATAL] Runtime Error: {e}", file=sys.stderr)
        log_step(step=steps_taken + 1, action="error", reward=0.0, done=True, error=str(e))
        log_end(task=task_id, success=False, steps=steps_taken, score=0.0, rewards=rewards)
        sys.exit(1)

    # Only reached when the episode finished without a fatal error, so the
    # [END] record is written exactly once on every path.
    score = rewards[-1] if rewards else 0.0
    success = score >= SUCCESS_THRESHOLD
    log_end(task=task_id, success=success, steps=steps_taken, score=score, rewards=rewards)
    return {
        "task": task_id,
        "success": success,
        "steps": steps_taken,
        "score": score,
        "rewards": rewards,
    }
# --- Main Entry Point ---
async def main() -> None:
    """Set up the LLM client and environment, run every task, then clean up.

    Setup failures are captured rather than raised so that ``run_episode`` can
    still emit structured logs for the failure. The environment is closed in a
    ``finally`` block so it is released even when an episode terminates the
    process through ``sys.exit``.
    """
    client = None
    env = None
    setup_error = None

    # 1. Initialize OpenAI Client
    try:
        if not API_KEY or not API_BASE_URL:
            raise Exception("Missing mandatory environment variables: API_KEY and/or API_BASE_URL")
        client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    except Exception as e:
        setup_error = Exception(f"OpenAI client initialization failed: {e}")

    # 2. Initialize Environment
    if not setup_error:
        try:
            if IMAGE_NAME:
                # Manually handle Docker startup to override the 30s library default
                from openenv.core.containers.runtime.providers import LocalDockerProvider

                provider = LocalDockerProvider()
                base_url = provider.start_container(IMAGE_NAME)
                print(f"[DEBUG] Waiting for container {IMAGE_NAME} at {base_url} (Extended Timeout 120s)...", flush=True)
                provider.wait_for_ready(base_url, timeout_s=120.0)
                env = PolicyEvolverEnv(base_url=base_url, provider=provider)
                await env.connect()
            else:
                # No image given: talk to an already-running local server.
                local_url = os.environ.get("ENV_BASE_URL", "http://127.0.0.1:8000")
                env = PolicyEvolverEnv(base_url=local_url)
        except Exception as e:
            setup_error = Exception(f"Environment initialization failed: {e}")

    try:
        # 3. Always loop over tasks to ensure structured logs
        for task in ("task_easy", "task_medium", "task_hard"):
            await run_episode(client, env, task, setup_error=setup_error)
    finally:
        # 4. Final Cleanup - runs even when run_episode raises SystemExit, so
        # a started container/connection is not left behind.
        if env:
            try:
                await env.close()
            except Exception as e:
                print(f"[DEBUG] env.close() error: {e}", flush=True)


if __name__ == "__main__":
    asyncio.run(main())
|