Commit ·
f020509
1
Parent(s): a4be35d
refactor: Inference and readme
Browse files
- README.md +8 -10
- baseline_scores_heuristic.json +0 -58
- inference.py +9 -0
README.md
CHANGED
|
@@ -267,24 +267,22 @@ LLM and RL agents are expected to exceed these scores.
|
|
| 267 |
```
|
| 268 |
gridmind-rl/
|
| 269 |
+-- main.go # HTTP server & OpenEnv API
|
| 270 |
-
+-- inference.py # Agent entry point
|
| 271 |
+-- openenv.yaml # OpenEnv spec
|
| 272 |
+-- Dockerfile # Container build
|
| 273 |
+-- env/
|
| 274 |
+-- environment.go # Physics simulation
|
| 275 |
-
+-- models.go
|
| 276 |
-
+-- rewards.go
|
| 277 |
-
+-- tasks.go
|
| 278 |
-
+--
|
| 279 |
-
+--
|
| 280 |
-
+-- models.py # Pydantic models
|
| 281 |
-
+-- requirements.txt
|
| 282 |
+-- dashboard/
|
| 283 |
+-- server.py # Web server (port 7861)
|
| 284 |
-
+-- static/
|
| 285 |
+-- data/
|
| 286 |
+-- price_curves.json # Price data
|
| 287 |
-
+-- generate_prices.py
|
| 288 |
+-- tests/
|
| 289 |
+-- test_graders.py # Python tests
|
| 290 |
+-- environment_test.go # Go tests
|
|
|
|
| 267 |
```
|
| 268 |
gridmind-rl/
|
| 269 |
+-- main.go # HTTP server & OpenEnv API
|
| 270 |
+
+-- inference.py # Agent entry point (LLM + heuristic)
|
| 271 |
+-- openenv.yaml # OpenEnv spec
|
| 272 |
+-- Dockerfile # Container build
|
| 273 |
+-- env/
|
| 274 |
+-- environment.go # Physics simulation
|
| 275 |
+
+-- models.go # Data models
|
| 276 |
+
+-- rewards.go # Reward computation
|
| 277 |
+
+-- tasks.go # Task grading
|
| 278 |
+
+-- server/
|
| 279 |
+
+-- app.py # Server entry point
|
|
|
|
|
|
|
| 280 |
+-- dashboard/
|
| 281 |
+-- server.py # Web server (port 7861)
|
| 282 |
+
+-- static/ # Frontend assets
|
| 283 |
+-- data/
|
| 284 |
+-- price_curves.json # Price data
|
| 285 |
+
+-- generate_prices.py # Price generator
|
| 286 |
+-- tests/
|
| 287 |
+-- test_graders.py # Python tests
|
| 288 |
+-- environment_test.go # Go tests
|
baseline_scores_heuristic.json
DELETED
|
@@ -1,58 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model": "meta-llama/llama-3.3-70b-instruct:free",
|
| 3 |
-
"api_base": "https://openrouter.ai/api/v1",
|
| 4 |
-
"episodes_per_task": 1,
|
| 5 |
-
"seed_base": 1000,
|
| 6 |
-
"fast_mode": true,
|
| 7 |
-
"llm_every": 4,
|
| 8 |
-
"max_steps": null,
|
| 9 |
-
"task_averages": {
|
| 10 |
-
"1": 0.708,
|
| 11 |
-
"2": 0.6328,
|
| 12 |
-
"3": 0.5983
|
| 13 |
-
},
|
| 14 |
-
"overall_average": 0.6463666666666666,
|
| 15 |
-
"all_results": [
|
| 16 |
-
{
|
| 17 |
-
"task_id": 1,
|
| 18 |
-
"seed": 1100,
|
| 19 |
-
"total_reward": 246.42219784256966,
|
| 20 |
-
"total_steps": 94,
|
| 21 |
-
"elapsed_sec": 1.5613129138946533,
|
| 22 |
-
"score": 0.708,
|
| 23 |
-
"sub_scores": {
|
| 24 |
-
"cost": 0.7079636116620143
|
| 25 |
-
},
|
| 26 |
-
"exploit_detected": false
|
| 27 |
-
},
|
| 28 |
-
{
|
| 29 |
-
"task_id": 2,
|
| 30 |
-
"seed": 1200,
|
| 31 |
-
"total_reward": 242.81120610868118,
|
| 32 |
-
"total_steps": 95,
|
| 33 |
-
"elapsed_sec": 1.594855785369873,
|
| 34 |
-
"score": 0.6328,
|
| 35 |
-
"sub_scores": {
|
| 36 |
-
"cost": 0.7005224090103834,
|
| 37 |
-
"temperature": 0.53125
|
| 38 |
-
},
|
| 39 |
-
"exploit_detected": false
|
| 40 |
-
},
|
| 41 |
-
{
|
| 42 |
-
"task_id": 3,
|
| 43 |
-
"seed": 1300,
|
| 44 |
-
"total_reward": 251.7133773862143,
|
| 45 |
-
"total_steps": 94,
|
| 46 |
-
"elapsed_sec": 1.6321852207183838,
|
| 47 |
-
"score": 0.5983,
|
| 48 |
-
"sub_scores": {
|
| 49 |
-
"batch_deadline": 1,
|
| 50 |
-
"carbon": 0.6563888726735232,
|
| 51 |
-
"cost": 0.6695079035324871,
|
| 52 |
-
"grid_response": 0.21428571428571427,
|
| 53 |
-
"temperature": 0.5833333333333334
|
| 54 |
-
},
|
| 55 |
-
"exploit_detected": false
|
| 56 |
-
}
|
| 57 |
-
]
|
| 58 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference.py
CHANGED
|
@@ -290,11 +290,15 @@ Respond with ONLY a JSON action:
|
|
| 290 |
|
| 291 |
# ── Environment Client ────────────────────────────────────────────────────────
|
| 292 |
class GridMindEnvClient:
|
|
|
|
|
|
|
| 293 |
def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
|
|
|
|
| 294 |
self.base = base_url.rstrip("/")
|
| 295 |
self.timeout = timeout
|
| 296 |
|
| 297 |
def health(self) -> bool:
|
|
|
|
| 298 |
try:
|
| 299 |
r = requests.get(f"{self.base}/health", timeout=5)
|
| 300 |
return r.status_code == 200
|
|
@@ -302,6 +306,7 @@ class GridMindEnvClient:
|
|
| 302 |
return False
|
| 303 |
|
| 304 |
def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
|
|
|
|
| 305 |
try:
|
| 306 |
payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
|
| 307 |
r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
|
|
@@ -312,6 +317,7 @@ class GridMindEnvClient:
|
|
| 312 |
return None
|
| 313 |
|
| 314 |
def step(self, action: dict) -> Optional[dict]:
|
|
|
|
| 315 |
try:
|
| 316 |
r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
|
| 317 |
r.raise_for_status()
|
|
@@ -321,6 +327,7 @@ class GridMindEnvClient:
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
def grade(self) -> dict:
|
|
|
|
| 324 |
try:
|
| 325 |
r = requests.get(f"{self.base}/grade", timeout=self.timeout)
|
| 326 |
r.raise_for_status()
|
|
@@ -330,6 +337,7 @@ class GridMindEnvClient:
|
|
| 330 |
return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
|
| 331 |
|
| 332 |
def state(self) -> Optional[dict]:
|
|
|
|
| 333 |
try:
|
| 334 |
r = requests.get(f"{self.base}/state", timeout=self.timeout)
|
| 335 |
r.raise_for_status()
|
|
@@ -339,6 +347,7 @@ class GridMindEnvClient:
|
|
| 339 |
return None
|
| 340 |
|
| 341 |
def close(self) -> None:
|
|
|
|
| 342 |
return None
|
| 343 |
|
| 344 |
|
|
|
|
| 290 |
|
| 291 |
# ── Environment Client ────────────────────────────────────────────────────────
|
| 292 |
class GridMindEnvClient:
|
| 293 |
+
"""HTTP client for the GridMind-RL Go environment server."""
|
| 294 |
+
|
| 295 |
def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
|
| 296 |
+
"""Initialize client with base URL and timeout."""
|
| 297 |
self.base = base_url.rstrip("/")
|
| 298 |
self.timeout = timeout
|
| 299 |
|
| 300 |
def health(self) -> bool:
|
| 301 |
+
"""Check if the environment server is healthy."""
|
| 302 |
try:
|
| 303 |
r = requests.get(f"{self.base}/health", timeout=5)
|
| 304 |
return r.status_code == 200
|
|
|
|
| 306 |
return False
|
| 307 |
|
| 308 |
def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
|
| 309 |
+
"""Start a new episode with the given task and seed."""
|
| 310 |
try:
|
| 311 |
payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
|
| 312 |
r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
|
|
|
|
| 317 |
return None
|
| 318 |
|
| 319 |
def step(self, action: dict) -> Optional[dict]:
|
| 320 |
+
"""Take an action and receive the next observation and reward."""
|
| 321 |
try:
|
| 322 |
r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
|
| 323 |
r.raise_for_status()
|
|
|
|
| 327 |
return None
|
| 328 |
|
| 329 |
def grade(self) -> dict:
|
| 330 |
+
"""Get the episode grade/score after completion."""
|
| 331 |
try:
|
| 332 |
r = requests.get(f"{self.base}/grade", timeout=self.timeout)
|
| 333 |
r.raise_for_status()
|
|
|
|
| 337 |
return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
|
| 338 |
|
| 339 |
def state(self) -> Optional[dict]:
|
| 340 |
+
"""Get the current environment state."""
|
| 341 |
try:
|
| 342 |
r = requests.get(f"{self.base}/state", timeout=self.timeout)
|
| 343 |
r.raise_for_status()
|
|
|
|
| 347 |
return None
|
| 348 |
|
| 349 |
def close(self) -> None:
|
| 350 |
+
"""Close the client connection (no-op for HTTP)."""
|
| 351 |
return None
|
| 352 |
|
| 353 |
|