ShreeshantXD committed on
Commit
f020509
·
1 Parent(s): a4be35d

refactor: Inference and readme

Browse files
Files changed (3) hide show
  1. README.md +8 -10
  2. baseline_scores_heuristic.json +0 -58
  3. inference.py +9 -0
README.md CHANGED
@@ -267,24 +267,22 @@ LLM and RL agents are expected to exceed these scores.
267
  ```
268
  gridmind-rl/
269
  +-- main.go # HTTP server & OpenEnv API
270
- +-- inference.py # Agent entry point
271
  +-- openenv.yaml # OpenEnv spec
272
  +-- Dockerfile # Container build
273
  +-- env/
274
  +-- environment.go # Physics simulation
275
- +-- models.go # Data models
276
- +-- rewards.go # Reward computation
277
- +-- tasks.go # Task grading
278
- +-- python/
279
- +-- inference.py # LLM agent
280
- +-- models.py # Pydantic models
281
- +-- requirements.txt
282
  +-- dashboard/
283
  +-- server.py # Web server (port 7861)
284
- +-- static/ # Frontend assets
285
  +-- data/
286
  +-- price_curves.json # Price data
287
- +-- generate_prices.py # Price generator
288
  +-- tests/
289
  +-- test_graders.py # Python tests
290
  +-- environment_test.go # Go tests
 
267
  ```
268
  gridmind-rl/
269
  +-- main.go # HTTP server & OpenEnv API
270
+ +-- inference.py # Agent entry point (LLM + heuristic)
271
  +-- openenv.yaml # OpenEnv spec
272
  +-- Dockerfile # Container build
273
  +-- env/
274
  +-- environment.go # Physics simulation
275
+ +-- models.go # Data models
276
+ +-- rewards.go # Reward computation
277
+ +-- tasks.go # Task grading
278
+ +-- server/
279
+ +-- app.py # Server entry point
 
 
280
  +-- dashboard/
281
  +-- server.py # Web server (port 7861)
282
+ +-- static/ # Frontend assets
283
  +-- data/
284
  +-- price_curves.json # Price data
285
+ +-- generate_prices.py # Price generator
286
  +-- tests/
287
  +-- test_graders.py # Python tests
288
  +-- environment_test.go # Go tests
baseline_scores_heuristic.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "model": "meta-llama/llama-3.3-70b-instruct:free",
3
- "api_base": "https://openrouter.ai/api/v1",
4
- "episodes_per_task": 1,
5
- "seed_base": 1000,
6
- "fast_mode": true,
7
- "llm_every": 4,
8
- "max_steps": null,
9
- "task_averages": {
10
- "1": 0.708,
11
- "2": 0.6328,
12
- "3": 0.5983
13
- },
14
- "overall_average": 0.6463666666666666,
15
- "all_results": [
16
- {
17
- "task_id": 1,
18
- "seed": 1100,
19
- "total_reward": 246.42219784256966,
20
- "total_steps": 94,
21
- "elapsed_sec": 1.5613129138946533,
22
- "score": 0.708,
23
- "sub_scores": {
24
- "cost": 0.7079636116620143
25
- },
26
- "exploit_detected": false
27
- },
28
- {
29
- "task_id": 2,
30
- "seed": 1200,
31
- "total_reward": 242.81120610868118,
32
- "total_steps": 95,
33
- "elapsed_sec": 1.594855785369873,
34
- "score": 0.6328,
35
- "sub_scores": {
36
- "cost": 0.7005224090103834,
37
- "temperature": 0.53125
38
- },
39
- "exploit_detected": false
40
- },
41
- {
42
- "task_id": 3,
43
- "seed": 1300,
44
- "total_reward": 251.7133773862143,
45
- "total_steps": 94,
46
- "elapsed_sec": 1.6321852207183838,
47
- "score": 0.5983,
48
- "sub_scores": {
49
- "batch_deadline": 1,
50
- "carbon": 0.6563888726735232,
51
- "cost": 0.6695079035324871,
52
- "grid_response": 0.21428571428571427,
53
- "temperature": 0.5833333333333334
54
- },
55
- "exploit_detected": false
56
- }
57
- ]
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -290,11 +290,15 @@ Respond with ONLY a JSON action:
290
 
291
  # ── Environment Client ────────────────────────────────────────────────────────
292
  class GridMindEnvClient:
 
 
293
  def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
 
294
  self.base = base_url.rstrip("/")
295
  self.timeout = timeout
296
 
297
  def health(self) -> bool:
 
298
  try:
299
  r = requests.get(f"{self.base}/health", timeout=5)
300
  return r.status_code == 200
@@ -302,6 +306,7 @@ class GridMindEnvClient:
302
  return False
303
 
304
  def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
 
305
  try:
306
  payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
307
  r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
@@ -312,6 +317,7 @@ class GridMindEnvClient:
312
  return None
313
 
314
  def step(self, action: dict) -> Optional[dict]:
 
315
  try:
316
  r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
317
  r.raise_for_status()
@@ -321,6 +327,7 @@ class GridMindEnvClient:
321
  return None
322
 
323
  def grade(self) -> dict:
 
324
  try:
325
  r = requests.get(f"{self.base}/grade", timeout=self.timeout)
326
  r.raise_for_status()
@@ -330,6 +337,7 @@ class GridMindEnvClient:
330
  return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
331
 
332
  def state(self) -> Optional[dict]:
 
333
  try:
334
  r = requests.get(f"{self.base}/state", timeout=self.timeout)
335
  r.raise_for_status()
@@ -339,6 +347,7 @@ class GridMindEnvClient:
339
  return None
340
 
341
  def close(self) -> None:
 
342
  return None
343
 
344
 
 
290
 
291
  # ── Environment Client ────────────────────────────────────────────────────────
292
  class GridMindEnvClient:
293
+ """HTTP client for the GridMind-RL Go environment server."""
294
+
295
  def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
296
+ """Initialize client with base URL and timeout."""
297
  self.base = base_url.rstrip("/")
298
  self.timeout = timeout
299
 
300
  def health(self) -> bool:
301
+ """Check if the environment server is healthy."""
302
  try:
303
  r = requests.get(f"{self.base}/health", timeout=5)
304
  return r.status_code == 200
 
306
  return False
307
 
308
  def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
309
+ """Start a new episode with the given task and seed."""
310
  try:
311
  payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
312
  r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
 
317
  return None
318
 
319
  def step(self, action: dict) -> Optional[dict]:
320
+ """Take an action and receive the next observation and reward."""
321
  try:
322
  r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
323
  r.raise_for_status()
 
327
  return None
328
 
329
  def grade(self) -> dict:
330
+ """Get the episode grade/score after completion."""
331
  try:
332
  r = requests.get(f"{self.base}/grade", timeout=self.timeout)
333
  r.raise_for_status()
 
337
  return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
338
 
339
  def state(self) -> Optional[dict]:
340
+ """Get the current environment state."""
341
  try:
342
  r = requests.get(f"{self.base}/state", timeout=self.timeout)
343
  r.raise_for_status()
 
347
  return None
348
 
349
  def close(self) -> None:
350
+ """Close the client connection (no-op for HTTP)."""
351
  return None
352
 
353