Spaces:

LO-Kyu
/

gridmind

Running

App Files Community

ShreeshantXD committed 12 days ago

Commit

f020509

1 Parent(s): a4be35d

refactor: Inference and readme

Browse files

Files changed (3) hide show

README.md +8 -10
baseline_scores_heuristic.json +0 -58
inference.py +9 -0

README.md CHANGED Viewed

@@ -267,24 +267,22 @@ LLM and RL agents are expected to exceed these scores.
 ```
 gridmind-rl/
 +-- main.go                    # HTTP server & OpenEnv API
-+-- inference.py               # Agent entry point
 +-- openenv.yaml               # OpenEnv spec
 +-- Dockerfile                 # Container build
 +-- env/
     +-- environment.go         # Physics simulation
-    +-- models.go              # Data models
-    +-- rewards.go             # Reward computation
-    +-- tasks.go               # Task grading
-+-- python/
-    +-- inference.py           # LLM agent
-    +-- models.py              # Pydantic models
-    +-- requirements.txt
 +-- dashboard/
     +-- server.py              # Web server (port 7861)
-    +-- static/                # Frontend assets
 +-- data/
     +-- price_curves.json      # Price data
-    +-- generate_prices.py     # Price generator
 +-- tests/
     +-- test_graders.py        # Python tests
     +-- environment_test.go    # Go tests

 ```
 gridmind-rl/
 +-- main.go                    # HTTP server & OpenEnv API
++-- inference.py               # Agent entry point (LLM + heuristic)
 +-- openenv.yaml               # OpenEnv spec
 +-- Dockerfile                 # Container build
 +-- env/
     +-- environment.go         # Physics simulation
+    +-- models.go              # Data models
+    +-- rewards.go             # Reward computation
+    +-- tasks.go               # Task grading
++-- server/
+    +-- app.py                 # Server entry point
 +-- dashboard/
     +-- server.py              # Web server (port 7861)
+    +-- static/                # Frontend assets
 +-- data/
     +-- price_curves.json      # Price data
+    +-- generate_prices.py     # Price generator
 +-- tests/
     +-- test_graders.py        # Python tests
     +-- environment_test.go    # Go tests

baseline_scores_heuristic.json DELETED Viewed

@@ -1,58 +0,0 @@
-{
-  "model": "meta-llama/llama-3.3-70b-instruct:free",
-  "api_base": "https://openrouter.ai/api/v1",
-  "episodes_per_task": 1,
-  "seed_base": 1000,
-  "fast_mode": true,
-  "llm_every": 4,
-  "max_steps": null,
-  "task_averages": {
-    "1": 0.708,
-    "2": 0.6328,
-    "3": 0.5983
-  },
-  "overall_average": 0.6463666666666666,
-  "all_results": [
-    {
-      "task_id": 1,
-      "seed": 1100,
-      "total_reward": 246.42219784256966,
-      "total_steps": 94,
-      "elapsed_sec": 1.5613129138946533,
-      "score": 0.708,
-      "sub_scores": {
-        "cost": 0.7079636116620143
-      },
-      "exploit_detected": false
-    },
-    {
-      "task_id": 2,
-      "seed": 1200,
-      "total_reward": 242.81120610868118,
-      "total_steps": 95,
-      "elapsed_sec": 1.594855785369873,
-      "score": 0.6328,
-      "sub_scores": {
-        "cost": 0.7005224090103834,
-        "temperature": 0.53125
-      },
-      "exploit_detected": false
-    },
-    {
-      "task_id": 3,
-      "seed": 1300,
-      "total_reward": 251.7133773862143,
-      "total_steps": 94,
-      "elapsed_sec": 1.6321852207183838,
-      "score": 0.5983,
-      "sub_scores": {
-        "batch_deadline": 1,
-        "carbon": 0.6563888726735232,
-        "cost": 0.6695079035324871,
-        "grid_response": 0.21428571428571427,
-        "temperature": 0.5833333333333334
-      },
-      "exploit_detected": false
-    }
-  ]
-}

inference.py CHANGED Viewed

@@ -290,11 +290,15 @@ Respond with ONLY a JSON action:
 # ── Environment Client ────────────────────────────────────────────────────────
 class GridMindEnvClient:
     def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
         self.base = base_url.rstrip("/")
         self.timeout = timeout
     def health(self) -> bool:
         try:
             r = requests.get(f"{self.base}/health", timeout=5)
             return r.status_code == 200
@@ -302,6 +306,7 @@ class GridMindEnvClient:
             return False
     def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
         try:
             payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
             r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
@@ -312,6 +317,7 @@ class GridMindEnvClient:
             return None
     def step(self, action: dict) -> Optional[dict]:
         try:
             r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
             r.raise_for_status()
@@ -321,6 +327,7 @@ class GridMindEnvClient:
             return None
     def grade(self) -> dict:
         try:
             r = requests.get(f"{self.base}/grade", timeout=self.timeout)
             r.raise_for_status()
@@ -330,6 +337,7 @@ class GridMindEnvClient:
             return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
     def state(self) -> Optional[dict]:
         try:
             r = requests.get(f"{self.base}/state", timeout=self.timeout)
             r.raise_for_status()
@@ -339,6 +347,7 @@ class GridMindEnvClient:
             return None
     def close(self) -> None:
         return None

 # ── Environment Client ────────────────────────────────────────────────────────
 class GridMindEnvClient:
+    """HTTP client for the GridMind-RL Go environment server."""
     def __init__(self, base_url: str = ENV_URL, timeout: int = 30):
+        """Initialize client with base URL and timeout."""
         self.base = base_url.rstrip("/")
         self.timeout = timeout
     def health(self) -> bool:
+        """Check if the environment server is healthy."""
         try:
             r = requests.get(f"{self.base}/health", timeout=5)
             return r.status_code == 200
             return False
     def reset(self, task_id: int = 1, seed: int = 42, num_buildings: int = 1) -> Optional[dict]:
+        """Start a new episode with the given task and seed."""
         try:
             payload = {"task_id": task_id, "seed": seed, "num_buildings": num_buildings}
             r = requests.post(f"{self.base}/reset", json=payload, timeout=self.timeout)
             return None
     def step(self, action: dict) -> Optional[dict]:
+        """Take an action and receive the next observation and reward."""
         try:
             r = requests.post(f"{self.base}/step", json=action, timeout=self.timeout)
             r.raise_for_status()
             return None
     def grade(self) -> dict:
+        """Get the episode grade/score after completion."""
         try:
             r = requests.get(f"{self.base}/grade", timeout=self.timeout)
             r.raise_for_status()
             return {"score": SCORE_EPSILON, "sub_scores": {}, "exploit_detected": False}
     def state(self) -> Optional[dict]:
+        """Get the current environment state."""
         try:
             r = requests.get(f"{self.base}/state", timeout=self.timeout)
             r.raise_for_status()
             return None
     def close(self) -> None:
+        """Close the client connection (no-op for HTTP)."""
         return None