Spaces:

Prajwal782007
/

Gridmind

Running

App Files Community

ShreeshantXD committed on Apr 3

Commit

2e0c292

2 Parent(s): 574589d f1bfee9

fix: resolve merge conflicts, finalize inference + README

Browse files

Files changed (7) hide show

.gitignore +0 -0
README.md +2 -2
__pycache__/inference.cpython-311.pyc +0 -0
env/rewards.go +8 -8
openenv.yaml +1 -0
python/__pycache__/inference.cpython-311.pyc +0 -0
python/inference.py +10 -4

.gitignore ADDED Viewed

File without changes

README.md CHANGED Viewed

@@ -42,8 +42,8 @@ docker build -t gridmind-rl .
 docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
 ```
-- **7860** — Environment API (OpenEnv / agent traffic)
-- **7861** — Web dashboard (optional)
 **Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.

 docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
 ```
+- **7860** — Environment API (OpenEnv / agent traffic); http://localhost:7860
+- **7861** — Web dashboard (optional); http://localhost:7861
 **Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.

__pycache__/inference.cpython-311.pyc ADDED Viewed

Binary file (691 Bytes). View file

env/rewards.go CHANGED Viewed

@@ -31,10 +31,10 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	rc := RewardComponents{}
 	// ── 1. Cost Savings ─────────────────────────────────────────────────────
-	// Negative reward proportional to energy cost. Normalised by typical step cost.
-	// Typical step cost at full load, peak price: 50kW * 0.25h * 0.32 = $4.00.
 	typicalCost := 4.0
-	rc.CostSavings = -(inp.StepCost / typicalCost) * 2.0
 	// ── 2. Temperature Constraint ────────────────────────────────────────────
 	// Only active for task 2 and 3.
@@ -76,12 +76,12 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
 	}
 	// ── 7. Carbon Reward ─────────────────────────────────────────────────────
-	// Low-carbon bonus: active for task 3 (and optional overlay on others).
 	if inp.TaskID >= 3 {
 		// Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
 		carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
-		// Reward for reducing energy during high-carbon periods
-		rc.CarbonReward = -inp.EnergyKWh * carbonNorm * 0.3
 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────
@@ -98,11 +98,11 @@ func computeTempReward(temp, setpoint, tMin, tMax float64) float64 {
 		// Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
 		deviation := math.Abs(temp - setpoint)
 		sigma := (tMax - tMin) / 4.0
-		return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 0.5
 	}
 	// Outside bounds: proportional penalty
 	excess := math.Max(temp-tMax, tMin-temp)
-	return -excess * 0.4
 }
 // computeGridResponse returns a bonus for shedding load during high grid stress,

 	rc := RewardComponents{}
 	// ── 1. Cost Savings ─────────────────────────────────────────────────────
+	// Shift from a pure penalty to a positive baseline: the reward starts at 1.5 and
+	// drops by 2.0 per typicalCost spent, so it is positive only while StepCost < 0.75 * typicalCost.
 	typicalCost := 4.0
+	rc.CostSavings = 1.5 - (inp.StepCost / typicalCost) * 2.0
 	// ── 2. Temperature Constraint ────────────────────────────────────────────
 	// Only active for task 2 and 3.
 	}
 	// ── 7. Carbon Reward ─────────────────────────────────────────────────────
+	// Low-carbon bonus: active for task 3.
 	if inp.TaskID >= 3 {
 		// Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
 		carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
+		// Provide a baseline positive score, reduced by carbon footprint
+		rc.CarbonReward = 0.5 - (inp.EnergyKWh * carbonNorm * 0.3)
 	}
 	// ── Aggregate ────────────────────────────────────────────────────────────
 		// Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
 		deviation := math.Abs(temp - setpoint)
 		sigma := (tMax - tMin) / 4.0
+		return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 1.5 // Increased positive reward
 	}
 	// Outside bounds: proportional penalty
 	excess := math.Max(temp-tMax, tMin-temp)
+	return -excess * 0.6
 }
 // computeGridResponse returns a bonus for shedding load during high grid stress,

openenv.yaml CHANGED Viewed

@@ -8,6 +8,7 @@ description: |
 author: LOKyu Team
 tags:
   - reinforcement-learning
   - energy
   - demand-response

 author: LOKyu Team
 tags:
+  - openenv
   - reinforcement-learning
   - energy
   - demand-response

python/__pycache__/inference.cpython-311.pyc ADDED Viewed

Binary file (23.5 kB). View file

python/inference.py CHANGED Viewed

@@ -5,7 +5,6 @@ Runs an LLM agent against all 3 tasks for N episodes each.
 Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
 Usage:
-    export API_BASE_URL=https://router.huggingface.co/v1
     export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
     export HF_TOKEN=hf_xxxx
     python inference.py
@@ -27,8 +26,8 @@ from openai import OpenAI
 # ── Constants ──────────────────────────────────────────────────────────────
 ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
-API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DEFAULT_EPISODES = 1
 DEFAULT_SEED_BASE = 1000
@@ -128,9 +127,12 @@ class LLMAgent:
             api_key=HF_TOKEN if HF_TOKEN else "none",
         )
         self.model = MODEL_NAME
     def choose_action(self, obs: dict, task_id: int) -> dict:
         """Prompt the LLM with current observation, return parsed action dict."""
         task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
         prompt = f"""{task_desc}
@@ -174,7 +176,12 @@ Respond with ONLY a JSON action:
                 action = json.loads(content)
                 return self._clamp_action(action)
             except Exception as e:
-                print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
                 time.sleep(1)
         return self._heuristic_action(obs)
@@ -302,7 +309,6 @@ def run_episode(
     elapsed = time.time() - start_time
     grade = env_client.grade()
     print("[END]", flush=True)
     return {

 Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
 Usage:
     export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
     export HF_TOKEN=hf_xxxx
     python inference.py
 # ── Constants ──────────────────────────────────────────────────────────────
 ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
 MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
+API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DEFAULT_EPISODES = 1
 DEFAULT_SEED_BASE = 1000
             api_key=HF_TOKEN if HF_TOKEN else "none",
         )
         self.model = MODEL_NAME
+        self.fallback_mode = False
     def choose_action(self, obs: dict, task_id: int) -> dict:
         """Prompt the LLM with current observation, return parsed action dict."""
+        if self.fallback_mode:
+            return self._heuristic_action(obs)
         task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
         prompt = f"""{task_desc}
                 action = json.loads(content)
                 return self._clamp_action(action)
             except Exception as e:
+                err_str = str(e)
+                print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
+                if "402" in err_str or "depleted" in err_str:
+                    print("  [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
+                    self.fallback_mode = True
+                    return self._heuristic_action(obs)
                 time.sleep(1)
         return self._heuristic_action(obs)
     elapsed = time.time() - start_time
     grade = env_client.grade()
     print("[END]", flush=True)
     return {