ShreeshantXD committed on
Commit
2e0c292
·
2 Parent(s): 574589df1bfee9

fix: resolve merge conflicts, finalize inference + README

Browse files
.gitignore ADDED
File without changes
README.md CHANGED
@@ -42,8 +42,8 @@ docker build -t gridmind-rl .
42
  docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
43
  ```
44
 
45
- - **7860** — Environment API (OpenEnv / agent traffic)
46
- - **7861** — Web dashboard (optional)
47
 
48
  **Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.
49
 
 
42
  docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
43
  ```
44
 
45
+ - **7860** — Environment API (OpenEnv / agent traffic); http://localhost:7860
46
+ - **7861** — Web dashboard (optional); http://localhost:7861
47
 
48
  **Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.
49
 
__pycache__/inference.cpython-311.pyc ADDED
Binary file (691 Bytes). View file
 
env/rewards.go CHANGED
@@ -31,10 +31,10 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
31
  rc := RewardComponents{}
32
 
33
  // ── 1. Cost Savings ─────────────────────────────────────────────────────
34
- // Negative reward proportional to energy cost. Normalised by typical step cost.
35
- // Typical step cost at full load, peak price: 50kW * 0.25h * 0.32 = $4.00.
36
  typicalCost := 4.0
37
- rc.CostSavings = -(inp.StepCost / typicalCost) * 2.0
38
 
39
  // ── 2. Temperature Constraint ────────────────────────────────────────────
40
  // Only active for task 2 and 3.
@@ -76,12 +76,12 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
76
  }
77
 
78
  // ── 7. Carbon Reward ─────────────────────────────────────────────────────
79
- // Low-carbon bonus: active for task 3 (and optional overlay on others).
80
  if inp.TaskID >= 3 {
81
  // Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
82
  carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
83
- // Reward for reducing energy during high-carbon periods
84
- rc.CarbonReward = -inp.EnergyKWh * carbonNorm * 0.3
85
  }
86
 
87
  // ── Aggregate ────────────────────────────────────────────────────────────
@@ -98,11 +98,11 @@ func computeTempReward(temp, setpoint, tMin, tMax float64) float64 {
98
  // Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
99
  deviation := math.Abs(temp - setpoint)
100
  sigma := (tMax - tMin) / 4.0
101
- return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 0.5
102
  }
103
  // Outside bounds: proportional penalty
104
  excess := math.Max(temp-tMax, tMin-temp)
105
- return -excess * 0.4
106
  }
107
 
108
  // computeGridResponse returns a bonus for shedding load during high grid stress,
 
31
  rc := RewardComponents{}
32
 
33
  // ── 1. Cost Savings ─────────────────────────────────────────────────────
34
+ // Shift from a pure penalty to a positive baseline: steps costing less than 75% of typicalCost earn a positive reward.
35
+ // Baseline reward of 1.5, minus twice the step cost relative to typicalCost.
36
  typicalCost := 4.0
37
+ rc.CostSavings = 1.5 - (inp.StepCost / typicalCost) * 2.0
38
 
39
  // ── 2. Temperature Constraint ────────────────────────────────────────────
40
  // Only active for task 2 and 3.
 
76
  }
77
 
78
  // ── 7. Carbon Reward ─────────────────────────────────────────────────────
79
+ // Low-carbon bonus: active for task 3 (TaskID >= 3).
80
  if inp.TaskID >= 3 {
81
  // Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
82
  carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
83
+ // Baseline positive score of 0.5, reduced by energy use weighted by normalised carbon intensity
84
+ rc.CarbonReward = 0.5 - (inp.EnergyKWh * carbonNorm * 0.3)
85
  }
86
 
87
  // ── Aggregate ────────────────────────────────────────────────────────────
 
98
  // Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
99
  deviation := math.Abs(temp - setpoint)
100
  sigma := (tMax - tMin) / 4.0
101
+ return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 1.5 // Increased positive reward
102
  }
103
  // Outside bounds: proportional penalty
104
  excess := math.Max(temp-tMax, tMin-temp)
105
+ return -excess * 0.6
106
  }
107
 
108
  // computeGridResponse returns a bonus for shedding load during high grid stress,
openenv.yaml CHANGED
@@ -8,6 +8,7 @@ description: |
8
 
9
  author: LOKyu Team
10
  tags:
 
11
  - reinforcement-learning
12
  - energy
13
  - demand-response
 
8
 
9
  author: LOKyu Team
10
  tags:
11
+ - openenv
12
  - reinforcement-learning
13
  - energy
14
  - demand-response
python/__pycache__/inference.cpython-311.pyc ADDED
Binary file (23.5 kB). View file
 
python/inference.py CHANGED
@@ -5,7 +5,6 @@ Runs an LLM agent against all 3 tasks for N episodes each.
5
  Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
6
 
7
  Usage:
8
- export API_BASE_URL=https://router.huggingface.co/v1
9
  export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
10
  export HF_TOKEN=hf_xxxx
11
  python inference.py
@@ -27,8 +26,8 @@ from openai import OpenAI
27
  # ── Constants ──────────────────────────────────────────────────────────────
28
 
29
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
30
- API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
31
  MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 
32
  HF_TOKEN = os.getenv("HF_TOKEN", "")
33
  DEFAULT_EPISODES = 1
34
  DEFAULT_SEED_BASE = 1000
@@ -128,9 +127,12 @@ class LLMAgent:
128
  api_key=HF_TOKEN if HF_TOKEN else "none",
129
  )
130
  self.model = MODEL_NAME
 
131
 
132
  def choose_action(self, obs: dict, task_id: int) -> dict:
133
  """Prompt the LLM with current observation, return parsed action dict."""
 
 
134
  task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
135
 
136
  prompt = f"""{task_desc}
@@ -174,7 +176,12 @@ Respond with ONLY a JSON action:
174
  action = json.loads(content)
175
  return self._clamp_action(action)
176
  except Exception as e:
177
- print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
 
 
 
 
 
178
  time.sleep(1)
179
 
180
  return self._heuristic_action(obs)
@@ -302,7 +309,6 @@ def run_episode(
302
 
303
  elapsed = time.time() - start_time
304
  grade = env_client.grade()
305
-
306
  print("[END]", flush=True)
307
 
308
  return {
 
5
  Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
6
 
7
  Usage:
 
8
  export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
9
  export HF_TOKEN=hf_xxxx
10
  python inference.py
 
26
  # ── Constants ──────────────────────────────────────────────────────────────
27
 
28
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
 
29
  MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
30
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
31
  HF_TOKEN = os.getenv("HF_TOKEN", "")
32
  DEFAULT_EPISODES = 1
33
  DEFAULT_SEED_BASE = 1000
 
127
  api_key=HF_TOKEN if HF_TOKEN else "none",
128
  )
129
  self.model = MODEL_NAME
130
+ self.fallback_mode = False
131
 
132
  def choose_action(self, obs: dict, task_id: int) -> dict:
133
  """Prompt the LLM with current observation, return parsed action dict."""
134
+ if self.fallback_mode:
135
+ return self._heuristic_action(obs)
136
  task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
137
 
138
  prompt = f"""{task_desc}
 
176
  action = json.loads(content)
177
  return self._clamp_action(action)
178
  except Exception as e:
179
+ err_str = str(e)
180
+ print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
181
+ if "402" in err_str or "depleted" in err_str:
182
+ print(" [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
183
+ self.fallback_mode = True
184
+ return self._heuristic_action(obs)
185
  time.sleep(1)
186
 
187
  return self._heuristic_action(obs)
 
309
 
310
  elapsed = time.time() - start_time
311
  grade = env_client.grade()
 
312
  print("[END]", flush=True)
313
 
314
  return {