Spaces:
Running
Running
fix: resolve merge conflicts, finalize inference + README
Browse files
- .gitignore +0 -0
- README.md +2 -2
- __pycache__/inference.cpython-311.pyc +0 -0
- env/rewards.go +8 -8
- openenv.yaml +1 -0
- python/__pycache__/inference.cpython-311.pyc +0 -0
- python/inference.py +10 -4
.gitignore
ADDED
|
File without changes
|
README.md
CHANGED
|
@@ -42,8 +42,8 @@ docker build -t gridmind-rl .
|
|
| 42 |
docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
|
| 43 |
```
|
| 44 |
|
| 45 |
-
- **7860** — Environment API (OpenEnv / agent traffic)
|
| 46 |
-
- **7861** — Web dashboard (optional)
|
| 47 |
|
| 48 |
**Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.
|
| 49 |
|
|
|
|
| 42 |
docker run --rm -p 7860:7860 -p 7861:7861 --name gridmind gridmind-rl
|
| 43 |
```
|
| 44 |
|
| 45 |
+
- **7860** — Environment API (OpenEnv / agent traffic); http://localhost:7860
|
| 46 |
+
- **7861** — Web dashboard (optional); http://localhost:7861
|
| 47 |
|
| 48 |
**Windows (PowerShell)** — same commands in a terminal with Docker Desktop running.
|
| 49 |
|
__pycache__/inference.cpython-311.pyc
ADDED
|
Binary file (691 Bytes). View file
|
|
|
env/rewards.go
CHANGED
|
@@ -31,10 +31,10 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
|
|
| 31 |
rc := RewardComponents{}
|
| 32 |
|
| 33 |
// ── 1. Cost Savings ─────────────────────────────────────────────────────
|
| 34 |
-
//
|
| 35 |
-
//
|
| 36 |
typicalCost := 4.0
|
| 37 |
-
rc.CostSavings = -(inp.StepCost / typicalCost) * 2.0
|
| 38 |
|
| 39 |
// ── 2. Temperature Constraint ────────────────────────────────────────────
|
| 40 |
// Only active for task 2 and 3.
|
|
@@ -76,12 +76,12 @@ func ComputeReward(inp ComputeRewardInput) RewardComponents {
|
|
| 76 |
}
|
| 77 |
|
| 78 |
// ── 7. Carbon Reward ─────────────────────────────────────────────────────
|
| 79 |
-
// Low-carbon bonus: active for task 3
|
| 80 |
if inp.TaskID >= 3 {
|
| 81 |
// Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
|
| 82 |
carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
|
| 83 |
-
//
|
| 84 |
-
rc.CarbonReward = -inp.EnergyKWh * carbonNorm * 0.3
|
| 85 |
}
|
| 86 |
|
| 87 |
// ── Aggregate ────────────────────────────────────────────────────────────
|
|
@@ -98,11 +98,11 @@ func computeTempReward(temp, setpoint, tMin, tMax float64) float64 {
|
|
| 98 |
// Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
|
| 99 |
deviation := math.Abs(temp - setpoint)
|
| 100 |
sigma := (tMax - tMin) / 4.0
|
| 101 |
-
return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) *
|
| 102 |
}
|
| 103 |
// Outside bounds: proportional penalty
|
| 104 |
excess := math.Max(temp-tMax, tMin-temp)
|
| 105 |
-
return -excess * 0.
|
| 106 |
}
|
| 107 |
|
| 108 |
// computeGridResponse returns a bonus for shedding load during high grid stress,
|
|
|
|
| 31 |
rc := RewardComponents{}
|
| 32 |
|
| 33 |
// ── 1. Cost Savings ─────────────────────────────────────────────────────
|
| 34 |
+
// Shift from a pure penalty to a positive baseline: a step earns positive reward only when its cost is below 75% of typicalCost.
|
| 35 |
+
// Baseline reward of 1.5, minus the relative cost.
|
| 36 |
typicalCost := 4.0
|
| 37 |
+
rc.CostSavings = 1.5 - (inp.StepCost / typicalCost) * 2.0
|
| 38 |
|
| 39 |
// ── 2. Temperature Constraint ────────────────────────────────────────────
|
| 40 |
// Only active for task 2 and 3.
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
// ── 7. Carbon Reward ─────────────────────────────────────────────────────
|
| 79 |
+
// Low-carbon bonus: active for task 3.
|
| 80 |
if inp.TaskID >= 3 {
|
| 81 |
// Normalise carbon: iso-ne range roughly 100–700 gCO2/kWh
|
| 82 |
carbonNorm := (inp.B.CarbonIntensity - 100.0) / 600.0
|
| 83 |
+
// Provide a baseline positive score, reduced by carbon footprint
|
| 84 |
+
rc.CarbonReward = 0.5 - (inp.EnergyKWh * carbonNorm * 0.3)
|
| 85 |
}
|
| 86 |
|
| 87 |
// ── Aggregate ────────────────────────────────────────────────────────────
|
|
|
|
| 98 |
// Gaussian-shaped bonus: maximum at setpoint, degrades toward bounds
|
| 99 |
deviation := math.Abs(temp - setpoint)
|
| 100 |
sigma := (tMax - tMin) / 4.0
|
| 101 |
+
return math.Exp(-0.5*(deviation/sigma)*(deviation/sigma)) * 1.5 // Increased positive reward
|
| 102 |
}
|
| 103 |
// Outside bounds: proportional penalty
|
| 104 |
excess := math.Max(temp-tMax, tMin-temp)
|
| 105 |
+
return -excess * 0.6
|
| 106 |
}
|
| 107 |
|
| 108 |
// computeGridResponse returns a bonus for shedding load during high grid stress,
|
openenv.yaml
CHANGED
|
@@ -8,6 +8,7 @@ description: |
|
|
| 8 |
|
| 9 |
author: LOKyu Team
|
| 10 |
tags:
|
|
|
|
| 11 |
- reinforcement-learning
|
| 12 |
- energy
|
| 13 |
- demand-response
|
|
|
|
| 8 |
|
| 9 |
author: LOKyu Team
|
| 10 |
tags:
|
| 11 |
+
- openenv
|
| 12 |
- reinforcement-learning
|
| 13 |
- energy
|
| 14 |
- demand-response
|
python/__pycache__/inference.cpython-311.pyc
ADDED
|
Binary file (23.5 kB). View file
|
|
|
python/inference.py
CHANGED
|
@@ -5,7 +5,6 @@ Runs an LLM agent against all 3 tasks for N episodes each.
|
|
| 5 |
Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
|
| 6 |
|
| 7 |
Usage:
|
| 8 |
-
export API_BASE_URL=https://router.huggingface.co/v1
|
| 9 |
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 10 |
export HF_TOKEN=hf_xxxx
|
| 11 |
python inference.py
|
|
@@ -27,8 +26,8 @@ from openai import OpenAI
|
|
| 27 |
# ── Constants ──────────────────────────────────────────────────────────────
|
| 28 |
|
| 29 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 30 |
-
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 31 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
|
|
|
| 32 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 33 |
DEFAULT_EPISODES = 1
|
| 34 |
DEFAULT_SEED_BASE = 1000
|
|
@@ -128,9 +127,12 @@ class LLMAgent:
|
|
| 128 |
api_key=HF_TOKEN if HF_TOKEN else "none",
|
| 129 |
)
|
| 130 |
self.model = MODEL_NAME
|
|
|
|
| 131 |
|
| 132 |
def choose_action(self, obs: dict, task_id: int) -> dict:
|
| 133 |
"""Prompt the LLM with current observation, return parsed action dict."""
|
|
|
|
|
|
|
| 134 |
task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
|
| 135 |
|
| 136 |
prompt = f"""{task_desc}
|
|
@@ -174,7 +176,12 @@ Respond with ONLY a JSON action:
|
|
| 174 |
action = json.loads(content)
|
| 175 |
return self._clamp_action(action)
|
| 176 |
except Exception as e:
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
time.sleep(1)
|
| 179 |
|
| 180 |
return self._heuristic_action(obs)
|
|
@@ -302,7 +309,6 @@ def run_episode(
|
|
| 302 |
|
| 303 |
elapsed = time.time() - start_time
|
| 304 |
grade = env_client.grade()
|
| 305 |
-
|
| 306 |
print("[END]", flush=True)
|
| 307 |
|
| 308 |
return {
|
|
|
|
| 5 |
Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
|
| 6 |
|
| 7 |
Usage:
|
|
|
|
| 8 |
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 9 |
export HF_TOKEN=hf_xxxx
|
| 10 |
python inference.py
|
|
|
|
| 26 |
# ── Constants ──────────────────────────────────────────────────────────────
|
| 27 |
|
| 28 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
|
|
|
| 29 |
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
| 30 |
+
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 32 |
DEFAULT_EPISODES = 1
|
| 33 |
DEFAULT_SEED_BASE = 1000
|
|
|
|
| 127 |
api_key=HF_TOKEN if HF_TOKEN else "none",
|
| 128 |
)
|
| 129 |
self.model = MODEL_NAME
|
| 130 |
+
self.fallback_mode = False
|
| 131 |
|
| 132 |
def choose_action(self, obs: dict, task_id: int) -> dict:
|
| 133 |
"""Prompt the LLM with current observation, return parsed action dict."""
|
| 134 |
+
if self.fallback_mode:
|
| 135 |
+
return self._heuristic_action(obs)
|
| 136 |
task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
|
| 137 |
|
| 138 |
prompt = f"""{task_desc}
|
|
|
|
| 176 |
action = json.loads(content)
|
| 177 |
return self._clamp_action(action)
|
| 178 |
except Exception as e:
|
| 179 |
+
err_str = str(e)
|
| 180 |
+
print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
|
| 181 |
+
if "402" in err_str or "depleted" in err_str:
|
| 182 |
+
print(" [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
|
| 183 |
+
self.fallback_mode = True
|
| 184 |
+
return self._heuristic_action(obs)
|
| 185 |
time.sleep(1)
|
| 186 |
|
| 187 |
return self._heuristic_action(obs)
|
|
|
|
| 309 |
|
| 310 |
elapsed = time.time() - start_time
|
| 311 |
grade = env_client.grade()
|
|
|
|
| 312 |
print("[END]", flush=True)
|
| 313 |
|
| 314 |
return {
|