Spaces:
Running
Running
feat: add baseline inference script, performance scores, and update openenv configuration
Browse files
- README.md +4 -10
- baseline_scores.json +54 -0
- python/inference.py → inference.py +12 -5
- openenv.yaml +1 -0
README.md
CHANGED
|
@@ -9,8 +9,8 @@
|
|
| 9 |
## 🚀 Quick Start (3 Steps)
|
| 10 |
|
| 11 |
1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
|
| 12 |
-
2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861
|
| 13 |
-
3. **Watch the AI learn**: `python
|
| 14 |
|
| 15 |
That's it! The AI will start making energy decisions and you'll see live results.
|
| 16 |
|
|
@@ -99,22 +99,16 @@ python -m uvicorn dashboard.server:app --host 0.0.0.0 --port 7861
|
|
| 99 |
|
| 100 |
**On Windows (PowerShell - Recommended)**:
|
| 101 |
```powershell
|
| 102 |
-
$env:API_BASE_URL = "https://router.huggingface.co/v1"
|
| 103 |
-
$env:MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
|
| 104 |
$env:HF_TOKEN = "hf_your_token_here" # Paste your token here
|
| 105 |
```
|
| 106 |
|
| 107 |
**On Windows (Command Prompt)**:
|
| 108 |
```cmd
|
| 109 |
-
set API_BASE_URL=https://router.huggingface.co/v1
|
| 110 |
-
set MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 111 |
set HF_TOKEN=hf_your_token_here
|
| 112 |
```
|
| 113 |
|
| 114 |
**On Mac/Linux**:
|
| 115 |
```bash
|
| 116 |
-
export API_BASE_URL=https://router.huggingface.co/v1
|
| 117 |
-
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 118 |
export HF_TOKEN=hf_your_token_here
|
| 119 |
```
|
| 120 |
|
|
@@ -122,7 +116,7 @@ export HF_TOKEN=hf_your_token_here
|
|
| 122 |
|
| 123 |
```bash
|
| 124 |
# Run 3 learning episodes (takes ~5 minutes)
|
| 125 |
-
python
|
| 126 |
```
|
| 127 |
|
| 128 |
You'll see output like:
|
|
@@ -202,7 +196,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3" # Faster but less accura
|
|
| 202 |
|
| 203 |
**Run longer training**:
|
| 204 |
```bash
|
| 205 |
-
python
|
| 206 |
```
|
| 207 |
|
| 208 |
**Test the environment manually**:
|
|
|
|
| 9 |
## 🚀 Quick Start (3 Steps)
|
| 10 |
|
| 11 |
1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
|
| 12 |
+
2. **Run the simulator**: `docker build -q -t gridmind-rl . && docker run -p 7860:7860 -p 7861:7861 gridmind-rl`
|
| 13 |
+
3. **Watch the AI learn**: `python inference.py --episodes 1`
|
| 14 |
|
| 15 |
That's it! The AI will start making energy decisions and you'll see live results.
|
| 16 |
|
|
|
|
| 99 |
|
| 100 |
**On Windows (PowerShell - Recommended)**:
|
| 101 |
```powershell
|
|
|
|
|
|
|
| 102 |
$env:HF_TOKEN = "hf_your_token_here" # Paste your token here
|
| 103 |
```
|
| 104 |
|
| 105 |
**On Windows (Command Prompt)**:
|
| 106 |
```cmd
|
|
|
|
|
|
|
| 107 |
set HF_TOKEN=hf_your_token_here
|
| 108 |
```
|
| 109 |
|
| 110 |
**On Mac/Linux**:
|
| 111 |
```bash
|
|
|
|
|
|
|
| 112 |
export HF_TOKEN=hf_your_token_here
|
| 113 |
```
|
| 114 |
|
|
|
|
| 116 |
|
| 117 |
```bash
|
| 118 |
# Run 3 learning episodes (takes ~5 minutes)
|
| 119 |
+
python inference.py --episodes 3
|
| 120 |
```
|
| 121 |
|
| 122 |
You'll see output like:
|
|
|
|
| 196 |
|
| 197 |
**Run longer training**:
|
| 198 |
```bash
|
| 199 |
+
python inference.py --episodes 10 # Takes ~30 minutes
|
| 200 |
```
|
| 201 |
|
| 202 |
**Test the environment manually**:
|
baseline_scores.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model": "Qwen/Qwen2.5-7B-Instruct",
|
| 3 |
+
"api_base": "https://router.huggingface.co/v1",
|
| 4 |
+
"episodes_per_task": 1,
|
| 5 |
+
"seed_base": 1000,
|
| 6 |
+
"task_averages": {
|
| 7 |
+
"1": 0.7664,
|
| 8 |
+
"2": 0.1332,
|
| 9 |
+
"3": 0.144
|
| 10 |
+
},
|
| 11 |
+
"overall_average": 0.3478666666666667,
|
| 12 |
+
"all_results": [
|
| 13 |
+
{
|
| 14 |
+
"task_id": 1,
|
| 15 |
+
"seed": 1100,
|
| 16 |
+
"total_reward": -24.481103708779088,
|
| 17 |
+
"total_steps": 267,
|
| 18 |
+
"elapsed_sec": 15.361155271530151,
|
| 19 |
+
"score": 0.7664,
|
| 20 |
+
"sub_scores": {
|
| 21 |
+
"cost": 0.7664032944600553
|
| 22 |
+
},
|
| 23 |
+
"exploit_detected": false
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"task_id": 2,
|
| 27 |
+
"seed": 1200,
|
| 28 |
+
"total_reward": -2176.488362731018,
|
| 29 |
+
"total_steps": 284,
|
| 30 |
+
"elapsed_sec": 3.9256582260131836,
|
| 31 |
+
"score": 0.1332,
|
| 32 |
+
"sub_scores": {
|
| 33 |
+
"cost": 0.18956946086927984,
|
| 34 |
+
"temperature": 0.04861111111111111
|
| 35 |
+
},
|
| 36 |
+
"exploit_detected": false
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"task_id": 3,
|
| 40 |
+
"seed": 1300,
|
| 41 |
+
"total_reward": -2337.268269877898,
|
| 42 |
+
"total_steps": 285,
|
| 43 |
+
"elapsed_sec": 3.49006724357605,
|
| 44 |
+
"score": 0.144,
|
| 45 |
+
"sub_scores": {
|
| 46 |
+
"batch_deadline": 0,
|
| 47 |
+
"cost": 0.19033424572859092,
|
| 48 |
+
"grid_response": 0.2608695652173913,
|
| 49 |
+
"temperature": 0.04861111111111111
|
| 50 |
+
},
|
| 51 |
+
"exploit_detected": false
|
| 52 |
+
}
|
| 53 |
+
]
|
| 54 |
+
}
|
python/inference.py → inference.py
RENAMED
|
@@ -5,10 +5,9 @@ Runs an LLM agent against all 3 tasks for N episodes each.
|
|
| 5 |
Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
|
| 6 |
|
| 7 |
Usage:
|
| 8 |
-
export
|
| 9 |
-
export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
|
| 10 |
export HF_TOKEN=hf_xxxx
|
| 11 |
-
python
|
| 12 |
"""
|
| 13 |
|
| 14 |
import argparse
|
|
@@ -26,8 +25,8 @@ from openai import OpenAI
|
|
| 26 |
# ── Constants ──────────────────────────────────────────────────────────────
|
| 27 |
|
| 28 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
|
|
|
| 29 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
| 30 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
|
| 31 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 32 |
DEFAULT_EPISODES = 3
|
| 33 |
DEFAULT_SEED_BASE = 1000 # episodes use seed BASE+episode_idx for reproducibility
|
|
@@ -102,9 +101,12 @@ class LLMAgent:
|
|
| 102 |
api_key=HF_TOKEN if HF_TOKEN else "none",
|
| 103 |
)
|
| 104 |
self.model = MODEL_NAME
|
|
|
|
| 105 |
|
| 106 |
def choose_action(self, obs: dict, task_id: int) -> dict:
|
| 107 |
"""Prompt the LLM with current observation, return parsed action dict."""
|
|
|
|
|
|
|
| 108 |
task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
|
| 109 |
|
| 110 |
prompt = f"""{task_desc}
|
|
@@ -144,7 +146,12 @@ Respond with ONLY a JSON action:
|
|
| 144 |
content = completion.choices[0].message.content.strip()
|
| 145 |
return self._parse_action(content)
|
| 146 |
except Exception as e:
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
time.sleep(1)
|
| 149 |
|
| 150 |
# Fallback: rule-based heuristic
|
|
|
|
| 5 |
Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
|
| 6 |
|
| 7 |
Usage:
|
| 8 |
+
export MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.3
|
|
|
|
| 9 |
export HF_TOKEN=hf_xxxx
|
| 10 |
+
python inference.py [--episodes 3] [--env-url http://localhost:7860]
|
| 11 |
"""
|
| 12 |
|
| 13 |
import argparse
|
|
|
|
| 25 |
# ── Constants ──────────────────────────────────────────────────────────────
|
| 26 |
|
| 27 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
|
| 28 |
+
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
|
| 29 |
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
|
|
|
|
| 30 |
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 31 |
DEFAULT_EPISODES = 3
|
| 32 |
DEFAULT_SEED_BASE = 1000 # base seed; each episode's seed is an offset from it for reproducibility (baseline_scores.json records 1100/1200/1300 for tasks 1-3)
|
|
|
|
| 101 |
api_key=HF_TOKEN if HF_TOKEN else "none",
|
| 102 |
)
|
| 103 |
self.model = MODEL_NAME
|
| 104 |
+
self.fallback_mode = False
|
| 105 |
|
| 106 |
def choose_action(self, obs: dict, task_id: int) -> dict:
|
| 107 |
"""Return an action dict: parsed from the LLM's reply to the current observation, or from the heuristic once fallback mode is on."""
|
| 108 |
+
if self.fallback_mode:
|
| 109 |
+
return self._heuristic_action(obs)
|
| 110 |
task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
|
| 111 |
|
| 112 |
prompt = f"""{task_desc}
|
|
|
|
| 146 |
content = completion.choices[0].message.content.strip()
|
| 147 |
return self._parse_action(content)
|
| 148 |
except Exception as e:
|
| 149 |
+
err_str = str(e)
|
| 150 |
+
print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
|
| 151 |
+
if "402" in err_str or "depleted" in err_str:
|
| 152 |
+
print(" [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
|
| 153 |
+
self.fallback_mode = True
|
| 154 |
+
return self._heuristic_action(obs)
|
| 155 |
time.sleep(1)
|
| 156 |
|
| 157 |
# Fallback: rule-based heuristic
|
openenv.yaml
CHANGED
|
@@ -8,6 +8,7 @@ description: |
|
|
| 8 |
|
| 9 |
author: LOKyu Team
|
| 10 |
tags:
|
|
|
|
| 11 |
- reinforcement-learning
|
| 12 |
- energy
|
| 13 |
- demand-response
|
|
|
|
| 8 |
|
| 9 |
author: LOKyu Team
|
| 10 |
tags:
|
| 11 |
+
- openenv
|
| 12 |
- reinforcement-learning
|
| 13 |
- energy
|
| 14 |
- demand-response
|