adityss committed on
Commit
60cbc23
·
1 Parent(s): e3130b4

feat: add baseline inference script, performance scores, and update openenv configuration

Browse files
README.md CHANGED
@@ -9,8 +9,8 @@
9
  ## 🚀 Quick Start (3 Steps)
10
 
11
  1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
12
- 2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
13
- 3. **Watch the AI learn**: `python python/inference.py --episodes 1`
14
 
15
  That's it! The AI will start making energy decisions and you'll see live results.
16
 
@@ -99,22 +99,16 @@ python -m uvicorn dashboard.server:app --host 0.0.0.0 --port 7861
99
 
100
  **On Windows (PowerShell - Recommended)**:
101
  ```powershell
102
- $env:API_BASE_URL = "https://router.huggingface.co/v1"
103
- $env:MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
104
  $env:HF_TOKEN = "hf_your_token_here" # Paste your token here
105
  ```
106
 
107
  **On Windows (Command Prompt)**:
108
  ```cmd
109
- set API_BASE_URL=https://router.huggingface.co/v1
110
- set MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
111
  set HF_TOKEN=hf_your_token_here
112
  ```
113
 
114
  **On Mac/Linux**:
115
  ```bash
116
- export API_BASE_URL=https://router.huggingface.co/v1
117
- export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
118
  export HF_TOKEN=hf_your_token_here
119
  ```
120
 
@@ -122,7 +116,7 @@ export HF_TOKEN=hf_your_token_here
122
 
123
  ```bash
124
  # Run 3 learning episodes (takes ~5 minutes)
125
- python python/inference.py --episodes 3
126
  ```
127
 
128
  You'll see output like:
@@ -202,7 +196,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3" # Faster but less accura
202
 
203
  **Run longer training**:
204
  ```bash
205
- python python/inference.py --episodes 10 # Takes ~30 minutes
206
  ```
207
 
208
  **Test the environment manually**:
 
9
  ## 🚀 Quick Start (3 Steps)
10
 
11
  1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
12
+ 2. **Run the simulator**: `docker build -q -t gridmind-rl . && docker run -p 7860:7860 -p 7861:7861 gridmind-rl`
13
+ 3. **Watch the AI learn**: `python inference.py --episodes 1`
14
 
15
  That's it! The AI will start making energy decisions and you'll see live results.
16
 
 
99
 
100
  **On Windows (PowerShell - Recommended)**:
101
  ```powershell
 
 
102
  $env:HF_TOKEN = "hf_your_token_here" # Paste your token here
103
  ```
104
 
105
  **On Windows (Command Prompt)**:
106
  ```cmd
 
 
107
  set HF_TOKEN=hf_your_token_here
108
  ```
109
 
110
  **On Mac/Linux**:
111
  ```bash
 
 
112
  export HF_TOKEN=hf_your_token_here
113
  ```
114
 
 
116
 
117
  ```bash
118
  # Run 3 learning episodes (takes ~5 minutes)
119
+ python inference.py --episodes 3
120
  ```
121
 
122
  You'll see output like:
 
196
 
197
  **Run longer training**:
198
  ```bash
199
+ python inference.py --episodes 10 # Takes ~30 minutes
200
  ```
201
 
202
  **Test the environment manually**:
baseline_scores.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "Qwen/Qwen2.5-7B-Instruct",
3
+ "api_base": "https://router.huggingface.co/v1",
4
+ "episodes_per_task": 1,
5
+ "seed_base": 1000,
6
+ "task_averages": {
7
+ "1": 0.7664,
8
+ "2": 0.1332,
9
+ "3": 0.144
10
+ },
11
+ "overall_average": 0.3478666666666667,
12
+ "all_results": [
13
+ {
14
+ "task_id": 1,
15
+ "seed": 1100,
16
+ "total_reward": -24.481103708779088,
17
+ "total_steps": 267,
18
+ "elapsed_sec": 15.361155271530151,
19
+ "score": 0.7664,
20
+ "sub_scores": {
21
+ "cost": 0.7664032944600553
22
+ },
23
+ "exploit_detected": false
24
+ },
25
+ {
26
+ "task_id": 2,
27
+ "seed": 1200,
28
+ "total_reward": -2176.488362731018,
29
+ "total_steps": 284,
30
+ "elapsed_sec": 3.9256582260131836,
31
+ "score": 0.1332,
32
+ "sub_scores": {
33
+ "cost": 0.18956946086927984,
34
+ "temperature": 0.04861111111111111
35
+ },
36
+ "exploit_detected": false
37
+ },
38
+ {
39
+ "task_id": 3,
40
+ "seed": 1300,
41
+ "total_reward": -2337.268269877898,
42
+ "total_steps": 285,
43
+ "elapsed_sec": 3.49006724357605,
44
+ "score": 0.144,
45
+ "sub_scores": {
46
+ "batch_deadline": 0,
47
+ "cost": 0.19033424572859092,
48
+ "grid_response": 0.2608695652173913,
49
+ "temperature": 0.04861111111111111
50
+ },
51
+ "exploit_detected": false
52
+ }
53
+ ]
54
+ }
python/inference.py → inference.py RENAMED
@@ -5,10 +5,9 @@ Runs an LLM agent against all 3 tasks for N episodes each.
5
  Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
6
 
7
  Usage:
8
- export API_BASE_URL=https://router.huggingface.co/v1
9
- export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
10
  export HF_TOKEN=hf_xxxx
11
- python python/inference.py [--episodes 3] [--env-url http://localhost:7860]
12
  """
13
 
14
  import argparse
@@ -26,8 +25,8 @@ from openai import OpenAI
26
  # ── Constants ──────────────────────────────────────────────────────────────
27
 
28
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
 
29
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
30
- MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
31
  HF_TOKEN = os.getenv("HF_TOKEN", "")
32
  DEFAULT_EPISODES = 3
33
  DEFAULT_SEED_BASE = 1000 # episodes use seed BASE+episode_idx for reproducibility
@@ -102,9 +101,12 @@ class LLMAgent:
102
  api_key=HF_TOKEN if HF_TOKEN else "none",
103
  )
104
  self.model = MODEL_NAME
 
105
 
106
  def choose_action(self, obs: dict, task_id: int) -> dict:
107
  """Prompt the LLM with current observation, return parsed action dict."""
 
 
108
  task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
109
 
110
  prompt = f"""{task_desc}
@@ -144,7 +146,12 @@ Respond with ONLY a JSON action:
144
  content = completion.choices[0].message.content.strip()
145
  return self._parse_action(content)
146
  except Exception as e:
147
- print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
 
 
 
 
 
148
  time.sleep(1)
149
 
150
  # Fallback: rule-based heuristic
 
5
  Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
6
 
7
  Usage:
8
+ export MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.3
 
9
  export HF_TOKEN=hf_xxxx
10
+ python inference.py [--episodes 3] [--env-url http://localhost:7860]
11
  """
12
 
13
  import argparse
 
25
  # ── Constants ──────────────────────────────────────────────────────────────
26
 
27
  ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
28
+ MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
29
  API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 
30
  HF_TOKEN = os.getenv("HF_TOKEN", "")
31
  DEFAULT_EPISODES = 3
32
  DEFAULT_SEED_BASE = 1000 # episodes use seed BASE+episode_idx for reproducibility
 
101
  api_key=HF_TOKEN if HF_TOKEN else "none",
102
  )
103
  self.model = MODEL_NAME
104
+ self.fallback_mode = False
105
 
106
  def choose_action(self, obs: dict, task_id: int) -> dict:
107
  """Prompt the LLM with current observation, return parsed action dict."""
108
+ if self.fallback_mode:
109
+ return self._heuristic_action(obs)
110
  task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
111
 
112
  prompt = f"""{task_desc}
 
146
  content = completion.choices[0].message.content.strip()
147
  return self._parse_action(content)
148
  except Exception as e:
149
+ err_str = str(e)
150
+ print(f" [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
151
+ if "402" in err_str or "depleted" in err_str:
152
+ print(" [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
153
+ self.fallback_mode = True
154
+ return self._heuristic_action(obs)
155
  time.sleep(1)
156
 
157
  # Fallback: rule-based heuristic
openenv.yaml CHANGED
@@ -8,6 +8,7 @@ description: |
8
 
9
  author: LOKyu Team
10
  tags:
 
11
  - reinforcement-learning
12
  - energy
13
  - demand-response
 
8
 
9
  author: LOKyu Team
10
  tags:
11
+ - openenv
12
  - reinforcement-learning
13
  - energy
14
  - demand-response