Spaces:

Prajwal782007
/

Gridmind

Running

App Files Community

adityss committed on Apr 2

Commit

60cbc23

1 Parent(s): e3130b4

feat: add baseline inference script, performance scores, and update openenv configuration

Browse files

Files changed (4) hide show

README.md +4 -10
baseline_scores.json +54 -0
python/inference.py → inference.py +12 -5
openenv.yaml +1 -0

README.md CHANGED Viewed

@@ -9,8 +9,8 @@
 ## 🚀 Quick Start (3 Steps)
 1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
-2. **Run the simulator**: `docker run -p 7860:7860 -p 7861:7861 ghcr.io/your-repo/gridmind-rl:latest`
-3. **Watch the AI learn**: `python python/inference.py --episodes 1`
 That's it! The AI will start making energy decisions and you'll see live results.
@@ -99,22 +99,16 @@ python -m uvicorn dashboard.server:app --host 0.0.0.0 --port 7861
 **On Windows (PowerShell - Recommended)**:
 ```powershell
-$env:API_BASE_URL = "https://router.huggingface.co/v1"
-$env:MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
 $env:HF_TOKEN = "hf_your_token_here"  # Paste your token here
 ```
 **On Windows (Command Prompt)**:
 ```cmd
-set API_BASE_URL=https://router.huggingface.co/v1
-set MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
 set HF_TOKEN=hf_your_token_here
 ```
 **On Mac/Linux**:
 ```bash
-export API_BASE_URL=https://router.huggingface.co/v1
-export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
 export HF_TOKEN=hf_your_token_here
 ```
@@ -122,7 +116,7 @@ export HF_TOKEN=hf_your_token_here
 ```bash
 # Run 3 learning episodes (takes ~5 minutes)
-python python/inference.py --episodes 3
 ```
 You'll see output like:
@@ -202,7 +196,7 @@ $env:MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # Faster but less accura
 **Run longer training**:
 ```bash
-python python/inference.py --episodes 10  # Takes ~30 minutes
 ```
 **Test the environment manually**:

 ## 🚀 Quick Start (3 Steps)
 1. **Get a free AI API key** from [Hugging Face](https://huggingface.co/join) (takes 2 minutes)
+2. **Run the simulator**: `docker build -q -t gridmind-rl . && docker run -p 7860:7860 -p 7861:7861 gridmind-rl`
+3. **Watch the AI learn**: `python inference.py --episodes 1`
 That's it! The AI will start making energy decisions and you'll see live results.
 **On Windows (PowerShell - Recommended)**:
 ```powershell
 $env:HF_TOKEN = "hf_your_token_here"  # Paste your token here
 ```
 **On Windows (Command Prompt)**:
 ```cmd
 set HF_TOKEN=hf_your_token_here
 ```
 **On Mac/Linux**:
 ```bash
 export HF_TOKEN=hf_your_token_here
 ```
 ```bash
 # Run 3 learning episodes (takes ~5 minutes)
+python inference.py --episodes 3
 ```
 You'll see output like:
 **Run longer training**:
 ```bash
+python inference.py --episodes 10  # Takes ~30 minutes
 ```
 **Test the environment manually**:

baseline_scores.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "model": "Qwen/Qwen2.5-7B-Instruct",
+  "api_base": "https://router.huggingface.co/v1",
+  "episodes_per_task": 1,
+  "seed_base": 1000,
+  "task_averages": {
+    "1": 0.7664,
+    "2": 0.1332,
+    "3": 0.144
+  },
+  "overall_average": 0.3478666666666667,
+  "all_results": [
+    {
+      "task_id": 1,
+      "seed": 1100,
+      "total_reward": -24.481103708779088,
+      "total_steps": 267,
+      "elapsed_sec": 15.361155271530151,
+      "score": 0.7664,
+      "sub_scores": {
+        "cost": 0.7664032944600553
+      },
+      "exploit_detected": false
+    },
+    {
+      "task_id": 2,
+      "seed": 1200,
+      "total_reward": -2176.488362731018,
+      "total_steps": 284,
+      "elapsed_sec": 3.9256582260131836,
+      "score": 0.1332,
+      "sub_scores": {
+        "cost": 0.18956946086927984,
+        "temperature": 0.04861111111111111
+      },
+      "exploit_detected": false
+    },
+    {
+      "task_id": 3,
+      "seed": 1300,
+      "total_reward": -2337.268269877898,
+      "total_steps": 285,
+      "elapsed_sec": 3.49006724357605,
+      "score": 0.144,
+      "sub_scores": {
+        "batch_deadline": 0,
+        "cost": 0.19033424572859092,
+        "grid_response": 0.2608695652173913,
+        "temperature": 0.04861111111111111
+      },
+      "exploit_detected": false
+    }
+  ]
+}

python/inference.py → inference.py RENAMED Viewed

@@ -5,10 +5,9 @@ Runs an LLM agent against all 3 tasks for N episodes each.
 Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
 Usage:
-    export API_BASE_URL=https://router.huggingface.co/v1
-    export MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
     export HF_TOKEN=hf_xxxx
-    python python/inference.py [--episodes 3] [--env-url http://localhost:7860]
 """
 import argparse
@@ -26,8 +25,8 @@ from openai import OpenAI
 # ── Constants ──────────────────────────────────────────────────────────────
 ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.1-8B-Instruct")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DEFAULT_EPISODES = 3
 DEFAULT_SEED_BASE = 1000  # episodes use seed BASE+episode_idx for reproducibility
@@ -102,9 +101,12 @@ class LLMAgent:
             api_key=HF_TOKEN if HF_TOKEN else "none",
         )
         self.model = MODEL_NAME
     def choose_action(self, obs: dict, task_id: int) -> dict:
         """Prompt the LLM with current observation, return parsed action dict."""
         task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
         prompt = f"""{task_desc}
@@ -144,7 +146,12 @@ Respond with ONLY a JSON action:
                 content = completion.choices[0].message.content.strip()
                 return self._parse_action(content)
             except Exception as e:
-                print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {e}")
                 time.sleep(1)
         # Fallback: rule-based heuristic

 Uses OpenAI-compatible API via API_BASE_URL / MODEL_NAME / HF_TOKEN environment variables.
 Usage:
+    export MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.3
     export HF_TOKEN=hf_xxxx
+    python inference.py [--episodes 3] [--env-url http://localhost:7860]
 """
 import argparse
 # ── Constants ──────────────────────────────────────────────────────────────
 ENV_URL = os.getenv("ENV_URL", "http://localhost:7860")
+MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
 HF_TOKEN = os.getenv("HF_TOKEN", "")
 DEFAULT_EPISODES = 3
 DEFAULT_SEED_BASE = 1000  # episodes use seed BASE+episode_idx for reproducibility
             api_key=HF_TOKEN if HF_TOKEN else "none",
         )
         self.model = MODEL_NAME
+        self.fallback_mode = False
     def choose_action(self, obs: dict, task_id: int) -> dict:
         """Prompt the LLM with current observation, return parsed action dict."""
+        if self.fallback_mode:
+            return self._heuristic_action(obs)
         task_desc = TASK_DESCRIPTIONS.get(task_id, TASK_DESCRIPTIONS[1])
         prompt = f"""{task_desc}
                 content = completion.choices[0].message.content.strip()
                 return self._parse_action(content)
             except Exception as e:
+                err_str = str(e)
+                print(f"  [LLM attempt {attempt+1}/{MAX_RETRIES}] error: {err_str}")
+                if "402" in err_str or "depleted" in err_str:
+                    print("  [WARN] Hugging Face free credits depleted! Switching to local heuristic agent for the rest of the simulation.")
+                    self.fallback_mode = True
+                    return self._heuristic_action(obs)
                 time.sleep(1)
         # Fallback: rule-based heuristic

openenv.yaml CHANGED Viewed

@@ -8,6 +8,7 @@ description: |
 author: LOKyu Team
 tags:
   - reinforcement-learning
   - energy
   - demand-response

 author: LOKyu Team
 tags:
+  - openenv
   - reinforcement-learning
   - energy
   - demand-response