Spaces:

Prajwal782007
/

Gridmind

Sleeping

App Files Files Community

adityss committed about 1 month ago

Commit

3b977fc

1 Parent(s): 88da572

refactor: replace heuristic log generation with Go-based environment simulation and update API schema

Browse files

Files changed (9) hide show

.gitignore +2 -1
baseline_scores.json +38 -5
env/environment.go +31 -0
env/models.go +20 -11
generate_realistic_training_log.py +0 -33
generate_training_log.py +0 -52
inference.py +1 -1
main.go +30 -6
openenv.yaml +23 -3

.gitignore CHANGED Viewed

@@ -77,4 +77,5 @@ wandb/
 IMPLEMENTATION_PLAN.md
 TODO_SPRINT.md
 context.md
-REPORT.md

 IMPLEMENTATION_PLAN.md
 TODO_SPRINT.md
 context.md
+REPORT.md
+gridmind.exe
+gridmind-server.exe

baseline_scores.json CHANGED Viewed

@@ -7,17 +7,50 @@
   "llm_every": 8,
   "max_steps": null,
   "task_averages": {
-    "1": 0.5482
   },
-  "overall_average": 0.5482,
   "all_results": [
     {
       "task_id": 1,
       "seed": 1100,
-      "total_reward": 249.22208122816207,
       "total_steps": 96,
-      "elapsed_sec": 1.4036986827850342,
-      "score": 0.5482,
       "sub_scores": {},
       "exploit_detected": false
     }

   "llm_every": 8,
   "max_steps": null,
   "task_averages": {
+    "1": 0.4942,
+    "2": 0.4707,
+    "3": 0.7478,
+    "4": 0.4779
   },
+  "overall_average": 0.54765,
   "all_results": [
     {
       "task_id": 1,
       "seed": 1100,
+      "total_reward": 251.84571448658104,
       "total_steps": 96,
+      "elapsed_sec": 1.227782964706421,
+      "score": 0.4942,
+      "sub_scores": {},
+      "exploit_detected": false
+    },
+    {
+      "task_id": 2,
+      "seed": 1200,
+      "total_reward": 245.38403598363988,
+      "total_steps": 96,
+      "elapsed_sec": 0.8327796459197998,
+      "score": 0.4707,
+      "sub_scores": {},
+      "exploit_detected": false
+    },
+    {
+      "task_id": 3,
+      "seed": 1300,
+      "total_reward": 242.06080137356216,
+      "total_steps": 96,
+      "elapsed_sec": 0.6833479404449463,
+      "score": 0.7478,
+      "sub_scores": {},
+      "exploit_detected": false
+    },
+    {
+      "task_id": 4,
+      "seed": 1400,
+      "total_reward": 206.4647897455665,
+      "total_steps": 96,
+      "elapsed_sec": 1.0237984657287598,
+      "score": 0.4779,
       "sub_scores": {},
       "exploit_detected": false
     }

env/environment.go CHANGED Viewed

@@ -587,6 +587,7 @@ func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) S
 			Episode:          e.episode,
 			Step:             s,
 		},
 	}
 }
@@ -692,6 +693,29 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
 	// Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
 	reportedTemp := b.IndoorTemperature + b.TempObservationNoise
 	return ObservationModel{
 		IndoorTemperature:   math.Round(reportedTemp*100) / 100,
 		ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
@@ -707,6 +731,13 @@ func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
 		HVACEfficiency:      math.Round(b.HVACEfficiency*1000) / 1000,
 		InstructionCard:     e.InstructionCard,
 		ActiveFaults:        activeFaults,
 	}
 }

 			Episode:          e.episode,
 			Step:             s,
 		},
+		Rewards: rc,
 	}
 }
 	// Apply sensor fault noise to observation (not physics) - if sensor fault is active, agent sees wrong temp
 	reportedTemp := b.IndoorTemperature + b.TempObservationNoise
+	taskCardStr := ""
+	if e.taskID == 4 && e.InstructionCard != nil {
+		taskCardStr = e.InstructionCard.Text
+	} else if e.taskID == 1 {
+		taskCardStr = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage."
+	} else if e.taskID == 2 {
+		taskCardStr = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost."
+	} else if e.taskID == 3 {
+		taskCardStr = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon."
+	} else {
+		taskCardStr = "Maintain operations and minimize cost."
+	}
+	priceForecast := make([]float64, 4)
+	for i := 0; i < 4; i++ {
+		idx := b.Step + i
+		if idx < EpisodeSteps {
+			priceForecast[i] = math.Round(e.PriceCurve[idx]*10000) / 10000
+		} else {
+			priceForecast[i] = math.Round(e.PriceCurve[EpisodeSteps-1]*10000) / 10000
+		}
+	}
 	return ObservationModel{
 		IndoorTemperature:   math.Round(reportedTemp*100) / 100,
 		ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
 		HVACEfficiency:      math.Round(b.HVACEfficiency*1000) / 1000,
 		InstructionCard:     e.InstructionCard,
 		ActiveFaults:        activeFaults,
+		TaskCard:            taskCardStr,
+		NLSummary:           "GridMind simulation state.",
+		MarketType:          "tou",
+		Season:              "summer",
+		PriceVolatility:     0.2,
+		PriceForecast:       priceForecast,
+		DemandChargeActive:  false,
 	}
 }

env/models.go CHANGED Viewed

@@ -76,6 +76,13 @@ type ObservationModel struct {
 	HVACEfficiency      float64          `json:"hvac_efficiency"`
 	InstructionCard     *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
 	ActiveFaults        []string         `json:"active_faults,omitempty"`    // human-readable alarm strings for active faults
 }
 // ActionModel is the parsed agent action for a single step.
@@ -87,18 +94,19 @@ type ActionModel struct {
 	BuildingID         int     `json:"building_id"`         // which building to act on
 }
-// RewardComponents holds the individual components of the dense reward signal.
 type RewardComponents struct {
-	CostSavings        float64 `json:"cost_savings"`         // negative = expensive
-	TempConstraint   float64 `json:"temp_constraint"`     // positive = within bounds
-	GridResponse    float64 `json:"grid_response"`       // bonus for DR compliance
-	DeadlinePenalty  float64 `json:"deadline_penalty"`    // negative for missed jobs
-	EfficiencyBonus float64 `json:"efficiency_bonus"`    // storage arbitrage
-	StabilityPenalty float64 `json:"stability_penalty"`   // HVAC oscillation penalty
-	CarbonReward    float64 `json:"carbon_reward"`       // low-carbon bonus
-	InstructionReward float64 `json:"instruction_reward"`  // Task 4: instruction-following score
-	FaultMitigation float64 `json:"fault_mitigation"`  // Track 3: reward for proper fault response
-	Total           float64 `json:"total"`
 }
 // StepResponse is the full HTTP body returned from POST /step.
@@ -107,6 +115,7 @@ type StepResponse struct {
 	Reward      float64          `json:"reward"`
 	Done        bool             `json:"done"`
 	Info        StepInfo         `json:"info"`
 }
 // StepInfo carries auxiliary information per step.

 	HVACEfficiency      float64          `json:"hvac_efficiency"`
 	InstructionCard     *InstructionCard `json:"instruction_card,omitempty"` // populated for Task 4 only
 	ActiveFaults        []string         `json:"active_faults,omitempty"`    // human-readable alarm strings for active faults
+	TaskCard            string           `json:"task_card"`
+	NLSummary           string           `json:"nl_summary"`
+	MarketType          string           `json:"market_type"`
+	Season              string           `json:"season"`
+	PriceVolatility     float64          `json:"price_volatility"`
+	PriceForecast       []float64        `json:"price_forecast"`
+	DemandChargeActive  bool             `json:"demand_charge_active"`
 }
 // ActionModel is the parsed agent action for a single step.
 	BuildingID         int     `json:"building_id"`         // which building to act on
 }
 type RewardComponents struct {
+	CostSavings           float64 `json:"cost_savings"`
+	TempConstraint        float64 `json:"temperature_constraint"`
+	GridResponse          float64 `json:"grid_response"`
+	DeadlinePenalty       float64 `json:"deadline_penalty"`
+	EfficiencyBonus       float64 `json:"efficiency_bonus"`
+	StabilityPenalty      float64 `json:"stability_penalty"`
+	CarbonReward          float64 `json:"carbon_reward"`
+	InstructionReward     float64 `json:"task_satisfaction"`
+	FaultMitigation       float64 `json:"fault_mitigation"`
+	PriceAnticipation     float64 `json:"price_anticipation"`
+	DemandChargePenalty   float64 `json:"demand_charge_penalty"`
+	Total                 float64 `json:"total"`
 }
 // StepResponse is the full HTTP body returned from POST /step.
 	Reward      float64          `json:"reward"`
 	Done        bool             `json:"done"`
 	Info        StepInfo         `json:"info"`
+	Rewards     RewardComponents `json:"rewards"`
 }
 // StepInfo carries auxiliary information per step.

generate_realistic_training_log.py DELETED Viewed

@@ -1,33 +0,0 @@
-#!/usr/bin/env python3
-import csv, random, math, os
-random.seed(42)
-os.makedirs("results", exist_ok=True)
-rows = []
-for step in range(0, 301, 5):
-    progress = step / 300
-    base = 0.52 + (0.68 - 0.52) * (1 - math.exp(-3 * progress)) + random.gauss(0, 0.015)
-    json_valid = min(0.2, 0.15 + random.gauss(0, 0.03))
-    rows.append({
-        "step": step,
-        "loss": max(0.000001, 0.00002 - progress * 0.00001 + random.gauss(0, 0.000005)),
-        "rewards/reward_json_valid/mean": max(0, min(0.2, json_valid)),
-        "rewards/reward_json_valid/std": 0.02,
-        "rewards/reward_env_interaction/mean": max(0.4, min(0.75, base)),
-        "rewards/reward_env_interaction/std": 0.02,
-        "rewards/reward/mean": 0.20 + json_valid + max(0.4, min(0.75, base)) * 0.4,
-    })
-columns = ["step", "loss", "rewards/reward_json_valid/mean", "rewards/reward_json_valid/std",
-           "rewards/reward_env_interaction/mean", "rewards/reward_env_interaction/std", "rewards/reward/mean"]
-with open("results/training_log.csv", "w", newline="") as f:
-    writer = csv.DictWriter(f, fieldnames=columns)
-    writer.writeheader()
-    writer.writerows(rows)
-print(f"Generated {len(rows)} training steps with realistic learning curve")
-print(f"Initial episode score: {rows[0]['rewards/reward_env_interaction/mean']:.3f}")
-print(f"Final episode score: {rows[-1]['rewards/reward_env_interaction/mean']:.3f}")
-print(f"Improvement: {(rows[-1]['rewards/reward_env_interaction/mean'] - rows[0]['rewards/reward_env_interaction/mean']):.3f}")

generate_training_log.py DELETED Viewed

@@ -1,52 +0,0 @@
-#!/usr/bin/env python3
-"""
-GridMind-RL Training Log Generator
-Generates a realistic training log CSV from heuristic baseline runs.
-"""
-import csv
-import os
-import json
-import random
-import math
-random.seed(42)
-os.makedirs("results", exist_ok=True)
-with open("results/baseline_scores_heuristic.json") as f:
-    heuristic_data = json.load(f)
-heuristic_by_task = {int(k): v for k, v in heuristic_data["task_averages"].items()}
-overall_heuristic = heuristic_data["overall_average"]
-llm_baseline = 0.65
-target_performance = 0.72
-N_STEPS = 200
-NOISE_SCALE = 0.02
-IMPROVEMENT_RATE = 0.003
-rows = []
-for step in range(0, N_STEPS + 1, 5):
-    progress = step / N_STEPS
-    base = overall_heuristic + (target_performance - overall_heuristic) * math.sin(progress * math.pi / 2)
-    loss = 2.0 - progress * 1.5 + random.gauss(0, 0.1)
-    reward_valid = 0.3 + random.gauss(0, 0.02)
-    reward_keys = 0.3 + random.gauss(0, 0.02)
-    reward_env = base * 0.4 + random.gauss(0, NOISE_SCALE)
-    rows.append({
-        "step": step,
-        "loss": max(0.1, loss),
-        "reward_valid_json": reward_valid,
-        "reward_has_required_keys": reward_keys,
-        "reward_env_interaction": max(0.0, min(0.4, reward_env)),
-    })
-with open("results/training_log.csv", "w", newline="") as f:
-    writer = csv.DictWriter(f, fieldnames=["step", "loss", "reward_valid_json", "reward_has_required_keys", "reward_env_interaction"])
-    writer.writeheader()
-    writer.writerows(rows)
-print(f"Generated {len(rows)} training steps -> results/training_log.csv")
-print(f"Heuristic baseline: {overall_heuristic:.3f}")
-print(f"Target performance: {target_performance:.3f}")
-print(f"Final reward_env: {rows[-1]['reward_env_interaction']:.3f}")

inference.py CHANGED Viewed

@@ -39,7 +39,7 @@ except ImportError:
 ENV_URL      = os.getenv("ENV_URL", "http://localhost:7860")
 HF_TOKEN     = os.getenv("HF_TOKEN")  # Mandatory — no default
 API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
-MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-7B-Instruct")
 # ── Constants ────────────────────────────────────────────────────────────────
 BENCHMARK     = "gridmind"

 ENV_URL      = os.getenv("ENV_URL", "http://localhost:7860")
 HF_TOKEN     = os.getenv("HF_TOKEN")  # Mandatory — no default
 API_BASE_URL = os.getenv("API_BASE_URL", "https://api-inference.huggingface.co/v1")
+MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct")
 # ── Constants ────────────────────────────────────────────────────────────────
 BENCHMARK     = "gridmind"

main.go CHANGED Viewed

@@ -240,15 +240,39 @@ func (s *Server) handleReset(w http.ResponseWriter, r *http.Request) {
 		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
 		return
 	}
-	var req env.ResetRequest
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-		// Allow empty body → defaults
-		req = env.ResetRequest{TaskID: 1}
 	}
-	if req.TaskID == 0 {
-		req.TaskID = 1
 	}
-	resp := s.envMgr.Reset(req)
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(resp)
 }

 		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
 		return
 	}
+	var req map[string]interface{}
 	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		req = make(map[string]interface{})
 	}
+	taskID := 1
+	if t, ok := req["task_id"].(float64); ok {
+		taskID = int(t)
+	} else if t, ok := req["task_id"].(int64); ok {
+		taskID = int(t)
+	} else if t, ok := req["task_id"].(int); ok {
+		taskID = t
 	}
+	if taskID == 0 {
+		taskID = 1
+	}
+	seed := int64(0)
+	if s, ok := req["seed"].(float64); ok {
+		seed = int64(s)
+	} else if s, ok := req["seed"].(int64); ok {
+		seed = s
+	}
+	numBuildings := 1
+	if nb, ok := req["num_buildings"].(float64); ok {
+		numBuildings = int(nb)
+	}
+	resetReq := env.ResetRequest{
+		TaskID:       taskID,
+		NumBuildings: numBuildings,
+	}
+	if seed > 0 {
+		resetReq.Seed = &seed
+	}
+	resp := s.envMgr.Reset(resetReq)
 	w.Header().Set("Content-Type", "application/json")
 	json.NewEncoder(w).Encode(resp)
 }

openenv.yaml CHANGED Viewed

@@ -77,6 +77,22 @@ schemas:
         items:
           type: string
         description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
       instruction_card:
         type: [object, "null"]
         description: "Natural language objective card. Only populated when task_id=4. Track 2."
@@ -210,10 +226,14 @@ schemas:
                 type: number
               carbon_reward:
                 type: number
-              instruction_reward:
                 type: number
               fault_mitigation:
                 type: number
               total:
                 type: number
           energy_used_kwh:
@@ -344,11 +364,11 @@ endpoints:
   health:
     path: /health
     method: GET
-    description: Health check - returns {"status": "ok", "version": "1.0.0"}
   ping:
     path: /ping
     method: GET
-    description: Liveness probe - returns {"status": "ok"}
   reset:
     path: /reset
     method: POST

         items:
           type: string
         description: "Human-readable list of active fault alarm strings. Empty when no faults. Track 3."
+      task_card:
+        type: string
+      nl_summary:
+        type: string
+      market_type:
+        type: string
+      season:
+        type: string
+      price_volatility:
+        type: number
+      price_forecast:
+        type: array
+        items:
+          type: number
+      demand_charge_active:
+        type: boolean
       instruction_card:
         type: [object, "null"]
         description: "Natural language objective card. Only populated when task_id=4. Track 2."
                 type: number
               carbon_reward:
                 type: number
+              task_satisfaction:
                 type: number
               fault_mitigation:
                 type: number
+              price_anticipation:
+                type: number
+              demand_charge_penalty:
+                type: number
               total:
                 type: number
           energy_used_kwh:
   health:
     path: /health
     method: GET
+    description: 'Health check - returns {"status": "ok", "version": "1.0.0"}'
   ping:
     path: /ping
     method: GET
+    description: 'Liveness probe - returns {"status": "ok"}'
   reset:
     path: /reset
     method: POST