// Package env implements the GridMind-RL simulation core.
// It models a multi-building industrial/commercial energy management system
// with stochastic electricity prices, thermal dynamics, and batch job scheduling.
package env

import (
	"math"
	"math/rand"
	"sync"
	"time"
)

// Simulation-wide constants: episode length and step size, the default comfort
// band, and the per-building equipment limits that newBuildingState copies
// into every BuildingState.
const (
	EpisodeSteps    = 96   // 24 hours × 15-min intervals (96 × 0.25h = 24h)
	StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
	MaxBuildings    = 3    // Reset clamps the requested building count to this
	DefaultSetpoint = 21.0  // °C comfortable indoor temp
	TMinDefault     = 19.0  // °C lower bound (passed to ComputeReward as TMin)
	TMaxDefault     = 23.0  // °C upper bound (passed to ComputeReward as TMax)
	MaxHVACPowerKW  = 50.0  // kW per building
	MaxStorageKWh   = 100.0 // kWh thermal storage capacity
	StorageLossRate = 0.005 // fraction lost per step (thermal dissipation)
	MaxBatchJobs    = 5     // max concurrent batch jobs per building
)

// Environment is the thread-safe top-level simulation manager.
//
// The exported methods in this file take mu themselves; the unexported helpers
// (stepBuilding, buildObservation, ...) do no locking and rely on the caller
// holding it.
type Environment struct {
	// mu guards every field below.
	mu sync.RWMutex
	// rng is the episode's random source; Reset re-creates it from seed.
	rng *rand.Rand
	// seed is the value rng was last seeded with (reported back to clients).
	seed int64
	// episode counts Reset calls.
	episode int
	// step is the index of the next timestep to simulate (0..EpisodeSteps).
	step int
	// done becomes true once step reaches EpisodeSteps.
	done bool
	// taskID is 1..4; Reset falls back to 1 for anything else.
	taskID int
	// difficulty is matched against "medium" and "hard" in this file; any
	// other value takes the default ("easy") branch of those switches.
	difficulty string
	// numBuildings is the building count, clamped to 1..MaxBuildings by Reset.
	numBuildings int

	Buildings        []*BuildingState
	PriceCurve       [EpisodeSteps]float64
	CarbonCurve      [EpisodeSteps]float64
	Replay           []ReplayEntry
	LastActions      []ActionModel
	InstructionCard  *InstructionCard // set for Task 4 episodes
	FaultSchedule    *FaultSchedule   // randomised fault events for this episode
	PriceMultipliers []float64        // per-building multipliers set by coordinator (default 1.0)

	// History for dashboard rendering (per building); one entry is appended
	// per simulated step by stepBuilding.
	TempHistory     [][]float64
	CostHistory     [][]float64
	HVACHistory     [][]float64
	LoadShedHistory [][]float64
	RewardHistory   [][]RewardComponents

	// Exploit detection counters (per building), read by ExploitDetected.
	totalShedSteps     []int     // steps whose shed fraction exceeded 0.4
	thermalCycleCounts []int     // charge/discharge reversals between consecutive steps
	prevChargeRates    []float64 // ThermalChargeRate applied on the previous step
}

// NewEnvironment returns an Environment seeded from the wall clock and set to
// the single-building, "easy", Task 1 defaults. No episode state (buildings,
// curves, histories) is allocated here; that happens in Reset.
func NewEnvironment() *Environment {
	now := time.Now().UnixNano()

	env := new(Environment)
	env.seed = now
	env.rng = rand.New(rand.NewSource(now))
	env.taskID = 1
	env.difficulty = "easy"
	env.numBuildings = 1
	return env
}

// Reset starts a new episode and returns the initial observations. Thread-safe.
//
// The request is normalised before use: a nil Seed is replaced by a clock-based
// one, a TaskID outside 1..4 becomes 1, an empty Difficulty is derived from the
// task (1→easy, 2→medium, 3/4→hard), and NumBuildings is clamped to
// 1..MaxBuildings. All per-episode state is then rebuilt from the seeded RNG.
func (e *Environment) Reset(req ResetRequest) ResetResponse {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Seed and RNG.
	if req.Seed == nil {
		e.seed = time.Now().UnixNano()
	} else {
		e.seed = *req.Seed
	}
	e.rng = rand.New(rand.NewSource(e.seed))

	// Task, with fallback to Task 1 for out-of-range IDs.
	task := req.TaskID
	if task < 1 || task > 4 {
		task = 1
	}
	e.taskID = task

	// Difficulty: explicit value wins, otherwise it follows the task.
	level := req.Difficulty
	if level == "" {
		byTask := [...]string{1: "easy", 2: "medium", 3: "hard", 4: "hard"}
		level = byTask[task]
	}
	e.difficulty = level

	// Number of buildings (federation), clamped to the supported range.
	count := req.NumBuildings
	switch {
	case count < 1:
		count = 1
	case count > MaxBuildings:
		count = MaxBuildings
	}
	e.numBuildings = count

	// Episode bookkeeping.
	e.episode++
	e.step, e.done = 0, false
	e.Replay = make([]ReplayEntry, 0, EpisodeSteps)
	e.LastActions = make([]ActionModel, count)

	// Curves must exist before buildings are created: newBuildingState reads
	// the first price/carbon sample.
	e.generatePriceCurve()
	e.generateCarbonCurve()

	// Per-building containers.
	e.Buildings = make([]*BuildingState, count)
	e.TempHistory = make([][]float64, count)
	e.CostHistory = make([][]float64, count)
	e.HVACHistory = make([][]float64, count)
	e.LoadShedHistory = make([][]float64, count)
	e.RewardHistory = make([][]RewardComponents, count)
	e.totalShedSteps = make([]int, count)
	e.thermalCycleCounts = make([]int, count)
	e.prevChargeRates = make([]float64, count)
	e.PriceMultipliers = make([]float64, count)

	for i := 0; i < count; i++ {
		e.Buildings[i] = e.newBuildingState(i)
		e.TempHistory[i] = make([]float64, 0, EpisodeSteps)
		e.CostHistory[i] = make([]float64, 0, EpisodeSteps)
		e.HVACHistory[i] = make([]float64, 0, EpisodeSteps)
		e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps)
		e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps)
		// Coordinator price multipliers start neutral.
		e.PriceMultipliers[i] = 1.0
	}

	// Instruction card exists only for Task 4 episodes.
	if e.taskID == 4 {
		e.InstructionCard = GenerateInstructionCard(e.rng)
	} else {
		e.InstructionCard = nil
	}

	// Fault schedule is generated for every task; difficulty is passed through.
	e.FaultSchedule = GenerateFaultSchedule(e.rng, e.difficulty)

	initial := make([]ObservationModel, count)
	for i := range initial {
		initial[i] = e.buildObservation(e.Buildings[i])
	}

	return ResetResponse{
		Observations:    initial,
		Episode:         e.episode,
		TaskID:          e.taskID,
		Seed:            e.seed,
		InstructionCard: e.InstructionCard,
	}
}

// Step advances the simulation by one timestep for all buildings. Thread-safe.
//
// Each building receives the action selected by findAction (matching
// BuildingID first, then position, then a default hold action). Actions are
// clamped on a private copy, so the caller's slice is left untouched — the
// same convention SimulateStep uses.
//
// Returns one StepResponse per building and whether the episode is finished.
// If the episode is already done, or Reset has not been called yet (there are
// no buildings to step), it returns (nil, true) without changing any state.
func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) {
	e.mu.Lock()
	defer e.mu.Unlock()

	if e.done {
		return nil, true
	}
	// Nothing to simulate until Reset has built the episode. Previously this
	// path panicked on e.LastActions[0].
	if len(e.Buildings) == 0 {
		return nil, true
	}

	// Validate and clamp actions on a copy.
	clamped := make([]ActionModel, len(actions))
	copy(clamped, actions)
	for i := range clamped {
		e.clampAction(&clamped[i])
	}

	if len(e.LastActions) != len(e.Buildings) {
		e.LastActions = make([]ActionModel, len(e.Buildings))
	}

	responses := make([]StepResponse, len(e.Buildings))
	for i, b := range e.Buildings {
		// Find action for this building (by building_id or by index).
		act := e.findAction(clamped, i)
		// Record the action that is actually applied, so LastActions and the
		// replay agree with the simulated transition even when actions are
		// addressed by BuildingID rather than by position.
		e.LastActions[i] = act
		responses[i] = e.stepBuilding(b, act, i)
	}

	e.step++
	if e.step >= EpisodeSteps {
		e.done = true
	}

	// Record replay entry (first building is the primary one).
	e.Replay = append(e.Replay, ReplayEntry{
		Step:        e.step - 1,
		Observation: responses[0].Observation,
		Action:      e.LastActions[0],
		Reward:      responses[0].Reward,
		Components:  responses[0].Info.RewardComponents,
		Done:        e.done,
	})

	return responses, e.done
}

// GetState returns a full snapshot of environment state. Thread-safe (read lock).
//
// The per-building job list and history slices are copied, so the returned
// value does not alias memory that Step mutates after the lock is released
// (job flags are updated in place by updateBatchJobs). The price and carbon
// curves are downsampled to one value per hour (every 4th step).
func (e *Environment) GetState() StateResponse {
	e.mu.RLock()
	defer e.mu.RUnlock()

	buildings := make([]BuildingStatePublic, e.numBuildings)
	for i, b := range e.Buildings {
		if i >= len(buildings) {
			break
		}
		pub := BuildingStatePublic{
			ObservationModel:    e.buildObservation(b),
			OutdoorTemperature:  b.OutdoorTemperature,
			SetpointTemperature: b.SetpointTemperature,
			BaselineCost:        b.BaselineCost,
			BaselineCarbon:      b.BaselineCarbon,
			CumulativeCarbon:    b.CumulativeCarbon,
			// Copy: the live slice's elements are modified on every step.
			Jobs: append(make([]BatchJob, 0, len(b.Jobs)), b.Jobs...),
		}
		if i < len(e.TempHistory) {
			pub.TempHistory = append(make([]float64, 0, len(e.TempHistory[i])), e.TempHistory[i]...)
			pub.CostHistory = append(make([]float64, 0, len(e.CostHistory[i])), e.CostHistory[i]...)
			pub.HVACHistory = append(make([]float64, 0, len(e.HVACHistory[i])), e.HVACHistory[i]...)
			pub.LoadShedHistory = append(make([]float64, 0, len(e.LoadShedHistory[i])), e.LoadShedHistory[i]...)
			pub.RewardHistory = append(make([]RewardComponents, 0, len(e.RewardHistory[i])), e.RewardHistory[i]...)
		}
		buildings[i] = pub
	}

	// One sample per hour: h*4 is always < EpisodeSteps for h < EpisodeSteps/4.
	const hours = EpisodeSteps / 4
	priceCurve := make([]float64, hours)
	carbonCurve := make([]float64, hours)
	for h := 0; h < hours; h++ {
		priceCurve[h] = e.PriceCurve[h*4]
		carbonCurve[h] = e.CarbonCurve[h*4]
	}

	return StateResponse{
		Buildings:       buildings,
		PriceCurve:      priceCurve,
		CarbonCurve:     carbonCurve,
		Episode:         e.episode,
		Step:            e.step,
		TaskID:          e.taskID,
		Done:            e.done,
		Seed:            e.seed,
		InstructionCard: e.InstructionCard,
	}
}

// GetReplay returns a copy of the replay entries recorded so far in the
// current episode. Thread-safe (read lock); the returned slice is never nil
// and does not share its backing array with the environment.
func (e *Environment) GetReplay() []ReplayEntry {
	e.mu.RLock()
	defer e.mu.RUnlock()

	snapshot := make([]ReplayEntry, 0, len(e.Replay))
	return append(snapshot, e.Replay...)
}

// ──────────────────────────────────────────────
// Internal helpers
// ──────────────────────────────────────────────

// newBuildingState creates the initial state for building id, drawing the
// randomised starting conditions from e.rng and the first price/carbon sample
// from the already-generated curves. The RNG draws happen in a fixed order
// (indoor temp, storage, outdoor temp, process demand, degradation rate, then
// batch jobs) so that a given seed always yields the same building.
func (e *Environment) newBuildingState(id int) *BuildingState {
	// Randomised initial conditions.
	indoor := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 // setpoint ± 1 °C
	storage := 0.3 + e.rng.Float64()*0.4                  // start 30–70% full
	outdoor := 15.0 + e.rng.Float64()*15.0                // 15–30 °C
	demand := 10.0 + e.rng.Float64()*20.0
	degradation := 0.0005 + e.rng.Float64()*0.001 // 0.05% to 0.15% per step

	state := new(BuildingState)
	state.BuildingID = id

	// Thermal and load state.
	state.IndoorTemperature = indoor
	state.OutdoorTemperature = outdoor
	state.SetpointTemperature = DefaultSetpoint
	state.ThermalStorageLevel = storage
	state.ProcessDemand = demand
	state.PrevHVACLevel = 0.5

	// Market signals at step 0. Counters (step, hour, grid stress, cumulative
	// and baseline cost/carbon) keep their zero values.
	state.CurrentPrice = e.PriceCurve[0]
	state.CarbonIntensity = e.CarbonCurve[0]

	// Equipment parameters.
	state.MaxHVACPower = MaxHVACPowerKW
	state.MaxStorageCapacity = MaxStorageKWh
	state.ThermalLossRate = StorageLossRate
	state.HVACEfficiency = 1.0
	state.HVACDegradationRate = degradation

	// Spawn batch jobs based on difficulty and expose their deadlines.
	state.Jobs = e.generateBatchJobs()
	state.BatchQueue = pendingDeadlines(state.Jobs)
	return state
}

// generateBatchJobs creates the batch jobs for one building: 3 jobs by
// default, 4 on "medium" and 5 on "hard". Each job gets a random deadline
// slot, a duration of 1–3 steps and a power draw of 5–20 kW, and starts
// unscheduled (ScheduledAt = -1).
func (e *Environment) generateBatchJobs() []BatchJob {
	var count int
	switch e.difficulty {
	case "hard":
		count = 5
	case "medium":
		count = 4
	default:
		count = 3
	}

	// Deadlines are spread across the episode, leaving slack at the end for
	// the job's duration; the window never shrinks below 8 slots.
	window := EpisodeSteps - 12
	if window < 8 {
		window = 8
	}

	jobs := make([]BatchJob, 0, count)
	for id := 1; id <= count; id++ {
		// Draw order (deadline, duration, power) is part of the seeded
		// behaviour and must not change.
		deadline := 4 + e.rng.Intn(window)
		duration := 1 + e.rng.Intn(3)
		power := 5.0 + e.rng.Float64()*15.0

		jobs = append(jobs, BatchJob{
			ID:           id,
			DeadlineSlot: deadline,
			Duration:     duration,
			PowerDraw:    power,
			ScheduledAt:  -1,
		})
	}
	return jobs
}

// generatePriceCurve fills e.PriceCurve with a stochastic Time-of-Use curve.
//
// The base price for each step comes from touPrice, with both peak windows
// shifted by a random −2..+2 hours for the episode. Multiplicative noise is
// then added, scaled by a difficulty-dependent volatility (0.1 / 0.2 / 0.35),
// and the result is floored at 0.02.
func (e *Environment) generatePriceCurve() {
	var volatility float64
	switch e.difficulty {
	case "hard":
		volatility = 0.35
	case "medium":
		volatility = 0.2
	default:
		volatility = 0.1
	}

	// Random peak window shift (±2 hours) for stochasticity.
	morningShift := e.rng.Intn(5) - 2
	eveningShift := e.rng.Intn(5) - 2

	for s := range e.PriceCurve {
		base := touPrice(s/4, morningShift, eveningShift)
		jitter := (e.rng.Float64()*2 - 1) * volatility * base
		e.PriceCurve[s] = math.Max(0.02, base+jitter)
	}
}

// touPrice returns the base time-of-use price ($/kWh) for an hour of the day.
//
// With both shifts at zero the schedule is:
//
//	00:00–06:00  0.035  deep off-peak
//	06:00–08:00  0.070  ramp-up
//	08:00–12:00  0.200  morning peak
//	12:00–17:00  0.120  shoulder between the peaks
//	17:00–21:00  0.310  evening peak
//	21:00–24:00  0.093  declining demand
//
// morningShift and eveningShift move the respective peak window by that many
// hours; the shoulder, ramp-up and declining bands stretch or shrink to fit.
// Bands are tested in the order below, so an earlier band wins on overlap.
func touPrice(hour, morningShift, eveningShift int) float64 {
	morningStart, morningEnd := 8+morningShift, 12+morningShift
	eveningStart, eveningEnd := 17+eveningShift, 21+eveningShift

	// within reports whether hour lies in the half-open window [lo, hi).
	within := func(lo, hi int) bool { return lo <= hour && hour < hi }

	if within(morningStart, morningEnd) {
		return 0.20 // morning peak
	}
	if within(eveningStart, eveningEnd) {
		return 0.31 // evening peak
	}
	if within(morningEnd, eveningStart) {
		return 0.12 // shoulder
	}
	if within(6, morningStart) {
		return 0.07 // ramp-up
	}
	if hour >= eveningEnd {
		return 0.093 // declining
	}
	return 0.035 // deep off-peak
}

// generateCarbonCurve fills e.CarbonCurve (gCO2/kWh) from the price curve.
//
// Each price is mapped linearly from the nominal price range [0.028, 0.36]
// onto [150, 600], ±30 of uniform noise is added, and the result is floored
// at 100. Must run after generatePriceCurve.
func (e *Environment) generateCarbonCurve() {
	const (
		priceLo, priceHi   = 0.028, 0.36
		carbonLo, carbonHi = 150.0, 600.0
	)

	for s := range e.CarbonCurve {
		mapped := carbonLo + (e.PriceCurve[s]-priceLo)/(priceHi-priceLo)*(carbonHi-carbonLo)
		jitter := (e.rng.Float64()*2 - 1) * 30.0
		e.CarbonCurve[s] = math.Max(100.0, mapped+jitter)
	}
}

// stepBuilding advances a single building by one timestep and returns its
// StepResponse. It does no locking; callers (Step, SimulateStep) hold e.mu.
//
// Sequence: refresh price/carbon/hour from the curves, apply faults, update
// grid stress / weather / process demand, apply the action (HVAC, storage,
// load shedding, batch scheduling), integrate the thermal model, account
// energy/cost/carbon against the baseline, compute the reward, update the
// exploit counters and histories, and build the observation.
//
// b is mutated in place; act is expected to be clamped already (clampAction);
// idx is the building's index into the per-building slices on e.
func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) StepResponse {
	s := e.step

	// Update environmental signals from curves
	b.CurrentPrice = e.PriceCurve[s] * e.PriceMultipliers[idx]
	b.CarbonIntensity = e.CarbonCurve[s]
	b.HourOfDay = (s / 4) % 24

	// Restore defaults before applying faults (allows recovery when fault ends)
	b.MaxHVACPower = MaxHVACPowerKW

	// Apply fault events for this step (modifies price, stress, HVAC capacity).
	// The returned descriptions are not needed here: buildObservation derives
	// them again from FaultSchedule.ActiveAt.
	_ = ApplyFaults(b, e.FaultSchedule, s, e.rng)

	// Faults active at this step; used both for the grid-outage check below
	// and for the fault-mitigation part of the reward.
	var activeFaults []FaultEvent
	if e.FaultSchedule != nil {
		activeFaults = e.FaultSchedule.ActiveAt(s)
	}

	// Stochastic grid stress events (more frequent in hard mode).
	// Note: FaultGridOutage sets GridStressSignal=1.0 inside ApplyFaults.
	// We only overwrite it from the stochastic model if no outage is active.
	hasGridFault := false
	for _, f := range activeFaults {
		if f.Type == FaultGridOutage {
			hasGridFault = true
			break
		}
	}
	if !hasGridFault {
		b.GridStressSignal = e.updateGridStress(s)
	}

	// Weather perturbation: outdoor temp drifts sinusoidally + noise
	b.OutdoorTemperature = e.updateOutdoorTemp(s)

	// Process demand fluctuation
	b.ProcessDemand = e.updateProcessDemand(s)

	// ----- Apply actions -----

	// 0. Degrade HVAC efficiency (never below 50%)
	b.HVACEfficiency = math.Max(0.5, b.HVACEfficiency-b.HVACDegradationRate)

	// 1. HVAC: heats/cools building toward setpoint
	hvacPower := act.HVACPowerLevel * b.MaxHVACPower * b.HVACEfficiency // kW

	// 2. Thermal storage: charge or discharge
	chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage
	newStorageEnergy := b.ThermalStorageLevel*b.MaxStorageCapacity + chargeKW*StepDurationHrs
	// Apply thermal losses
	newStorageEnergy *= (1.0 - b.ThermalLossRate)
	newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy))
	b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity

	// 3. Load shedding
	clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction))
	shedKW := clampedShed * b.ProcessDemand

	// 4. Batch job scheduling
	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)

	// ----- Thermal dynamics -----
	// First-order setpoint-driven model:
	// HVAC drives temperature toward setpoint; higher power = stronger effect.
	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15

	// Outdoor infiltration: building slowly equilibrates with outside
	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03

	// Thermal storage discharge provides supplemental conditioning toward setpoint
	storageEffect := 0.0
	if act.ThermalChargeRate < 0 {
		storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
	}

	// Process equipment waste heat (always warms the building)
	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor

	deltaT := hvacEffect + infiltration + storageEffect + processHeat
	b.IndoorTemperature += deltaT

	// Clamp to physically reasonable indoor range
	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))

	// ----- Energy & cost accounting -----
	batchPowerDraw := e.batchRunningPower(b)
	// Only charging draws grid power; discharging (negative chargeKW) does not
	// offset consumption here.
	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW
	totalKW = math.Max(0, totalKW)
	energyKWh := totalKW * StepDurationHrs
	stepCost := energyKWh * b.CurrentPrice
	stepCarbon := energyKWh * b.CarbonIntensity

	b.CumulativeCost += stepCost
	b.CumulativeCarbon += stepCarbon

	// Baseline (always-on at 70% HVAC, no storage/shedding)
	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
	baselineEnergy := baselineKW * StepDurationHrs
	b.BaselineCost += baselineEnergy * b.CurrentPrice
	b.BaselineCarbon += baselineEnergy * b.CarbonIntensity

	// ----- Reward computation -----
	// Capture the previous step's charge rate BEFORE it is overwritten below.
	// Both the reward and the cycle detector need the old value; reading it
	// after the overwrite made the cycle check compare the current rate with
	// itself, so it could never fire.
	prevCharge := e.prevChargeRates[idx]

	rc := ComputeReward(ComputeRewardInput{
		B:               b,
		Act:             act,
		StepCost:        stepCost,
		EnergyKWh:       energyKWh,
		TMin:            TMinDefault,
		TMax:            TMaxDefault,
		StepCarbon:      stepCarbon,
		BatchMissed:     len(batchMissed),
		GridStress:      b.GridStressSignal,
		ShedFraction:    clampedShed,
		TaskID:          e.taskID,
		PrevHVACLevel:   b.PrevHVACLevel,
		ChargeRate:      act.ThermalChargeRate,
		PrevChargeRate:  prevCharge,
		StorageDelta:    act.ThermalChargeRate,
		PriceCurve:      e.PriceCurve[:],
		CurrentStep:     s,
		InstructionCard: e.InstructionCard,
		ActiveFaults:    activeFaults,
	})
	b.PrevHVACLevel = act.HVACPowerLevel
	e.prevChargeRates[idx] = act.ThermalChargeRate

	// Update batch queue
	b.BatchQueue = pendingDeadlines(b.Jobs)

	// ----- Exploit detection -----
	// Both counters are bounds-checked: SimulateStep runs this method on a
	// throwaway Environment that may not carry them.
	if clampedShed > 0.4 && idx < len(e.totalShedSteps) {
		e.totalShedSteps[idx]++
	}
	if idx < len(e.thermalCycleCounts) {
		// A "cycle" is a hard reversal between consecutive steps: strong
		// charge followed by strong discharge, or vice versa. On the first
		// step prevCharge is 0, so nothing is counted.
		chargeToDischarge := prevCharge > 0.3 && act.ThermalChargeRate < -0.3
		dischargeToCharge := prevCharge < -0.3 && act.ThermalChargeRate > 0.3
		if chargeToDischarge || dischargeToCharge {
			e.thermalCycleCounts[idx]++
		}
	}

	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
	b.Step = s

	// Record history
	if idx < len(e.TempHistory) {
		e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
		e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost)
		e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel)
		e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed)
		e.RewardHistory[idx] = append(e.RewardHistory[idx], rc)
	}

	obs := e.buildObservation(b)

	return StepResponse{
		Observation: obs,
		Reward:      rc.Total,
		// e.step is advanced by the caller after all buildings are stepped,
		// so the final transition is detected via s+1.
		Done: e.done || s+1 >= EpisodeSteps,
		Info: StepInfo{
			RewardComponents: rc,
			EnergyUsed:       energyKWh,
			CarbonEmitted:    stepCarbon,
			PriceSignal:      b.CurrentPrice,
			GridStress:       b.GridStressSignal,
			BatchCompleted:   batchCompleted,
			BatchMissed:      batchMissed,
			Episode:          e.episode,
			Step:             s,
		},
		Rewards: rc,
	}
}

// updateGridStress returns the grid stress signal in [0, 1] for step s.
//
// Stress is 60% of the price normalised against the nominal range
// [0.028, 0.36], plus an occasional random spike of 0.3–0.8. The spike
// probability per step is 5% by default, 10% on "medium" and 20% on "hard".
func (e *Environment) updateGridStress(s int) float64 {
	const (
		priceLo = 0.028
		priceHi = 0.36
	)
	normalised := (e.PriceCurve[s] - priceLo) / (priceHi - priceLo)

	var spikeChance float64
	switch e.difficulty {
	case "hard":
		spikeChance = 0.2
	case "medium":
		spikeChance = 0.1
	default:
		spikeChance = 0.05
	}

	// The second draw happens only when a spike is triggered.
	var spike float64
	if roll := e.rng.Float64(); roll < spikeChance {
		spike = 0.3 + e.rng.Float64()*0.5
	}

	capped := math.Min(1.0, normalised*0.6+spike)
	return math.Max(0, capped)
}

// updateOutdoorTemp returns the outdoor temperature (°C) for step s: a daily
// sinusoid of mean 15 and amplitude 8 that peaks at hour 12, plus uniform
// noise of ±1.5.
func (e *Environment) updateOutdoorTemp(s int) float64 {
	hourOfDay := float64(s) / 4.0
	daily := 15.0 + 8.0*math.Sin(2*math.Pi*(hourOfDay-6)/24.0)
	jitter := (e.rng.Float64()*2 - 1) * 1.5
	return daily + jitter
}

// updateProcessDemand returns the process demand for step s. Outside hours
// 8–18 (inclusive) the base level is 10; within them it is 20 plus a
// half-sine bump of up to 10 that peaks mid-window. Uniform noise of ±3 is
// added and the result is floored at 0.
func (e *Environment) updateProcessDemand(s int) float64 {
	hour := s / 4

	level := 10.0
	if 8 <= hour && hour <= 18 {
		level = 20.0 + 10.0*math.Sin(math.Pi*float64(hour-8)/10.0)
	}

	jitter := (e.rng.Float64()*2 - 1) * 3.0
	return math.Max(0, level+jitter)
}

// updateBatchJobs schedules and advances the building's batch jobs for one
// step and returns the IDs of jobs that completed or missed their deadline
// during it (both slices are non-nil, possibly empty).
//
// At most one job is scheduled per call: the first job that is neither
// scheduled, completed nor missed is assigned a start of step+slot. Then every
// unfinished job is checked: if step has reached its DeadlineSlot it is marked
// missed; otherwise, if it is scheduled and step has reached the last step of
// its run (ScheduledAt+Duration-1), it is marked completed.
func (e *Environment) updateBatchJobs(b *BuildingState, slot int, step int) (completed []int, missed []int) {
	completed = make([]int, 0)
	missed = make([]int, 0)

	// Schedule the first pending job into the chosen slot.
	for i := range b.Jobs {
		candidate := &b.Jobs[i]
		if candidate.Scheduled || candidate.Completed || candidate.MissedDeadline {
			continue
		}
		candidate.Scheduled = true
		candidate.ScheduledAt = step + slot
		break // only one job is scheduled per step
	}

	// Advance every job that is still open.
	for i := range b.Jobs {
		j := &b.Jobs[i]
		switch {
		case j.Completed || j.MissedDeadline:
			// Already finished one way or the other.
		case step >= j.DeadlineSlot:
			// The deadline check takes precedence over completion.
			j.MissedDeadline = true
			missed = append(missed, j.ID)
		case j.Scheduled && step >= j.ScheduledAt && step >= j.ScheduledAt+j.Duration-1:
			j.Completed = true
			completed = append(completed, j.ID)
		}
	}
	return completed, missed
}

// batchRunningPower returns the total power draw (kW) of the batch jobs that
// are running at the current step, i.e. scheduled jobs whose window
// [ScheduledAt, ScheduledAt+Duration) contains e.step.
//
// Completed jobs are deliberately NOT filtered out. stepBuilding calls
// updateBatchJobs before this method, and updateBatchJobs marks a job
// Completed on the last step of its window. Skipping completed jobs therefore
// dropped that final step from the energy accounting — a Duration-1 job
// scheduled for the current step consumed no energy at all. The window test
// alone already excludes jobs that finished on an earlier step. Jobs that
// missed their deadline are treated as aborted and draw nothing.
func (e *Environment) batchRunningPower(b *BuildingState) float64 {
	total := 0.0
	for i := range b.Jobs {
		job := &b.Jobs[i]
		if !job.Scheduled || job.MissedDeadline {
			continue
		}
		if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration {
			total += job.PowerDraw
		}
	}
	return total
}

// buildObservation renders the agent-facing view of building b.
//
// Numeric fields are rounded to a fixed number of decimals. The reported
// indoor temperature includes b.TempObservationNoise, so a sensor fault skews
// what the agent sees without touching the simulated physics. The price
// forecast covers the next four steps of the raw price curve, repeating the
// last sample once the episode end is reached.
func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
	// round keeps `places`-scaled precision, e.g. scale 100 → two decimals.
	round := func(v, scale float64) float64 { return math.Round(v*scale) / scale }

	// Descriptions of the faults active at the building's current step.
	var faultDescs []string
	if e.FaultSchedule != nil {
		for _, fault := range e.FaultSchedule.ActiveAt(b.Step) {
			faultDescs = append(faultDescs, fault.Description)
		}
	}

	// Task card text: the generated instruction card for Task 4, a fixed
	// description for Tasks 1–3, and a generic fallback otherwise.
	var card string
	switch {
	case e.taskID == 4 && e.InstructionCard != nil:
		card = e.InstructionCard.Text
	case e.taskID == 1:
		card = "Task 1 (Easy - Cost Minimization): Minimize total energy cost over 24 hours. No temperature or batch constraints. Use cheap off-peak periods and thermal storage."
	case e.taskID == 2:
		card = "Task 2 (Medium - Temperature Management): Minimize cost AND keep indoor temperature within 19-23°C at all times. Balance comfort vs cost."
	case e.taskID == 3:
		card = "Task 3 (Hard - Full Demand Response): Minimize cost, maintain temperature, respond to grid stress (shed when grid_stress_signal > 0.7), schedule batch jobs, minimize carbon."
	default:
		card = "Maintain operations and minimize cost."
	}

	// Four-step look-ahead, clamped to the last sample of the episode.
	forecast := make([]float64, 4)
	for k := range forecast {
		at := b.Step + k
		if at >= EpisodeSteps {
			at = EpisodeSteps - 1
		}
		forecast[k] = round(e.PriceCurve[at], 10000)
	}

	// Sensor noise affects the observation only, not the physics.
	seenTemp := b.IndoorTemperature + b.TempObservationNoise

	return ObservationModel{
		IndoorTemperature:   round(seenTemp, 100),
		ThermalStorageLevel: round(b.ThermalStorageLevel, 1000),
		ProcessDemand:       round(b.ProcessDemand, 100),
		CurrentPrice:        round(b.CurrentPrice, 10000),
		GridStressSignal:    round(b.GridStressSignal, 1000),
		CarbonIntensity:     round(b.CarbonIntensity, 10),
		HourOfDay:           b.HourOfDay,
		BatchQueue:          pendingDeadlines(b.Jobs),
		CumulativeCost:      round(b.CumulativeCost, 10000),
		Step:                b.Step,
		BuildingID:          b.BuildingID,
		HVACEfficiency:      round(b.HVACEfficiency, 1000),
		InstructionCard:     e.InstructionCard,
		ActiveFaults:        faultDescs,
		TaskCard:            card,
		NLSummary:           "GridMind simulation state.",
		MarketType:          "tou",
		Season:              "summer",
		PriceVolatility:     0.2,
		PriceForecast:       forecast,
		DemandChargeActive:  false,
	}
}

// clampAction forces every field of a into its valid range, in place:
// HVACPowerLevel to [0, 1], ThermalChargeRate to [-1, 1], BatchJobSlot to
// [0, 4] and LoadShedFraction to [0, 0.5].
func (e *Environment) clampAction(a *ActionModel) {
	bound := func(v, lo, hi float64) float64 { return math.Max(lo, math.Min(hi, v)) }

	a.HVACPowerLevel = bound(a.HVACPowerLevel, 0, 1.0)
	a.ThermalChargeRate = bound(a.ThermalChargeRate, -1.0, 1.0)
	a.LoadShedFraction = bound(a.LoadShedFraction, 0, 0.5)

	slot := a.BatchJobSlot
	if slot > 4 {
		slot = 4
	}
	if slot < 0 {
		slot = 0
	}
	a.BatchJobSlot = slot
}

// findAction picks the action for the building at buildingIdx. The first
// action whose BuildingID equals buildingIdx wins; failing that, the action at
// the same position in the slice is used; failing that, a default action is
// returned (HVAC at 50%, no storage use, slot 0, no shedding).
func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel {
	for i := range actions {
		if actions[i].BuildingID == buildingIdx {
			return actions[i]
		}
	}
	if buildingIdx >= len(actions) {
		// Default: hold HVAC at half power; every other field stays zero.
		return ActionModel{HVACPowerLevel: 0.5}
	}
	return actions[buildingIdx]
}

// pendingDeadlines returns the deadline slot of every job that is neither
// completed nor past its deadline, in job order. Jobs that are already
// scheduled but still open are included. The result is never nil.
func pendingDeadlines(jobs []BatchJob) []int {
	deadlines := make([]int, 0, len(jobs))
	for i := range jobs {
		if jobs[i].Completed || jobs[i].MissedDeadline {
			continue
		}
		deadlines = append(deadlines, jobs[i].DeadlineSlot)
	}
	return deadlines
}

// max returns the larger of two ints.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// ExploitDetected reports whether building buildingIdx shows signs of a
// degenerate strategy in the current episode, together with a penalty.
// Thread-safe (read lock).
//
// Two ratios are computed over the steps simulated so far:
//   - shed ratio:  steps with load shedding above 40%   (flagged above 0.7)
//   - cycle ratio: charge/discharge reversals of storage (flagged above 0.4)
//
// The penalty is 0.5 per unit of shed ratio above 0.7 plus 0.3 per unit of
// cycle ratio above 0.4, and 0 when nothing is flagged. An out-of-range index
// (including a negative one) or an episode with no simulated steps yields
// (false, 0).
func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if buildingIdx < 0 ||
		buildingIdx >= len(e.totalShedSteps) ||
		buildingIdx >= len(e.thermalCycleCounts) {
		return false, 0.0
	}

	// e.step already equals the number of completed steps, so it is the right
	// denominator; dividing by step+1 understated both ratios.
	steps := e.step
	if steps < 1 {
		return false, 0.0
	}

	shedRatio := float64(e.totalShedSteps[buildingIdx]) / float64(steps)
	cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / float64(steps)

	exploited := shedRatio > 0.7 || cycleRatio > 0.4
	penalty := 0.0
	if exploited {
		penalty = math.Max(shedRatio-0.7, 0)*0.5 + math.Max(cycleRatio-0.4, 0)*0.3
	}
	return exploited, penalty
}

// GetFeederState returns the aggregate fleet view for the coordinator.
// Thread-safe (read lock).
//
// Per-building demand is estimated as ProcessDemand plus MaxHVACPower scaled
// by the previous HVAC level. The feeder limit is a simplistic soft cap of
// 120 kW per building. The price curve is downsampled to one point per hour.
//
// Before Reset there are no buildings and the limit is 0; utilisation is then
// reported as 0 instead of NaN (0/0), which encoding/json cannot marshal.
func (e *Environment) GetFeederState() FeederState {
	e.mu.RLock()
	defer e.mu.RUnlock()

	var totalDemand float64
	buildings := make([]BuildingSummary, len(e.Buildings))
	for i, b := range e.Buildings {
		demand := b.ProcessDemand + b.MaxHVACPower*b.PrevHVACLevel
		totalDemand += demand

		// Neutral multiplier if the coordinator slice is shorter than expected.
		multiplier := 1.0
		if i < len(e.PriceMultipliers) {
			multiplier = e.PriceMultipliers[i]
		}

		buildings[i] = BuildingSummary{
			BuildingID:          b.BuildingID,
			CurrentDemandKW:     math.Round(demand*100) / 100,
			IndoorTemperature:   math.Round(b.IndoorTemperature*100) / 100,
			ThermalStorageLevel: math.Round(b.ThermalStorageLevel*1000) / 1000,
			CumulativeCost:      math.Round(b.CumulativeCost*100) / 100,
			GridStressSignal:    math.Round(b.GridStressSignal*100) / 100,
			PriceMultiplier:     multiplier,
		}
	}

	limit := float64(120 * len(e.Buildings)) // Simplistic soft cap

	// Utilisation in percent with one decimal; guarded against a zero limit.
	utilization := 0.0
	if limit > 0 {
		utilization = math.Round((totalDemand/limit)*1000) / 10
	}

	// Downsample price curve to hourly points (every 4th step).
	const hours = EpisodeSteps / 4
	hourlyCurve := make([]float64, hours)
	for h := range hourlyCurve {
		hourlyCurve[h] = e.PriceCurve[h*4]
	}

	return FeederState{
		TotalDemandKW:    math.Round(totalDemand*100) / 100,
		FeederLimitKW:    limit,
		FeederOverload:   totalDemand > limit,
		UtilizationPct:   utilization,
		Buildings:        buildings,
		PriceCurveHourly: hourlyCurve,
		Step:             e.step,
		Episode:          e.episode,
	}
}

// SetCoordinatorSignals stores per-building price multipliers, each clamped
// to [0.1, 10]. Thread-safe. Entries beyond the number of buildings are
// ignored; buildings without an entry keep their current multiplier.
func (e *Environment) SetCoordinatorSignals(multipliers []float64) {
	e.mu.Lock()
	defer e.mu.Unlock()

	count := len(multipliers)
	if count > len(e.PriceMultipliers) {
		count = len(e.PriceMultipliers)
	}
	for i := 0; i < count; i++ {
		// Clamp as a safety net against extreme coordinator values.
		e.PriceMultipliers[i] = math.Max(0.1, math.Min(10.0, multipliers[i]))
	}
}

// cloneBuilding returns a copy of b that can be mutated independently: all
// fields are copied by value, and the BatchQueue and Jobs slices are
// duplicated so the copy does not share their backing arrays with b.
func cloneBuilding(b *BuildingState) *BuildingState {
	dup := new(BuildingState)
	*dup = *b
	dup.BatchQueue = append(make([]int, 0, len(b.BatchQueue)), b.BatchQueue...)
	dup.Jobs = append(make([]BatchJob, 0, len(b.Jobs)), b.Jobs...)
	return dup
}

// SimulateStep predicts the next state and reward without modifying the actual environment.
// It runs one step on a throwaway Environment built from copies of the
// buildings, per-building multipliers and charge-rate history, and returns
// the responses that step produced plus whether it would end the episode.
// Thread-safe (read lock).
//
// The simulation uses its own PRNG, seeded deterministically from the episode
// seed and the current step. The main RNG is never touched: drawing from it
// here would both desynchronise the seeded episode and race with other
// readers, because *rand.Rand is not safe for concurrent use and this method
// only holds a read lock. As a consequence, repeated calls at the same step
// with the same actions return the same prediction; the stochastic terms
// (weather, demand, grid stress) will generally differ from what the real
// Step later draws.
//
// If the episode is done, or Reset has not been called yet, it returns
// (nil, true).
//
// NOTE(review): FaultSchedule and InstructionCard are shared with the live
// environment by pointer; this assumes ApplyFaults/ComputeReward do not mutate
// them — confirm.
func (e *Environment) SimulateStep(actions []ActionModel) ([]StepResponse, bool) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if e.done {
		return nil, true
	}
	// Before Reset there are no buildings; sizing the mock by numBuildings
	// would leave nil entries that stepBuilding dereferences.
	n := len(e.Buildings)
	if n == 0 {
		return nil, true
	}

	// Local PRNG derived from (seed, step) so the main RNG stays untouched.
	simSeed := e.seed ^ (int64(e.step+1) * 0x9E3779B9)

	// Create a temporary mock environment for a single step.
	mock := &Environment{
		rng:              rand.New(rand.NewSource(simSeed)),
		episode:          e.episode,
		step:             e.step,
		taskID:           e.taskID,
		seed:             e.seed,
		difficulty:       e.difficulty,
		numBuildings:     n,
		Buildings:        make([]*BuildingState, n),
		PriceCurve:       e.PriceCurve,
		CarbonCurve:      e.CarbonCurve,
		InstructionCard:  e.InstructionCard,
		FaultSchedule:    e.FaultSchedule,
		PriceMultipliers: append(make([]float64, 0, len(e.PriceMultipliers)), e.PriceMultipliers...),
		prevChargeRates:  append(make([]float64, 0, len(e.prevChargeRates)), e.prevChargeRates...),
		// stepBuilding updates the exploit counters; give the mock its own so
		// a shed fraction above 0.4 does not index a nil slice.
		totalShedSteps:     make([]int, n),
		thermalCycleCounts: make([]int, n),
	}

	for i, b := range e.Buildings {
		mock.Buildings[i] = cloneBuilding(b)
	}

	// Clamp on a copy so the caller's actions are left untouched.
	mockActions := make([]ActionModel, len(actions))
	copy(mockActions, actions)
	for i := range mockActions {
		mock.clampAction(&mockActions[i])
	}

	responses := make([]StepResponse, n)
	for i, b := range mock.Buildings {
		act := mock.findAction(mockActions, i)
		responses[i] = mock.stepBuilding(b, act, i)
	}

	mockDone := (mock.step + 1) >= EpisodeSteps
	return responses, mockDone
}