// Package env implements the GridMind-RL simulation core.
// It models a multi-building industrial/commercial energy management system
// with stochastic electricity prices, thermal dynamics, and batch job scheduling.
package env

import (
	"math"
	"math/rand"
	"sync"
	"time"
)

// Simulation-wide constants: episode timing, the indoor comfort band, and the
// per-building equipment limits copied into every BuildingState on Reset.
const (
	EpisodeSteps    = 96   // 24 hours × 15-min intervals (96 × 0.25h = 24h)
	StepDurationHrs = 0.25 // each step = 15 minutes = 0.25 h
	MaxBuildings    = 3
	DefaultSetpoint = 21.0  // °C comfortable indoor temp
	TMinDefault     = 19.0  // °C lower bound
	TMaxDefault     = 23.0  // °C upper bound
	MaxHVACPowerKW  = 50.0  // kW per building
	MaxStorageKWh   = 100.0 // kWh thermal storage capacity
	StorageLossRate = 0.005 // fraction lost per step (thermal dissipation)
	MaxBatchJobs    = 5     // max concurrent batch jobs per building
)

// Environment is the thread-safe top-level simulation manager.
//
// Reset and Step take the write lock on mu; GetState, GetReplay and
// ExploitDetected take the read lock. The unexported helper methods assume
// the caller already holds the appropriate lock.
type Environment struct {
	// Episode bookkeeping. rng is re-created from seed on every Reset; step is
	// the index of the next timestep to simulate and done is set once step
	// reaches EpisodeSteps.
	mu           sync.RWMutex
	rng          *rand.Rand
	seed         int64
	episode      int
	step         int
	done         bool
	taskID       int
	difficulty   string
	numBuildings int

	// Per-episode simulation state, rebuilt by Reset.
	Buildings   []*BuildingState
	PriceCurve  [EpisodeSteps]float64 // $/kWh for each step
	CarbonCurve [EpisodeSteps]float64 // gCO2/kWh for each step
	Replay      []ReplayEntry
	LastActions []ActionModel

	// History for dashboard rendering (per building)
	TempHistory     [][]float64
	CostHistory     [][]float64
	HVACHistory     [][]float64
	LoadShedHistory [][]float64
	RewardHistory   [][]RewardComponents

	// Exploit detection counters
	totalShedSteps     []int // steps where load_shed > 0.4
	thermalCycleCounts []int // rapid thermal storage reversals
	prevChargeRates    []float64
}

// NewEnvironment returns an Environment seeded from the current wall-clock
// time and preconfigured for task 1 ("easy", one building). No episode state
// (buildings, curves, histories) exists until Reset is called.
func NewEnvironment() *Environment {
	env := &Environment{
		seed:         time.Now().UnixNano(),
		taskID:       1,
		difficulty:   "easy",
		numBuildings: 1,
	}
	env.rng = rand.New(rand.NewSource(env.seed))
	return env
}

// Reset starts a fresh episode and returns the initial observation of every
// building. Thread-safe (takes the write lock).
//
// The request is normalised as follows:
//   - a nil Seed is replaced by the current wall-clock time in nanoseconds;
//   - a TaskID outside 1..3 becomes 1;
//   - an empty Difficulty is derived from the task (1→easy, 2→medium, 3→hard);
//   - NumBuildings is clamped to [1, MaxBuildings].
//
// Random draws happen in a fixed order (price curve, carbon curve, then each
// building in index order), so the same seed reproduces the same episode.
func (e *Environment) Reset(req ResetRequest) ResetResponse {
	e.mu.Lock()
	defer e.mu.Unlock()

	// Seed the episode's private random source.
	if req.Seed == nil {
		e.seed = time.Now().UnixNano()
	} else {
		e.seed = *req.Seed
	}
	e.rng = rand.New(rand.NewSource(e.seed))

	// Task selection, falling back to task 1 for out-of-range IDs.
	if e.taskID = req.TaskID; e.taskID < 1 || e.taskID > 3 {
		e.taskID = 1
	}
	// Difficulty defaults to the one associated with the (validated) task.
	if e.difficulty = req.Difficulty; e.difficulty == "" {
		e.difficulty = [...]string{"easy", "medium", "hard"}[e.taskID-1]
	}

	// Federation size, clamped to the supported range.
	switch n := req.NumBuildings; {
	case n < 1:
		e.numBuildings = 1
	case n > MaxBuildings:
		e.numBuildings = MaxBuildings
	default:
		e.numBuildings = n
	}
	count := e.numBuildings

	e.episode++
	e.step, e.done = 0, false
	e.Replay = make([]ReplayEntry, 0, EpisodeSteps)
	e.LastActions = make([]ActionModel, count)

	// The curves must exist before buildings are created: each building reads
	// the step-0 price and carbon intensity.
	e.generatePriceCurve()
	e.generateCarbonCurve()

	e.Buildings = make([]*BuildingState, count)
	e.TempHistory = make([][]float64, count)
	e.CostHistory = make([][]float64, count)
	e.HVACHistory = make([][]float64, count)
	e.LoadShedHistory = make([][]float64, count)
	e.RewardHistory = make([][]RewardComponents, count)
	e.totalShedSteps = make([]int, count)
	e.thermalCycleCounts = make([]int, count)
	e.prevChargeRates = make([]float64, count)

	obs := make([]ObservationModel, count)
	for i := range e.Buildings {
		b := e.newBuildingState(i)
		e.Buildings[i] = b
		e.TempHistory[i] = make([]float64, 0, EpisodeSteps)
		e.CostHistory[i] = make([]float64, 0, EpisodeSteps)
		e.HVACHistory[i] = make([]float64, 0, EpisodeSteps)
		e.LoadShedHistory[i] = make([]float64, 0, EpisodeSteps)
		e.RewardHistory[i] = make([]RewardComponents, 0, EpisodeSteps)
		// buildObservation only reads b, so it can run as soon as b exists.
		obs[i] = e.buildObservation(b)
	}

	return ResetResponse{
		Observations: obs,
		Episode:      e.episode,
		TaskID:       e.taskID,
		Seed:         e.seed,
	}
}

// Step advances the simulation by one timestep for all buildings. Thread-safe
// (takes the write lock).
//
// Every entry of actions is clamped in place to the legal action ranges, so
// the caller's slice is modified. Each building then receives the action
// selected by findAction (matching building_id, else positional, else a
// default action). The action actually applied is stored in LastActions and,
// for building 0, in the replay entry.
//
// It returns one StepResponse per building plus the episode-done flag. When
// the episode is already finished, or Reset has not been called yet (there
// are no buildings to simulate), it returns (nil, true) without changing any
// state.
func (e *Environment) Step(actions []ActionModel) ([]StepResponse, bool) {
	e.mu.Lock()
	defer e.mu.Unlock()

	if e.done {
		return nil, true
	}
	// Before the first Reset there are no buildings and LastActions is empty;
	// indexing LastActions[0] below would panic, so report "no active episode".
	if len(e.Buildings) == 0 || len(e.LastActions) < len(e.Buildings) {
		return nil, true
	}

	// Validate and clamp actions
	for i := range actions {
		e.clampAction(&actions[i])
	}

	responses := make([]StepResponse, len(e.Buildings))
	for i, b := range e.Buildings {
		// Find action for this building (by building_id or by index)
		act := e.findAction(actions, i)
		// Record the action that is really applied, not actions[i]: the two
		// differ when actions are addressed by building_id or are missing.
		e.LastActions[i] = act
		responses[i] = e.stepBuilding(b, act, i)
	}

	e.step++
	if e.step >= EpisodeSteps {
		e.done = true
	}

	// Record replay entry (aggregate of all buildings, first building primary)
	e.Replay = append(e.Replay, ReplayEntry{
		Step:        e.step - 1,
		Observation: responses[0].Observation,
		Action:      e.LastActions[0],
		Reward:      responses[0].Reward,
		Components:  responses[0].Info.RewardComponents,
		Done:        e.done,
	})

	return responses, e.done
}

// GetState returns a full snapshot of environment state. Thread-safe (read lock).
//
// Each building's job list is copied, because Step updates job flags in place
// and the caller reads the snapshot after the lock has been released. The
// history slices are returned as views of the internal buffers.
//
// The returned price and carbon curves are down-sampled to one value per hour
// (every 4th step of the per-step curves).
func (e *Environment) GetState() StateResponse {
	e.mu.RLock()
	defer e.mu.RUnlock()

	buildings := make([]BuildingStatePublic, e.numBuildings)
	for i, b := range e.Buildings {
		// Detach the job list from the live simulation state.
		jobs := make([]BatchJob, len(b.Jobs))
		copy(jobs, b.Jobs)

		pub := BuildingStatePublic{
			ObservationModel:    e.buildObservation(b),
			OutdoorTemperature:  b.OutdoorTemperature,
			SetpointTemperature: b.SetpointTemperature,
			BaselineCost:        b.BaselineCost,
			BaselineCarbon:      b.BaselineCarbon,
			CumulativeCarbon:    b.CumulativeCarbon,
			Jobs:                jobs,
		}
		if i < len(e.TempHistory) {
			pub.TempHistory = e.TempHistory[i]
			pub.CostHistory = e.CostHistory[i]
			pub.HVACHistory = e.HVACHistory[i]
			pub.LoadShedHistory = e.LoadShedHistory[i]
			pub.RewardHistory = e.RewardHistory[i]
		}
		buildings[i] = pub
	}

	// One sample per hour: step h*4 is the first 15-minute slot of hour h and
	// is always < EpisodeSteps because h < EpisodeSteps/4.
	priceCurve := make([]float64, EpisodeSteps/4)
	carbonCurve := make([]float64, EpisodeSteps/4)
	for h := range priceCurve {
		priceCurve[h] = e.PriceCurve[h*4]
		carbonCurve[h] = e.CarbonCurve[h*4]
	}

	return StateResponse{
		Buildings:   buildings,
		PriceCurve:  priceCurve,
		CarbonCurve: carbonCurve,
		Episode:     e.episode,
		Step:        e.step,
		TaskID:      e.taskID,
		Done:        e.done,
		Seed:        e.seed,
	}
}

// GetReplay returns a copy of the replay entries recorded so far in the
// current episode. Thread-safe (read lock). The result is never nil and can
// be modified by the caller without affecting the environment.
func (e *Environment) GetReplay() []ReplayEntry {
	e.mu.RLock()
	defer e.mu.RUnlock()

	snapshot := make([]ReplayEntry, 0, len(e.Replay))
	return append(snapshot, e.Replay...)
}

// ──────────────────────────────────────────────
// Internal helpers
// ──────────────────────────────────────────────

// newBuildingState creates the step-0 state of building id.
//
// The random draws are taken in a fixed order (indoor temperature, storage
// level, outdoor temperature, process demand, then the batch jobs) so that a
// given seed always yields the same building. PriceCurve and CarbonCurve must
// already be generated, because their first entries seed the building's
// current price and carbon intensity.
func (e *Environment) newBuildingState(id int) *BuildingState {
	indoor := DefaultSetpoint + (e.rng.Float64()-0.5)*2.0 // setpoint ± 1 °C
	storage := 0.3 + e.rng.Float64()*0.4                  // start 30–70% full
	outdoor := 15.0 + e.rng.Float64()*15.0                // 15–30 °C
	demand := 10.0 + e.rng.Float64()*20.0                 // 10–30
	jobs := e.generateBatchJobs()                         // count depends on difficulty

	// Counters, accumulators and the grid-stress signal start at their zero
	// values and are therefore not listed explicitly.
	return &BuildingState{
		BuildingID:          id,
		IndoorTemperature:   indoor,
		ThermalStorageLevel: storage,
		ProcessDemand:       demand,
		OutdoorTemperature:  outdoor,
		CurrentPrice:        e.PriceCurve[0],
		CarbonIntensity:     e.CarbonCurve[0],
		PrevHVACLevel:       0.5,
		SetpointTemperature: DefaultSetpoint,
		MaxHVACPower:        MaxHVACPowerKW,
		MaxStorageCapacity:  MaxStorageKWh,
		ThermalLossRate:     StorageLossRate,
		Jobs:                jobs,
		BatchQueue:          pendingDeadlines(jobs),
	}
}

// generateBatchJobs creates the batch jobs for one building: 3 jobs by
// default, 4 on "medium" and 5 on "hard". Each job gets, in this draw order,
// a random deadline slot, a duration of 1–3 steps and a power draw of
// 5–20 kW. Jobs start unscheduled (ScheduledAt = -1) with IDs 1..n.
func (e *Environment) generateBatchJobs() []BatchJob {
	count := 3
	if e.difficulty == "medium" {
		count = 4
	} else if e.difficulty == "hard" {
		count = 5
	}

	// Deadlines fall in [4, 4+span): the episode minus some slack at the end
	// so a job can still run before the episode finishes.
	span := EpisodeSteps - 12
	if span < 8 {
		span = 8
	}

	jobs := make([]BatchJob, 0, count)
	for id := 1; id <= count; id++ {
		deadline := 4 + e.rng.Intn(span)
		duration := 1 + e.rng.Intn(3)
		power := 5.0 + e.rng.Float64()*15.0
		jobs = append(jobs, BatchJob{
			ID:           id,
			DeadlineSlot: deadline,
			Duration:     duration,
			PowerDraw:    power,
			ScheduledAt:  -1,
		})
	}
	return jobs
}

// generatePriceCurve fills PriceCurve with a stochastic time-of-use tariff.
//
// Both peak windows are shifted by an independent random offset of -2..+2
// hours. Every 15-minute step then takes the base tariff of its hour from
// touPrice and adds uniform noise of ±volatility × base, where volatility is
// 10% (default), 20% ("medium") or 35% ("hard"). Prices are floored at
// 0.02 $/kWh.
func (e *Environment) generatePriceCurve() {
	var volatility float64
	switch e.difficulty {
	case "hard":
		volatility = 0.35
	case "medium":
		volatility = 0.2
	default:
		volatility = 0.1
	}

	// Draw the shifts once per episode, before any per-step noise.
	morningShift := e.rng.Intn(5) - 2
	eveningShift := e.rng.Intn(5) - 2

	for s := range e.PriceCurve {
		base := touPrice(s/4, morningShift, eveningShift) // 4 steps per hour
		jitter := (e.rng.Float64()*2 - 1) * volatility * base
		e.PriceCurve[s] = math.Max(0.02, base+jitter)
	}
}

// touPrice returns the base time-of-use price ($/kWh) for the given hour.
//
// The morning peak covers hours [8+morningShift, 12+morningShift) and the
// evening peak covers hours [17+eveningShift, 21+eveningShift] (end hour
// included). Tiers are checked in this order, first match wins:
//
//	morning peak                                   0.18
//	evening peak                                   0.22
//	shoulder (hour 9 up to the morning peak, or
//	          between the two peaks)               0.10
//	overnight (hour >= 23 or hour < 6)             0.04
//	anything else                                  0.08
func touPrice(hour, morningShift, eveningShift int) float64 {
	const (
		overnightRate   = 0.04
		defaultRate     = 0.08
		shoulderRate    = 0.10
		morningPeakRate = 0.18
		eveningPeakRate = 0.22
	)

	morningStart, morningEnd := 8+morningShift, 12+morningShift
	eveningStart, eveningEnd := 17+eveningShift, 21+eveningShift

	if hour >= morningStart && hour < morningEnd {
		return morningPeakRate
	}
	if hour >= eveningStart && hour <= eveningEnd {
		return eveningPeakRate
	}

	beforeMorningPeak := hour >= 9 && hour < morningStart
	betweenPeaks := hour >= morningEnd && hour < eveningStart
	if beforeMorningPeak || betweenPeaks {
		return shoulderRate
	}

	if hour < 6 || hour >= 23 {
		return overnightRate
	}
	return defaultRate
}

// generateCarbonCurve derives CarbonCurve (gCO2/kWh) from PriceCurve, so it
// must run after generatePriceCurve.
//
// Each step's price is mapped linearly from [0.04, 0.32] $/kWh onto
// [150, 600] gCO2/kWh (higher price ≈ dirtier peaker plants), uniform noise
// of ±30 is added, and the result is floored at 100.
func (e *Environment) generateCarbonCurve() {
	const (
		priceLo, priceHi   = 0.04, 0.32
		carbonLo, carbonHi = 150.0, 600.0
		noiseAmplitude     = 30.0
		carbonFloor        = 100.0
	)

	for s, price := range e.PriceCurve {
		mapped := carbonLo + (price-priceLo)/(priceHi-priceLo)*(carbonHi-carbonLo)
		jitter := (e.rng.Float64()*2 - 1) * noiseAmplitude
		e.CarbonCurve[s] = math.Max(carbonFloor, mapped+jitter)
	}
}

// stepBuilding advances a single building by one timestep.
//
// b is the building to update in place, act the (already clamped) action for
// it and idx its position in the per-building slices. The current global step
// e.step selects the price and carbon values. The caller must hold the write
// lock and guarantee e.step < EpisodeSteps.
//
// In order, the method refreshes the exogenous signals (price, carbon, grid
// stress, outdoor temperature, process demand), applies the action (HVAC,
// thermal storage, load shedding, batch scheduling), integrates the indoor
// temperature, accounts energy/cost/carbon against an always-on baseline,
// computes the reward, updates the exploit-detection counters and appends to
// the dashboard histories.
func (e *Environment) stepBuilding(b *BuildingState, act ActionModel, idx int) StepResponse {
	s := e.step

	// Update environmental signals from curves
	b.CurrentPrice = e.PriceCurve[s]
	b.CarbonIntensity = e.CarbonCurve[s]
	b.HourOfDay = (s / 4) % 24

	// Stochastic grid stress events (more frequent in hard mode)
	b.GridStressSignal = e.updateGridStress(s)

	// Weather perturbation: outdoor temp drifts sinusoidally + noise
	b.OutdoorTemperature = e.updateOutdoorTemp(s)

	// Process demand fluctuation
	b.ProcessDemand = e.updateProcessDemand(s)

	// ----- Apply actions -----

	// 1. HVAC: heats/cools building toward setpoint
	hvacPower := act.HVACPowerLevel * b.MaxHVACPower // kW

	// 2. Thermal storage: charge or discharge
	chargeKW := act.ThermalChargeRate * b.MaxHVACPower * 0.3 // max 30% of HVAC for storage
	newStorageEnergy := b.ThermalStorageLevel*b.MaxStorageCapacity + chargeKW*StepDurationHrs
	// Apply thermal losses
	newStorageEnergy *= (1.0 - b.ThermalLossRate)
	newStorageEnergy = math.Max(0, math.Min(b.MaxStorageCapacity, newStorageEnergy))
	b.ThermalStorageLevel = newStorageEnergy / b.MaxStorageCapacity

	// 3. Load shedding
	clampedShed := math.Max(0, math.Min(0.5, act.LoadShedFraction))
	shedKW := clampedShed * b.ProcessDemand

	// 4. Batch job scheduling
	batchCompleted, batchMissed := e.updateBatchJobs(b, act.BatchJobSlot, s)

	// ----- Thermal dynamics -----
	// First-order setpoint-driven model:
	// HVAC drives temperature toward setpoint; higher power = stronger effect.
	// At HVACPowerLevel=1.0, HVAC strongly pushes toward setpoint.
	// At HVACPowerLevel=0.0, HVAC is off — temp drifts with environment.
	hvacEffect := (b.SetpointTemperature - b.IndoorTemperature) * act.HVACPowerLevel * 0.15

	// Outdoor infiltration: building slowly equilibrates with outside
	infiltration := (b.OutdoorTemperature - b.IndoorTemperature) * 0.03

	// Thermal storage discharge provides supplemental conditioning toward setpoint
	storageEffect := 0.0
	if act.ThermalChargeRate < 0 {
		storageEffect = (b.SetpointTemperature - b.IndoorTemperature) * math.Abs(act.ThermalChargeRate) * 0.05
	}

	// Process equipment waste heat (always warms the building)
	processHeat := b.ProcessDemand * 0.002 // kW→°C rough factor

	deltaT := hvacEffect + infiltration + storageEffect + processHeat
	b.IndoorTemperature += deltaT

	// Clamp to physically reasonable indoor range
	b.IndoorTemperature = math.Max(10.0, math.Min(40.0, b.IndoorTemperature))

	// ----- Energy & cost accounting -----
	// Only charging (positive chargeKW) draws grid power; discharging does not
	// reduce the metered load.
	batchPowerDraw := e.batchRunningPower(b)
	totalKW := hvacPower + math.Max(0, chargeKW) + batchPowerDraw - shedKW
	totalKW = math.Max(0, totalKW)
	energyKWh := totalKW * StepDurationHrs
	stepCost := energyKWh * b.CurrentPrice
	stepCarbon := energyKWh * b.CarbonIntensity

	b.CumulativeCost += stepCost
	b.CumulativeCarbon += stepCarbon

	// Baseline (always-on at 70% HVAC, no storage/shedding)
	baselineKW := 0.7*b.MaxHVACPower + b.ProcessDemand
	baselineEnergy := baselineKW * StepDurationHrs
	b.BaselineCost += baselineEnergy * b.CurrentPrice
	b.BaselineCarbon += baselineEnergy * b.CarbonIntensity

	// ----- Reward computation -----
	// Snapshot the previous step's charge rate before it is overwritten: both
	// the reward and the cycling detector below need the old value.
	prevChargeRate := e.prevChargeRates[idx]
	rc := ComputeReward(ComputeRewardInput{
		B:              b,
		Act:            act,
		StepCost:       stepCost,
		EnergyKWh:      energyKWh,
		TMin:           TMinDefault,
		TMax:           TMaxDefault,
		StepCarbon:     stepCarbon,
		BatchMissed:    len(batchMissed),
		GridStress:     b.GridStressSignal,
		ShedFraction:   clampedShed,
		TaskID:         e.taskID,
		PrevHVACLevel:  b.PrevHVACLevel,
		ChargeRate:     act.ThermalChargeRate,
		PrevChargeRate: prevChargeRate,
		StorageDelta:   act.ThermalChargeRate,
		PriceCurve:     e.PriceCurve[:],
		CurrentStep:    s,
	})
	b.PrevHVACLevel = act.HVACPowerLevel
	e.prevChargeRates[idx] = act.ThermalChargeRate

	// Update batch queue
	b.BatchQueue = pendingDeadlines(b.Jobs)

	// Exploit detection
	if clampedShed > 0.4 {
		e.totalShedSteps[idx]++
	}
	if len(e.thermalCycleCounts) > idx {
		// Replay is empty only on the first step of an episode, when there is
		// no previous action to compare against.
		if len(e.Replay) > 0 {
			// A "cycle" is a swing from strong charging to strong discharging
			// (or the reverse) between two consecutive steps.
			if prevChargeRate > 0.3 && act.ThermalChargeRate < -0.3 || prevChargeRate < -0.3 && act.ThermalChargeRate > 0.3 {
				e.thermalCycleCounts[idx]++
			}
		}
	}

	// Per-building step index matches global timestep for this transition (0 .. EpisodeSteps-1)
	b.Step = s

	// Record history
	if idx < len(e.TempHistory) {
		e.TempHistory[idx] = append(e.TempHistory[idx], b.IndoorTemperature)
		e.CostHistory[idx] = append(e.CostHistory[idx], b.CumulativeCost)
		e.HVACHistory[idx] = append(e.HVACHistory[idx], act.HVACPowerLevel)
		e.LoadShedHistory[idx] = append(e.LoadShedHistory[idx], clampedShed)
		e.RewardHistory[idx] = append(e.RewardHistory[idx], rc)
	}

	obs := e.buildObservation(b)

	return StepResponse{
		Observation: obs,
		Reward:      rc.Total,
		// e.step is incremented by the caller after all buildings have been
		// stepped, so "this was the last step" is s+1 >= EpisodeSteps here.
		Done: e.done || s+1 >= EpisodeSteps,
		Info: StepInfo{
			RewardComponents: rc,
			EnergyUsed:       energyKWh,
			CarbonEmitted:    stepCarbon,
			PriceSignal:      b.CurrentPrice,
			GridStress:       b.GridStressSignal,
			BatchCompleted:   batchCompleted,
			BatchMissed:      batchMissed,
			Episode:          e.episode,
			Step:             s,
		},
	}
}

// updateGridStress returns the grid stress signal in [0, 1] for step s.
//
// The signal is 0.6 × the step's price normalised over [0.04, 0.32] $/kWh,
// plus an occasional random spike of 0.3–0.8. A spike occurs with probability
// 5% (default), 10% ("medium") or 20% ("hard"). One random number is always
// drawn for the spike decision and a second one only when a spike occurs.
func (e *Environment) updateGridStress(s int) float64 {
	priceNorm := (e.PriceCurve[s] - 0.04) / (0.32 - 0.04)

	var spikeProb float64
	switch e.difficulty {
	case "hard":
		spikeProb = 0.2
	case "medium":
		spikeProb = 0.1
	default:
		spikeProb = 0.05
	}

	var spike float64
	if e.rng.Float64() < spikeProb {
		spike = 0.3 + e.rng.Float64()*0.5
	}

	return math.Max(0, math.Min(1.0, priceNorm*0.6+spike))
}

// updateOutdoorTemp returns the outdoor temperature (°C) for step s: a daily
// sinusoid centred on 15 °C with 8 °C amplitude that crosses its mean at
// 06:00 and peaks at 12:00, plus uniform noise of ±1.5 °C.
func (e *Environment) updateOutdoorTemp(s int) float64 {
	hourOfDay := float64(s) / 4.0
	phase := 2 * math.Pi * (hourOfDay - 6) / 24.0
	jitter := (e.rng.Float64()*2 - 1) * 1.5
	return 15.0 + 8.0*math.Sin(phase) + jitter
}

// updateProcessDemand returns the process demand for step s. Outside
// business hours the base level is 10; from 08:00 through the 18:00 hour it
// follows 20 + 10·sin(π·(hour-8)/10), peaking at 30 at 13:00. Uniform noise
// of ±3 is added and the result is floored at 0.
func (e *Environment) updateProcessDemand(s int) float64 {
	hour := s / 4

	level := 10.0
	if businessHours := hour >= 8 && hour <= 18; businessHours {
		level = 20.0 + 10.0*math.Sin(math.Pi*float64(hour-8)/10.0)
	}

	jitter := (e.rng.Float64()*2 - 1) * 3.0
	return math.Max(0, level+jitter)
}

// updateBatchJobs applies one step of batch-job bookkeeping to b and returns
// the IDs of jobs that completed or missed their deadline during this step
// (both slices are non-nil, possibly empty).
//
// First, the first job that is still pending (not scheduled, completed or
// missed) is scheduled to start at step+slot; at most one job is scheduled
// per call. Then every unsettled job is examined: a job whose deadline slot
// has been reached is marked missed, otherwise a scheduled job that has
// reached the last step of its run (ScheduledAt+Duration-1) is marked
// completed.
func (e *Environment) updateBatchJobs(b *BuildingState, slot int, step int) (completed []int, missed []int) {
	completed, missed = []int{}, []int{}

	// Schedule at most one pending job per step.
	for i := range b.Jobs {
		j := &b.Jobs[i]
		if j.Scheduled || j.Completed || j.MissedDeadline {
			continue
		}
		j.Scheduled = true
		j.ScheduledAt = step + slot
		break
	}

	// Settle jobs; the deadline check takes precedence over completion.
	for i := range b.Jobs {
		j := &b.Jobs[i]
		switch {
		case j.Completed || j.MissedDeadline:
			// Already settled in an earlier step.
		case step >= j.DeadlineSlot:
			j.MissedDeadline = true
			missed = append(missed, j.ID)
		case j.Scheduled && step >= j.ScheduledAt && step >= j.ScheduledAt+j.Duration-1:
			j.Completed = true
			completed = append(completed, j.ID)
		}
	}
	return completed, missed
}

// batchRunningPower returns the total power draw (kW) of the batch jobs of b
// that are running at the current step e.step, i.e. scheduled jobs whose run
// window [ScheduledAt, ScheduledAt+Duration) contains the step and that have
// not missed their deadline.
//
// The Completed flag is deliberately not consulted: updateBatchJobs sets it
// during the job's final step, before power is accounted for that step, so
// excluding completed jobs would bill every job for one step too few (and
// make single-step jobs free). The run window alone already stops the draw
// after the last step.
func (e *Environment) batchRunningPower(b *BuildingState) float64 {
	total := 0.0
	for i := range b.Jobs {
		job := &b.Jobs[i]
		if !job.Scheduled || job.MissedDeadline {
			continue
		}
		if e.step >= job.ScheduledAt && e.step < job.ScheduledAt+job.Duration {
			total += job.PowerDraw
		}
	}
	return total
}

// buildObservation converts the internal state of b into the agent-facing
// observation. Continuous values are rounded to a fixed number of decimals
// (temperature and demand 2, storage level and grid stress 3, price and
// cumulative cost 4, carbon intensity 1). BatchQueue is rebuilt from the
// job list on every call. The method only reads b.
func (e *Environment) buildObservation(b *BuildingState) ObservationModel {
	// round keeps log10(scale) decimals of v.
	round := func(v, scale float64) float64 {
		return math.Round(v*scale) / scale
	}

	return ObservationModel{
		BuildingID:          b.BuildingID,
		Step:                b.Step,
		HourOfDay:           b.HourOfDay,
		IndoorTemperature:   round(b.IndoorTemperature, 100),
		ThermalStorageLevel: round(b.ThermalStorageLevel, 1000),
		ProcessDemand:       round(b.ProcessDemand, 100),
		CurrentPrice:        round(b.CurrentPrice, 10000),
		GridStressSignal:    round(b.GridStressSignal, 1000),
		CarbonIntensity:     round(b.CarbonIntensity, 10),
		CumulativeCost:      round(b.CumulativeCost, 10000),
		BatchQueue:          pendingDeadlines(b.Jobs),
	}
}

// clampAction forces every field of a into its legal range, in place:
// HVACPowerLevel to [0, 1], ThermalChargeRate to [-1, 1], BatchJobSlot to
// [0, 4] and LoadShedFraction to [0, 0.5].
func (e *Environment) clampAction(a *ActionModel) {
	clamp := func(v, lo, hi float64) float64 {
		return math.Max(lo, math.Min(hi, v))
	}

	a.HVACPowerLevel = clamp(a.HVACPowerLevel, 0, 1.0)
	a.ThermalChargeRate = clamp(a.ThermalChargeRate, -1.0, 1.0)
	a.LoadShedFraction = clamp(a.LoadShedFraction, 0, 0.5)

	switch {
	case a.BatchJobSlot < 0:
		a.BatchJobSlot = 0
	case a.BatchJobSlot > 4:
		a.BatchJobSlot = 4
	}
}

// findAction selects the action to apply to the building at buildingIdx.
// Lookup order:
//  1. the first action whose BuildingID equals buildingIdx;
//  2. the action at position buildingIdx, if the slice is long enough;
//  3. a neutral default: HVAC at 50%, no storage use, slot 0, no shedding.
func (e *Environment) findAction(actions []ActionModel, buildingIdx int) ActionModel {
	for i := range actions {
		if actions[i].BuildingID == buildingIdx {
			return actions[i]
		}
	}

	if buildingIdx >= len(actions) {
		// All fields other than the HVAC level are zero in the default action.
		return ActionModel{HVACPowerLevel: 0.5}
	}
	return actions[buildingIdx]
}

// pendingDeadlines returns the deadline slots of all jobs that are neither
// completed nor past their deadline, in job order. Jobs that are already
// scheduled but still open are included. The result is never nil.
func pendingDeadlines(jobs []BatchJob) []int {
	open := make([]int, 0, len(jobs))
	for i := range jobs {
		if jobs[i].Completed || jobs[i].MissedDeadline {
			continue
		}
		open = append(open, jobs[i].DeadlineSlot)
	}
	return open
}

// max returns the larger of two ints.
func max(a, b int) int {
	if b >= a {
		return b
	}
	return a
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// ExploitDetected reports whether the current episode shows signs of a
// degenerate strategy for the building at buildingIdx, together with a
// penalty. Thread-safe (read lock).
//
// Two ratios are computed over e.step+1 (the +1 keeps the denominator
// non-zero before the first step):
//   - shedRatio: share of steps with load shedding above 40%;
//   - cycleRatio: share of steps with a rapid charge/discharge reversal.
//
// The episode is flagged when shedRatio > 0.7 or cycleRatio > 0.4. The
// penalty is 0.5 × the shed excess over 0.7 plus 0.3 × the cycle excess over
// 0.4, and 0 when nothing is flagged. An index outside the tracked buildings
// (negative or too large) yields (false, 0).
func (e *Environment) ExploitDetected(buildingIdx int) (bool, float64) {
	e.mu.RLock()
	defer e.mu.RUnlock()

	if buildingIdx < 0 ||
		buildingIdx >= len(e.totalShedSteps) ||
		buildingIdx >= len(e.thermalCycleCounts) {
		return false, 0.0
	}

	// Flag if agent always sheds > 40% load (more than 70% of steps)
	steps := float64(e.step + 1)
	shedRatio := float64(e.totalShedSteps[buildingIdx]) / steps
	cycleRatio := float64(e.thermalCycleCounts[buildingIdx]) / steps

	exploited := shedRatio > 0.7 || cycleRatio > 0.4
	penalty := 0.0
	if exploited {
		penalty = math.Max(shedRatio-0.7, 0)*0.5 + math.Max(cycleRatio-0.4, 0)*0.3
	}
	return exploited, penalty
}