import numpy as np
import matplotlib.pyplot as plt

# 2. Defeating the Loop-and-Farm Exploit (PBRS + Velocity)
time_steps = np.arange(0, 100)
# Simulate an agent "oscillating" the load between 70% and 90% to farm rewards
load = 0.80 + 0.10 * np.sin(time_steps / 3.0)

tau_stress = 0.85       # load level above which the potential turns negative
beta_stress = 10.0      # scale of the quadratic stress potential
gamma_discount = 0.99   # discount applied to the next-step potential
lambda_vel = 5.0        # weight of the squared load-change (velocity) penalty

# Calculate Potential: zero at or below tau_stress, quadratically negative
# above it.
potential = -beta_stress * np.maximum(0, load - tau_stress)**2

# Naive PBRS Reward (Vulnerable to farming)
# Shaping term for the transition t-1 -> t is gamma * Phi[t] - Phi[t-1].
# Step 0 has no predecessor, so its reward stays 0. Shifted slices replace
# the element-by-element Python loop.
r_naive = np.zeros_like(load)
r_naive[1:] = gamma_discount * potential[1:] - potential[:-1]

# Fixed PBRS with Velocity Penalty
# Subtract lambda_vel * (load[t] - load[t-1])**2 from every transition; the
# penalty at step 0 is 0 for the same no-predecessor reason.
velocity_penalty = np.zeros_like(load)
velocity_penalty[1:] = lambda_vel * (load[1:] - load[:-1])**2
r_fixed = r_naive - velocity_penalty

# Two vertically stacked panels sharing the time axis: the simulated load on
# top, the two reward signals underneath.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 8))

# Upper panel: oscillating load with the stress threshold drawn as a dashed line.
ax1.set_title(
    'The "Loop and Farm" Exploit vs. Velocity-Penalized PBRS',
    fontsize=14,
    fontweight='bold',
)
ax1.set_ylabel('Node CPU Load', fontsize=12)
ax1.plot(time_steps, load, label='CPU Load', linewidth=2, color='purple')
ax1.axhline(tau_stress, label='Danger Threshold (85%)', linestyle='--', color='red')

# Lower panel: each reward series paired with its line styling, drawn in the
# same order as before so the legend entries keep their order.
reward_curves = (
    (r_naive, {
        'label': 'Naive PBRS (Positive spikes = Agent Farms Points)',
        'color': 'red',
        'linestyle': '--',
        'linewidth': 2,
    }),
    (r_fixed, {
        'label': 'Velocity-Penalized PBRS (DIME Final)',
        'color': 'green',
        'linewidth': 2,
    }),
)
for rewards, line_style in reward_curves:
    ax2.plot(time_steps, rewards, **line_style)
ax2.axhline(0, linewidth=1, color='black')  # zero-reward reference line
ax2.set_ylabel('Instantaneous Reward', fontsize=12)
ax2.set_xlabel('Time Steps', fontsize=12)

# Legend and light grid are identical for both panels.
for panel in (ax1, ax2):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('fig2_cascade_exploit_fix.png', dpi=300)
print("Saved fig2_cascade_exploit_fix.png")