import numpy as np
import matplotlib.pyplot as plt

# 2. Defeating the Loop-and-Farm Exploit (PBRS + Velocity)
#
# Builds a synthetic CPU-load trace that oscillates around the stress
# threshold, computes two shaping rewards for it (plain potential-based
# shaping, and the same reward minus a squared load-change penalty), and
# saves a two-panel comparison figure to `output_path`.

time_steps = np.arange(0, 100)

# Simulate an agent "oscillating" the load between 70% and 90% to farm rewards
load = 0.80 + 0.10 * np.sin(time_steps / 3.0)

tau_stress = 0.85      # load level above which the potential becomes negative
beta_stress = 10.0     # scale of the quadratic stress potential
gamma_discount = 0.99  # discount applied to the next-step potential
lambda_vel = 5.0       # weight of the squared load-change (velocity) penalty
output_path = 'fig2_cascade_exploit_fix.png'

# Calculate Potential: zero at or below tau_stress, quadratic penalty above it.
potential = -beta_stress * np.maximum(0, load - tau_stress)**2

# Naive PBRS Reward (Vulnerable to farming):
#   r_naive[t] = gamma * potential[t] - potential[t-1]
# Step 0 has no predecessor, so its reward is left at 0.
r_naive = np.zeros_like(load)
r_naive[1:] = gamma_discount * potential[1:] - potential[:-1]

# Fixed PBRS with Velocity Penalty: subtract lambda * (load[t] - load[t-1])^2
# so that every change in load costs reward, whichever direction it goes.
velocity_penalty = lambda_vel * (load[1:] - load[:-1])**2
r_fixed = np.zeros_like(load)
r_fixed[1:] = r_naive[1:] - velocity_penalty

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# Top Plot: The oscillating load
ax1.plot(time_steps, load, color='purple', linewidth=2, label='CPU Load')
# The label is derived from tau_stress so the legend cannot drift from the
# plotted threshold line (renders as "85%" for tau_stress = 0.85).
ax1.axhline(tau_stress, color='red', linestyle='--',
            label=f'Danger Threshold ({tau_stress:.0%})')
ax1.set_ylabel('Node CPU Load', fontsize=12)
ax1.set_title('The "Loop and Farm" Exploit vs. Velocity-Penalized PBRS',
              fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Bottom Plot: The Rewards
ax2.plot(time_steps, r_naive,
         label='Naive PBRS (Positive spikes = Agent Farms Points)',
         color='red', linestyle='--', linewidth=2)
ax2.plot(time_steps, r_fixed,
         label='Velocity-Penalized PBRS (DIME Final)',
         color='green', linewidth=2)
ax2.axhline(0, color='black', linewidth=1)
ax2.set_xlabel('Time Steps', fontsize=12)
ax2.set_ylabel('Instantaneous Reward', fontsize=12)
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
fig.savefig(output_path, dpi=300)
print(f"Saved {output_path}")