"""Plot final reward against batch size for a fixed set of data points."""

import matplotlib.pyplot as plt
import numpy as np

# Hard-coded data points; the two arrays are paired by index, so they must
# stay the same length.
batch_sizes = np.array([4, 8, 16, 32])
rewards = np.array([0.72, 0.81, 0.85, 0.83])

# Single line series with a circular marker at each (batch size, reward) pair.
plt.plot(batch_sizes, rewards, marker='o')
plt.title("Reward vs Batch Size Scaling")
plt.xlabel("Batch Size")
plt.ylabel("Final Reward")
plt.grid(True)

# Render the figure; with an interactive backend this blocks until the
# window is closed.
plt.show()