"""Compare a Q-learning agent using a stored Q matrix against a random agent.

The script loads a Q matrix from ``Qmatrix.npy`` into a ``Qlearning`` agent,
runs ``iterations`` simulations with the learned strategy and ``iterations``
simulations with the random strategy, prints the collected scores, and then
shows two figures: a histogram of the trained agent's scores and a horizontal
box plot comparing both agents.
"""

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from Qlearning_pole import Qlearning

# Number of simulation runs per agent; also reported in the box-plot title.
iterations = 50

agent = Qlearning()

# Overwrite the agent's Q table with the one stored on disk.
agent.Q = np.load("Qmatrix.npy")

# Score of each trained-agent run: the sum of the first value returned by
# simulateLearnedStrategy(). The second returned value is not needed here.
scores = []
for _ in tqdm(range(iterations), miniters=1, desc="Trained Agent"):
    rewards, _ignored = agent.simulateLearnedStrategy()
    scores.append(np.sum(rewards))

# Score of each random-agent run.
# NOTE(review): unlike the trained-agent loop, the first returned value is
# stored as-is (no np.sum). Presumably simulateRandomStrategy() already
# returns an aggregated score -- confirm against Qlearning_pole.
random_scores = []
for _ in tqdm(range(iterations), miniters=1, desc="Random Agent"):
    outcome, _ignored = agent.simulateRandomStrategy()
    random_scores.append(outcome)

# Order matters: box 1 is the random agent, box 2 is the trained agent.
data = [random_scores, scores]
print(data)

# Figure 1: distribution of the trained agent's scores.
plt.title("Rewards with trained agent")
plt.hist(scores)
plt.xlabel('Reward')
# plt.hist() with default arguments plots raw counts per bin, so the axis is
# labelled as a count rather than as a percentage.
plt.ylabel('Number of episodes')
plt.show()

# Figure 2: horizontal, notched box plot comparing both agents.
fig, ax = plt.subplots(figsize=(10, 7))
ax.boxplot(data, patch_artist=True, notch=True, vert=False)

# boxplot() places the boxes at positions 1..N; label them so the reader can
# tell which box belongs to which agent (same order as `data`).
ax.set_yticks([1, 2])
ax.set_yticklabels(["Random Agent", "Trained Agent"])

# Derive the episode count from `iterations` so the title cannot go stale.
ax.set_title(f"Trained Agent vs Random Agent ({iterations} episodes)")
plt.show()