import numpy as np
from Qlearning_pole import Qlearning
from tqdm import tqdm
import matplotlib.pyplot as plt


# Number of games the agent will play.
iterations = 50
agent = Qlearning()
# Insert the weights of the Agent to plot.
agent.Q = np.load("Qmatrix.npy")

scores = []
for i in tqdm(range(iterations),miniters=1,desc="Trained Agent"):
    a, b = agent.simulateLearnedStrategy()
    scores.append(np.sum(a))

random_scores = []
for i in tqdm(range(iterations),miniters=1,desc="Random Agent"):
    a, b = agent.simulateRandomStrategy()
    random_scores.append(a)

data = [random_scores,scores]
print(data)

plt.title("Rewards with trained agent")
plt.hist(scores)
plt.xlabel('Reward')
plt.ylabel('Percentage')
# plt.savefig('./resources/new.png')
plt.show()

fig = plt.figure(figsize=(10,7))
ax = fig.add_subplot(111)


bp = ax.boxplot(data,patch_artist=True,notch=True,vert=0)
plt.title("Trained Agent vs Random Agent (50 episodes)")
# plt.savefig("./resources/old_boxplot.png")
plt.show()