Spaces:
Sleeping
Sleeping
File size: 3,337 Bytes
9d9bb0d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | import numpy as np
import random
from collections import deque
from Cod_neuro_net import NeuralNetwork
class DQNAgent:
    """Epsilon-greedy Q-learning agent backed by a ``NeuralNetwork``.

    The agent stores transitions in a bounded replay memory, picks actions
    with an epsilon-greedy policy and trains the network on random
    mini-batches drawn from that memory.
    """

    # Names of the network parameters persisted by save() / restored by load().
    _PARAM_NAMES = ("w1", "b1", "w2", "b2")
    # Older load() code looked the weight matrices up under capitalised keys;
    # keep accepting archives written that way.
    _LEGACY_KEYS = {"w1": "W1", "w2": "W2"}

    def __init__(self):
        """Create the network, the replay memory and the hyper-parameters."""
        self.input_size = 11
        self.hidden_size = 256
        self.output_size = 3
        self.network = NeuralNetwork(self.input_size, self.hidden_size, self.output_size)
        # A deque with maxlen drops its oldest entry when a new one is
        # appended to a full buffer.
        self.memory = deque(maxlen=100_000)
        self.gamma = 0.9  # Discount applied to the best next-state Q-value
        self.epsilon = 1.0  # Exploration rate: 1.0 means every action is random
        self.epsilon_min = 0.01  # Exploration never decays below 1%
        self.epsilon_decay = 0.995  # Multiplier applied to epsilon per learn() call
        self.learning_rate = 0.001  # Step size handed to network.backward
        self.batch_size = 64  # Number of stored experiences used per learn() call

    def remember(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action index in
        ``[0, output_size - 1]`` is returned; otherwise the result of
        ``network.predict(state)`` is returned unchanged.
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.output_size - 1)
        return self.network.predict(state)

    def learn(self):
        """Train on one random mini-batch, then decay ``epsilon``.

        Does nothing until at least ``batch_size`` transitions are stored.
        For every sampled transition the training target is the network's
        current output for ``state`` with only the entry of the taken action
        replaced by ``reward`` (terminal step) or
        ``reward + gamma * max(Q(next_state))`` (non-terminal step).
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in batch:
            if done:
                # No future after a terminal step: the target is the reward alone.
                q_target = reward
            else:
                future_q = np.max(self.network.forward(next_state))
                q_target = reward + self.gamma * future_q

            # Q-values for every action (Right, Straight, Left); only the
            # taken action's entry is moved towards the new target.
            q_values = self.network.forward(state)
            target = q_values.copy()
            target[action] = q_target
            self.network.backward(state, target, self.learning_rate)

        # Decay epsilon so the agent becomes less random over time, but clamp
        # it so the last multiplication cannot push it below epsilon_min.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, filepath="snake_brain.npz"):
        """Write the network's ``w1``, ``b1``, ``w2`` and ``b2`` to an ``.npz`` file."""
        np.savez(
            filepath,
            w1=self.network.w1, b1=self.network.b1,
            w2=self.network.w2, b2=self.network.b2,
        )
        print(f"Model saved to {filepath}")

    def load(self, filepath="snake_brain.npz"):
        """Restore the network parameters from an ``.npz`` file.

        Reads the lowercase keys that :meth:`save` writes and falls back to
        the capitalised ``W1`` / ``W2`` keys for older archives.

        Raises:
            KeyError: if a required parameter is missing from the archive.
        """
        # The context manager closes the underlying zip file once all arrays
        # have been read into memory.
        with np.load(filepath) as data:
            stored = data.files
            for name in self._PARAM_NAMES:
                key = name if name in stored else self._LEGACY_KEYS.get(name, name)
                setattr(self.network, name, data[key])
        print(f"Model loaded from {filepath}")
if __name__ == "__main__":
    # Smoke test: feed the agent random transitions, run one learning step
    # and print a short status report.
    agent = DQNAgent()
    print("Testing agent with fake experiences...\n")

    # Fill the replay memory with 200 made-up game steps.
    for _ in range(200):
        fake_state = np.random.randn(11)
        chosen_action = agent.act(fake_state)
        fake_reward = random.choice([-10, 1, 10])
        fake_next_state = np.random.randn(11)
        died = random.random() < 0.05  # roughly 1 step in 20 ends the episode
        agent.remember(fake_state, chosen_action, fake_reward, fake_next_state, died)

    # One training pass over a sample of the stored memories.
    agent.learn()

    print(f"Memories stored : {len(agent.memory)}")
    print(f"Epsilon now : {agent.epsilon:.4f} (was 1.0)")
    print(f"Test action : {agent.act(np.random.randn(11))}")
    print("\nAgent is working correctly!")