import numpy as np class NeuralNetwork: def __init__(self,input_size, hidden_size, output_size): #Layer 1: Connects input to hidden neurons self.w1 = np.random.randn(input_size,hidden_size) * 0.01 self.b1 = np.zeros(hidden_size) #Layer 2 : Connects hidden neurons to output self.w2 = np.random.randn(hidden_size,output_size) * 0.01 self.b2 = np.zeros(output_size) def relu(self,x): return np.maximum(0,x) def relu_derivative(self,x): return (x > 0).astype(float) def forward(self, x): self.z1 = np.dot(x, self.w1) + self.b1 self.a1 = self.relu(self.z1) self.z2 = np.dot(self.a1, self.w2) + self.b2 return self.z2 def backward (self, x, target, learning_rate = 0.001): output = self.forward(x) loss = np.mean((output - target) **2) d_output = 2 * (output- target)/ len(target) #This calculates how each neuron contributed to the loss ( Gradient) d_w2 = np.outer(self.a1,d_output) #The outer product d_b2 = d_output d_hidden = np.dot(d_output, self.w2.T) #Move output error back into the hidden layer (Transposing the matrix) #Filter the error so that it passes through neurons that were active during the Forward pass d_hidden *= self.relu_derivative(self.z1) #Error will pass through neurons with 1 d_w1 = np.outer(x, d_hidden) d_b1 = d_hidden self.w1 -= learning_rate * d_w1 self.b1 -= learning_rate * d_b1 self.w2 -= learning_rate * d_w2 self.b2 -= learning_rate * d_b2 return loss def predict(self, state): scores = self.forward(state) return np.argmax(scores) if __name__ == "__main__": net = NeuralNetwork(input_size=11, hidden_size=256, output_size=3) fake_state = np.array([1,0,0,1,0,0,1,0,0,0,1], dtype=float) fake_target = np.array([0.0, 1.0, 0.0]) # we WANT output 2 to be 1.0 print("Training the network on one example...\n") print(f"{'Step':<8} {'Loss':>10} {'Scores (left, straight, right)'}") print("-" * 60) for step in range(1, 301): loss = net.backward(fake_state, fake_target, learning_rate=0.01) if step % 30 == 0 or step == 1: scores = net.forward(fake_state) print(f"{step:<8} {loss:>10.6f} {np.round(scores, 3)}") print("\nFinal chosen action:", net.predict(fake_state)) print("(Should be 1 = go straight )")