TaherFattahi
/

tetris-neural-network-Q-learning

Reinforcement Learning

Model card Files Files and versions

tetris-neural-network-Q-learning / model.py

TaherFattahi's picture

init: tetris neural network model with q learning

03b0d13 over 1 year ago

history blame contribute delete

3.42 kB

	import torch
	import torch.nn as nn
	import torch.optim as optim
	import numpy as np

	class Experience:
	    """
	    Mutable container for one recorded step.

	    Every field except ``done`` starts as ``None`` and is expected to be
	    assigned by whoever creates the record; ``done`` starts as ``False``.
	    NOTE(review): the field names suggest the usual
	    (state, action, reward, next_state, done) transition tuple -- confirm
	    against the code that fills these in.
	    """

	    def __init__(self):
	        # Annotations describe the value each field holds once populated;
	        # None marks "not set yet".
	        self.state: list[int] | None = None       # presumably the board before the move
	        self.action: int | None = None            # presumably the index of the chosen move
	        self.reward: float | None = None          # presumably the reward for that move
	        self.next_state: list[int] | None = None  # presumably the board after the move
	        self.done: bool = False                   # presumably True once the episode ended

	class TetrisNet(nn.Module):
	    """
	    Small fully connected network used as the Q-value model.

	    Layout (every layer is ``nn.Linear``):
	        16 inputs -> 64 -> 64 -> 32 (ReLU after each) -> 4 linear outputs
	    """

	    def __init__(self):
	        super().__init__()
	        # nn.Module uses these attribute names as state_dict key prefixes, so
	        # renaming any of them would break loading of existing checkpoints.
	        self.layer1 = nn.Linear(16, 64)
	        self.layer2 = nn.Linear(64, 64)
	        self.layer3 = nn.Linear(64, 32)
	        self.output = nn.Linear(32, 4)
	        self.relu = nn.ReLU()

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """
	        Run the network on ``x`` (last dimension must be 16).

	        Returns the raw output of the final linear layer (last dimension 4);
	        no activation is applied to it.
	        """
	        hidden = x
	        for layer in (self.layer1, self.layer2, self.layer3):
	            hidden = self.relu(layer(hidden))
	        return self.output(hidden)

	class TetrisAI:
	    """
	    Wrapper that owns a TetrisNet, its Adam optimizer and an MSE loss.

	    - Loads a saved checkpoint if save_file_path is provided.
	    - Otherwise, starts from a freshly initialised model.
	    - Has methods to save, predict, and train the model.
	    """

	    def __init__(self, save_file_path: str = None):
	        """
	        Build the model, optimizer and loss; optionally restore a checkpoint.

	        Args:
	            save_file_path: File previously written by ``save``. ``None``
	                (the default) keeps the freshly initialised weights.

	        Raises:
	            KeyError: If the checkpoint lacks ``'model_state_dict'`` or
	                ``'optimizer_state_dict'``.
	        """
	        # Create the model
	        self.model = TetrisNet()

	        # Define the optimizer and loss function
	        self.optimizer = optim.Adam(self.model.parameters(), lr=0.003)
	        self.criterion = nn.MSELoss()

	        # Load from file if path is provided
	        if save_file_path is not None:
	            # SECURITY: torch.load unpickles the file. weights_only=True
	            # restricts it to tensors and plain containers -- which is all
	            # that save() writes -- so a tampered checkpoint cannot execute
	            # arbitrary code while being loaded.
	            checkpoint = torch.load(
	                save_file_path,
	                map_location=torch.device('cpu'),
	                weights_only=True,
	            )
	            self.model.load_state_dict(checkpoint['model_state_dict'])
	            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
	            self.model.eval()

	    @staticmethod
	    def _as_batch(values) -> torch.Tensor:
	        """Return ``values`` as a float32 tensor with a leading batch axis of 1."""
	        # as_tensor accepts lists, NumPy arrays and tensors alike, whereas
	        # torch.tensor([values]) rejects tensor input and is slow (with a
	        # warning) for arrays. detach() keeps the result free of any autograd
	        # history when a tensor is passed in.
	        return torch.as_tensor(values, dtype=torch.float32).detach().unsqueeze(0)

	    def save(self, path: str) -> None:
	        """
	        Saves the model and optimizer state dicts to ``path`` in one file.

	        The file holds a dict with the keys ``'model_state_dict'`` and
	        ``'optimizer_state_dict'``, the layout ``__init__`` expects when
	        restoring.
	        """
	        torch.save({
	            'model_state_dict': self.model.state_dict(),
	            'optimizer_state_dict': self.optimizer.state_dict()
	        }, path)

	    def predict(self, board: list[int]) -> list[float]:
	        """
	        Performs a forward pass to predict the Q-values for each possible move.

	        Args:
	            board: The board values; a list, NumPy array or tensor is accepted.

	        Returns:
	            The model output for this single board as a list of floats.
	        """
	        # Shape the board as a one-element batch
	        x = self._as_batch(board)

	        # Put model in evaluation mode and disable gradient tracking
	        self.model.eval()
	        with torch.no_grad():
	            prediction = self.model(x)

	        # Drop the batch axis and convert to plain Python floats
	        return prediction[0].tolist()

	    def train(self, board: list[int], qvalues: list[float]) -> None:
	        """
	        Runs one optimisation step towards ``qvalues`` for the given ``board``.

	        Args:
	            board: The board values used as the model input.
	            qvalues: The desired model output for that board.

	        Note:
	            The model is left in training mode afterwards; ``predict``
	            switches it back to evaluation mode on its next call.
	        """
	        # Put model in training mode
	        self.model.train()

	        # Convert data to one-element batches
	        x = self._as_batch(board)
	        y = self._as_batch(qvalues)

	        # Zero the parameter gradients
	        self.optimizer.zero_grad()

	        # Forward + Backward + Optimize
	        predictions = self.model(x)
	        loss = self.criterion(predictions, y)
	        loss.backward()
	        self.optimizer.step()