from typing import Optional

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


class Experience:
    """
    Mutable record with five fields: state, action, reward, next_state, done.

    The constructor takes no arguments, so every field starts unset (None)
    except ``done``, which starts as False; fields are assigned afterwards.

    NOTE(review): the field names suggest a single reinforcement-learning
    transition -- confirm against the code that fills these in.
    """

    def __init__(self):
        # Annotated Optional because each of these is None until assigned.
        self.state: Optional[list[int]] = None
        self.action: Optional[int] = None
        self.reward: Optional[float] = None
        self.next_state: Optional[list[int]] = None
        self.done: bool = False


class TetrisNet(nn.Module):
    """
    Fully connected network mapping a 16-value board to 4 linear outputs.

    Architecture:
        Input:  16 features
        Hidden: 64 -> 64 -> 32 units, each followed by ReLU
        Output: 4 units, no activation (linear)
    """

    def __init__(self):
        super().__init__()
        # These attribute names become the state_dict keys, so renaming them
        # would make previously saved weights fail to load.
        self.layer1 = nn.Linear(16, 64)
        self.layer2 = nn.Linear(64, 64)
        self.layer3 = nn.Linear(64, 32)
        self.output = nn.Linear(32, 4)
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Run the network on ``x`` and return the raw (un-activated) outputs.

        Args:
            x: Tensor whose last dimension has 16 features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 4.
        """
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        # No activation on the final layer: the outputs are unbounded.
        x = self.output(x)
        return x


class TetrisAI:
    """
    Wraps a TetrisNet together with an Adam optimizer and an MSE loss.

    - Loads a saved model if save_file_path is provided.
    - Otherwise, constructs a fresh model.
    - Has methods to save, predict, and train the model.
    """

    def __init__(self, save_file_path: Optional[str] = None):
        """
        Build the model and optimizer, optionally restoring both from a file.

        Args:
            save_file_path: Path to a checkpoint written by ``save``. When
                None, the model keeps its freshly initialised weights.
        """
        self.model = TetrisNet()

        # The optimizer has to exist before its saved state can be loaded.
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.003)
        self.criterion = nn.MSELoss()

        if save_file_path is not None:
            # SECURITY: torch.load unpickles the file, which can run arbitrary
            # code. Only load checkpoints that come from a trusted source.
            # map_location places the loaded tensors on the CPU.
            checkpoint = torch.load(save_file_path, map_location=torch.device('cpu'))
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.model.eval()

    def save(self, path: str) -> None:
        """
        Saves the PyTorch model and optimizer state to a file.

        Args:
            path: Destination file; the result can be passed back to the
                constructor as ``save_file_path``.
        """
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict()
        }, path)

    def predict(self, board: list[int]) -> list[float]:
        """
        Performs a forward pass to predict the Q-values for each possible move.

        Args:
            board: The board values fed to the network as one input row.

        Returns:
            The network outputs for that board as a list of floats.
        """
        # Wrap the board in an outer list so the tensor has a batch of one.
        x = torch.tensor([board], dtype=torch.float32)

        # Inference only: evaluation mode and no gradient tracking.
        self.model.eval()
        with torch.no_grad():
            prediction = self.model(x)

        # Drop the batch dimension and convert to plain Python floats.
        return prediction[0].tolist()

    def train(self, board: list[int], qvalues: list[float]) -> None:
        """
        Trains the model on one step using the given board as input and
        qvalues as the desired output.

        Args:
            board: The board values fed to the network as one input row.
            qvalues: Target outputs the prediction is compared against with
                the MSE loss.
        """
        self.model.train()

        # Both tensors get a leading batch dimension of one.
        x = torch.tensor([board], dtype=torch.float32)
        y = torch.tensor([qvalues], dtype=torch.float32)

        # Clear gradients left over from the previous step before backprop.
        self.optimizer.zero_grad()

        predictions = self.model(x)
        loss = self.criterion(predictions, y)
        loss.backward()
        self.optimizer.step()

