""" LSTM Model for Sentiment Analysis ================================= Basic LSTM model for 3-class sentiment classification. This serves as baseline model for comparison. """ import torch import torch.nn as nn import torch.nn.functional as F class LSTMModel(nn.Module): """ Basic LSTM model for sentiment classification. Architecture: Embedding -> LSTM -> Dropout -> Fully Connected -> Softmax Args: vocab_size: Size of vocabulary embedding_dim: Dimension of word embeddings hidden_dim: Dimension of LSTM hidden state output_dim: Number of output classes (3 for sentiment) n_layers: Number of LSTM layers dropout: Dropout probability bidirectional: Whether to use bidirectional LSTM pad_idx: Index of padding token """ def __init__( self, vocab_size, embedding_dim=128, hidden_dim=256, output_dim=3, n_layers=2, dropout=0.5, bidirectional=False, pad_idx=0 ): super(LSTMModel, self).__init__() self.embedding = nn.Embedding( num_embeddings=vocab_size, embedding_dim=embedding_dim, padding_idx=pad_idx ) self.lstm = nn.LSTM( input_size=embedding_dim, hidden_size=hidden_dim, num_layers=n_layers, batch_first=True, dropout=dropout if n_layers > 1 else 0, bidirectional=bidirectional ) # Adjust for bidirectional lstm_output_dim = hidden_dim * 2 if bidirectional else hidden_dim self.dropout = nn.Dropout(dropout) self.fc = nn.Linear(lstm_output_dim, output_dim) # Store config for saving/loading self.config = { 'vocab_size': vocab_size, 'embedding_dim': embedding_dim, 'hidden_dim': hidden_dim, 'output_dim': output_dim, 'n_layers': n_layers, 'dropout': dropout, 'bidirectional': bidirectional, 'pad_idx': pad_idx } def forward(self, input_ids, lengths=None): """ Forward pass. Args: input_ids: Tensor of shape (batch_size, seq_length) lengths: Tensor of actual sequence lengths (optional) Returns: logits: Tensor of shape (batch_size, output_dim) """ # Embedding: (batch_size, seq_length) -> (batch_size, seq_length, embedding_dim) embedded = self.embedding(input_ids) embedded = self.dropout(embedded) # LSTM: (batch_size, seq_length, embedding_dim) -> (batch_size, seq_length, hidden_dim) if lengths is not None: # Pack padded sequence for efficiency packed = nn.utils.rnn.pack_padded_sequence( embedded, lengths.cpu(), batch_first=True, enforce_sorted=False ) lstm_out, (hidden, cell) = self.lstm(packed) # Unpack lstm_out, _ = nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True) else: lstm_out, (hidden, cell) = self.lstm(embedded) # Use the last hidden state if self.lstm.bidirectional: # Concatenate forward and backward hidden states hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1) else: hidden = hidden[-1,:,:] # Dropout and fully connected hidden = self.dropout(hidden) logits = self.fc(hidden) return logits def predict(self, input_ids, lengths=None): """Get predicted class.""" with torch.no_grad(): logits = self.forward(input_ids, lengths) predictions = torch.argmax(logits, dim=1) return predictions def predict_proba(self, input_ids, lengths=None): """Get class probabilities.""" with torch.no_grad(): logits = self.forward(input_ids, lengths) probabilities = F.softmax(logits, dim=1) return probabilities class BiLSTMModel(LSTMModel): """ Bidirectional LSTM model. Simply sets bidirectional=True in parent class. """ def __init__( self, vocab_size, embedding_dim=128, hidden_dim=256, output_dim=3, n_layers=2, dropout=0.5, pad_idx=0 ): super(BiLSTMModel, self).__init__( vocab_size=vocab_size, embedding_dim=embedding_dim, hidden_dim=hidden_dim, output_dim=output_dim, n_layers=n_layers, dropout=dropout, bidirectional=True, pad_idx=pad_idx ) # ==================== TESTING ==================== if __name__ == "__main__": # Test model vocab_size = 10000 batch_size = 32 seq_length = 50 # Create random input input_ids = torch.randint(0, vocab_size, (batch_size, seq_length)) lengths = torch.randint(10, seq_length, (batch_size,)) # Test LSTM model print("Testing LSTM Model:") model = LSTMModel(vocab_size=vocab_size) output = model(input_ids, lengths) print(f" Input shape: {input_ids.shape}") print(f" Output shape: {output.shape}") print(f" Parameters: {sum(p.numel() for p in model.parameters()):,}") # Test BiLSTM model print("\nTesting BiLSTM Model:") model_bi = BiLSTMModel(vocab_size=vocab_size) output_bi = model_bi(input_ids, lengths) print(f" Input shape: {input_ids.shape}") print(f" Output shape: {output_bi.shape}") print(f" Parameters: {sum(p.numel() for p in model_bi.parameters()):,}")