import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class DynamicLayerConfig:
    """
    Arguments for the nn.Embedding layer:
        vocab_size - size of the vocabulary (number of unique tokens; depends on the
                     tokenizer configuration)
        embed_size - number of features used to represent one token
    Arguments for the nn.LSTM layer:
        hidden_size - number of features in the hidden state
        proj_size - if > 0, the LSTM uses hidden-state projections of this size; when 0,
                    the attention block falls back to embed_size for its output width
        num_layers - number of recurrent layers
        dropout - if non-zero, introduces a Dropout layer on the outputs of each LSTM
                  layer except the last, with dropout probability equal to dropout
        bidirectional - if True, the LSTM is bidirectional
    """
    def __init__(
            self, 
            vocab_size: int, 
            embed_size: int,
            hidden_size: int, 
            proj_size: int = 0, 
            num_layers: int = 1, 
            dropout: float = 0., 
            bidirectional: bool = False
            ):
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.proj_size = proj_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
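
# Example configuration (a sketch with illustrative values; the vocabulary size
# and layer widths below are assumptions, not values taken from this repository):
#
#     config = DynamicLayerConfig(
#         vocab_size=10_000,   # depends on the tokenizer
#         embed_size=128,
#         hidden_size=256,
#         num_layers=2,
#         dropout=0.1,
#         bidirectional=True,
#     )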

class DynamicLayerAttentionBlock(nn.Module):
    def __init__(self, config):
        super().__init__()
        # Feature size of the LSTM output fed into this block: proj_size when the
        # LSTM uses projections, otherwise hidden_size (the original code used
        # hidden_size unconditionally, which breaks when proj_size > 0).
        self.input_size = config.proj_size if config.proj_size != 0 else config.hidden_size
        # Feature size of the attention output (falls back to embed_size when
        # proj_size is 0).
        self.proj_size = config.proj_size if config.proj_size != 0 else config.embed_size
        if config.bidirectional:
            # A bidirectional LSTM concatenates both directions, doubling the width.
            self.input_size *= 2
            self.proj_size *= 2

        self.W_Q = nn.Linear(self.input_size, self.proj_size, bias=False)
        self.W_K = nn.Linear(self.input_size, self.proj_size, bias=False)
        self.W_V = nn.Linear(self.input_size, self.proj_size, bias=False)

    def forward(self, rnn_output, lengths=None):
        Q = self.W_Q(rnn_output)
        K = self.W_K(rnn_output)
        V = self.W_V(rnn_output)

        # Scaled dot-product attention over the sequence dimension.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(1, 2)) / np.sqrt(d_k)

        pad_mask = None
        if lengths is not None:
            # (batch, seq_len) boolean mask, True at padded positions; keep
            # attention weight (and the pooled sum below) off the padding.
            positions = torch.arange(rnn_output.size(1), device=rnn_output.device)
            pad_mask = positions[None, :] >= lengths.to(rnn_output.device)[:, None]
            scores = scores.masked_fill(pad_mask.unsqueeze(1), float('-inf'))

        alpha_n = F.softmax(scores, dim=-1)
        context = torch.matmul(alpha_n, V)
        if pad_mask is not None:
            context = context.masked_fill(pad_mask.unsqueeze(-1), 0.)

        # Sum over the sequence dimension: (batch, seq_len, proj) -> (batch, proj).
        output = context.sum(1)

        return output, alpha_n
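
# Shape walk-through for the attention block (illustrative sizes, assuming
# proj_size=0, embed_size=32, hidden_size=64, bidirectional=False):
#
#     rnn_output: (batch, seq_len, 64)      # LSTM output
#     Q, K, V:    (batch, seq_len, 32)
#     scores:     (batch, seq_len, seq_len)
#     context:    (batch, seq_len, 32)
#     output:     (batch, 32)               # fixed size, independent of seq_len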


class DynamicLayer(nn.Module):
    def __init__(self, config: DynamicLayerConfig):
        super().__init__()

        self.config = config

        # Token embedding table ("wte" = word token embeddings).
        self.wte = nn.Embedding(self.config.vocab_size, self.config.embed_size)
        self.lstm = nn.LSTM(
            input_size=self.config.embed_size,
            hidden_size=self.config.hidden_size,
            proj_size=self.config.proj_size,
            num_layers=self.config.num_layers,
            dropout=self.config.dropout,
            bidirectional=self.config.bidirectional,
            batch_first=True,
        )
        # Attention pooling turns the variable-length LSTM output into a
        # fixed-size vector.
        self.attention = DynamicLayerAttentionBlock(self.config)

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_lens: torch.LongTensor,
    ) -> torch.FloatTensor:
        """
        Arguments:
            input_ids - tensor of shape (batch_size, sequence_length) with values in
                        [0, vocab_size). The indices are passed through nn.Embedding to
                        obtain embeddings of shape (batch_size, sequence_length, embed_size).
            input_lens - tensor of shape (batch_size,) holding the true (unpadded)
                         length of each sequence in the batch.
        Returns:
            A fixed-size tensor of shape (batch_size, attention output size).
        """
        input_embeds = self.wte(input_ids)

        # pack_padded_sequence expects the lengths on the CPU.
        input_packed = pack_padded_sequence(
            input_embeds, input_lens.cpu(), batch_first=True, enforce_sorted=False
        )

        lstm_output, (hn, cn) = self.lstm(input_packed)

        # Restore a (batch, max_len, features) tensor; padded positions are zeros.
        output_padded, output_lengths = pad_packed_sequence(lstm_output, batch_first=True)

        # Pool with attention, masking out the padded positions.
        output, _ = self.attention(output_padded, lengths=output_lengths)
        return output
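
# Standalone usage sketch for DynamicLayer (hypothetical tensors):
#
#     layer = DynamicLayer(config)
#     ids = torch.randint(0, config.vocab_size, (4, 12))  # (batch, max_len)
#     lens = torch.tensor([12, 9, 7, 3])                  # descending order not required
#     pooled = layer(ids, lens)                           # (4, attention output size)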
    

class Model(nn.Module):
    def __init__(self, config: DynamicLayerConfig):
        super().__init__()
        # Output width of the attention block: proj_size when the LSTM uses
        # projections, otherwise embed_size; doubled for bidirectional LSTMs.
        self.proj_size = config.proj_size if config.proj_size != 0 else config.embed_size
        if config.bidirectional:
            self.proj_size *= 2
        self.dynamic_layer = DynamicLayer(config)
        # Binary classification head: one probability per sequence.
        self.fc = nn.Linear(self.proj_size, 1)
    
    def forward(
        self,
        input_ids: torch.LongTensor,
        input_lens: torch.LongTensor,
    ) -> torch.FloatTensor:

        # Variable-length batch -> fixed-size representation -> probability.
        fixed_size = self.dynamic_layer(input_ids, input_lens)
        return torch.sigmoid(self.fc(fixed_size))
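

# Minimal end-to-end smoke test (a sketch: the hyperparameters, batch size, and
# sequence lengths below are illustrative assumptions, not values taken from
# this repository).
if __name__ == "__main__":
    config = DynamicLayerConfig(
        vocab_size=1_000,
        embed_size=32,
        hidden_size=64,
        num_layers=2,
        dropout=0.1,
        bidirectional=True,
    )
    model = Model(config)

    batch_size, max_len = 4, 10
    input_ids = torch.randint(0, config.vocab_size, (batch_size, max_len))
    input_lens = torch.tensor([10, 7, 5, 3])  # true length of each sequence

    probs = model(input_ids, input_lens)
    print(probs.shape)  # expected: torch.Size([4, 1]), one probability per sequence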