Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import torch | |
| from torch import nn | |
| import torch.nn.functional as F | |
| from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence | |
class DynamicLayerConfig:
    """Hyper-parameter container for ``DynamicLayer``.

    Arguments for the nn.Embedding layer:
        vocab_size: size of the vocabulary (number of unique tokens,
            depends on the tokenizer configuration).
        embed_size: number of features used to represent one token.

    Arguments for the nn.LSTM layer:
        hidden_size: number of features in the hidden state.
        proj_size: if > 0, the LSTM uses output projections of this size,
            so the per-direction output width becomes ``proj_size``
            instead of ``hidden_size``.
        num_layers: number of stacked recurrent layers.
        dropout: if non-zero, adds a Dropout layer on the outputs of each
            LSTM layer except the last, with this dropout probability.
        bidirectional: if True, the LSTM is bidirectional.
    """

    def __init__(
        self,
        vocab_size: int,
        embed_size: int,
        hidden_size: int,
        proj_size: int = 0,
        num_layers: int = 1,
        dropout: float = 0.,
        bidirectional: bool = False,
    ) -> None:
        self.vocab_size = vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.proj_size = proj_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
class DynamicLayerAttentionBlock(nn.Module):
    """Single-head scaled dot-product self-attention pooling over LSTM outputs.

    Projects the RNN output into queries/keys/values, attends, and sums the
    context vectors over the time axis, producing one fixed-size vector per
    batch element.
    """

    def __init__(self, config):
        super().__init__()
        # Input width = feature size of the incoming LSTM output: nn.LSTM
        # emits proj_size features per direction when proj_size > 0,
        # otherwise hidden_size.  (Bug fix: the original always used
        # hidden_size, which crashed with a shape mismatch whenever
        # proj_size > 0.)
        in_size = config.proj_size if config.proj_size != 0 else config.hidden_size
        # Output width of the pooled vector — kept exactly as before
        # (proj_size, falling back to embed_size) so downstream layers such
        # as Model.fc keep the same input width.
        out_size = config.proj_size if config.proj_size != 0 else config.embed_size
        if config.bidirectional:
            # A bidirectional LSTM concatenates both directions' features.
            in_size *= 2
            out_size *= 2
        # Attribute names retained for external readers of this module.
        self.hidden_size = in_size
        self.proj_size = out_size
        self.W_Q = nn.Linear(in_size, out_size, bias=False)
        self.W_K = nn.Linear(in_size, out_size, bias=False)
        self.W_V = nn.Linear(in_size, out_size, bias=False)

    def forward(self, rnn_output):
        """Pool a (batch, seq, features) LSTM output into (batch, out_size).

        Returns:
            output: (batch, out_size) sum of attention contexts over time.
            alpha_n: (batch, seq, seq) attention weights (softmax over keys).
        """
        Q = self.W_Q(rnn_output)
        K = self.W_K(rnn_output)
        V = self.W_V(rnn_output)
        # Scale by sqrt(d_k) to keep dot products in a stable softmax range.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(1, 2)) / np.sqrt(d_k)
        alpha_n = F.softmax(scores, dim=-1)
        context = torch.matmul(alpha_n, V)
        # Collapse the time dimension to a fixed-size vector.
        output = context.sum(1)
        return output, alpha_n
class DynamicLayer(nn.Module):
    """Embeds token ids, encodes them with a length-aware (packed) LSTM, and
    pools the per-step outputs into one fixed-size vector via self-attention.
    """

    def __init__(self, config: DynamicLayerConfig):
        super().__init__()
        self.config = config
        # Token embedding table: ids in [0, vocab_size) -> embed_size vectors.
        self.wte = nn.Embedding(self.config.vocab_size, self.config.embed_size)
        self.lstm = nn.LSTM(
            input_size=self.config.embed_size,
            hidden_size=self.config.hidden_size,
            proj_size=self.config.proj_size,
            num_layers=self.config.num_layers,
            dropout=self.config.dropout,
            bidirectional=self.config.bidirectional,
            batch_first=True,
        )
        self.attention = DynamicLayerAttentionBlock(self.config)

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_lens: torch.LongTensor,
    ) -> torch.FloatTensor:
        """Encode a padded batch of token ids into fixed-size vectors.

        Arguments:
            input_ids: (batch_size, sequence_length) tensor of token indices
                in [0, vocab_size); looked up through ``nn.Embedding``.
                (The original misplaced docstring also advertised an
                ``inputs_embeds`` alternative, but this forward pass only
                accepts token ids.)
            input_lens: true (unpadded) length of each sequence; must be on
                the CPU, as ``pack_padded_sequence`` requires.

        Returns:
            (batch_size, features) attention-pooled encoding of each sequence.
        """
        input_embeds = self.wte(input_ids)
        # Pack so the LSTM skips each sequence's padded tail;
        # enforce_sorted=False lets the batch arrive in any length order.
        input_packed = pack_padded_sequence(
            input_embeds, input_lens, batch_first=True, enforce_sorted=False
        )
        lstm_output, (hn, cn) = self.lstm(input_packed)
        # Back to a padded (batch, seq, features) tensor for attention.
        # NOTE(review): positions past each sequence's length are zero-padded
        # here but still attended over — consider masking; left unchanged.
        output_padded, output_lengths = pad_packed_sequence(lstm_output, batch_first=True)
        output, _ = self.attention(output_padded)
        return output
class Model(nn.Module):
    """Binary classifier: variable-length encoder followed by a sigmoid head.

    The encoder (``DynamicLayer``) turns a padded batch of token ids into
    fixed-size vectors; a single linear layer plus sigmoid maps each vector
    to a probability in (0, 1).
    """

    def __init__(self, config: DynamicLayerConfig):
        super().__init__()
        # Width of the pooled vector emitted by the encoder's attention
        # block: proj_size when LSTM projections are enabled, embed_size
        # otherwise; doubled for a bidirectional LSTM.
        feature_dim = config.proj_size or config.embed_size
        if config.bidirectional:
            feature_dim = feature_dim * 2
        self.proj_size = feature_dim
        self.dynamic_layer = DynamicLayer(config)
        self.fc = nn.Linear(feature_dim, 1)

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_lens: torch.LongTensor,
    ) -> torch.FloatTensor:
        """Return per-sequence probabilities of shape (batch_size, 1)."""
        encoded = self.dynamic_layer(input_ids, input_lens)
        logits = self.fc(encoded)
        return torch.sigmoid(logits)