from dataclasses import dataclass
from typing import Dict, Optional

from torch import nn as nn

from nemo.collections.common.parts import MultiLayerPerceptron
from nemo.collections.nlp.modules.common.classifier import Classifier
from nemo.core.classes import typecheck
from nemo.core.neural_types import LogitsType, LogprobsType, NeuralType

__all__ = ['BertPretrainingTokenClassifier', 'TokenClassifier']

ACT2FN = {"gelu": nn.functional.gelu, "relu": nn.functional.relu}


@dataclass
class TokenClassifierConfig:
    num_layers: int = 1
    activation: str = 'relu'
    log_softmax: bool = True
    dropout: float = 0.0
    use_transformer_init: bool = True


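# Illustrative sketch (not part of the original module): TokenClassifierConfig mirrors the
# optional keyword arguments of the classifier heads defined below, so a config instance can
# be expanded into a constructor call. hidden_size and num_classes come from the encoder and
# the task rather than from the config; all values here are hypothetical.
#
#   from dataclasses import asdict
#
#   cfg = TokenClassifierConfig(num_layers=2, activation='gelu', dropout=0.1)
#   head = TokenClassifier(hidden_size=768, num_classes=9, **asdict(cfg))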


class TokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks such as named entity recognition.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the TokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)
        self.log_softmax = log_softmax
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities, if log_softmax is enabled) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        logits = self.mlp(hidden_states)
        return logits


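# Illustrative usage sketch (not part of the original module): a small NER-style head over
# BERT-like encoder outputs. All shapes and hyperparameters below are hypothetical; the
# keyword-argument call follows NeMo's typecheck convention and may vary across NeMo versions.
#
#   import torch
#
#   ner_head = TokenClassifier(hidden_size=768, num_classes=9, num_layers=2, dropout=0.1)
#   hidden = torch.randn(4, 16, 768)            # [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
#   log_probs = ner_head(hidden_states=hidden)  # [4 x 16 x 9]; log probs since log_softmax=True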


class BertPretrainingTokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks for BERT pretraining.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the BertPretrainingTokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)

        self.log_softmax = log_softmax

        if activation not in ACT2FN:
            raise ValueError(f'activation "{activation}" not found, supported activations: {list(ACT2FN.keys())}')
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.act = ACT2FN[activation]
        self.norm = nn.LayerNorm(hidden_size, eps=1e-12)
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities, if log_softmax is enabled) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        # dense -> activation -> LayerNorm transform applied before the classification MLP
        hidden_states = self.dense(hidden_states)
        hidden_states = self.act(hidden_states)
        transform = self.norm(hidden_states)
        logits = self.mlp(transform)
        return logits


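if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch, not part of the original module).
    # num_classes is set to a vocabulary-sized value, as it would be for a masked
    # language modeling head during BERT pretraining; all numbers here are hypothetical.
    # The keyword-argument call follows NeMo's typecheck convention.
    import torch

    head = BertPretrainingTokenClassifier(hidden_size=768, num_classes=30522, activation='gelu')
    dummy_hidden = torch.randn(2, 8, 768)  # [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
    log_probs = head(hidden_states=dummy_hidden)
    print(log_probs.shape)  # expected: torch.Size([2, 8, 30522])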