# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from typing import Dict, Optional

from torch import nn as nn

from nemo.collections.common.parts import MultiLayerPerceptron
from nemo.collections.nlp.modules.common.classifier import Classifier
from nemo.core.classes import typecheck
from nemo.core.neural_types import LogitsType, LogprobsType, NeuralType

__all__ = ['BertPretrainingTokenClassifier', 'TokenClassifier']

# Activation functions selectable by name in BertPretrainingTokenClassifier.
ACT2FN = {"gelu": nn.functional.gelu, "relu": nn.functional.relu}


@dataclass
class TokenClassifierConfig:
    """Default hyperparameters for configuring a TokenClassifier head."""

    num_layers: int = 1
    activation: str = 'relu'
    log_softmax: bool = True
    dropout: float = 0.0
    use_transformer_init: bool = True


class TokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks such as named entity recognition.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the TokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in the Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)
        self.log_softmax = log_softmax
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities if log_softmax is True) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        logits = self.mlp(hidden_states)
        return logits


class BertPretrainingTokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks for BERT pretraining.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the BertPretrainingTokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in the Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)

        self.log_softmax = log_softmax

        if activation not in ACT2FN:
            raise ValueError(f'activation "{activation}" not found')

        # Dense + activation + LayerNorm transform applied before the classification MLP,
        # mirroring the transform used in the BERT masked language modeling head.
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.act = ACT2FN[activation]
        self.norm = nn.LayerNorm(hidden_size, eps=1e-12)
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities if log_softmax is True) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        hidden_states = self.dense(hidden_states)
        hidden_states = self.act(hidden_states)
        transform = self.norm(hidden_states)
        logits = self.mlp(transform)
        return logits