NeMo / nemo /collections /nlp /modules /common /token_classifier.py
camenduru's picture
thanks to NVIDIA ❤
7934b29
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from typing import Dict, Optional
from torch import nn as nn
from nemo.collections.common.parts import MultiLayerPerceptron
from nemo.collections.nlp.modules.common.classifier import Classifier
from nemo.core.classes import typecheck
from nemo.core.neural_types import LogitsType, LogprobsType, NeuralType
# Names exported by `from ... import *`: the two classifier head classes of this module.
__all__ = ['BertPretrainingTokenClassifier', 'TokenClassifier']
# Maps an activation name to the torch.nn.functional callable of the same name.
ACT2FN = dict(gelu=nn.functional.gelu, relu=nn.functional.relu)
@dataclass
class TokenClassifierConfig:
    """
    Plain container of settings for a token classification head.

    The fields and their defaults match the optional keyword arguments of
    ``TokenClassifier.__init__`` in this module.
    """

    # Number of fully connected layers in the MLP.
    num_layers: int = 1
    # Name of the activation used in the MLP.
    activation: str = "relu"
    # Whether the head is configured to output log-probabilities rather than logits.
    log_softmax: bool = True
    # Dropout value for the classifier head.
    dropout: float = 0.0
    # Whether to use the Transformer-style weight initialization.
    use_transformer_init: bool = True
class TokenClassifier(Classifier):
    """
    Token-level classification head, e.g. for named entity recognition.

    The incoming hidden states are passed through dropout and then through a
    multilayer perceptron that produces one score per class for every token.
    """

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Builds the token classifier.

        Args:
            hidden_size: size of the hidden dimension of the input
            num_classes: number of output classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: name of the activation to use between the fully connected layers of the MLP
            log_softmax: forwarded to the MLP; when True the output port is declared as
                log-probabilities ("log_probs"), otherwise as raw "logits"
            dropout: dropout value handed to the base ``Classifier``; ``forward`` applies
                ``self.dropout`` to the incoming hidden states
            use_transformer_init: whether to initialize the weights of the classifier head
                with the same approach used in Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)
        # Remembered so that `output_types` can report the matching output port.
        self.log_softmax = log_softmax
        mlp_options = dict(num_layers=num_layers, activation=activation, log_softmax=log_softmax)
        self.mlp = MultiLayerPerceptron(hidden_size, num_classes, **mlp_options)
        self.post_init(use_transformer_init=use_transformer_init)

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Describes the single output port of the module.

        The port is named "log_probs" when ``log_softmax`` is set and "logits" otherwise;
        in both cases its axes are ('B', 'T', 'C').
        """
        axes = ('B', 'T', 'C')
        if self.log_softmax:
            return {"log_probs": NeuralType(axes, LogprobsType())}
        return {"logits": NeuralType(axes, LogitsType())}

    @typecheck()
    def forward(self, hidden_states):
        """
        Runs the classifier on a batch of hidden states.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
        Returns: per-class output of the MLP for each token [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        # Dropout first, then the MLP.
        return self.mlp(self.dropout(hidden_states))
class BertPretrainingTokenClassifier(Classifier):
    """
    Token-level classification head for BERT pretraining.

    On top of dropout, the hidden states go through a hidden-to-hidden linear layer,
    an activation and a layer normalization before the final multilayer perceptron.
    """

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Builds the BERT pretraining token classifier.

        Args:
            hidden_size: size of the hidden dimension of the input
            num_classes: number of output classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: name of the activation; must be a key of ``ACT2FN``. It is applied
                after the dense layer and is also forwarded to the MLP
            log_softmax: forwarded to the MLP; when True the output port is declared as
                log-probabilities ("log_probs"), otherwise as raw "logits"
            dropout: dropout value handed to the base ``Classifier``; ``forward`` applies
                ``self.dropout`` to the incoming hidden states
            use_transformer_init: whether to initialize the weights of the classifier head
                with the same approach used in Transformer

        Raises:
            ValueError: if ``activation`` is not a key of ``ACT2FN``
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)
        # Remembered so that `output_types` can report the matching output port.
        self.log_softmax = log_softmax

        # Resolve the activation before any layer is created; unknown names are rejected.
        act_fn = ACT2FN.get(activation)
        if act_fn is None:
            raise ValueError(f'activation "{activation}" not found')

        # Transform applied before the MLP: dense -> activation -> layer norm.
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.act = act_fn
        self.norm = nn.LayerNorm(hidden_size, eps=1e-12)

        mlp_options = dict(num_layers=num_layers, activation=activation, log_softmax=log_softmax)
        self.mlp = MultiLayerPerceptron(hidden_size, num_classes, **mlp_options)
        self.post_init(use_transformer_init=use_transformer_init)

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Describes the single output port of the module.

        The port is named "log_probs" when ``log_softmax`` is set and "logits" otherwise;
        in both cases its axes are ('B', 'T', 'C').
        """
        axes = ('B', 'T', 'C')
        if self.log_softmax:
            return {"log_probs": NeuralType(axes, LogprobsType())}
        return {"logits": NeuralType(axes, LogitsType())}

    @typecheck()
    def forward(self, hidden_states):
        """
        Runs the classifier on a batch of hidden states.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
        Returns: per-class output of the MLP for each token [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        # dropout -> dense -> activation -> layer norm
        projected = self.dense(self.dropout(hidden_states))
        normalized = self.norm(self.act(projected))
        return self.mlp(normalized)