from dataclasses import dataclass
from typing import Dict, Optional

from torch import nn as nn

from nemo.collections.common.parts import MultiLayerPerceptron
from nemo.collections.nlp.modules.common.classifier import Classifier
from nemo.core.classes import typecheck
from nemo.core.neural_types import LogitsType, LogprobsType, NeuralType

__all__ = ['BertPretrainingTokenClassifier', 'TokenClassifier']

ACT2FN = {"gelu": nn.functional.gelu, "relu": nn.functional.relu}


@dataclass
class TokenClassifierConfig:
    num_layers: int = 1
    activation: str = 'relu'
    log_softmax: bool = True
    dropout: float = 0.0
    use_transformer_init: bool = True


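# Illustrative sketch (not part of the original module): TokenClassifierConfig mirrors the
# optional keyword arguments of the classifier heads defined below, so a config instance can
# be expanded into a constructor call. hidden_size and num_classes come from the encoder and
# the task rather than from the config; all values here are hypothetical.
#
#   from dataclasses import asdict
#
#   cfg = TokenClassifierConfig(num_layers=2, activation='gelu', dropout=0.1)
#   head = TokenClassifier(hidden_size=768, num_classes=9, **asdict(cfg))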


class TokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks such as named entity recognition.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the TokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)
        self.log_softmax = log_softmax
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities, if log_softmax is enabled) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        logits = self.mlp(hidden_states)
        return logits


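# Illustrative usage sketch (not part of the original module): a small NER-style head over
# BERT-like encoder outputs. All shapes and hyperparameters below are hypothetical; the
# keyword-argument call follows NeMo's typecheck convention and may vary across NeMo versions.
#
#   import torch
#
#   ner_head = TokenClassifier(hidden_size=768, num_classes=9, num_layers=2, dropout=0.1)
#   hidden = torch.randn(4, 16, 768)            # [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
#   log_probs = ner_head(hidden_states=hidden)  # [4 x 16 x 9]; log probs since log_softmax=True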


class BertPretrainingTokenClassifier(Classifier):
    """
    A module to perform token-level classification tasks for BERT pretraining.
    """

    @property
    def output_types(self) -> Optional[Dict[str, NeuralType]]:
        """
        Returns definitions of module output ports.
        """
        if not self.log_softmax:
            return {"logits": NeuralType(('B', 'T', 'C'), LogitsType())}
        else:
            return {"log_probs": NeuralType(('B', 'T', 'C'), LogprobsType())}

    def __init__(
        self,
        hidden_size: int,
        num_classes: int,
        num_layers: int = 1,
        activation: str = 'relu',
        log_softmax: bool = True,
        dropout: float = 0.0,
        use_transformer_init: bool = True,
    ) -> None:
        """
        Initializes the BertPretrainingTokenClassifier module.

        Args:
            hidden_size: the size of the hidden dimension
            num_classes: number of classes
            num_layers: number of fully connected layers in the multilayer perceptron (MLP)
            activation: activation to use between fully connected layers in the MLP
            log_softmax: whether to apply log softmax to the output of the MLP
            dropout: dropout to apply to the input hidden states
            use_transformer_init: whether to initialize the weights of the classifier head with the same approach used in Transformer
        """
        super().__init__(hidden_size=hidden_size, dropout=dropout)

        self.log_softmax = log_softmax

        if activation not in ACT2FN:
            raise ValueError(f'activation "{activation}" not found, supported activations: {list(ACT2FN.keys())}')
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.act = ACT2FN[activation]
        self.norm = nn.LayerNorm(hidden_size, eps=1e-12)
        self.mlp = MultiLayerPerceptron(
            hidden_size, num_classes, num_layers=num_layers, activation=activation, log_softmax=log_softmax
        )
        self.post_init(use_transformer_init=use_transformer_init)

    @typecheck()
    def forward(self, hidden_states):
        """
        Performs the forward step of the module.

        Args:
            hidden_states: batch of hidden states (for example, from the BERT encoder module)
                [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]

        Returns: logits (or log probabilities, if log_softmax is enabled) for each class
            [BATCH_SIZE x SEQ_LENGTH x NUM_CLASSES]
        """
        hidden_states = self.dropout(hidden_states)
        # dense -> activation -> LayerNorm transform applied before the classification MLP
        hidden_states = self.dense(hidden_states)
        hidden_states = self.act(hidden_states)
        transform = self.norm(hidden_states)
        logits = self.mlp(transform)
        return logits


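if __name__ == "__main__":
    # Minimal smoke test (illustrative sketch, not part of the original module).
    # num_classes is set to a vocabulary-sized value, as it would be for a masked
    # language modeling head during BERT pretraining; all numbers here are hypothetical.
    # The keyword-argument call follows NeMo's typecheck convention.
    import torch

    head = BertPretrainingTokenClassifier(hidden_size=768, num_classes=30522, activation='gelu')
    dummy_hidden = torch.randn(2, 8, 768)  # [BATCH_SIZE x SEQ_LENGTH x HIDDEN_SIZE]
    log_probs = head(hidden_states=dummy_hidden)
    print(log_probs.shape)  # expected: torch.Size([2, 8, 30522])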