odg123
/

ranjit-task-logs-analysis

Model card Files Files and versions

Metrics Training metrics Community

ranjit-task-logs-analysis / egs /librispeech /ASR /pruned2_knowledge /joiner.py

odg123's picture

Upload icefall experiment results and logs

d596074 verified 3 months ago

history blame contribute delete

2.17 kB

	# Copyright 2021 Xiaomi Corp. (authors: Fangjun Kuang)
	#
	# See ../../../../LICENSE for clarification regarding multiple authors
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import torch
	import torch.nn as nn
	from scaling import ScaledLinear


	class Joiner(nn.Module):
	    """Fuse encoder and decoder outputs into output logits.

	    The two inputs are (optionally) projected by ``encoder_proj`` and
	    ``decoder_proj``, added together, passed through ``tanh`` and then
	    through ``output_linear``.

	    NOTE(review): ``ScaledLinear`` is imported from the local ``scaling``
	    module; it is presumably constructed as ``(in_features, out_features)``
	    like ``nn.Linear`` -- confirm against that module.
	    """

	    def __init__(
	        self,
	        encoder_dim: int,
	        decoder_dim: int,
	        joiner_dim: int,
	        vocab_size: int,
	    ):
	        """
	        Args:
	          encoder_dim:
	            First argument given to the ``encoder_proj`` layer.
	          decoder_dim:
	            First argument given to the ``decoder_proj`` layer.
	          joiner_dim:
	            Second argument of both input projections and first argument
	            of ``output_linear``.
	          vocab_size:
	            Second argument given to ``output_linear``.
	        """
	        super().__init__()

	        # Attribute names and creation order are kept as-is: they determine
	        # the state_dict keys and the order in which parameters are created.
	        self.encoder_proj = ScaledLinear(encoder_dim, joiner_dim)
	        self.decoder_proj = ScaledLinear(decoder_dim, joiner_dim)
	        self.output_linear = ScaledLinear(joiner_dim, vocab_size)

	    def forward(
	        self,
	        encoder_out: torch.Tensor,
	        decoder_out: torch.Tensor,
	        project_input: bool = True,
	    ) -> torch.Tensor:
	        """
	        Args:
	          encoder_out:
	            Output from the encoder. Its shape is (N, T, s_range, C).
	          decoder_out:
	            Output from the decoder. Its shape is (N, T, s_range, C).
	            Only the leading three dimensions are required to match
	            those of ``encoder_out``.
	          project_input:
	            If true, apply input projections encoder_proj and decoder_proj.
	            If this is false, it is the user's responsibility to do this
	            manually, and the two inputs are added as they are.
	        Returns:
	          Return the result of ``output_linear``: a 4-D tensor whose
	          leading dimensions are (N, T, s_range). The last dimension is
	          presumably ``vocab_size`` (assuming ``ScaledLinear`` behaves
	          like ``nn.Linear`` -- TODO confirm).
	        """
	        assert (
	            encoder_out.ndim == decoder_out.ndim == 4
	        ), "encoder_out and decoder_out must both be 4-D"
	        # Only the leading dims are compared: the channel dims may differ
	        # before projection.
	        assert (
	            encoder_out.shape[:-1] == decoder_out.shape[:-1]
	        ), "encoder_out and decoder_out must agree on all but the last dim"

	        if project_input:
	            encoder_out = self.encoder_proj(encoder_out)
	            decoder_out = self.decoder_proj(decoder_out)

	        logit = self.output_linear(torch.tanh(encoder_out + decoder_out))

	        return logit