| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import os |
| import tempfile |
|
|
| import onnx |
| import pytest |
| import torch.cuda |
| from omegaconf import DictConfig, ListConfig, OmegaConf |
|
|
| from nemo.collections.asr.models import ( |
| EncDecClassificationModel, |
| EncDecCTCModel, |
| EncDecRNNTModel, |
| EncDecSpeakerLabelModel, |
| ) |
| from nemo.collections.asr.parts.utils import asr_module_utils |
| from nemo.collections.common.parts.adapter_modules import LinearAdapterConfig |
| from nemo.core.utils import numba_utils |
| from nemo.core.utils.numba_utils import __NUMBA_MINIMUM_VERSION__ |
|
|
| NUMBA_RNNT_LOSS_AVAILABLE = numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__) |
|
|
|
|
| class TestExportable: |
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecCTCModel_export_to_onnx(self): |
| model_config = DictConfig( |
| { |
| 'preprocessor': DictConfig(self.preprocessor), |
| 'encoder': DictConfig(self.encoder_dict), |
| 'decoder': DictConfig(self.decoder_dict), |
| } |
| ) |
| model = EncDecCTCModel(cfg=model_config).cuda() |
| with tempfile.TemporaryDirectory() as tmpdir: |
| filename = os.path.join(tmpdir, 'qn.onnx') |
| model.export( |
| output=filename, check_trace=True, |
| ) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.output[0].name == 'logprobs' |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecClassificationModel_export_to_onnx(self, speech_classification_model): |
| model = speech_classification_model.cuda() |
| with tempfile.TemporaryDirectory() as tmpdir: |
| filename = os.path.join(tmpdir, 'edc.onnx') |
| model.export( |
| output=filename, check_trace=True, |
| ) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.output[0].name == 'logits' |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecSpeakerLabelModel_export_to_onnx(self, speaker_label_model): |
| model = speaker_label_model.cuda() |
| with tempfile.TemporaryDirectory() as tmpdir: |
| filename = os.path.join(tmpdir, 'sl.onnx') |
| model.export(output=filename) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.output[0].name == 'logits' |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecCitrinetModel_export_to_onnx(self, citrinet_model): |
| model = citrinet_model.cuda() |
| with tempfile.TemporaryDirectory() as tmpdir: |
| filename = os.path.join(tmpdir, 'citri.onnx') |
| model.export(output=filename) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.input[1].name == 'length' |
| assert onnx_model.graph.output[0].name == 'logprobs' |
|
|
| @pytest.mark.pleasefixme |
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_ConformerModel_export_to_onnx(self, conformer_model): |
| model = conformer_model.cuda() |
| with tempfile.TemporaryDirectory() as tmpdir, torch.cuda.amp.autocast(): |
| filename = os.path.join(tmpdir, 'conf.onnx') |
| device = next(model.parameters()).device |
| input_example = torch.randn(4, model.encoder._feat_in, 777, device=device) |
| input_example_length = torch.full(size=(input_example.shape[0],), fill_value=777, device=device) |
| model.export( |
| output=filename, input_example=tuple([input_example, input_example_length]), check_trace=True, |
| ) |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_SqueezeformerModel_export_to_onnx(self, squeezeformer_model): |
| model = squeezeformer_model.cuda() |
| with tempfile.TemporaryDirectory() as tmpdir, torch.cuda.amp.autocast(): |
| filename = os.path.join(tmpdir, 'squeeze.ts') |
| device = next(model.parameters()).device |
| input_example = torch.randn(4, model.encoder._feat_in, 777, device=device) |
| input_example_length = torch.full(size=(input_example.shape[0],), fill_value=777, device=device) |
| model.export(output=filename, input_example=tuple([input_example, input_example_length]), check_trace=True) |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecCitrinetModel_limited_SE_export_to_onnx(self, citrinet_model): |
| model = citrinet_model.cuda() |
| asr_module_utils.change_conv_asr_se_context_window(model, context_window=24, update_config=False) |
|
|
| with tempfile.TemporaryDirectory() as tmpdir, torch.cuda.amp.autocast(): |
| filename = os.path.join(tmpdir, 'citri_se.onnx') |
| model.export( |
| output=filename, check_trace=True, |
| ) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.input[1].name == 'length' |
| assert onnx_model.graph.output[0].name == 'logprobs' |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecRNNTModel_export_to_onnx(self, citrinet_rnnt_model): |
| model = citrinet_rnnt_model.cuda() |
|
|
| with tempfile.TemporaryDirectory() as tmpdir: |
| fn = 'citri_rnnt.onnx' |
| filename = os.path.join(tmpdir, fn) |
| files, descr = model.export(output=filename, verbose=False) |
|
|
| encoder_filename = os.path.join(tmpdir, 'encoder-' + fn) |
| assert files[0] == encoder_filename |
| assert os.path.exists(encoder_filename) |
| onnx_model = onnx.load(encoder_filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert len(onnx_model.graph.input) == 2 |
| assert len(onnx_model.graph.output) == 2 |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.input[1].name == 'length' |
| assert onnx_model.graph.output[0].name == 'outputs' |
| assert onnx_model.graph.output[1].name == 'encoded_lengths' |
|
|
| decoder_joint_filename = os.path.join(tmpdir, 'decoder_joint-' + fn) |
| assert files[1] == decoder_joint_filename |
| assert os.path.exists(decoder_joint_filename) |
| onnx_model = onnx.load(decoder_joint_filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
|
|
| input_examples = model.decoder.input_example() |
| assert type(input_examples[-1]) == tuple |
| num_states = len(input_examples[-1]) |
| state_name = list(model.decoder.output_types.keys())[-1] |
|
|
| |
| assert len(onnx_model.graph.input) == (1 + (len(input_examples) - 1) + num_states) |
| assert onnx_model.graph.input[0].name == 'encoder_outputs' |
| assert onnx_model.graph.input[1].name == 'targets' |
| assert onnx_model.graph.input[2].name == 'target_length' |
|
|
| if num_states > 0: |
| for idx, ip in enumerate(onnx_model.graph.input[3:]): |
| assert ip.name == "input_" + state_name + '_' + str(idx + 1) |
|
|
| assert len(onnx_model.graph.output) == (len(input_examples) - 1) + num_states |
| assert onnx_model.graph.output[0].name == 'outputs' |
| assert onnx_model.graph.output[1].name == 'prednet_lengths' |
|
|
| if num_states > 0: |
| for idx, op in enumerate(onnx_model.graph.output[2:]): |
| assert op.name == "output_" + state_name + '_' + str(idx + 1) |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecRNNTModel_export_to_ts(self, citrinet_rnnt_model): |
| model = citrinet_rnnt_model.cuda() |
|
|
| with tempfile.TemporaryDirectory() as tmpdir: |
| fn = 'citri_rnnt.ts' |
| filename = os.path.join(tmpdir, fn) |
| |
| files, descr = model.export(output=filename, verbose=False, check_trace=True) |
|
|
| encoder_filename = os.path.join(tmpdir, 'encoder-' + fn) |
| assert files[0] == encoder_filename |
| assert os.path.exists(encoder_filename) |
|
|
| ts_encoder = torch.jit.load(encoder_filename) |
| assert ts_encoder is not None |
|
|
| arguments = ts_encoder.forward.schema.arguments[1:] |
| assert arguments[0].name == 'audio_signal' |
| assert arguments[1].name == 'length' |
|
|
| decoder_joint_filename = os.path.join(tmpdir, 'decoder_joint-' + fn) |
| assert files[1] == decoder_joint_filename |
| assert os.path.exists(decoder_joint_filename) |
|
|
| ts_decoder_joint = torch.jit.load(decoder_joint_filename) |
| assert ts_decoder_joint is not None |
|
|
| ts_decoder_joint_args = ts_decoder_joint.forward.schema.arguments[1:] |
|
|
| input_examples = model.decoder.input_example() |
| assert type(input_examples[-1]) == tuple |
| num_states = len(input_examples[-1]) |
| state_name = list(model.decoder.output_types.keys())[-1] |
|
|
| |
| assert len(ts_decoder_joint_args) == (1 + (len(input_examples) - 1) + num_states) |
| assert ts_decoder_joint_args[0].name == 'encoder_outputs' |
| assert ts_decoder_joint_args[1].name == 'targets' |
| assert ts_decoder_joint_args[2].name == 'target_length' |
|
|
| if num_states > 0: |
| for idx, ip in enumerate(ts_decoder_joint_args[3:]): |
| assert ip.name == "input_" + state_name + '_' + str(idx + 1) |
|
|
| @pytest.mark.run_only_on('GPU') |
| @pytest.mark.unit |
| def test_EncDecCTCModel_adapted_export_to_onnx(self): |
| model_config = DictConfig( |
| { |
| 'preprocessor': DictConfig(self.preprocessor), |
| 'encoder': DictConfig(self.encoder_dict), |
| 'decoder': DictConfig(self.decoder_dict), |
| } |
| ) |
|
|
| |
| model_config.encoder.cls = model_config.encoder.cls + 'Adapter' |
|
|
| |
| model = EncDecCTCModel(cfg=model_config) |
|
|
| |
| adapter_cfg = OmegaConf.structured( |
| LinearAdapterConfig(in_features=model_config.encoder.params.jasper[0].filters, dim=32) |
| ) |
| model.add_adapter('temp', cfg=adapter_cfg) |
|
|
| model = model.cuda() |
|
|
| with tempfile.TemporaryDirectory() as tmpdir: |
| filename = os.path.join(tmpdir, 'qn.onnx') |
| model.export( |
| output=filename, check_trace=True, |
| ) |
| onnx_model = onnx.load(filename) |
| onnx.checker.check_model(onnx_model, full_check=True) |
| assert onnx_model.graph.input[0].name == 'audio_signal' |
| assert onnx_model.graph.output[0].name == 'logprobs' |
|
|
| def setup_method(self): |
| self.preprocessor = { |
| 'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', |
| 'params': dict({}), |
| } |
|
|
| self.encoder_dict = { |
| 'cls': 'nemo.collections.asr.modules.ConvASREncoder', |
| 'params': { |
| 'feat_in': 64, |
| 'activation': 'relu', |
| 'conv_mask': True, |
| 'jasper': [ |
| { |
| 'filters': 1024, |
| 'repeat': 1, |
| 'kernel': [1], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': False, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| } |
| ], |
| }, |
| } |
|
|
| self.decoder_dict = { |
| 'cls': 'nemo.collections.asr.modules.ConvASRDecoder', |
| 'params': { |
| 'feat_in': 1024, |
| 'num_classes': 28, |
| 'vocabulary': [ |
| ' ', |
| 'a', |
| 'b', |
| 'c', |
| 'd', |
| 'e', |
| 'f', |
| 'g', |
| 'h', |
| 'i', |
| 'j', |
| 'k', |
| 'l', |
| 'm', |
| 'n', |
| 'o', |
| 'p', |
| 'q', |
| 'r', |
| 's', |
| 't', |
| 'u', |
| 'v', |
| 'w', |
| 'x', |
| 'y', |
| 'z', |
| "'", |
| ], |
| }, |
| } |
|
|
|
|
| @pytest.fixture() |
| def speech_classification_model(): |
| preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} |
| encoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASREncoder', |
| 'params': { |
| 'feat_in': 64, |
| 'activation': 'relu', |
| 'conv_mask': True, |
| 'jasper': [ |
| { |
| 'filters': 32, |
| 'repeat': 1, |
| 'kernel': [1], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': False, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| } |
| ], |
| }, |
| } |
|
|
| decoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASRDecoderClassification', |
| 'params': {'feat_in': 32, 'num_classes': 30,}, |
| } |
|
|
| modelConfig = DictConfig( |
| { |
| 'preprocessor': DictConfig(preprocessor), |
| 'encoder': DictConfig(encoder), |
| 'decoder': DictConfig(decoder), |
| 'labels': ListConfig(["dummy_cls_{}".format(i + 1) for i in range(30)]), |
| } |
| ) |
| model = EncDecClassificationModel(cfg=modelConfig) |
| return model |
|
|
|
|
| @pytest.fixture() |
| def speaker_label_model(): |
| preprocessor = { |
| '_target_': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', |
| } |
| encoder = { |
| '_target_': 'nemo.collections.asr.modules.ConvASREncoder', |
| 'feat_in': 64, |
| 'activation': 'relu', |
| 'conv_mask': True, |
| 'jasper': [ |
| { |
| 'filters': 512, |
| 'repeat': 1, |
| 'kernel': [1], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': False, |
| 'separable': False, |
| } |
| ], |
| } |
|
|
| decoder = { |
| '_target_': 'nemo.collections.asr.modules.SpeakerDecoder', |
| 'feat_in': 512, |
| 'num_classes': 2, |
| 'pool_mode': 'attention', |
| 'emb_sizes': [1024], |
| } |
|
|
| modelConfig = DictConfig( |
| {'preprocessor': DictConfig(preprocessor), 'encoder': DictConfig(encoder), 'decoder': DictConfig(decoder)} |
| ) |
| speaker_model = EncDecSpeakerLabelModel(cfg=modelConfig) |
| return speaker_model |
|
|
|
|
| @pytest.fixture() |
| def citrinet_model(): |
| preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} |
| encoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASREncoder', |
| 'params': { |
| 'feat_in': 80, |
| 'activation': 'relu', |
| 'conv_mask': True, |
| 'jasper': [ |
| { |
| 'filters': 512, |
| 'repeat': 1, |
| 'kernel': [5], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': False, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| { |
| 'filters': 512, |
| 'repeat': 5, |
| 'kernel': [11], |
| 'stride': [2], |
| 'dilation': [1], |
| 'dropout': 0.1, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| 'stride_last': True, |
| 'residual_mode': 'stride_add', |
| }, |
| { |
| 'filters': 512, |
| 'repeat': 5, |
| 'kernel': [13], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.1, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| { |
| 'filters': 640, |
| 'repeat': 1, |
| 'kernel': [41], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| ], |
| }, |
| } |
|
|
| decoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASRDecoder', |
| 'params': {'feat_in': 640, 'num_classes': 1024, 'vocabulary': list(chr(i % 28) for i in range(0, 1024))}, |
| } |
|
|
| modelConfig = DictConfig( |
| {'preprocessor': DictConfig(preprocessor), 'encoder': DictConfig(encoder), 'decoder': DictConfig(decoder)} |
| ) |
| citri_model = EncDecCTCModel(cfg=modelConfig) |
| return citri_model |
|
|
|
|
| @pytest.fixture() |
| def citrinet_rnnt_model(): |
| labels = list(chr(i % 28) for i in range(0, 1024)) |
| model_defaults = {'enc_hidden': 640, 'pred_hidden': 256, 'joint_hidden': 320} |
|
|
| preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} |
| encoder = { |
| '_target_': 'nemo.collections.asr.modules.ConvASREncoder', |
| 'feat_in': 80, |
| 'activation': 'relu', |
| 'conv_mask': True, |
| 'jasper': [ |
| { |
| 'filters': 512, |
| 'repeat': 1, |
| 'kernel': [5], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': False, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| { |
| 'filters': 512, |
| 'repeat': 5, |
| 'kernel': [11], |
| 'stride': [2], |
| 'dilation': [1], |
| 'dropout': 0.1, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| 'stride_last': True, |
| 'residual_mode': 'stride_add', |
| }, |
| { |
| 'filters': 512, |
| 'repeat': 5, |
| 'kernel': [13], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.1, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| { |
| 'filters': 640, |
| 'repeat': 1, |
| 'kernel': [41], |
| 'stride': [1], |
| 'dilation': [1], |
| 'dropout': 0.0, |
| 'residual': True, |
| 'separable': True, |
| 'se': True, |
| 'se_context_size': -1, |
| }, |
| ], |
| } |
|
|
| decoder = { |
| '_target_': 'nemo.collections.asr.modules.RNNTDecoder', |
| 'prednet': {'pred_hidden': 256, 'pred_rnn_layers': 1, 'dropout': 0.0}, |
| } |
|
|
| joint = { |
| '_target_': 'nemo.collections.asr.modules.RNNTJoint', |
| 'fuse_loss_wer': False, |
| 'jointnet': {'joint_hidden': 320, 'activation': 'relu', 'dropout': 0.0}, |
| } |
|
|
| decoding = {'strategy': 'greedy_batch', 'greedy': {'max_symbols': 5}} |
|
|
| modelConfig = DictConfig( |
| { |
| 'preprocessor': DictConfig(preprocessor), |
| 'labels': labels, |
| 'model_defaults': DictConfig(model_defaults), |
| 'encoder': DictConfig(encoder), |
| 'decoder': DictConfig(decoder), |
| 'joint': DictConfig(joint), |
| 'decoding': DictConfig(decoding), |
| } |
| ) |
| citri_model = EncDecRNNTModel(cfg=modelConfig) |
| return citri_model |
|
|
|
|
| @pytest.fixture() |
| def conformer_model(): |
| preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} |
| encoder = { |
| 'cls': 'nemo.collections.asr.modules.ConformerEncoder', |
| 'params': { |
| 'feat_in': 80, |
| 'feat_out': -1, |
| 'n_layers': 2, |
| 'd_model': 256, |
| 'subsampling': 'striding', |
| 'subsampling_factor': 4, |
| 'subsampling_conv_channels': 512, |
| 'reduction': None, |
| 'reduction_position': None, |
| 'reduction_factor': 1, |
| 'ff_expansion_factor': 4, |
| 'self_attention_model': 'rel_pos', |
| 'n_heads': 8, |
| 'att_context_size': [-1, -1], |
| 'xscaling': True, |
| 'untie_biases': True, |
| 'pos_emb_max_len': 500, |
| 'conv_kernel_size': 31, |
| 'dropout': 0.1, |
| 'dropout_pre_encoder': 0.1, |
| 'dropout_emb': 0.0, |
| 'dropout_att': 0.1, |
| }, |
| } |
|
|
| decoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASRDecoder', |
| 'params': {'feat_in': 256, 'num_classes': 1024, 'vocabulary': list(chr(i % 28) for i in range(0, 1024))}, |
| } |
|
|
| modelConfig = DictConfig( |
| {'preprocessor': DictConfig(preprocessor), 'encoder': DictConfig(encoder), 'decoder': DictConfig(decoder)} |
| ) |
| conformer_model = EncDecCTCModel(cfg=modelConfig) |
| return conformer_model |
|
|
|
|
| @pytest.fixture() |
| def squeezeformer_model(): |
| preprocessor = {'cls': 'nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor', 'params': dict({})} |
| encoder = { |
| 'cls': 'nemo.collections.asr.modules.SqueezeformerEncoder', |
| 'params': { |
| 'feat_in': 80, |
| 'feat_out': -1, |
| 'n_layers': 2, |
| 'adaptive_scale': True, |
| 'time_reduce_idx': 1, |
| 'time_recovery_idx': None, |
| 'd_model': 256, |
| 'subsampling': 'dw_striding', |
| 'subsampling_factor': 4, |
| 'subsampling_conv_channels': 512, |
| 'ff_expansion_factor': 4, |
| 'self_attention_model': 'rel_pos', |
| 'n_heads': 8, |
| 'att_context_size': [-1, -1], |
| 'xscaling': True, |
| 'untie_biases': True, |
| 'pos_emb_max_len': 500, |
| 'conv_kernel_size': 31, |
| 'dropout': 0.1, |
| 'dropout_emb': 0.0, |
| 'dropout_att': 0.1, |
| }, |
| } |
|
|
| decoder = { |
| 'cls': 'nemo.collections.asr.modules.ConvASRDecoder', |
| 'params': {'feat_in': 256, 'num_classes': 1024, 'vocabulary': list(chr(i % 28) for i in range(0, 1024))}, |
| } |
|
|
| modelConfig = DictConfig( |
| {'preprocessor': DictConfig(preprocessor), 'encoder': DictConfig(encoder), 'decoder': DictConfig(decoder)} |
| ) |
| conformer_model = EncDecCTCModel(cfg=modelConfig) |
| return conformer_model |
|
|