| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
import os
import tempfile

import pytest
import torch
from omegaconf import OmegaConf

import nemo.collections.nlp as nemo_nlp
from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
from nemo.collections.nlp.modules.common.tokenizer_utils import get_tokenizer
|
|
|
|
| def do_export(model, name: str): |
| with tempfile.TemporaryDirectory() as tmpdir: |
| |
| tmp_file_name = os.path.join(tmpdir, name + '.onnx') |
| |
| if torch.cuda.is_available(): |
| model = model.cuda() |
| model.export(tmp_file_name) |
|
|
|
|
| class TestHuggingFace: |
| config = {"language_model": {"pretrained_model_name": ""}, "tokenizer": {}} |
| omega_conf = OmegaConf.create(config) |
|
|
| @pytest.mark.unit |
| def test_list_pretrained_models(self): |
| pretrained_lm_models = nemo_nlp.modules.get_pretrained_lm_models_list() |
| assert len(pretrained_lm_models) > 0 |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_bert_model(self): |
| self.omega_conf.language_model.pretrained_model_name = 'bert-base-uncased' |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.BertEncoder) |
| do_export(model, "bert-base-uncased") |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_distilbert_model(self): |
| self.omega_conf.language_model.pretrained_model_name = 'distilbert-base-uncased' |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.DistilBertEncoder) |
| do_export(model, "distilbert-base-uncased") |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_roberta_model(self): |
| self.omega_conf.language_model.pretrained_model_name = 'roberta-base' |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.RobertaEncoder) |
| do_export(model, "roberta-base-uncased") |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_albert_model(self): |
| self.omega_conf.language_model.pretrained_model_name = 'albert-base-v1' |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.AlbertEncoder) |
| do_export(model, "albert-base-v1") |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_chinese_bert_wwm_model(self): |
| model_name = 'hfl/chinese-bert-wwm' |
| self.omega_conf.language_model.pretrained_model_name = model_name |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.BertModule) |
| tokenizer = get_tokenizer(tokenizer_name=model_name) |
| assert isinstance(tokenizer, AutoTokenizer) |
|
|
| @pytest.mark.with_downloads() |
| @pytest.mark.unit |
| def test_get_pretrained_arabic_model(self): |
| model_name = 'asafaya/bert-base-arabic' |
| self.omega_conf.language_model.pretrained_model_name = model_name |
| model = nemo_nlp.modules.get_lm_model(cfg=self.omega_conf) |
| assert isinstance(model, nemo_nlp.modules.BertModule) |
| tokenizer = get_tokenizer(tokenizer_name=model_name) |
| assert isinstance(tokenizer, AutoTokenizer) |
|
|