# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this tree.

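"""End-to-end tests for the fairseq command-line entry points.

Each test builds a small dummy dataset in a temporary directory, runs
preprocessing, trains a tiny model for one epoch, and, where applicable,
runs generation or language-model evaluation, checking mainly that the
binaries run end to end without crashing.
"""
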
import contextlib
import json
import logging
import os
import random
import sys
import tempfile
import unittest
from io import StringIO
from typing import Dict, List

import torch

from fairseq import options
from fairseq_cli import eval_lm, train
from tests.utils import (
    create_dummy_data,
    create_laser_data_and_config_json,
    generate_main,
    preprocess_lm_data,
    preprocess_summarization_data,
    preprocess_translation_data,
    train_language_model,
    train_translation_model,
)


try:
    import transformers

    has_hf_transformers = True
except ImportError:
    has_hf_transformers = False
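# Tests that depend on HuggingFace models (see test_transformer_xl_bptt_lm
# below) check this flag and are skipped when the optional ``transformers``
# package is not installed.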


class TestTranslation(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_fconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(data_dir, "fconv_iwslt_de_en")
                generate_main(data_dir)

    def test_raw(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_raw") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--dataset-impl", "raw"])
                train_translation_model(
                    data_dir, "fconv_iwslt_de_en", ["--dataset-impl", "raw"]
                )
                generate_main(data_dir, ["--dataset-impl", "raw"])

    def test_update_freq(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_update_freq") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir, "fconv_iwslt_de_en", ["--update-freq", "3"]
                )
                generate_main(data_dir)

    def test_max_positions(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_max_positions") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                with self.assertRaises(Exception) as context:
                    train_translation_model(
                        data_dir,
                        "fconv_iwslt_de_en",
                        ["--max-target-positions", "5"],
                    )
                self.assertTrue(
                    "skip this example with --skip-invalid-size-inputs-valid-test"
                    in str(context.exception)
                )
                train_translation_model(
                    data_dir,
                    "fconv_iwslt_de_en",
                    [
                        "--max-target-positions",
                        "5",
                        "--skip-invalid-size-inputs-valid-test",
                    ],
                )
                with self.assertRaises(Exception) as context:
                    generate_main(data_dir)
                generate_main(data_dir, ["--skip-invalid-size-inputs-valid-test"])

    def test_generation(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_sampling") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(data_dir, "fconv_iwslt_de_en")
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--temperature",
                        "2",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--sampling-topk",
                        "3",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--sampling",
                        "--sampling-topp",
                        "0.2",
                        "--beam",
                        "2",
                        "--nbest",
                        "2",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--diversity-rate",
                        "0.5",
                        "--beam",
                        "6",
                    ],
                )
                with self.assertRaises(ValueError):
                    generate_main(
                        data_dir,
                        [
                            "--diverse-beam-groups",
                            "4",
                            "--match-source-len",
                        ],
                    )
                generate_main(data_dir, ["--prefix-size", "2"])
                generate_main(data_dir, ["--retain-dropout"])

    def test_eval_bleu(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_eval_bleu") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "fconv_iwslt_de_en",
                    [
                        "--eval-bleu",
                        "--eval-bleu-print-samples",
                        "--eval-bleu-remove-bpe",
                        "--eval-bleu-detok",
                        "space",
                        "--eval-bleu-args",
                        '{"beam": 4, "min_len": 10}',
                    ],
                )

    def test_lstm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lstm_wiseman_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--decoder-out-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)

    def test_lstm_bidirectional(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_bidirectional") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lstm",
                    [
                        "--encoder-layers",
                        "2",
                        "--encoder-bidirectional",
                        "--encoder-hidden-size",
                        "16",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--decoder-out-embed-dim",
                        "8",
                        "--decoder-layers",
                        "2",
                    ],
                )
                generate_main(data_dir)

    def test_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)

    def test_multilingual_transformer(self):
        # test with all combinations of encoder/decoder lang-token flags
        encoder_langtok_flags = [
            [],
            ["--encoder-langtok", "src"],
            ["--encoder-langtok", "tgt"],
        ]
        decoder_langtok_flags = [[], ["--decoder-langtok"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_langtok_flags)):
                for j in range(len(decoder_langtok_flags)):
                    enc_ltok_flag = encoder_langtok_flags[i]
                    dec_ltok_flag = decoder_langtok_flags[j]
                    with tempfile.TemporaryDirectory(
                        f"test_multilingual_transformer_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(data_dir)
                        train_translation_model(
                            data_dir,
                            arch="multilingual_transformer",
                            task="multilingual_translation",
                            extra_flags=[
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--task",
                                "multilingual_translation",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                        )

    @unittest.skipIf(
        sys.platform.lower() == "darwin", "skip latent depth test on MacOS"
    )
    def test_multilingual_translation_latent_depth(self):
        # test combinations of encoder/decoder latent layers, skipping the
        # baseline with no latent layers
        encoder_latent_layer = [[], ["--encoder-latent-layer"]]
        decoder_latent_layer = [[], ["--decoder-latent-layer"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_latent_layer)):
                for j in range(len(decoder_latent_layer)):
                    if i == 0 and j == 0:
                        continue
                    enc_ll_flag = encoder_latent_layer[i]
                    dec_ll_flag = decoder_latent_layer[j]
                    with tempfile.TemporaryDirectory(
                        f"test_multilingual_translation_latent_depth_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(
                            data_dir, extra_flags=["--joined-dictionary"]
                        )
                        train_translation_model(
                            data_dir,
                            arch="latent_multilingual_transformer",
                            task="multilingual_translation_latent_depth",
                            extra_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                                "--share-encoders",
                                "--share-decoders",
                                "--sparsity-weight",
                                "0.1",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--user-dir",
                                "examples/latent_depth/latent_depth_src",
                                "--task",
                                "multilingual_translation_latent_depth",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ll_flag
                            + dec_ll_flag,
                        )

    def test_translation_multi_simple_epoch(self):
        # test with all combinations of encoder/decoder lang-token flags
        encoder_langtok_flags = [
            [],
            ["--encoder-langtok", "src"],
            ["--encoder-langtok", "tgt"],
        ]
        decoder_langtok_flags = [[], ["--decoder-langtok"]]
        with contextlib.redirect_stdout(StringIO()):
            for i in range(len(encoder_langtok_flags)):
                for j in range(len(decoder_langtok_flags)):
                    enc_ltok_flag = encoder_langtok_flags[i]
                    dec_ltok_flag = decoder_langtok_flags[j]
                    with tempfile.TemporaryDirectory(
                        f"test_translation_multi_simple_epoch_{i}_{j}"
                    ) as data_dir:
                        create_dummy_data(data_dir)
                        preprocess_translation_data(
                            data_dir, extra_flags=["--joined-dictionary"]
                        )
                        train_translation_model(
                            data_dir,
                            arch="transformer",
                            task="translation_multi_simple_epoch",
                            extra_flags=[
                                "--encoder-layers",
                                "2",
                                "--decoder-layers",
                                "2",
                                "--encoder-embed-dim",
                                "8",
                                "--decoder-embed-dim",
                                "8",
                                "--sampling-method",
                                "temperature",
                                "--sampling-temperature",
                                "1.5",
                                "--virtual-epoch-size",
                                "1000",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                            lang_flags=["--lang-pairs", "in-out,out-in"],
                            run_validation=True,
                            extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                        )
                        generate_main(
                            data_dir,
                            extra_flags=[
                                "--task",
                                "translation_multi_simple_epoch",
                                "--lang-pairs",
                                "in-out,out-in",
                                "--source-lang",
                                "in",
                                "--target-lang",
                                "out",
                            ]
                            + enc_ltok_flag
                            + dec_ltok_flag,
                        )

    def test_translation_multi_simple_epoch_no_vepoch(self):
        # test translation_multi_simple_epoch without --virtual-epoch-size
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_no_vepoch"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_translation_multi_simple_epoch_dicts(self):
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_dict"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                        "--virtual-epoch-size",
                        "1000",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_translation_multi_simple_epoch_src_tgt_dict_spec(self):
        # test with explicitly specified source and target dictionaries
        with contextlib.redirect_stdout(StringIO()):
            enc_ltok_flag = ["--encoder-langtok", "src"]
            dec_ltok_flag = ["--decoder-langtok"]
            with tempfile.TemporaryDirectory(
                "test_translation_multi_simple_epoch_dict"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, extra_flags=[])
                train_translation_model(
                    data_dir,
                    arch="transformer",
                    task="translation_multi_simple_epoch",
                    extra_flags=[
                        "--source-dict",
                        f"{data_dir}/dict.in.txt",
                        "--target-dict",
                        f"{data_dir}/dict.out.txt",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--sampling-method",
                        "temperature",
                        "--sampling-temperature",
                        "1.5",
                        "--virtual-epoch-size",
                        "1000",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                    lang_flags=["--lang-pairs", "in-out"],
                    run_validation=True,
                    extra_valid_flags=enc_ltok_flag + dec_ltok_flag,
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--task",
                        "translation_multi_simple_epoch",
                        "--lang-pairs",
                        "in-out",
                        "--source-lang",
                        "in",
                        "--target-lang",
                        "out",
                    ]
                    + enc_ltok_flag
                    + dec_ltok_flag,
                )

    def test_transformer_cross_self_attention(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_cross_self_attention"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--no-cross-attention",
                        "--cross-self-attention",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir, extra_flags=[])

    def test_transformer_pointer_generator(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_pointer_generator"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_summarization_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_pointer_generator",
                    extra_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--alignment-layer",
                        "-1",
                        "--alignment-heads",
                        "1",
                        "--source-position-markers",
                        "0",
                    ],
                    run_validation=True,
                    extra_valid_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                    ],
                )
                generate_main(
                    data_dir,
                    extra_flags=[
                        "--user-dir",
                        "examples/pointer_generator/pointer_generator_src",
                    ],
                )

    def test_lightconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lightconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lightconv_iwslt_de_en",
                    [
                        "--encoder-conv-type",
                        "lightweight",
                        "--decoder-conv-type",
                        "lightweight",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)

    def test_dynamicconv(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_dynamicconv") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "lightconv_iwslt_de_en",
                    [
                        "--encoder-conv-type",
                        "dynamic",
                        "--decoder-conv-type",
                        "dynamic",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(data_dir)

    def test_cmlm_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_cmlm_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "cmlm_transformer",
                    [
                        "--apply-bert-init",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--pred-length-offset",
                        "--length-loss-factor",
                        "0.1",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_nonautoregressive_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_nonautoregressive_transformer"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "nonautoregressive_transformer",
                    [
                        "--apply-bert-init",
                        "--src-embedding-copy",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--pred-length-offset",
                        "--length-loss-factor",
                        "0.1",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "0",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_iterative_nonautoregressive_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_iterative_nonautoregressive_transformer"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "iterative_nonautoregressive_transformer",
                    [
                        "--apply-bert-init",
                        "--src-embedding-copy",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "full_mask",
                        "--stochastic-approx",
                        "--dae-ratio",
                        "0.5",
                        "--train-step",
                        "3",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_insertion_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_insertion_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "insertion_transformer",
                    [
                        "--apply-bert-init",
                        "--criterion",
                        "nat_loss",
                        "--noise",
                        "random_mask",
                    ],
                    task="translation_lev",
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_lev",
                        "--iter-decode-max-iter",
                        "9",
                        "--iter-decode-eos-penalty",
                        "0",
                        "--print-step",
                    ],
                )

    def test_mixture_of_experts(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_moe") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--task",
                        "translation_moe",
                        "--user-dir",
                        "examples/translation_moe/translation_moe_src",
                        "--method",
                        "hMoElp",
                        "--mean-pool-gating-network",
                        "--num-experts",
                        "3",
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                    ],
                )
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "translation_moe",
                        "--user-dir",
                        "examples/translation_moe/translation_moe_src",
                        "--method",
                        "hMoElp",
                        "--mean-pool-gating-network",
                        "--num-experts",
                        "3",
                        "--gen-expert",
                        "0",
                    ],
                )

    def test_alignment(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_alignment") as data_dir:
                create_dummy_data(data_dir, alignment=True)
                preprocess_translation_data(data_dir, ["--align-suffix", "align"])
                train_translation_model(
                    data_dir,
                    "transformer_align",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--load-alignments",
                        "--alignment-layer",
                        "1",
                        "--criterion",
                        "label_smoothed_cross_entropy_with_alignment",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)

    def test_laser_lstm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_laser_lstm") as data_dir:
                laser_config_file = create_laser_data_and_config_json(data_dir)
                train_translation_model(
                    laser_config_file.name,
                    "laser_lstm",
                    [
                        "--user-dir",
                        "examples/laser/laser_src",
                        "--weighting-alpha",
                        "0.3",
                        "--encoder-bidirectional",
                        "--encoder-hidden-size",
                        "512",
                        "--encoder-layers",
                        "5",
                        "--decoder-layers",
                        "1",
                        "--encoder-embed-dim",
                        "320",
                        "--decoder-embed-dim",
                        "320",
                        "--decoder-lang-embed-dim",
                        "32",
                        "--save-dir",
                        data_dir,
                        "--disable-validation",
                    ],
                    task="laser",
                    lang_flags=[],
                )

    def test_laser_transformer(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_laser_transformer") as data_dir:
                laser_config_file = create_laser_data_and_config_json(data_dir)
                train_translation_model(
                    laser_config_file.name,
                    "laser_transformer",
                    [
                        "--user-dir",
                        "examples/laser/laser_src",
                        "--weighting-alpha",
                        "0.3",
                        "--encoder-embed-dim",
                        "320",
                        "--decoder-embed-dim",
                        "320",
                        "--decoder-lang-embed-dim",
                        "32",
                        "--save-dir",
                        data_dir,
                        "--disable-validation",
                    ],
                    task="laser",
                    lang_flags=[],
                )

    def test_alignment_full_context(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_alignment_full_context") as data_dir:
                create_dummy_data(data_dir, alignment=True)
                preprocess_translation_data(data_dir, ["--align-suffix", "align"])
                train_translation_model(
                    data_dir,
                    "transformer_align",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--load-alignments",
                        "--alignment-layer",
                        "1",
                        "--criterion",
                        "label_smoothed_cross_entropy_with_alignment",
                        "--full-context-alignment",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)

    def test_transformer_layerdrop(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_layerdrop") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "3",
                        "--decoder-layers",
                        "3",
                        "--encoder-embed-dim",
                        "8",
                        "--decoder-embed-dim",
                        "8",
                        "--encoder-layerdrop",
                        "0.01",
                        "--decoder-layerdrop",
                        "0.01",
                    ],
                )
                generate_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--model-overrides",
                        "{'encoder_layers_to_keep':'0,2','decoder_layers_to_keep':'1'}",
                    ],
                )


class TestStories(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_fconv_self_att_wp(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_self_att_wp") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                config = [
                    "--encoder-layers",
                    "[(128, 3)] * 2",
                    "--decoder-layers",
                    "[(128, 3)] * 2",
                    "--decoder-attention",
                    "True",
                    "--encoder-attention",
                    "False",
                    "--gated-attention",
                    "True",
                    "--self-attention",
                    "True",
                    "--project-input",
                    "True",
                    "--encoder-embed-dim",
                    "8",
                    "--decoder-embed-dim",
                    "8",
                    "--decoder-out-embed-dim",
                    "8",
                    "--multihead-self-attention-nheads",
                    "2",
                ]
                train_translation_model(data_dir, "fconv_self_att_wp", config)
                generate_main(data_dir)

                # reuse the trained checkpoint as the pretrained model for a
                # fusion model
                os.rename(
                    os.path.join(data_dir, "checkpoint_last.pt"),
                    os.path.join(data_dir, "pretrained.pt"),
                )
                config.extend(
                    [
                        "--pretrained",
                        "True",
                        "--pretrained-checkpoint",
                        os.path.join(data_dir, "pretrained.pt"),
                        "--save-dir",
                        os.path.join(data_dir, "fusion_model"),
                    ]
                )
                train_translation_model(data_dir, "fconv_self_att_wp", config)


class TestLanguageModeling(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_fconv_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fconv_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "fconv_lm",
                    [
                        "--decoder-layers",
                        "[(850, 3)] * 2 + [(1024,4)]",
                        "--decoder-embed-dim",
                        "280",
                        "--optimizer",
                        "nag",
                        "--lr",
                        "0.1",
                    ],
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_transformer_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "transformer_lm",
                    ["--add-bos-token", "--nval", "1"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                eval_lm_main(data_dir, extra_flags=["--context-window", "25"])
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_transformer_lm_with_adaptive_softmax(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_transformer_lm_with_adaptive_softmax"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "transformer_lm",
                    [
                        "--add-bos-token",
                        "--criterion",
                        "adaptive_loss",
                        "--adaptive-softmax-cutoff",
                        "5,10,15",
                    ],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lightconv_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lightconv_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lightconv_lm",
                    ["--add-bos-token"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lstm_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lstm_lm",
                    ["--add-bos-token"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    def test_lstm_lm_residuals(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_lstm_lm_residuals") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_language_model(
                    data_dir,
                    "lstm_lm",
                    ["--add-bos-token", "--residuals"],
                    run_validation=True,
                )
                eval_lm_main(data_dir)
                generate_main(
                    data_dir,
                    [
                        "--task",
                        "language_modeling",
                        "--sample-break-mode",
                        "eos",
                        "--tokens-per-sample",
                        "500",
                    ],
                )

    @unittest.skipIf(not has_hf_transformers, "skip test if transformers is missing")
    def test_transformer_xl_bptt_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer_xl_bptt_lm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                task_flags = [
                    "--user-dir",
                    "examples/truncated_bptt",
                    "--task",
                    "truncated_bptt_lm",
                    "--batch-size",
                    "2",
                    "--tokens-per-sample",
                    "50",
                ]
                train_language_model(
                    data_dir=data_dir,
                    arch="transformer_xl",
                    extra_flags=task_flags
                    + [
                        "--n-layer",
                        "2",
                    ],
                    task="truncated_bptt_lm",
                    run_validation=True,
                    extra_valid_flags=task_flags,
                )
                eval_lm_main(data_dir, extra_flags=task_flags)
                # re-train with activation offloading
                train_language_model(
                    data_dir=data_dir,
                    arch="transformer_xl",
                    extra_flags=task_flags
                    + [
                        "--n-layer",
                        "2",
                        "--offload-activations",
                    ],
                    task="truncated_bptt_lm",
                    run_validation=True,
                    extra_valid_flags=task_flags,
                )


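# NOTE: the helpers used by the tests below (train_masked_lm,
# train_roberta_head, create_dummy_roberta_head_data, and
# train_legacy_masked_language_model) are defined near the bottom of this
# file; they are resolved at call time, so the forward references are fine.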
class TestMaskedLanguageModel(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_legacy_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_legacy_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_legacy_masked_language_model(data_dir, "masked_lm")

    def test_roberta_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_roberta_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_masked_lm(
                    data_dir, "roberta_base", extra_flags=["--encoder-layers", "2"]
                )

    def test_roberta_sentence_prediction(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(data_dir, "roberta_base", num_classes=num_classes)

    def test_roberta_regression_single(self):
        num_classes = 1
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_roberta_regression_single"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--regression-target"],
                )

    def test_roberta_regression_multiple(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_roberta_regression_multiple"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--regression-target"],
                )

    def test_linformer_roberta_masked_lm(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_linformer_roberta_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_masked_lm(
                    data_dir,
                    "linformer_roberta_base",
                    extra_flags=[
                        "--user-dir",
                        "examples/linformer/linformer_src",
                        "--encoder-layers",
                        "2",
                    ],
                )

    def test_linformer_roberta_sentence_prediction(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_linformer_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=["--user-dir", "examples/linformer/linformer_src"],
                )

    def test_linformer_roberta_regression_single(self):
        num_classes = 1
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_linformer_roberta_regression_single"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--regression-target",
                        "--user-dir",
                        "examples/linformer/linformer_src",
                    ],
                )

    def test_linformer_roberta_regression_multiple(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_linformer_roberta_regression_multiple"
            ) as data_dir:
                create_dummy_roberta_head_data(
                    data_dir, num_classes=num_classes, regression=True
                )
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                train_roberta_head(
                    data_dir,
                    "linformer_roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--regression-target",
                        "--user-dir",
                        "examples/linformer/linformer_src",
                    ],
                )

    def _test_pretrained_masked_lm_for_translation(self, learned_pos_emb, encoder_only):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_mlm") as data_dir:
                create_dummy_data(data_dir)
                preprocess_lm_data(data_dir)
                train_legacy_masked_language_model(
                    data_dir,
                    arch="masked_lm",
                    extra_args=("--encoder-learned-pos",) if learned_pos_emb else (),
                )
                with tempfile.TemporaryDirectory(
                    "test_mlm_translation"
                ) as translation_dir:
                    create_dummy_data(translation_dir)
                    preprocess_translation_data(
                        translation_dir, extra_flags=["--joined-dictionary"]
                    )
                    # train a small translation model initialized from the
                    # pretrained masked LM checkpoint
                    train_translation_model(
                        translation_dir,
                        arch="transformer_from_pretrained_xlm",
                        extra_flags=[
                            "--decoder-layers",
                            "1",
                            "--decoder-embed-dim",
                            "32",
                            "--decoder-attention-heads",
                            "1",
                            "--decoder-ffn-embed-dim",
                            "32",
                            "--encoder-layers",
                            "1",
                            "--encoder-embed-dim",
                            "32",
                            "--encoder-attention-heads",
                            "1",
                            "--encoder-ffn-embed-dim",
                            "32",
                            "--pretrained-xlm-checkpoint",
                            "{}/checkpoint_last.pt".format(data_dir),
                            "--activation-fn",
                            "gelu",
                            "--max-source-positions",
                            "500",
                            "--max-target-positions",
                            "500",
                        ]
                        + (
                            ["--encoder-learned-pos", "--decoder-learned-pos"]
                            if learned_pos_emb
                            else []
                        )
                        + (["--init-encoder-only"] if encoder_only else []),
                        task="translation_from_pretrained_xlm",
                    )

    def test_pretrained_masked_lm_for_translation_learned_pos_emb(self):
        self._test_pretrained_masked_lm_for_translation(True, False)

    def test_pretrained_masked_lm_for_translation_sinusoidal_pos_emb(self):
        self._test_pretrained_masked_lm_for_translation(False, False)

    def test_pretrained_masked_lm_for_translation_encoder_only(self):
        self._test_pretrained_masked_lm_for_translation(True, True)

    def test_r4f_roberta(self):
        num_classes = 3
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_r4f_roberta_head") as data_dir:
                create_dummy_roberta_head_data(data_dir, num_classes=num_classes)
                preprocess_lm_data(os.path.join(data_dir, "input0"))
                preprocess_lm_data(os.path.join(data_dir, "label"))
                train_roberta_head(
                    data_dir,
                    "roberta_base",
                    num_classes=num_classes,
                    extra_flags=[
                        "--user-dir",
                        "examples/rxf/rxf_src",
                        "--criterion",
                        "sentence_prediction_r3f",
                        "--spectral-norm-classification-head",
                    ],
                )


def train_legacy_masked_language_model(data_dir, arch, extra_args=()):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "cross_lingual_lm",
            data_dir,
            "--arch",
            arch,
            # optimizer args
            "--optimizer",
            "adam",
            "--lr-scheduler",
            "reduce_lr_on_plateau",
            "--lr-shrink",
            "0.5",
            "--lr",
            "0.0001",
            "--stop-min-lr",
            "1e-09",
            # dropout, attention args
            "--dropout",
            "0.1",
            "--attention-dropout",
            "0.1",
            # MLM args
            "--criterion",
            "legacy_masked_lm_loss",
            "--masked-lm-only",
            "--monolingual-langs",
            "in,out",
            "--num-segment",
            "5",
            # transformer args: use a tiny model for fast training
            "--encoder-layers",
            "1",
            "--encoder-embed-dim",
            "32",
            "--encoder-attention-heads",
            "1",
            "--encoder-ffn-embed-dim",
            "32",
            # other training args
            "--max-tokens",
            "500",
            "--tokens-per-sample",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--dataset-impl",
            "raw",
            "--num-workers",
            "0",
        ]
        + list(extra_args),
    )
    train.main(train_args)


class TestOptimizers(unittest.TestCase):
    def setUp(self):
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        logging.disable(logging.NOTSET)

    def test_optimizers(self):
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_optimizers") as data_dir:
                # use a small dataset and model to keep this test fast
                create_dummy_data(data_dir, num_examples=10, maxlen=5)
                preprocess_translation_data(data_dir)
                optimizers = ["adafactor", "adam", "nag", "adagrad", "sgd", "adadelta"]
                last_checkpoint = os.path.join(data_dir, "checkpoint_last.pt")
                for optimizer in optimizers:
                    # remove the previous optimizer's checkpoint so each run
                    # starts from scratch
                    if os.path.exists(last_checkpoint):
                        os.remove(last_checkpoint)
                    train_translation_model(
                        data_dir,
                        "lstm",
                        [
                            "--required-batch-size-multiple",
                            "1",
                            "--encoder-layers",
                            "1",
                            "--encoder-hidden-size",
                            "32",
                            "--decoder-layers",
                            "1",
                            "--optimizer",
                            optimizer,
                        ],
                    )
                generate_main(data_dir)


def read_last_log_entry(
    logs: List[logging.LogRecord], logger_name: str
) -> Dict[str, float]:
    for x in reversed(logs):
        if x.name == logger_name:
            return json.loads(x.message)
    raise ValueError(f"No entries from {logger_name} found in captured logs")


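# The tests below train the same tiny transformer twice, with and without an
# activation checkpointing/offloading flag, and compare the final train/valid
# losses parsed from the captured json logs. "--restore-file x.pt" in
# base_flags names a checkpoint that never exists, which (assuming fairseq's
# usual behavior of skipping a missing restore file) makes the second run
# start from scratch instead of resuming from the first run's
# checkpoint_last.pt.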
class TestActivationCheckpointing(unittest.TestCase):
    base_flags = [
        "--encoder-layers",
        "2",
        "--decoder-layers",
        "2",
        "--encoder-embed-dim",
        "8",
        "--decoder-embed-dim",
        "8",
        "--restore-file",
        "x.pt",
        "--log-format",
        "json",
        "--log-interval",
        "1",
        "--max-update",
        "2",
    ]

    def _train(self, data_dir, extra_flags):
        with self.assertLogs() as logs:
            train_translation_model(
                data_dir,
                "transformer_iwslt_de_en",
                self.base_flags + extra_flags,
                run_validation=True,
                extra_valid_flags=["--log-format", "json"],
            )
        return logs.records

    def test_activation_offloading_does_not_change_metrics(self):
        """Neither --checkpoint-activations nor --offload-activations should change loss"""
        with tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir:

            with self.assertLogs():
                create_dummy_data(data_dir, num_examples=20)
                preprocess_translation_data(data_dir)
            offload_logs = self._train(data_dir, ["--offload-activations"])
            baseline_logs = self._train(data_dir, [])

            assert len(baseline_logs) == len(offload_logs)

            baseline_valid_stats = read_last_log_entry(baseline_logs, "valid")
            offload_valid_stats = read_last_log_entry(offload_logs, "valid")
            baseline_train_stats = read_last_log_entry(baseline_logs, "train")
            offload_train_stats = read_last_log_entry(offload_logs, "train")

            assert (
                baseline_train_stats["train_loss"] == offload_train_stats["train_loss"]
            )
            assert (
                baseline_valid_stats["valid_loss"] == offload_valid_stats["valid_loss"]
            )

    def test_activation_checkpointing_does_not_change_metrics(self):
        """--checkpoint-activations should not change loss"""

        with tempfile.TemporaryDirectory("test_transformer_with_act_cpt") as data_dir:
            with self.assertLogs():
                create_dummy_data(data_dir, num_examples=20)
                preprocess_translation_data(data_dir)
            ckpt_logs = self._train(data_dir, ["--checkpoint-activations"])
            baseline_logs = self._train(data_dir, [])
            assert len(baseline_logs) == len(ckpt_logs)

            baseline_train_stats = read_last_log_entry(baseline_logs, "train")
            ckpt_train_stats = read_last_log_entry(ckpt_logs, "train")
            assert baseline_train_stats["train_loss"] == ckpt_train_stats["train_loss"]

            baseline_valid_stats = read_last_log_entry(baseline_logs, "valid")
            ckpt_valid_stats = read_last_log_entry(ckpt_logs, "valid")
            assert baseline_valid_stats["valid_loss"] == ckpt_valid_stats["valid_loss"]


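# Builds a sentence_prediction-style dataset: raw input text is written under
# <data_dir>/input0 and the corresponding targets under <data_dir>/label
# (".label" files with float targets when regression=True, otherwise ".out"
# files with "classN" tokens that are binarized by preprocess_lm_data).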
def create_dummy_roberta_head_data(
    data_dir, num_examples=100, maxlen=10, num_classes=2, regression=False
):
    input_dir = "input0"

    def _create_dummy_data(filename):
        random_data = torch.rand(num_examples * maxlen)
        input_data = 97 + torch.floor(26 * random_data).int()
        if regression:
            output_data = torch.rand((num_examples, num_classes))
        else:
            output_data = 1 + torch.floor(num_classes * torch.rand(num_examples)).int()
        with open(os.path.join(data_dir, input_dir, filename + ".out"), "w") as f_in:
            label_filename = filename + ".label" if regression else filename + ".out"
            with open(os.path.join(data_dir, "label", label_filename), "w") as f_out:
                offset = 0
                for i in range(num_examples):
                    # write one example input of random length
                    ex_len = random.randint(1, maxlen)
                    ex_str = " ".join(map(chr, input_data[offset : offset + ex_len]))
                    print(ex_str, file=f_in)
                    # write the matching target (floats for regression, a
                    # "classN" token otherwise)
                    if regression:
                        class_str = " ".join(map(str, output_data[i].numpy()))
                        print(class_str, file=f_out)
                    else:
                        class_str = "class{}".format(output_data[i])
                        print(class_str, file=f_out)
                    offset += ex_len

    os.mkdir(os.path.join(data_dir, input_dir))
    os.mkdir(os.path.join(data_dir, "label"))
    _create_dummy_data("train")
    _create_dummy_data("valid")
    _create_dummy_data("test")


def train_masked_lm(data_dir, arch, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "masked_lm",
            data_dir,
            "--arch",
            arch,
            "--optimizer",
            "adam",
            "--lr",
            "0.0001",
            "--criterion",
            "masked_lm",
            "--batch-size",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--ddp-backend",
            "no_c10d",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    train.main(train_args)


def train_roberta_head(data_dir, arch, num_classes=2, extra_flags=None):
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        [
            "--task",
            "sentence_prediction",
            data_dir,
            "--arch",
            arch,
            "--encoder-layers",
            "2",
            "--num-classes",
            str(num_classes),
            "--optimizer",
            "adam",
            "--lr",
            "0.0001",
            "--criterion",
            "sentence_prediction",
            "--max-tokens",
            "500",
            "--max-positions",
            "500",
            "--batch-size",
            "500",
            "--save-dir",
            data_dir,
            "--max-epoch",
            "1",
            "--no-progress-bar",
            "--distributed-world-size",
            "1",
            "--ddp-backend",
            "no_c10d",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    train.main(train_args)


def eval_lm_main(data_dir, extra_flags=None):
    eval_lm_parser = options.get_eval_lm_parser()
    eval_lm_args = options.parse_args_and_arch(
        eval_lm_parser,
        [
            data_dir,
            "--path",
            os.path.join(data_dir, "checkpoint_last.pt"),
            "--no-progress-bar",
            "--num-workers",
            "0",
        ]
        + (extra_flags or []),
    )
    eval_lm.main(eval_lm_args)


if __name__ == "__main__":
    unittest.main()