# NOTE: the three lines below were stray extraction artifacts ("Spaces:",
# "Runtime error" x2), preserved here as comments so the file parses.
# Spaces: / Runtime error / Runtime error
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import contextlib
import logging
import os
import tempfile
import unittest
from io import StringIO

import torch

from fairseq import options
from fairseq_cli import train
from tests.utils import (
    create_dummy_data,
    generate_main,
    preprocess_lm_data,
    preprocess_translation_data,
    train_translation_model,
)
class TestTranslationGPU(unittest.TestCase):
    """GPU integration tests: train tiny translation models end-to-end.

    Each test creates dummy data in a temp dir, preprocesses it, trains a
    small model via ``tests.utils`` helpers, and runs generation. The fp16
    tests require CUDA, so they are skipped on CPU-only machines (this is
    why ``torch`` is imported at the top of the file).
    """

    def setUp(self):
        # Silence fairseq's verbose logging while the tests run.
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        # Restore logging so later test cases are unaffected.
        logging.disable(logging.NOTSET)

    @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
    def test_fp16(self):
        """Train and decode a small fconv model with --fp16."""
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_fp16") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(data_dir, "fconv_iwslt_de_en", ["--fp16"])
                generate_main(data_dir)

    @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
    def test_memory_efficient_fp16(self):
        """Train and decode with --memory-efficient-fp16."""
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_memory_efficient_fp16") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir, "fconv_iwslt_de_en", ["--memory-efficient-fp16"]
                )
                generate_main(data_dir)

    @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
    def test_transformer_fp16(self):
        """Train a tiny transformer with --fp16, run validation and decoding."""
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_transformer") as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir)
                train_translation_model(
                    data_dir,
                    "transformer_iwslt_de_en",
                    [
                        "--encoder-layers",
                        "2",
                        "--decoder-layers",
                        "2",
                        "--encoder-embed-dim",
                        "64",
                        "--decoder-embed-dim",
                        "64",
                        "--fp16",
                    ],
                    run_validation=True,
                )
                generate_main(data_dir)

    @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
    def test_levenshtein_transformer(self):
        """Train a Levenshtein transformer (NAT) and decode with iterative refinement."""
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory(
                "test_levenshtein_transformer"
            ) as data_dir:
                create_dummy_data(data_dir)
                preprocess_translation_data(data_dir, ["--joined-dictionary"])
                train_translation_model(
                    data_dir,
                    "levenshtein_transformer",
                    [
                        "--apply-bert-init",
                        "--early-exit",
                        "6,6,6",
                        "--criterion",
                        "nat_loss",
                    ],
                    task="translation_lev",
                )
                gen_config = [
                    "--task",
                    "translation_lev",
                    "--iter-decode-max-iter",
                    "9",
                    "--iter-decode-eos-penalty",
                    "0",
                    "--print-step",
                ]
                # non-ensemble generation
                generate_main(data_dir, gen_config)
                # ensemble generation: reuse the same checkpoint twice,
                # joined with os.pathsep as fairseq expects for ensembles
                generate_main(
                    data_dir,
                    gen_config,
                    path=os.pathsep.join(
                        [
                            os.path.join(data_dir, "checkpoint_last.pt"),
                            os.path.join(data_dir, "checkpoint_last.pt"),
                        ]
                    ),
                )
def _quantize_language_model(data_dir, arch, extra_flags=None, run_validation=False):
    """Train a tiny language model, then quantize it two ways.

    Runs three training stages on the data in ``data_dir``:
      1. baseline LM training (1 epoch),
      2. retraining with scalar quant noise (``--quant-noise-scalar``),
      3. iterative product-quantization fine-tuning, restored from the
         baseline checkpoint using the YAML config that sits next to this
         test file.

    Args:
        data_dir: directory with preprocessed LM data; also the save dir.
        arch: model architecture name, e.g. ``"transformer_lm"``.
        extra_flags: optional extra CLI flags appended to every stage.
        run_validation: unused; kept for signature compatibility with the
            other training helpers used by these tests.
    """

    def _args(max_tokens, stage_flags):
        # CLI flags shared by all three stages. `max_tokens` doubles as
        # --tokens-per-sample, matching the original per-stage values.
        return (
            [
                "--task",
                "language_modeling",
                data_dir,
                "--arch",
                arch,
                "--optimizer",
                "adam",
                "--lr",
                "0.0001",
                "--criterion",
                "adaptive_loss",
                "--adaptive-softmax-cutoff",
                "5,10,15",
                "--max-tokens",
                max_tokens,
                "--tokens-per-sample",
                max_tokens,
                "--no-progress-bar",
                "--distributed-world-size",
                "1",
                "--ddp-backend",
                "no_c10d",
                "--num-workers",
                "0",
            ]
            + stage_flags
            + (extra_flags or [])
        )

    # Stage 1: baseline training for one epoch.
    train_parser = options.get_training_parser()
    train_args = options.parse_args_and_arch(
        train_parser,
        _args("500", ["--save-dir", data_dir, "--max-epoch", "1"]),
    )
    train.main(train_args)

    # Stage 2: scalar quantization via quant noise.
    scalar_quant_train_parser = options.get_training_parser()
    scalar_quant_train_args = options.parse_args_and_arch(
        scalar_quant_train_parser,
        _args(
            "500",
            [
                "--save-dir",
                data_dir,
                "--max-update",
                "3",
                "--quant-noise-scalar",
                "0.5",
            ],
        ),
    )
    train.main(scalar_quant_train_args)

    # Stage 3: iterative PQ quantization, restoring the baseline checkpoint.
    quantize_parser = options.get_training_parser()
    quantize_args = options.parse_args_and_arch(
        quantize_parser,
        _args(
            "50",
            [
                "--max-update",
                "6",
                "--restore-file",
                os.path.join(data_dir, "checkpoint_last.pt"),
                "--reset-optimizer",
                "--quantization-config-path",
                os.path.join(
                    os.path.dirname(__file__),
                    "transformer_quantization_config.yaml",
                ),
            ],
        ),
    )
    train.main(quantize_args)
| class TestQuantization(unittest.TestCase): | |
| def setUp(self): | |
| logging.disable(logging.CRITICAL) | |
| def tearDown(self): | |
| logging.disable(logging.NOTSET) | |
| def test_quantization(self): | |
| with contextlib.redirect_stdout(StringIO()): | |
| with tempfile.TemporaryDirectory("test_quantization") as data_dir: | |
| create_dummy_data(data_dir) | |
| preprocess_lm_data(data_dir) | |
| # tests both scalar and iterative PQ quantization | |
| _quantize_language_model(data_dir, "transformer_lm") | |
class TestOptimizersGPU(unittest.TestCase):
    """GPU tests for optimizer interaction with fp16 gradient flattening.

    Requires CUDA (the training flags use --fp16), so the test is skipped
    on CPU-only machines rather than failing.
    """

    def setUp(self):
        # Silence fairseq's verbose logging while the tests run.
        logging.disable(logging.CRITICAL)

    def tearDown(self):
        # Restore logging so later test cases are unaffected.
        logging.disable(logging.NOTSET)

    @unittest.skipIf(not torch.cuda.is_available(), "test requires a GPU")
    def test_flat_grads(self):
        """Adafactor must fail with flat fp16 grads and succeed once
        --fp16-no-flatten-grads is set."""
        with contextlib.redirect_stdout(StringIO()):
            with tempfile.TemporaryDirectory("test_flat_grads") as data_dir:
                # Use just a bit of data and tiny model to keep this test runtime reasonable
                create_dummy_data(data_dir, num_examples=10, maxlen=5)
                preprocess_translation_data(data_dir)
                with self.assertRaises(RuntimeError):
                    # adafactor isn't compatible with flat grads, which
                    # are used by default with --fp16
                    train_translation_model(
                        data_dir,
                        "lstm",
                        [
                            "--required-batch-size-multiple",
                            "1",
                            "--encoder-layers",
                            "1",
                            "--encoder-hidden-size",
                            "32",
                            "--decoder-layers",
                            "1",
                            "--optimizer",
                            "adafactor",
                            "--fp16",
                        ],
                    )
                # but it should pass once we set --fp16-no-flatten-grads
                train_translation_model(
                    data_dir,
                    "lstm",
                    [
                        "--required-batch-size-multiple",
                        "1",
                        "--encoder-layers",
                        "1",
                        "--encoder-hidden-size",
                        "32",
                        "--decoder-layers",
                        "1",
                        "--optimizer",
                        "adafactor",
                        "--fp16",
                        "--fp16-no-flatten-grads",
                    ],
                )
| if __name__ == "__main__": | |
| unittest.main() | |