# @package _group_

# Hydra/fairseq training configuration: `data2vec_audio` model trained with the
# `audio_pretraining` task. Each top-level key below is one config group;
# two-space indentation marks membership in that group.

common:
  fp16: true
  log_format: json
  log_interval: 200
  tensorboard_logdir: tb
  min_loss_scale: 1e-6
  # NOTE(review): absolute path on the original author's machine; override it
  # (e.g. common.user_dir=...) when running anywhere else.
  user_dir: /private/home/abaevski/fairseq-py/examples/data2vec

checkpoint:
  save_interval: 1
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true

task:
  _name: audio_pretraining
  # NOTE(review): absolute path on the original author's machine; override it
  # (e.g. task.data=...) when running anywhere else.
  data: /private/home/abaevski/data/audioset
  max_sample_size: 320000
  min_sample_size: 32000
  normalize: true

dataset:
  num_workers: 6
  max_tokens: 3400000
  skip_invalid_size_inputs_valid_test: true
  # NOTE(review): disable_validation is true below, so this interval presumably
  # has no effect unless validation is re-enabled - confirm.
  validate_interval: 5
  required_batch_size_multiple: 1
  disable_validation: true

distributed_training:
  distributed_world_size: 24
  ddp_backend: legacy_ddp

criterion:
  _name: model
  # Keys listed here are requested from the criterion for logging; the two
  # commented-out entries are kept as they were (disabled) in the original.
  log_keys:
    - ema_decay
    - target_var
    - pred_var
    # - avg_self_attn
    # - weights

optimization:
  max_update: 200000
  lr: [0.0005]

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: cosine
  warmup_updates: 10000

model:
  _name: data2vec_audio
  extractor_mode: layer_norm
  encoder_layerdrop: 0.05
  dropout_input: 0.0
  dropout_features: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 768

  # Masking settings.
  mask_prob: 0.65
  mask_length: 10

  # Loss and target-normalisation settings.
  loss_beta: 0
  loss_scale: null
  instance_norm_target_layer: true
  layer_norm_targets: true
  average_top_k_layers: 12

  # Normalisation variant and positional-convolution settings.
  self_attn_norm_type: deepnorm
  final_norm_type: deepnorm
  pos_conv_depth: 5
  conv_pos: 95

  # EMA settings: start decay, end decay, and the step at which annealing ends.
  ema_decay: 0.999
  ema_end_decay: 0.9999
  ema_anneal_end_step: 30000
  ema_transformer_only: true
  ema_layers_only: false

  require_same_masks: true
  mask_dropout: 0