[
896,
1024,
{
"accum_grad": 3,
"char_list": [],
"debugmode": 0,
"encoder_criterion": "ce",
"encoder_drop_rate": 0.1,
"encoder_input_dim": 896,
"encoder_layer_config": "transformer",
"encoder_output_dim": 896,
"encoder_pre_norm_type": "ln",
"encoder_upsample_rate": 9,
"kv_cache_prefix_finetune": 1,
"epochs": 100,
"eps": 1e-08,
"eps_decay": 0.8,
"gpu_id": null,
"gpu_num": 1,
"grad_clip": 5,
"grad_noise": false,
"idim": 896,
"init_lr": 0.0005,
"lsm_weight": 0.0,
"max_batch_size": 25,
"max_duration": 256,
"max_mem": 20000,
"mtlalpha": 0.5,
"n_iter_processes": 8,
"noam_warmup_steps": 4000,
"odim": 1024,
"opt": "noamw",
"rank": 0,
"report_interval_iters": 100,
"resume_trainer": false,
"save_interval_iters": 2000,
"seed": 19832,
"sort_duration": true,
"start_decay_epoch": 5,
"stop_learning_rate": 1e-05,
"sycn_batchnorm": false,
"tensorboard_dir": null,
"train_dtype": "bfloat16",
"transformer_attention_dim": 896,
"transformer_attention_dropout_rate": 0.1,
"transformer_attention_heads": 14,
"transformer_chunk_size": [
1
],
"transformer_concat_after": false,
"transformer_dropout_rate": 0.1,
"transformer_dynamic_chunks": false,
"transformer_input_dim": 896,
"transformer_input_layer": "linear",
"transformer_left_chunks": [
-1
],
"transformer_linear_units": 4864,
"transformer_normalize_before": true,
"transformer_num_blocks": 4,
"transformer_output_dim": 896,
"transformer_pos_enc_class": "rel-enc",
"transformer_positional_dropout_rate": 0.1,
"transformer_positionwise_conv_kernel_size": 1,
"transformer_positionwise_layer_type": "linear",
"use_zero_redun_opt": false,
"verbose": 0,
"weight_decay": 0.05,
"world_size": 1
}
]