JoyboyGo commited on
Commit
ce4aea4
·
verified ·
1 Parent(s): 5140e9c

Upload folder using huggingface_hub

Browse files
Files changed (21) hide show
  1. .gitattributes +17 -0
  2. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/.metadata +3 -0
  3. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_0.distcp +3 -0
  4. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_1.distcp +3 -0
  5. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_0.distcp +3 -0
  6. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_1.distcp +3 -0
  7. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_0.distcp +3 -0
  8. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_1.distcp +3 -0
  9. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_0.distcp +3 -0
  10. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_1.distcp +3 -0
  11. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_0.distcp +3 -0
  12. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_1.distcp +3 -0
  13. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_0.distcp +3 -0
  14. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_1.distcp +3 -0
  15. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_0.distcp +3 -0
  16. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_1.distcp +3 -0
  17. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_0.distcp +3 -0
  18. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_1.distcp +3 -0
  19. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/common.pt +3 -0
  20. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/metadata.json +1 -0
  21. grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/modelopt_run_config.yaml +164 -0
.gitattributes CHANGED
@@ -33,3 +33,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/.metadata filter=lfs diff=lfs merge=lfs -text
37
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
38
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_1.distcp filter=lfs diff=lfs merge=lfs -text
39
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
40
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_1.distcp filter=lfs diff=lfs merge=lfs -text
41
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
42
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_1.distcp filter=lfs diff=lfs merge=lfs -text
43
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
44
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_1.distcp filter=lfs diff=lfs merge=lfs -text
45
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
46
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_1.distcp filter=lfs diff=lfs merge=lfs -text
47
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
48
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_1.distcp filter=lfs diff=lfs merge=lfs -text
49
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
50
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
51
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
52
+ grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2788739fde94c4ca1aa2c591c45846104d9e6f88dac4062d1575a68d61a4042f
3
+ size 2969444
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42d729d1f9b7a80ac37f628cdebb8e4fb4adc7ac052be085ff01e87ec9bae701
3
+ size 2338811157
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__0_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c274746cdf9f956b5b53b51ba83cd599061569d35485b662045759c724cc2b
3
+ size 2338795644
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82091a4c7bdd06db20dab6cb2e2cc2f545f23b07bf9d6d4859a0a38d1fa26651
3
+ size 2338090987
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__1_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1a6b3396e00cf301c4921570a66d2a1de5e47394b78a7bcb2ac51c2d4b7e2b9
3
+ size 2338066012
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d4a2ee3d4958094331a3737bb500d29d5ea44a8c666b26dac4f25dfe08276d
3
+ size 2338583696
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__2_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4945c539dec3fb024e28360f51c165ff8f9a55eec63e9c51f0ad876bbc29219c
3
+ size 2338589940
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7137445ae24eaaa6594b206a736f24667e0c6abe681093588933f6aa928ea77c
3
+ size 2338078692
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__3_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:813515a8cc6ae01070a5f96221cfc2fb567fe35dae1ce674d236552613147a4f
3
+ size 2338067589
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdaf9b0000940bf8392a6fa25c8e178c55111f1ae9f829445b9850abaf7e63b4
3
+ size 2338591581
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__4_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df4d5d6bf613c4025da01674a8f55f71b4773daefc0254136e21478ce3c619bd
3
+ size 2338607287
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216f82d70c83858e1c79ebe34b19007567fe88a62f934093571c7e215a28fb30
3
+ size 2338107014
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__5_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d53f0b4547fd8bb66c7cbc6269d196120b479b102fda97007b2f91894d2ca4d
3
+ size 2338070743
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c02766f4049a910f9131a91f7b97a3f33f230a4af69136d8b4bf512161b5e579
3
+ size 2338591581
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__6_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af31246da8aadc4e9d7ea40b8d3a99a193979350a4ec60dec88b254c93ea76f3
3
+ size 2338610441
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef4f992cd213a3d5736c7359849487c7d3986a730a76c567e5a6c8b96c912c9
3
+ size 2338092821
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/__7_1.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfe9b44e68ff8cee073110dc7421144729c28479cd2896052153b6c394f88310
3
+ size 2338089667
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/common.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf49a02d2a1f1727d9e271b56400b7bf1bac350d06ba43bdfa5d7f830aee0f9b
3
+ size 141799
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sharded_backend": "torch_dist", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
grid_search/yulan-gdn-sft-1b-sl65536-lr3e-6-gbs16-mb1-tp2-pp1-cp4/iter_0000953/modelopt_run_config.yaml ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ activation_func: <function silu at 0x7fb672617250>
2
+ activation_func_clamp_value: None
3
+ add_bias_linear: false
4
+ add_qkv_bias: true
5
+ apply_query_key_layer_scaling: false
6
+ apply_residual_connection_post_layernorm: false
7
+ apply_rope_fusion: true
8
+ attention_backend: AttnBackend.auto
9
+ attention_dropout: '0.1'
10
+ attention_output_gate: false
11
+ attention_softmax_in_fp32: false
12
+ attn_k_token_shift: None
13
+ attn_output_gate: None
14
+ attn_q_token_shift: None
15
+ attn_token_shift: None
16
+ attn_v_token_shift: None
17
+ autocast_dtype: torch.bfloat16
18
+ barrier_with_L1_time: true
19
+ bf16: true
20
+ bias_activation_fusion: true
21
+ bias_dropout_fusion: true
22
+ calculate_per_token_loss: false
23
+ clone_scatter_output_in_embedding: true
24
+ config_logger_dir: ''
25
+ cross_entropy_fusion_impl: native
26
+ cross_entropy_loss_fusion: false
27
+ defer_embedding_wgrad_compute: false
28
+ delay_wgrad_compute: false
29
+ deterministic_mode: false
30
+ disable_bf16_reduced_precision_matmul: false
31
+ disable_parameter_transpose_cache: false
32
+ distribute_saved_activations: false
33
+ emb_deviation_loss_coeff: 0
34
+ emb_deviation_type: None
35
+ enable_autocast: false
36
+ ffn_hidden_size: 4800
37
+ ffn_intermediate_token_shift: None
38
+ ffn_token_shift: None
39
+ finalize_model_grads_func: <function finalize_model_grads at 0x7fb5869079a0>
40
+ fine_grained_activation_offloading: false
41
+ first_last_layers_bf16: false
42
+ flash_decode: false
43
+ fp16: false
44
+ fp32_residual_connection: false
45
+ freeze_layernorm_weight: false
46
+ fused_single_qkv_rope: false
47
+ gated_linear_unit: true
48
+ glu_linear_offset: '0.0'
49
+ grad_scale_func: <bound method MegatronOptimizer.scale_loss of <megatron.core.optimizer.optimizer.ChainedOptimizer
50
+ object at 0x7fb4cc5e64a0>>
51
+ grad_sync_func: None
52
+ gradient_accumulation_fusion: true
53
+ hetereogenous_dist_checkpoint: false
54
+ heterogeneous_block_specs: false
55
+ hidden_dropout: '0.1'
56
+ hidden_size: 1920
57
+ is_hybrid_model: false
58
+ kv_channels: 64
59
+ layernorm_epsilon: 1e-06
60
+ layernorm_zero_centered_gamma: false
61
+ linear_attention_freq: '[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
62
+ 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
+ 0, 1, 0, 0, 1, 1, 1, 1, 1, 1]'
64
+ linear_attention_type: gated_delta_net
65
+ linear_conv_kernel_dim: 4
66
+ linear_key_head_dim: 64
67
+ linear_num_key_heads: 8
68
+ linear_num_value_heads: 32
69
+ linear_value_head_dim: 64
70
+ log_hidden_states: '[]'
71
+ log_params: '[]'
72
+ mamba_disable_cp: false
73
+ mamba_expand: 2
74
+ mamba_head_dim: 64
75
+ mamba_num_groups: 8
76
+ mamba_num_heads: None
77
+ mamba_state_dim: 128
78
+ masked_softmax_fusion: false
79
+ memory_efficient_layer_norm: false
80
+ min_offloaded_tensor_size: 1048576
81
+ mlp_chunks_for_prefill: 1
82
+ moe_apply_probs_on_input: false
83
+ moe_aux_loss_coeff: '0.0'
84
+ moe_deepep_num_sms: 20
85
+ moe_enable_deepep: false
86
+ moe_expert_capacity_factor: None
87
+ moe_extended_tp: false
88
+ moe_ffn_hidden_size: None
89
+ moe_flex_dispatcher_backend: deepep
90
+ moe_grouped_gemm: false
91
+ moe_hybridep_num_sms: 16
92
+ moe_input_jitter_eps: None
93
+ moe_layer_freq: 1
94
+ moe_pad_expert_input_to_capacity: false
95
+ moe_per_layer_logging: false
96
+ moe_permute_fusion: false
97
+ moe_router_bias_update_method: sign
98
+ moe_router_bias_update_rate: '0.001'
99
+ moe_router_dtype: None
100
+ moe_router_enable_expert_bias: false
101
+ moe_router_force_load_balancing: false
102
+ moe_router_fusion: false
103
+ moe_router_group_topk: None
104
+ moe_router_load_balancing_type: aux_loss
105
+ moe_router_num_groups: None
106
+ moe_router_padding_for_quantization: false
107
+ moe_router_pre_softmax: false
108
+ moe_router_score_function: softmax
109
+ moe_router_topk: 2
110
+ moe_router_topk_limited_devices: None
111
+ moe_router_topk_scaling_factor: None
112
+ moe_shared_expert_gate: false
113
+ moe_shared_expert_intermediate_size: None
114
+ moe_shared_expert_overlap: false
115
+ moe_token_dispatcher_type: allgather
116
+ moe_token_drop_policy: probs
117
+ moe_token_dropping: false
118
+ moe_use_legacy_grouped_gemm: false
119
+ moe_z_loss_coeff: None
120
+ mrope_section: None
121
+ multi_latent_attention: false
122
+ no_rope_freq: None
123
+ no_sync_func: None
124
+ normalization: RMSNorm
125
+ num_attention_heads: 30
126
+ num_layers: 56
127
+ num_layers_at_end_in_bf16: 1
128
+ num_layers_at_start_in_bf16: 1
129
+ num_moe_experts: None
130
+ num_query_groups: 6
131
+ nvidia_modelopt_version: 0.39.0
132
+ offload_modules: '[]'
133
+ param_sync_func: None
134
+ params_dtype: torch.bfloat16
135
+ perform_initialization: true
136
+ persist_layer_norm: true
137
+ qk_l2_norm: false
138
+ qk_layernorm: false
139
+ quant_recipe: None
140
+ reparam_keys: None
141
+ rotary_interleaved: false
142
+ softmax_scale: None
143
+ softmax_type: vanilla
144
+ spectral_mup_init: false
145
+ split_expert_init: true
146
+ split_fc1_init: true
147
+ split_qkv_init: true
148
+ symmetric_ar_type: None
149
+ test_mode: false
150
+ timers: <megatron.core.timers.Timers object at 0x7fb4facdbf10>
151
+ token_shift_conv_init: default
152
+ token_shift_conv_size: 4
153
+ transformer_impl: transformer_engine
154
+ use_fused_weighted_squared_relu: false
155
+ use_kitchen: false
156
+ use_mamba_mem_eff_path: true
157
+ use_ring_exchange_p2p: false
158
+ use_te_activation_func: false
159
+ use_te_rng_tracker: false
160
+ variable_seq_lengths: false
161
+ wgrad_deferral_limit: 0
162
+ window_attn_skip_freq: None
163
+ window_size: None
164
+ word_embedding_dropout_prob: '0.0'