Dyna / run.log
Elemmire's picture
Upload folder using huggingface_hub
17f3380 verified
ds_configs/stage2.json
Nan check finished
DatasetDict({
train: Dataset({
features: ['input_ids', 'attention_mask', 'labels'],
num_rows: 146580
})
})
🔍 Trainable params: 547
model.embed_tokens.weight
model.layers.0.self_attn.Wk
model.layers.0.self_attn.bk
model.layers.0.self_attn.Wv
model.layers.0.self_attn.bv
model.layers.0.self_attn.k_down
model.layers.0.self_attn.k_up
model.layers.0.self_attn.v_down
model.layers.0.self_attn.v_up
model.layers.0.self_attn.q_proj.weight
model.layers.0.self_attn.k_proj.weight
model.layers.0.self_attn.v_proj.weight
model.layers.0.self_attn.o_proj.weight
model.layers.0.mlp.gate_proj.weight
model.layers.0.mlp.up_proj.weight
model.layers.0.mlp.down_proj.weight
model.layers.0.input_layernorm.weight
model.layers.0.post_attention_layernorm.weight
model.layers.1.self_attn.Wk
model.layers.1.self_attn.bk
model.layers.1.self_attn.Wv
model.layers.1.self_attn.bv
model.layers.1.self_attn.k_down
model.layers.1.self_attn.k_up
model.layers.1.self_attn.v_down
model.layers.1.self_attn.v_up
model.layers.1.self_attn.q_proj.weight
model.layers.1.self_attn.k_proj.weight
model.layers.1.self_attn.v_proj.weight
model.layers.1.self_attn.o_proj.weight
model.layers.1.mlp.gate_proj.weight
model.layers.1.mlp.up_proj.weight
model.layers.1.mlp.down_proj.weight
model.layers.1.input_layernorm.weight
model.layers.1.post_attention_layernorm.weight
model.layers.2.self_attn.Wk
model.layers.2.self_attn.bk
model.layers.2.self_attn.Wv
model.layers.2.self_attn.bv
model.layers.2.self_attn.k_down
model.layers.2.self_attn.k_up
model.layers.2.self_attn.v_down
model.layers.2.self_attn.v_up
model.layers.2.self_attn.q_proj.weight
model.layers.2.self_attn.k_proj.weight
model.layers.2.self_attn.v_proj.weight
model.layers.2.self_attn.o_proj.weight
model.layers.2.mlp.gate_proj.weight
model.layers.2.mlp.up_proj.weight
model.layers.2.mlp.down_proj.weight
model.layers.2.input_layernorm.weight
model.layers.2.post_attention_layernorm.weight
model.layers.3.self_attn.Wk
model.layers.3.self_attn.bk
model.layers.3.self_attn.Wv
model.layers.3.self_attn.bv
model.layers.3.self_attn.k_down
model.layers.3.self_attn.k_up
model.layers.3.self_attn.v_down
model.layers.3.self_attn.v_up
model.layers.3.self_attn.q_proj.weight
model.layers.3.self_attn.k_proj.weight
model.layers.3.self_attn.v_proj.weight
model.layers.3.self_attn.o_proj.weight
model.layers.3.mlp.gate_proj.weight
model.layers.3.mlp.up_proj.weight
model.layers.3.mlp.down_proj.weight
model.layers.3.input_layernorm.weight
model.layers.3.post_attention_layernorm.weight
model.layers.4.self_attn.Wk
model.layers.4.self_attn.bk
model.layers.4.self_attn.Wv
model.layers.4.self_attn.bv
model.layers.4.self_attn.k_down
model.layers.4.self_attn.k_up
model.layers.4.self_attn.v_down
model.layers.4.self_attn.v_up
model.layers.4.self_attn.q_proj.weight
model.layers.4.self_attn.k_proj.weight
model.layers.4.self_attn.v_proj.weight
model.layers.4.self_attn.o_proj.weight
model.layers.4.mlp.gate_proj.weight
model.layers.4.mlp.up_proj.weight
model.layers.4.mlp.down_proj.weight
model.layers.4.input_layernorm.weight
model.layers.4.post_attention_layernorm.weight
model.layers.5.self_attn.Wk
model.layers.5.self_attn.bk
model.layers.5.self_attn.Wv
model.layers.5.self_attn.bv
model.layers.5.self_attn.k_down
model.layers.5.self_attn.k_up
model.layers.5.self_attn.v_down
model.layers.5.self_attn.v_up
model.layers.5.self_attn.q_proj.weight
model.layers.5.self_attn.k_proj.weight
model.layers.5.self_attn.v_proj.weight
model.layers.5.self_attn.o_proj.weight
model.layers.5.mlp.gate_proj.weight
model.layers.5.mlp.up_proj.weight
model.layers.5.mlp.down_proj.weight
model.layers.5.input_layernorm.weight
model.layers.5.post_attention_layernorm.weight
model.layers.6.self_attn.Wk
model.layers.6.self_attn.bk
model.layers.6.self_attn.Wv
model.layers.6.self_attn.bv
model.layers.6.self_attn.k_down
model.layers.6.self_attn.k_up
model.layers.6.self_attn.v_down
model.layers.6.self_attn.v_up
model.layers.6.self_attn.q_proj.weight
model.layers.6.self_attn.k_proj.weight
model.layers.6.self_attn.v_proj.weight
model.layers.6.self_attn.o_proj.weight
model.layers.6.mlp.gate_proj.weight
model.layers.6.mlp.up_proj.weight
model.layers.6.mlp.down_proj.weight
model.layers.6.input_layernorm.weight
model.layers.6.post_attention_layernorm.weight
model.layers.7.self_attn.Wk
model.layers.7.self_attn.bk
model.layers.7.self_attn.Wv
model.layers.7.self_attn.bv
model.layers.7.self_attn.k_down
model.layers.7.self_attn.k_up
model.layers.7.self_attn.v_down
model.layers.7.self_attn.v_up
model.layers.7.self_attn.q_proj.weight
model.layers.7.self_attn.k_proj.weight
model.layers.7.self_attn.v_proj.weight
model.layers.7.self_attn.o_proj.weight
model.layers.7.mlp.gate_proj.weight
model.layers.7.mlp.up_proj.weight
model.layers.7.mlp.down_proj.weight
model.layers.7.input_layernorm.weight
model.layers.7.post_attention_layernorm.weight
model.layers.8.self_attn.Wk
model.layers.8.self_attn.bk
model.layers.8.self_attn.Wv
model.layers.8.self_attn.bv
model.layers.8.self_attn.k_down
model.layers.8.self_attn.k_up
model.layers.8.self_attn.v_down
model.layers.8.self_attn.v_up
model.layers.8.self_attn.q_proj.weight
model.layers.8.self_attn.k_proj.weight
model.layers.8.self_attn.v_proj.weight
model.layers.8.self_attn.o_proj.weight
model.layers.8.mlp.gate_proj.weight
model.layers.8.mlp.up_proj.weight
model.layers.8.mlp.down_proj.weight
model.layers.8.input_layernorm.weight
model.layers.8.post_attention_layernorm.weight
model.layers.9.self_attn.Wk
model.layers.9.self_attn.bk
model.layers.9.self_attn.Wv
model.layers.9.self_attn.bv
model.layers.9.self_attn.k_down
model.layers.9.self_attn.k_up
model.layers.9.self_attn.v_down
model.layers.9.self_attn.v_up
model.layers.9.self_attn.q_proj.weight
model.layers.9.self_attn.k_proj.weight
model.layers.9.self_attn.v_proj.weight
model.layers.9.self_attn.o_proj.weight
model.layers.9.mlp.gate_proj.weight
model.layers.9.mlp.up_proj.weight
model.layers.9.mlp.down_proj.weight
model.layers.9.input_layernorm.weight
model.layers.9.post_attention_layernorm.weight
model.layers.10.self_attn.Wk
model.layers.10.self_attn.bk
model.layers.10.self_attn.Wv
model.layers.10.self_attn.bv
model.layers.10.self_attn.k_down
model.layers.10.self_attn.k_up
model.layers.10.self_attn.v_down
model.layers.10.self_attn.v_up
model.layers.10.self_attn.q_proj.weight
model.layers.10.self_attn.k_proj.weight
model.layers.10.self_attn.v_proj.weight
model.layers.10.self_attn.o_proj.weight
model.layers.10.mlp.gate_proj.weight
model.layers.10.mlp.up_proj.weight
model.layers.10.mlp.down_proj.weight
model.layers.10.input_layernorm.weight
model.layers.10.post_attention_layernorm.weight
model.layers.11.self_attn.Wk
model.layers.11.self_attn.bk
model.layers.11.self_attn.Wv
model.layers.11.self_attn.bv
model.layers.11.self_attn.k_down
model.layers.11.self_attn.k_up
model.layers.11.self_attn.v_down
model.layers.11.self_attn.v_up
model.layers.11.self_attn.q_proj.weight
model.layers.11.self_attn.k_proj.weight
model.layers.11.self_attn.v_proj.weight
model.layers.11.self_attn.o_proj.weight
model.layers.11.mlp.gate_proj.weight
model.layers.11.mlp.up_proj.weight
model.layers.11.mlp.down_proj.weight
model.layers.11.input_layernorm.weight
model.layers.11.post_attention_layernorm.weight
model.layers.12.self_attn.Wk
model.layers.12.self_attn.bk
model.layers.12.self_attn.Wv
model.layers.12.self_attn.bv
model.layers.12.self_attn.k_down
model.layers.12.self_attn.k_up
model.layers.12.self_attn.v_down
model.layers.12.self_attn.v_up
model.layers.12.self_attn.q_proj.weight
model.layers.12.self_attn.k_proj.weight
model.layers.12.self_attn.v_proj.weight
model.layers.12.self_attn.o_proj.weight
model.layers.12.mlp.gate_proj.weight
model.layers.12.mlp.up_proj.weight
model.layers.12.mlp.down_proj.weight
model.layers.12.input_layernorm.weight
model.layers.12.post_attention_layernorm.weight
model.layers.13.self_attn.Wk
model.layers.13.self_attn.bk
model.layers.13.self_attn.Wv
model.layers.13.self_attn.bv
model.layers.13.self_attn.k_down
model.layers.13.self_attn.k_up
model.layers.13.self_attn.v_down
model.layers.13.self_attn.v_up
model.layers.13.self_attn.q_proj.weight
model.layers.13.self_attn.k_proj.weight
model.layers.13.self_attn.v_proj.weight
model.layers.13.self_attn.o_proj.weight
model.layers.13.mlp.gate_proj.weight
model.layers.13.mlp.up_proj.weight
model.layers.13.mlp.down_proj.weight
model.layers.13.input_layernorm.weight
model.layers.13.post_attention_layernorm.weight
model.layers.14.self_attn.Wk
model.layers.14.self_attn.bk
model.layers.14.self_attn.Wv
model.layers.14.self_attn.bv
model.layers.14.self_attn.k_down
model.layers.14.self_attn.k_up
model.layers.14.self_attn.v_down
model.layers.14.self_attn.v_up
model.layers.14.self_attn.q_proj.weight
model.layers.14.self_attn.k_proj.weight
model.layers.14.self_attn.v_proj.weight
model.layers.14.self_attn.o_proj.weight
model.layers.14.mlp.gate_proj.weight
model.layers.14.mlp.up_proj.weight
model.layers.14.mlp.down_proj.weight
model.layers.14.input_layernorm.weight
model.layers.14.post_attention_layernorm.weight
model.layers.15.self_attn.Wk
model.layers.15.self_attn.bk
model.layers.15.self_attn.Wv
model.layers.15.self_attn.bv
model.layers.15.self_attn.k_down
model.layers.15.self_attn.k_up
model.layers.15.self_attn.v_down
model.layers.15.self_attn.v_up
model.layers.15.self_attn.q_proj.weight
model.layers.15.self_attn.k_proj.weight
model.layers.15.self_attn.v_proj.weight
model.layers.15.self_attn.o_proj.weight
model.layers.15.mlp.gate_proj.weight
model.layers.15.mlp.up_proj.weight
model.layers.15.mlp.down_proj.weight
model.layers.15.input_layernorm.weight
model.layers.15.post_attention_layernorm.weight
model.layers.16.self_attn.Wk
model.layers.16.self_attn.bk
model.layers.16.self_attn.Wv
model.layers.16.self_attn.bv
model.layers.16.self_attn.k_down
model.layers.16.self_attn.k_up
model.layers.16.self_attn.v_down
model.layers.16.self_attn.v_up
model.layers.16.self_attn.q_proj.weight
model.layers.16.self_attn.k_proj.weight
model.layers.16.self_attn.v_proj.weight
model.layers.16.self_attn.o_proj.weight
model.layers.16.mlp.gate_proj.weight
model.layers.16.mlp.up_proj.weight
model.layers.16.mlp.down_proj.weight
model.layers.16.input_layernorm.weight
model.layers.16.post_attention_layernorm.weight
model.layers.17.self_attn.Wk
model.layers.17.self_attn.bk
model.layers.17.self_attn.Wv
model.layers.17.self_attn.bv
model.layers.17.self_attn.k_down
model.layers.17.self_attn.k_up
model.layers.17.self_attn.v_down
model.layers.17.self_attn.v_up
model.layers.17.self_attn.q_proj.weight
model.layers.17.self_attn.k_proj.weight
model.layers.17.self_attn.v_proj.weight
model.layers.17.self_attn.o_proj.weight
model.layers.17.mlp.gate_proj.weight
model.layers.17.mlp.up_proj.weight
model.layers.17.mlp.down_proj.weight
model.layers.17.input_layernorm.weight
model.layers.17.post_attention_layernorm.weight
model.layers.18.self_attn.Wk
model.layers.18.self_attn.bk
model.layers.18.self_attn.Wv
model.layers.18.self_attn.bv
model.layers.18.self_attn.k_down
model.layers.18.self_attn.k_up
model.layers.18.self_attn.v_down
model.layers.18.self_attn.v_up
model.layers.18.self_attn.q_proj.weight
model.layers.18.self_attn.k_proj.weight
model.layers.18.self_attn.v_proj.weight
model.layers.18.self_attn.o_proj.weight
model.layers.18.mlp.gate_proj.weight
model.layers.18.mlp.up_proj.weight
model.layers.18.mlp.down_proj.weight
model.layers.18.input_layernorm.weight
model.layers.18.post_attention_layernorm.weight
model.layers.19.self_attn.Wk
model.layers.19.self_attn.bk
model.layers.19.self_attn.Wv
model.layers.19.self_attn.bv
model.layers.19.self_attn.k_down
model.layers.19.self_attn.k_up
model.layers.19.self_attn.v_down
model.layers.19.self_attn.v_up
model.layers.19.self_attn.q_proj.weight
model.layers.19.self_attn.k_proj.weight
model.layers.19.self_attn.v_proj.weight
model.layers.19.self_attn.o_proj.weight
model.layers.19.mlp.gate_proj.weight
model.layers.19.mlp.up_proj.weight
model.layers.19.mlp.down_proj.weight
model.layers.19.input_layernorm.weight
model.layers.19.post_attention_layernorm.weight
model.layers.20.self_attn.Wk
model.layers.20.self_attn.bk
model.layers.20.self_attn.Wv
model.layers.20.self_attn.bv
model.layers.20.self_attn.k_down
model.layers.20.self_attn.k_up
model.layers.20.self_attn.v_down
model.layers.20.self_attn.v_up
model.layers.20.self_attn.q_proj.weight
model.layers.20.self_attn.k_proj.weight
model.layers.20.self_attn.v_proj.weight
model.layers.20.self_attn.o_proj.weight
model.layers.20.mlp.gate_proj.weight
model.layers.20.mlp.up_proj.weight
model.layers.20.mlp.down_proj.weight
model.layers.20.input_layernorm.weight
model.layers.20.post_attention_layernorm.weight
model.layers.21.self_attn.Wk
model.layers.21.self_attn.bk
model.layers.21.self_attn.Wv
model.layers.21.self_attn.bv
model.layers.21.self_attn.k_down
model.layers.21.self_attn.k_up
model.layers.21.self_attn.v_down
model.layers.21.self_attn.v_up
model.layers.21.self_attn.q_proj.weight
model.layers.21.self_attn.k_proj.weight
model.layers.21.self_attn.v_proj.weight
model.layers.21.self_attn.o_proj.weight
model.layers.21.mlp.gate_proj.weight
model.layers.21.mlp.up_proj.weight
model.layers.21.mlp.down_proj.weight
model.layers.21.input_layernorm.weight
model.layers.21.post_attention_layernorm.weight
model.layers.22.self_attn.Wk
model.layers.22.self_attn.bk
model.layers.22.self_attn.Wv
model.layers.22.self_attn.bv
model.layers.22.self_attn.k_down
model.layers.22.self_attn.k_up
model.layers.22.self_attn.v_down
model.layers.22.self_attn.v_up
model.layers.22.self_attn.q_proj.weight
model.layers.22.self_attn.k_proj.weight
model.layers.22.self_attn.v_proj.weight
model.layers.22.self_attn.o_proj.weight
model.layers.22.mlp.gate_proj.weight
model.layers.22.mlp.up_proj.weight
model.layers.22.mlp.down_proj.weight
model.layers.22.input_layernorm.weight
model.layers.22.post_attention_layernorm.weight
model.layers.23.self_attn.Wk
model.layers.23.self_attn.bk
model.layers.23.self_attn.Wv
model.layers.23.self_attn.bv
model.layers.23.self_attn.k_down
model.layers.23.self_attn.k_up
model.layers.23.self_attn.v_down
model.layers.23.self_attn.v_up
model.layers.23.self_attn.q_proj.weight
model.layers.23.self_attn.k_proj.weight
model.layers.23.self_attn.v_proj.weight
model.layers.23.self_attn.o_proj.weight
model.layers.23.mlp.gate_proj.weight
model.layers.23.mlp.up_proj.weight
model.layers.23.mlp.down_proj.weight
model.layers.23.input_layernorm.weight
model.layers.23.post_attention_layernorm.weight
model.layers.24.self_attn.Wk
model.layers.24.self_attn.bk
model.layers.24.self_attn.Wv
model.layers.24.self_attn.bv
model.layers.24.self_attn.k_down
model.layers.24.self_attn.k_up
model.layers.24.self_attn.v_down
model.layers.24.self_attn.v_up
model.layers.24.self_attn.q_proj.weight
model.layers.24.self_attn.k_proj.weight
model.layers.24.self_attn.v_proj.weight
model.layers.24.self_attn.o_proj.weight
model.layers.24.mlp.gate_proj.weight
model.layers.24.mlp.up_proj.weight
model.layers.24.mlp.down_proj.weight
model.layers.24.input_layernorm.weight
model.layers.24.post_attention_layernorm.weight
model.layers.25.self_attn.Wk
model.layers.25.self_attn.bk
model.layers.25.self_attn.Wv
model.layers.25.self_attn.bv
model.layers.25.self_attn.k_down
model.layers.25.self_attn.k_up
model.layers.25.self_attn.v_down
model.layers.25.self_attn.v_up
model.layers.25.self_attn.q_proj.weight
model.layers.25.self_attn.k_proj.weight
model.layers.25.self_attn.v_proj.weight
model.layers.25.self_attn.o_proj.weight
model.layers.25.mlp.gate_proj.weight
model.layers.25.mlp.up_proj.weight
model.layers.25.mlp.down_proj.weight
model.layers.25.input_layernorm.weight
model.layers.25.post_attention_layernorm.weight
model.layers.26.self_attn.Wk
model.layers.26.self_attn.bk
model.layers.26.self_attn.Wv
model.layers.26.self_attn.bv
model.layers.26.self_attn.k_down
model.layers.26.self_attn.k_up
model.layers.26.self_attn.v_down
model.layers.26.self_attn.v_up
model.layers.26.self_attn.q_proj.weight
model.layers.26.self_attn.k_proj.weight
model.layers.26.self_attn.v_proj.weight
model.layers.26.self_attn.o_proj.weight
model.layers.26.mlp.gate_proj.weight
model.layers.26.mlp.up_proj.weight
model.layers.26.mlp.down_proj.weight
model.layers.26.input_layernorm.weight
model.layers.26.post_attention_layernorm.weight
model.layers.27.self_attn.Wk
model.layers.27.self_attn.bk
model.layers.27.self_attn.Wv
model.layers.27.self_attn.bv
model.layers.27.self_attn.k_down
model.layers.27.self_attn.k_up
model.layers.27.self_attn.v_down
model.layers.27.self_attn.v_up
model.layers.27.self_attn.q_proj.weight
model.layers.27.self_attn.k_proj.weight
model.layers.27.self_attn.v_proj.weight
model.layers.27.self_attn.o_proj.weight
model.layers.27.mlp.gate_proj.weight
model.layers.27.mlp.up_proj.weight
model.layers.27.mlp.down_proj.weight
model.layers.27.input_layernorm.weight
model.layers.27.post_attention_layernorm.weight
model.layers.28.self_attn.Wk
model.layers.28.self_attn.bk
model.layers.28.self_attn.Wv
model.layers.28.self_attn.bv
model.layers.28.self_attn.k_down
model.layers.28.self_attn.k_up
model.layers.28.self_attn.v_down
model.layers.28.self_attn.v_up
model.layers.28.self_attn.q_proj.weight
model.layers.28.self_attn.k_proj.weight
model.layers.28.self_attn.v_proj.weight
model.layers.28.self_attn.o_proj.weight
model.layers.28.mlp.gate_proj.weight
model.layers.28.mlp.up_proj.weight
model.layers.28.mlp.down_proj.weight
model.layers.28.input_layernorm.weight
model.layers.28.post_attention_layernorm.weight
model.layers.29.self_attn.Wk
model.layers.29.self_attn.bk
model.layers.29.self_attn.Wv
model.layers.29.self_attn.bv
model.layers.29.self_attn.k_down
model.layers.29.self_attn.k_up
model.layers.29.self_attn.v_down
model.layers.29.self_attn.v_up
model.layers.29.self_attn.q_proj.weight
model.layers.29.self_attn.k_proj.weight
model.layers.29.self_attn.v_proj.weight
model.layers.29.self_attn.o_proj.weight
model.layers.29.mlp.gate_proj.weight
model.layers.29.mlp.up_proj.weight
model.layers.29.mlp.down_proj.weight
model.layers.29.input_layernorm.weight
model.layers.29.post_attention_layernorm.weight
model.layers.30.self_attn.Wk
model.layers.30.self_attn.bk
model.layers.30.self_attn.Wv
model.layers.30.self_attn.bv
model.layers.30.self_attn.k_down
model.layers.30.self_attn.k_up
model.layers.30.self_attn.v_down
model.layers.30.self_attn.v_up
model.layers.30.self_attn.q_proj.weight
model.layers.30.self_attn.k_proj.weight
model.layers.30.self_attn.v_proj.weight
model.layers.30.self_attn.o_proj.weight
model.layers.30.mlp.gate_proj.weight
model.layers.30.mlp.up_proj.weight
model.layers.30.mlp.down_proj.weight
model.layers.30.input_layernorm.weight
model.layers.30.post_attention_layernorm.weight
model.layers.31.self_attn.Wk
model.layers.31.self_attn.bk
model.layers.31.self_attn.Wv
model.layers.31.self_attn.bv
model.layers.31.self_attn.k_down
model.layers.31.self_attn.k_up
model.layers.31.self_attn.v_down
model.layers.31.self_attn.v_up
model.layers.31.self_attn.q_proj.weight
model.layers.31.self_attn.k_proj.weight
model.layers.31.self_attn.v_proj.weight
model.layers.31.self_attn.o_proj.weight
model.layers.31.mlp.gate_proj.weight
model.layers.31.mlp.up_proj.weight
model.layers.31.mlp.down_proj.weight
model.layers.31.input_layernorm.weight
model.layers.31.post_attention_layernorm.weight
model.norm.weight
lm_head.weight
<class 'model_patch.custom_llama.CustomLlamaForCausalLM'>
begin training !!!!!
{'origin_loss': 5.025427341461182, 'mask_loss': 0.2418212890625, 'mask_rate': 0.49169921875, 'epoch': 0}
{'origin_loss': 4.794791221618652, 'mask_loss': 0.242431640625, 'mask_rate': 0.492431640625, 'epoch': 0}
{'origin_loss': 4.8240838050842285, 'mask_loss': 0.2388916015625, 'mask_rate': 0.48876953125, 'epoch': 0}
{'origin_loss': 4.923285961151123, 'mask_loss': 0.24462890625, 'mask_rate': 0.49462890625, 'epoch': 0}
{'origin_loss': 4.86187744140625, 'mask_loss': 0.2408447265625, 'mask_rate': 0.49072265625, 'epoch': 0}
{'origin_loss': 4.93514347076416, 'mask_loss': 0.243408203125, 'mask_rate': 0.493408203125, 'epoch': 0}
{'origin_loss': 4.94369649887085, 'mask_loss': 0.2403564453125, 'mask_rate': 0.490234375, 'epoch': 0}
{'origin_loss': 4.682024002075195, 'mask_loss': 0.2431640625, 'mask_rate': 0.4931640625, 'epoch': 0}
{'loss': 299.0613, 'grad_norm': 1758.8338623046875, 'learning_rate': 9.980000000000001e-06, 'epoch': 0.0}
{'origin_loss': 4.635133266448975, 'mask_loss': 0.180908203125, 'mask_rate': 0.42529296875, 'epoch': 0.0}
{'origin_loss': 4.52673864364624, 'mask_loss': 0.183837890625, 'mask_rate': 0.4287109375, 'epoch': 0.0}
{'origin_loss': 4.942536354064941, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.0}
{'origin_loss': 4.7229413986206055, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.0}
{'origin_loss': 4.566638946533203, 'mask_loss': 0.1854248046875, 'mask_rate': 0.4306640625, 'epoch': 0.0}
{'origin_loss': 4.663545608520508, 'mask_loss': 0.181884765625, 'mask_rate': 0.426513671875, 'epoch': 0.0}
{'origin_loss': 4.67844295501709, 'mask_loss': 0.1829833984375, 'mask_rate': 0.427734375, 'epoch': 0.0}
{'origin_loss': 4.842960834503174, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.0}
{'loss': 224.4786, 'grad_norm': 2474.558349609375, 'learning_rate': 9.960000000000001e-06, 'epoch': 0.0}
{'origin_loss': 7.156896114349365, 'mask_loss': 0.0911865234375, 'mask_rate': 0.302001953125, 'epoch': 0.0}
{'origin_loss': 7.196861743927002, 'mask_loss': 0.09088134765625, 'mask_rate': 0.301513671875, 'epoch': 0.0}
{'origin_loss': 7.04507303237915, 'mask_loss': 0.091796875, 'mask_rate': 0.302978515625, 'epoch': 0.0}
{'origin_loss': 7.264358997344971, 'mask_loss': 0.08917236328125, 'mask_rate': 0.298583984375, 'epoch': 0.0}
{'origin_loss': 7.3085174560546875, 'mask_loss': 0.0924072265625, 'mask_rate': 0.303955078125, 'epoch': 0.0}
{'origin_loss': 7.50482177734375, 'mask_loss': 0.089599609375, 'mask_rate': 0.29931640625, 'epoch': 0.0}
{'origin_loss': 7.037359237670898, 'mask_loss': 0.09222412109375, 'mask_rate': 0.3037109375, 'epoch': 0.0}
{'origin_loss': 7.33189058303833, 'mask_loss': 0.09075927734375, 'mask_rate': 0.30126953125, 'epoch': 0.0}
{'loss': 117.887, 'grad_norm': 40.702091217041016, 'learning_rate': 9.940000000000001e-06, 'epoch': 0.0}
{'origin_loss': 6.293299674987793, 'mask_loss': 0.10369873046875, 'mask_rate': 0.322021484375, 'epoch': 0.0}
{'origin_loss': 6.43393611907959, 'mask_loss': 0.10400390625, 'mask_rate': 0.322509765625, 'epoch': 0.0}
{'origin_loss': 6.990511417388916, 'mask_loss': 0.1046142578125, 'mask_rate': 0.323486328125, 'epoch': 0.0}
{'origin_loss': 5.93210506439209, 'mask_loss': 0.1044921875, 'mask_rate': 0.3232421875, 'epoch': 0.0}
{'origin_loss': 5.918284893035889, 'mask_loss': 0.1038818359375, 'mask_rate': 0.322265625, 'epoch': 0.0}
{'origin_loss': 5.937532424926758, 'mask_loss': 0.1038818359375, 'mask_rate': 0.322265625, 'epoch': 0.0}
{'origin_loss': 5.7519917488098145, 'mask_loss': 0.10308837890625, 'mask_rate': 0.321044921875, 'epoch': 0.0}
{'origin_loss': 6.0542097091674805, 'mask_loss': 0.10418701171875, 'mask_rate': 0.32275390625, 'epoch': 0.0}
{'loss': 132.6093, 'grad_norm': 35.76948165893555, 'learning_rate': 9.920000000000002e-06, 'epoch': 0.0}
{'origin_loss': 5.38314962387085, 'mask_loss': 0.082275390625, 'mask_rate': 0.286865234375, 'epoch': 0.0}
{'origin_loss': 5.496407985687256, 'mask_loss': 0.0821533203125, 'mask_rate': 0.28662109375, 'epoch': 0.0}
{'origin_loss': 5.262571334838867, 'mask_loss': 0.08258056640625, 'mask_rate': 0.287353515625, 'epoch': 0.0}
{'origin_loss': 5.10493803024292, 'mask_loss': 0.08367919921875, 'mask_rate': 0.289306640625, 'epoch': 0.0}
{'origin_loss': 5.384873390197754, 'mask_loss': 0.081298828125, 'mask_rate': 0.28515625, 'epoch': 0.0}
{'origin_loss': 5.425041198730469, 'mask_loss': 0.08258056640625, 'mask_rate': 0.287353515625, 'epoch': 0.0}
{'origin_loss': 5.340984344482422, 'mask_loss': 0.08453369140625, 'mask_rate': 0.290771484375, 'epoch': 0.0}
{'origin_loss': 5.42966365814209, 'mask_loss': 0.08148193359375, 'mask_rate': 0.285400390625, 'epoch': 0.0}
{'loss': 105.7675, 'grad_norm': 15.62586498260498, 'learning_rate': 9.9e-06, 'epoch': 0.0}
{'origin_loss': 6.949411869049072, 'mask_loss': 0.09033203125, 'mask_rate': 0.300537109375, 'epoch': 0.0}
{'origin_loss': 5.431781768798828, 'mask_loss': 0.08941650390625, 'mask_rate': 0.299072265625, 'epoch': 0.0}
{'origin_loss': 5.7703776359558105, 'mask_loss': 0.08941650390625, 'mask_rate': 0.299072265625, 'epoch': 0.0}
{'origin_loss': 6.330740451812744, 'mask_loss': 0.09014892578125, 'mask_rate': 0.30029296875, 'epoch': 0.0}
{'origin_loss': 5.656033992767334, 'mask_loss': 0.0889892578125, 'mask_rate': 0.29833984375, 'epoch': 0.0}
{'origin_loss': 4.453754901885986, 'mask_loss': 0.089599609375, 'mask_rate': 0.29931640625, 'epoch': 0.0}
{'origin_loss': 6.461913585662842, 'mask_loss': 0.09271240234375, 'mask_rate': 0.304443359375, 'epoch': 0.0}
{'origin_loss': 5.97172737121582, 'mask_loss': 0.08990478515625, 'mask_rate': 0.2998046875, 'epoch': 0.0}
{'loss': 115.4017, 'grad_norm': 46.978607177734375, 'learning_rate': 9.88e-06, 'epoch': 0.0}
{'origin_loss': 4.793948650360107, 'mask_loss': 0.09429931640625, 'mask_rate': 0.30712890625, 'epoch': 0.0}
{'origin_loss': 4.960231781005859, 'mask_loss': 0.09375, 'mask_rate': 0.30615234375, 'epoch': 0.0}
{'origin_loss': 4.715784072875977, 'mask_loss': 0.0938720703125, 'mask_rate': 0.306396484375, 'epoch': 0.0}
{'origin_loss': 4.874630451202393, 'mask_loss': 0.09344482421875, 'mask_rate': 0.3056640625, 'epoch': 0.0}
{'origin_loss': 4.411458969116211, 'mask_loss': 0.0938720703125, 'mask_rate': 0.306396484375, 'epoch': 0.0}
{'origin_loss': 4.818345546722412, 'mask_loss': 0.09417724609375, 'mask_rate': 0.306884765625, 'epoch': 0.0}
{'origin_loss': 4.557765007019043, 'mask_loss': 0.09490966796875, 'mask_rate': 0.30810546875, 'epoch': 0.0}
{'origin_loss': 4.754171371459961, 'mask_loss': 0.0946044921875, 'mask_rate': 0.3076171875, 'epoch': 0.0}
{'loss': 119.1811, 'grad_norm': 17.317564010620117, 'learning_rate': 9.86e-06, 'epoch': 0.0}
{'origin_loss': 4.116775035858154, 'mask_loss': 0.09906005859375, 'mask_rate': 0.314697265625, 'epoch': 0.0}
{'origin_loss': 4.203145503997803, 'mask_loss': 0.0987548828125, 'mask_rate': 0.314208984375, 'epoch': 0.0}
{'origin_loss': 4.317051887512207, 'mask_loss': 0.0982666015625, 'mask_rate': 0.3134765625, 'epoch': 0.0}
{'origin_loss': 4.297688961029053, 'mask_loss': 0.0982666015625, 'mask_rate': 0.3134765625, 'epoch': 0.0}
{'origin_loss': 4.154145240783691, 'mask_loss': 0.09918212890625, 'mask_rate': 0.31494140625, 'epoch': 0.0}
{'origin_loss': 4.282235622406006, 'mask_loss': 0.0987548828125, 'mask_rate': 0.314208984375, 'epoch': 0.0}
{'origin_loss': 4.207690238952637, 'mask_loss': 0.10009765625, 'mask_rate': 0.31640625, 'epoch': 0.0}
{'origin_loss': 4.207401275634766, 'mask_loss': 0.09918212890625, 'mask_rate': 0.31494140625, 'epoch': 0.0}
{'loss': 124.5436, 'grad_norm': 15.374427795410156, 'learning_rate': 9.84e-06, 'epoch': 0.0}
{'origin_loss': 3.814931869506836, 'mask_loss': 0.11285400390625, 'mask_rate': 0.3359375, 'epoch': 0.0}
{'origin_loss': 3.6116139888763428, 'mask_loss': 0.114013671875, 'mask_rate': 0.337646484375, 'epoch': 0.0}
{'origin_loss': 3.85394287109375, 'mask_loss': 0.110107421875, 'mask_rate': 0.331787109375, 'epoch': 0.0}
{'origin_loss': 3.9354681968688965, 'mask_loss': 0.109619140625, 'mask_rate': 0.3310546875, 'epoch': 0.0}
{'origin_loss': 3.868537425994873, 'mask_loss': 0.109619140625, 'mask_rate': 0.3310546875, 'epoch': 0.0}
{'origin_loss': 3.9116299152374268, 'mask_loss': 0.112060546875, 'mask_rate': 0.334716796875, 'epoch': 0.0}
{'origin_loss': 3.731940746307373, 'mask_loss': 0.1136474609375, 'mask_rate': 0.337158203125, 'epoch': 0.0}
{'origin_loss': 3.7876198291778564, 'mask_loss': 0.1126708984375, 'mask_rate': 0.335693359375, 'epoch': 0.0}
{'loss': 139.7832, 'grad_norm': 16.045381546020508, 'learning_rate': 9.820000000000001e-06, 'epoch': 0.0}
{'origin_loss': 3.336249589920044, 'mask_loss': 0.12255859375, 'mask_rate': 0.35009765625, 'epoch': 0.0}
{'origin_loss': 3.206491470336914, 'mask_loss': 0.12188720703125, 'mask_rate': 0.34912109375, 'epoch': 0.0}
{'origin_loss': 3.210050106048584, 'mask_loss': 0.125, 'mask_rate': 0.353515625, 'epoch': 0.0}
{'origin_loss': 3.192369222640991, 'mask_loss': 0.12237548828125, 'mask_rate': 0.349853515625, 'epoch': 0.0}
{'origin_loss': 2.9892778396606445, 'mask_loss': 0.12548828125, 'mask_rate': 0.354248046875, 'epoch': 0.0}
{'origin_loss': 3.3853678703308105, 'mask_loss': 0.12310791015625, 'mask_rate': 0.350830078125, 'epoch': 0.0}
{'origin_loss': 3.418668031692505, 'mask_loss': 0.12396240234375, 'mask_rate': 0.35205078125, 'epoch': 0.0}
{'origin_loss': 3.2318124771118164, 'mask_loss': 0.12481689453125, 'mask_rate': 0.353271484375, 'epoch': 0.0}
{'loss': 153.6057, 'grad_norm': 25.099056243896484, 'learning_rate': 9.800000000000001e-06, 'epoch': 0.0}
{'origin_loss': 2.745582342147827, 'mask_loss': 0.1365966796875, 'mask_rate': 0.36962890625, 'epoch': 0.0}
{'origin_loss': 2.957270622253418, 'mask_loss': 0.1351318359375, 'mask_rate': 0.36767578125, 'epoch': 0.0}
{'origin_loss': 2.979255437850952, 'mask_loss': 0.135009765625, 'mask_rate': 0.367431640625, 'epoch': 0.0}
{'origin_loss': 3.048069477081299, 'mask_loss': 0.135498046875, 'mask_rate': 0.3681640625, 'epoch': 0.0}
{'origin_loss': 2.86751127243042, 'mask_loss': 0.134765625, 'mask_rate': 0.3671875, 'epoch': 0.0}
{'origin_loss': 3.0373928546905518, 'mask_loss': 0.134765625, 'mask_rate': 0.3671875, 'epoch': 0.0}
{'origin_loss': 2.7982470989227295, 'mask_loss': 0.135498046875, 'mask_rate': 0.3681640625, 'epoch': 0.0}
{'origin_loss': 2.737678050994873, 'mask_loss': 0.136962890625, 'mask_rate': 0.3701171875, 'epoch': 0.0}
{'loss': 167.6933, 'grad_norm': 15.155527114868164, 'learning_rate': 9.780000000000001e-06, 'epoch': 0.0}
{'origin_loss': 2.7270116806030273, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.0}
{'origin_loss': 2.7915451526641846, 'mask_loss': 0.1453857421875, 'mask_rate': 0.38134765625, 'epoch': 0.0}
{'origin_loss': 2.7690887451171875, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.0}
{'origin_loss': 2.6874446868896484, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.0}
{'origin_loss': 2.6528940200805664, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.0}
{'origin_loss': 2.866830825805664, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.0}
{'origin_loss': 2.5882108211517334, 'mask_loss': 0.1446533203125, 'mask_rate': 0.38037109375, 'epoch': 0.0}
{'origin_loss': 2.7640182971954346, 'mask_loss': 0.1456298828125, 'mask_rate': 0.381591796875, 'epoch': 0.0}
{'loss': 178.7934, 'grad_norm': 5.839208602905273, 'learning_rate': 9.760000000000001e-06, 'epoch': 0.01}
{'origin_loss': 2.6537914276123047, 'mask_loss': 0.1495361328125, 'mask_rate': 0.38671875, 'epoch': 0.01}
{'origin_loss': 2.620467185974121, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.01}
{'origin_loss': 2.7361338138580322, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.01}
{'origin_loss': 2.4302871227264404, 'mask_loss': 0.151123046875, 'mask_rate': 0.388671875, 'epoch': 0.01}
{'origin_loss': 2.451557159423828, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.01}
{'origin_loss': 2.467623472213745, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.01}
{'origin_loss': 2.3483166694641113, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.01}
{'origin_loss': 2.5114283561706543, 'mask_loss': 0.15185546875, 'mask_rate': 0.3896484375, 'epoch': 0.01}
{'loss': 189.0431, 'grad_norm': 3.5794432163238525, 'learning_rate': 9.74e-06, 'epoch': 0.01}
{'origin_loss': 2.3092713356018066, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.01}
{'origin_loss': 2.5095651149749756, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 2.4884119033813477, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.01}
{'origin_loss': 2.4899842739105225, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.01}
{'origin_loss': 2.43167781829834, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 2.472752809524536, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.585815191268921, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.01}
{'origin_loss': 2.5069549083709717, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.01}
{'loss': 196.1774, 'grad_norm': 2.202566623687744, 'learning_rate': 9.72e-06, 'epoch': 0.01}
{'origin_loss': 2.7457754611968994, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.01}
{'origin_loss': 2.4408626556396484, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.01}
{'origin_loss': 2.417571783065796, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.01}
{'origin_loss': 2.4418437480926514, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.01}
{'origin_loss': 2.504462957382202, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.01}
{'origin_loss': 2.3123295307159424, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.01}
{'origin_loss': 2.5075268745422363, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.01}
{'origin_loss': 2.311824321746826, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.01}
{'loss': 203.5228, 'grad_norm': 5.427739143371582, 'learning_rate': 9.7e-06, 'epoch': 0.01}
{'origin_loss': 2.2457125186920166, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.01}
{'origin_loss': 2.3676846027374268, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.01}
{'origin_loss': 2.5362164974212646, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.01}
{'origin_loss': 2.283155679702759, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.01}
{'origin_loss': 2.354818344116211, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.01}
{'origin_loss': 2.2811896800994873, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.01}
{'origin_loss': 2.117638349533081, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.01}
{'origin_loss': 2.3531339168548584, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.01}
{'loss': 207.505, 'grad_norm': 1.3898520469665527, 'learning_rate': 9.68e-06, 'epoch': 0.01}
{'origin_loss': 2.355928421020508, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.01}
{'origin_loss': 2.325047731399536, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.01}
{'origin_loss': 2.443337917327881, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.01}
{'origin_loss': 2.178351879119873, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.01}
{'origin_loss': 2.4502902030944824, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.01}
{'origin_loss': 2.249969005584717, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.01}
{'origin_loss': 2.197582244873047, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.01}
{'origin_loss': 2.287151575088501, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.01}
{'loss': 212.4047, 'grad_norm': 0.9891669154167175, 'learning_rate': 9.66e-06, 'epoch': 0.01}
{'origin_loss': 2.4074532985687256, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.01}
{'origin_loss': 2.1532559394836426, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.01}
{'origin_loss': 2.163281202316284, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.01}
{'origin_loss': 2.456791639328003, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.01}
{'origin_loss': 2.2799758911132812, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.01}
{'origin_loss': 2.060213565826416, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.01}
{'origin_loss': 2.3835082054138184, 'mask_loss': 0.1810302734375, 'mask_rate': 0.425537109375, 'epoch': 0.01}
{'origin_loss': 2.3512492179870605, 'mask_loss': 0.1788330078125, 'mask_rate': 0.4228515625, 'epoch': 0.01}
{'loss': 218.8601, 'grad_norm': 220.08132934570312, 'learning_rate': 9.640000000000001e-06, 'epoch': 0.01}
{'origin_loss': 2.4434869289398193, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.01}
{'origin_loss': 2.3382408618927, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.01}
{'origin_loss': 2.197619915008545, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.01}
{'origin_loss': 2.409017562866211, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.01}
{'origin_loss': 2.0190281867980957, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.01}
{'origin_loss': 2.3717825412750244, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.01}
{'origin_loss': 2.2745511531829834, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.01}
{'origin_loss': 2.3204758167266846, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.01}
{'loss': 205.7968, 'grad_norm': 1.5955392122268677, 'learning_rate': 9.620000000000001e-06, 'epoch': 0.01}
{'origin_loss': 1.9981709718704224, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 2.2653744220733643, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.01}
{'origin_loss': 2.238309383392334, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.01}
{'origin_loss': 2.234999179840088, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.01}
{'origin_loss': 2.444540023803711, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 2.3302788734436035, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.01}
{'origin_loss': 2.3276214599609375, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.01}
{'origin_loss': 2.2706427574157715, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.01}
{'loss': 197.5294, 'grad_norm': 0.7604877948760986, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.01}
{'origin_loss': 2.2787249088287354, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.01}
{'origin_loss': 2.498650550842285, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.01}
{'origin_loss': 2.2495245933532715, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.01}
{'origin_loss': 2.2686996459960938, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.01}
{'origin_loss': 2.3182270526885986, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.01}
{'origin_loss': 2.282231092453003, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.1627655029296875, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.01}
{'origin_loss': 2.109489679336548, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.01}
{'loss': 192.0835, 'grad_norm': 0.8449940085411072, 'learning_rate': 9.58e-06, 'epoch': 0.01}
{'origin_loss': 2.141974449157715, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.01}
{'origin_loss': 2.136730432510376, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.01}
{'origin_loss': 1.9777580499649048, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.01}
{'origin_loss': 2.3262929916381836, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.01}
{'origin_loss': 2.2510881423950195, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.01}
{'origin_loss': 2.0279877185821533, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.0448014736175537, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.01}
{'origin_loss': 1.9753613471984863, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.01}
{'loss': 190.2977, 'grad_norm': 0.7659944891929626, 'learning_rate': 9.56e-06, 'epoch': 0.01}
{'origin_loss': 2.299612283706665, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.01}
{'origin_loss': 2.05326509475708, 'mask_loss': 0.15087890625, 'mask_rate': 0.388427734375, 'epoch': 0.01}
{'origin_loss': 2.1886403560638428, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.01}
{'origin_loss': 2.259615659713745, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.01}
{'origin_loss': 2.1293671131134033, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.01}
{'origin_loss': 2.1997897624969482, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.01}
{'origin_loss': 2.1878795623779297, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.01}
{'origin_loss': 2.252408981323242, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.01}
{'loss': 188.7588, 'grad_norm': 0.68985515832901, 'learning_rate': 9.54e-06, 'epoch': 0.01}
{'origin_loss': 2.4694619178771973, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.01}
{'origin_loss': 2.220867395401001, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.01}
{'origin_loss': 2.259657859802246, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.01}
{'origin_loss': 2.1996817588806152, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.01}
{'origin_loss': 2.2629101276397705, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.01}
{'origin_loss': 2.888019323348999, 'mask_loss': 0.1485595703125, 'mask_rate': 0.385498046875, 'epoch': 0.01}
{'origin_loss': 2.3159220218658447, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.01}
{'origin_loss': 2.1714813709259033, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.01}
{'loss': 188.3329, 'grad_norm': 3.936243772506714, 'learning_rate': 9.52e-06, 'epoch': 0.01}
{'origin_loss': 2.401538848876953, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.01}
{'origin_loss': 2.1860246658325195, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.01}
{'origin_loss': 1.7189772129058838, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.01}
{'origin_loss': 2.1993706226348877, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.01}
{'origin_loss': 2.110426425933838, 'mask_loss': 0.1505126953125, 'mask_rate': 0.387939453125, 'epoch': 0.01}
{'origin_loss': 2.376067638397217, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.01}
{'origin_loss': 2.1535534858703613, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.01}
{'origin_loss': 2.2223901748657227, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.01}
{'loss': 188.7023, 'grad_norm': 1.509242057800293, 'learning_rate': 9.5e-06, 'epoch': 0.01}
{'origin_loss': 2.0979673862457275, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.01}
{'origin_loss': 2.187633752822876, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.01}
{'origin_loss': 2.1667544841766357, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.308316469192505, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.01}
{'origin_loss': 2.411039352416992, 'mask_loss': 0.150634765625, 'mask_rate': 0.38818359375, 'epoch': 0.01}
{'origin_loss': 2.1793055534362793, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.01}
{'origin_loss': 2.1608142852783203, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.01}
{'origin_loss': 2.0357666015625, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.01}
{'loss': 188.7716, 'grad_norm': 1.4972496032714844, 'learning_rate': 9.48e-06, 'epoch': 0.01}
{'origin_loss': 2.1144039630889893, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.01}
{'origin_loss': 2.2742369174957275, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.01}
{'origin_loss': 2.1152212619781494, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.01}
{'origin_loss': 2.368225336074829, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.01}
{'origin_loss': 2.1573798656463623, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.01}
{'origin_loss': 2.235422134399414, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.01}
{'origin_loss': 2.0724825859069824, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.01}
{'origin_loss': 2.2219908237457275, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.01}
{'loss': 190.2262, 'grad_norm': 3.8703107833862305, 'learning_rate': 9.460000000000001e-06, 'epoch': 0.01}
{'origin_loss': 2.0739119052886963, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.01}
{'origin_loss': 2.2548913955688477, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.322861909866333, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.01}
{'origin_loss': 2.3094351291656494, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.01}
{'origin_loss': 1.9263083934783936, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.01}
{'origin_loss': 2.253753662109375, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.01}
{'origin_loss': 2.3098621368408203, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.01}
{'origin_loss': 2.204197645187378, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.01}
{'loss': 191.2538, 'grad_norm': 4.158788204193115, 'learning_rate': 9.440000000000001e-06, 'epoch': 0.01}
{'origin_loss': 2.1087629795074463, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'origin_loss': 2.282409906387329, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.01}
{'origin_loss': 2.3978686332702637, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.01}
{'origin_loss': 2.286386013031006, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.01}
{'origin_loss': 2.2834248542785645, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.01}
{'origin_loss': 2.1208102703094482, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.01}
{'origin_loss': 2.1507411003112793, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.01}
{'origin_loss': 2.184838056564331, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.01}
{'loss': 191.68, 'grad_norm': 0.7880619168281555, 'learning_rate': 9.42e-06, 'epoch': 0.01}
{'origin_loss': 2.041498899459839, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.01}
{'origin_loss': 2.1146066188812256, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.01}
{'origin_loss': 2.189033031463623, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.01}
{'origin_loss': 2.1851422786712646, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.01}
{'origin_loss': 2.110813856124878, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.01}
{'origin_loss': 2.2313244342803955, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.01}
{'origin_loss': 2.055567741394043, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.01}
{'origin_loss': 2.2098560333251953, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.01}
{'loss': 193.6266, 'grad_norm': 0.5986951589584351, 'learning_rate': 9.4e-06, 'epoch': 0.01}
{'origin_loss': 2.102557897567749, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.01}
{'origin_loss': 2.092967987060547, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.01}
{'origin_loss': 2.306823253631592, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.01}
{'origin_loss': 2.233640670776367, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.01}
{'origin_loss': 2.156419515609741, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.01}
{'origin_loss': 2.282433271408081, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.01}
{'origin_loss': 2.213149309158325, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.01}
{'origin_loss': 2.107656478881836, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.01}
{'loss': 193.4995, 'grad_norm': 0.5356096625328064, 'learning_rate': 9.38e-06, 'epoch': 0.01}
{'origin_loss': 2.0790135860443115, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 2.059159994125366, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.01}
{'origin_loss': 1.9176442623138428, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.01}
{'origin_loss': 2.214803695678711, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.01}
{'origin_loss': 1.7605844736099243, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.01}
{'origin_loss': 2.1044774055480957, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.01}
{'origin_loss': 2.1034231185913086, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.01}
{'origin_loss': 2.1607208251953125, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.01}
{'loss': 196.3, 'grad_norm': 0.77360600233078, 'learning_rate': 9.360000000000002e-06, 'epoch': 0.01}
{'origin_loss': 2.255275249481201, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.01}
{'origin_loss': 2.345715284347534, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.01}
{'origin_loss': 1.998439073562622, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.01}
{'origin_loss': 2.049628973007202, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.01}
{'origin_loss': 2.1292641162872314, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.01}
{'origin_loss': 2.2740516662597656, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.01}
{'origin_loss': 2.3863532543182373, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.01}
{'origin_loss': 2.2076919078826904, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.01}
{'loss': 197.3621, 'grad_norm': 0.58893221616745, 'learning_rate': 9.340000000000002e-06, 'epoch': 0.01}
{'origin_loss': 2.1678366661071777, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.01}
{'origin_loss': 2.212667465209961, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.01}
{'origin_loss': 2.1077723503112793, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.01}
{'origin_loss': 2.1226465702056885, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.01}
{'origin_loss': 1.9999682903289795, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.01}
{'origin_loss': 2.1327013969421387, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.01}
{'origin_loss': 2.502007246017456, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.01}
{'origin_loss': 2.244950771331787, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.01}
{'loss': 199.1863, 'grad_norm': 0.6420148611068726, 'learning_rate': 9.32e-06, 'epoch': 0.01}
{'origin_loss': 2.0854287147521973, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.01}
{'origin_loss': 2.2097549438476562, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.01}
{'origin_loss': 2.088646411895752, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.01}
{'origin_loss': 2.352327823638916, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.01}
{'origin_loss': 2.06740403175354, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.01}
{'origin_loss': 2.124990224838257, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.01}
{'origin_loss': 2.1610586643218994, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.01}
{'origin_loss': 2.1761817932128906, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.01}
{'loss': 200.5332, 'grad_norm': 0.5458565354347229, 'learning_rate': 9.3e-06, 'epoch': 0.02}
{'origin_loss': 2.0950865745544434, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.02}
{'origin_loss': 2.127450466156006, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.02}
{'origin_loss': 2.0818710327148438, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.02}
{'origin_loss': 1.7444924116134644, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.02}
{'origin_loss': 1.8742700815200806, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.02}
{'origin_loss': 2.0658318996429443, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.02}
{'origin_loss': 2.0254008769989014, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.02}
{'origin_loss': 2.2878262996673584, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.02}
{'loss': 202.7097, 'grad_norm': 0.5335988402366638, 'learning_rate': 9.280000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.429405927658081, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.02}
{'origin_loss': 2.1938719749450684, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.02}
{'origin_loss': 2.1031954288482666, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.02}
{'origin_loss': 1.9370577335357666, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.02}
{'origin_loss': 2.1022679805755615, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.02}
{'origin_loss': 2.1299662590026855, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.02}
{'origin_loss': 2.1078896522521973, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.02}
{'origin_loss': 2.1701316833496094, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.02}
{'loss': 206.053, 'grad_norm': 0.5667902231216431, 'learning_rate': 9.260000000000001e-06, 'epoch': 0.02}
{'origin_loss': 1.9844279289245605, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.02}
{'origin_loss': 2.230875015258789, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.02}
{'origin_loss': 2.125189781188965, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.02}
{'origin_loss': 2.098132371902466, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.02}
{'origin_loss': 2.083224058151245, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.02}
{'origin_loss': 2.0290255546569824, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.02}
{'origin_loss': 2.203105926513672, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.02}
{'origin_loss': 2.0886595249176025, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.02}
{'loss': 209.0897, 'grad_norm': 0.6494855880737305, 'learning_rate': 9.240000000000001e-06, 'epoch': 0.02}
{'origin_loss': 1.9031784534454346, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.02}
{'origin_loss': 2.2142467498779297, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.02}
{'origin_loss': 2.0459706783294678, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.02}
{'origin_loss': 2.088820695877075, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.02}
{'origin_loss': 2.279744863510132, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.02}
{'origin_loss': 2.103055000305176, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.02}
{'origin_loss': 2.1107964515686035, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.02}
{'origin_loss': 2.195230007171631, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.02}
{'loss': 211.5864, 'grad_norm': 0.5627281069755554, 'learning_rate': 9.220000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.2915396690368652, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.02}
{'origin_loss': 2.1377291679382324, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.02}
{'origin_loss': 2.4453847408294678, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.02}
{'origin_loss': 1.8472599983215332, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.02}
{'origin_loss': 2.1467370986938477, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.02}
{'origin_loss': 1.8685219287872314, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.02}
{'origin_loss': 2.168107509613037, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.02}
{'origin_loss': 2.2165141105651855, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.02}
{'loss': 213.109, 'grad_norm': 0.5118547677993774, 'learning_rate': 9.200000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.1388866901397705, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.02}
{'origin_loss': 2.0664288997650146, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.02}
{'origin_loss': 1.9789310693740845, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.02}
{'origin_loss': 2.105377197265625, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.02}
{'origin_loss': 2.1899240016937256, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.02}
{'origin_loss': 2.099853754043579, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.02}
{'origin_loss': 2.118734836578369, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.02}
{'origin_loss': 2.2926993370056152, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.02}
{'loss': 215.8895, 'grad_norm': 0.5233612060546875, 'learning_rate': 9.180000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.3061206340789795, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.02}
{'origin_loss': 2.118079900741577, 'mask_loss': 0.178955078125, 'mask_rate': 0.423095703125, 'epoch': 0.02}
{'origin_loss': 1.9691376686096191, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.02}
{'origin_loss': 2.0597124099731445, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.02}
{'origin_loss': 2.1684391498565674, 'mask_loss': 0.17919921875, 'mask_rate': 0.42333984375, 'epoch': 0.02}
{'origin_loss': 1.853562355041504, 'mask_loss': 0.1788330078125, 'mask_rate': 0.4228515625, 'epoch': 0.02}
{'origin_loss': 2.200838565826416, 'mask_loss': 0.17822265625, 'mask_rate': 0.422119140625, 'epoch': 0.02}
{'origin_loss': 2.0055289268493652, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.02}
{'loss': 218.4602, 'grad_norm': 0.850995659828186, 'learning_rate': 9.16e-06, 'epoch': 0.02}
{'origin_loss': 1.9001476764678955, 'mask_loss': 0.1783447265625, 'mask_rate': 0.42236328125, 'epoch': 0.02}
{'origin_loss': 2.0209455490112305, 'mask_loss': 0.183349609375, 'mask_rate': 0.42822265625, 'epoch': 0.02}
{'origin_loss': 1.936936378479004, 'mask_loss': 0.1798095703125, 'mask_rate': 0.424072265625, 'epoch': 0.02}
{'origin_loss': 2.1556100845336914, 'mask_loss': 0.178955078125, 'mask_rate': 0.423095703125, 'epoch': 0.02}
{'origin_loss': 1.915722131729126, 'mask_loss': 0.1800537109375, 'mask_rate': 0.42431640625, 'epoch': 0.02}
{'origin_loss': 2.2724759578704834, 'mask_loss': 0.1800537109375, 'mask_rate': 0.42431640625, 'epoch': 0.02}
{'origin_loss': 2.064570188522339, 'mask_loss': 0.1800537109375, 'mask_rate': 0.42431640625, 'epoch': 0.02}
{'origin_loss': 2.174738645553589, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.02}
{'loss': 220.7114, 'grad_norm': 339.19281005859375, 'learning_rate': 9.14e-06, 'epoch': 0.02}
{'origin_loss': 2.24745512008667, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.02}
{'origin_loss': 1.9806537628173828, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.02}
{'origin_loss': 2.117900848388672, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.02}
{'origin_loss': 2.2662277221679688, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.02}
{'origin_loss': 2.286118507385254, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.02}
{'origin_loss': 2.0368738174438477, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.02}
{'origin_loss': 2.0456066131591797, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.02}
{'origin_loss': 2.3246536254882812, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.02}
{'loss': 213.5851, 'grad_norm': 0.7609156370162964, 'learning_rate': 9.12e-06, 'epoch': 0.02}
{'origin_loss': 2.2572824954986572, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.02}
{'origin_loss': 2.1055238246917725, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.02}
{'origin_loss': 2.0277163982391357, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.02}
{'origin_loss': 1.9856224060058594, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.02}
{'origin_loss': 2.1822879314422607, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.02}
{'origin_loss': 2.099759817123413, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.02}
{'origin_loss': 2.090116500854492, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.02}
{'origin_loss': 2.108652114868164, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.02}
{'loss': 207.2009, 'grad_norm': 0.4146360456943512, 'learning_rate': 9.100000000000001e-06, 'epoch': 0.02}
{'origin_loss': 1.8654661178588867, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.02}
{'origin_loss': 1.803884744644165, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.02}
{'origin_loss': 1.9490000009536743, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.02}
{'origin_loss': 2.2160897254943848, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.02}
{'origin_loss': 2.222402334213257, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.02}
{'origin_loss': 1.9772405624389648, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.02}
{'origin_loss': 2.2640976905822754, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.02}
{'origin_loss': 2.0530920028686523, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.02}
{'loss': 201.2471, 'grad_norm': 0.4722166657447815, 'learning_rate': 9.080000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.1627213954925537, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.02}
{'origin_loss': 2.174375295639038, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.02}
{'origin_loss': 2.1517155170440674, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.02}
{'origin_loss': 2.021068811416626, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.02}
{'origin_loss': 1.8476120233535767, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.02}
{'origin_loss': 2.0936245918273926, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.02}
{'origin_loss': 2.139106273651123, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.02}
{'origin_loss': 2.122920513153076, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.02}
{'loss': 200.4017, 'grad_norm': 0.4525810480117798, 'learning_rate': 9.060000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.1198997497558594, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.02}
{'origin_loss': 2.0906052589416504, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.02}
{'origin_loss': 2.0558369159698486, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 1.9870498180389404, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.02}
{'origin_loss': 1.9650993347167969, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.02}
{'origin_loss': 2.2116291522979736, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.02}
{'origin_loss': 2.181650161743164, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.02}
{'origin_loss': 2.0315184593200684, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.02}
{'loss': 196.6898, 'grad_norm': 0.6707261204719543, 'learning_rate': 9.040000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.0678844451904297, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.02}
{'origin_loss': 1.8568854331970215, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.0128962993621826, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.02}
{'origin_loss': 2.2626547813415527, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.02}
{'origin_loss': 1.9670755863189697, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.02}
{'origin_loss': 1.9672887325286865, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.02}
{'origin_loss': 2.1881256103515625, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.02}
{'origin_loss': 2.2610833644866943, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.02}
{'loss': 195.9167, 'grad_norm': 0.4382028877735138, 'learning_rate': 9.020000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.1443824768066406, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.02}
{'origin_loss': 2.107670307159424, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.02}
{'origin_loss': 2.102701425552368, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.02}
{'origin_loss': 2.2929961681365967, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.02}
{'origin_loss': 2.1566221714019775, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.02}
{'origin_loss': 2.115541458129883, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.02}
{'origin_loss': 2.122075080871582, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.02}
{'origin_loss': 2.0345091819763184, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.02}
{'loss': 194.4158, 'grad_norm': 0.5048220753669739, 'learning_rate': 9e-06, 'epoch': 0.02}
{'origin_loss': 2.114060163497925, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.02}
{'origin_loss': 1.9728827476501465, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.02}
{'origin_loss': 1.9807391166687012, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.02}
{'origin_loss': 2.152493476867676, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.02}
{'origin_loss': 2.0415873527526855, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.02}
{'origin_loss': 2.076831817626953, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.02}
{'origin_loss': 2.0794522762298584, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.02}
{'origin_loss': 2.0697638988494873, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.02}
{'loss': 193.2016, 'grad_norm': 0.48934313654899597, 'learning_rate': 8.98e-06, 'epoch': 0.02}
{'origin_loss': 2.0934085845947266, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.02}
{'origin_loss': 1.966902256011963, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.02}
{'origin_loss': 2.218205451965332, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.02}
{'origin_loss': 2.0748589038848877, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.02}
{'origin_loss': 2.322169780731201, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.02}
{'origin_loss': 1.918288230895996, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.02}
{'origin_loss': 1.938279151916504, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.02}
{'origin_loss': 2.0946342945098877, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.02}
{'loss': 190.9377, 'grad_norm': 0.4870491623878479, 'learning_rate': 8.96e-06, 'epoch': 0.02}
{'origin_loss': 1.9088866710662842, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.02}
{'origin_loss': 2.0400547981262207, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.02}
{'origin_loss': 1.9540295600891113, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.02}
{'origin_loss': 2.03182315826416, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.02}
{'origin_loss': 2.2769715785980225, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.02}
{'origin_loss': 2.0391499996185303, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.02}
{'origin_loss': 2.145298719406128, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.02}
{'origin_loss': 2.3253915309906006, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.02}
{'loss': 193.4183, 'grad_norm': 0.5258029699325562, 'learning_rate': 8.94e-06, 'epoch': 0.02}
{'origin_loss': 1.9711520671844482, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.02}
{'origin_loss': 2.134620428085327, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.02}
{'origin_loss': 2.1249876022338867, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.02}
{'origin_loss': 2.1681222915649414, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.02}
{'origin_loss': 2.08616042137146, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.02}
{'origin_loss': 2.07246732711792, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.02}
{'origin_loss': 2.1602351665496826, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.02}
{'origin_loss': 2.0021142959594727, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.02}
{'loss': 192.4806, 'grad_norm': 0.8383613228797913, 'learning_rate': 8.920000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.1426126956939697, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.02}
{'origin_loss': 2.1876211166381836, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 1.9747042655944824, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.02}
{'origin_loss': 1.9151241779327393, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.02}
{'origin_loss': 2.237837553024292, 'mask_loss': 0.1519775390625, 'mask_rate': 0.389892578125, 'epoch': 0.02}
{'origin_loss': 1.8971909284591675, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.02}
{'origin_loss': 2.069044351577759, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.02}
{'origin_loss': 1.994594931602478, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'loss': 191.9586, 'grad_norm': 0.6260800361633301, 'learning_rate': 8.900000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.1178805828094482, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.02}
{'origin_loss': 2.0651121139526367, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.02}
{'origin_loss': 2.1270718574523926, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.02}
{'origin_loss': 1.9986873865127563, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.02}
{'origin_loss': 1.9891347885131836, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.02}
{'origin_loss': 2.1173007488250732, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.02}
{'origin_loss': 2.028219699859619, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.289241313934326, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.02}
{'loss': 193.1072, 'grad_norm': 0.6859664916992188, 'learning_rate': 8.880000000000001e-06, 'epoch': 0.02}
{'origin_loss': 2.225287437438965, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.103537082672119, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.02}
{'origin_loss': 1.9745787382125854, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.02}
{'origin_loss': 2.1409385204315186, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.16391658782959, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.02}
{'origin_loss': 1.8899235725402832, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.02}
{'origin_loss': 2.1809582710266113, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.02}
{'origin_loss': 1.9262197017669678, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.02}
{'loss': 195.2944, 'grad_norm': 0.4740365743637085, 'learning_rate': 8.860000000000002e-06, 'epoch': 0.02}
{'origin_loss': 2.1025643348693848, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.08478045463562, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.02}
{'origin_loss': 1.9742522239685059, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.02}
{'origin_loss': 2.048227548599243, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.02}
{'origin_loss': 1.9896825551986694, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.02}
{'origin_loss': 2.0804147720336914, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.02}
{'origin_loss': 2.3023619651794434, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.02}
{'origin_loss': 1.911516547203064, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.02}
{'loss': 195.0617, 'grad_norm': 0.42673376202583313, 'learning_rate': 8.84e-06, 'epoch': 0.03}
{'origin_loss': 2.1407973766326904, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.03}
{'origin_loss': 2.1380560398101807, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.03}
{'origin_loss': 1.9771391153335571, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.03}
{'origin_loss': 1.8061890602111816, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.03}
{'origin_loss': 2.089198589324951, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.03}
{'origin_loss': 2.1565849781036377, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.03}
{'origin_loss': 2.242332696914673, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.03}
{'origin_loss': 2.089691162109375, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.03}
{'loss': 198.1425, 'grad_norm': 0.44596320390701294, 'learning_rate': 8.82e-06, 'epoch': 0.03}
{'origin_loss': 2.363490581512451, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.03}
{'origin_loss': 2.3479530811309814, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.03}
{'origin_loss': 2.1648499965667725, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.03}
{'origin_loss': 2.0649890899658203, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.03}
{'origin_loss': 2.2576611042022705, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.03}
{'origin_loss': 2.0142147541046143, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.03}
{'origin_loss': 1.6738065481185913, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.03}
{'origin_loss': 2.153731107711792, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.03}
{'loss': 199.8176, 'grad_norm': 0.3949662744998932, 'learning_rate': 8.8e-06, 'epoch': 0.03}
{'origin_loss': 2.0537831783294678, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.03}
{'origin_loss': 2.0292978286743164, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 2.193524122238159, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.03}
{'origin_loss': 2.0764050483703613, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.03}
{'origin_loss': 2.1410939693450928, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.03}
{'origin_loss': 2.127714157104492, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.03}
{'origin_loss': 2.4143760204315186, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 2.041785717010498, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.03}
{'loss': 199.6035, 'grad_norm': 0.47818902134895325, 'learning_rate': 8.78e-06, 'epoch': 0.03}
{'origin_loss': 2.106577157974243, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.03}
{'origin_loss': 1.8669575452804565, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.03}
{'origin_loss': 2.2004518508911133, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 2.1478137969970703, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.03}
{'origin_loss': 1.9136157035827637, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.03}
{'origin_loss': 2.1180834770202637, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.03}
{'origin_loss': 2.14748215675354, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.03}
{'origin_loss': 2.0076770782470703, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.03}
{'loss': 200.7511, 'grad_norm': 0.404822438955307, 'learning_rate': 8.76e-06, 'epoch': 0.03}
{'origin_loss': 2.050139904022217, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.03}
{'origin_loss': 2.1357531547546387, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.03}
{'origin_loss': 1.9264274835586548, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.03}
{'origin_loss': 1.9826456308364868, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.03}
{'origin_loss': 1.9552983045578003, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.03}
{'origin_loss': 2.0363106727600098, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.03}
{'origin_loss': 2.0453829765319824, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.03}
{'origin_loss': 1.9383963346481323, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.03}
{'loss': 201.8682, 'grad_norm': 0.5469972491264343, 'learning_rate': 8.740000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.209622383117676, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.03}
{'origin_loss': 1.9135587215423584, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.03}
{'origin_loss': 2.173588514328003, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.03}
{'origin_loss': 2.1933515071868896, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.03}
{'origin_loss': 2.1913018226623535, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.03}
{'origin_loss': 1.8336814641952515, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.03}
{'origin_loss': 2.058406352996826, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 2.007117986679077, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.03}
{'loss': 203.6038, 'grad_norm': 0.509530782699585, 'learning_rate': 8.720000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.123831033706665, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.03}
{'origin_loss': 1.83909273147583, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.03}
{'origin_loss': 2.1674046516418457, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.03}
{'origin_loss': 1.9921784400939941, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.03}
{'origin_loss': 1.8766121864318848, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.03}
{'origin_loss': 2.011131763458252, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.03}
{'origin_loss': 2.2766220569610596, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.03}
{'origin_loss': 2.038623332977295, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.03}
{'loss': 205.4469, 'grad_norm': 0.3928665518760681, 'learning_rate': 8.700000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.0858912467956543, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.03}
{'origin_loss': 2.124669313430786, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.03}
{'origin_loss': 1.9941531419754028, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.03}
{'origin_loss': 2.0532290935516357, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.03}
{'origin_loss': 1.845857858657837, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.03}
{'origin_loss': 2.2706570625305176, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.03}
{'origin_loss': 2.1078879833221436, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.03}
{'origin_loss': 2.0398762226104736, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.03}
{'loss': 203.1278, 'grad_norm': 0.3962027132511139, 'learning_rate': 8.68e-06, 'epoch': 0.03}
{'origin_loss': 2.1524627208709717, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.03}
{'origin_loss': 1.974718451499939, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.03}
{'origin_loss': 2.0565898418426514, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.03}
{'origin_loss': 2.0839836597442627, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.03}
{'origin_loss': 2.035820960998535, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.03}
{'origin_loss': 2.0156044960021973, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.03}
{'origin_loss': 2.077672243118286, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.03}
{'origin_loss': 1.9861011505126953, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.03}
{'loss': 208.501, 'grad_norm': 0.3906657099723816, 'learning_rate': 8.66e-06, 'epoch': 0.03}
{'origin_loss': 1.9904247522354126, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.03}
{'origin_loss': 2.210287570953369, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.03}
{'origin_loss': 2.060668706893921, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.03}
{'origin_loss': 2.3751492500305176, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.03}
{'origin_loss': 1.849812388420105, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.03}
{'origin_loss': 1.9560036659240723, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.03}
{'origin_loss': 2.157973527908325, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.03}
{'origin_loss': 2.2701570987701416, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.03}
{'loss': 210.1557, 'grad_norm': 0.5042546987533569, 'learning_rate': 8.64e-06, 'epoch': 0.03}
{'origin_loss': 2.0803141593933105, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.03}
{'origin_loss': 2.0864744186401367, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.03}
{'origin_loss': 1.9185107946395874, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.03}
{'origin_loss': 2.279806613922119, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.03}
{'origin_loss': 2.0743656158447266, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.03}
{'origin_loss': 1.996955156326294, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.03}
{'origin_loss': 2.0449957847595215, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.03}
{'origin_loss': 2.16599440574646, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.03}
{'loss': 208.6747, 'grad_norm': 0.39712586998939514, 'learning_rate': 8.62e-06, 'epoch': 0.03}
{'origin_loss': 1.8643218278884888, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.03}
{'origin_loss': 1.8921672105789185, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.03}
{'origin_loss': 2.116361618041992, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.03}
{'origin_loss': 1.936894178390503, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.03}
{'origin_loss': 2.1132757663726807, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.03}
{'origin_loss': 2.388698101043701, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.03}
{'origin_loss': 2.0434653759002686, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.03}
{'origin_loss': 1.9164499044418335, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.03}
{'loss': 212.1589, 'grad_norm': 0.4159787595272064, 'learning_rate': 8.6e-06, 'epoch': 0.03}
{'origin_loss': 1.875977873802185, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.03}
{'origin_loss': 1.8751710653305054, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.03}
{'origin_loss': 2.129352569580078, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.03}
{'origin_loss': 2.17026686668396, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.03}
{'origin_loss': 2.238222122192383, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.03}
{'origin_loss': 1.9686602354049683, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.03}
{'origin_loss': 1.8988711833953857, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.03}
{'origin_loss': 2.0357186794281006, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.03}
{'loss': 212.3209, 'grad_norm': 0.45293959975242615, 'learning_rate': 8.580000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.133942127227783, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.03}
{'origin_loss': 2.205120086669922, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.03}
{'origin_loss': 2.1390116214752197, 'mask_loss': 0.1783447265625, 'mask_rate': 0.42236328125, 'epoch': 0.03}
{'origin_loss': 2.0033020973205566, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.03}
{'origin_loss': 2.101449728012085, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.03}
{'origin_loss': 2.1806328296661377, 'mask_loss': 0.17822265625, 'mask_rate': 0.422119140625, 'epoch': 0.03}
{'origin_loss': 2.139207124710083, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.03}
{'origin_loss': 2.0692601203918457, 'mask_loss': 0.17822265625, 'mask_rate': 0.422119140625, 'epoch': 0.03}
{'loss': 216.6684, 'grad_norm': 150.86337280273438, 'learning_rate': 8.560000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.073347568511963, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.03}
{'origin_loss': 1.923933744430542, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.03}
{'origin_loss': 2.194866180419922, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.03}
{'origin_loss': 2.2357029914855957, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.03}
{'origin_loss': 2.2109217643737793, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.03}
{'origin_loss': 2.0338783264160156, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.03}
{'origin_loss': 2.1164674758911133, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.03}
{'origin_loss': 2.297086477279663, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.03}
{'loss': 210.8545, 'grad_norm': 0.4000876843929291, 'learning_rate': 8.540000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.0968127250671387, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.03}
{'origin_loss': 1.990744948387146, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.03}
{'origin_loss': 2.109801769256592, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.03}
{'origin_loss': 1.9928594827651978, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.03}
{'origin_loss': 2.102419853210449, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.03}
{'origin_loss': 1.9691838026046753, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.03}
{'origin_loss': 1.815848708152771, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.03}
{'origin_loss': 2.1180078983306885, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.03}
{'loss': 204.1182, 'grad_norm': 0.3897290527820587, 'learning_rate': 8.52e-06, 'epoch': 0.03}
{'origin_loss': 2.2736830711364746, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.03}
{'origin_loss': 2.1454925537109375, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.03}
{'origin_loss': 1.9420886039733887, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.03}
{'origin_loss': 2.2028284072875977, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.03}
{'origin_loss': 2.111279010772705, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 1.9192038774490356, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.03}
{'origin_loss': 2.212543487548828, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.03}
{'origin_loss': 1.9897023439407349, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.03}
{'loss': 200.1152, 'grad_norm': 0.6074291467666626, 'learning_rate': 8.5e-06, 'epoch': 0.03}
{'origin_loss': 2.154444456100464, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.03}
{'origin_loss': 2.099034309387207, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.03}
{'origin_loss': 2.0813987255096436, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.03}
{'origin_loss': 2.026409149169922, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.03}
{'origin_loss': 1.931700348854065, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.03}
{'origin_loss': 2.022449016571045, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.03}
{'origin_loss': 2.1123528480529785, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.03}
{'origin_loss': 2.178760051727295, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.03}
{'loss': 196.6383, 'grad_norm': 0.45627614855766296, 'learning_rate': 8.48e-06, 'epoch': 0.03}
{'origin_loss': 2.0633656978607178, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.03}
{'origin_loss': 2.14235782623291, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.03}
{'origin_loss': 2.081162691116333, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.03}
{'origin_loss': 2.0750324726104736, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.03}
{'origin_loss': 2.073255777359009, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.03}
{'origin_loss': 2.1059088706970215, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.03}
{'origin_loss': 2.0150575637817383, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.03}
{'origin_loss': 1.7640981674194336, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.03}
{'loss': 195.4619, 'grad_norm': 0.44441449642181396, 'learning_rate': 8.46e-06, 'epoch': 0.03}
{'origin_loss': 2.0297203063964844, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.03}
{'origin_loss': 2.085200786590576, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.03}
{'origin_loss': 1.9822723865509033, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.03}
{'origin_loss': 1.808242678642273, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.03}
{'origin_loss': 2.0038037300109863, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.03}
{'origin_loss': 2.1541826725006104, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.03}
{'origin_loss': 2.061666488647461, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.03}
{'origin_loss': 2.0097036361694336, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.03}
{'loss': 193.2668, 'grad_norm': 0.4398936331272125, 'learning_rate': 8.44e-06, 'epoch': 0.03}
{'origin_loss': 2.061030149459839, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.03}
{'origin_loss': 1.9267817735671997, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.03}
{'origin_loss': 2.1244406700134277, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.03}
{'origin_loss': 2.190368890762329, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.03}
{'origin_loss': 1.9071629047393799, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.03}
{'origin_loss': 2.109004020690918, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.03}
{'origin_loss': 2.091064691543579, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.03}
{'origin_loss': 1.9441306591033936, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.03}
{'loss': 192.2005, 'grad_norm': 0.6601197719573975, 'learning_rate': 8.42e-06, 'epoch': 0.03}
{'origin_loss': 2.1975271701812744, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.03}
{'origin_loss': 2.1403567790985107, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.03}
{'origin_loss': 2.296903133392334, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.03}
{'origin_loss': 2.2174222469329834, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.03}
{'origin_loss': 2.137345790863037, 'mask_loss': 0.151123046875, 'mask_rate': 0.388671875, 'epoch': 0.03}
{'origin_loss': 2.1379997730255127, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.03}
{'origin_loss': 1.8751276731491089, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.03}
{'origin_loss': 2.011294364929199, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.03}
{'loss': 189.4705, 'grad_norm': 0.40131255984306335, 'learning_rate': 8.400000000000001e-06, 'epoch': 0.03}
{'origin_loss': 2.0560951232910156, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.03}
{'origin_loss': 2.2491888999938965, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.03}
{'origin_loss': 2.21600604057312, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.03}
{'origin_loss': 2.1894028186798096, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.03}
{'origin_loss': 2.2116355895996094, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.03}
{'origin_loss': 2.1034250259399414, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.03}
{'origin_loss': 2.0535173416137695, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.03}
{'origin_loss': 1.9755662679672241, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.03}
{'loss': 190.6475, 'grad_norm': 0.4164680540561676, 'learning_rate': 8.380000000000001e-06, 'epoch': 0.04}
{'origin_loss': 1.972944736480713, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.04}
{'origin_loss': 1.9518483877182007, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.04}
{'origin_loss': 2.1418824195861816, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.04}
{'origin_loss': 2.014086961746216, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.04}
{'origin_loss': 2.0216064453125, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.04}
{'origin_loss': 2.0308988094329834, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'origin_loss': 2.067441701889038, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'origin_loss': 2.070643663406372, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'loss': 190.9089, 'grad_norm': 0.40982571244239807, 'learning_rate': 8.36e-06, 'epoch': 0.04}
{'origin_loss': 2.051656484603882, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.04}
{'origin_loss': 2.0950517654418945, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.04}
{'origin_loss': 2.043935775756836, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.04}
{'origin_loss': 1.8957762718200684, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'origin_loss': 2.091757297515869, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.04}
{'origin_loss': 2.0501463413238525, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.04}
{'origin_loss': 1.9270281791687012, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.04}
{'origin_loss': 2.043947458267212, 'mask_loss': 0.15185546875, 'mask_rate': 0.3896484375, 'epoch': 0.04}
{'loss': 190.2749, 'grad_norm': 0.4043090045452118, 'learning_rate': 8.34e-06, 'epoch': 0.04}
{'origin_loss': 2.0991547107696533, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.04}
{'origin_loss': 1.9960052967071533, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.04}
{'origin_loss': 2.059779167175293, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.04}
{'origin_loss': 1.9997767210006714, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.04}
{'origin_loss': 2.0946669578552246, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.04}
{'origin_loss': 2.2090585231781006, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.04}
{'origin_loss': 2.011272430419922, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.04}
{'origin_loss': 2.2231528759002686, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.04}
{'loss': 189.5553, 'grad_norm': 0.41930216550827026, 'learning_rate': 8.32e-06, 'epoch': 0.04}
{'origin_loss': 2.1774022579193115, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.04}
{'origin_loss': 2.1358044147491455, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.04}
{'origin_loss': 2.2749276161193848, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'origin_loss': 2.1160647869110107, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.04}
{'origin_loss': 2.0205323696136475, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'origin_loss': 1.9335134029388428, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.04}
{'origin_loss': 1.9777603149414062, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.04}
{'origin_loss': 1.9197559356689453, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.04}
{'loss': 190.382, 'grad_norm': 0.43355321884155273, 'learning_rate': 8.3e-06, 'epoch': 0.04}
{'origin_loss': 2.0240063667297363, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.04}
{'origin_loss': 1.9803894758224487, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.04}
{'origin_loss': 2.1486995220184326, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.04}
{'origin_loss': 1.9420139789581299, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.04}
{'origin_loss': 2.1344919204711914, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.04}
{'origin_loss': 2.173417329788208, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.04}
{'origin_loss': 2.028454065322876, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.04}
{'origin_loss': 2.009958267211914, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.04}
{'loss': 191.6958, 'grad_norm': 0.42088553309440613, 'learning_rate': 8.28e-06, 'epoch': 0.04}
{'origin_loss': 2.1957998275756836, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.04}
{'origin_loss': 1.9993035793304443, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.04}
{'origin_loss': 2.2121024131774902, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.04}
{'origin_loss': 2.162410020828247, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.04}
{'origin_loss': 1.9306854009628296, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.04}
{'origin_loss': 1.9521796703338623, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'origin_loss': 1.916782021522522, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.04}
{'origin_loss': 2.177089214324951, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.04}
{'loss': 190.4745, 'grad_norm': 0.3972455859184265, 'learning_rate': 8.26e-06, 'epoch': 0.04}
{'origin_loss': 2.182544708251953, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.04}
{'origin_loss': 2.141801357269287, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.04}
{'origin_loss': 1.8878369331359863, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.04}
{'origin_loss': 2.055231809616089, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.04}
{'origin_loss': 2.0370707511901855, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.04}
{'origin_loss': 2.0247979164123535, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.04}
{'origin_loss': 1.993660569190979, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.04}
{'origin_loss': 2.0395307540893555, 'mask_loss': 0.1488037109375, 'mask_rate': 0.3857421875, 'epoch': 0.04}
{'loss': 190.3578, 'grad_norm': 0.4490705132484436, 'learning_rate': 8.24e-06, 'epoch': 0.04}
{'origin_loss': 2.1524806022644043, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.04}
{'origin_loss': 2.197481870651245, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.04}
{'origin_loss': 2.024675130844116, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.04}
{'origin_loss': 2.2793960571289062, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.04}
{'origin_loss': 2.1629879474639893, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.04}
{'origin_loss': 1.9885969161987305, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.04}
{'origin_loss': 1.9632388353347778, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.04}
{'origin_loss': 2.116151809692383, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.04}
{'loss': 192.1106, 'grad_norm': 0.39039918780326843, 'learning_rate': 8.220000000000001e-06, 'epoch': 0.04}
{'origin_loss': 1.927398920059204, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'origin_loss': 1.8430906534194946, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.04}
{'origin_loss': 2.1522462368011475, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'origin_loss': 2.174309015274048, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.04}
{'origin_loss': 2.0784828662872314, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.04}
{'origin_loss': 1.9948992729187012, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.04}
{'origin_loss': 1.930572271347046, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.04}
{'origin_loss': 2.180387496948242, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'loss': 193.1289, 'grad_norm': 0.5558077692985535, 'learning_rate': 8.2e-06, 'epoch': 0.04}
{'origin_loss': 1.830603003501892, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.04}
{'origin_loss': 2.072490930557251, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.04}
{'origin_loss': 2.2048377990722656, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.04}
{'origin_loss': 2.1270499229431152, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.04}
{'origin_loss': 2.3209404945373535, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.04}
{'origin_loss': 1.9114512205123901, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.04}
{'origin_loss': 2.2004997730255127, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.04}
{'origin_loss': 1.8940691947937012, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.04}
{'loss': 193.4609, 'grad_norm': 0.39375606179237366, 'learning_rate': 8.18e-06, 'epoch': 0.04}
{'origin_loss': 2.0390801429748535, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.04}
{'origin_loss': 2.018507957458496, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.04}
{'origin_loss': 1.972746729850769, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.04}
{'origin_loss': 1.9085960388183594, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.04}
{'origin_loss': 2.137903928756714, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.04}
{'origin_loss': 2.023667335510254, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.04}
{'origin_loss': 2.1449146270751953, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.04}
{'origin_loss': 2.0684449672698975, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.04}
{'loss': 194.7111, 'grad_norm': 0.38998669385910034, 'learning_rate': 8.16e-06, 'epoch': 0.04}
{'origin_loss': 2.1050596237182617, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.04}
{'origin_loss': 2.038933038711548, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.04}
{'origin_loss': 2.109590768814087, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.04}
{'origin_loss': 1.8281410932540894, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.04}
{'origin_loss': 2.1663739681243896, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.04}
{'origin_loss': 2.301504135131836, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.04}
{'origin_loss': 2.0769217014312744, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.04}
{'origin_loss': 2.25545597076416, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.04}
{'loss': 195.2352, 'grad_norm': 0.4345633089542389, 'learning_rate': 8.14e-06, 'epoch': 0.04}
{'origin_loss': 2.1810338497161865, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.04}
{'origin_loss': 1.9985078573226929, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.04}
{'origin_loss': 2.520624876022339, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.04}
{'origin_loss': 1.9462110996246338, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.04}
{'origin_loss': 2.047006130218506, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.04}
{'origin_loss': 2.1090738773345947, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.04}
{'origin_loss': 2.108461618423462, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.04}
{'origin_loss': 1.9512091875076294, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.04}
{'loss': 195.264, 'grad_norm': 0.3521043062210083, 'learning_rate': 8.120000000000002e-06, 'epoch': 0.04}
{'origin_loss': 2.182948350906372, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.04}
{'origin_loss': 1.9224339723587036, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.04}
{'origin_loss': 2.0836617946624756, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.04}
{'origin_loss': 1.9243710041046143, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.04}
{'origin_loss': 2.014547109603882, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.04}
{'origin_loss': 2.1698591709136963, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.04}
{'origin_loss': 1.994444727897644, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.04}
{'origin_loss': 2.266143560409546, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.04}
{'loss': 197.9604, 'grad_norm': 0.4065232574939728, 'learning_rate': 8.1e-06, 'epoch': 0.04}
{'origin_loss': 2.176175117492676, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.04}
{'origin_loss': 2.069770097732544, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.04}
{'origin_loss': 2.117779016494751, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.04}
{'origin_loss': 2.0582809448242188, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.04}
{'origin_loss': 2.234309673309326, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.04}
{'origin_loss': 2.0522849559783936, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.04}
{'origin_loss': 2.0134968757629395, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.04}
{'origin_loss': 2.254337787628174, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.04}
{'loss': 199.4502, 'grad_norm': 0.43378910422325134, 'learning_rate': 8.08e-06, 'epoch': 0.04}
{'origin_loss': 1.9698636531829834, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.04}
{'origin_loss': 1.8788418769836426, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.04}
{'origin_loss': 2.1590797901153564, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.04}
{'origin_loss': 2.044267416000366, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.04}
{'origin_loss': 2.133209228515625, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.04}
{'origin_loss': 2.105926990509033, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.04}
{'origin_loss': 2.0213396549224854, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.04}
{'origin_loss': 2.0508363246917725, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.04}
{'loss': 199.3579, 'grad_norm': 0.4165518283843994, 'learning_rate': 8.06e-06, 'epoch': 0.04}
{'origin_loss': 2.1331188678741455, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.04}
{'origin_loss': 2.119659423828125, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.04}
{'origin_loss': 1.7471344470977783, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.04}
{'origin_loss': 1.6537131071090698, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.04}
{'origin_loss': 2.307708501815796, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.04}
{'origin_loss': 2.0078506469726562, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.04}
{'origin_loss': 2.0939838886260986, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.04}
{'origin_loss': 2.1530513763427734, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.04}
{'loss': 200.9958, 'grad_norm': 0.3980746865272522, 'learning_rate': 8.040000000000001e-06, 'epoch': 0.04}
{'origin_loss': 1.9794121980667114, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.04}
{'origin_loss': 2.1383798122406006, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.04}
{'origin_loss': 2.0416438579559326, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.04}
{'origin_loss': 1.9864660501480103, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.04}
{'origin_loss': 2.2525527477264404, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.04}
{'origin_loss': 2.146238327026367, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.04}
{'origin_loss': 2.0477120876312256, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.04}
{'origin_loss': 1.9935168027877808, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.04}
{'loss': 200.8857, 'grad_norm': 0.3988703191280365, 'learning_rate': 8.020000000000001e-06, 'epoch': 0.04}
{'origin_loss': 1.893004298210144, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.04}
{'origin_loss': 2.079932689666748, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.04}
{'origin_loss': 2.1138670444488525, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.04}
{'origin_loss': 1.9118375778198242, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.04}
{'origin_loss': 2.10127854347229, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.04}
{'origin_loss': 2.0369303226470947, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.04}
{'origin_loss': 1.9318182468414307, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.04}
{'origin_loss': 2.0115466117858887, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.04}
{'loss': 202.9163, 'grad_norm': 0.35147589445114136, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.04}
{'origin_loss': 1.8138799667358398, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.04}
{'origin_loss': 1.9723823070526123, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.04}
{'origin_loss': 2.0330629348754883, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.04}
{'origin_loss': 1.9056997299194336, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.04}
{'origin_loss': 1.9486318826675415, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.04}
{'origin_loss': 2.197802782058716, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.04}
{'origin_loss': 1.9517979621887207, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.04}
{'origin_loss': 2.252256393432617, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.04}
{'loss': 203.2126, 'grad_norm': 0.5045400857925415, 'learning_rate': 7.980000000000002e-06, 'epoch': 0.04}
{'origin_loss': 2.0862877368927, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.04}
{'origin_loss': 1.9866572618484497, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.04}
{'origin_loss': 2.0935511589050293, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.04}
{'origin_loss': 1.863299012184143, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.04}
{'origin_loss': 2.1927897930145264, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.04}
{'origin_loss': 2.1391172409057617, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.04}
{'origin_loss': 2.1036839485168457, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.04}
{'origin_loss': 1.9164081811904907, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.04}
{'loss': 203.5008, 'grad_norm': 0.3803974688053131, 'learning_rate': 7.960000000000002e-06, 'epoch': 0.04}
{'origin_loss': 1.9678457975387573, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.04}
{'origin_loss': 2.064279556274414, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.04}
{'origin_loss': 1.9936460256576538, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.04}
{'origin_loss': 1.9204187393188477, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.04}
{'origin_loss': 2.189655065536499, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.04}
{'origin_loss': 2.0098557472229004, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.04}
{'origin_loss': 2.417363405227661, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.04}
{'origin_loss': 2.171369791030884, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.04}
{'loss': 205.8418, 'grad_norm': 0.5009542107582092, 'learning_rate': 7.94e-06, 'epoch': 0.04}
{'origin_loss': 2.1793715953826904, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.04}
{'origin_loss': 2.030688524246216, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.04}
{'origin_loss': 2.0054965019226074, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.04}
{'origin_loss': 2.0482354164123535, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.04}
{'origin_loss': 1.9480942487716675, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.04}
{'origin_loss': 2.067225933074951, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.04}
{'origin_loss': 2.110483169555664, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.04}
{'origin_loss': 1.9761276245117188, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.04}
{'loss': 208.3426, 'grad_norm': 0.417361855506897, 'learning_rate': 7.92e-06, 'epoch': 0.05}
{'origin_loss': 2.2446436882019043, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.05}
{'origin_loss': 1.998553991317749, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.05}
{'origin_loss': 1.9622139930725098, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.05}
{'origin_loss': 2.3760793209075928, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.05}
{'origin_loss': 2.119166851043701, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.05}
{'origin_loss': 1.9991992712020874, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.05}
{'origin_loss': 2.0495669841766357, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.05}
{'origin_loss': 2.160781145095825, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.05}
{'loss': 209.6294, 'grad_norm': 0.4146869480609894, 'learning_rate': 7.9e-06, 'epoch': 0.05}
{'origin_loss': 2.215236186981201, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.05}
{'origin_loss': 2.086786985397339, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.05}
{'origin_loss': 1.915172815322876, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.05}
{'origin_loss': 2.1591854095458984, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.05}
{'origin_loss': 1.9308558702468872, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.05}
{'origin_loss': 1.9133424758911133, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.05}
{'origin_loss': 1.8943041563034058, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.05}
{'origin_loss': 2.0565896034240723, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.05}
{'loss': 209.8027, 'grad_norm': 0.3582066297531128, 'learning_rate': 7.88e-06, 'epoch': 0.05}
{'origin_loss': 1.8911408185958862, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.05}
{'origin_loss': 1.852182388305664, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.05}
{'origin_loss': 2.1698968410491943, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.05}
{'origin_loss': 1.982559084892273, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.05}
{'origin_loss': 2.296621561050415, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.05}
{'origin_loss': 2.05537748336792, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.05}
{'origin_loss': 2.192525625228882, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.05}
{'origin_loss': 2.0648882389068604, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.05}
{'loss': 210.9382, 'grad_norm': 0.44216188788414, 'learning_rate': 7.860000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.070291519165039, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.05}
{'origin_loss': 2.083078622817993, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.05}
{'origin_loss': 2.0545692443847656, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.05}
{'origin_loss': 2.0436391830444336, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.05}
{'origin_loss': 2.0802462100982666, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.05}
{'origin_loss': 1.963870882987976, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.05}
{'origin_loss': 2.0222136974334717, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.05}
{'origin_loss': 2.041437864303589, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.05}
{'loss': 212.623, 'grad_norm': 0.36443790793418884, 'learning_rate': 7.840000000000001e-06, 'epoch': 0.05}
{'origin_loss': 1.9607224464416504, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.05}
{'origin_loss': 1.9919637441635132, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.05}
{'origin_loss': 1.9819668531417847, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.05}
{'origin_loss': 1.873847246170044, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.05}
{'origin_loss': 2.012929677963257, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.05}
{'origin_loss': 2.000046730041504, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.05}
{'origin_loss': 1.8429110050201416, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.05}
{'origin_loss': 1.9058417081832886, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.05}
{'loss': 213.2275, 'grad_norm': 0.41816121339797974, 'learning_rate': 7.820000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.1919496059417725, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.05}
{'origin_loss': 1.9473698139190674, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.05}
{'origin_loss': 2.1687240600585938, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.05}
{'origin_loss': 1.8475834131240845, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.05}
{'origin_loss': 2.0938620567321777, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.05}
{'origin_loss': 2.2272331714630127, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.05}
{'origin_loss': 1.9876964092254639, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.05}
{'origin_loss': 2.1553750038146973, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.05}
{'loss': 213.3744, 'grad_norm': 0.36879560351371765, 'learning_rate': 7.800000000000002e-06, 'epoch': 0.05}
{'origin_loss': 2.2952425479888916, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.05}
{'origin_loss': 2.1895265579223633, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.05}
{'origin_loss': 2.0758259296417236, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.05}
{'origin_loss': 2.062126398086548, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.05}
{'origin_loss': 2.264354944229126, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.05}
{'origin_loss': 2.155754327774048, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.05}
{'origin_loss': 1.9259482622146606, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.05}
{'origin_loss': 2.1180450916290283, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.05}
{'loss': 214.0733, 'grad_norm': 0.5080406069755554, 'learning_rate': 7.78e-06, 'epoch': 0.05}
{'origin_loss': 2.0294435024261475, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.05}
{'origin_loss': 2.1245014667510986, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.05}
{'origin_loss': 2.1500000953674316, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.05}
{'origin_loss': 2.037985324859619, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.05}
{'origin_loss': 2.1160528659820557, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.05}
{'origin_loss': 1.9219213724136353, 'mask_loss': 0.17919921875, 'mask_rate': 0.42333984375, 'epoch': 0.05}
{'origin_loss': 2.223804235458374, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.05}
{'origin_loss': 2.0410611629486084, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.05}
{'loss': 215.4868, 'grad_norm': 0.3547631800174713, 'learning_rate': 7.76e-06, 'epoch': 0.05}
{'origin_loss': 1.886866569519043, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.05}
{'origin_loss': 2.0776448249816895, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.05}
{'origin_loss': 1.785977840423584, 'mask_loss': 0.180419921875, 'mask_rate': 0.4248046875, 'epoch': 0.05}
{'origin_loss': 2.1392550468444824, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.05}
{'origin_loss': 2.0156972408294678, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.05}
{'origin_loss': 2.2107791900634766, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.05}
{'origin_loss': 2.1963002681732178, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.05}
{'origin_loss': 2.0296125411987305, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.05}
{'loss': 215.9647, 'grad_norm': 0.3920614719390869, 'learning_rate': 7.74e-06, 'epoch': 0.05}
{'origin_loss': 2.0166420936584473, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.05}
{'origin_loss': 1.8595043420791626, 'mask_loss': 0.1798095703125, 'mask_rate': 0.424072265625, 'epoch': 0.05}
{'origin_loss': 2.214078426361084, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.05}
{'origin_loss': 2.0180623531341553, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.05}
{'origin_loss': 2.166050672531128, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.05}
{'origin_loss': 2.0063796043395996, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.05}
{'origin_loss': 2.135561466217041, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.05}
{'origin_loss': 1.8643884658813477, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.05}
{'loss': 216.3007, 'grad_norm': 0.37607261538505554, 'learning_rate': 7.72e-06, 'epoch': 0.05}
{'origin_loss': 1.9975394010543823, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.05}
{'origin_loss': 2.021038055419922, 'mask_loss': 0.1798095703125, 'mask_rate': 0.424072265625, 'epoch': 0.05}
{'origin_loss': 2.2360684871673584, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.05}
{'origin_loss': 1.9317904710769653, 'mask_loss': 0.1783447265625, 'mask_rate': 0.42236328125, 'epoch': 0.05}
{'origin_loss': 2.042264938354492, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.05}
{'origin_loss': 2.0766794681549072, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.05}
{'origin_loss': 1.918117642402649, 'mask_loss': 0.1839599609375, 'mask_rate': 0.428955078125, 'epoch': 0.05}
{'origin_loss': 2.0324974060058594, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.05}
{'loss': 218.8601, 'grad_norm': 138.86703491210938, 'learning_rate': 7.7e-06, 'epoch': 0.05}
{'origin_loss': 1.9764392375946045, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.05}
{'origin_loss': 2.1447741985321045, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.05}
{'origin_loss': 1.968894362449646, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.05}
{'origin_loss': 1.965450644493103, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.05}
{'origin_loss': 1.9385387897491455, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.05}
{'origin_loss': 1.9965424537658691, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.05}
{'origin_loss': 2.0285818576812744, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.05}
{'origin_loss': 2.217639684677124, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.05}
{'loss': 214.0296, 'grad_norm': 134.22512817382812, 'learning_rate': 7.680000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.012334108352661, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.05}
{'origin_loss': 1.9539315700531006, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.05}
{'origin_loss': 2.3006420135498047, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.05}
{'origin_loss': 2.0171737670898438, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.05}
{'origin_loss': 2.0695745944976807, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.05}
{'origin_loss': 2.1871354579925537, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.05}
{'origin_loss': 2.030596971511841, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.05}
{'origin_loss': 1.993240475654602, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.05}
{'loss': 205.43, 'grad_norm': 0.544151782989502, 'learning_rate': 7.660000000000001e-06, 'epoch': 0.05}
{'origin_loss': 1.869988203048706, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.05}
{'origin_loss': 2.0312983989715576, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.05}
{'origin_loss': 2.043912649154663, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.05}
{'origin_loss': 2.1049535274505615, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.05}
{'origin_loss': 2.146592140197754, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.05}
{'origin_loss': 2.1087472438812256, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.05}
{'origin_loss': 2.0014657974243164, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.05}
{'origin_loss': 1.996840476989746, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.05}
{'loss': 198.413, 'grad_norm': 0.46680760383605957, 'learning_rate': 7.640000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.1716411113739014, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.05}
{'origin_loss': 2.1008665561676025, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.05}
{'origin_loss': 2.0711708068847656, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.05}
{'origin_loss': 2.018211603164673, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.05}
{'origin_loss': 1.9495948553085327, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.05}
{'origin_loss': 2.1240975856781006, 'mask_loss': 0.149169921875, 'mask_rate': 0.38623046875, 'epoch': 0.05}
{'origin_loss': 2.0835697650909424, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.05}
{'origin_loss': 2.1580331325531006, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.05}
{'loss': 190.7097, 'grad_norm': 0.530566394329071, 'learning_rate': 7.620000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.0699260234832764, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.05}
{'origin_loss': 2.0804333686828613, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.05}
{'origin_loss': 1.9654477834701538, 'mask_loss': 0.1519775390625, 'mask_rate': 0.389892578125, 'epoch': 0.05}
{'origin_loss': 2.0956766605377197, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.05}
{'origin_loss': 2.2387077808380127, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.05}
{'origin_loss': 2.0060672760009766, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.05}
{'origin_loss': 2.0342321395874023, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.05}
{'origin_loss': 2.0965964794158936, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.05}
{'loss': 187.8859, 'grad_norm': 0.4393795430660248, 'learning_rate': 7.600000000000001e-06, 'epoch': 0.05}
{'origin_loss': 1.9933727979660034, 'mask_loss': 0.148193359375, 'mask_rate': 0.385009765625, 'epoch': 0.05}
{'origin_loss': 2.22029972076416, 'mask_loss': 0.1473388671875, 'mask_rate': 0.3837890625, 'epoch': 0.05}
{'origin_loss': 2.089865207672119, 'mask_loss': 0.14892578125, 'mask_rate': 0.385986328125, 'epoch': 0.05}
{'origin_loss': 1.6452336311340332, 'mask_loss': 0.150634765625, 'mask_rate': 0.38818359375, 'epoch': 0.05}
{'origin_loss': 2.042728900909424, 'mask_loss': 0.146484375, 'mask_rate': 0.3828125, 'epoch': 0.05}
{'origin_loss': 1.9931423664093018, 'mask_loss': 0.151123046875, 'mask_rate': 0.388671875, 'epoch': 0.05}
{'origin_loss': 2.0025768280029297, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.05}
{'origin_loss': 2.132657051086426, 'mask_loss': 0.1474609375, 'mask_rate': 0.384033203125, 'epoch': 0.05}
{'loss': 182.6712, 'grad_norm': 0.46440309286117554, 'learning_rate': 7.58e-06, 'epoch': 0.05}
{'origin_loss': 1.8711204528808594, 'mask_loss': 0.14453125, 'mask_rate': 0.380126953125, 'epoch': 0.05}
{'origin_loss': 1.946035385131836, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.05}
{'origin_loss': 2.298710823059082, 'mask_loss': 0.14453125, 'mask_rate': 0.380126953125, 'epoch': 0.05}
{'origin_loss': 2.0986123085021973, 'mask_loss': 0.145263671875, 'mask_rate': 0.381103515625, 'epoch': 0.05}
{'origin_loss': 1.9934446811676025, 'mask_loss': 0.14697265625, 'mask_rate': 0.38330078125, 'epoch': 0.05}
{'origin_loss': 2.0573065280914307, 'mask_loss': 0.14892578125, 'mask_rate': 0.385986328125, 'epoch': 0.05}
{'origin_loss': 2.1057517528533936, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.05}
{'origin_loss': 1.986305594444275, 'mask_loss': 0.1474609375, 'mask_rate': 0.384033203125, 'epoch': 0.05}
{'loss': 180.6853, 'grad_norm': 0.423054039478302, 'learning_rate': 7.5600000000000005e-06, 'epoch': 0.05}
{'origin_loss': 2.01163387298584, 'mask_loss': 0.1434326171875, 'mask_rate': 0.378662109375, 'epoch': 0.05}
{'origin_loss': 2.0934746265411377, 'mask_loss': 0.14453125, 'mask_rate': 0.380126953125, 'epoch': 0.05}
{'origin_loss': 2.012640953063965, 'mask_loss': 0.1448974609375, 'mask_rate': 0.380615234375, 'epoch': 0.05}
{'origin_loss': 2.048860549926758, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.05}
{'origin_loss': 2.037243604660034, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.05}
{'origin_loss': 2.250749349594116, 'mask_loss': 0.1424560546875, 'mask_rate': 0.37744140625, 'epoch': 0.05}
{'origin_loss': 2.280609130859375, 'mask_loss': 0.1473388671875, 'mask_rate': 0.3837890625, 'epoch': 0.05}
{'origin_loss': 2.0909922122955322, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.05}
{'loss': 178.0876, 'grad_norm': 0.3736816346645355, 'learning_rate': 7.540000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.030940532684326, 'mask_loss': 0.14453125, 'mask_rate': 0.380126953125, 'epoch': 0.05}
{'origin_loss': 1.8439240455627441, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.05}
{'origin_loss': 1.8777260780334473, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.05}
{'origin_loss': 2.1501479148864746, 'mask_loss': 0.1453857421875, 'mask_rate': 0.38134765625, 'epoch': 0.05}
{'origin_loss': 2.0020320415496826, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.05}
{'origin_loss': 2.208824634552002, 'mask_loss': 0.142333984375, 'mask_rate': 0.377197265625, 'epoch': 0.05}
{'origin_loss': 2.148010492324829, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.05}
{'origin_loss': 2.1378488540649414, 'mask_loss': 0.1474609375, 'mask_rate': 0.384033203125, 'epoch': 0.05}
{'loss': 178.4406, 'grad_norm': 0.5135908722877502, 'learning_rate': 7.520000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.1607818603515625, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.05}
{'origin_loss': 2.102595806121826, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.05}
{'origin_loss': 1.6613268852233887, 'mask_loss': 0.1357421875, 'mask_rate': 0.368408203125, 'epoch': 0.05}
{'origin_loss': 2.102912664413452, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.05}
{'origin_loss': 1.9568172693252563, 'mask_loss': 0.1424560546875, 'mask_rate': 0.37744140625, 'epoch': 0.05}
{'origin_loss': 1.9550939798355103, 'mask_loss': 0.1448974609375, 'mask_rate': 0.380615234375, 'epoch': 0.05}
{'origin_loss': 2.0735373497009277, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.05}
{'origin_loss': 2.1205193996429443, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.05}
{'loss': 174.9854, 'grad_norm': 0.45204970240592957, 'learning_rate': 7.500000000000001e-06, 'epoch': 0.05}
{'origin_loss': 2.146811008453369, 'mask_loss': 0.1402587890625, 'mask_rate': 0.37451171875, 'epoch': 0.05}
{'origin_loss': 1.9368752241134644, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.05}
{'origin_loss': 1.9919275045394897, 'mask_loss': 0.1409912109375, 'mask_rate': 0.37548828125, 'epoch': 0.05}
{'origin_loss': 1.948856234550476, 'mask_loss': 0.1446533203125, 'mask_rate': 0.38037109375, 'epoch': 0.05}
{'origin_loss': 2.103290557861328, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.05}
{'origin_loss': 1.7043116092681885, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.05}
{'origin_loss': 1.9265027046203613, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.05}
{'origin_loss': 2.2723476886749268, 'mask_loss': 0.141357421875, 'mask_rate': 0.3759765625, 'epoch': 0.05}
{'loss': 175.6445, 'grad_norm': 1.2698034048080444, 'learning_rate': 7.48e-06, 'epoch': 0.06}
{'origin_loss': 2.08760142326355, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 1.7949352264404297, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.06}
{'origin_loss': 2.0776290893554688, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.06}
{'origin_loss': 2.1292104721069336, 'mask_loss': 0.1397705078125, 'mask_rate': 0.373779296875, 'epoch': 0.06}
{'origin_loss': 1.7420861721038818, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 1.9697750806808472, 'mask_loss': 0.1409912109375, 'mask_rate': 0.37548828125, 'epoch': 0.06}
{'origin_loss': 2.075998544692993, 'mask_loss': 0.140625, 'mask_rate': 0.375, 'epoch': 0.06}
{'origin_loss': 2.0304620265960693, 'mask_loss': 0.142822265625, 'mask_rate': 0.3779296875, 'epoch': 0.06}
{'loss': 174.1447, 'grad_norm': 0.40656033158302307, 'learning_rate': 7.4600000000000006e-06, 'epoch': 0.06}
{'origin_loss': 2.149749755859375, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 2.1838748455047607, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'origin_loss': 2.2159833908081055, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 1.7786619663238525, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 2.143648624420166, 'mask_loss': 0.140625, 'mask_rate': 0.375, 'epoch': 0.06}
{'origin_loss': 2.1275827884674072, 'mask_loss': 0.1402587890625, 'mask_rate': 0.37451171875, 'epoch': 0.06}
{'origin_loss': 1.9979679584503174, 'mask_loss': 0.1412353515625, 'mask_rate': 0.375732421875, 'epoch': 0.06}
{'origin_loss': 2.120950937271118, 'mask_loss': 0.1405029296875, 'mask_rate': 0.374755859375, 'epoch': 0.06}
{'loss': 173.5117, 'grad_norm': 0.3800049424171448, 'learning_rate': 7.440000000000001e-06, 'epoch': 0.06}
{'origin_loss': 1.9452576637268066, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.06}
{'origin_loss': 2.1927125453948975, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'origin_loss': 2.0886058807373047, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 1.963761329650879, 'mask_loss': 0.1424560546875, 'mask_rate': 0.37744140625, 'epoch': 0.06}
{'origin_loss': 1.9254963397979736, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 2.084498643875122, 'mask_loss': 0.138671875, 'mask_rate': 0.372314453125, 'epoch': 0.06}
{'origin_loss': 2.1154468059539795, 'mask_loss': 0.136962890625, 'mask_rate': 0.3701171875, 'epoch': 0.06}
{'origin_loss': 2.2085494995117188, 'mask_loss': 0.14208984375, 'mask_rate': 0.376953125, 'epoch': 0.06}
{'loss': 173.1437, 'grad_norm': 0.6174355745315552, 'learning_rate': 7.420000000000001e-06, 'epoch': 0.06}
{'origin_loss': 2.0248634815216064, 'mask_loss': 0.138671875, 'mask_rate': 0.372314453125, 'epoch': 0.06}
{'origin_loss': 2.102229356765747, 'mask_loss': 0.139892578125, 'mask_rate': 0.3740234375, 'epoch': 0.06}
{'origin_loss': 2.0129215717315674, 'mask_loss': 0.141357421875, 'mask_rate': 0.3759765625, 'epoch': 0.06}
{'origin_loss': 2.1292991638183594, 'mask_loss': 0.141357421875, 'mask_rate': 0.3759765625, 'epoch': 0.06}
{'origin_loss': 2.088279962539673, 'mask_loss': 0.1373291015625, 'mask_rate': 0.37060546875, 'epoch': 0.06}
{'origin_loss': 2.126983165740967, 'mask_loss': 0.138671875, 'mask_rate': 0.372314453125, 'epoch': 0.06}
{'origin_loss': 2.1086232662200928, 'mask_loss': 0.1380615234375, 'mask_rate': 0.37158203125, 'epoch': 0.06}
{'origin_loss': 2.126800537109375, 'mask_loss': 0.141357421875, 'mask_rate': 0.3759765625, 'epoch': 0.06}
{'loss': 171.8244, 'grad_norm': 0.7998461723327637, 'learning_rate': 7.4e-06, 'epoch': 0.06}
{'origin_loss': 1.7931783199310303, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'origin_loss': 2.1995809078216553, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'origin_loss': 2.0096514225006104, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 2.1467442512512207, 'mask_loss': 0.136962890625, 'mask_rate': 0.3701171875, 'epoch': 0.06}
{'origin_loss': 2.09909725189209, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.06}
{'origin_loss': 2.0137805938720703, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 1.964768886566162, 'mask_loss': 0.13623046875, 'mask_rate': 0.369140625, 'epoch': 0.06}
{'origin_loss': 2.2146408557891846, 'mask_loss': 0.140625, 'mask_rate': 0.375, 'epoch': 0.06}
{'loss': 172.3677, 'grad_norm': 0.4043624997138977, 'learning_rate': 7.3800000000000005e-06, 'epoch': 0.06}
{'origin_loss': 1.9099117517471313, 'mask_loss': 0.1434326171875, 'mask_rate': 0.378662109375, 'epoch': 0.06}
{'origin_loss': 1.9266306161880493, 'mask_loss': 0.1431884765625, 'mask_rate': 0.37841796875, 'epoch': 0.06}
{'origin_loss': 2.123565196990967, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'origin_loss': 1.9688737392425537, 'mask_loss': 0.14208984375, 'mask_rate': 0.376953125, 'epoch': 0.06}
{'origin_loss': 2.0516417026519775, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 1.8610985279083252, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 2.056232213973999, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 1.9589743614196777, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'loss': 174.029, 'grad_norm': 0.5768946409225464, 'learning_rate': 7.360000000000001e-06, 'epoch': 0.06}
{'origin_loss': 1.8752096891403198, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 1.960318684577942, 'mask_loss': 0.13720703125, 'mask_rate': 0.370361328125, 'epoch': 0.06}
{'origin_loss': 1.7164937257766724, 'mask_loss': 0.1376953125, 'mask_rate': 0.37109375, 'epoch': 0.06}
{'origin_loss': 1.8761595487594604, 'mask_loss': 0.1434326171875, 'mask_rate': 0.378662109375, 'epoch': 0.06}
{'origin_loss': 2.0859034061431885, 'mask_loss': 0.13916015625, 'mask_rate': 0.373046875, 'epoch': 0.06}
{'origin_loss': 2.0869100093841553, 'mask_loss': 0.1441650390625, 'mask_rate': 0.379638671875, 'epoch': 0.06}
{'origin_loss': 2.269489049911499, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.06}
{'origin_loss': 2.0522608757019043, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'loss': 172.8028, 'grad_norm': 0.4352279305458069, 'learning_rate': 7.340000000000001e-06, 'epoch': 0.06}
{'origin_loss': 1.8561714887619019, 'mask_loss': 0.1416015625, 'mask_rate': 0.376220703125, 'epoch': 0.06}
{'origin_loss': 2.2838339805603027, 'mask_loss': 0.1395263671875, 'mask_rate': 0.37353515625, 'epoch': 0.06}
{'origin_loss': 1.9118505716323853, 'mask_loss': 0.136962890625, 'mask_rate': 0.3701171875, 'epoch': 0.06}
{'origin_loss': 2.0298805236816406, 'mask_loss': 0.142333984375, 'mask_rate': 0.377197265625, 'epoch': 0.06}
{'origin_loss': 1.9620087146759033, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 2.08681321144104, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 2.1946139335632324, 'mask_loss': 0.1424560546875, 'mask_rate': 0.37744140625, 'epoch': 0.06}
{'origin_loss': 2.2214527130126953, 'mask_loss': 0.13623046875, 'mask_rate': 0.369140625, 'epoch': 0.06}
{'loss': 172.209, 'grad_norm': 0.4074043929576874, 'learning_rate': 7.32e-06, 'epoch': 0.06}
{'origin_loss': 2.0264246463775635, 'mask_loss': 0.1405029296875, 'mask_rate': 0.374755859375, 'epoch': 0.06}
{'origin_loss': 2.2783896923065186, 'mask_loss': 0.1390380859375, 'mask_rate': 0.372802734375, 'epoch': 0.06}
{'origin_loss': 1.9493801593780518, 'mask_loss': 0.1409912109375, 'mask_rate': 0.37548828125, 'epoch': 0.06}
{'origin_loss': 1.9443646669387817, 'mask_loss': 0.14208984375, 'mask_rate': 0.376953125, 'epoch': 0.06}
{'origin_loss': 1.8466721773147583, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.06}
{'origin_loss': 2.1010963916778564, 'mask_loss': 0.1402587890625, 'mask_rate': 0.37451171875, 'epoch': 0.06}
{'origin_loss': 1.9139776229858398, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 2.202427864074707, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'loss': 173.3297, 'grad_norm': 0.4133610725402832, 'learning_rate': 7.3e-06, 'epoch': 0.06}
{'origin_loss': 2.0140984058380127, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 1.9660444259643555, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 1.9367692470550537, 'mask_loss': 0.145263671875, 'mask_rate': 0.381103515625, 'epoch': 0.06}
{'origin_loss': 2.266484022140503, 'mask_loss': 0.140869140625, 'mask_rate': 0.375244140625, 'epoch': 0.06}
{'origin_loss': 2.1350135803222656, 'mask_loss': 0.1416015625, 'mask_rate': 0.376220703125, 'epoch': 0.06}
{'origin_loss': 2.105581760406494, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 2.0620453357696533, 'mask_loss': 0.1412353515625, 'mask_rate': 0.375732421875, 'epoch': 0.06}
{'origin_loss': 1.9443674087524414, 'mask_loss': 0.142333984375, 'mask_rate': 0.377197265625, 'epoch': 0.06}
{'loss': 175.2257, 'grad_norm': 0.40753525495529175, 'learning_rate': 7.280000000000001e-06, 'epoch': 0.06}
{'origin_loss': 2.000739336013794, 'mask_loss': 0.146484375, 'mask_rate': 0.3828125, 'epoch': 0.06}
{'origin_loss': 1.9742131233215332, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.06}
{'origin_loss': 1.7860713005065918, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 1.9868236780166626, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.06}
{'origin_loss': 2.1437323093414307, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 2.233429431915283, 'mask_loss': 0.1441650390625, 'mask_rate': 0.379638671875, 'epoch': 0.06}
{'origin_loss': 2.077784538269043, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.06}
{'origin_loss': 2.021928310394287, 'mask_loss': 0.140625, 'mask_rate': 0.375, 'epoch': 0.06}
{'loss': 176.6062, 'grad_norm': 0.36897706985473633, 'learning_rate': 7.260000000000001e-06, 'epoch': 0.06}
{'origin_loss': 1.9974207878112793, 'mask_loss': 0.1417236328125, 'mask_rate': 0.37646484375, 'epoch': 0.06}
{'origin_loss': 1.9990674257278442, 'mask_loss': 0.141357421875, 'mask_rate': 0.3759765625, 'epoch': 0.06}
{'origin_loss': 2.0413761138916016, 'mask_loss': 0.1441650390625, 'mask_rate': 0.379638671875, 'epoch': 0.06}
{'origin_loss': 1.973107099533081, 'mask_loss': 0.1431884765625, 'mask_rate': 0.37841796875, 'epoch': 0.06}
{'origin_loss': 2.040300130844116, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 2.152576446533203, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.06}
{'origin_loss': 1.9485845565795898, 'mask_loss': 0.1431884765625, 'mask_rate': 0.37841796875, 'epoch': 0.06}
{'origin_loss': 1.9387835264205933, 'mask_loss': 0.1390380859375, 'mask_rate': 0.372802734375, 'epoch': 0.06}
{'loss': 175.0583, 'grad_norm': 0.3653923571109772, 'learning_rate': 7.24e-06, 'epoch': 0.06}
{'origin_loss': 1.901780366897583, 'mask_loss': 0.1456298828125, 'mask_rate': 0.381591796875, 'epoch': 0.06}
{'origin_loss': 1.7213151454925537, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 1.8865821361541748, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.06}
{'origin_loss': 1.9871792793273926, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.06}
{'origin_loss': 2.006087303161621, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'origin_loss': 1.9465317726135254, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 2.199827194213867, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.06}
{'origin_loss': 2.0356101989746094, 'mask_loss': 0.142333984375, 'mask_rate': 0.377197265625, 'epoch': 0.06}
{'loss': 176.8512, 'grad_norm': 0.4229579269886017, 'learning_rate': 7.22e-06, 'epoch': 0.06}
{'origin_loss': 2.170531749725342, 'mask_loss': 0.1441650390625, 'mask_rate': 0.379638671875, 'epoch': 0.06}
{'origin_loss': 2.1228439807891846, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 2.2606518268585205, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.06}
{'origin_loss': 2.1408205032348633, 'mask_loss': 0.142822265625, 'mask_rate': 0.3779296875, 'epoch': 0.06}
{'origin_loss': 2.137852907180786, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.06}
{'origin_loss': 1.6333749294281006, 'mask_loss': 0.14013671875, 'mask_rate': 0.374267578125, 'epoch': 0.06}
{'origin_loss': 2.2241737842559814, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 1.983096718788147, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'loss': 175.8654, 'grad_norm': 0.3696923851966858, 'learning_rate': 7.2000000000000005e-06, 'epoch': 0.06}
{'origin_loss': 1.8817108869552612, 'mask_loss': 0.1461181640625, 'mask_rate': 0.38232421875, 'epoch': 0.06}
{'origin_loss': 1.9787678718566895, 'mask_loss': 0.1480712890625, 'mask_rate': 0.384765625, 'epoch': 0.06}
{'origin_loss': 1.9670335054397583, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 2.2215230464935303, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.06}
{'origin_loss': 1.8798828125, 'mask_loss': 0.1485595703125, 'mask_rate': 0.385498046875, 'epoch': 0.06}
{'origin_loss': 2.0507099628448486, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 2.0887451171875, 'mask_loss': 0.1453857421875, 'mask_rate': 0.38134765625, 'epoch': 0.06}
{'origin_loss': 1.9861255884170532, 'mask_loss': 0.1446533203125, 'mask_rate': 0.38037109375, 'epoch': 0.06}
{'loss': 179.0381, 'grad_norm': 0.3420279324054718, 'learning_rate': 7.180000000000001e-06, 'epoch': 0.06}
{'origin_loss': 2.130941867828369, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 2.345479965209961, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.06}
{'origin_loss': 2.114384651184082, 'mask_loss': 0.1453857421875, 'mask_rate': 0.38134765625, 'epoch': 0.06}
{'origin_loss': 1.9744088649749756, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.06}
{'origin_loss': 2.1144168376922607, 'mask_loss': 0.142822265625, 'mask_rate': 0.3779296875, 'epoch': 0.06}
{'origin_loss': 2.0240092277526855, 'mask_loss': 0.14892578125, 'mask_rate': 0.385986328125, 'epoch': 0.06}
{'origin_loss': 2.1866674423217773, 'mask_loss': 0.1419677734375, 'mask_rate': 0.376708984375, 'epoch': 0.06}
{'origin_loss': 2.1155879497528076, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.06}
{'loss': 178.1414, 'grad_norm': 0.46513718366622925, 'learning_rate': 7.16e-06, 'epoch': 0.06}
{'origin_loss': 1.9680386781692505, 'mask_loss': 0.1456298828125, 'mask_rate': 0.381591796875, 'epoch': 0.06}
{'origin_loss': 1.9656628370285034, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'origin_loss': 2.1464381217956543, 'mask_loss': 0.1456298828125, 'mask_rate': 0.381591796875, 'epoch': 0.06}
{'origin_loss': 2.0834312438964844, 'mask_loss': 0.1435546875, 'mask_rate': 0.37890625, 'epoch': 0.06}
{'origin_loss': 1.8775160312652588, 'mask_loss': 0.14599609375, 'mask_rate': 0.382080078125, 'epoch': 0.06}
{'origin_loss': 2.0200395584106445, 'mask_loss': 0.150146484375, 'mask_rate': 0.387451171875, 'epoch': 0.06}
{'origin_loss': 2.2087442874908447, 'mask_loss': 0.143798828125, 'mask_rate': 0.379150390625, 'epoch': 0.06}
{'origin_loss': 2.024613618850708, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'loss': 179.1931, 'grad_norm': 0.35996267199516296, 'learning_rate': 7.14e-06, 'epoch': 0.06}
{'origin_loss': 2.023629665374756, 'mask_loss': 0.146728515625, 'mask_rate': 0.383056640625, 'epoch': 0.06}
{'origin_loss': 2.0920395851135254, 'mask_loss': 0.146728515625, 'mask_rate': 0.383056640625, 'epoch': 0.06}
{'origin_loss': 2.0041697025299072, 'mask_loss': 0.1505126953125, 'mask_rate': 0.387939453125, 'epoch': 0.06}
{'origin_loss': 2.053457736968994, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'origin_loss': 2.060783863067627, 'mask_loss': 0.14599609375, 'mask_rate': 0.382080078125, 'epoch': 0.06}
{'origin_loss': 2.044942617416382, 'mask_loss': 0.1448974609375, 'mask_rate': 0.380615234375, 'epoch': 0.06}
{'origin_loss': 1.9877067804336548, 'mask_loss': 0.14599609375, 'mask_rate': 0.382080078125, 'epoch': 0.06}
{'origin_loss': 1.9454315900802612, 'mask_loss': 0.145263671875, 'mask_rate': 0.381103515625, 'epoch': 0.06}
{'loss': 180.2296, 'grad_norm': 0.4177534580230713, 'learning_rate': 7.1200000000000004e-06, 'epoch': 0.06}
{'origin_loss': 2.0532541275024414, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.06}
{'origin_loss': 1.9835083484649658, 'mask_loss': 0.148193359375, 'mask_rate': 0.385009765625, 'epoch': 0.06}
{'origin_loss': 2.0899009704589844, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.06}
{'origin_loss': 1.967761754989624, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'origin_loss': 2.1089866161346436, 'mask_loss': 0.1473388671875, 'mask_rate': 0.3837890625, 'epoch': 0.06}
{'origin_loss': 2.0446252822875977, 'mask_loss': 0.14697265625, 'mask_rate': 0.38330078125, 'epoch': 0.06}
{'origin_loss': 2.1057772636413574, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'origin_loss': 1.9670759439468384, 'mask_loss': 0.150146484375, 'mask_rate': 0.387451171875, 'epoch': 0.06}
{'loss': 181.4464, 'grad_norm': 0.4121346175670624, 'learning_rate': 7.100000000000001e-06, 'epoch': 0.06}
{'origin_loss': 1.9433857202529907, 'mask_loss': 0.1502685546875, 'mask_rate': 0.3876953125, 'epoch': 0.06}
{'origin_loss': 2.095216751098633, 'mask_loss': 0.148193359375, 'mask_rate': 0.385009765625, 'epoch': 0.06}
{'origin_loss': 1.9215000867843628, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.06}
{'origin_loss': 2.2543745040893555, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.06}
{'origin_loss': 1.9215831756591797, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'origin_loss': 1.911396861076355, 'mask_loss': 0.14697265625, 'mask_rate': 0.38330078125, 'epoch': 0.06}
{'origin_loss': 2.039360523223877, 'mask_loss': 0.1427001953125, 'mask_rate': 0.377685546875, 'epoch': 0.06}
{'origin_loss': 2.138766050338745, 'mask_loss': 0.14208984375, 'mask_rate': 0.376953125, 'epoch': 0.06}
{'loss': 179.8876, 'grad_norm': 0.3650530278682709, 'learning_rate': 7.08e-06, 'epoch': 0.06}
{'origin_loss': 2.1410961151123047, 'mask_loss': 0.14453125, 'mask_rate': 0.380126953125, 'epoch': 0.06}
{'origin_loss': 2.1408164501190186, 'mask_loss': 0.14990234375, 'mask_rate': 0.38720703125, 'epoch': 0.06}
{'origin_loss': 1.9851726293563843, 'mask_loss': 0.148193359375, 'mask_rate': 0.385009765625, 'epoch': 0.06}
{'origin_loss': 2.0715386867523193, 'mask_loss': 0.14990234375, 'mask_rate': 0.38720703125, 'epoch': 0.06}
{'origin_loss': 2.0949978828430176, 'mask_loss': 0.1488037109375, 'mask_rate': 0.3857421875, 'epoch': 0.06}
{'origin_loss': 1.8325657844543457, 'mask_loss': 0.145263671875, 'mask_rate': 0.381103515625, 'epoch': 0.06}
{'origin_loss': 2.2089028358459473, 'mask_loss': 0.149169921875, 'mask_rate': 0.38623046875, 'epoch': 0.06}
{'origin_loss': 2.0164794921875, 'mask_loss': 0.144287109375, 'mask_rate': 0.3798828125, 'epoch': 0.06}
{'loss': 181.4364, 'grad_norm': 0.3307526111602783, 'learning_rate': 7.06e-06, 'epoch': 0.06}
{'origin_loss': 1.9814980030059814, 'mask_loss': 0.14306640625, 'mask_rate': 0.378173828125, 'epoch': 0.06}
{'origin_loss': 2.1946463584899902, 'mask_loss': 0.146484375, 'mask_rate': 0.3828125, 'epoch': 0.06}
{'origin_loss': 1.9319798946380615, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'origin_loss': 2.037043809890747, 'mask_loss': 0.14990234375, 'mask_rate': 0.38720703125, 'epoch': 0.06}
{'origin_loss': 1.939299464225769, 'mask_loss': 0.1485595703125, 'mask_rate': 0.385498046875, 'epoch': 0.06}
{'origin_loss': 2.096841812133789, 'mask_loss': 0.1439208984375, 'mask_rate': 0.37939453125, 'epoch': 0.06}
{'origin_loss': 2.055302858352661, 'mask_loss': 0.1474609375, 'mask_rate': 0.384033203125, 'epoch': 0.06}
{'origin_loss': 1.8347301483154297, 'mask_loss': 0.1478271484375, 'mask_rate': 0.384521484375, 'epoch': 0.06}
{'loss': 180.3839, 'grad_norm': 0.43894651532173157, 'learning_rate': 7.04e-06, 'epoch': 0.06}
{'origin_loss': 2.174779176712036, 'mask_loss': 0.14599609375, 'mask_rate': 0.382080078125, 'epoch': 0.06}
{'origin_loss': 1.9209702014923096, 'mask_loss': 0.145751953125, 'mask_rate': 0.3818359375, 'epoch': 0.06}
{'origin_loss': 1.964319109916687, 'mask_loss': 0.150146484375, 'mask_rate': 0.387451171875, 'epoch': 0.06}
{'origin_loss': 1.6964272260665894, 'mask_loss': 0.147705078125, 'mask_rate': 0.38427734375, 'epoch': 0.06}
{'origin_loss': 2.0785555839538574, 'mask_loss': 0.1478271484375, 'mask_rate': 0.384521484375, 'epoch': 0.06}
{'origin_loss': 2.2926247119903564, 'mask_loss': 0.1461181640625, 'mask_rate': 0.38232421875, 'epoch': 0.06}
{'origin_loss': 2.012636184692383, 'mask_loss': 0.1485595703125, 'mask_rate': 0.385498046875, 'epoch': 0.06}
{'origin_loss': 1.97930908203125, 'mask_loss': 0.1463623046875, 'mask_rate': 0.382568359375, 'epoch': 0.06}
{'loss': 181.14, 'grad_norm': 0.3533742129802704, 'learning_rate': 7.0200000000000006e-06, 'epoch': 0.07}
{'origin_loss': 2.1304686069488525, 'mask_loss': 0.1514892578125, 'mask_rate': 0.38916015625, 'epoch': 0.07}
{'origin_loss': 1.8905247449874878, 'mask_loss': 0.151123046875, 'mask_rate': 0.388671875, 'epoch': 0.07}
{'origin_loss': 2.210742473602295, 'mask_loss': 0.1448974609375, 'mask_rate': 0.380615234375, 'epoch': 0.07}
{'origin_loss': 1.9929132461547852, 'mask_loss': 0.1514892578125, 'mask_rate': 0.38916015625, 'epoch': 0.07}
{'origin_loss': 2.1579947471618652, 'mask_loss': 0.14501953125, 'mask_rate': 0.380859375, 'epoch': 0.07}
{'origin_loss': 2.0750482082366943, 'mask_loss': 0.14892578125, 'mask_rate': 0.385986328125, 'epoch': 0.07}
{'origin_loss': 1.8418389558792114, 'mask_loss': 0.147705078125, 'mask_rate': 0.38427734375, 'epoch': 0.07}
{'origin_loss': 1.9609650373458862, 'mask_loss': 0.1519775390625, 'mask_rate': 0.389892578125, 'epoch': 0.07}
{'loss': 183.3294, 'grad_norm': 0.43435007333755493, 'learning_rate': 7e-06, 'epoch': 0.07}
{'origin_loss': 2.0243172645568848, 'mask_loss': 0.1514892578125, 'mask_rate': 0.38916015625, 'epoch': 0.07}
{'origin_loss': 2.1563215255737305, 'mask_loss': 0.146484375, 'mask_rate': 0.3828125, 'epoch': 0.07}
{'origin_loss': 2.0282633304595947, 'mask_loss': 0.149169921875, 'mask_rate': 0.38623046875, 'epoch': 0.07}
{'origin_loss': 2.1636862754821777, 'mask_loss': 0.1497802734375, 'mask_rate': 0.386962890625, 'epoch': 0.07}
{'origin_loss': 1.9528672695159912, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.07}
{'origin_loss': 1.6548866033554077, 'mask_loss': 0.147705078125, 'mask_rate': 0.38427734375, 'epoch': 0.07}
{'origin_loss': 2.1915955543518066, 'mask_loss': 0.149169921875, 'mask_rate': 0.38623046875, 'epoch': 0.07}
{'origin_loss': 2.1500632762908936, 'mask_loss': 0.1478271484375, 'mask_rate': 0.384521484375, 'epoch': 0.07}
{'loss': 183.3527, 'grad_norm': 0.3490905463695526, 'learning_rate': 6.98e-06, 'epoch': 0.07}
{'origin_loss': 2.026304244995117, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.07}
{'origin_loss': 1.7572944164276123, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.07}
{'origin_loss': 2.0882928371429443, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.07}
{'origin_loss': 1.976926565170288, 'mask_loss': 0.149169921875, 'mask_rate': 0.38623046875, 'epoch': 0.07}
{'origin_loss': 2.034710168838501, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'origin_loss': 2.001070976257324, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.07}
{'origin_loss': 2.216538190841675, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.07}
{'origin_loss': 2.169447898864746, 'mask_loss': 0.1478271484375, 'mask_rate': 0.384521484375, 'epoch': 0.07}
{'loss': 185.4713, 'grad_norm': 0.390129029750824, 'learning_rate': 6.96e-06, 'epoch': 0.07}
{'origin_loss': 1.9813240766525269, 'mask_loss': 0.1485595703125, 'mask_rate': 0.385498046875, 'epoch': 0.07}
{'origin_loss': 2.0927302837371826, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'origin_loss': 2.0091755390167236, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.07}
{'origin_loss': 1.9560905694961548, 'mask_loss': 0.1495361328125, 'mask_rate': 0.38671875, 'epoch': 0.07}
{'origin_loss': 2.0230705738067627, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.07}
{'origin_loss': 2.0719451904296875, 'mask_loss': 0.150146484375, 'mask_rate': 0.387451171875, 'epoch': 0.07}
{'origin_loss': 1.9208325147628784, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.07}
{'origin_loss': 1.953810214996338, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'loss': 187.0011, 'grad_norm': 0.36682137846946716, 'learning_rate': 6.9400000000000005e-06, 'epoch': 0.07}
{'origin_loss': 1.9713681936264038, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.07}
{'origin_loss': 2.089984893798828, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.07}
{'origin_loss': 1.9761464595794678, 'mask_loss': 0.1488037109375, 'mask_rate': 0.3857421875, 'epoch': 0.07}
{'origin_loss': 2.120487928390503, 'mask_loss': 0.150634765625, 'mask_rate': 0.38818359375, 'epoch': 0.07}
{'origin_loss': 1.8407397270202637, 'mask_loss': 0.1470947265625, 'mask_rate': 0.383544921875, 'epoch': 0.07}
{'origin_loss': 2.1271023750305176, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.07}
{'origin_loss': 2.0331273078918457, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.07}
{'origin_loss': 2.0716025829315186, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.07}
{'loss': 186.3882, 'grad_norm': 0.4301455020904541, 'learning_rate': 6.92e-06, 'epoch': 0.07}
{'origin_loss': 2.119239330291748, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'origin_loss': 2.091459274291992, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.07}
{'origin_loss': 2.0247645378112793, 'mask_loss': 0.1484375, 'mask_rate': 0.38525390625, 'epoch': 0.07}
{'origin_loss': 2.1365692615509033, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.07}
{'origin_loss': 1.9078724384307861, 'mask_loss': 0.15185546875, 'mask_rate': 0.3896484375, 'epoch': 0.07}
{'origin_loss': 1.90973961353302, 'mask_loss': 0.1495361328125, 'mask_rate': 0.38671875, 'epoch': 0.07}
{'origin_loss': 1.9787704944610596, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 2.0301923751831055, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'loss': 187.1186, 'grad_norm': 0.3764599561691284, 'learning_rate': 6.9e-06, 'epoch': 0.07}
{'origin_loss': 2.0412213802337646, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.07}
{'origin_loss': 1.9040186405181885, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.07}
{'origin_loss': 2.095083475112915, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.07}
{'origin_loss': 2.1257197856903076, 'mask_loss': 0.15234375, 'mask_rate': 0.390380859375, 'epoch': 0.07}
{'origin_loss': 2.062751531600952, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.07}
{'origin_loss': 2.0554728507995605, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.07}
{'origin_loss': 1.917184591293335, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.07}
{'origin_loss': 1.8227183818817139, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.07}
{'loss': 189.6749, 'grad_norm': 0.3944285213947296, 'learning_rate': 6.88e-06, 'epoch': 0.07}
{'origin_loss': 2.0602927207946777, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.07}
{'origin_loss': 2.060270309448242, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.07}
{'origin_loss': 2.0425899028778076, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.07}
{'origin_loss': 1.8979802131652832, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 2.2125401496887207, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 2.213872194290161, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.07}
{'origin_loss': 1.9863710403442383, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.07}
{'origin_loss': 1.875576138496399, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.07}
{'loss': 191.4187, 'grad_norm': 0.4033726155757904, 'learning_rate': 6.860000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.0702364444732666, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.07}
{'origin_loss': 2.2243878841400146, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.07}
{'origin_loss': 2.199398994445801, 'mask_loss': 0.15283203125, 'mask_rate': 0.390869140625, 'epoch': 0.07}
{'origin_loss': 2.1080052852630615, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.07}
{'origin_loss': 2.0340685844421387, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.07}
{'origin_loss': 2.0067436695098877, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.07}
{'origin_loss': 2.0333919525146484, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.07}
{'origin_loss': 2.0299034118652344, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.07}
{'loss': 188.7289, 'grad_norm': 0.40077900886535645, 'learning_rate': 6.8400000000000014e-06, 'epoch': 0.07}
{'origin_loss': 2.0988855361938477, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 2.104170799255371, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.07}
{'origin_loss': 2.263277530670166, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.07}
{'origin_loss': 2.0141632556915283, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.07}
{'origin_loss': 2.0788962841033936, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.07}
{'origin_loss': 2.1400563716888428, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.07}
{'origin_loss': 1.8890429735183716, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 2.0489563941955566, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.07}
{'loss': 191.4234, 'grad_norm': 0.3613411486148834, 'learning_rate': 6.820000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.06015944480896, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.07}
{'origin_loss': 2.231935977935791, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.07}
{'origin_loss': 1.7962431907653809, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.07}
{'origin_loss': 1.9209210872650146, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.07}
{'origin_loss': 1.7026162147521973, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.07}
{'origin_loss': 2.1248152256011963, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.07}
{'origin_loss': 2.0826690196990967, 'mask_loss': 0.1519775390625, 'mask_rate': 0.389892578125, 'epoch': 0.07}
{'origin_loss': 1.9602705240249634, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.07}
{'loss': 190.9381, 'grad_norm': 0.34711048007011414, 'learning_rate': 6.800000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.0733301639556885, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.07}
{'origin_loss': 1.9921021461486816, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.07}
{'origin_loss': 1.9594743251800537, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.07}
{'origin_loss': 2.0173330307006836, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 2.0143961906433105, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.07}
{'origin_loss': 2.021181583404541, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.07}
{'origin_loss': 2.0584592819213867, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.07}
{'origin_loss': 1.8807848691940308, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.07}
{'loss': 191.3146, 'grad_norm': 0.5126388669013977, 'learning_rate': 6.780000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.0431888103485107, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.07}
{'origin_loss': 2.1708967685699463, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.07}
{'origin_loss': 1.932586908340454, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.07}
{'origin_loss': 1.9375731945037842, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 2.2474730014801025, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.07}
{'origin_loss': 1.8626093864440918, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.07}
{'origin_loss': 1.9435805082321167, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.07}
{'origin_loss': 2.1095261573791504, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.07}
{'loss': 191.6091, 'grad_norm': 0.3757283687591553, 'learning_rate': 6.760000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.155862331390381, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.07}
{'origin_loss': 1.8757472038269043, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.07}
{'origin_loss': 1.889267086982727, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.07}
{'origin_loss': 2.128478527069092, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 1.9920581579208374, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.07}
{'origin_loss': 2.1159555912017822, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.07}
{'origin_loss': 1.9649674892425537, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.07}
{'origin_loss': 1.9085294008255005, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.07}
{'loss': 194.9101, 'grad_norm': 0.3543696701526642, 'learning_rate': 6.740000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.058763027191162, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.07}
{'origin_loss': 2.0934550762176514, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 1.7866230010986328, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.07}
{'origin_loss': 1.9609220027923584, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.07}
{'origin_loss': 1.976110816001892, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.07}
{'origin_loss': 2.2177891731262207, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.07}
{'origin_loss': 1.8258225917816162, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.07}
{'origin_loss': 1.8466399908065796, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.07}
{'loss': 193.6895, 'grad_norm': 0.4469682574272156, 'learning_rate': 6.720000000000001e-06, 'epoch': 0.07}
{'origin_loss': 1.991966724395752, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.07}
{'origin_loss': 1.8512904644012451, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.07}
{'origin_loss': 2.2388150691986084, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.07}
{'origin_loss': 1.8585110902786255, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.07}
{'origin_loss': 1.937486171722412, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.07}
{'origin_loss': 2.1726624965667725, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 2.0864644050598145, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.07}
{'origin_loss': 1.8397241830825806, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.07}
{'loss': 194.7315, 'grad_norm': 0.37843459844589233, 'learning_rate': 6.700000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.097505569458008, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.07}
{'origin_loss': 2.2497141361236572, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.07}
{'origin_loss': 2.0644965171813965, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.07}
{'origin_loss': 2.3084230422973633, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.07}
{'origin_loss': 2.0549845695495605, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.07}
{'origin_loss': 2.15779185295105, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.07}
{'origin_loss': 2.0442872047424316, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.07}
{'origin_loss': 1.9353582859039307, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.07}
{'loss': 196.0359, 'grad_norm': 0.4565919041633606, 'learning_rate': 6.680000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.058095693588257, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.07}
{'origin_loss': 1.8809068202972412, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.07}
{'origin_loss': 2.0703041553497314, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.07}
{'origin_loss': 2.1521987915039062, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.07}
{'origin_loss': 2.0856359004974365, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.07}
{'origin_loss': 2.0359432697296143, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.07}
{'origin_loss': 1.9892699718475342, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.07}
{'origin_loss': 2.0014617443084717, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.07}
{'loss': 196.7217, 'grad_norm': 0.39077556133270264, 'learning_rate': 6.660000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.2710678577423096, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.07}
{'origin_loss': 1.9648810625076294, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.07}
{'origin_loss': 2.026179790496826, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.07}
{'origin_loss': 2.033961296081543, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.07}
{'origin_loss': 2.04396390914917, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.07}
{'origin_loss': 2.0667290687561035, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.07}
{'origin_loss': 2.269662857055664, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.07}
{'origin_loss': 2.216341495513916, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.07}
{'loss': 196.5804, 'grad_norm': 0.3395457863807678, 'learning_rate': 6.640000000000001e-06, 'epoch': 0.07}
{'origin_loss': 1.8221358060836792, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.07}
{'origin_loss': 2.221726179122925, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.07}
{'origin_loss': 2.038109540939331, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.07}
{'origin_loss': 2.1863152980804443, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.07}
{'origin_loss': 1.945316195487976, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.07}
{'origin_loss': 1.998887538909912, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.07}
{'origin_loss': 2.008478879928589, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.07}
{'origin_loss': 1.8764194250106812, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.07}
{'loss': 196.5278, 'grad_norm': 0.6710689663887024, 'learning_rate': 6.620000000000001e-06, 'epoch': 0.07}
{'origin_loss': 1.9974547624588013, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.07}
{'origin_loss': 2.0194835662841797, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.07}
{'origin_loss': 2.225632667541504, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.07}
{'origin_loss': 1.8291670083999634, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.07}
{'origin_loss': 2.1981892585754395, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.07}
{'origin_loss': 2.0361669063568115, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.07}
{'origin_loss': 1.8500497341156006, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.07}
{'origin_loss': 2.058441400527954, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.07}
{'loss': 198.0893, 'grad_norm': 0.34540796279907227, 'learning_rate': 6.600000000000001e-06, 'epoch': 0.07}
{'origin_loss': 2.0888335704803467, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.07}
{'origin_loss': 1.880043864250183, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.07}
{'origin_loss': 1.9726293087005615, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.07}
{'origin_loss': 1.9358456134796143, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.07}
{'origin_loss': 2.1342105865478516, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.07}
{'origin_loss': 2.1098713874816895, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.07}
{'origin_loss': 1.909315824508667, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.07}
{'origin_loss': 2.1099720001220703, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.07}
{'loss': 198.5176, 'grad_norm': 0.40284425020217896, 'learning_rate': 6.5800000000000005e-06, 'epoch': 0.07}
{'origin_loss': 2.200287103652954, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.07}
{'origin_loss': 1.9360939264297485, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.07}
{'origin_loss': 2.0055501461029053, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.07}
{'origin_loss': 2.0650527477264404, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.07}
{'origin_loss': 2.172839641571045, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.07}
{'origin_loss': 2.099421739578247, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.07}
{'origin_loss': 2.045908212661743, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.07}
{'origin_loss': 1.8615485429763794, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.07}
{'loss': 198.8765, 'grad_norm': 0.35782694816589355, 'learning_rate': 6.560000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.8389573097229004, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.08}
{'origin_loss': 1.9465770721435547, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.08}
{'origin_loss': 2.1745152473449707, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.08}
{'origin_loss': 2.104842185974121, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.08}
{'origin_loss': 1.9016642570495605, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.08}
{'origin_loss': 2.087549924850464, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 1.86123526096344, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.08}
{'origin_loss': 2.0726490020751953, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.08}
{'loss': 201.0766, 'grad_norm': 0.3977735638618469, 'learning_rate': 6.540000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.067563533782959, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.08}
{'origin_loss': 1.9637001752853394, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.08}
{'origin_loss': 2.1308023929595947, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.08}
{'origin_loss': 2.1244630813598633, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.08}
{'origin_loss': 1.9596928358078003, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.08}
{'origin_loss': 2.0544309616088867, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.08}
{'origin_loss': 2.2649104595184326, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.08}
{'origin_loss': 1.7419806718826294, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.08}
{'loss': 200.4603, 'grad_norm': 0.4082139730453491, 'learning_rate': 6.520000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.095987319946289, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.08}
{'origin_loss': 1.9904998540878296, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.08}
{'origin_loss': 1.7846002578735352, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.08}
{'origin_loss': 1.8008685111999512, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.08}
{'origin_loss': 1.942917823791504, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.08}
{'origin_loss': 2.092735767364502, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.08}
{'origin_loss': 1.8993436098098755, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.08}
{'origin_loss': 2.031816244125366, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.08}
{'loss': 201.3298, 'grad_norm': 0.37345847487449646, 'learning_rate': 6.5000000000000004e-06, 'epoch': 0.08}
{'origin_loss': 2.027858018875122, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.08}
{'origin_loss': 2.0190789699554443, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.08}
{'origin_loss': 1.8852424621582031, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.08}
{'origin_loss': 1.965199589729309, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.08}
{'origin_loss': 2.041715145111084, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.08}
{'origin_loss': 2.301954507827759, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.08}
{'origin_loss': 2.0452167987823486, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.08}
{'origin_loss': 1.9081170558929443, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.08}
{'loss': 202.5868, 'grad_norm': 0.36531978845596313, 'learning_rate': 6.480000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.0450408458709717, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.08}
{'origin_loss': 2.0117671489715576, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.08}
{'origin_loss': 2.291355848312378, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 1.9179543256759644, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.08}
{'origin_loss': 2.0311901569366455, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.08}
{'origin_loss': 2.2636024951934814, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.08}
{'origin_loss': 1.980606198310852, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.08}
{'origin_loss': 1.6219266653060913, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.08}
{'loss': 201.3954, 'grad_norm': 0.47428926825523376, 'learning_rate': 6.460000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.01200532913208, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.08}
{'origin_loss': 1.9478822946548462, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.08}
{'origin_loss': 1.977811574935913, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.08}
{'origin_loss': 2.068742513656616, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 2.060454845428467, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.08}
{'origin_loss': 2.069962978363037, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.08}
{'origin_loss': 1.984038233757019, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.08}
{'origin_loss': 2.1313703060150146, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.08}
{'loss': 202.0784, 'grad_norm': 0.38046950101852417, 'learning_rate': 6.440000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.8879034519195557, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.08}
{'origin_loss': 1.7474440336227417, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.08}
{'origin_loss': 1.819557547569275, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'origin_loss': 1.987830400466919, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.08}
{'origin_loss': 2.079819679260254, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.08}
{'origin_loss': 1.894605040550232, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.08}
{'origin_loss': 1.9822851419448853, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.08}
{'origin_loss': 1.8945928812026978, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.08}
{'loss': 206.3649, 'grad_norm': 0.353809118270874, 'learning_rate': 6.42e-06, 'epoch': 0.08}
{'origin_loss': 1.8626848459243774, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.08}
{'origin_loss': 2.221280097961426, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.08}
{'origin_loss': 2.0413687229156494, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.08}
{'origin_loss': 1.9904416799545288, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.08}
{'origin_loss': 2.1255664825439453, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.08}
{'origin_loss': 2.016472816467285, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 1.942812442779541, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.08}
{'origin_loss': 2.168431282043457, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.08}
{'loss': 206.0461, 'grad_norm': 0.36505019664764404, 'learning_rate': 6.4000000000000006e-06, 'epoch': 0.08}
{'origin_loss': 2.0545027256011963, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.08}
{'origin_loss': 1.9482338428497314, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.08}
{'origin_loss': 2.117356061935425, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.08}
{'origin_loss': 2.0318284034729004, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.08}
{'origin_loss': 2.278543710708618, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 1.9286352396011353, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.08}
{'origin_loss': 1.9869344234466553, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.08}
{'origin_loss': 2.082379102706909, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.08}
{'loss': 205.4442, 'grad_norm': 0.8153581023216248, 'learning_rate': 6.380000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.9369895458221436, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.08}
{'origin_loss': 2.085728168487549, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.08}
{'origin_loss': 2.045393943786621, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.08}
{'origin_loss': 1.9634453058242798, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.08}
{'origin_loss': 2.1119067668914795, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.08}
{'origin_loss': 1.9864091873168945, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.08}
{'origin_loss': 2.0166492462158203, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.08}
{'origin_loss': 1.9295907020568848, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.08}
{'loss': 207.8064, 'grad_norm': 0.38767388463020325, 'learning_rate': 6.360000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.688606858253479, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.08}
{'origin_loss': 2.0431087017059326, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.08}
{'origin_loss': 2.0114850997924805, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.08}
{'origin_loss': 2.04984188079834, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.08}
{'origin_loss': 1.9303555488586426, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.08}
{'origin_loss': 2.0726120471954346, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.08}
{'origin_loss': 2.1777374744415283, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.08}
{'origin_loss': 1.872109293937683, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.08}
{'loss': 206.5901, 'grad_norm': 0.44219452142715454, 'learning_rate': 6.34e-06, 'epoch': 0.08}
{'origin_loss': 2.022679090499878, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.08}
{'origin_loss': 1.9872119426727295, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.08}
{'origin_loss': 2.065566062927246, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.08}
{'origin_loss': 2.1365387439727783, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.08}
{'origin_loss': 1.848942518234253, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.08}
{'origin_loss': 1.9527183771133423, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.08}
{'origin_loss': 2.210670232772827, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.08}
{'origin_loss': 1.8643088340759277, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.08}
{'loss': 207.6673, 'grad_norm': 0.38827499747276306, 'learning_rate': 6.3200000000000005e-06, 'epoch': 0.08}
{'origin_loss': 1.9276213645935059, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.08}
{'origin_loss': 1.960451364517212, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.08}
{'origin_loss': 2.0535476207733154, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.08}
{'origin_loss': 1.9816828966140747, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.08}
{'origin_loss': 2.0994114875793457, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.08}
{'origin_loss': 2.220717668533325, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.08}
{'origin_loss': 2.0346689224243164, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.08}
{'origin_loss': 1.8251724243164062, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'loss': 209.341, 'grad_norm': 0.5440427660942078, 'learning_rate': 6.300000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.9294358491897583, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.08}
{'origin_loss': 1.9609501361846924, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.08}
{'origin_loss': 2.0785608291625977, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.08}
{'origin_loss': 2.101121187210083, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.08}
{'origin_loss': 2.0140480995178223, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.08}
{'origin_loss': 2.097853899002075, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.08}
{'origin_loss': 2.097076177597046, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.08}
{'origin_loss': 2.0454142093658447, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.08}
{'loss': 207.9624, 'grad_norm': 0.38448524475097656, 'learning_rate': 6.280000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.242051839828491, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.08}
{'origin_loss': 2.2195992469787598, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.08}
{'origin_loss': 1.8739358186721802, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.08}
{'origin_loss': 2.018136501312256, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.08}
{'origin_loss': 1.9484037160873413, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.08}
{'origin_loss': 1.6440528631210327, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.08}
{'origin_loss': 1.9946963787078857, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'origin_loss': 2.0241501331329346, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.08}
{'loss': 209.1831, 'grad_norm': 0.40128621459007263, 'learning_rate': 6.26e-06, 'epoch': 0.08}
{'origin_loss': 1.9994968175888062, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.08}
{'origin_loss': 1.7276701927185059, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.08}
{'origin_loss': 1.982549786567688, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.08}
{'origin_loss': 2.1248908042907715, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.08}
{'origin_loss': 2.011303186416626, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.08}
{'origin_loss': 2.1162376403808594, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.08}
{'origin_loss': 1.9188376665115356, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.08}
{'origin_loss': 1.9843800067901611, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.08}
{'loss': 209.9519, 'grad_norm': 0.3735082745552063, 'learning_rate': 6.24e-06, 'epoch': 0.08}
{'origin_loss': 1.9657623767852783, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.08}
{'origin_loss': 1.8933254480361938, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'origin_loss': 2.061809539794922, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.08}
{'origin_loss': 1.984269142150879, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.08}
{'origin_loss': 1.8524192571640015, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.08}
{'origin_loss': 1.907942771911621, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.08}
{'origin_loss': 1.8663078546524048, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.08}
{'origin_loss': 2.1887381076812744, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.08}
{'loss': 211.1994, 'grad_norm': 0.46684518456459045, 'learning_rate': 6.220000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.01568603515625, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.08}
{'origin_loss': 2.051861047744751, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.08}
{'origin_loss': 1.9966039657592773, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.08}
{'origin_loss': 1.7715123891830444, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.08}
{'origin_loss': 1.6641658544540405, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.08}
{'origin_loss': 2.071596622467041, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.08}
{'origin_loss': 2.0914642810821533, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.08}
{'origin_loss': 2.008679151535034, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.08}
{'loss': 211.8652, 'grad_norm': 0.3835090696811676, 'learning_rate': 6.200000000000001e-06, 'epoch': 0.08}
{'origin_loss': 2.0579891204833984, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.08}
{'origin_loss': 2.0636613368988037, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.08}
{'origin_loss': 2.069032669067383, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.08}
{'origin_loss': 2.102069139480591, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.08}
{'origin_loss': 2.100743055343628, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.08}
{'origin_loss': 1.9189411401748657, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.08}
{'origin_loss': 2.0710225105285645, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'origin_loss': 1.876807451248169, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.08}
{'loss': 213.4544, 'grad_norm': 0.4608250558376312, 'learning_rate': 6.18e-06, 'epoch': 0.08}
{'origin_loss': 1.8962615728378296, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.08}
{'origin_loss': 1.5557122230529785, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.08}
{'origin_loss': 2.0794970989227295, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.08}
{'origin_loss': 2.0573348999023438, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.08}
{'origin_loss': 2.1313750743865967, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.08}
{'origin_loss': 2.046096086502075, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.08}
{'origin_loss': 2.051631212234497, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.08}
{'origin_loss': 2.0273659229278564, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.08}
{'loss': 212.715, 'grad_norm': 0.38305553793907166, 'learning_rate': 6.16e-06, 'epoch': 0.08}
{'origin_loss': 1.7212141752243042, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.08}
{'origin_loss': 1.8935168981552124, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.08}
{'origin_loss': 1.9779366254806519, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.08}
{'origin_loss': 1.9084131717681885, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.08}
{'origin_loss': 2.178293466567993, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.08}
{'origin_loss': 1.9797929525375366, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.08}
{'origin_loss': 2.1474173069000244, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.08}
{'origin_loss': 2.0002026557922363, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.08}
{'loss': 212.2415, 'grad_norm': 0.5825039744377136, 'learning_rate': 6.1400000000000005e-06, 'epoch': 0.08}
{'origin_loss': 2.1534175872802734, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.08}
{'origin_loss': 2.044919967651367, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.08}
{'origin_loss': 1.9724421501159668, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.08}
{'origin_loss': 1.94655442237854, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.08}
{'origin_loss': 1.9755572080612183, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.08}
{'origin_loss': 1.995181679725647, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.08}
{'origin_loss': 2.040714979171753, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.08}
{'origin_loss': 1.9945402145385742, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'loss': 213.7498, 'grad_norm': 0.36230769753456116, 'learning_rate': 6.120000000000001e-06, 'epoch': 0.08}
{'origin_loss': 1.710601806640625, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.08}
{'origin_loss': 2.0262417793273926, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.08}
{'origin_loss': 2.1790049076080322, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.08}
{'origin_loss': 1.8834404945373535, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.08}
{'origin_loss': 2.056809902191162, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.08}
{'origin_loss': 2.1167221069335938, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.08}
{'origin_loss': 2.226367712020874, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.08}
{'origin_loss': 1.9028816223144531, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.08}
{'loss': 212.8565, 'grad_norm': 0.32849183678627014, 'learning_rate': 6.1e-06, 'epoch': 0.09}
{'origin_loss': 2.259277820587158, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.09}
{'origin_loss': 2.0465476512908936, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.09}
{'origin_loss': 2.032660722732544, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.09}
{'origin_loss': 2.0216362476348877, 'mask_loss': 0.1788330078125, 'mask_rate': 0.4228515625, 'epoch': 0.09}
{'origin_loss': 1.741510272026062, 'mask_loss': 0.1783447265625, 'mask_rate': 0.42236328125, 'epoch': 0.09}
{'origin_loss': 2.1627676486968994, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.09}
{'origin_loss': 1.9014558792114258, 'mask_loss': 0.17919921875, 'mask_rate': 0.42333984375, 'epoch': 0.09}
{'origin_loss': 2.083052396774292, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.09}
{'loss': 217.3748, 'grad_norm': 0.368170827627182, 'learning_rate': 6.08e-06, 'epoch': 0.09}
{'origin_loss': 1.5402510166168213, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.09}
{'origin_loss': 1.8199690580368042, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.09}
{'origin_loss': 2.0478665828704834, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.09}
{'origin_loss': 1.7828725576400757, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.09}
{'origin_loss': 1.9822378158569336, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.09}
{'origin_loss': 2.1327714920043945, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.09}
{'origin_loss': 2.0859737396240234, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.09}
{'origin_loss': 2.2405805587768555, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.09}
{'loss': 215.5791, 'grad_norm': 0.3969295918941498, 'learning_rate': 6.0600000000000004e-06, 'epoch': 0.09}
{'origin_loss': 2.11535906791687, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.09}
{'origin_loss': 1.7963149547576904, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.09}
{'origin_loss': 2.0051844120025635, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.09}
{'origin_loss': 2.1665139198303223, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.09}
{'origin_loss': 2.0790903568267822, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.09}
{'origin_loss': 2.08982515335083, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.09}
{'origin_loss': 2.0180375576019287, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.09}
{'origin_loss': 2.1246700286865234, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.09}
{'loss': 215.5338, 'grad_norm': 0.3769429624080658, 'learning_rate': 6.040000000000001e-06, 'epoch': 0.09}
{'origin_loss': 2.094759941101074, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.09}
{'origin_loss': 2.0094833374023438, 'mask_loss': 0.1815185546875, 'mask_rate': 0.426025390625, 'epoch': 0.09}
{'origin_loss': 1.9462525844573975, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.09}
{'origin_loss': 2.095651149749756, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.09}
{'origin_loss': 2.077871799468994, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.09}
{'origin_loss': 2.0567688941955566, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.09}
{'origin_loss': 2.11993408203125, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.09}
{'origin_loss': 1.9945560693740845, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.09}
{'loss': 218.3306, 'grad_norm': 210.45057678222656, 'learning_rate': 6.02e-06, 'epoch': 0.09}
{'origin_loss': 1.9725421667099, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.09}
{'origin_loss': 2.1228954792022705, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.09}
{'origin_loss': 1.9728362560272217, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.09}
{'origin_loss': 1.896456003189087, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.09}
{'origin_loss': 1.7261013984680176, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.09}
{'origin_loss': 1.7590034008026123, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.09}
{'origin_loss': 2.034475803375244, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.09}
{'origin_loss': 2.287658929824829, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.09}
{'loss': 213.659, 'grad_norm': 0.46364760398864746, 'learning_rate': 6e-06, 'epoch': 0.09}
{'origin_loss': 1.9298548698425293, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.09}
{'origin_loss': 2.029897928237915, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.09}
{'origin_loss': 2.2054476737976074, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.09}
{'origin_loss': 2.0785980224609375, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.09}
{'origin_loss': 2.0303444862365723, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.09}
{'origin_loss': 1.963075876235962, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.09}
{'origin_loss': 1.9376649856567383, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.09}
{'origin_loss': 2.07613468170166, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.09}
{'loss': 209.172, 'grad_norm': 0.3607555329799652, 'learning_rate': 5.98e-06, 'epoch': 0.09}
{'origin_loss': 2.0191805362701416, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.09}
{'origin_loss': 2.121753454208374, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.09}
{'origin_loss': 2.177828788757324, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.09}
{'origin_loss': 1.7182896137237549, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.09}
{'origin_loss': 1.8987339735031128, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.09}
{'origin_loss': 1.8100358247756958, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.09}
{'origin_loss': 1.953528881072998, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.09}
{'origin_loss': 2.037473440170288, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.09}
{'loss': 204.8108, 'grad_norm': 0.36679428815841675, 'learning_rate': 5.9600000000000005e-06, 'epoch': 0.09}
{'origin_loss': 2.0036771297454834, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.09}
{'origin_loss': 1.9167908430099487, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.09}
{'origin_loss': 1.856696367263794, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.09}
{'origin_loss': 1.9047927856445312, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.09}
{'origin_loss': 2.0121705532073975, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.09}
{'origin_loss': 2.144671678543091, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.09}
{'origin_loss': 1.7875088453292847, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.09}
{'origin_loss': 1.9838286638259888, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.09}
{'loss': 203.795, 'grad_norm': 0.4162136912345886, 'learning_rate': 5.94e-06, 'epoch': 0.09}
{'origin_loss': 2.422297477722168, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.09}
{'origin_loss': 1.9335010051727295, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.09}
{'origin_loss': 2.1688718795776367, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.09}
{'origin_loss': 2.039754629135132, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.09}
{'origin_loss': 1.71315598487854, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.09}
{'origin_loss': 2.023578643798828, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.09}
{'origin_loss': 1.8911170959472656, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.09}
{'origin_loss': 2.2092783451080322, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'loss': 199.5346, 'grad_norm': 0.3896435499191284, 'learning_rate': 5.92e-06, 'epoch': 0.09}
{'origin_loss': 2.096924304962158, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.09}
{'origin_loss': 1.9029793739318848, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.09}
{'origin_loss': 1.9204689264297485, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.09}
{'origin_loss': 1.8284074068069458, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.09}
{'origin_loss': 2.128281354904175, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'origin_loss': 2.220524787902832, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.09}
{'origin_loss': 2.136408567428589, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.09}
{'origin_loss': 2.3186450004577637, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.09}
{'loss': 199.4441, 'grad_norm': 0.39125362038612366, 'learning_rate': 5.9e-06, 'epoch': 0.09}
{'origin_loss': 2.063194990158081, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.09}
{'origin_loss': 2.1290574073791504, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.09}
{'origin_loss': 2.0564517974853516, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.09}
{'origin_loss': 2.0933613777160645, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.09}
{'origin_loss': 2.1644649505615234, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.09}
{'origin_loss': 2.00736927986145, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.09}
{'origin_loss': 1.9399052858352661, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.09}
{'origin_loss': 1.9186667203903198, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.09}
{'loss': 197.1559, 'grad_norm': 0.3749051094055176, 'learning_rate': 5.8800000000000005e-06, 'epoch': 0.09}
{'origin_loss': 2.0945000648498535, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.09}
{'origin_loss': 2.0180187225341797, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.09}
{'origin_loss': 2.0134084224700928, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.09}
{'origin_loss': 2.0516536235809326, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'origin_loss': 2.025394916534424, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.09}
{'origin_loss': 1.8609528541564941, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.09}
{'origin_loss': 1.826522707939148, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.09}
{'origin_loss': 1.9924031496047974, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.09}
{'loss': 197.5479, 'grad_norm': 0.3586617410182953, 'learning_rate': 5.86e-06, 'epoch': 0.09}
{'origin_loss': 1.9281423091888428, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.09}
{'origin_loss': 2.2280170917510986, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.09}
{'origin_loss': 2.1588871479034424, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.09}
{'origin_loss': 2.1905672550201416, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.09}
{'origin_loss': 1.9994957447052002, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.09}
{'origin_loss': 2.0225441455841064, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.09}
{'origin_loss': 2.0248570442199707, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.09}
{'origin_loss': 2.1185483932495117, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.09}
{'loss': 195.037, 'grad_norm': 0.46798476576805115, 'learning_rate': 5.84e-06, 'epoch': 0.09}
{'origin_loss': 2.2445595264434814, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.09}
{'origin_loss': 1.6117931604385376, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.09}
{'origin_loss': 1.6703715324401855, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.09}
{'origin_loss': 1.8547425270080566, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.09}
{'origin_loss': 2.052959680557251, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.09}
{'origin_loss': 1.8199899196624756, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.09}
{'origin_loss': 1.8835252523422241, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'origin_loss': 2.119415044784546, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.09}
{'loss': 194.7978, 'grad_norm': 0.3969908058643341, 'learning_rate': 5.82e-06, 'epoch': 0.09}
{'origin_loss': 2.2936289310455322, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.09}
{'origin_loss': 2.1935572624206543, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.09}
{'origin_loss': 1.949432611465454, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.09}
{'origin_loss': 1.8828150033950806, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.09}
{'origin_loss': 2.1712071895599365, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.09}
{'origin_loss': 1.8886295557022095, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.09}
{'origin_loss': 1.976027250289917, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.09}
{'origin_loss': 2.1976559162139893, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'loss': 194.7097, 'grad_norm': 0.35988888144493103, 'learning_rate': 5.8e-06, 'epoch': 0.09}
{'origin_loss': 2.0877583026885986, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.09}
{'origin_loss': 2.238541841506958, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.09}
{'origin_loss': 2.1482667922973633, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 2.0131962299346924, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.09}
{'origin_loss': 2.0008625984191895, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.09}
{'origin_loss': 2.1546831130981445, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 2.150184392929077, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.09}
{'origin_loss': 2.0912160873413086, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.09}
{'loss': 192.6106, 'grad_norm': 0.3979710042476654, 'learning_rate': 5.78e-06, 'epoch': 0.09}
{'origin_loss': 2.1225857734680176, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.09}
{'origin_loss': 1.9463021755218506, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.09}
{'origin_loss': 1.9955967664718628, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.09}
{'origin_loss': 1.977810263633728, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.09}
{'origin_loss': 1.9608829021453857, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'origin_loss': 1.9685395956039429, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.09}
{'origin_loss': 2.1552822589874268, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.09}
{'origin_loss': 2.1261579990386963, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.09}
{'loss': 193.4379, 'grad_norm': 0.34633398056030273, 'learning_rate': 5.76e-06, 'epoch': 0.09}
{'origin_loss': 1.970578908920288, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.09}
{'origin_loss': 2.2430338859558105, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 2.20987606048584, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.09}
{'origin_loss': 2.0599207878112793, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 2.098576545715332, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.09}
{'origin_loss': 2.0270426273345947, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.09}
{'origin_loss': 2.037795066833496, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.09}
{'origin_loss': 2.176522970199585, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.09}
{'loss': 191.9154, 'grad_norm': 0.35625433921813965, 'learning_rate': 5.74e-06, 'epoch': 0.09}
{'origin_loss': 1.9873555898666382, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 2.2072839736938477, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.09}
{'origin_loss': 2.0219807624816895, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.09}
{'origin_loss': 2.0171127319335938, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.09}
{'origin_loss': 1.7106640338897705, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.09}
{'origin_loss': 2.1271016597747803, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.09}
{'origin_loss': 1.809357762336731, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.09}
{'origin_loss': 2.054705858230591, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.09}
{'loss': 193.1951, 'grad_norm': 0.7187642455101013, 'learning_rate': 5.72e-06, 'epoch': 0.09}
{'origin_loss': 2.145197868347168, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.09}
{'origin_loss': 1.8230048418045044, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.09}
{'origin_loss': 2.0065314769744873, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.09}
{'origin_loss': 1.967793583869934, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.09}
{'origin_loss': 1.970857858657837, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.09}
{'origin_loss': 1.9760814905166626, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.09}
{'origin_loss': 1.9644725322723389, 'mask_loss': 0.1505126953125, 'mask_rate': 0.387939453125, 'epoch': 0.09}
{'origin_loss': 1.9174002408981323, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.09}
{'loss': 192.3777, 'grad_norm': 0.43035295605659485, 'learning_rate': 5.7e-06, 'epoch': 0.09}
{'origin_loss': 1.7503423690795898, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.09}
{'origin_loss': 2.2225379943847656, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.09}
{'origin_loss': 2.0426220893859863, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.09}
{'origin_loss': 1.9297515153884888, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.09}
{'origin_loss': 2.034656524658203, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.09}
{'origin_loss': 1.9664320945739746, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.09}
{'origin_loss': 2.0895888805389404, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.09}
{'origin_loss': 1.7857223749160767, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.09}
{'loss': 192.3058, 'grad_norm': 0.38010165095329285, 'learning_rate': 5.68e-06, 'epoch': 0.09}
{'origin_loss': 2.069782018661499, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.09}
{'origin_loss': 2.154323101043701, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.09}
{'origin_loss': 2.0944464206695557, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.09}
{'origin_loss': 1.9769841432571411, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.09}
{'origin_loss': 1.8023358583450317, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.09}
{'origin_loss': 2.06976056098938, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.09}
{'origin_loss': 2.052170991897583, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.09}
{'origin_loss': 1.8238651752471924, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.09}
{'loss': 192.6305, 'grad_norm': 0.35892254114151, 'learning_rate': 5.66e-06, 'epoch': 0.09}
{'origin_loss': 2.043551445007324, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.09}
{'origin_loss': 1.911924123764038, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.09}
{'origin_loss': 1.950020670890808, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.09}
{'origin_loss': 1.93642258644104, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'origin_loss': 1.8901021480560303, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.09}
{'origin_loss': 2.157029390335083, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.09}
{'origin_loss': 2.2220380306243896, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.09}
{'origin_loss': 1.8136919736862183, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.09}
{'loss': 192.1625, 'grad_norm': 0.37190914154052734, 'learning_rate': 5.64e-06, 'epoch': 0.1}
{'origin_loss': 1.912071943283081, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 2.0722620487213135, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.1}
{'origin_loss': 1.9819170236587524, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.1}
{'origin_loss': 1.8369768857955933, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 1.9674781560897827, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.1}
{'origin_loss': 1.8704053163528442, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 2.1127195358276367, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.1}
{'origin_loss': 2.088608503341675, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.1}
{'loss': 193.7928, 'grad_norm': 0.7942314147949219, 'learning_rate': 5.620000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.045161247253418, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.1}
{'origin_loss': 2.104344606399536, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.1}
{'origin_loss': 1.9813275337219238, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 2.033219575881958, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.1}
{'origin_loss': 1.9833159446716309, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.1}
{'origin_loss': 1.8635855913162231, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.1}
{'origin_loss': 2.1885058879852295, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.1}
{'origin_loss': 1.9844731092453003, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'loss': 193.4605, 'grad_norm': 0.3443833589553833, 'learning_rate': 5.600000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.9225519895553589, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'origin_loss': 2.0920891761779785, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.1}
{'origin_loss': 2.045884847640991, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.1}
{'origin_loss': 1.9142944812774658, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.1}
{'origin_loss': 2.0539276599884033, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.1}
{'origin_loss': 1.9750447273254395, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.1}
{'origin_loss': 1.9091218709945679, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'origin_loss': 1.8356016874313354, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.1}
{'loss': 193.9061, 'grad_norm': 0.3801179826259613, 'learning_rate': 5.580000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.025322914123535, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.1}
{'origin_loss': 2.0543766021728516, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 1.9939364194869995, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'origin_loss': 1.9831382036209106, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.1}
{'origin_loss': 1.9953703880310059, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.1}
{'origin_loss': 2.0201468467712402, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 1.799836277961731, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.1}
{'origin_loss': 1.9884033203125, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'loss': 194.1076, 'grad_norm': 0.3741042912006378, 'learning_rate': 5.560000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.9301843643188477, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.1}
{'origin_loss': 2.095642328262329, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.1}
{'origin_loss': 2.045297145843506, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.1}
{'origin_loss': 1.9698606729507446, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.1}
{'origin_loss': 1.9623678922653198, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.1}
{'origin_loss': 2.202244281768799, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.1}
{'origin_loss': 2.12326717376709, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.1}
{'origin_loss': 2.1709139347076416, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.1}
{'loss': 195.5, 'grad_norm': 0.3793071210384369, 'learning_rate': 5.540000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.280830144882202, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.1}
{'origin_loss': 1.8089230060577393, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.1}
{'origin_loss': 2.0948407649993896, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.1}
{'origin_loss': 2.2442450523376465, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.1}
{'origin_loss': 2.181427240371704, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.1}
{'origin_loss': 2.0958375930786133, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.1}
{'origin_loss': 2.00598406791687, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.1}
{'origin_loss': 2.0383260250091553, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.1}
{'loss': 195.0469, 'grad_norm': 0.36926209926605225, 'learning_rate': 5.5200000000000005e-06, 'epoch': 0.1}
{'origin_loss': 2.0934383869171143, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.1}
{'origin_loss': 1.8551639318466187, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.1}
{'origin_loss': 1.9209972620010376, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.1}
{'origin_loss': 1.8714954853057861, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.1}
{'origin_loss': 2.057267904281616, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.1}
{'origin_loss': 2.220877170562744, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 2.162173271179199, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.1}
{'origin_loss': 2.060636043548584, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.1}
{'loss': 193.9834, 'grad_norm': 0.34200039505958557, 'learning_rate': 5.500000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.7927426099777222, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.1}
{'origin_loss': 2.0556375980377197, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.1}
{'origin_loss': 2.0794014930725098, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.1}
{'origin_loss': 2.054347276687622, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.1}
{'origin_loss': 1.975328803062439, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.1}
{'origin_loss': 1.9718302488327026, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 2.1174559593200684, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.1}
{'origin_loss': 1.724979281425476, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.1}
{'loss': 195.8621, 'grad_norm': 0.35388606786727905, 'learning_rate': 5.480000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.020186185836792, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.1}
{'origin_loss': 2.0638206005096436, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.1}
{'origin_loss': 2.111684799194336, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.1}
{'origin_loss': 2.0561325550079346, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.1}
{'origin_loss': 2.1544694900512695, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 1.9913663864135742, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.1}
{'origin_loss': 1.9959803819656372, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.1}
{'origin_loss': 1.9884048700332642, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.1}
{'loss': 196.2821, 'grad_norm': 0.3665127754211426, 'learning_rate': 5.460000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.972940444946289, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.1}
{'origin_loss': 1.7498070001602173, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 2.0009870529174805, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 2.06526255607605, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'origin_loss': 1.9135048389434814, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 1.7992737293243408, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 2.0508921146392822, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 1.989160418510437, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.1}
{'loss': 195.9271, 'grad_norm': 0.33561864495277405, 'learning_rate': 5.4400000000000004e-06, 'epoch': 0.1}
{'origin_loss': 2.0428848266601562, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.1}
{'origin_loss': 1.9244825839996338, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.1}
{'origin_loss': 2.0084176063537598, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.1}
{'origin_loss': 2.211545467376709, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.1}
{'origin_loss': 2.0617589950561523, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.1}
{'origin_loss': 2.2173070907592773, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.1}
{'origin_loss': 1.8701698780059814, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.1}
{'origin_loss': 1.9088951349258423, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.1}
{'loss': 198.2963, 'grad_norm': 0.36237475275993347, 'learning_rate': 5.420000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.9462087154388428, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.1}
{'origin_loss': 2.080329418182373, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.1}
{'origin_loss': 1.8838640451431274, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 1.8083491325378418, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.1}
{'origin_loss': 1.9952623844146729, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.1}
{'origin_loss': 1.8635505437850952, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.1}
{'origin_loss': 1.9354689121246338, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.1}
{'origin_loss': 2.105949878692627, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.1}
{'loss': 197.3586, 'grad_norm': 0.3625542223453522, 'learning_rate': 5.400000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.0704689025878906, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.1}
{'origin_loss': 1.8753950595855713, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.1}
{'origin_loss': 2.064434766769409, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.1}
{'origin_loss': 1.9146053791046143, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.1}
{'origin_loss': 1.9791781902313232, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.1}
{'origin_loss': 1.9774376153945923, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.1}
{'origin_loss': 1.8977545499801636, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 1.6334880590438843, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'loss': 197.786, 'grad_norm': 0.31568118929862976, 'learning_rate': 5.380000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.9232733249664307, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.0469377040863037, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.1}
{'origin_loss': 2.108110189437866, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.1}
{'origin_loss': 2.0396456718444824, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.1}
{'origin_loss': 1.9429755210876465, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.009328842163086, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.1}
{'origin_loss': 2.245344877243042, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.1}
{'origin_loss': 2.033622980117798, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.1}
{'loss': 198.8249, 'grad_norm': 0.3805415630340576, 'learning_rate': 5.36e-06, 'epoch': 0.1}
{'origin_loss': 2.1951847076416016, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.1}
{'origin_loss': 1.858285903930664, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.1}
{'origin_loss': 1.8565410375595093, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.1}
{'origin_loss': 2.1896109580993652, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.1}
{'origin_loss': 2.023547887802124, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.1}
{'origin_loss': 2.002976655960083, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.1}
{'origin_loss': 2.1808652877807617, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.1}
{'origin_loss': 1.9395490884780884, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.1}
{'loss': 198.2496, 'grad_norm': 0.3710382580757141, 'learning_rate': 5.3400000000000005e-06, 'epoch': 0.1}
{'origin_loss': 2.066394329071045, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 2.295322895050049, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.1}
{'origin_loss': 2.0964841842651367, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.1}
{'origin_loss': 2.009647846221924, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.1}
{'origin_loss': 1.886892557144165, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.1}
{'origin_loss': 1.8686144351959229, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.1}
{'origin_loss': 2.06742262840271, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.1}
{'origin_loss': 2.278534412384033, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.1}
{'loss': 198.618, 'grad_norm': 0.3658286929130554, 'learning_rate': 5.320000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.0422916412353516, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.1}
{'origin_loss': 2.0131258964538574, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.1}
{'origin_loss': 1.858363389968872, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.1}
{'origin_loss': 2.164846420288086, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.1}
{'origin_loss': 1.754327654838562, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.1}
{'origin_loss': 2.1325454711914062, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.1}
{'origin_loss': 2.1753225326538086, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.1}
{'origin_loss': 2.158932685852051, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.1}
{'loss': 200.6781, 'grad_norm': 0.3503478765487671, 'learning_rate': 5.300000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.9888110160827637, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.1}
{'origin_loss': 1.9635274410247803, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.1}
{'origin_loss': 1.863661289215088, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 1.7186810970306396, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.183854103088379, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.1}
{'origin_loss': 1.963735580444336, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 2.109754800796509, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.1}
{'origin_loss': 2.0451953411102295, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.1}
{'loss': 199.3703, 'grad_norm': 0.32795506715774536, 'learning_rate': 5.28e-06, 'epoch': 0.1}
{'origin_loss': 1.862539529800415, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 2.147346019744873, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.1}
{'origin_loss': 2.01918888092041, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.0273995399475098, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.1}
{'origin_loss': 1.9069077968597412, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.1}
{'origin_loss': 1.9736589193344116, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.1}
{'origin_loss': 1.9611754417419434, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.1}
{'origin_loss': 1.942369818687439, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'loss': 201.9801, 'grad_norm': 0.3394777774810791, 'learning_rate': 5.2600000000000005e-06, 'epoch': 0.1}
{'origin_loss': 1.9948062896728516, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.1}
{'origin_loss': 2.106262445449829, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.1}
{'origin_loss': 1.9419505596160889, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.1}
{'origin_loss': 2.057924509048462, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.047548532485962, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.1}
{'origin_loss': 1.959611177444458, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.1}
{'origin_loss': 1.8762847185134888, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.1}
{'origin_loss': 1.9067076444625854, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.1}
{'loss': 200.7989, 'grad_norm': 0.41277047991752625, 'learning_rate': 5.240000000000001e-06, 'epoch': 0.1}
{'origin_loss': 2.0154335498809814, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 1.9232527017593384, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.1}
{'origin_loss': 2.0342483520507812, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.1}
{'origin_loss': 2.065382242202759, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.1}
{'origin_loss': 2.013577699661255, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.1}
{'origin_loss': 1.8525018692016602, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.1}
{'origin_loss': 2.0351014137268066, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.1}
{'origin_loss': 1.7852712869644165, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.1}
{'loss': 201.4656, 'grad_norm': 0.3530890941619873, 'learning_rate': 5.220000000000001e-06, 'epoch': 0.1}
{'origin_loss': 1.782686471939087, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.1}
{'origin_loss': 1.8980590105056763, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.1}
{'origin_loss': 1.9592368602752686, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.1}
{'origin_loss': 1.7396806478500366, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.1}
{'origin_loss': 2.340341806411743, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.1}
{'origin_loss': 2.0518460273742676, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.1}
{'origin_loss': 2.206089496612549, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.1}
{'origin_loss': 1.9854381084442139, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.1}
{'loss': 200.8235, 'grad_norm': 0.3767011761665344, 'learning_rate': 5.2e-06, 'epoch': 0.1}
{'origin_loss': 1.894134759902954, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.1}
{'origin_loss': 2.086996078491211, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.1}
{'origin_loss': 1.8847835063934326, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.1}
{'origin_loss': 1.9265961647033691, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 2.186917304992676, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.1}
{'origin_loss': 1.9572690725326538, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.1}
{'origin_loss': 1.8146662712097168, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.1}
{'origin_loss': 1.9840887784957886, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.1}
{'loss': 203.7482, 'grad_norm': 0.38722530007362366, 'learning_rate': 5.18e-06, 'epoch': 0.11}
{'origin_loss': 2.2324392795562744, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.11}
{'origin_loss': 2.0318732261657715, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.11}
{'origin_loss': 2.170295476913452, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.11}
{'origin_loss': 1.9391417503356934, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.11}
{'origin_loss': 2.048341989517212, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.11}
{'origin_loss': 1.900370717048645, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.11}
{'origin_loss': 1.9401981830596924, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.11}
{'origin_loss': 2.041795015335083, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.11}
{'loss': 202.7099, 'grad_norm': 0.39104321599006653, 'learning_rate': 5.1600000000000006e-06, 'epoch': 0.11}
{'origin_loss': 1.960619330406189, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.11}
{'origin_loss': 1.8638194799423218, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.11}
{'origin_loss': 1.9828708171844482, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.11}
{'origin_loss': 1.8320298194885254, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.11}
{'origin_loss': 2.016753673553467, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.11}
{'origin_loss': 2.169583559036255, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.11}
{'origin_loss': 1.9959378242492676, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.11}
{'origin_loss': 1.8921561241149902, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.11}
{'loss': 203.683, 'grad_norm': 0.42397385835647583, 'learning_rate': 5.140000000000001e-06, 'epoch': 0.11}
{'origin_loss': 2.0688607692718506, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.11}
{'origin_loss': 1.7990834712982178, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.11}
{'origin_loss': 2.1095759868621826, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.11}
{'origin_loss': 2.067540407180786, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.11}
{'origin_loss': 1.9959028959274292, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.11}
{'origin_loss': 1.9374072551727295, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.11}
{'origin_loss': 2.057523012161255, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.11}
{'origin_loss': 2.052044153213501, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.11}
{'loss': 203.7766, 'grad_norm': 0.34580641984939575, 'learning_rate': 5.12e-06, 'epoch': 0.11}
{'origin_loss': 2.052098512649536, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.11}
{'origin_loss': 1.904313564300537, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.11}
{'origin_loss': 2.1143319606781006, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.11}
{'origin_loss': 2.0036652088165283, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.11}
{'origin_loss': 2.127321720123291, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.11}
{'origin_loss': 2.132106304168701, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.11}
{'origin_loss': 1.965732455253601, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.11}
{'origin_loss': 2.1915135383605957, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.11}
{'loss': 205.0145, 'grad_norm': 0.37427911162376404, 'learning_rate': 5.1e-06, 'epoch': 0.11}
{'origin_loss': 2.15712308883667, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.11}
{'origin_loss': 2.0562617778778076, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.11}
{'origin_loss': 1.8558956384658813, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.11}
{'origin_loss': 2.1162893772125244, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'origin_loss': 2.088261842727661, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.11}
{'origin_loss': 1.9813106060028076, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.11}
{'origin_loss': 2.2505686283111572, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.11}
{'origin_loss': 2.0500569343566895, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.11}
{'loss': 203.757, 'grad_norm': 0.32905060052871704, 'learning_rate': 5.0800000000000005e-06, 'epoch': 0.11}
{'origin_loss': 1.9554132223129272, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.11}
{'origin_loss': 2.0848515033721924, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.11}
{'origin_loss': 2.0116019248962402, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.11}
{'origin_loss': 2.1713688373565674, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.11}
{'origin_loss': 1.915455937385559, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.11}
{'origin_loss': 1.7523088455200195, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.11}
{'origin_loss': 1.9135148525238037, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.11}
{'origin_loss': 2.0618162155151367, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'loss': 204.9208, 'grad_norm': 0.382269948720932, 'learning_rate': 5.060000000000001e-06, 'epoch': 0.11}
{'origin_loss': 2.044384717941284, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 1.9774593114852905, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.11}
{'origin_loss': 2.002351760864258, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'origin_loss': 2.007093667984009, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.11}
{'origin_loss': 1.7168539762496948, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.11}
{'origin_loss': 1.895403265953064, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.11}
{'origin_loss': 2.237736225128174, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.11}
{'origin_loss': 2.096958637237549, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.11}
{'loss': 205.3723, 'grad_norm': 0.3901864290237427, 'learning_rate': 5.04e-06, 'epoch': 0.11}
{'origin_loss': 1.6278860569000244, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.11}
{'origin_loss': 1.8385053873062134, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'origin_loss': 1.8989018201828003, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.11}
{'origin_loss': 1.9656569957733154, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.11}
{'origin_loss': 1.9305775165557861, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.11}
{'origin_loss': 2.030349016189575, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.11}
{'origin_loss': 1.9821289777755737, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.11}
{'origin_loss': 1.920596718788147, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.11}
{'loss': 205.4306, 'grad_norm': 0.4858621060848236, 'learning_rate': 5.02e-06, 'epoch': 0.11}
{'origin_loss': 1.9505995512008667, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.11}
{'origin_loss': 1.889073133468628, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.11}
{'origin_loss': 2.1654460430145264, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.11}
{'origin_loss': 2.091390371322632, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.11}
{'origin_loss': 2.130825996398926, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.11}
{'origin_loss': 1.8665539026260376, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 2.0301618576049805, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.11}
{'origin_loss': 1.9900994300842285, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.11}
{'loss': 206.9049, 'grad_norm': 0.3356584906578064, 'learning_rate': 5e-06, 'epoch': 0.11}
{'origin_loss': 1.8564614057540894, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'origin_loss': 2.1282784938812256, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 2.064912796020508, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.11}
{'origin_loss': 2.043412446975708, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 1.9708431959152222, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.11}
{'origin_loss': 1.7670830488204956, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.11}
{'origin_loss': 2.094269037246704, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.11}
{'origin_loss': 2.070927619934082, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.11}
{'loss': 207.1402, 'grad_norm': 0.4742663502693176, 'learning_rate': 4.980000000000001e-06, 'epoch': 0.11}
{'origin_loss': 2.0677144527435303, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.11}
{'origin_loss': 2.0096778869628906, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.11}
{'origin_loss': 1.9305036067962646, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.11}
{'origin_loss': 2.1377952098846436, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.11}
{'origin_loss': 2.085232973098755, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.11}
{'origin_loss': 1.9768182039260864, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.11}
{'origin_loss': 2.0048410892486572, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.11}
{'origin_loss': 1.8164259195327759, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.11}
{'loss': 208.3318, 'grad_norm': 0.3834433853626251, 'learning_rate': 4.960000000000001e-06, 'epoch': 0.11}
{'origin_loss': 1.9850980043411255, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.11}
{'origin_loss': 2.1380529403686523, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.11}
{'origin_loss': 2.0252199172973633, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.11}
{'origin_loss': 1.795708179473877, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.11}
{'origin_loss': 2.1463263034820557, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.11}
{'origin_loss': 2.123026132583618, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.11}
{'origin_loss': 1.9078900814056396, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.11}
{'origin_loss': 2.0041396617889404, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.11}
{'loss': 208.2657, 'grad_norm': 0.38880544900894165, 'learning_rate': 4.94e-06, 'epoch': 0.11}
{'origin_loss': 1.9889204502105713, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.11}
{'origin_loss': 1.9559739828109741, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.11}
{'origin_loss': 2.048055410385132, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.11}
{'origin_loss': 1.9090766906738281, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.11}
{'origin_loss': 1.921477198600769, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.11}
{'origin_loss': 1.768947720527649, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.11}
{'origin_loss': 1.8178688287734985, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.11}
{'origin_loss': 1.9467294216156006, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.11}
{'loss': 210.3571, 'grad_norm': 0.34121939539909363, 'learning_rate': 4.92e-06, 'epoch': 0.11}
{'origin_loss': 2.086441993713379, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.11}
{'origin_loss': 2.015199899673462, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 1.9670964479446411, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.11}
{'origin_loss': 2.1530158519744873, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.11}
{'origin_loss': 1.8921306133270264, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.11}
{'origin_loss': 2.080514907836914, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.11}
{'origin_loss': 1.925949215888977, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.11}
{'origin_loss': 2.0697195529937744, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.11}
{'loss': 210.805, 'grad_norm': 0.3799094259738922, 'learning_rate': 4.9000000000000005e-06, 'epoch': 0.11}
{'origin_loss': 1.9715393781661987, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.11}
{'origin_loss': 2.0704081058502197, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.11}
{'origin_loss': 2.1319997310638428, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.11}
{'origin_loss': 2.040388822555542, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.11}
{'origin_loss': 2.2447445392608643, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.11}
{'origin_loss': 2.02223801612854, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.11}
{'origin_loss': 1.8873485326766968, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.11}
{'origin_loss': 2.0481977462768555, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.11}
{'loss': 210.5209, 'grad_norm': 0.3325118124485016, 'learning_rate': 4.880000000000001e-06, 'epoch': 0.11}
{'origin_loss': 2.042172908782959, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.11}
{'origin_loss': 2.160214424133301, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.11}
{'origin_loss': 1.9609156847000122, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.11}
{'origin_loss': 1.9647196531295776, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.11}
{'origin_loss': 2.048863172531128, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.11}
{'origin_loss': 2.094505548477173, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.11}
{'origin_loss': 1.9053356647491455, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.11}
{'origin_loss': 1.937337040901184, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.11}
{'loss': 210.733, 'grad_norm': 0.4777592122554779, 'learning_rate': 4.86e-06, 'epoch': 0.11}
{'origin_loss': 1.9723607301712036, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.11}
{'origin_loss': 1.8302826881408691, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.11}
{'origin_loss': 1.9319921731948853, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.11}
{'origin_loss': 1.8656654357910156, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.11}
{'origin_loss': 2.0489253997802734, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 1.9230461120605469, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.11}
{'origin_loss': 2.066385507583618, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.11}
{'origin_loss': 2.0222420692443848, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.11}
{'loss': 212.0826, 'grad_norm': 0.38452616333961487, 'learning_rate': 4.84e-06, 'epoch': 0.11}
{'origin_loss': 1.9876768589019775, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.11}
{'origin_loss': 1.8268948793411255, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.11}
{'origin_loss': 1.9552795886993408, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.11}
{'origin_loss': 2.1241588592529297, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.11}
{'origin_loss': 2.0303735733032227, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 1.9033631086349487, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 2.1369054317474365, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 2.1081409454345703, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.11}
{'loss': 211.6028, 'grad_norm': 0.37185725569725037, 'learning_rate': 4.8200000000000004e-06, 'epoch': 0.11}
{'origin_loss': 2.152409076690674, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.11}
{'origin_loss': 1.8876726627349854, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.11}
{'origin_loss': 2.169567108154297, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.11}
{'origin_loss': 2.0263123512268066, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.11}
{'origin_loss': 1.7582478523254395, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.11}
{'origin_loss': 1.916825771331787, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.11}
{'origin_loss': 1.955733060836792, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.11}
{'origin_loss': 2.052142858505249, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.11}
{'loss': 211.8336, 'grad_norm': 0.3605596125125885, 'learning_rate': 4.800000000000001e-06, 'epoch': 0.11}
{'origin_loss': 2.0957183837890625, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.11}
{'origin_loss': 2.0305542945861816, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.11}
{'origin_loss': 2.0947632789611816, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 2.076296091079712, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.11}
{'origin_loss': 2.036468267440796, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.11}
{'origin_loss': 2.0513837337493896, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.11}
{'origin_loss': 2.0975284576416016, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.11}
{'origin_loss': 1.9168838262557983, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.11}
{'loss': 213.2218, 'grad_norm': 0.3665279448032379, 'learning_rate': 4.78e-06, 'epoch': 0.11}
{'origin_loss': 1.7070624828338623, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.11}
{'origin_loss': 2.0773262977600098, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.11}
{'origin_loss': 1.8620556592941284, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.11}
{'origin_loss': 1.859703779220581, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.11}
{'origin_loss': 2.1756324768066406, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.11}
{'origin_loss': 1.843229055404663, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.11}
{'origin_loss': 2.146697521209717, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.11}
{'origin_loss': 2.297517776489258, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.11}
{'loss': 212.043, 'grad_norm': 0.40232914686203003, 'learning_rate': 4.76e-06, 'epoch': 0.11}
{'origin_loss': 1.9318163394927979, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.11}
{'origin_loss': 1.864469051361084, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.11}
{'origin_loss': 1.8921071290969849, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.11}
{'origin_loss': 1.80447256565094, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.11}
{'origin_loss': 2.0928971767425537, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.11}
{'origin_loss': 2.005516767501831, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.11}
{'origin_loss': 1.7298802137374878, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.11}
{'origin_loss': 1.9627097845077515, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.11}
{'loss': 213.473, 'grad_norm': 0.35726842284202576, 'learning_rate': 4.74e-06, 'epoch': 0.11}
{'origin_loss': 2.0782968997955322, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.11}
{'origin_loss': 2.1232550144195557, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.11}
{'origin_loss': 2.048438787460327, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.11}
{'origin_loss': 2.0308125019073486, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.11}
{'origin_loss': 1.9747165441513062, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.11}
{'origin_loss': 1.9743037223815918, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.11}
{'origin_loss': 2.0655441284179688, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.11}
{'origin_loss': 1.9854272603988647, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.11}
{'loss': 211.9413, 'grad_norm': 0.3520333170890808, 'learning_rate': 4.7200000000000005e-06, 'epoch': 0.12}
{'origin_loss': 2.062592029571533, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.12}
{'origin_loss': 1.9699063301086426, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.12}
{'origin_loss': 1.817085862159729, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.12}
{'origin_loss': 1.9125620126724243, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.12}
{'origin_loss': 2.0352814197540283, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.12}
{'origin_loss': 1.8634549379348755, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.12}
{'origin_loss': 1.8246537446975708, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.12}
{'origin_loss': 2.018584966659546, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.12}
{'loss': 215.2974, 'grad_norm': 0.47031232714653015, 'learning_rate': 4.7e-06, 'epoch': 0.12}
{'origin_loss': 2.139498472213745, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.12}
{'origin_loss': 1.9224385023117065, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.12}
{'origin_loss': 1.9388169050216675, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.12}
{'origin_loss': 1.9558991193771362, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.12}
{'origin_loss': 1.848138689994812, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.12}
{'origin_loss': 2.022700071334839, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.12}
{'origin_loss': 1.82960045337677, 'mask_loss': 0.17919921875, 'mask_rate': 0.42333984375, 'epoch': 0.12}
{'origin_loss': 2.068187952041626, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.12}
{'loss': 214.2313, 'grad_norm': 0.3238731026649475, 'learning_rate': 4.680000000000001e-06, 'epoch': 0.12}
{'origin_loss': 1.9186121225357056, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.12}
{'origin_loss': 2.0676181316375732, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.12}
{'origin_loss': 2.059986114501953, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.12}
{'origin_loss': 2.045156717300415, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.12}
{'origin_loss': 1.934103012084961, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.12}
{'origin_loss': 2.03385853767395, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.12}
{'origin_loss': 2.041663885116577, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.12}
{'origin_loss': 1.6949020624160767, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.12}
{'loss': 215.5057, 'grad_norm': 118.10279846191406, 'learning_rate': 4.66e-06, 'epoch': 0.12}
{'origin_loss': 2.185990333557129, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.12}
{'origin_loss': 1.9507853984832764, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.12}
{'origin_loss': 2.1086883544921875, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.12}
{'origin_loss': 1.7229849100112915, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.12}
{'origin_loss': 1.9483996629714966, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.12}
{'origin_loss': 2.1214354038238525, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.12}
{'origin_loss': 2.1106815338134766, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.12}
{'origin_loss': 2.1113197803497314, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.12}
{'loss': 211.72, 'grad_norm': 0.36700427532196045, 'learning_rate': 4.6400000000000005e-06, 'epoch': 0.12}
{'origin_loss': 2.0009765625, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.12}
{'origin_loss': 2.0472424030303955, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.12}
{'origin_loss': 2.069518804550171, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.12}
{'origin_loss': 1.8477026224136353, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.12}
{'origin_loss': 1.9832695722579956, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.12}
{'origin_loss': 1.804917573928833, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.12}
{'origin_loss': 1.872119426727295, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.12}
{'origin_loss': 2.290933132171631, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.12}
{'loss': 210.2708, 'grad_norm': 0.3631073236465454, 'learning_rate': 4.620000000000001e-06, 'epoch': 0.12}
{'origin_loss': 1.9923425912857056, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.12}
{'origin_loss': 1.7489731311798096, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.12}
{'origin_loss': 2.024381637573242, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.12}
{'origin_loss': 2.113908052444458, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.12}
{'origin_loss': 2.204639196395874, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.12}
{'origin_loss': 2.1324222087860107, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.12}
{'origin_loss': 2.0744426250457764, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.12}
{'origin_loss': 2.0536277294158936, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.12}
{'loss': 207.0587, 'grad_norm': 0.31867238879203796, 'learning_rate': 4.600000000000001e-06, 'epoch': 0.12}
{'origin_loss': 1.8213698863983154, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.12}
{'origin_loss': 2.0715880393981934, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.12}
{'origin_loss': 2.118635654449463, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.12}
{'origin_loss': 2.023010015487671, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.12}
{'origin_loss': 2.0184073448181152, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.12}
{'origin_loss': 2.0235815048217773, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.12}
{'origin_loss': 2.5039923191070557, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.12}
{'origin_loss': 2.1417007446289062, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.12}
{'loss': 205.4809, 'grad_norm': 0.36695730686187744, 'learning_rate': 4.58e-06, 'epoch': 0.12}
{'origin_loss': 1.9748928546905518, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.12}
{'origin_loss': 2.201465129852295, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.12}
{'origin_loss': 2.0583014488220215, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.12}
{'origin_loss': 2.141199827194214, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.12}
{'origin_loss': 1.9190951585769653, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.12}
{'origin_loss': 2.0312163829803467, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.12}
{'origin_loss': 2.0033557415008545, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.12}
{'origin_loss': 2.151653528213501, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.12}
{'loss': 202.3258, 'grad_norm': 0.3520478308200836, 'learning_rate': 4.56e-06, 'epoch': 0.12}
{'origin_loss': 1.6910429000854492, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.12}
{'origin_loss': 2.1002395153045654, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.12}
{'origin_loss': 1.9785881042480469, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.12}
{'origin_loss': 2.1105759143829346, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.12}
{'origin_loss': 2.1725046634674072, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.12}
{'origin_loss': 2.0577945709228516, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.12}
{'origin_loss': 1.7727655172348022, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.12}
{'origin_loss': 2.012260913848877, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.12}
{'loss': 201.2526, 'grad_norm': 0.8225442171096802, 'learning_rate': 4.540000000000001e-06, 'epoch': 0.12}
{'origin_loss': 1.9563026428222656, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.12}
{'origin_loss': 1.7687617540359497, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.12}
{'origin_loss': 2.095817804336548, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.12}
{'origin_loss': 1.7639226913452148, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.12}
{'origin_loss': 2.088317394256592, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.12}
{'origin_loss': 1.6925495862960815, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.12}
{'origin_loss': 2.0435268878936768, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.12}
{'origin_loss': 2.0023274421691895, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.12}
{'loss': 200.9577, 'grad_norm': 0.36874300241470337, 'learning_rate': 4.520000000000001e-06, 'epoch': 0.12}
{'origin_loss': 2.046753168106079, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.12}
{'origin_loss': 1.9794583320617676, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.12}
{'origin_loss': 2.128664493560791, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 2.119260311126709, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.12}
{'origin_loss': 2.0863194465637207, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 2.1173107624053955, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.12}
{'origin_loss': 2.1476800441741943, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.12}
{'origin_loss': 2.0166900157928467, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'loss': 199.8459, 'grad_norm': 0.3773726224899292, 'learning_rate': 4.5e-06, 'epoch': 0.12}
{'origin_loss': 2.1387696266174316, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.12}
{'origin_loss': 2.0354442596435547, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.12}
{'origin_loss': 1.9774224758148193, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'origin_loss': 2.0767807960510254, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.12}
{'origin_loss': 1.9358229637145996, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.12}
{'origin_loss': 1.98847234249115, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.12}
{'origin_loss': 2.240111827850342, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.12}
{'origin_loss': 1.96986722946167, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.12}
{'loss': 197.9203, 'grad_norm': 0.3347685933113098, 'learning_rate': 4.48e-06, 'epoch': 0.12}
{'origin_loss': 2.1300837993621826, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.12}
{'origin_loss': 1.8973976373672485, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.12}
{'origin_loss': 2.1849822998046875, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.12}
{'origin_loss': 2.117560386657715, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.12}
{'origin_loss': 1.992363452911377, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.12}
{'origin_loss': 2.2143778800964355, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.12}
{'origin_loss': 2.120185375213623, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.12}
{'origin_loss': 2.0723938941955566, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.12}
{'loss': 198.9818, 'grad_norm': 0.36258891224861145, 'learning_rate': 4.4600000000000005e-06, 'epoch': 0.12}
{'origin_loss': 2.037053346633911, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.12}
{'origin_loss': 1.907453179359436, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.12}
{'origin_loss': 2.0183680057525635, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.12}
{'origin_loss': 1.9775733947753906, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.12}
{'origin_loss': 1.9856810569763184, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.12}
{'origin_loss': 1.9994875192642212, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.12}
{'origin_loss': 2.0604770183563232, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.12}
{'origin_loss': 1.9970892667770386, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.12}
{'loss': 197.7166, 'grad_norm': 0.4573568105697632, 'learning_rate': 4.440000000000001e-06, 'epoch': 0.12}
{'origin_loss': 2.0915610790252686, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.12}
{'origin_loss': 2.086648464202881, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.12}
{'origin_loss': 2.1085948944091797, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.12}
{'origin_loss': 2.1231322288513184, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.12}
{'origin_loss': 2.064828872680664, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.12}
{'origin_loss': 2.3207039833068848, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.12}
{'origin_loss': 1.987351655960083, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.12}
{'origin_loss': 1.8616394996643066, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.12}
{'loss': 196.4868, 'grad_norm': 0.333689421415329, 'learning_rate': 4.42e-06, 'epoch': 0.12}
{'origin_loss': 1.8616032600402832, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'origin_loss': 1.965492844581604, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'origin_loss': 2.036731243133545, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.12}
{'origin_loss': 2.0782129764556885, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.12}
{'origin_loss': 2.108747720718384, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.12}
{'origin_loss': 2.0743417739868164, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 2.131052255630493, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.12}
{'origin_loss': 2.102282762527466, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.12}
{'loss': 196.8573, 'grad_norm': 0.3524250388145447, 'learning_rate': 4.4e-06, 'epoch': 0.12}
{'origin_loss': 1.8381632566452026, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.12}
{'origin_loss': 1.7550370693206787, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.12}
{'origin_loss': 1.8863459825515747, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'origin_loss': 1.8588577508926392, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.12}
{'origin_loss': 1.9665191173553467, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.12}
{'origin_loss': 2.0093629360198975, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.12}
{'origin_loss': 2.0333383083343506, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.12}
{'origin_loss': 2.1553776264190674, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'loss': 195.641, 'grad_norm': 0.39467641711235046, 'learning_rate': 4.38e-06, 'epoch': 0.12}
{'origin_loss': 1.8389370441436768, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.12}
{'origin_loss': 1.8775157928466797, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.12}
{'origin_loss': 1.8199548721313477, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.12}
{'origin_loss': 2.0809524059295654, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.12}
{'origin_loss': 1.9922423362731934, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.12}
{'origin_loss': 1.9546095132827759, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.12}
{'origin_loss': 2.199026346206665, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.12}
{'origin_loss': 2.135197162628174, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.12}
{'loss': 196.9404, 'grad_norm': 0.3380136787891388, 'learning_rate': 4.360000000000001e-06, 'epoch': 0.12}
{'origin_loss': 2.029578447341919, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.12}
{'origin_loss': 2.2881669998168945, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.12}
{'origin_loss': 2.132079839706421, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.12}
{'origin_loss': 2.2058985233306885, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.12}
{'origin_loss': 1.9699828624725342, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 2.0047707557678223, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.12}
{'origin_loss': 1.809706211090088, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.12}
{'origin_loss': 2.1171932220458984, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.12}
{'loss': 197.8822, 'grad_norm': 0.3472007215023041, 'learning_rate': 4.34e-06, 'epoch': 0.12}
{'origin_loss': 1.8001072406768799, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.12}
{'origin_loss': 1.9987918138504028, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.12}
{'origin_loss': 2.0754215717315674, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.12}
{'origin_loss': 1.6949352025985718, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.12}
{'origin_loss': 1.9646210670471191, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.12}
{'origin_loss': 1.895827293395996, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 1.9677034616470337, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.12}
{'origin_loss': 2.0453996658325195, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.12}
{'loss': 198.5241, 'grad_norm': 0.704555094242096, 'learning_rate': 4.32e-06, 'epoch': 0.12}
{'origin_loss': 1.99215829372406, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.12}
{'origin_loss': 2.2215590476989746, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.12}
{'origin_loss': 1.9307160377502441, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.12}
{'origin_loss': 1.910644292831421, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.12}
{'origin_loss': 2.137537717819214, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.12}
{'origin_loss': 2.0139739513397217, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.12}
{'origin_loss': 2.1011054515838623, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.12}
{'origin_loss': 2.044312000274658, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.12}
{'loss': 196.7627, 'grad_norm': 0.34774911403656006, 'learning_rate': 4.3e-06, 'epoch': 0.12}
{'origin_loss': 1.8375744819641113, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.12}
{'origin_loss': 1.9049934148788452, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.12}
{'origin_loss': 2.0784804821014404, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.12}
{'origin_loss': 1.8709795475006104, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.12}
{'origin_loss': 1.8691178560256958, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.12}
{'origin_loss': 1.9378563165664673, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.12}
{'origin_loss': 1.9441745281219482, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.12}
{'origin_loss': 2.045182228088379, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.12}
{'loss': 199.6861, 'grad_norm': 0.37653931975364685, 'learning_rate': 4.2800000000000005e-06, 'epoch': 0.12}
{'origin_loss': 2.232771396636963, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.12}
{'origin_loss': 1.7737011909484863, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.12}
{'origin_loss': 2.1215837001800537, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.12}
{'origin_loss': 2.0641486644744873, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.12}
{'origin_loss': 2.229846477508545, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.12}
{'origin_loss': 1.8885841369628906, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.12}
{'origin_loss': 1.9720091819763184, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.12}
{'origin_loss': 2.0558249950408936, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.12}
{'loss': 197.3861, 'grad_norm': 0.3639388978481293, 'learning_rate': 4.26e-06, 'epoch': 0.13}
{'origin_loss': 1.9297369718551636, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.13}
{'origin_loss': 1.9570767879486084, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.13}
{'origin_loss': 2.026892900466919, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.13}
{'origin_loss': 2.0026676654815674, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.13}
{'origin_loss': 2.079629421234131, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 2.085406541824341, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.13}
{'origin_loss': 1.9177016019821167, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.13}
{'origin_loss': 2.102633237838745, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.13}
{'loss': 197.0596, 'grad_norm': 0.35973793268203735, 'learning_rate': 4.24e-06, 'epoch': 0.13}
{'origin_loss': 2.1146645545959473, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.13}
{'origin_loss': 1.9132258892059326, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.13}
{'origin_loss': 2.046437978744507, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.13}
{'origin_loss': 1.965548038482666, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.13}
{'origin_loss': 2.164818048477173, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.13}
{'origin_loss': 1.9915525913238525, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.13}
{'origin_loss': 2.003734588623047, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 1.847325086593628, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.13}
{'loss': 197.8184, 'grad_norm': 0.3320672810077667, 'learning_rate': 4.22e-06, 'epoch': 0.13}
{'origin_loss': 2.140747547149658, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.13}
{'origin_loss': 2.0462756156921387, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.13}
{'origin_loss': 1.9862935543060303, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 1.9721605777740479, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.13}
{'origin_loss': 2.0600662231445312, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.157808303833008, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.13}
{'origin_loss': 2.0030570030212402, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.13}
{'origin_loss': 1.9584585428237915, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'loss': 199.4312, 'grad_norm': 0.38371679186820984, 'learning_rate': 4.2000000000000004e-06, 'epoch': 0.13}
{'origin_loss': 1.9498682022094727, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.13}
{'origin_loss': 2.0749149322509766, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.13}
{'origin_loss': 1.9291023015975952, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 2.0554587841033936, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.13}
{'origin_loss': 2.0450632572174072, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 2.0659892559051514, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.13}
{'origin_loss': 2.051015615463257, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.13}
{'origin_loss': 2.1489880084991455, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.13}
{'loss': 198.04, 'grad_norm': 0.3244228661060333, 'learning_rate': 4.18e-06, 'epoch': 0.13}
{'origin_loss': 1.8991271257400513, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.13}
{'origin_loss': 2.010162830352783, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 2.0057592391967773, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.13}
{'origin_loss': 2.167062282562256, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.13}
{'origin_loss': 1.9041627645492554, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.13}
{'origin_loss': 2.1281094551086426, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.13}
{'origin_loss': 1.9288722276687622, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 1.9273779392242432, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.13}
{'loss': 198.2932, 'grad_norm': 0.345640629529953, 'learning_rate': 4.16e-06, 'epoch': 0.13}
{'origin_loss': 2.0259921550750732, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.13}
{'origin_loss': 1.9394804239273071, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.13}
{'origin_loss': 2.0134999752044678, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.13}
{'origin_loss': 2.1041619777679443, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.13}
{'origin_loss': 1.8922374248504639, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.13}
{'origin_loss': 2.196345567703247, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.13}
{'origin_loss': 2.0849008560180664, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.13}
{'origin_loss': 2.0679144859313965, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.13}
{'loss': 197.1968, 'grad_norm': 0.3535868525505066, 'learning_rate': 4.14e-06, 'epoch': 0.13}
{'origin_loss': 2.082427740097046, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 1.7306549549102783, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.13}
{'origin_loss': 1.7932732105255127, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.13}
{'origin_loss': 1.7499473094940186, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.13}
{'origin_loss': 2.0190465450286865, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.13}
{'origin_loss': 2.158783435821533, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 2.0298566818237305, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.13}
{'origin_loss': 2.0801455974578857, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.13}
{'loss': 199.5024, 'grad_norm': 0.35123541951179504, 'learning_rate': 4.12e-06, 'epoch': 0.13}
{'origin_loss': 2.0448367595672607, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.13}
{'origin_loss': 2.108248233795166, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 1.9385790824890137, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.13}
{'origin_loss': 1.95121431350708, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.13}
{'origin_loss': 1.9194358587265015, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.13}
{'origin_loss': 1.8388056755065918, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 1.9305427074432373, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.165114641189575, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.13}
{'loss': 199.4402, 'grad_norm': 0.33102551102638245, 'learning_rate': 4.1e-06, 'epoch': 0.13}
{'origin_loss': 2.185868740081787, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 1.8476401567459106, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.13}
{'origin_loss': 1.8980960845947266, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.13}
{'origin_loss': 2.0603413581848145, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.13}
{'origin_loss': 1.9492524862289429, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.13}
{'origin_loss': 2.043443202972412, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.075380802154541, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 1.9996328353881836, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.13}
{'loss': 201.3512, 'grad_norm': 0.40245965123176575, 'learning_rate': 4.08e-06, 'epoch': 0.13}
{'origin_loss': 1.6104176044464111, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.13}
{'origin_loss': 2.1507840156555176, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.13}
{'origin_loss': 1.9758007526397705, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.13}
{'origin_loss': 1.9051940441131592, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.13}
{'origin_loss': 2.0312609672546387, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.13}
{'origin_loss': 1.924375295639038, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.13}
{'origin_loss': 1.9994944334030151, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.13}
{'origin_loss': 1.9183666706085205, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.13}
{'loss': 201.9551, 'grad_norm': 0.40674957633018494, 'learning_rate': 4.060000000000001e-06, 'epoch': 0.13}
{'origin_loss': 1.9109501838684082, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.13}
{'origin_loss': 2.2154781818389893, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 1.9875978231430054, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.13}
{'origin_loss': 2.1816046237945557, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.13}
{'origin_loss': 1.8821189403533936, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.13}
{'origin_loss': 2.0269553661346436, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.044099807739258, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.13}
{'origin_loss': 2.103029489517212, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.13}
{'loss': 201.544, 'grad_norm': 0.37704411149024963, 'learning_rate': 4.04e-06, 'epoch': 0.13}
{'origin_loss': 1.696449875831604, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.13}
{'origin_loss': 1.8470988273620605, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.13}
{'origin_loss': 1.9631316661834717, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 1.7360219955444336, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.13}
{'origin_loss': 2.147122383117676, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.13}
{'origin_loss': 2.0554895401000977, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.13}
{'origin_loss': 2.0103580951690674, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.13}
{'origin_loss': 1.8690001964569092, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.13}
{'loss': 201.1187, 'grad_norm': 0.32443034648895264, 'learning_rate': 4.0200000000000005e-06, 'epoch': 0.13}
{'origin_loss': 2.109114170074463, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 2.0256710052490234, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.049384117126465, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.167097568511963, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 1.734906792640686, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.13}
{'origin_loss': 2.1174113750457764, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 2.139072895050049, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.13}
{'origin_loss': 2.1326146125793457, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.13}
{'loss': 200.4032, 'grad_norm': 0.3505096137523651, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.13}
{'origin_loss': 1.894047737121582, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.13}
{'origin_loss': 2.0692012310028076, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 2.026336193084717, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.13}
{'origin_loss': 1.7981024980545044, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.13}
{'origin_loss': 2.13561749458313, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 2.0795984268188477, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'origin_loss': 1.9775187969207764, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.13}
{'origin_loss': 1.7601042985916138, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.13}
{'loss': 202.9676, 'grad_norm': 0.3326042592525482, 'learning_rate': 3.980000000000001e-06, 'epoch': 0.13}
{'origin_loss': 2.0907270908355713, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.13}
{'origin_loss': 1.9872421026229858, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.13}
{'origin_loss': 1.8524891138076782, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'origin_loss': 2.0923051834106445, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.13}
{'origin_loss': 1.9460582733154297, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.13}
{'origin_loss': 2.0084547996520996, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.13}
{'origin_loss': 1.8678085803985596, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 1.935048222541809, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.13}
{'loss': 202.535, 'grad_norm': 0.33032485842704773, 'learning_rate': 3.96e-06, 'epoch': 0.13}
{'origin_loss': 2.0940918922424316, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.13}
{'origin_loss': 1.8887929916381836, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.13}
{'origin_loss': 2.048647165298462, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.13}
{'origin_loss': 2.0400102138519287, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.13}
{'origin_loss': 2.143649101257324, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.13}
{'origin_loss': 1.9345219135284424, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.13}
{'origin_loss': 2.100947856903076, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.13}
{'origin_loss': 1.999884843826294, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.13}
{'loss': 201.5782, 'grad_norm': 0.3440362215042114, 'learning_rate': 3.94e-06, 'epoch': 0.13}
{'origin_loss': 1.6987388134002686, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.13}
{'origin_loss': 1.9515968561172485, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.13}
{'origin_loss': 2.227186679840088, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.13}
{'origin_loss': 2.1734888553619385, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.13}
{'origin_loss': 1.964939832687378, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.13}
{'origin_loss': 1.9447828531265259, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 1.4909659624099731, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.13}
{'origin_loss': 1.8902649879455566, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'loss': 201.4646, 'grad_norm': 0.34557077288627625, 'learning_rate': 3.920000000000001e-06, 'epoch': 0.13}
{'origin_loss': 1.8792954683303833, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.082087993621826, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'origin_loss': 2.162472724914551, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.0516104698181152, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.13}
{'origin_loss': 1.919076681137085, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.13}
{'origin_loss': 2.1091928482055664, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.13}
{'origin_loss': 1.8859130144119263, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.13}
{'origin_loss': 1.8299710750579834, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.13}
{'loss': 201.74, 'grad_norm': 0.3457247316837311, 'learning_rate': 3.900000000000001e-06, 'epoch': 0.13}
{'origin_loss': 1.7973875999450684, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 1.996092677116394, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.13}
{'origin_loss': 2.1087796688079834, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 1.9971767663955688, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 1.8579301834106445, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.13}
{'origin_loss': 2.0552773475646973, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.13}
{'origin_loss': 2.1059675216674805, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 1.9496805667877197, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.13}
{'loss': 202.6242, 'grad_norm': 0.33370348811149597, 'learning_rate': 3.88e-06, 'epoch': 0.13}
{'origin_loss': 1.9954379796981812, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.13}
{'origin_loss': 2.0829010009765625, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.13}
{'origin_loss': 2.0922434329986572, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.13}
{'origin_loss': 1.502933144569397, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.13}
{'origin_loss': 2.0521554946899414, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.13}
{'origin_loss': 1.9112963676452637, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.2072741985321045, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.13}
{'origin_loss': 1.9612993001937866, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'loss': 203.2413, 'grad_norm': 0.34791994094848633, 'learning_rate': 3.86e-06, 'epoch': 0.13}
{'origin_loss': 2.0057969093322754, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.13}
{'origin_loss': 1.7629715204238892, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.13}
{'origin_loss': 2.177267074584961, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.13}
{'origin_loss': 2.027298927307129, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 1.8625730276107788, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.13}
{'origin_loss': 2.0972726345062256, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.13}
{'origin_loss': 1.9926753044128418, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.13}
{'origin_loss': 2.058593273162842, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.13}
{'loss': 202.8574, 'grad_norm': 0.4047863781452179, 'learning_rate': 3.8400000000000005e-06, 'epoch': 0.13}
{'origin_loss': 1.8778784275054932, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.13}
{'origin_loss': 1.974064826965332, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.13}
{'origin_loss': 1.9541404247283936, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.13}
{'origin_loss': 1.9769272804260254, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.13}
{'origin_loss': 2.0548720359802246, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.0488176345825195, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.13}
{'origin_loss': 1.8139655590057373, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.2763614654541016, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.13}
{'loss': 204.3096, 'grad_norm': 0.398624062538147, 'learning_rate': 3.820000000000001e-06, 'epoch': 0.13}
{'origin_loss': 2.094578266143799, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.13}
{'origin_loss': 1.889454960823059, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.13}
{'origin_loss': 2.0043861865997314, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.13}
{'origin_loss': 2.013530731201172, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.13}
{'origin_loss': 2.0227949619293213, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.13}
{'origin_loss': 2.1136741638183594, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.13}
{'origin_loss': 2.0629661083221436, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.13}
{'origin_loss': 2.148183584213257, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.13}
{'loss': 204.7468, 'grad_norm': 0.3357030153274536, 'learning_rate': 3.8000000000000005e-06, 'epoch': 0.14}
{'origin_loss': 1.9346562623977661, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.14}
{'origin_loss': 1.8480262756347656, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.14}
{'origin_loss': 1.9295352697372437, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.14}
{'origin_loss': 2.050405979156494, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.14}
{'origin_loss': 2.0746703147888184, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.14}
{'origin_loss': 1.9345022439956665, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.14}
{'origin_loss': 2.060330390930176, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'origin_loss': 1.9525904655456543, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.14}
{'loss': 206.1137, 'grad_norm': 0.32984066009521484, 'learning_rate': 3.7800000000000002e-06, 'epoch': 0.14}
{'origin_loss': 1.9713563919067383, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.14}
{'origin_loss': 1.7810823917388916, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'origin_loss': 2.1913349628448486, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.14}
{'origin_loss': 1.8855881690979004, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.14}
{'origin_loss': 2.2318427562713623, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.14}
{'origin_loss': 2.0250227451324463, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.14}
{'origin_loss': 2.1958394050598145, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.14}
{'origin_loss': 1.6501524448394775, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.14}
{'loss': 207.2571, 'grad_norm': 0.343377023935318, 'learning_rate': 3.7600000000000004e-06, 'epoch': 0.14}
{'origin_loss': 2.186894416809082, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.14}
{'origin_loss': 2.1057536602020264, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.14}
{'origin_loss': 2.1233949661254883, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.14}
{'origin_loss': 1.884016513824463, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.14}
{'origin_loss': 1.9829059839248657, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.14}
{'origin_loss': 2.0518088340759277, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.14}
{'origin_loss': 1.6611261367797852, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.14}
{'origin_loss': 2.111302375793457, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'loss': 206.0915, 'grad_norm': 0.41913485527038574, 'learning_rate': 3.74e-06, 'epoch': 0.14}
{'origin_loss': 2.007887363433838, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'origin_loss': 1.951560139656067, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.14}
{'origin_loss': 2.148214817047119, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.14}
{'origin_loss': 2.024054765701294, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.14}
{'origin_loss': 2.0104546546936035, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.14}
{'origin_loss': 1.9487037658691406, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.14}
{'origin_loss': 2.1952054500579834, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.14}
{'origin_loss': 2.0968360900878906, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.14}
{'loss': 205.5635, 'grad_norm': 0.33635735511779785, 'learning_rate': 3.7200000000000004e-06, 'epoch': 0.14}
{'origin_loss': 2.1412229537963867, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.14}
{'origin_loss': 1.984440565109253, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.14}
{'origin_loss': 2.04191255569458, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.14}
{'origin_loss': 2.0658340454101562, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.036257266998291, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'origin_loss': 1.872019648551941, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.14}
{'origin_loss': 1.9262800216674805, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.14}
{'origin_loss': 2.1320154666900635, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.14}
{'loss': 208.1344, 'grad_norm': 0.32175105810165405, 'learning_rate': 3.7e-06, 'epoch': 0.14}
{'origin_loss': 1.7880170345306396, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.14}
{'origin_loss': 1.9386502504348755, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.14}
{'origin_loss': 1.6595818996429443, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.14}
{'origin_loss': 2.0673224925994873, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.14}
{'origin_loss': 1.9919618368148804, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.14}
{'origin_loss': 1.9385319948196411, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.14}
{'origin_loss': 1.9489766359329224, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.14}
{'origin_loss': 1.8219518661499023, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.14}
{'loss': 204.6912, 'grad_norm': 0.34424132108688354, 'learning_rate': 3.6800000000000003e-06, 'epoch': 0.14}
{'origin_loss': 1.8408498764038086, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.14}
{'origin_loss': 1.9900705814361572, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.14}
{'origin_loss': 1.9279580116271973, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 2.1216583251953125, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.14}
{'origin_loss': 1.7510335445404053, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.14}
{'origin_loss': 1.9681776762008667, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'origin_loss': 2.004833459854126, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.14}
{'origin_loss': 1.9901165962219238, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.14}
{'loss': 206.5431, 'grad_norm': 0.3193773925304413, 'learning_rate': 3.66e-06, 'epoch': 0.14}
{'origin_loss': 1.9839836359024048, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.14}
{'origin_loss': 1.9665734767913818, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.14}
{'origin_loss': 1.92318594455719, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'origin_loss': 2.0217690467834473, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.14}
{'origin_loss': 1.9451161623001099, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.14}
{'origin_loss': 2.085484504699707, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.14}
{'origin_loss': 1.9608029127120972, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.14}
{'origin_loss': 2.068575859069824, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.14}
{'loss': 208.1663, 'grad_norm': 0.36757105588912964, 'learning_rate': 3.6400000000000003e-06, 'epoch': 0.14}
{'origin_loss': 1.9865385293960571, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.14}
{'origin_loss': 1.9496866464614868, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.14}
{'origin_loss': 2.007279872894287, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.14}
{'origin_loss': 2.19954514503479, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 2.1036629676818848, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 1.7364795207977295, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.14}
{'origin_loss': 2.1068825721740723, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.14}
{'origin_loss': 1.9795900583267212, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.14}
{'loss': 209.0712, 'grad_norm': 0.3422143757343292, 'learning_rate': 3.62e-06, 'epoch': 0.14}
{'origin_loss': 1.9728890657424927, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.14}
{'origin_loss': 1.9361677169799805, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.14}
{'origin_loss': 1.9106839895248413, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.14}
{'origin_loss': 2.105613946914673, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.14}
{'origin_loss': 2.1412782669067383, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 1.9274892807006836, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.14}
{'origin_loss': 1.927822232246399, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.14}
{'origin_loss': 1.9408900737762451, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.14}
{'loss': 207.6235, 'grad_norm': 0.3349812924861908, 'learning_rate': 3.6000000000000003e-06, 'epoch': 0.14}
{'origin_loss': 1.9959748983383179, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.14}
{'origin_loss': 1.854264259338379, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.14}
{'origin_loss': 1.9998210668563843, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.14}
{'origin_loss': 1.9244738817214966, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.14}
{'origin_loss': 2.1476736068725586, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.14}
{'origin_loss': 2.072737216949463, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.14}
{'origin_loss': 1.9973207712173462, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.14}
{'origin_loss': 1.8443632125854492, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'loss': 208.3546, 'grad_norm': 0.3694072663784027, 'learning_rate': 3.58e-06, 'epoch': 0.14}
{'origin_loss': 1.8292490243911743, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.14}
{'origin_loss': 1.9475131034851074, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.14}
{'origin_loss': 2.2292540073394775, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.14}
{'origin_loss': 1.9028180837631226, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.14}
{'origin_loss': 1.9602670669555664, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.14}
{'origin_loss': 2.09220814704895, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.14}
{'origin_loss': 1.8787339925765991, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.14}
{'origin_loss': 1.893593430519104, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'loss': 210.123, 'grad_norm': 0.37985485792160034, 'learning_rate': 3.5600000000000002e-06, 'epoch': 0.14}
{'origin_loss': 1.8671766519546509, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 2.200563669204712, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.14}
{'origin_loss': 2.0413928031921387, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.14}
{'origin_loss': 2.210592031478882, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.14}
{'origin_loss': 2.0239779949188232, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.032193183898926, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.14}
{'origin_loss': 2.0182669162750244, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.14}
{'origin_loss': 2.201436996459961, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'loss': 209.4182, 'grad_norm': 0.3797174096107483, 'learning_rate': 3.54e-06, 'epoch': 0.14}
{'origin_loss': 2.0087082386016846, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 1.9288381338119507, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.14}
{'origin_loss': 2.06809401512146, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.14}
{'origin_loss': 1.9540398120880127, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.14}
{'origin_loss': 2.053884506225586, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.14}
{'origin_loss': 1.8368130922317505, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 2.060743808746338, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.14}
{'origin_loss': 2.035360097885132, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'loss': 211.2121, 'grad_norm': 0.3600151538848877, 'learning_rate': 3.52e-06, 'epoch': 0.14}
{'origin_loss': 2.0912322998046875, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 1.9307714700698853, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.14}
{'origin_loss': 1.9616683721542358, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.14}
{'origin_loss': 1.9619911909103394, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.14}
{'origin_loss': 1.899261236190796, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.14}
{'origin_loss': 2.055661678314209, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.14}
{'origin_loss': 2.1496706008911133, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.14}
{'origin_loss': 2.0441129207611084, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.14}
{'loss': 211.2774, 'grad_norm': 0.3365710377693176, 'learning_rate': 3.5e-06, 'epoch': 0.14}
{'origin_loss': 1.8715012073516846, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.14}
{'origin_loss': 2.099588394165039, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.14}
{'origin_loss': 2.149135112762451, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.14}
{'origin_loss': 1.8329534530639648, 'mask_loss': 0.17822265625, 'mask_rate': 0.422119140625, 'epoch': 0.14}
{'origin_loss': 1.9798707962036133, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.14}
{'origin_loss': 2.0614051818847656, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.14}
{'origin_loss': 2.023848056793213, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 2.092787027359009, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.14}
{'loss': 211.467, 'grad_norm': 0.3050265610218048, 'learning_rate': 3.48e-06, 'epoch': 0.14}
{'origin_loss': 2.087284564971924, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.14}
{'origin_loss': 1.7237918376922607, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.14}
{'origin_loss': 1.9738739728927612, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'origin_loss': 2.0293681621551514, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 2.0124382972717285, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.14}
{'origin_loss': 2.015814781188965, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.14}
{'origin_loss': 1.9471182823181152, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 2.0167171955108643, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.14}
{'loss': 210.0539, 'grad_norm': 0.3315165042877197, 'learning_rate': 3.46e-06, 'epoch': 0.14}
{'origin_loss': 2.0982837677001953, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.14}
{'origin_loss': 2.125026226043701, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 1.9943863153457642, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 2.1068122386932373, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.14}
{'origin_loss': 2.0334393978118896, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.14}
{'origin_loss': 2.042663097381592, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.14}
{'origin_loss': 2.017765760421753, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.14}
{'origin_loss': 2.3146495819091797, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.14}
{'loss': 212.701, 'grad_norm': 0.36849579215049744, 'learning_rate': 3.44e-06, 'epoch': 0.14}
{'origin_loss': 2.034813642501831, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.14}
{'origin_loss': 1.9749350547790527, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'origin_loss': 1.8407329320907593, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.093061685562134, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.14}
{'origin_loss': 1.7367634773254395, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.14}
{'origin_loss': 2.0888915061950684, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.146766424179077, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.14}
{'origin_loss': 1.983144998550415, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.14}
{'loss': 210.7374, 'grad_norm': 0.35943201184272766, 'learning_rate': 3.4200000000000007e-06, 'epoch': 0.14}
{'origin_loss': 1.8946815729141235, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.14}
{'origin_loss': 1.9235304594039917, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.14}
{'origin_loss': 1.9533554315567017, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.14}
{'origin_loss': 2.020350694656372, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.14}
{'origin_loss': 1.9897905588150024, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.14}
{'origin_loss': 1.958533763885498, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.0634169578552246, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.14}
{'origin_loss': 2.373225688934326, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.14}
{'loss': 212.6315, 'grad_norm': 0.3729906678199768, 'learning_rate': 3.4000000000000005e-06, 'epoch': 0.14}
{'origin_loss': 1.9505038261413574, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.14}
{'origin_loss': 2.0018656253814697, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.14}
{'origin_loss': 2.0157463550567627, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.14}
{'origin_loss': 1.956118106842041, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.14}
{'origin_loss': 1.8977442979812622, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.14}
{'origin_loss': 1.953614592552185, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.14}
{'origin_loss': 1.9381792545318604, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.14}
{'origin_loss': 2.104222059249878, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.14}
{'loss': 213.8835, 'grad_norm': 0.3400414288043976, 'learning_rate': 3.3800000000000007e-06, 'epoch': 0.14}
{'origin_loss': 1.767421841621399, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.14}
{'origin_loss': 1.9251909255981445, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.14}
{'origin_loss': 1.9676446914672852, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.14}
{'origin_loss': 2.13940167427063, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.14}
{'origin_loss': 2.1198229789733887, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.14}
{'origin_loss': 2.134652853012085, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.14}
{'origin_loss': 2.183513641357422, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.14}
{'origin_loss': 1.980915904045105, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.14}
{'loss': 213.4805, 'grad_norm': 0.44642144441604614, 'learning_rate': 3.3600000000000004e-06, 'epoch': 0.14}
{'origin_loss': 1.960694670677185, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.14}
{'origin_loss': 1.9266129732131958, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.14}
{'origin_loss': 2.055100440979004, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.14}
{'origin_loss': 1.9138102531433105, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.14}
{'origin_loss': 2.095201253890991, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.14}
{'origin_loss': 1.966407299041748, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.14}
{'origin_loss': 1.9170256853103638, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.14}
{'origin_loss': 2.011343240737915, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.14}
{'loss': 213.637, 'grad_norm': 0.35102593898773193, 'learning_rate': 3.3400000000000006e-06, 'epoch': 0.15}
{'origin_loss': 1.895318865776062, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.15}
{'origin_loss': 1.9376239776611328, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.15}
{'origin_loss': 1.9051860570907593, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.15}
{'origin_loss': 1.8030517101287842, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.15}
{'origin_loss': 1.9877374172210693, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.15}
{'origin_loss': 1.8024332523345947, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.15}
{'origin_loss': 1.8215982913970947, 'mask_loss': 0.1785888671875, 'mask_rate': 0.422607421875, 'epoch': 0.15}
{'origin_loss': 2.1222383975982666, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.15}
{'loss': 214.1281, 'grad_norm': 0.33557069301605225, 'learning_rate': 3.3200000000000004e-06, 'epoch': 0.15}
{'origin_loss': 2.0545153617858887, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.15}
{'origin_loss': 1.75178861618042, 'mask_loss': 0.183349609375, 'mask_rate': 0.42822265625, 'epoch': 0.15}
{'origin_loss': 1.8384592533111572, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.15}
{'origin_loss': 2.0579628944396973, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.15}
{'origin_loss': 2.0818352699279785, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.15}
{'origin_loss': 1.9138213396072388, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.15}
{'origin_loss': 1.8548879623413086, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.15}
{'origin_loss': 2.132272958755493, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.15}
{'loss': 213.6013, 'grad_norm': 0.3272329866886139, 'learning_rate': 3.3000000000000006e-06, 'epoch': 0.15}
{'origin_loss': 1.8828258514404297, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.15}
{'origin_loss': 1.8358104228973389, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.15}
{'origin_loss': 1.9117180109024048, 'mask_loss': 0.180908203125, 'mask_rate': 0.42529296875, 'epoch': 0.15}
{'origin_loss': 2.112717866897583, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.15}
{'origin_loss': 2.0433173179626465, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.15}
{'origin_loss': 2.1247692108154297, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.15}
{'origin_loss': 1.983710765838623, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.15}
{'origin_loss': 2.079362154006958, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.15}
{'loss': 215.9186, 'grad_norm': 0.35061949491500854, 'learning_rate': 3.2800000000000004e-06, 'epoch': 0.15}
{'origin_loss': 2.185481548309326, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.15}
{'origin_loss': 2.1324307918548584, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.15}
{'origin_loss': 1.9636732339859009, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.15}
{'origin_loss': 2.1390528678894043, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.15}
{'origin_loss': 1.9887384176254272, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.15}
{'origin_loss': 2.033524513244629, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.15}
{'origin_loss': 2.0349252223968506, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.15}
{'origin_loss': 1.958351731300354, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.15}
{'loss': 215.1639, 'grad_norm': 0.33783549070358276, 'learning_rate': 3.2600000000000006e-06, 'epoch': 0.15}
{'origin_loss': 1.9581135511398315, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.15}
{'origin_loss': 1.8476945161819458, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.15}
{'origin_loss': 2.041124105453491, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.15}
{'origin_loss': 2.1183536052703857, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.15}
{'origin_loss': 1.9621871709823608, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.15}
{'origin_loss': 2.085439443588257, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.15}
{'origin_loss': 1.8011257648468018, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.15}
{'origin_loss': 2.0323269367218018, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.15}
{'loss': 214.9496, 'grad_norm': 0.3718758225440979, 'learning_rate': 3.2400000000000003e-06, 'epoch': 0.15}
{'origin_loss': 1.8408024311065674, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.15}
{'origin_loss': 1.9785043001174927, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.15}
{'origin_loss': 1.83281409740448, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.15}
{'origin_loss': 1.772343397140503, 'mask_loss': 0.1798095703125, 'mask_rate': 0.424072265625, 'epoch': 0.15}
{'origin_loss': 2.0558583736419678, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.15}
{'origin_loss': 2.0695323944091797, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.15}
{'origin_loss': 1.958580493927002, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.15}
{'origin_loss': 1.9713642597198486, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.15}
{'loss': 214.685, 'grad_norm': 0.33691322803497314, 'learning_rate': 3.2200000000000005e-06, 'epoch': 0.15}
{'origin_loss': 1.9000178575515747, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.15}
{'origin_loss': 1.6747450828552246, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.15}
{'origin_loss': 1.94418466091156, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.15}
{'origin_loss': 2.258326292037964, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.15}
{'origin_loss': 1.906997799873352, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.15}
{'origin_loss': 2.0340306758880615, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.15}
{'origin_loss': 1.9602515697479248, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.15}
{'origin_loss': 2.119839906692505, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.15}
{'loss': 214.1623, 'grad_norm': 0.30671045184135437, 'learning_rate': 3.2000000000000003e-06, 'epoch': 0.15}
{'origin_loss': 2.067767381668091, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.15}
{'origin_loss': 1.8743921518325806, 'mask_loss': 0.1812744140625, 'mask_rate': 0.42578125, 'epoch': 0.15}
{'origin_loss': 2.049884080886841, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.15}
{'origin_loss': 2.0256381034851074, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.15}
{'origin_loss': 1.84610915184021, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.15}
{'origin_loss': 1.883589744567871, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.15}
{'origin_loss': 1.9581750631332397, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.15}
{'origin_loss': 2.0454883575439453, 'mask_loss': 0.17919921875, 'mask_rate': 0.42333984375, 'epoch': 0.15}
{'loss': 216.9376, 'grad_norm': 124.84862518310547, 'learning_rate': 3.1800000000000005e-06, 'epoch': 0.15}
{'origin_loss': 1.849234700202942, 'mask_loss': 0.178955078125, 'mask_rate': 0.423095703125, 'epoch': 0.15}
{'origin_loss': 2.0732338428497314, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.15}
{'origin_loss': 2.095853090286255, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.15}
{'origin_loss': 2.080083131790161, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.15}
{'origin_loss': 2.087266206741333, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.15}
{'origin_loss': 1.63150954246521, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.15}
{'origin_loss': 2.2380127906799316, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.15}
{'origin_loss': 2.173079490661621, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.15}
{'loss': 215.0129, 'grad_norm': 120.10501098632812, 'learning_rate': 3.1600000000000002e-06, 'epoch': 0.15}
{'origin_loss': 2.067753553390503, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.15}
{'origin_loss': 2.130953550338745, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.15}
{'origin_loss': 2.0704286098480225, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.15}
{'origin_loss': 2.080960273742676, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.15}
{'origin_loss': 2.0063085556030273, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.15}
{'origin_loss': 1.9764771461486816, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.15}
{'origin_loss': 2.3092987537384033, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.15}
{'origin_loss': 2.055457353591919, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.15}
{'loss': 210.9153, 'grad_norm': 0.3945716321468353, 'learning_rate': 3.1400000000000004e-06, 'epoch': 0.15}
{'origin_loss': 2.0799899101257324, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.15}
{'origin_loss': 1.9131298065185547, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.15}
{'origin_loss': 1.877528190612793, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.15}
{'origin_loss': 1.9013155698776245, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.15}
{'origin_loss': 2.229550838470459, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.15}
{'origin_loss': 2.00053334236145, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.15}
{'origin_loss': 2.0787384510040283, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.15}
{'origin_loss': 1.9591972827911377, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.15}
{'loss': 206.13, 'grad_norm': 0.3456399142742157, 'learning_rate': 3.12e-06, 'epoch': 0.15}
{'origin_loss': 2.093539237976074, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.15}
{'origin_loss': 2.0146639347076416, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.15}
{'origin_loss': 1.8396639823913574, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.15}
{'origin_loss': 1.805663824081421, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.15}
{'origin_loss': 1.9363741874694824, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.15}
{'origin_loss': 2.0830836296081543, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.15}
{'origin_loss': 1.8449939489364624, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.15}
{'origin_loss': 2.038477897644043, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.15}
{'loss': 206.0196, 'grad_norm': 0.3895350992679596, 'learning_rate': 3.1000000000000004e-06, 'epoch': 0.15}
{'origin_loss': 1.9007641077041626, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.15}
{'origin_loss': 2.0508904457092285, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.15}
{'origin_loss': 2.149813652038574, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.15}
{'origin_loss': 1.9991084337234497, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.15}
{'origin_loss': 1.9821197986602783, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.15}
{'origin_loss': 1.7626111507415771, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.15}
{'origin_loss': 2.18355131149292, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.15}
{'origin_loss': 2.0541505813598633, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.15}
{'loss': 202.3541, 'grad_norm': 0.36478129029273987, 'learning_rate': 3.08e-06, 'epoch': 0.15}
{'origin_loss': 2.00703501701355, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.15}
{'origin_loss': 1.6404755115509033, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.15}
{'origin_loss': 1.9908877611160278, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.15}
{'origin_loss': 2.1024985313415527, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.15}
{'origin_loss': 2.1486263275146484, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.15}
{'origin_loss': 1.9271447658538818, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.15}
{'origin_loss': 1.8375303745269775, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.15}
{'origin_loss': 2.1009714603424072, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.15}
{'loss': 200.3288, 'grad_norm': 0.33764439821243286, 'learning_rate': 3.0600000000000003e-06, 'epoch': 0.15}
{'origin_loss': 2.1049647331237793, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.15}
{'origin_loss': 1.957848072052002, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.15}
{'origin_loss': 2.0941967964172363, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.15}
{'origin_loss': 2.020040988922119, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.15}
{'origin_loss': 2.177682876586914, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.15}
{'origin_loss': 2.124490737915039, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.15}
{'origin_loss': 2.0260143280029297, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.15}
{'origin_loss': 2.038424491882324, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.15}
{'loss': 199.2554, 'grad_norm': 0.3888396918773651, 'learning_rate': 3.04e-06, 'epoch': 0.15}
{'origin_loss': 1.9654659032821655, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.15}
{'origin_loss': 2.3394665718078613, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.15}
{'origin_loss': 1.9906595945358276, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 1.861152172088623, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.15}
{'origin_loss': 1.886800765991211, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.15}
{'origin_loss': 2.0333266258239746, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.15}
{'origin_loss': 1.8818086385726929, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.15}
{'origin_loss': 2.3074843883514404, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.15}
{'loss': 197.3302, 'grad_norm': 0.6009868383407593, 'learning_rate': 3.0200000000000003e-06, 'epoch': 0.15}
{'origin_loss': 1.977595567703247, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.15}
{'origin_loss': 2.0236570835113525, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.15}
{'origin_loss': 2.021409273147583, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.15}
{'origin_loss': 1.937947154045105, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.15}
{'origin_loss': 1.9996036291122437, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.15}
{'origin_loss': 1.9885814189910889, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.15}
{'origin_loss': 2.115204095840454, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.15}
{'origin_loss': 2.048539161682129, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.15}
{'loss': 196.5297, 'grad_norm': 0.36250540614128113, 'learning_rate': 3e-06, 'epoch': 0.15}
{'origin_loss': 1.9325200319290161, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 2.0818605422973633, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.15}
{'origin_loss': 1.811593770980835, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.15}
{'origin_loss': 2.1948931217193604, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 1.86750328540802, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.15}
{'origin_loss': 2.0048441886901855, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.15}
{'origin_loss': 1.8173973560333252, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.15}
{'origin_loss': 2.113896608352661, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.15}
{'loss': 193.9468, 'grad_norm': 0.43378347158432007, 'learning_rate': 2.9800000000000003e-06, 'epoch': 0.15}
{'origin_loss': 2.0020196437835693, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.15}
{'origin_loss': 2.147489309310913, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.15}
{'origin_loss': 2.0210087299346924, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.15}
{'origin_loss': 1.8718194961547852, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 1.9217963218688965, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 2.086982488632202, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 1.7389854192733765, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.15}
{'origin_loss': 2.137594699859619, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.15}
{'loss': 195.3191, 'grad_norm': 0.37220343947410583, 'learning_rate': 2.96e-06, 'epoch': 0.15}
{'origin_loss': 1.9821405410766602, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.15}
{'origin_loss': 1.9687047004699707, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.15}
{'origin_loss': 1.9139260053634644, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.15}
{'origin_loss': 2.0561249256134033, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.15}
{'origin_loss': 2.0050642490386963, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.15}
{'origin_loss': 2.1590118408203125, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.15}
{'origin_loss': 2.038604497909546, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.15}
{'origin_loss': 2.025942087173462, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.15}
{'loss': 193.6281, 'grad_norm': 0.3366073966026306, 'learning_rate': 2.9400000000000002e-06, 'epoch': 0.15}
{'origin_loss': 2.048156261444092, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.15}
{'origin_loss': 2.023991107940674, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.15}
{'origin_loss': 2.064504623413086, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.15}
{'origin_loss': 2.0694355964660645, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.15}
{'origin_loss': 1.8843013048171997, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.15}
{'origin_loss': 2.0935254096984863, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.15}
{'origin_loss': 1.6854667663574219, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.15}
{'origin_loss': 2.1455237865448, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.15}
{'loss': 192.4706, 'grad_norm': 0.32377275824546814, 'learning_rate': 2.92e-06, 'epoch': 0.15}
{'origin_loss': 1.9081214666366577, 'mask_loss': 0.15087890625, 'mask_rate': 0.388427734375, 'epoch': 0.15}
{'origin_loss': 2.0685200691223145, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.15}
{'origin_loss': 2.129354476928711, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.15}
{'origin_loss': 1.908776044845581, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.15}
{'origin_loss': 2.0724377632141113, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.15}
{'origin_loss': 1.9450058937072754, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.15}
{'origin_loss': 1.9296228885650635, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.15}
{'origin_loss': 2.0371317863464355, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.15}
{'loss': 192.0155, 'grad_norm': 0.35567548871040344, 'learning_rate': 2.9e-06, 'epoch': 0.15}
{'origin_loss': 1.9713592529296875, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.15}
{'origin_loss': 1.9781463146209717, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.15}
{'origin_loss': 1.7883918285369873, 'mask_loss': 0.15185546875, 'mask_rate': 0.3896484375, 'epoch': 0.15}
{'origin_loss': 1.968614935874939, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.15}
{'origin_loss': 2.0600061416625977, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.15}
{'origin_loss': 1.893804907798767, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.15}
{'origin_loss': 2.117448568344116, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.15}
{'origin_loss': 1.9965715408325195, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.15}
{'loss': 191.9874, 'grad_norm': 0.3725776970386505, 'learning_rate': 2.88e-06, 'epoch': 0.16}
{'origin_loss': 2.1288890838623047, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 1.8774832487106323, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 1.9180777072906494, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 2.0674023628234863, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 1.987564206123352, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'origin_loss': 2.2163517475128174, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 1.9316524267196655, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 1.767103910446167, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'loss': 192.8462, 'grad_norm': 0.3631763756275177, 'learning_rate': 2.86e-06, 'epoch': 0.16}
{'origin_loss': 2.0336098670959473, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.16}
{'origin_loss': 2.1037518978118896, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.16}
{'origin_loss': 2.0219175815582275, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 1.7435706853866577, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.16}
{'origin_loss': 1.9923609495162964, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.16}
{'origin_loss': 2.192915201187134, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 2.11236834526062, 'mask_loss': 0.1522216796875, 'mask_rate': 0.39013671875, 'epoch': 0.16}
{'origin_loss': 2.0950772762298584, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'loss': 191.3494, 'grad_norm': 0.36295080184936523, 'learning_rate': 2.84e-06, 'epoch': 0.16}
{'origin_loss': 2.1204240322113037, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.16}
{'origin_loss': 1.9057691097259521, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.16}
{'origin_loss': 1.9085489511489868, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.16}
{'origin_loss': 2.057831048965454, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.16}
{'origin_loss': 1.9862444400787354, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 2.070648193359375, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.16}
{'origin_loss': 2.188290596008301, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 2.160733222961426, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'loss': 193.1436, 'grad_norm': 0.35508811473846436, 'learning_rate': 2.82e-06, 'epoch': 0.16}
{'origin_loss': 1.9726693630218506, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.16}
{'origin_loss': 2.1009650230407715, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.16}
{'origin_loss': 1.991190791130066, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.16}
{'origin_loss': 2.0278518199920654, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.16}
{'origin_loss': 1.9174052476882935, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.16}
{'origin_loss': 1.9333003759384155, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.16}
{'origin_loss': 2.1898934841156006, 'mask_loss': 0.1502685546875, 'mask_rate': 0.3876953125, 'epoch': 0.16}
{'origin_loss': 2.0641865730285645, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'loss': 192.6028, 'grad_norm': 0.3386361598968506, 'learning_rate': 2.8000000000000003e-06, 'epoch': 0.16}
{'origin_loss': 1.9707294702529907, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.16}
{'origin_loss': 2.1317520141601562, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.16}
{'origin_loss': 1.8740946054458618, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 1.9959185123443604, 'mask_loss': 0.15087890625, 'mask_rate': 0.388427734375, 'epoch': 0.16}
{'origin_loss': 1.9258735179901123, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.16}
{'origin_loss': 2.0579335689544678, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.16}
{'origin_loss': 1.7088167667388916, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.16}
{'origin_loss': 1.9728710651397705, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.16}
{'loss': 190.2204, 'grad_norm': 0.3599836528301239, 'learning_rate': 2.7800000000000005e-06, 'epoch': 0.16}
{'origin_loss': 2.0107429027557373, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.16}
{'origin_loss': 1.7682873010635376, 'mask_loss': 0.1539306640625, 'mask_rate': 0.392333984375, 'epoch': 0.16}
{'origin_loss': 1.9370518922805786, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.16}
{'origin_loss': 1.9386954307556152, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.16}
{'origin_loss': 1.9551334381103516, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 2.124591112136841, 'mask_loss': 0.15087890625, 'mask_rate': 0.388427734375, 'epoch': 0.16}
{'origin_loss': 2.0168797969818115, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 2.0922276973724365, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'loss': 191.0586, 'grad_norm': 0.35229238867759705, 'learning_rate': 2.7600000000000003e-06, 'epoch': 0.16}
{'origin_loss': 1.865355372428894, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 1.9818891286849976, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.16}
{'origin_loss': 2.059764862060547, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 1.9316455125808716, 'mask_loss': 0.150634765625, 'mask_rate': 0.38818359375, 'epoch': 0.16}
{'origin_loss': 2.0447206497192383, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 1.8671681880950928, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 2.0969433784484863, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 2.2712271213531494, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.16}
{'loss': 191.3117, 'grad_norm': 0.3631669878959656, 'learning_rate': 2.7400000000000004e-06, 'epoch': 0.16}
{'origin_loss': 1.9878181219100952, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.16}
{'origin_loss': 2.2286853790283203, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.16}
{'origin_loss': 1.7704007625579834, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.16}
{'origin_loss': 1.9711363315582275, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 1.9837039709091187, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.16}
{'origin_loss': 2.07950496673584, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.16}
{'origin_loss': 1.909515142440796, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.16}
{'origin_loss': 1.9734432697296143, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.16}
{'loss': 191.7068, 'grad_norm': 0.35493534803390503, 'learning_rate': 2.7200000000000002e-06, 'epoch': 0.16}
{'origin_loss': 1.8917617797851562, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.16}
{'origin_loss': 2.15690016746521, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 2.0210139751434326, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.16}
{'origin_loss': 1.9934005737304688, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.16}
{'origin_loss': 1.9411674737930298, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 1.9293944835662842, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 2.0074119567871094, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 2.210415840148926, 'mask_loss': 0.152587890625, 'mask_rate': 0.390625, 'epoch': 0.16}
{'loss': 192.0502, 'grad_norm': 0.3471162021160126, 'learning_rate': 2.7000000000000004e-06, 'epoch': 0.16}
{'origin_loss': 2.091256618499756, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.16}
{'origin_loss': 2.0548272132873535, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 1.940718412399292, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 2.073699474334717, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 1.7371110916137695, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.16}
{'origin_loss': 1.9254947900772095, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 2.164869785308838, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.16}
{'origin_loss': 2.0962889194488525, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.16}
{'loss': 191.2449, 'grad_norm': 0.43046823143959045, 'learning_rate': 2.68e-06, 'epoch': 0.16}
{'origin_loss': 2.094419002532959, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.16}
{'origin_loss': 1.8328322172164917, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.16}
{'origin_loss': 2.048168659210205, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.16}
{'origin_loss': 1.8966535329818726, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 2.1801509857177734, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 2.130309581756592, 'mask_loss': 0.1502685546875, 'mask_rate': 0.3876953125, 'epoch': 0.16}
{'origin_loss': 1.9936017990112305, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.16}
{'origin_loss': 2.118572950363159, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.16}
{'loss': 192.1775, 'grad_norm': 0.34090012311935425, 'learning_rate': 2.6600000000000004e-06, 'epoch': 0.16}
{'origin_loss': 2.0165836811065674, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.16}
{'origin_loss': 2.0666661262512207, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 1.8648935556411743, 'mask_loss': 0.1533203125, 'mask_rate': 0.3916015625, 'epoch': 0.16}
{'origin_loss': 2.0227513313293457, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 1.97705078125, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.16}
{'origin_loss': 2.0125174522399902, 'mask_loss': 0.1529541015625, 'mask_rate': 0.39111328125, 'epoch': 0.16}
{'origin_loss': 1.8382415771484375, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.16}
{'origin_loss': 2.071035861968994, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.16}
{'loss': 191.2025, 'grad_norm': 0.34487438201904297, 'learning_rate': 2.64e-06, 'epoch': 0.16}
{'origin_loss': 1.8654552698135376, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 2.0521786212921143, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.16}
{'origin_loss': 2.2115604877471924, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 1.8581864833831787, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.16}
{'origin_loss': 1.9469842910766602, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.16}
{'origin_loss': 1.9846373796463013, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.16}
{'origin_loss': 2.0905158519744873, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.16}
{'origin_loss': 2.07000732421875, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.16}
{'loss': 192.6506, 'grad_norm': 0.3511632978916168, 'learning_rate': 2.6200000000000003e-06, 'epoch': 0.16}
{'origin_loss': 2.253047227859497, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 1.8925130367279053, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.16}
{'origin_loss': 2.0830183029174805, 'mask_loss': 0.1549072265625, 'mask_rate': 0.3935546875, 'epoch': 0.16}
{'origin_loss': 1.973786473274231, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.16}
{'origin_loss': 2.026413679122925, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 1.8228330612182617, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 1.9278935194015503, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.16}
{'origin_loss': 2.069812297821045, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.16}
{'loss': 192.5062, 'grad_norm': 0.38149288296699524, 'learning_rate': 2.6e-06, 'epoch': 0.16}
{'origin_loss': 1.907768726348877, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.16}
{'origin_loss': 2.039307117462158, 'mask_loss': 0.154541015625, 'mask_rate': 0.39306640625, 'epoch': 0.16}
{'origin_loss': 2.02689266204834, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'origin_loss': 2.0184316635131836, 'mask_loss': 0.1573486328125, 'mask_rate': 0.396728515625, 'epoch': 0.16}
{'origin_loss': 2.0262911319732666, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.16}
{'origin_loss': 2.04856014251709, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.16}
{'origin_loss': 1.871994137763977, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 2.096409559249878, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'loss': 192.3638, 'grad_norm': 0.354932963848114, 'learning_rate': 2.5800000000000003e-06, 'epoch': 0.16}
{'origin_loss': 2.0106234550476074, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.16}
{'origin_loss': 1.7347840070724487, 'mask_loss': 0.1512451171875, 'mask_rate': 0.388916015625, 'epoch': 0.16}
{'origin_loss': 1.9744396209716797, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.16}
{'origin_loss': 2.255208730697632, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 1.7028688192367554, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 2.040227174758911, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.16}
{'origin_loss': 2.056320905685425, 'mask_loss': 0.1541748046875, 'mask_rate': 0.392578125, 'epoch': 0.16}
{'origin_loss': 1.7308692932128906, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.16}
{'loss': 191.4382, 'grad_norm': 0.35943496227264404, 'learning_rate': 2.56e-06, 'epoch': 0.16}
{'origin_loss': 1.7324086427688599, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.16}
{'origin_loss': 1.683914065361023, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.16}
{'origin_loss': 2.0451340675354004, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.16}
{'origin_loss': 2.0508816242218018, 'mask_loss': 0.1536865234375, 'mask_rate': 0.39208984375, 'epoch': 0.16}
{'origin_loss': 2.251128911972046, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.16}
{'origin_loss': 1.8770332336425781, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.16}
{'origin_loss': 2.298529624938965, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.16}
{'origin_loss': 2.0844695568084717, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.16}
{'loss': 192.3936, 'grad_norm': 0.33805856108665466, 'learning_rate': 2.5400000000000002e-06, 'epoch': 0.16}
{'origin_loss': 2.0072901248931885, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 2.0550754070281982, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 2.0147857666015625, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 1.5526264905929565, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.16}
{'origin_loss': 2.0424551963806152, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.16}
{'origin_loss': 2.0244874954223633, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'origin_loss': 2.2195117473602295, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.16}
{'origin_loss': 1.6951818466186523, 'mask_loss': 0.155517578125, 'mask_rate': 0.394287109375, 'epoch': 0.16}
{'loss': 192.4202, 'grad_norm': 0.3324953019618988, 'learning_rate': 2.52e-06, 'epoch': 0.16}
{'origin_loss': 1.773798942565918, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.16}
{'origin_loss': 1.8386359214782715, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.16}
{'origin_loss': 1.9884426593780518, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.16}
{'origin_loss': 1.964370846748352, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.16}
{'origin_loss': 1.9827241897583008, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'origin_loss': 2.158323049545288, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.16}
{'origin_loss': 2.092684507369995, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.16}
{'origin_loss': 2.020465612411499, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.16}
{'loss': 195.2587, 'grad_norm': 0.38068875670433044, 'learning_rate': 2.5e-06, 'epoch': 0.16}
{'origin_loss': 1.9555413722991943, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.16}
{'origin_loss': 1.9469234943389893, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.16}
{'origin_loss': 1.9035089015960693, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 1.8810725212097168, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.16}
{'origin_loss': 1.870811104774475, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.16}
{'origin_loss': 1.9469746351242065, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.16}
{'origin_loss': 2.02004075050354, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 2.134664297103882, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.16}
{'loss': 195.6449, 'grad_norm': 0.3735803961753845, 'learning_rate': 2.4800000000000004e-06, 'epoch': 0.16}
{'origin_loss': 2.286801338195801, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.16}
{'origin_loss': 2.0667712688446045, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.16}
{'origin_loss': 1.882988452911377, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.16}
{'origin_loss': 1.96331787109375, 'mask_loss': 0.1563720703125, 'mask_rate': 0.3955078125, 'epoch': 0.16}
{'origin_loss': 2.034322500228882, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.16}
{'origin_loss': 2.1306605339050293, 'mask_loss': 0.1552734375, 'mask_rate': 0.39404296875, 'epoch': 0.16}
{'origin_loss': 2.0648529529571533, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.16}
{'origin_loss': 2.148493766784668, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'loss': 193.4316, 'grad_norm': 0.3352890610694885, 'learning_rate': 2.46e-06, 'epoch': 0.16}
{'origin_loss': 1.8309661149978638, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 2.0953361988067627, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.16}
{'origin_loss': 2.244546890258789, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.16}
{'origin_loss': 2.0377488136291504, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.16}
{'origin_loss': 1.994858980178833, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.16}
{'origin_loss': 2.022632360458374, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.16}
{'origin_loss': 1.9423141479492188, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.16}
{'origin_loss': 1.9422943592071533, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.16}
{'loss': 194.6388, 'grad_norm': 0.5619080066680908, 'learning_rate': 2.4400000000000004e-06, 'epoch': 0.17}
{'origin_loss': 2.0542290210723877, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.17}
{'origin_loss': 2.1732118129730225, 'mask_loss': 0.1546630859375, 'mask_rate': 0.393310546875, 'epoch': 0.17}
{'origin_loss': 1.9342834949493408, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 2.0275673866271973, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.17}
{'origin_loss': 2.076826810836792, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.17}
{'origin_loss': 2.010758638381958, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.17}
{'origin_loss': 2.2043492794036865, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.17}
{'origin_loss': 2.018942356109619, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.17}
{'loss': 193.875, 'grad_norm': 0.3450736701488495, 'learning_rate': 2.42e-06, 'epoch': 0.17}
{'origin_loss': 1.9843761920928955, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.17}
{'origin_loss': 1.9476196765899658, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.17}
{'origin_loss': 1.9998183250427246, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.17}
{'origin_loss': 1.938326358795166, 'mask_loss': 0.15625, 'mask_rate': 0.395263671875, 'epoch': 0.17}
{'origin_loss': 1.834723949432373, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.17}
{'origin_loss': 1.875512957572937, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.17}
{'origin_loss': 2.1640658378601074, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.17}
{'origin_loss': 2.146378755569458, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.17}
{'loss': 193.377, 'grad_norm': 0.4066365361213684, 'learning_rate': 2.4000000000000003e-06, 'epoch': 0.17}
{'origin_loss': 2.152527332305908, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.17}
{'origin_loss': 1.9790356159210205, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.17}
{'origin_loss': 2.0725886821746826, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.17}
{'origin_loss': 1.9958442449569702, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.17}
{'origin_loss': 1.7185512781143188, 'mask_loss': 0.1556396484375, 'mask_rate': 0.39453125, 'epoch': 0.17}
{'origin_loss': 2.0627803802490234, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.17}
{'origin_loss': 1.8585418462753296, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.17}
{'origin_loss': 1.9838507175445557, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'loss': 194.5873, 'grad_norm': 0.36463141441345215, 'learning_rate': 2.38e-06, 'epoch': 0.17}
{'origin_loss': 2.2315824031829834, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 2.020296335220337, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.17}
{'origin_loss': 1.9612001180648804, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.17}
{'origin_loss': 2.047151565551758, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.17}
{'origin_loss': 2.0631773471832275, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 1.8098938465118408, 'mask_loss': 0.1531982421875, 'mask_rate': 0.391357421875, 'epoch': 0.17}
{'origin_loss': 1.972450852394104, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.17}
{'origin_loss': 1.9493093490600586, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.17}
{'loss': 195.1006, 'grad_norm': 0.36034315824508667, 'learning_rate': 2.3600000000000003e-06, 'epoch': 0.17}
{'origin_loss': 1.860485553741455, 'mask_loss': 0.151611328125, 'mask_rate': 0.389404296875, 'epoch': 0.17}
{'origin_loss': 2.046795606613159, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.17}
{'origin_loss': 1.8538143634796143, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'origin_loss': 1.94384765625, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.17}
{'origin_loss': 1.8703713417053223, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.17}
{'origin_loss': 1.8676388263702393, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.17}
{'origin_loss': 2.027052640914917, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.17}
{'origin_loss': 1.8343092203140259, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'loss': 194.3349, 'grad_norm': 0.35838115215301514, 'learning_rate': 2.3400000000000005e-06, 'epoch': 0.17}
{'origin_loss': 1.8393871784210205, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.17}
{'origin_loss': 1.9580103158950806, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 2.0411899089813232, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.17}
{'origin_loss': 2.0822668075561523, 'mask_loss': 0.153564453125, 'mask_rate': 0.391845703125, 'epoch': 0.17}
{'origin_loss': 2.0829174518585205, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.17}
{'origin_loss': 2.025113821029663, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.17}
{'origin_loss': 1.857008457183838, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.17}
{'origin_loss': 2.0424816608428955, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.17}
{'loss': 194.1317, 'grad_norm': 0.2929501235485077, 'learning_rate': 2.3200000000000002e-06, 'epoch': 0.17}
{'origin_loss': 1.8963687419891357, 'mask_loss': 0.158203125, 'mask_rate': 0.397705078125, 'epoch': 0.17}
{'origin_loss': 2.025181293487549, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 1.926309585571289, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.17}
{'origin_loss': 1.9279323816299438, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.17}
{'origin_loss': 1.9198359251022339, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 1.7828655242919922, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.17}
{'origin_loss': 2.215398073196411, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.17}
{'origin_loss': 2.0237560272216797, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.17}
{'loss': 196.5272, 'grad_norm': 0.336755633354187, 'learning_rate': 2.3000000000000004e-06, 'epoch': 0.17}
{'origin_loss': 2.2017338275909424, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.17}
{'origin_loss': 2.004791259765625, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.17}
{'origin_loss': 1.9261432886123657, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 1.8372679948806763, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.17}
{'origin_loss': 1.8669955730438232, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.17}
{'origin_loss': 1.9397343397140503, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 1.891104817390442, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 1.7908142805099487, 'mask_loss': 0.1575927734375, 'mask_rate': 0.39697265625, 'epoch': 0.17}
{'loss': 195.9167, 'grad_norm': 0.382279634475708, 'learning_rate': 2.28e-06, 'epoch': 0.17}
{'origin_loss': 1.9637457132339478, 'mask_loss': 0.154296875, 'mask_rate': 0.392822265625, 'epoch': 0.17}
{'origin_loss': 1.792356014251709, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.17}
{'origin_loss': 1.8614386320114136, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 1.923363447189331, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 1.8396214246749878, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.17}
{'origin_loss': 1.970564603805542, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 2.06988263130188, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.17}
{'origin_loss': 1.8927780389785767, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.17}
{'loss': 196.7892, 'grad_norm': 0.35872021317481995, 'learning_rate': 2.2600000000000004e-06, 'epoch': 0.17}
{'origin_loss': 1.965152621269226, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.17}
{'origin_loss': 1.9686176776885986, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.17}
{'origin_loss': 2.151005506515503, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.17}
{'origin_loss': 1.8875515460968018, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.17}
{'origin_loss': 1.923417568206787, 'mask_loss': 0.1583251953125, 'mask_rate': 0.39794921875, 'epoch': 0.17}
{'origin_loss': 2.227783441543579, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.17}
{'origin_loss': 2.2704315185546875, 'mask_loss': 0.1558837890625, 'mask_rate': 0.394775390625, 'epoch': 0.17}
{'origin_loss': 1.862931728363037, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'loss': 195.1727, 'grad_norm': 0.3226318359375, 'learning_rate': 2.24e-06, 'epoch': 0.17}
{'origin_loss': 1.8943284749984741, 'mask_loss': 0.156005859375, 'mask_rate': 0.39501953125, 'epoch': 0.17}
{'origin_loss': 2.1013917922973633, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.17}
{'origin_loss': 1.9613524675369263, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 1.9631595611572266, 'mask_loss': 0.1578369140625, 'mask_rate': 0.397216796875, 'epoch': 0.17}
{'origin_loss': 2.047189474105835, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 2.192638397216797, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.17}
{'origin_loss': 1.8559527397155762, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.17}
{'origin_loss': 2.1982786655426025, 'mask_loss': 0.1595458984375, 'mask_rate': 0.3994140625, 'epoch': 0.17}
{'loss': 195.9643, 'grad_norm': 0.30576270818710327, 'learning_rate': 2.2200000000000003e-06, 'epoch': 0.17}
{'origin_loss': 1.936170220375061, 'mask_loss': 0.158935546875, 'mask_rate': 0.398681640625, 'epoch': 0.17}
{'origin_loss': 2.0603036880493164, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 2.0148890018463135, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.17}
{'origin_loss': 1.9225430488586426, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 1.9666080474853516, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.17}
{'origin_loss': 2.0652403831481934, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 2.0167250633239746, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 2.0953056812286377, 'mask_loss': 0.155029296875, 'mask_rate': 0.393798828125, 'epoch': 0.17}
{'loss': 195.4941, 'grad_norm': 0.3196505606174469, 'learning_rate': 2.2e-06, 'epoch': 0.17}
{'origin_loss': 1.794123649597168, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'origin_loss': 1.9936426877975464, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'origin_loss': 1.8660396337509155, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.17}
{'origin_loss': 1.8421056270599365, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.17}
{'origin_loss': 1.8688056468963623, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.17}
{'origin_loss': 1.9654507637023926, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'origin_loss': 2.1226093769073486, 'mask_loss': 0.156982421875, 'mask_rate': 0.396240234375, 'epoch': 0.17}
{'origin_loss': 2.0024402141571045, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'loss': 196.635, 'grad_norm': 0.33487144112586975, 'learning_rate': 2.1800000000000003e-06, 'epoch': 0.17}
{'origin_loss': 2.0633139610290527, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.17}
{'origin_loss': 1.9562582969665527, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.17}
{'origin_loss': 1.8929259777069092, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 1.9556076526641846, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 2.0451393127441406, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.17}
{'origin_loss': 2.0697946548461914, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.17}
{'origin_loss': 1.8386019468307495, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'origin_loss': 2.030419111251831, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.17}
{'loss': 199.7628, 'grad_norm': 0.3997897207736969, 'learning_rate': 2.16e-06, 'epoch': 0.17}
{'origin_loss': 2.0475008487701416, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.17}
{'origin_loss': 1.9896537065505981, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.17}
{'origin_loss': 2.003490447998047, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 2.009162664413452, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'origin_loss': 1.8937262296676636, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 2.044186592102051, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.17}
{'origin_loss': 1.9378392696380615, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.17}
{'origin_loss': 2.1017584800720215, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'loss': 199.8159, 'grad_norm': 0.4121781587600708, 'learning_rate': 2.1400000000000003e-06, 'epoch': 0.17}
{'origin_loss': 1.8019652366638184, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.17}
{'origin_loss': 2.1024880409240723, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.17}
{'origin_loss': 1.994791865348816, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.17}
{'origin_loss': 1.9335286617279053, 'mask_loss': 0.157958984375, 'mask_rate': 0.3974609375, 'epoch': 0.17}
{'origin_loss': 1.9265270233154297, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'origin_loss': 2.009695053100586, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 1.9372491836547852, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 2.0052144527435303, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.17}
{'loss': 198.9014, 'grad_norm': 0.37060025334358215, 'learning_rate': 2.12e-06, 'epoch': 0.17}
{'origin_loss': 2.0585055351257324, 'mask_loss': 0.1591796875, 'mask_rate': 0.39892578125, 'epoch': 0.17}
{'origin_loss': 1.9529526233673096, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.17}
{'origin_loss': 1.9019919633865356, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.17}
{'origin_loss': 1.9863585233688354, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.17}
{'origin_loss': 1.9254157543182373, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.17}
{'origin_loss': 1.9888925552368164, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 1.860363483428955, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.17}
{'origin_loss': 2.005192995071411, 'mask_loss': 0.1617431640625, 'mask_rate': 0.402099609375, 'epoch': 0.17}
{'loss': 199.3818, 'grad_norm': 0.336026132106781, 'learning_rate': 2.1000000000000002e-06, 'epoch': 0.17}
{'origin_loss': 1.864471197128296, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.17}
{'origin_loss': 2.026010513305664, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'origin_loss': 1.85346257686615, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'origin_loss': 2.14005446434021, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'origin_loss': 2.067493438720703, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 1.8520771265029907, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 1.9546678066253662, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.17}
{'origin_loss': 1.9527579545974731, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'loss': 198.9483, 'grad_norm': 0.3884202241897583, 'learning_rate': 2.08e-06, 'epoch': 0.17}
{'origin_loss': 2.1950204372406006, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 2.0037245750427246, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.17}
{'origin_loss': 2.0027148723602295, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 2.0191195011138916, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.17}
{'origin_loss': 1.8613816499710083, 'mask_loss': 0.15966796875, 'mask_rate': 0.399658203125, 'epoch': 0.17}
{'origin_loss': 2.0916178226470947, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 2.097195863723755, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.17}
{'origin_loss': 1.8470077514648438, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'loss': 198.6085, 'grad_norm': 0.3917309641838074, 'learning_rate': 2.06e-06, 'epoch': 0.17}
{'origin_loss': 1.962581992149353, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.17}
{'origin_loss': 2.0150375366210938, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.17}
{'origin_loss': 1.8509386777877808, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.17}
{'origin_loss': 1.8257269859313965, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.17}
{'origin_loss': 1.887304425239563, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'origin_loss': 1.8756966590881348, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.17}
{'origin_loss': 2.0484440326690674, 'mask_loss': 0.160888671875, 'mask_rate': 0.401123046875, 'epoch': 0.17}
{'origin_loss': 1.9825074672698975, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.17}
{'loss': 198.8529, 'grad_norm': 0.4342314898967743, 'learning_rate': 2.04e-06, 'epoch': 0.17}
{'origin_loss': 1.9608149528503418, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.17}
{'origin_loss': 1.8533859252929688, 'mask_loss': 0.15869140625, 'mask_rate': 0.3984375, 'epoch': 0.17}
{'origin_loss': 1.8218426704406738, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.17}
{'origin_loss': 1.9677432775497437, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.17}
{'origin_loss': 2.0151357650756836, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.17}
{'origin_loss': 2.007800817489624, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.17}
{'origin_loss': 1.894004225730896, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.17}
{'origin_loss': 1.8675060272216797, 'mask_loss': 0.1605224609375, 'mask_rate': 0.400634765625, 'epoch': 0.17}
{'loss': 199.4079, 'grad_norm': 0.3348279595375061, 'learning_rate': 2.02e-06, 'epoch': 0.17}
{'origin_loss': 2.030872344970703, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'origin_loss': 2.125568389892578, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 1.960086464881897, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'origin_loss': 1.952790379524231, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.17}
{'origin_loss': 2.016209363937378, 'mask_loss': 0.1585693359375, 'mask_rate': 0.398193359375, 'epoch': 0.17}
{'origin_loss': 1.8849843740463257, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.17}
{'origin_loss': 2.1177401542663574, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.17}
{'origin_loss': 2.09696888923645, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.17}
{'loss': 198.5857, 'grad_norm': 0.34363311529159546, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.17}
{'origin_loss': 1.9185543060302734, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.17}
{'origin_loss': 2.2713847160339355, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.17}
{'origin_loss': 2.1635069847106934, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.17}
{'origin_loss': 2.1933672428131104, 'mask_loss': 0.1568603515625, 'mask_rate': 0.39599609375, 'epoch': 0.17}
{'origin_loss': 2.0891523361206055, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.17}
{'origin_loss': 1.7759701013565063, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.17}
{'origin_loss': 1.9169496297836304, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.17}
{'origin_loss': 2.0883069038391113, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.17}
{'loss': 199.724, 'grad_norm': 0.3410765528678894, 'learning_rate': 1.98e-06, 'epoch': 0.18}
{'origin_loss': 2.106729745864868, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.18}
{'origin_loss': 1.819168210029602, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.18}
{'origin_loss': 1.9987473487854004, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.18}
{'origin_loss': 2.089684247970581, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 2.051919460296631, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.18}
{'origin_loss': 2.17875337600708, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.18}
{'origin_loss': 1.8687798976898193, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.18}
{'origin_loss': 2.1908981800079346, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.18}
{'loss': 200.3662, 'grad_norm': 0.31807973980903625, 'learning_rate': 1.9600000000000003e-06, 'epoch': 0.18}
{'origin_loss': 1.9337458610534668, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.18}
{'origin_loss': 2.0669970512390137, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.18}
{'origin_loss': 1.8492743968963623, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.18}
{'origin_loss': 1.9714547395706177, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.18}
{'origin_loss': 2.127157688140869, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 1.9858102798461914, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.9030910730361938, 'mask_loss': 0.1593017578125, 'mask_rate': 0.399169921875, 'epoch': 0.18}
{'origin_loss': 2.286868095397949, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.18}
{'loss': 200.5155, 'grad_norm': 0.35451647639274597, 'learning_rate': 1.94e-06, 'epoch': 0.18}
{'origin_loss': 1.9874323606491089, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.9630707502365112, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.18}
{'origin_loss': 1.8351787328720093, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.18}
{'origin_loss': 1.9138802289962769, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'origin_loss': 1.726118803024292, 'mask_loss': 0.16015625, 'mask_rate': 0.400146484375, 'epoch': 0.18}
{'origin_loss': 2.1789886951446533, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.18}
{'origin_loss': 1.8866281509399414, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 1.8004711866378784, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.18}
{'loss': 201.3802, 'grad_norm': 0.4634140729904175, 'learning_rate': 1.9200000000000003e-06, 'epoch': 0.18}
{'origin_loss': 2.0052342414855957, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.18}
{'origin_loss': 1.94392728805542, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.18}
{'origin_loss': 2.064234972000122, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 2.0057601928710938, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.18}
{'origin_loss': 1.8303231000900269, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 2.080731153488159, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.18}
{'origin_loss': 1.983945608139038, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.18}
{'origin_loss': 1.8885526657104492, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.18}
{'loss': 202.2722, 'grad_norm': 0.35515299439430237, 'learning_rate': 1.9000000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.168748378753662, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.18}
{'origin_loss': 2.033849000930786, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 2.022324323654175, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 2.0979061126708984, 'mask_loss': 0.1611328125, 'mask_rate': 0.4013671875, 'epoch': 0.18}
{'origin_loss': 1.9421323537826538, 'mask_loss': 0.1614990234375, 'mask_rate': 0.40185546875, 'epoch': 0.18}
{'origin_loss': 2.085420608520508, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.18}
{'origin_loss': 2.034705638885498, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 2.038529872894287, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'loss': 200.9436, 'grad_norm': 0.35259810090065, 'learning_rate': 1.8800000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.141303062438965, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.18}
{'origin_loss': 1.8965731859207153, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.18}
{'origin_loss': 1.8768483400344849, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'origin_loss': 2.169313430786133, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.18}
{'origin_loss': 2.07246994972229, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 1.9461557865142822, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 1.9524645805358887, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.18}
{'origin_loss': 1.8973631858825684, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.18}
{'loss': 201.4941, 'grad_norm': 0.3530437648296356, 'learning_rate': 1.8600000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.0264687538146973, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.18}
{'origin_loss': 2.168675184249878, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 2.0459110736846924, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 1.8492097854614258, 'mask_loss': 0.1602783203125, 'mask_rate': 0.400390625, 'epoch': 0.18}
{'origin_loss': 1.8988574743270874, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'origin_loss': 1.994273066520691, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.18}
{'origin_loss': 2.0582826137542725, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.18}
{'origin_loss': 1.7394611835479736, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.18}
{'loss': 202.7695, 'grad_norm': 0.32970577478408813, 'learning_rate': 1.8400000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.0046212673187256, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.18}
{'origin_loss': 2.2548208236694336, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.18}
{'origin_loss': 1.9034920930862427, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.18}
{'origin_loss': 1.7478994131088257, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.18}
{'origin_loss': 2.054131031036377, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 2.0395166873931885, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.18}
{'origin_loss': 1.9918361902236938, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.18}
{'origin_loss': 1.9716215133666992, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'loss': 202.371, 'grad_norm': 0.3778512477874756, 'learning_rate': 1.8200000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.005944013595581, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.18}
{'origin_loss': 2.0997815132141113, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.18}
{'origin_loss': 1.9440661668777466, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'origin_loss': 1.856897234916687, 'mask_loss': 0.1566162109375, 'mask_rate': 0.395751953125, 'epoch': 0.18}
{'origin_loss': 2.1253340244293213, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.18}
{'origin_loss': 1.9249855279922485, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.18}
{'origin_loss': 2.0161612033843994, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.18}
{'origin_loss': 2.117950201034546, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'loss': 200.8083, 'grad_norm': 0.35242611169815063, 'learning_rate': 1.8000000000000001e-06, 'epoch': 0.18}
{'origin_loss': 1.961748480796814, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.18}
{'origin_loss': 2.074275493621826, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.18}
{'origin_loss': 1.9233129024505615, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.18}
{'origin_loss': 2.1383554935455322, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.18}
{'origin_loss': 2.177462339401245, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'origin_loss': 1.8819230794906616, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.18}
{'origin_loss': 2.185696840286255, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'origin_loss': 1.9590164422988892, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'loss': 203.819, 'grad_norm': 0.35122397541999817, 'learning_rate': 1.7800000000000001e-06, 'epoch': 0.18}
{'origin_loss': 2.0977861881256104, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.18}
{'origin_loss': 1.9486217498779297, 'mask_loss': 0.1612548828125, 'mask_rate': 0.401611328125, 'epoch': 0.18}
{'origin_loss': 1.9424257278442383, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 2.0165345668792725, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.18}
{'origin_loss': 2.1360552310943604, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.18}
{'origin_loss': 2.145554780960083, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 1.7293493747711182, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.18}
{'origin_loss': 2.0137155055999756, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'loss': 201.16, 'grad_norm': 0.35455581545829773, 'learning_rate': 1.76e-06, 'epoch': 0.18}
{'origin_loss': 1.9519245624542236, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.18}
{'origin_loss': 2.083134412765503, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.18}
{'origin_loss': 1.8466287851333618, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'origin_loss': 1.8534728288650513, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'origin_loss': 1.8977773189544678, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'origin_loss': 2.0235297679901123, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 1.844516634941101, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.18}
{'origin_loss': 2.0522563457489014, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.18}
{'loss': 201.6473, 'grad_norm': 0.3262402415275574, 'learning_rate': 1.74e-06, 'epoch': 0.18}
{'origin_loss': 1.9591227769851685, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.18}
{'origin_loss': 2.0758955478668213, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 2.0303955078125, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 2.1799190044403076, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 1.9625400304794312, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.18}
{'origin_loss': 1.9949694871902466, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 2.0429975986480713, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 2.0176494121551514, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'loss': 202.7048, 'grad_norm': 0.33200863003730774, 'learning_rate': 1.72e-06, 'epoch': 0.18}
{'origin_loss': 1.999896764755249, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.18}
{'origin_loss': 2.1022703647613525, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'origin_loss': 1.9818788766860962, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.18}
{'origin_loss': 2.080548048019409, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 2.005531072616577, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.9777178764343262, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.18}
{'origin_loss': 2.1952338218688965, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.18}
{'origin_loss': 1.9737777709960938, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.18}
{'loss': 203.8521, 'grad_norm': 0.34347230195999146, 'learning_rate': 1.7000000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.058610677719116, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 1.8224514722824097, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'origin_loss': 2.2208025455474854, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.18}
{'origin_loss': 2.0792429447174072, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.18}
{'origin_loss': 1.9181716442108154, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.974010705947876, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.18}
{'origin_loss': 2.024007558822632, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 1.945462942123413, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'loss': 202.4897, 'grad_norm': 0.3120831549167633, 'learning_rate': 1.6800000000000002e-06, 'epoch': 0.18}
{'origin_loss': 1.9561418294906616, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.9045112133026123, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.18}
{'origin_loss': 1.7697787284851074, 'mask_loss': 0.162109375, 'mask_rate': 0.402587890625, 'epoch': 0.18}
{'origin_loss': 2.028496503829956, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 2.050328254699707, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.18}
{'origin_loss': 1.8462738990783691, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.18}
{'origin_loss': 1.8782050609588623, 'mask_loss': 0.161865234375, 'mask_rate': 0.40234375, 'epoch': 0.18}
{'origin_loss': 2.0783770084381104, 'mask_loss': 0.16064453125, 'mask_rate': 0.40087890625, 'epoch': 0.18}
{'loss': 202.3609, 'grad_norm': 0.3123520612716675, 'learning_rate': 1.6600000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.1491379737854004, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.18}
{'origin_loss': 2.083040952682495, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.18}
{'origin_loss': 1.9529197216033936, 'mask_loss': 0.159912109375, 'mask_rate': 0.39990234375, 'epoch': 0.18}
{'origin_loss': 2.1003990173339844, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.18}
{'origin_loss': 1.8803156614303589, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.18}
{'origin_loss': 2.123955249786377, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'origin_loss': 1.9986565113067627, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 1.8713808059692383, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.18}
{'loss': 204.2231, 'grad_norm': 0.34852224588394165, 'learning_rate': 1.6400000000000002e-06, 'epoch': 0.18}
{'origin_loss': 2.349716901779175, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'origin_loss': 2.053539752960205, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 1.9099009037017822, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'origin_loss': 1.9193421602249146, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.18}
{'origin_loss': 1.9099006652832031, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 1.9457528591156006, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 2.024742603302002, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.18}
{'origin_loss': 1.799884557723999, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.18}
{'loss': 203.7235, 'grad_norm': 0.6819204092025757, 'learning_rate': 1.6200000000000002e-06, 'epoch': 0.18}
{'origin_loss': 1.8452134132385254, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.18}
{'origin_loss': 2.049882173538208, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.18}
{'origin_loss': 2.0119519233703613, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.18}
{'origin_loss': 2.0097858905792236, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.18}
{'origin_loss': 1.8308327198028564, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.18}
{'origin_loss': 1.9394617080688477, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.18}
{'origin_loss': 2.047525405883789, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.18}
{'origin_loss': 2.117133378982544, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'loss': 203.0596, 'grad_norm': 0.3172903060913086, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.18}
{'origin_loss': 2.064146041870117, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.18}
{'origin_loss': 1.9637917280197144, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.18}
{'origin_loss': 1.9658371210098267, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.18}
{'origin_loss': 1.7981878519058228, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'origin_loss': 1.976898431777954, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 2.0238912105560303, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.18}
{'origin_loss': 1.9955722093582153, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.18}
{'origin_loss': 2.060953140258789, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.18}
{'loss': 204.403, 'grad_norm': 0.34061816334724426, 'learning_rate': 1.5800000000000001e-06, 'epoch': 0.18}
{'origin_loss': 1.9927089214324951, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.18}
{'origin_loss': 2.1814019680023193, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.18}
{'origin_loss': 2.1232810020446777, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.18}
{'origin_loss': 1.9483067989349365, 'mask_loss': 0.1632080078125, 'mask_rate': 0.404052734375, 'epoch': 0.18}
{'origin_loss': 2.0505659580230713, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.18}
{'origin_loss': 2.0652377605438232, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.18}
{'origin_loss': 1.9036616086959839, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.18}
{'origin_loss': 1.9937920570373535, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'loss': 203.2667, 'grad_norm': 0.33649563789367676, 'learning_rate': 1.56e-06, 'epoch': 0.18}
{'origin_loss': 1.9092657566070557, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.18}
{'origin_loss': 1.9599483013153076, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.18}
{'origin_loss': 1.9436769485473633, 'mask_loss': 0.1636962890625, 'mask_rate': 0.404541015625, 'epoch': 0.18}
{'origin_loss': 2.003375768661499, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.18}
{'origin_loss': 1.8998503684997559, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.18}
{'origin_loss': 1.6938672065734863, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.18}
{'origin_loss': 1.9859964847564697, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.18}
{'origin_loss': 2.0476179122924805, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.18}
{'loss': 205.2898, 'grad_norm': 0.3707125782966614, 'learning_rate': 1.54e-06, 'epoch': 0.18}
{'origin_loss': 1.9411553144454956, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.18}
{'origin_loss': 1.8729896545410156, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.18}
{'origin_loss': 2.0313751697540283, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.18}
{'origin_loss': 2.117537021636963, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.18}
{'origin_loss': 1.914815068244934, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.18}
{'origin_loss': 1.8617075681686401, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.18}
{'origin_loss': 1.918980598449707, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.18}
{'origin_loss': 2.0877673625946045, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.18}
{'loss': 203.7339, 'grad_norm': 0.3784054219722748, 'learning_rate': 1.52e-06, 'epoch': 0.19}
{'origin_loss': 1.7721284627914429, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 1.987619161605835, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 1.9794363975524902, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.19}
{'origin_loss': 2.0112102031707764, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.19}
{'origin_loss': 2.0542092323303223, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.19}
{'origin_loss': 2.00813889503479, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.19}
{'origin_loss': 2.0516037940979004, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 2.043626308441162, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.19}
{'loss': 206.8948, 'grad_norm': 0.3253176212310791, 'learning_rate': 1.5e-06, 'epoch': 0.19}
{'origin_loss': 2.0568246841430664, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'origin_loss': 2.132415294647217, 'mask_loss': 0.1624755859375, 'mask_rate': 0.403076171875, 'epoch': 0.19}
{'origin_loss': 2.0293867588043213, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.19}
{'origin_loss': 1.9083675146102905, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 1.7891255617141724, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.19}
{'origin_loss': 1.990930438041687, 'mask_loss': 0.162841796875, 'mask_rate': 0.403564453125, 'epoch': 0.19}
{'origin_loss': 2.039363384246826, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 2.1763858795166016, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.19}
{'loss': 204.5935, 'grad_norm': 0.3542703092098236, 'learning_rate': 1.48e-06, 'epoch': 0.19}
{'origin_loss': 1.916851282119751, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.19}
{'origin_loss': 2.0753490924835205, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.19}
{'origin_loss': 1.8752678632736206, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.19}
{'origin_loss': 1.9761265516281128, 'mask_loss': 0.1634521484375, 'mask_rate': 0.404296875, 'epoch': 0.19}
{'origin_loss': 2.09222412109375, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.19}
{'origin_loss': 1.8841907978057861, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.19}
{'origin_loss': 1.9219372272491455, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.19}
{'origin_loss': 2.1372690200805664, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.19}
{'loss': 202.9536, 'grad_norm': 0.33008164167404175, 'learning_rate': 1.46e-06, 'epoch': 0.19}
{'origin_loss': 1.8250908851623535, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.19}
{'origin_loss': 1.9840105772018433, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 1.7591347694396973, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.19}
{'origin_loss': 1.8271925449371338, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.19}
{'origin_loss': 1.8758533000946045, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.19}
{'origin_loss': 2.0354058742523193, 'mask_loss': 0.1622314453125, 'mask_rate': 0.40283203125, 'epoch': 0.19}
{'origin_loss': 1.898709774017334, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.19}
{'origin_loss': 1.928271770477295, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.19}
{'loss': 205.8448, 'grad_norm': 0.3317142426967621, 'learning_rate': 1.44e-06, 'epoch': 0.19}
{'origin_loss': 2.1348443031311035, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.19}
{'origin_loss': 1.8393757343292236, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.19}
{'origin_loss': 2.0888376235961914, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.19}
{'origin_loss': 1.8505961894989014, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'origin_loss': 1.7864704132080078, 'mask_loss': 0.1572265625, 'mask_rate': 0.396484375, 'epoch': 0.19}
{'origin_loss': 1.7859156131744385, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.19}
{'origin_loss': 1.952514886856079, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 1.7935038805007935, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'loss': 204.2009, 'grad_norm': 0.384213387966156, 'learning_rate': 1.42e-06, 'epoch': 0.19}
{'origin_loss': 2.0122640132904053, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.19}
{'origin_loss': 2.278106927871704, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.19}
{'origin_loss': 2.078627347946167, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.19}
{'origin_loss': 1.9359960556030273, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'origin_loss': 2.083418607711792, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 1.8280251026153564, 'mask_loss': 0.1796875, 'mask_rate': 0.423828125, 'epoch': 0.19}
{'origin_loss': 2.168452262878418, 'mask_loss': 0.1630859375, 'mask_rate': 0.40380859375, 'epoch': 0.19}
{'origin_loss': 1.8475894927978516, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'loss': 206.8572, 'grad_norm': 0.3332980275154114, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.19}
{'origin_loss': 1.8157531023025513, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 2.0140182971954346, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.19}
{'origin_loss': 2.0542447566986084, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'origin_loss': 2.0652031898498535, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 2.2684497833251953, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.19}
{'origin_loss': 1.969045639038086, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'origin_loss': 1.8766120672225952, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.19}
{'origin_loss': 2.091813325881958, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'loss': 206.4256, 'grad_norm': 0.3595847189426422, 'learning_rate': 1.3800000000000001e-06, 'epoch': 0.19}
{'origin_loss': 1.531602144241333, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.19}
{'origin_loss': 2.075960874557495, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'origin_loss': 2.1169259548187256, 'mask_loss': 0.1646728515625, 'mask_rate': 0.40576171875, 'epoch': 0.19}
{'origin_loss': 2.00034499168396, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.19}
{'origin_loss': 2.0657124519348145, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 1.8994457721710205, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.19}
{'origin_loss': 1.9036130905151367, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.19}
{'origin_loss': 1.9313303232192993, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'loss': 206.9719, 'grad_norm': 0.32485970854759216, 'learning_rate': 1.3600000000000001e-06, 'epoch': 0.19}
{'origin_loss': 1.9837098121643066, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.19}
{'origin_loss': 1.9734232425689697, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'origin_loss': 1.987062931060791, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 1.8147932291030884, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.19}
{'origin_loss': 1.947851300239563, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 1.8246712684631348, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'origin_loss': 1.9339646100997925, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 2.0299453735351562, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.19}
{'loss': 206.9994, 'grad_norm': 0.33544695377349854, 'learning_rate': 1.34e-06, 'epoch': 0.19}
{'origin_loss': 2.000460624694824, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 1.9248639345169067, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.19}
{'origin_loss': 1.9083678722381592, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.19}
{'origin_loss': 1.930390477180481, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.19}
{'origin_loss': 2.138221025466919, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'origin_loss': 2.1218276023864746, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.19}
{'origin_loss': 1.999977707862854, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 1.9756534099578857, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'loss': 207.6093, 'grad_norm': 0.35496416687965393, 'learning_rate': 1.32e-06, 'epoch': 0.19}
{'origin_loss': 1.8994115591049194, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 2.058349132537842, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.19}
{'origin_loss': 2.1185173988342285, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.19}
{'origin_loss': 1.8531150817871094, 'mask_loss': 0.164794921875, 'mask_rate': 0.406005859375, 'epoch': 0.19}
{'origin_loss': 1.8777439594268799, 'mask_loss': 0.1644287109375, 'mask_rate': 0.405517578125, 'epoch': 0.19}
{'origin_loss': 1.9828077554702759, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.19}
{'origin_loss': 1.866995096206665, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.19}
{'origin_loss': 2.1574113368988037, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.19}
{'loss': 205.1174, 'grad_norm': 0.32649996876716614, 'learning_rate': 1.3e-06, 'epoch': 0.19}
{'origin_loss': 1.8771222829818726, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'origin_loss': 1.8994039297103882, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.19}
{'origin_loss': 1.8666672706604004, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.19}
{'origin_loss': 1.8253252506256104, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 1.668508768081665, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'origin_loss': 2.0228283405303955, 'mask_loss': 0.164306640625, 'mask_rate': 0.4052734375, 'epoch': 0.19}
{'origin_loss': 1.739427089691162, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.19}
{'origin_loss': 1.9496760368347168, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'loss': 205.9967, 'grad_norm': 0.3510197401046753, 'learning_rate': 1.28e-06, 'epoch': 0.19}
{'origin_loss': 2.070765972137451, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.19}
{'origin_loss': 1.9864511489868164, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.19}
{'origin_loss': 2.1057088375091553, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 1.7998353242874146, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 1.9413650035858154, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 1.9838982820510864, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.19}
{'origin_loss': 2.1704893112182617, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.19}
{'origin_loss': 1.969495177268982, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.19}
{'loss': 207.1129, 'grad_norm': 0.6078552007675171, 'learning_rate': 1.26e-06, 'epoch': 0.19}
{'origin_loss': 1.9033883810043335, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.19}
{'origin_loss': 1.962054967880249, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'origin_loss': 1.6643728017807007, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.19}
{'origin_loss': 2.042919635772705, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.19}
{'origin_loss': 1.94314444065094, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.19}
{'origin_loss': 2.087223768234253, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 2.099547863006592, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.19}
{'origin_loss': 1.9523371458053589, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.19}
{'loss': 207.3944, 'grad_norm': 0.3186427354812622, 'learning_rate': 1.2400000000000002e-06, 'epoch': 0.19}
{'origin_loss': 1.9340659379959106, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.19}
{'origin_loss': 2.1626832485198975, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'origin_loss': 1.9016376733779907, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.19}
{'origin_loss': 1.9092270135879517, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 1.9055737257003784, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 1.9021719694137573, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.19}
{'origin_loss': 2.1170895099639893, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 2.0193276405334473, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.19}
{'loss': 207.4502, 'grad_norm': 0.3474067151546478, 'learning_rate': 1.2200000000000002e-06, 'epoch': 0.19}
{'origin_loss': 2.0869481563568115, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.19}
{'origin_loss': 2.024806499481201, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.19}
{'origin_loss': 1.964300513267517, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 1.9819612503051758, 'mask_loss': 0.163818359375, 'mask_rate': 0.40478515625, 'epoch': 0.19}
{'origin_loss': 2.11666202545166, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 2.104461431503296, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.19}
{'origin_loss': 2.096431255340576, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 1.9473294019699097, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.19}
{'loss': 206.306, 'grad_norm': 0.3572363257408142, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.19}
{'origin_loss': 1.9302211999893188, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.19}
{'origin_loss': 1.8550996780395508, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.19}
{'origin_loss': 1.9514906406402588, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.19}
{'origin_loss': 1.4809021949768066, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 1.8458993434906006, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'origin_loss': 1.9253464937210083, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'origin_loss': 2.0510382652282715, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 2.0088655948638916, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.19}
{'loss': 208.3342, 'grad_norm': 0.3475044071674347, 'learning_rate': 1.1800000000000001e-06, 'epoch': 0.19}
{'origin_loss': 1.8884695768356323, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'origin_loss': 2.011509895324707, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 1.902747392654419, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.19}
{'origin_loss': 2.0972073078155518, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 2.103883743286133, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.19}
{'origin_loss': 1.9859373569488525, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.19}
{'origin_loss': 1.8003807067871094, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.19}
{'origin_loss': 2.217985153198242, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.19}
{'loss': 209.0635, 'grad_norm': 0.34865427017211914, 'learning_rate': 1.1600000000000001e-06, 'epoch': 0.19}
{'origin_loss': 1.9557316303253174, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 1.9392924308776855, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.19}
{'origin_loss': 2.0507149696350098, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.19}
{'origin_loss': 1.902740240097046, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 2.009082794189453, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 1.9077657461166382, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.19}
{'origin_loss': 1.7612546682357788, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.19}
{'origin_loss': 2.0579190254211426, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'loss': 207.6356, 'grad_norm': 0.3291451036930084, 'learning_rate': 1.14e-06, 'epoch': 0.19}
{'origin_loss': 2.198141574859619, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.19}
{'origin_loss': 2.054067850112915, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'origin_loss': 1.9807664155960083, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.19}
{'origin_loss': 2.0867362022399902, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.19}
{'origin_loss': 1.8897998332977295, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.19}
{'origin_loss': 1.856699824333191, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 2.1117451190948486, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.19}
{'origin_loss': 1.9873114824295044, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.19}
{'loss': 208.8019, 'grad_norm': 0.32519015669822693, 'learning_rate': 1.12e-06, 'epoch': 0.19}
{'origin_loss': 1.9486385583877563, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.19}
{'origin_loss': 1.9833382368087769, 'mask_loss': 0.1627197265625, 'mask_rate': 0.4033203125, 'epoch': 0.19}
{'origin_loss': 1.944450855255127, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.19}
{'origin_loss': 1.990684151649475, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.19}
{'origin_loss': 1.8997032642364502, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.19}
{'origin_loss': 2.0898513793945312, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.19}
{'origin_loss': 2.0094621181488037, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.19}
{'origin_loss': 2.25449275970459, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'loss': 206.3745, 'grad_norm': 0.3475866913795471, 'learning_rate': 1.1e-06, 'epoch': 0.19}
{'origin_loss': 2.1280763149261475, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.19}
{'origin_loss': 2.065315008163452, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.19}
{'origin_loss': 2.1195592880249023, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.19}
{'origin_loss': 1.8929821252822876, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.19}
{'origin_loss': 1.9508198499679565, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.19}
{'origin_loss': 1.9644032716751099, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.19}
{'origin_loss': 2.0547664165496826, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.19}
{'origin_loss': 2.0033745765686035, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.19}
{'loss': 209.7099, 'grad_norm': 0.3779212534427643, 'learning_rate': 1.08e-06, 'epoch': 0.19}
{'origin_loss': 1.9573990106582642, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.19}
{'origin_loss': 2.016824245452881, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.19}
{'origin_loss': 2.197072982788086, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.19}
{'origin_loss': 1.6432504653930664, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.19}
{'origin_loss': 2.11970853805542, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.19}
{'origin_loss': 1.9206262826919556, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.19}
{'origin_loss': 1.9657844305038452, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.19}
{'origin_loss': 2.0068819522857666, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.19}
{'loss': 209.291, 'grad_norm': 0.35475966334342957, 'learning_rate': 1.06e-06, 'epoch': 0.2}
{'origin_loss': 2.050689220428467, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.2}
{'origin_loss': 1.8704876899719238, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.2}
{'origin_loss': 2.1811678409576416, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.2}
{'origin_loss': 1.9851354360580444, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 2.0036964416503906, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.2}
{'origin_loss': 2.0620498657226562, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.2}
{'origin_loss': 2.0102765560150146, 'mask_loss': 0.1666259765625, 'mask_rate': 0.408203125, 'epoch': 0.2}
{'origin_loss': 1.9099746942520142, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.2}
{'loss': 206.2592, 'grad_norm': 0.32703033089637756, 'learning_rate': 1.04e-06, 'epoch': 0.2}
{'origin_loss': 1.8978257179260254, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 1.9215152263641357, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.2}
{'origin_loss': 1.9771111011505127, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'origin_loss': 2.0848004817962646, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.2}
{'origin_loss': 1.9386695623397827, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 2.097564220428467, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.2}
{'origin_loss': 1.7898224592208862, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.2}
{'origin_loss': 1.970808744430542, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.2}
{'loss': 209.116, 'grad_norm': 0.3410107493400574, 'learning_rate': 1.02e-06, 'epoch': 0.2}
{'origin_loss': 2.1151223182678223, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.2}
{'origin_loss': 2.1307032108306885, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'origin_loss': 2.13053297996521, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.2}
{'origin_loss': 2.013282299041748, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.2}
{'origin_loss': 1.960646629333496, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.2}
{'origin_loss': 2.006866931915283, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.2}
{'origin_loss': 1.7743945121765137, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.2}
{'origin_loss': 2.163364887237549, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.2}
{'loss': 210.1775, 'grad_norm': 0.840883731842041, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.2}
{'origin_loss': 2.0864346027374268, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 1.8968467712402344, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 2.1712429523468018, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 1.938796877861023, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.2}
{'origin_loss': 2.088408946990967, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 2.1800479888916016, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 1.9831238985061646, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.2}
{'origin_loss': 1.748779535293579, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'loss': 210.0898, 'grad_norm': 0.3483407199382782, 'learning_rate': 9.800000000000001e-07, 'epoch': 0.2}
{'origin_loss': 2.018402576446533, 'mask_loss': 0.166259765625, 'mask_rate': 0.40771484375, 'epoch': 0.2}
{'origin_loss': 1.9895514249801636, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.2}
{'origin_loss': 2.0468993186950684, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.2}
{'origin_loss': 2.1201088428497314, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'origin_loss': 2.1741514205932617, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.2}
{'origin_loss': 1.9912735223770142, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.2}
{'origin_loss': 2.0065722465515137, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.2}
{'origin_loss': 2.071587562561035, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.2}
{'loss': 210.2242, 'grad_norm': 0.32418638467788696, 'learning_rate': 9.600000000000001e-07, 'epoch': 0.2}
{'origin_loss': 2.029231071472168, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 2.0217084884643555, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.2}
{'origin_loss': 1.9983340501785278, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.2}
{'origin_loss': 1.8878133296966553, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.2}
{'origin_loss': 2.3407294750213623, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.2}
{'origin_loss': 2.0638105869293213, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.2}
{'origin_loss': 2.083277940750122, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 1.8562713861465454, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'loss': 208.7539, 'grad_norm': 0.34209519624710083, 'learning_rate': 9.400000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.8678507804870605, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.2}
{'origin_loss': 1.913698434829712, 'mask_loss': 0.166015625, 'mask_rate': 0.407470703125, 'epoch': 0.2}
{'origin_loss': 1.9521745443344116, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.2}
{'origin_loss': 1.887878656387329, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 1.9391252994537354, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 1.8775933980941772, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.2}
{'origin_loss': 1.8934638500213623, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 1.8707876205444336, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'loss': 208.6816, 'grad_norm': 0.3512159287929535, 'learning_rate': 9.200000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.876132845878601, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.2}
{'origin_loss': 1.7847071886062622, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.2}
{'origin_loss': 1.988862156867981, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.2}
{'origin_loss': 2.0282318592071533, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.2}
{'origin_loss': 2.2218892574310303, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 2.0560169219970703, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.2}
{'origin_loss': 1.8399183750152588, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.2}
{'origin_loss': 1.7969017028808594, 'mask_loss': 0.1640625, 'mask_rate': 0.405029296875, 'epoch': 0.2}
{'loss': 210.6678, 'grad_norm': 0.35193362832069397, 'learning_rate': 9.000000000000001e-07, 'epoch': 0.2}
{'origin_loss': 2.108956813812256, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.2}
{'origin_loss': 2.1225264072418213, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.2}
{'origin_loss': 1.96384859085083, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.2}
{'origin_loss': 1.9101415872573853, 'mask_loss': 0.17822265625, 'mask_rate': 0.422119140625, 'epoch': 0.2}
{'origin_loss': 1.9481645822525024, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.2}
{'origin_loss': 1.8614252805709839, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 1.8554085493087769, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.2}
{'origin_loss': 2.106696367263794, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'loss': 210.6721, 'grad_norm': 0.3333529829978943, 'learning_rate': 8.8e-07, 'epoch': 0.2}
{'origin_loss': 1.8244800567626953, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.2}
{'origin_loss': 2.1821961402893066, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 1.7907027006149292, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 2.0229876041412354, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.2}
{'origin_loss': 2.123537063598633, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.2}
{'origin_loss': 1.987580418586731, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.2}
{'origin_loss': 1.9246991872787476, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.2}
{'origin_loss': 2.1676485538482666, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'loss': 208.8311, 'grad_norm': 0.3397914469242096, 'learning_rate': 8.6e-07, 'epoch': 0.2}
{'origin_loss': 2.021871328353882, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.2}
{'origin_loss': 2.1139180660247803, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 1.9741733074188232, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 2.0415616035461426, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.2}
{'origin_loss': 2.029512405395508, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 1.9086048603057861, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.2}
{'origin_loss': 1.7993433475494385, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 2.2013051509857178, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.2}
{'loss': 208.2144, 'grad_norm': 0.31238821148872375, 'learning_rate': 8.400000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.8830714225769043, 'mask_loss': 0.165283203125, 'mask_rate': 0.406494140625, 'epoch': 0.2}
{'origin_loss': 1.91257905960083, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'origin_loss': 2.0898163318634033, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.2}
{'origin_loss': 2.2241551876068115, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'origin_loss': 2.0533854961395264, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 1.7890546321868896, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.2}
{'origin_loss': 2.0677483081817627, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.2}
{'origin_loss': 2.0440962314605713, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'loss': 208.1642, 'grad_norm': 0.3183729350566864, 'learning_rate': 8.200000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.8811559677124023, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.2}
{'origin_loss': 2.044654607772827, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 1.979321002960205, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 1.882920503616333, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.2}
{'origin_loss': 2.052471160888672, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 1.9077041149139404, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.2}
{'origin_loss': 1.9064947366714478, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 1.9392749071121216, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'loss': 209.4492, 'grad_norm': 0.34720972180366516, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.2}
{'origin_loss': 2.0187289714813232, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.2}
{'origin_loss': 1.9675798416137695, 'mask_loss': 0.1776123046875, 'mask_rate': 0.42138671875, 'epoch': 0.2}
{'origin_loss': 2.0543863773345947, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 2.035392999649048, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'origin_loss': 1.929381012916565, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.2}
{'origin_loss': 1.9748482704162598, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.2}
{'origin_loss': 1.9585191011428833, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.2}
{'origin_loss': 2.1771252155303955, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.2}
{'loss': 212.1082, 'grad_norm': 0.34082722663879395, 'learning_rate': 7.8e-07, 'epoch': 0.2}
{'origin_loss': 2.1086301803588867, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.2}
{'origin_loss': 1.9474447965621948, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 1.959256649017334, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.2}
{'origin_loss': 1.8065847158432007, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.2}
{'origin_loss': 2.1206705570220947, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'origin_loss': 2.006009578704834, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.2}
{'origin_loss': 1.8281763792037964, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.2}
{'origin_loss': 2.2664310932159424, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'loss': 208.3648, 'grad_norm': 0.3400407135486603, 'learning_rate': 7.6e-07, 'epoch': 0.2}
{'origin_loss': 1.9465124607086182, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'origin_loss': 1.987234115600586, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'origin_loss': 2.0020089149475098, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 2.125649929046631, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.2}
{'origin_loss': 2.0946764945983887, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.2}
{'origin_loss': 2.1620211601257324, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.2}
{'origin_loss': 1.9471336603164673, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.2}
{'origin_loss': 1.8690301179885864, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.2}
{'loss': 210.5012, 'grad_norm': 0.3560314476490021, 'learning_rate': 7.4e-07, 'epoch': 0.2}
{'origin_loss': 1.9441304206848145, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.2}
{'origin_loss': 2.1157023906707764, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 2.172617197036743, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.2}
{'origin_loss': 1.97114098072052, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.2}
{'origin_loss': 1.7715866565704346, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.2}
{'origin_loss': 2.2202565670013428, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.2}
{'origin_loss': 2.0365428924560547, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 2.162641763687134, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'loss': 208.1274, 'grad_norm': 0.33310666680336, 'learning_rate': 7.2e-07, 'epoch': 0.2}
{'origin_loss': 1.792255163192749, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.2}
{'origin_loss': 2.1300439834594727, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.2}
{'origin_loss': 2.2421555519104004, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.2}
{'origin_loss': 1.821986198425293, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.2}
{'origin_loss': 1.9773491621017456, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 1.9921691417694092, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.2}
{'origin_loss': 1.9004698991775513, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 2.0580859184265137, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.2}
{'loss': 208.8956, 'grad_norm': 0.31315940618515015, 'learning_rate': 7.000000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.8161709308624268, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 1.9909387826919556, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.2}
{'origin_loss': 1.9222149848937988, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'origin_loss': 1.8373206853866577, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.2}
{'origin_loss': 2.017888069152832, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'origin_loss': 2.0278561115264893, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 2.0611026287078857, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.2}
{'origin_loss': 1.9679850339889526, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.2}
{'loss': 212.1427, 'grad_norm': 0.4202367663383484, 'learning_rate': 6.800000000000001e-07, 'epoch': 0.2}
{'origin_loss': 1.7136032581329346, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.2}
{'origin_loss': 1.9238157272338867, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.2}
{'origin_loss': 1.872857928276062, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.2}
{'origin_loss': 2.026658058166504, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.2}
{'origin_loss': 2.129692554473877, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.2}
{'origin_loss': 1.8075029850006104, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.2}
{'origin_loss': 1.9236657619476318, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.2}
{'origin_loss': 1.9609761238098145, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.2}
{'loss': 210.0292, 'grad_norm': 0.38926979899406433, 'learning_rate': 6.6e-07, 'epoch': 0.2}
{'origin_loss': 2.0613059997558594, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 1.915578007698059, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.2}
{'origin_loss': 1.9187390804290771, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.2}
{'origin_loss': 1.8940718173980713, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.2}
{'origin_loss': 2.112423896789551, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.2}
{'origin_loss': 1.914926290512085, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.2}
{'origin_loss': 2.0452187061309814, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.2}
{'origin_loss': 1.779685139656067, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.2}
{'loss': 210.0334, 'grad_norm': 0.5513574481010437, 'learning_rate': 6.4e-07, 'epoch': 0.2}
{'origin_loss': 2.096540689468384, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.2}
{'origin_loss': 2.0270419120788574, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 2.0322914123535156, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 1.7566542625427246, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.2}
{'origin_loss': 1.9155104160308838, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.2}
{'origin_loss': 1.8745492696762085, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.2}
{'origin_loss': 1.796225666999817, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.2}
{'origin_loss': 2.067302703857422, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'loss': 210.9614, 'grad_norm': 0.3488976061344147, 'learning_rate': 6.200000000000001e-07, 'epoch': 0.2}
{'origin_loss': 2.177957534790039, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.2}
{'origin_loss': 2.134120225906372, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.2}
{'origin_loss': 1.9472366571426392, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.2}
{'origin_loss': 2.0397677421569824, 'mask_loss': 0.1690673828125, 'mask_rate': 0.4111328125, 'epoch': 0.2}
{'origin_loss': 1.9191839694976807, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.2}
{'origin_loss': 2.0425870418548584, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.2}
{'origin_loss': 1.9564847946166992, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.2}
{'origin_loss': 2.135640859603882, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.2}
{'loss': 210.2785, 'grad_norm': 0.3232079744338989, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.21}
{'origin_loss': 1.987405776977539, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 2.155423402786255, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 1.768771767616272, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.6583126783370972, 'mask_loss': 0.179443359375, 'mask_rate': 0.423583984375, 'epoch': 0.21}
{'origin_loss': 1.88111412525177, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'origin_loss': 1.9910811185836792, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 1.8821765184402466, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.21}
{'origin_loss': 2.034710168838501, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'loss': 211.4355, 'grad_norm': 0.3364104628562927, 'learning_rate': 5.800000000000001e-07, 'epoch': 0.21}
{'origin_loss': 1.9069814682006836, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.21}
{'origin_loss': 1.901776671409607, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 2.0509376525878906, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 1.9817087650299072, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 1.80290949344635, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.21}
{'origin_loss': 1.9088757038116455, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 2.0186047554016113, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.21}
{'origin_loss': 2.171152353286743, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.21}
{'loss': 210.4054, 'grad_norm': 0.3472057580947876, 'learning_rate': 5.6e-07, 'epoch': 0.21}
{'origin_loss': 1.8949689865112305, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.914319634437561, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.21}
{'origin_loss': 1.9822287559509277, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.21}
{'origin_loss': 2.190673351287842, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 1.9921263456344604, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 2.16192889213562, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.21}
{'origin_loss': 2.084136962890625, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.21}
{'origin_loss': 2.1946167945861816, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.21}
{'loss': 208.4894, 'grad_norm': 0.3249620199203491, 'learning_rate': 5.4e-07, 'epoch': 0.21}
{'origin_loss': 2.0769078731536865, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.21}
{'origin_loss': 1.9081705808639526, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 2.0819873809814453, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.21}
{'origin_loss': 1.9964786767959595, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 2.066976308822632, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.21}
{'origin_loss': 2.1422853469848633, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 2.015066623687744, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.21}
{'origin_loss': 1.7982063293457031, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'loss': 211.9951, 'grad_norm': 0.3192099332809448, 'learning_rate': 5.2e-07, 'epoch': 0.21}
{'origin_loss': 1.9990694522857666, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.21}
{'origin_loss': 1.973247766494751, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.21}
{'origin_loss': 1.9871244430541992, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 1.9894065856933594, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 1.9922783374786377, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 2.034808397293091, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 1.7520943880081177, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 1.839237928390503, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.21}
{'loss': 212.0084, 'grad_norm': 0.3570456802845001, 'learning_rate': 5.000000000000001e-07, 'epoch': 0.21}
{'origin_loss': 1.949690818786621, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.21}
{'origin_loss': 1.9637335538864136, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.21}
{'origin_loss': 2.05397629737854, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 2.067875385284424, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.21}
{'origin_loss': 2.116095542907715, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.21}
{'origin_loss': 1.735028624534607, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.21}
{'origin_loss': 2.115725517272949, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.21}
{'origin_loss': 2.042285203933716, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.21}
{'loss': 211.1305, 'grad_norm': 0.3409837782382965, 'learning_rate': 4.800000000000001e-07, 'epoch': 0.21}
{'origin_loss': 2.1263272762298584, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 1.801046371459961, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.21}
{'origin_loss': 2.0934090614318848, 'mask_loss': 0.1678466796875, 'mask_rate': 0.40966796875, 'epoch': 0.21}
{'origin_loss': 1.6411365270614624, 'mask_loss': 0.175537109375, 'mask_rate': 0.4189453125, 'epoch': 0.21}
{'origin_loss': 2.0519237518310547, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 2.111575126647949, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.21}
{'origin_loss': 2.0376877784729004, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.21}
{'origin_loss': 1.8637489080429077, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.21}
{'loss': 210.7471, 'grad_norm': 0.35120558738708496, 'learning_rate': 4.6000000000000004e-07, 'epoch': 0.21}
{'origin_loss': 1.8627599477767944, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.21}
{'origin_loss': 2.0888047218322754, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 1.8362568616867065, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'origin_loss': 1.991524338722229, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.21}
{'origin_loss': 2.0393123626708984, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.21}
{'origin_loss': 2.1891753673553467, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.21}
{'origin_loss': 1.8844975233078003, 'mask_loss': 0.1676025390625, 'mask_rate': 0.409423828125, 'epoch': 0.21}
{'origin_loss': 2.0611135959625244, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.21}
{'loss': 212.1348, 'grad_norm': 0.3860446810722351, 'learning_rate': 4.4e-07, 'epoch': 0.21}
{'origin_loss': 1.9925681352615356, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.9661616086959839, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.897009015083313, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.21}
{'origin_loss': 1.946792721748352, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.21}
{'origin_loss': 1.9752528667449951, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 1.943542718887329, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 2.0458123683929443, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 2.044102907180786, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.21}
{'loss': 211.4139, 'grad_norm': 0.31394433975219727, 'learning_rate': 4.2000000000000006e-07, 'epoch': 0.21}
{'origin_loss': 1.883158802986145, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 1.7791953086853027, 'mask_loss': 0.1656494140625, 'mask_rate': 0.406982421875, 'epoch': 0.21}
{'origin_loss': 1.9001778364181519, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'origin_loss': 2.0764412879943848, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.21}
{'origin_loss': 2.0371899604797363, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 1.9091746807098389, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.9710086584091187, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 1.998849868774414, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.21}
{'loss': 209.9288, 'grad_norm': 0.33813536167144775, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.21}
{'origin_loss': 1.7153685092926025, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 1.939346194267273, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 2.195152759552002, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 1.845603585243225, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 1.7187376022338867, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 2.0013222694396973, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.9029611349105835, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 2.084611415863037, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.21}
{'loss': 211.3473, 'grad_norm': 0.7442164421081543, 'learning_rate': 3.8e-07, 'epoch': 0.21}
{'origin_loss': 1.9164750576019287, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.21}
{'origin_loss': 2.292310953140259, 'mask_loss': 0.1669921875, 'mask_rate': 0.40869140625, 'epoch': 0.21}
{'origin_loss': 2.0618550777435303, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.21}
{'origin_loss': 2.1062119007110596, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.21}
{'origin_loss': 1.9540340900421143, 'mask_loss': 0.1771240234375, 'mask_rate': 0.4208984375, 'epoch': 0.21}
{'origin_loss': 1.9828579425811768, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.21}
{'origin_loss': 2.0070343017578125, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 1.892228364944458, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.21}
{'loss': 210.6204, 'grad_norm': 0.31701019406318665, 'learning_rate': 3.6e-07, 'epoch': 0.21}
{'origin_loss': 2.0824291706085205, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.21}
{'origin_loss': 1.7821351289749146, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 2.0510571002960205, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.21}
{'origin_loss': 1.9168152809143066, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.8677955865859985, 'mask_loss': 0.177001953125, 'mask_rate': 0.420654296875, 'epoch': 0.21}
{'origin_loss': 2.106548547744751, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'origin_loss': 2.0224721431732178, 'mask_loss': 0.168212890625, 'mask_rate': 0.41015625, 'epoch': 0.21}
{'origin_loss': 1.983545184135437, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'loss': 210.586, 'grad_norm': 0.3192342221736908, 'learning_rate': 3.4000000000000003e-07, 'epoch': 0.21}
{'origin_loss': 2.0092151165008545, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.9666616916656494, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 1.988951325416565, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'origin_loss': 2.0854413509368896, 'mask_loss': 0.1668701171875, 'mask_rate': 0.408447265625, 'epoch': 0.21}
{'origin_loss': 1.961281180381775, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 1.8756214380264282, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.21}
{'origin_loss': 1.9772156476974487, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.21}
{'origin_loss': 1.8753169775009155, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.21}
{'loss': 210.8737, 'grad_norm': 0.31885236501693726, 'learning_rate': 3.2e-07, 'epoch': 0.21}
{'origin_loss': 2.1712772846221924, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.8122210502624512, 'mask_loss': 0.1650390625, 'mask_rate': 0.40625, 'epoch': 0.21}
{'origin_loss': 2.0581552982330322, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 1.8526623249053955, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.21}
{'origin_loss': 2.0403683185577393, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.21}
{'origin_loss': 2.2761728763580322, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.21}
{'origin_loss': 1.9559569358825684, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'origin_loss': 2.0488297939300537, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.21}
{'loss': 209.9801, 'grad_norm': 0.3244754374027252, 'learning_rate': 3.0000000000000004e-07, 'epoch': 0.21}
{'origin_loss': 2.215769052505493, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 1.8641982078552246, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.21}
{'origin_loss': 2.118464231491089, 'mask_loss': 0.1773681640625, 'mask_rate': 0.421142578125, 'epoch': 0.21}
{'origin_loss': 2.052170753479004, 'mask_loss': 0.171875, 'mask_rate': 0.41455078125, 'epoch': 0.21}
{'origin_loss': 2.0237507820129395, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.21}
{'origin_loss': 2.2101492881774902, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.21}
{'origin_loss': 2.1013288497924805, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 1.892493486404419, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'loss': 212.2317, 'grad_norm': 0.3307158648967743, 'learning_rate': 2.8e-07, 'epoch': 0.21}
{'origin_loss': 1.8573415279388428, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.21}
{'origin_loss': 1.9244941473007202, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.21}
{'origin_loss': 2.0641512870788574, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 2.0812346935272217, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.21}
{'origin_loss': 1.8969109058380127, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 2.1382205486297607, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'origin_loss': 2.0625083446502686, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 2.051164388656616, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'loss': 211.9626, 'grad_norm': 0.3460601568222046, 'learning_rate': 2.6e-07, 'epoch': 0.21}
{'origin_loss': 1.8435922861099243, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 1.8052122592926025, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 2.0043466091156006, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 1.960510492324829, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'origin_loss': 2.0055501461029053, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 2.12615966796875, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.21}
{'origin_loss': 1.7835789918899536, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.21}
{'origin_loss': 2.1022534370422363, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'loss': 212.032, 'grad_norm': 0.3830535411834717, 'learning_rate': 2.4000000000000003e-07, 'epoch': 0.21}
{'origin_loss': 2.1746230125427246, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 2.0987071990966797, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 2.016324281692505, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.21}
{'origin_loss': 1.9855754375457764, 'mask_loss': 0.171630859375, 'mask_rate': 0.414306640625, 'epoch': 0.21}
{'origin_loss': 1.9674155712127686, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.21}
{'origin_loss': 1.9337875843048096, 'mask_loss': 0.1759033203125, 'mask_rate': 0.41943359375, 'epoch': 0.21}
{'origin_loss': 1.9843175411224365, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.21}
{'origin_loss': 1.9926481246948242, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'loss': 212.1911, 'grad_norm': 0.36809054017066956, 'learning_rate': 2.2e-07, 'epoch': 0.21}
{'origin_loss': 2.120490789413452, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'origin_loss': 1.7486984729766846, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.21}
{'origin_loss': 2.087463855743408, 'mask_loss': 0.16845703125, 'mask_rate': 0.410400390625, 'epoch': 0.21}
{'origin_loss': 1.9182080030441284, 'mask_loss': 0.177734375, 'mask_rate': 0.421630859375, 'epoch': 0.21}
{'origin_loss': 1.963320255279541, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'origin_loss': 2.099775552749634, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 1.859483242034912, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 2.1633992195129395, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.21}
{'loss': 212.0576, 'grad_norm': 0.31410640478134155, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.21}
{'origin_loss': 1.8440369367599487, 'mask_loss': 0.1767578125, 'mask_rate': 0.42041015625, 'epoch': 0.21}
{'origin_loss': 1.7779881954193115, 'mask_loss': 0.170654296875, 'mask_rate': 0.4130859375, 'epoch': 0.21}
{'origin_loss': 1.9305474758148193, 'mask_loss': 0.1663818359375, 'mask_rate': 0.407958984375, 'epoch': 0.21}
{'origin_loss': 2.0584757328033447, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'origin_loss': 2.0778610706329346, 'mask_loss': 0.17529296875, 'mask_rate': 0.418701171875, 'epoch': 0.21}
{'origin_loss': 1.8359211683273315, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.21}
{'origin_loss': 2.036701202392578, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.21}
{'origin_loss': 1.9311318397521973, 'mask_loss': 0.1728515625, 'mask_rate': 0.415771484375, 'epoch': 0.21}
{'loss': 211.6553, 'grad_norm': 0.35303643345832825, 'learning_rate': 1.8e-07, 'epoch': 0.21}
{'origin_loss': 2.0252416133880615, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.21}
{'origin_loss': 2.0160276889801025, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.21}
{'origin_loss': 1.961008906364441, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.21}
{'origin_loss': 2.1273317337036133, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'origin_loss': 1.7967578172683716, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.21}
{'origin_loss': 2.115514039993286, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.959301471710205, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.21}
{'origin_loss': 2.1306300163269043, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.21}
{'loss': 211.9071, 'grad_norm': 0.3439498543739319, 'learning_rate': 1.6e-07, 'epoch': 0.21}
{'origin_loss': 2.2650697231292725, 'mask_loss': 0.174072265625, 'mask_rate': 0.417236328125, 'epoch': 0.21}
{'origin_loss': 1.9845091104507446, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.21}
{'origin_loss': 1.9104394912719727, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.9562253952026367, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.21}
{'origin_loss': 2.212529420852661, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.21}
{'origin_loss': 2.016207695007324, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.21}
{'origin_loss': 1.7799354791641235, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.21}
{'origin_loss': 2.1039371490478516, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.21}
{'loss': 211.888, 'grad_norm': 0.34124264121055603, 'learning_rate': 1.4e-07, 'epoch': 0.22}
{'origin_loss': 1.6744941473007202, 'mask_loss': 0.1654052734375, 'mask_rate': 0.40673828125, 'epoch': 0.22}
{'origin_loss': 1.9432165622711182, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.22}
{'origin_loss': 2.1934285163879395, 'mask_loss': 0.17041015625, 'mask_rate': 0.412841796875, 'epoch': 0.22}
{'origin_loss': 2.082216501235962, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.22}
{'origin_loss': 2.1739790439605713, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.22}
{'origin_loss': 1.8092341423034668, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.22}
{'origin_loss': 2.005174398422241, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.22}
{'origin_loss': 2.1621971130371094, 'mask_loss': 0.1712646484375, 'mask_rate': 0.413818359375, 'epoch': 0.22}
{'loss': 210.5211, 'grad_norm': 0.32802242040634155, 'learning_rate': 1.2000000000000002e-07, 'epoch': 0.22}
{'origin_loss': 1.8101041316986084, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.22}
{'origin_loss': 1.7389891147613525, 'mask_loss': 0.1756591796875, 'mask_rate': 0.419189453125, 'epoch': 0.22}
{'origin_loss': 1.8691409826278687, 'mask_loss': 0.1685791015625, 'mask_rate': 0.41064453125, 'epoch': 0.22}
{'origin_loss': 1.9331306219100952, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.22}
{'origin_loss': 1.584062099456787, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.22}
{'origin_loss': 2.1475870609283447, 'mask_loss': 0.173095703125, 'mask_rate': 0.416015625, 'epoch': 0.22}
{'origin_loss': 1.8594168424606323, 'mask_loss': 0.1708984375, 'mask_rate': 0.413330078125, 'epoch': 0.22}
{'origin_loss': 2.0441126823425293, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.22}
{'loss': 210.7952, 'grad_norm': 0.36195433139801025, 'learning_rate': 1.0000000000000001e-07, 'epoch': 0.22}
{'origin_loss': 1.992283821105957, 'mask_loss': 0.16943359375, 'mask_rate': 0.41162109375, 'epoch': 0.22}
{'origin_loss': 2.063009023666382, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.22}
{'origin_loss': 2.166994333267212, 'mask_loss': 0.1700439453125, 'mask_rate': 0.412353515625, 'epoch': 0.22}
{'origin_loss': 1.9760043621063232, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.22}
{'origin_loss': 1.9454591274261475, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.22}
{'origin_loss': 2.0916619300842285, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.22}
{'origin_loss': 2.1811928749084473, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.22}
{'origin_loss': 2.135249614715576, 'mask_loss': 0.1688232421875, 'mask_rate': 0.410888671875, 'epoch': 0.22}
{'loss': 210.5065, 'grad_norm': 0.3141372501850128, 'learning_rate': 8e-08, 'epoch': 0.22}
{'origin_loss': 1.8321020603179932, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.22}
{'origin_loss': 2.0309793949127197, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.22}
{'origin_loss': 1.757678508758545, 'mask_loss': 0.175048828125, 'mask_rate': 0.41845703125, 'epoch': 0.22}
{'origin_loss': 2.169433355331421, 'mask_loss': 0.1732177734375, 'mask_rate': 0.416259765625, 'epoch': 0.22}
{'origin_loss': 1.69172203540802, 'mask_loss': 0.1763916015625, 'mask_rate': 0.419921875, 'epoch': 0.22}
{'origin_loss': 1.9825657606124878, 'mask_loss': 0.176513671875, 'mask_rate': 0.420166015625, 'epoch': 0.22}
{'origin_loss': 1.8993051052093506, 'mask_loss': 0.1761474609375, 'mask_rate': 0.419677734375, 'epoch': 0.22}
{'origin_loss': 1.9790104627609253, 'mask_loss': 0.167236328125, 'mask_rate': 0.408935546875, 'epoch': 0.22}
{'loss': 213.1053, 'grad_norm': 0.32945653796195984, 'learning_rate': 6.000000000000001e-08, 'epoch': 0.22}
{'origin_loss': 2.0954108238220215, 'mask_loss': 0.1658935546875, 'mask_rate': 0.4072265625, 'epoch': 0.22}
{'origin_loss': 1.8781225681304932, 'mask_loss': 0.177978515625, 'mask_rate': 0.421875, 'epoch': 0.22}
{'origin_loss': 1.9651951789855957, 'mask_loss': 0.169677734375, 'mask_rate': 0.411865234375, 'epoch': 0.22}
{'origin_loss': 2.047234535217285, 'mask_loss': 0.169189453125, 'mask_rate': 0.411376953125, 'epoch': 0.22}
{'origin_loss': 2.0468180179595947, 'mask_loss': 0.1719970703125, 'mask_rate': 0.414794921875, 'epoch': 0.22}
{'origin_loss': 1.991929292678833, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.22}
{'origin_loss': 1.8124314546585083, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.22}
{'origin_loss': 1.8943862915039062, 'mask_loss': 0.17431640625, 'mask_rate': 0.41748046875, 'epoch': 0.22}
{'loss': 210.0758, 'grad_norm': 0.365764319896698, 'learning_rate': 4e-08, 'epoch': 0.22}
{'origin_loss': 1.7113268375396729, 'mask_loss': 0.1746826171875, 'mask_rate': 0.41796875, 'epoch': 0.22}
{'origin_loss': 2.1002252101898193, 'mask_loss': 0.1710205078125, 'mask_rate': 0.41357421875, 'epoch': 0.22}
{'origin_loss': 2.100759983062744, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.22}
{'origin_loss': 2.1825554370880127, 'mask_loss': 0.1702880859375, 'mask_rate': 0.41259765625, 'epoch': 0.22}
{'origin_loss': 2.0892834663391113, 'mask_loss': 0.17138671875, 'mask_rate': 0.4140625, 'epoch': 0.22}
{'origin_loss': 1.9227205514907837, 'mask_loss': 0.172607421875, 'mask_rate': 0.41552734375, 'epoch': 0.22}
{'origin_loss': 2.0472748279571533, 'mask_loss': 0.173828125, 'mask_rate': 0.4169921875, 'epoch': 0.22}
{'origin_loss': 2.0017812252044678, 'mask_loss': 0.1697998046875, 'mask_rate': 0.412109375, 'epoch': 0.22}
{'loss': 211.4882, 'grad_norm': 0.31828057765960693, 'learning_rate': 2e-08, 'epoch': 0.22}
{'origin_loss': 2.1058671474456787, 'mask_loss': 0.1744384765625, 'mask_rate': 0.417724609375, 'epoch': 0.22}
{'origin_loss': 2.012956142425537, 'mask_loss': 0.1737060546875, 'mask_rate': 0.416748046875, 'epoch': 0.22}
{'origin_loss': 2.0406320095062256, 'mask_loss': 0.1722412109375, 'mask_rate': 0.4150390625, 'epoch': 0.22}
{'origin_loss': 2.0982584953308105, 'mask_loss': 0.1749267578125, 'mask_rate': 0.418212890625, 'epoch': 0.22}
{'origin_loss': 1.9487801790237427, 'mask_loss': 0.1734619140625, 'mask_rate': 0.41650390625, 'epoch': 0.22}
{'origin_loss': 2.023254632949829, 'mask_loss': 0.1724853515625, 'mask_rate': 0.415283203125, 'epoch': 0.22}
{'origin_loss': 2.23879075050354, 'mask_loss': 0.16796875, 'mask_rate': 0.409912109375, 'epoch': 0.22}
{'origin_loss': 1.9900197982788086, 'mask_loss': 0.16748046875, 'mask_rate': 0.4091796875, 'epoch': 0.22}
{'loss': 211.3229, 'grad_norm': 0.3420022428035736, 'learning_rate': 0.0, 'epoch': 0.22}
{'train_runtime': 55075.0125, 'train_samples_per_second': 0.581, 'train_steps_per_second': 0.009, 'train_loss': 199.97795690917968, 'epoch': 0.22}
Saving merged model with custom attention parameters...
wandb: wandb: You can sync this run to the cloud by running:wandb: wandb sync /inspire/hdd/project/heziweiproject/heziwei-25044/projects_lmlu/kvcache/wandb/offline-run-20251203_113547-8mt2ofwbwandb: Find logs at: wandb/offline-run-20251203_113547-8mt2ofwb/logs