taindp98 committed on
Commit
401bab3
·
verified ·
1 Parent(s): e617a89

Add files using upload-large-folder tool

Browse files
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "<im_end>": 32003,
3
+ "<im_patch>": 32001,
4
+ "<im_start>": 32002,
5
+ "[PAD]": 32000
6
+ }
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/netscratch/duynguyen/Research/Nghiem_LLaVA-Med/LVLM-Med/models/checkpoint_llava_med_instruct_60k_inline_mention_version_1-5_1e0_multi_graph_100_scale_dci_test_bugfix",
3
+ "architectures": [
4
+ "LlavaLlamaForCausalLM"
5
+ ],
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 1,
8
+ "freeze_mm_mlp_adapter": false,
9
+ "graph_num_features": 4096,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 11008,
14
+ "max_sequence_length": 2048,
15
+ "mm_hidden_size": 3072,
16
+ "mm_projector_type": "mlp2x_gelu",
17
+ "mm_use_im_start_end": true,
18
+ "mm_vision_select_layer": -2,
19
+ "mm_vision_tower": "openai/clip-vit-large-patch14",
20
+ "model_type": "llava",
21
+ "more_mlp": false,
22
+ "multi_graph": true,
23
+ "num_attention_heads": 32,
24
+ "num_hidden_layers": 32,
25
+ "pad_token_id": -1,
26
+ "qformer_path": "/netscratch/trnguyen/instructBLIP_checkpoint/blip2_pretrained_vitL.pth",
27
+ "remove_graph": false,
28
+ "rms_norm_eps": 1e-06,
29
+ "tie_word_embeddings": false,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.28.0.dev0",
32
+ "tune_mm_mlp_adapter": false,
33
+ "unify": true,
34
+ "use_cache": false,
35
+ "use_mm_proj": true,
36
+ "vocab_size": 32004
37
+ }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": -1,
6
+ "transformers_version": "4.28.0.dev0",
7
+ "use_cache": false
8
+ }
pytorch_model-00001-of-00003.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51bdfb88bfd1266693bbd421888040f1b1daa1b173c07b1680cb79efc6475a47
3
+ size 9878055609
pytorch_model-00002-of-00003.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eb5a32e749b567c63eed72cef30595c9045a5861d0d51045241d1d00699251d
3
+ size 9894801501
pytorch_model-00003-of-00003.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a276345aef3cce558f936e66af401702f1cefbf3f3173f9c68aa8fce6d8f02b7
3
+ size 7298531288
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 27071275008
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "pytorch_model-00003-of-00003.bin",
7
+ "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin",
8
+ "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
9
+ "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
10
+ "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
11
+ "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
12
+ "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
13
+ "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
14
+ "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
15
+ "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
16
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
17
+ "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
18
+ "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
19
+ "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
20
+ "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
21
+ "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
22
+ "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
23
+ "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
24
+ "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
25
+ "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
26
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
27
+ "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
28
+ "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
29
+ "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
30
+ "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
31
+ "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
32
+ "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
33
+ "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
34
+ "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
35
+ "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
36
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
37
+ "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
38
+ "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
39
+ "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
40
+ "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
41
+ "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
42
+ "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
43
+ "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
44
+ "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
45
+ "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
46
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
47
+ "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
48
+ "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
49
+ "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
50
+ "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
51
+ "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
52
+ "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
53
+ "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
54
+ "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
55
+ "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
56
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
57
+ "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
58
+ "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
59
+ "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
60
+ "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
61
+ "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
62
+ "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
63
+ "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
64
+ "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
65
+ "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
66
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
67
+ "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
68
+ "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
69
+ "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
70
+ "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
71
+ "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
72
+ "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
73
+ "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
74
+ "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
75
+ "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
76
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
77
+ "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
78
+ "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
79
+ "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
80
+ "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
81
+ "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
82
+ "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
83
+ "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
84
+ "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
85
+ "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
86
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
87
+ "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
88
+ "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
89
+ "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
90
+ "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
91
+ "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
92
+ "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
93
+ "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
94
+ "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
95
+ "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
96
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
97
+ "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
98
+ "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
99
+ "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
100
+ "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
101
+ "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
102
+ "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
103
+ "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
104
+ "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
105
+ "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
106
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
107
+ "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
108
+ "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
109
+ "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
110
+ "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
111
+ "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
112
+ "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
113
+ "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
114
+ "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
115
+ "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
116
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
117
+ "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
118
+ "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
119
+ "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
120
+ "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
121
+ "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
122
+ "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
123
+ "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
124
+ "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
125
+ "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
126
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
127
+ "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
128
+ "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
129
+ "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
130
+ "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
131
+ "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
132
+ "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
133
+ "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
134
+ "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
135
+ "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
136
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
137
+ "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
138
+ "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
139
+ "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
140
+ "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
141
+ "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
142
+ "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
143
+ "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
144
+ "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
145
+ "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
146
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
147
+ "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
148
+ "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
149
+ "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
150
+ "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
151
+ "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
152
+ "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
153
+ "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
154
+ "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
155
+ "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
156
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
157
+ "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
158
+ "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
159
+ "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
160
+ "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
161
+ "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
162
+ "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
163
+ "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
164
+ "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
165
+ "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
166
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
167
+ "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
168
+ "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
169
+ "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
170
+ "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
171
+ "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
172
+ "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
173
+ "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
174
+ "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
175
+ "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
176
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
177
+ "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
178
+ "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
179
+ "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
180
+ "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
181
+ "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
182
+ "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
183
+ "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
184
+ "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
185
+ "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
186
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
187
+ "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
188
+ "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
189
+ "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
190
+ "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
191
+ "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
192
+ "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
193
+ "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
194
+ "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
195
+ "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
196
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
197
+ "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
198
+ "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
199
+ "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
200
+ "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
201
+ "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
202
+ "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
203
+ "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
204
+ "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
205
+ "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
206
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
207
+ "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
208
+ "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
209
+ "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
210
+ "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
211
+ "model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
212
+ "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
213
+ "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
214
+ "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
215
+ "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
216
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
217
+ "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
218
+ "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
219
+ "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
220
+ "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
221
+ "model.layers.28.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
222
+ "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
223
+ "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
224
+ "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
225
+ "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
226
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
227
+ "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
228
+ "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
229
+ "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
230
+ "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
231
+ "model.layers.29.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
232
+ "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
233
+ "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
234
+ "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
235
+ "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
236
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
237
+ "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
238
+ "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
239
+ "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
240
+ "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
241
+ "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
242
+ "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
243
+ "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
244
+ "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
245
+ "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
246
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
247
+ "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
248
+ "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
249
+ "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
250
+ "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
251
+ "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
252
+ "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
253
+ "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
254
+ "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
255
+ "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
256
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
257
+ "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
258
+ "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
259
+ "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
260
+ "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
261
+ "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
262
+ "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
263
+ "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
264
+ "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
265
+ "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
266
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
267
+ "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
268
+ "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
269
+ "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
270
+ "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
271
+ "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
272
+ "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
273
+ "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
274
+ "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
275
+ "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
276
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
277
+ "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
278
+ "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
279
+ "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
280
+ "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
281
+ "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
282
+ "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
283
+ "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
284
+ "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
285
+ "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
286
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
287
+ "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
288
+ "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
289
+ "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
290
+ "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
291
+ "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
292
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
293
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
294
+ "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
295
+ "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
296
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
297
+ "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
298
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
299
+ "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
300
+ "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
301
+ "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
302
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
303
+ "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
304
+ "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
305
+ "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
306
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
307
+ "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
308
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
309
+ "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
310
+ "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
311
+ "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
312
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
313
+ "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
314
+ "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
315
+ "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
316
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
317
+ "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
318
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
319
+ "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
320
+ "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
321
+ "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
322
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
323
+ "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
324
+ "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
325
+ "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
326
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
327
+ "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
328
+ "model.mm_projector.0.bias": "pytorch_model-00003-of-00003.bin",
329
+ "model.mm_projector.0.weight": "pytorch_model-00003-of-00003.bin",
330
+ "model.mm_projector.2.bias": "pytorch_model-00003-of-00003.bin",
331
+ "model.mm_projector.2.weight": "pytorch_model-00003-of-00003.bin",
332
+ "model.norm.weight": "pytorch_model-00003-of-00003.bin"
333
+ }
334
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "</s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "<unk>"
6
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "__type": "AddedToken",
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "clean_up_tokenization_spaces": false,
11
+ "eos_token": {
12
+ "__type": "AddedToken",
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "model_max_length": 2048,
20
+ "pad_token": null,
21
+ "padding_side": "right",
22
+ "special_tokens_map_file": "/nobackup/haotian/.cache/huggingface/hub/models--lmsys--vicuna-7b-delta-v0/snapshots/6d2e645f62e734821be8c392c08c8b2098b35434/special_tokens_map.json",
23
+ "tokenizer_class": "LlamaTokenizer",
24
+ "unk_token": {
25
+ "__type": "AddedToken",
26
+ "content": "<unk>",
27
+ "lstrip": false,
28
+ "normalized": true,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
trainer_state.json ADDED
@@ -0,0 +1,4369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 18.0,
5
+ "global_step": 702,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.03,
12
+ "learning_rate": 1.8181818181818183e-06,
13
+ "loss": 2.5841,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.05,
18
+ "learning_rate": 3.6363636363636366e-06,
19
+ "loss": 2.5898,
20
+ "step": 2
21
+ },
22
+ {
23
+ "epoch": 0.08,
24
+ "learning_rate": 5.4545454545454545e-06,
25
+ "loss": 1.6886,
26
+ "step": 3
27
+ },
28
+ {
29
+ "epoch": 0.1,
30
+ "learning_rate": 7.272727272727273e-06,
31
+ "loss": 0.999,
32
+ "step": 4
33
+ },
34
+ {
35
+ "epoch": 0.13,
36
+ "learning_rate": 9.090909090909091e-06,
37
+ "loss": 0.5869,
38
+ "step": 5
39
+ },
40
+ {
41
+ "epoch": 0.15,
42
+ "learning_rate": 1.0909090909090909e-05,
43
+ "loss": 0.3544,
44
+ "step": 6
45
+ },
46
+ {
47
+ "epoch": 0.18,
48
+ "learning_rate": 1.2727272727272728e-05,
49
+ "loss": 0.3183,
50
+ "step": 7
51
+ },
52
+ {
53
+ "epoch": 0.21,
54
+ "learning_rate": 1.4545454545454546e-05,
55
+ "loss": 0.2377,
56
+ "step": 8
57
+ },
58
+ {
59
+ "epoch": 0.23,
60
+ "learning_rate": 1.6363636363636366e-05,
61
+ "loss": 0.289,
62
+ "step": 9
63
+ },
64
+ {
65
+ "epoch": 0.26,
66
+ "learning_rate": 1.8181818181818182e-05,
67
+ "loss": 0.1791,
68
+ "step": 10
69
+ },
70
+ {
71
+ "epoch": 0.28,
72
+ "learning_rate": 2e-05,
73
+ "loss": 0.152,
74
+ "step": 11
75
+ },
76
+ {
77
+ "epoch": 0.31,
78
+ "learning_rate": 2.1818181818181818e-05,
79
+ "loss": 0.1551,
80
+ "step": 12
81
+ },
82
+ {
83
+ "epoch": 0.33,
84
+ "learning_rate": 2.363636363636364e-05,
85
+ "loss": 0.1501,
86
+ "step": 13
87
+ },
88
+ {
89
+ "epoch": 0.36,
90
+ "learning_rate": 2.5454545454545457e-05,
91
+ "loss": 0.1434,
92
+ "step": 14
93
+ },
94
+ {
95
+ "epoch": 0.38,
96
+ "learning_rate": 2.7272727272727273e-05,
97
+ "loss": 0.1221,
98
+ "step": 15
99
+ },
100
+ {
101
+ "epoch": 0.41,
102
+ "learning_rate": 2.9090909090909093e-05,
103
+ "loss": 0.1988,
104
+ "step": 16
105
+ },
106
+ {
107
+ "epoch": 0.44,
108
+ "learning_rate": 3.090909090909091e-05,
109
+ "loss": 0.154,
110
+ "step": 17
111
+ },
112
+ {
113
+ "epoch": 0.46,
114
+ "learning_rate": 3.272727272727273e-05,
115
+ "loss": 0.1034,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 0.49,
120
+ "learning_rate": 3.454545454545455e-05,
121
+ "loss": 0.1239,
122
+ "step": 19
123
+ },
124
+ {
125
+ "epoch": 0.51,
126
+ "learning_rate": 3.6363636363636364e-05,
127
+ "loss": 0.1626,
128
+ "step": 20
129
+ },
130
+ {
131
+ "epoch": 0.54,
132
+ "learning_rate": 3.818181818181819e-05,
133
+ "loss": 0.1231,
134
+ "step": 21
135
+ },
136
+ {
137
+ "epoch": 0.56,
138
+ "learning_rate": 4e-05,
139
+ "loss": 0.1245,
140
+ "step": 22
141
+ },
142
+ {
143
+ "epoch": 0.59,
144
+ "learning_rate": 3.99997865573779e-05,
145
+ "loss": 0.1269,
146
+ "step": 23
147
+ },
148
+ {
149
+ "epoch": 0.62,
150
+ "learning_rate": 3.999914623406736e-05,
151
+ "loss": 0.1271,
152
+ "step": 24
153
+ },
154
+ {
155
+ "epoch": 0.64,
156
+ "learning_rate": 3.999807904373562e-05,
157
+ "loss": 0.1462,
158
+ "step": 25
159
+ },
160
+ {
161
+ "epoch": 0.67,
162
+ "learning_rate": 3.9996585009161056e-05,
163
+ "loss": 0.1237,
164
+ "step": 26
165
+ },
166
+ {
167
+ "epoch": 0.69,
168
+ "learning_rate": 3.999466416223275e-05,
169
+ "loss": 0.1354,
170
+ "step": 27
171
+ },
172
+ {
173
+ "epoch": 0.72,
174
+ "learning_rate": 3.999231654394975e-05,
175
+ "loss": 0.0988,
176
+ "step": 28
177
+ },
178
+ {
179
+ "epoch": 0.74,
180
+ "learning_rate": 3.9989542204420243e-05,
181
+ "loss": 0.1454,
182
+ "step": 29
183
+ },
184
+ {
185
+ "epoch": 0.77,
186
+ "learning_rate": 3.9986341202860467e-05,
187
+ "loss": 0.0983,
188
+ "step": 30
189
+ },
190
+ {
191
+ "epoch": 0.79,
192
+ "learning_rate": 3.998271360759342e-05,
193
+ "loss": 0.1008,
194
+ "step": 31
195
+ },
196
+ {
197
+ "epoch": 0.82,
198
+ "learning_rate": 3.9978659496047456e-05,
199
+ "loss": 0.0895,
200
+ "step": 32
201
+ },
202
+ {
203
+ "epoch": 0.85,
204
+ "learning_rate": 3.997417895475459e-05,
205
+ "loss": 0.1208,
206
+ "step": 33
207
+ },
208
+ {
209
+ "epoch": 0.87,
210
+ "learning_rate": 3.9969272079348685e-05,
211
+ "loss": 0.087,
212
+ "step": 34
213
+ },
214
+ {
215
+ "epoch": 0.9,
216
+ "learning_rate": 3.9963938974563355e-05,
217
+ "loss": 0.1024,
218
+ "step": 35
219
+ },
220
+ {
221
+ "epoch": 0.92,
222
+ "learning_rate": 3.995817975422981e-05,
223
+ "loss": 0.0935,
224
+ "step": 36
225
+ },
226
+ {
227
+ "epoch": 0.95,
228
+ "learning_rate": 3.9951994541274345e-05,
229
+ "loss": 0.0888,
230
+ "step": 37
231
+ },
232
+ {
233
+ "epoch": 0.97,
234
+ "learning_rate": 3.994538346771576e-05,
235
+ "loss": 0.1204,
236
+ "step": 38
237
+ },
238
+ {
239
+ "epoch": 1.0,
240
+ "learning_rate": 3.9938346674662565e-05,
241
+ "loss": 0.069,
242
+ "step": 39
243
+ },
244
+ {
245
+ "epoch": 1.03,
246
+ "learning_rate": 3.9930884312309894e-05,
247
+ "loss": 0.1135,
248
+ "step": 40
249
+ },
250
+ {
251
+ "epoch": 1.05,
252
+ "learning_rate": 3.9922996539936374e-05,
253
+ "loss": 0.0879,
254
+ "step": 41
255
+ },
256
+ {
257
+ "epoch": 1.08,
258
+ "learning_rate": 3.991468352590069e-05,
259
+ "loss": 0.0729,
260
+ "step": 42
261
+ },
262
+ {
263
+ "epoch": 1.1,
264
+ "learning_rate": 3.9905945447638e-05,
265
+ "loss": 0.0738,
266
+ "step": 43
267
+ },
268
+ {
269
+ "epoch": 1.13,
270
+ "learning_rate": 3.989678249165612e-05,
271
+ "loss": 0.0896,
272
+ "step": 44
273
+ },
274
+ {
275
+ "epoch": 1.15,
276
+ "learning_rate": 3.9887194853531584e-05,
277
+ "loss": 0.0783,
278
+ "step": 45
279
+ },
280
+ {
281
+ "epoch": 1.18,
282
+ "learning_rate": 3.987718273790548e-05,
283
+ "loss": 0.0761,
284
+ "step": 46
285
+ },
286
+ {
287
+ "epoch": 1.21,
288
+ "learning_rate": 3.9866746358479e-05,
289
+ "loss": 0.0994,
290
+ "step": 47
291
+ },
292
+ {
293
+ "epoch": 1.23,
294
+ "learning_rate": 3.9855885938008986e-05,
295
+ "loss": 0.0954,
296
+ "step": 48
297
+ },
298
+ {
299
+ "epoch": 1.26,
300
+ "learning_rate": 3.984460170830308e-05,
301
+ "loss": 0.0829,
302
+ "step": 49
303
+ },
304
+ {
305
+ "epoch": 1.28,
306
+ "learning_rate": 3.983289391021486e-05,
307
+ "loss": 0.0796,
308
+ "step": 50
309
+ },
310
+ {
311
+ "epoch": 1.31,
312
+ "learning_rate": 3.9820762793638626e-05,
313
+ "loss": 0.0887,
314
+ "step": 51
315
+ },
316
+ {
317
+ "epoch": 1.33,
318
+ "learning_rate": 3.9808208617504106e-05,
319
+ "loss": 0.075,
320
+ "step": 52
321
+ },
322
+ {
323
+ "epoch": 1.36,
324
+ "learning_rate": 3.979523164977094e-05,
325
+ "loss": 0.0528,
326
+ "step": 53
327
+ },
328
+ {
329
+ "epoch": 1.38,
330
+ "learning_rate": 3.9781832167422926e-05,
331
+ "loss": 0.0804,
332
+ "step": 54
333
+ },
334
+ {
335
+ "epoch": 1.41,
336
+ "learning_rate": 3.976801045646212e-05,
337
+ "loss": 0.1193,
338
+ "step": 55
339
+ },
340
+ {
341
+ "epoch": 1.44,
342
+ "learning_rate": 3.9753766811902756e-05,
343
+ "loss": 0.0737,
344
+ "step": 56
345
+ },
346
+ {
347
+ "epoch": 1.46,
348
+ "learning_rate": 3.973910153776492e-05,
349
+ "loss": 0.0952,
350
+ "step": 57
351
+ },
352
+ {
353
+ "epoch": 1.49,
354
+ "learning_rate": 3.972401494706805e-05,
355
+ "loss": 0.0574,
356
+ "step": 58
357
+ },
358
+ {
359
+ "epoch": 1.51,
360
+ "learning_rate": 3.970850736182432e-05,
361
+ "loss": 0.0857,
362
+ "step": 59
363
+ },
364
+ {
365
+ "epoch": 1.54,
366
+ "learning_rate": 3.969257911303167e-05,
367
+ "loss": 0.0682,
368
+ "step": 60
369
+ },
370
+ {
371
+ "epoch": 1.56,
372
+ "learning_rate": 3.967623054066684e-05,
373
+ "loss": 0.0765,
374
+ "step": 61
375
+ },
376
+ {
377
+ "epoch": 1.59,
378
+ "learning_rate": 3.965946199367804e-05,
379
+ "loss": 0.0616,
380
+ "step": 62
381
+ },
382
+ {
383
+ "epoch": 1.62,
384
+ "learning_rate": 3.964227382997752e-05,
385
+ "loss": 0.0806,
386
+ "step": 63
387
+ },
388
+ {
389
+ "epoch": 1.64,
390
+ "learning_rate": 3.962466641643398e-05,
391
+ "loss": 0.0929,
392
+ "step": 64
393
+ },
394
+ {
395
+ "epoch": 1.67,
396
+ "learning_rate": 3.9606640128864635e-05,
397
+ "loss": 0.0548,
398
+ "step": 65
399
+ },
400
+ {
401
+ "epoch": 1.69,
402
+ "learning_rate": 3.958819535202732e-05,
403
+ "loss": 0.0671,
404
+ "step": 66
405
+ },
406
+ {
407
+ "epoch": 1.72,
408
+ "learning_rate": 3.956933247961218e-05,
409
+ "loss": 0.0853,
410
+ "step": 67
411
+ },
412
+ {
413
+ "epoch": 1.74,
414
+ "learning_rate": 3.9550051914233314e-05,
415
+ "loss": 0.0558,
416
+ "step": 68
417
+ },
418
+ {
419
+ "epoch": 1.77,
420
+ "learning_rate": 3.953035406742016e-05,
421
+ "loss": 0.0512,
422
+ "step": 69
423
+ },
424
+ {
425
+ "epoch": 1.79,
426
+ "learning_rate": 3.951023935960874e-05,
427
+ "loss": 0.0845,
428
+ "step": 70
429
+ },
430
+ {
431
+ "epoch": 1.82,
432
+ "learning_rate": 3.9489708220132626e-05,
433
+ "loss": 0.0885,
434
+ "step": 71
435
+ },
436
+ {
437
+ "epoch": 1.85,
438
+ "learning_rate": 3.9468761087213864e-05,
439
+ "loss": 0.0814,
440
+ "step": 72
441
+ },
442
+ {
443
+ "epoch": 1.87,
444
+ "learning_rate": 3.9447398407953536e-05,
445
+ "loss": 0.0918,
446
+ "step": 73
447
+ },
448
+ {
449
+ "epoch": 1.9,
450
+ "learning_rate": 3.942562063832228e-05,
451
+ "loss": 0.0769,
452
+ "step": 74
453
+ },
454
+ {
455
+ "epoch": 1.92,
456
+ "learning_rate": 3.940342824315052e-05,
457
+ "loss": 0.0647,
458
+ "step": 75
459
+ },
460
+ {
461
+ "epoch": 1.95,
462
+ "learning_rate": 3.9380821696118556e-05,
463
+ "loss": 0.0956,
464
+ "step": 76
465
+ },
466
+ {
467
+ "epoch": 1.97,
468
+ "learning_rate": 3.935780147974646e-05,
469
+ "loss": 0.4721,
470
+ "step": 77
471
+ },
472
+ {
473
+ "epoch": 2.0,
474
+ "learning_rate": 3.933436808538375e-05,
475
+ "loss": 0.0697,
476
+ "step": 78
477
+ },
478
+ {
479
+ "epoch": 2.03,
480
+ "learning_rate": 3.9310522013198965e-05,
481
+ "loss": 0.0723,
482
+ "step": 79
483
+ },
484
+ {
485
+ "epoch": 2.05,
486
+ "learning_rate": 3.92862637721689e-05,
487
+ "loss": 0.0685,
488
+ "step": 80
489
+ },
490
+ {
491
+ "epoch": 2.08,
492
+ "learning_rate": 3.9261593880067826e-05,
493
+ "loss": 0.0635,
494
+ "step": 81
495
+ },
496
+ {
497
+ "epoch": 2.1,
498
+ "learning_rate": 3.923651286345638e-05,
499
+ "loss": 0.0457,
500
+ "step": 82
501
+ },
502
+ {
503
+ "epoch": 2.13,
504
+ "learning_rate": 3.921102125767037e-05,
505
+ "loss": 0.0721,
506
+ "step": 83
507
+ },
508
+ {
509
+ "epoch": 2.15,
510
+ "learning_rate": 3.9185119606809305e-05,
511
+ "loss": 0.0543,
512
+ "step": 84
513
+ },
514
+ {
515
+ "epoch": 2.18,
516
+ "learning_rate": 3.9158808463724806e-05,
517
+ "loss": 0.0575,
518
+ "step": 85
519
+ },
520
+ {
521
+ "epoch": 2.21,
522
+ "learning_rate": 3.913208839000882e-05,
523
+ "loss": 0.0598,
524
+ "step": 86
525
+ },
526
+ {
527
+ "epoch": 2.23,
528
+ "learning_rate": 3.9104959955981605e-05,
529
+ "loss": 0.0556,
530
+ "step": 87
531
+ },
532
+ {
533
+ "epoch": 2.26,
534
+ "learning_rate": 3.907742374067956e-05,
535
+ "loss": 0.0557,
536
+ "step": 88
537
+ },
538
+ {
539
+ "epoch": 2.28,
540
+ "learning_rate": 3.904948033184291e-05,
541
+ "loss": 0.0618,
542
+ "step": 89
543
+ },
544
+ {
545
+ "epoch": 2.31,
546
+ "learning_rate": 3.9021130325903076e-05,
547
+ "loss": 0.0549,
548
+ "step": 90
549
+ },
550
+ {
551
+ "epoch": 2.33,
552
+ "learning_rate": 3.8992374327970024e-05,
553
+ "loss": 0.0481,
554
+ "step": 91
555
+ },
556
+ {
557
+ "epoch": 2.36,
558
+ "learning_rate": 3.896321295181932e-05,
559
+ "loss": 0.0556,
560
+ "step": 92
561
+ },
562
+ {
563
+ "epoch": 2.38,
564
+ "learning_rate": 3.893364681987902e-05,
565
+ "loss": 0.0447,
566
+ "step": 93
567
+ },
568
+ {
569
+ "epoch": 2.41,
570
+ "learning_rate": 3.89036765632164e-05,
571
+ "loss": 0.0563,
572
+ "step": 94
573
+ },
574
+ {
575
+ "epoch": 2.44,
576
+ "learning_rate": 3.887330282152446e-05,
577
+ "loss": 0.0553,
578
+ "step": 95
579
+ },
580
+ {
581
+ "epoch": 2.46,
582
+ "learning_rate": 3.8842526243108326e-05,
583
+ "loss": 0.0533,
584
+ "step": 96
585
+ },
586
+ {
587
+ "epoch": 2.49,
588
+ "learning_rate": 3.8811347484871353e-05,
589
+ "loss": 0.0411,
590
+ "step": 97
591
+ },
592
+ {
593
+ "epoch": 2.51,
594
+ "learning_rate": 3.877976721230114e-05,
595
+ "loss": 0.0698,
596
+ "step": 98
597
+ },
598
+ {
599
+ "epoch": 2.54,
600
+ "learning_rate": 3.8747786099455285e-05,
601
+ "loss": 0.0684,
602
+ "step": 99
603
+ },
604
+ {
605
+ "epoch": 2.56,
606
+ "learning_rate": 3.8715404828947055e-05,
607
+ "loss": 0.0486,
608
+ "step": 100
609
+ },
610
+ {
611
+ "epoch": 2.59,
612
+ "learning_rate": 3.868262409193078e-05,
613
+ "loss": 0.0632,
614
+ "step": 101
615
+ },
616
+ {
617
+ "epoch": 2.62,
618
+ "learning_rate": 3.864944458808712e-05,
619
+ "loss": 0.0753,
620
+ "step": 102
621
+ },
622
+ {
623
+ "epoch": 2.64,
624
+ "learning_rate": 3.861586702560808e-05,
625
+ "loss": 0.052,
626
+ "step": 103
627
+ },
628
+ {
629
+ "epoch": 2.67,
630
+ "learning_rate": 3.8581892121181984e-05,
631
+ "loss": 0.0611,
632
+ "step": 104
633
+ },
634
+ {
635
+ "epoch": 2.69,
636
+ "learning_rate": 3.854752059997807e-05,
637
+ "loss": 0.0531,
638
+ "step": 105
639
+ },
640
+ {
641
+ "epoch": 2.72,
642
+ "learning_rate": 3.851275319563113e-05,
643
+ "loss": 0.079,
644
+ "step": 106
645
+ },
646
+ {
647
+ "epoch": 2.74,
648
+ "learning_rate": 3.8477590650225735e-05,
649
+ "loss": 0.066,
650
+ "step": 107
651
+ },
652
+ {
653
+ "epoch": 2.77,
654
+ "learning_rate": 3.844203371428049e-05,
655
+ "loss": 0.0497,
656
+ "step": 108
657
+ },
658
+ {
659
+ "epoch": 2.79,
660
+ "learning_rate": 3.8406083146731956e-05,
661
+ "loss": 0.0701,
662
+ "step": 109
663
+ },
664
+ {
665
+ "epoch": 2.82,
666
+ "learning_rate": 3.836973971491847e-05,
667
+ "loss": 0.0663,
668
+ "step": 110
669
+ },
670
+ {
671
+ "epoch": 2.85,
672
+ "learning_rate": 3.8333004194563764e-05,
673
+ "loss": 0.0569,
674
+ "step": 111
675
+ },
676
+ {
677
+ "epoch": 2.87,
678
+ "learning_rate": 3.8295877369760426e-05,
679
+ "loss": 0.054,
680
+ "step": 112
681
+ },
682
+ {
683
+ "epoch": 2.9,
684
+ "learning_rate": 3.8258360032953136e-05,
685
+ "loss": 0.0642,
686
+ "step": 113
687
+ },
688
+ {
689
+ "epoch": 2.92,
690
+ "learning_rate": 3.822045298492177e-05,
691
+ "loss": 0.0547,
692
+ "step": 114
693
+ },
694
+ {
695
+ "epoch": 2.95,
696
+ "learning_rate": 3.81821570347643e-05,
697
+ "loss": 0.0511,
698
+ "step": 115
699
+ },
700
+ {
701
+ "epoch": 2.97,
702
+ "learning_rate": 3.814347299987953e-05,
703
+ "loss": 0.0566,
704
+ "step": 116
705
+ },
706
+ {
707
+ "epoch": 2.97,
708
+ "eval_test_accuracy": 74.75961538461539,
709
+ "eval_test_average": 78.50293262938611,
710
+ "eval_test_loss": 1.1910189390182495,
711
+ "eval_test_recall": 82.24624987415683,
712
+ "eval_test_runtime": 774.1404,
713
+ "eval_test_samples_per_second": 1.371,
714
+ "eval_test_steps_per_second": 0.006,
715
+ "step": 116
716
+ },
717
+ {
718
+ "epoch": 2.97,
719
+ "eval_val_accuracy": 74.40758293838863,
720
+ "eval_val_average": 78.0483275019237,
721
+ "eval_val_loss": 1.202035903930664,
722
+ "eval_val_recall": 81.68907206545876,
723
+ "eval_val_runtime": 781.9322,
724
+ "eval_val_samples_per_second": 1.347,
725
+ "eval_val_steps_per_second": 0.006,
726
+ "step": 116
727
+ },
728
+ {
729
+ "epoch": 3.0,
730
+ "learning_rate": 3.810440170594964e-05,
731
+ "loss": 0.0973,
732
+ "step": 117
733
+ },
734
+ {
735
+ "epoch": 3.03,
736
+ "learning_rate": 3.806494398692258e-05,
737
+ "loss": 0.0425,
738
+ "step": 118
739
+ },
740
+ {
741
+ "epoch": 3.05,
742
+ "learning_rate": 3.802510068499424e-05,
743
+ "loss": 0.0441,
744
+ "step": 119
745
+ },
746
+ {
747
+ "epoch": 3.08,
748
+ "learning_rate": 3.7984872650590516e-05,
749
+ "loss": 0.0477,
750
+ "step": 120
751
+ },
752
+ {
753
+ "epoch": 3.1,
754
+ "learning_rate": 3.7944260742349113e-05,
755
+ "loss": 0.0435,
756
+ "step": 121
757
+ },
758
+ {
759
+ "epoch": 3.13,
760
+ "learning_rate": 3.790326582710125e-05,
761
+ "loss": 0.0417,
762
+ "step": 122
763
+ },
764
+ {
765
+ "epoch": 3.15,
766
+ "learning_rate": 3.786188877985315e-05,
767
+ "loss": 0.0507,
768
+ "step": 123
769
+ },
770
+ {
771
+ "epoch": 3.18,
772
+ "learning_rate": 3.782013048376736e-05,
773
+ "loss": 0.0381,
774
+ "step": 124
775
+ },
776
+ {
777
+ "epoch": 3.21,
778
+ "learning_rate": 3.77779918301439e-05,
779
+ "loss": 0.0349,
780
+ "step": 125
781
+ },
782
+ {
783
+ "epoch": 3.23,
784
+ "learning_rate": 3.773547371840124e-05,
785
+ "loss": 0.0492,
786
+ "step": 126
787
+ },
788
+ {
789
+ "epoch": 3.26,
790
+ "learning_rate": 3.769257705605711e-05,
791
+ "loss": 0.0391,
792
+ "step": 127
793
+ },
794
+ {
795
+ "epoch": 3.28,
796
+ "learning_rate": 3.764930275870912e-05,
797
+ "loss": 0.0228,
798
+ "step": 128
799
+ },
800
+ {
801
+ "epoch": 3.31,
802
+ "learning_rate": 3.760565175001521e-05,
803
+ "loss": 0.0274,
804
+ "step": 129
805
+ },
806
+ {
807
+ "epoch": 3.33,
808
+ "learning_rate": 3.756162496167396e-05,
809
+ "loss": 0.0364,
810
+ "step": 130
811
+ },
812
+ {
813
+ "epoch": 3.36,
814
+ "learning_rate": 3.7517223333404694e-05,
815
+ "loss": 0.0431,
816
+ "step": 131
817
+ },
818
+ {
819
+ "epoch": 3.38,
820
+ "learning_rate": 3.7472447812927395e-05,
821
+ "loss": 0.0524,
822
+ "step": 132
823
+ },
824
+ {
825
+ "epoch": 3.41,
826
+ "learning_rate": 3.742729935594252e-05,
827
+ "loss": 0.047,
828
+ "step": 133
829
+ },
830
+ {
831
+ "epoch": 3.44,
832
+ "learning_rate": 3.738177892611057e-05,
833
+ "loss": 0.0465,
834
+ "step": 134
835
+ },
836
+ {
837
+ "epoch": 3.46,
838
+ "learning_rate": 3.733588749503154e-05,
839
+ "loss": 0.0908,
840
+ "step": 135
841
+ },
842
+ {
843
+ "epoch": 3.49,
844
+ "learning_rate": 3.728962604222416e-05,
845
+ "loss": 0.05,
846
+ "step": 136
847
+ },
848
+ {
849
+ "epoch": 3.51,
850
+ "learning_rate": 3.7242995555105016e-05,
851
+ "loss": 0.0418,
852
+ "step": 137
853
+ },
854
+ {
855
+ "epoch": 3.54,
856
+ "learning_rate": 3.719599702896745e-05,
857
+ "loss": 0.0466,
858
+ "step": 138
859
+ },
860
+ {
861
+ "epoch": 3.56,
862
+ "learning_rate": 3.714863146696033e-05,
863
+ "loss": 0.0418,
864
+ "step": 139
865
+ },
866
+ {
867
+ "epoch": 3.59,
868
+ "learning_rate": 3.710089988006662e-05,
869
+ "loss": 0.0331,
870
+ "step": 140
871
+ },
872
+ {
873
+ "epoch": 3.62,
874
+ "learning_rate": 3.705280328708185e-05,
875
+ "loss": 0.0483,
876
+ "step": 141
877
+ },
878
+ {
879
+ "epoch": 3.64,
880
+ "learning_rate": 3.700434271459229e-05,
881
+ "loss": 0.0525,
882
+ "step": 142
883
+ },
884
+ {
885
+ "epoch": 3.67,
886
+ "learning_rate": 3.695551919695311e-05,
887
+ "loss": 0.0498,
888
+ "step": 143
889
+ },
890
+ {
891
+ "epoch": 3.69,
892
+ "learning_rate": 3.690633377626628e-05,
893
+ "loss": 0.0835,
894
+ "step": 144
895
+ },
896
+ {
897
+ "epoch": 3.72,
898
+ "learning_rate": 3.685678750235831e-05,
899
+ "loss": 0.0489,
900
+ "step": 145
901
+ },
902
+ {
903
+ "epoch": 3.74,
904
+ "learning_rate": 3.680688143275786e-05,
905
+ "loss": 0.0289,
906
+ "step": 146
907
+ },
908
+ {
909
+ "epoch": 3.77,
910
+ "learning_rate": 3.675661663267317e-05,
911
+ "loss": 0.0465,
912
+ "step": 147
913
+ },
914
+ {
915
+ "epoch": 3.79,
916
+ "learning_rate": 3.670599417496931e-05,
917
+ "loss": 0.0521,
918
+ "step": 148
919
+ },
920
+ {
921
+ "epoch": 3.82,
922
+ "learning_rate": 3.6655015140145296e-05,
923
+ "loss": 0.046,
924
+ "step": 149
925
+ },
926
+ {
927
+ "epoch": 3.85,
928
+ "learning_rate": 3.6603680616311013e-05,
929
+ "loss": 0.0359,
930
+ "step": 150
931
+ },
932
+ {
933
+ "epoch": 3.87,
934
+ "learning_rate": 3.6551991699164e-05,
935
+ "loss": 0.036,
936
+ "step": 151
937
+ },
938
+ {
939
+ "epoch": 3.9,
940
+ "learning_rate": 3.6499949491966046e-05,
941
+ "loss": 0.0476,
942
+ "step": 152
943
+ },
944
+ {
945
+ "epoch": 3.92,
946
+ "learning_rate": 3.644755510551968e-05,
947
+ "loss": 0.0399,
948
+ "step": 153
949
+ },
950
+ {
951
+ "epoch": 3.95,
952
+ "learning_rate": 3.639480965814443e-05,
953
+ "loss": 0.0475,
954
+ "step": 154
955
+ },
956
+ {
957
+ "epoch": 3.97,
958
+ "learning_rate": 3.634171427565293e-05,
959
+ "loss": 0.061,
960
+ "step": 155
961
+ },
962
+ {
963
+ "epoch": 4.0,
964
+ "learning_rate": 3.628827009132697e-05,
965
+ "loss": 0.0382,
966
+ "step": 156
967
+ },
968
+ {
969
+ "epoch": 4.03,
970
+ "learning_rate": 3.623447824589323e-05,
971
+ "loss": 0.0386,
972
+ "step": 157
973
+ },
974
+ {
975
+ "epoch": 4.05,
976
+ "learning_rate": 3.6180339887498953e-05,
977
+ "loss": 0.0314,
978
+ "step": 158
979
+ },
980
+ {
981
+ "epoch": 4.08,
982
+ "learning_rate": 3.6125856171687465e-05,
983
+ "loss": 0.0253,
984
+ "step": 159
985
+ },
986
+ {
987
+ "epoch": 4.1,
988
+ "learning_rate": 3.6071028261373474e-05,
989
+ "loss": 0.0275,
990
+ "step": 160
991
+ },
992
+ {
993
+ "epoch": 4.13,
994
+ "learning_rate": 3.6015857326818295e-05,
995
+ "loss": 0.0379,
996
+ "step": 161
997
+ },
998
+ {
999
+ "epoch": 4.15,
1000
+ "learning_rate": 3.5960344545604796e-05,
1001
+ "loss": 0.0684,
1002
+ "step": 162
1003
+ },
1004
+ {
1005
+ "epoch": 4.18,
1006
+ "learning_rate": 3.5904491102612346e-05,
1007
+ "loss": 0.0297,
1008
+ "step": 163
1009
+ },
1010
+ {
1011
+ "epoch": 4.21,
1012
+ "learning_rate": 3.584829818999148e-05,
1013
+ "loss": 0.0251,
1014
+ "step": 164
1015
+ },
1016
+ {
1017
+ "epoch": 4.23,
1018
+ "learning_rate": 3.5791767007138456e-05,
1019
+ "loss": 0.0353,
1020
+ "step": 165
1021
+ },
1022
+ {
1023
+ "epoch": 4.26,
1024
+ "learning_rate": 3.573489876066967e-05,
1025
+ "loss": 0.0297,
1026
+ "step": 166
1027
+ },
1028
+ {
1029
+ "epoch": 4.28,
1030
+ "learning_rate": 3.567769466439588e-05,
1031
+ "loss": 0.0381,
1032
+ "step": 167
1033
+ },
1034
+ {
1035
+ "epoch": 4.31,
1036
+ "learning_rate": 3.5620155939296314e-05,
1037
+ "loss": 0.0381,
1038
+ "step": 168
1039
+ },
1040
+ {
1041
+ "epoch": 4.33,
1042
+ "learning_rate": 3.556228381349261e-05,
1043
+ "loss": 0.0439,
1044
+ "step": 169
1045
+ },
1046
+ {
1047
+ "epoch": 4.36,
1048
+ "learning_rate": 3.55040795222226e-05,
1049
+ "loss": 0.0311,
1050
+ "step": 170
1051
+ },
1052
+ {
1053
+ "epoch": 4.38,
1054
+ "learning_rate": 3.544554430781394e-05,
1055
+ "loss": 0.0312,
1056
+ "step": 171
1057
+ },
1058
+ {
1059
+ "epoch": 4.41,
1060
+ "learning_rate": 3.538667941965758e-05,
1061
+ "loss": 0.0249,
1062
+ "step": 172
1063
+ },
1064
+ {
1065
+ "epoch": 4.44,
1066
+ "learning_rate": 3.5327486114181144e-05,
1067
+ "loss": 0.0377,
1068
+ "step": 173
1069
+ },
1070
+ {
1071
+ "epoch": 4.46,
1072
+ "learning_rate": 3.526796565482206e-05,
1073
+ "loss": 0.0352,
1074
+ "step": 174
1075
+ },
1076
+ {
1077
+ "epoch": 4.49,
1078
+ "learning_rate": 3.520811931200063e-05,
1079
+ "loss": 0.0336,
1080
+ "step": 175
1081
+ },
1082
+ {
1083
+ "epoch": 4.51,
1084
+ "learning_rate": 3.514794836309286e-05,
1085
+ "loss": 0.0335,
1086
+ "step": 176
1087
+ },
1088
+ {
1089
+ "epoch": 4.54,
1090
+ "learning_rate": 3.5087454092403285e-05,
1091
+ "loss": 0.0328,
1092
+ "step": 177
1093
+ },
1094
+ {
1095
+ "epoch": 4.56,
1096
+ "learning_rate": 3.502663779113747e-05,
1097
+ "loss": 0.0435,
1098
+ "step": 178
1099
+ },
1100
+ {
1101
+ "epoch": 4.59,
1102
+ "learning_rate": 3.49655007573745e-05,
1103
+ "loss": 0.0301,
1104
+ "step": 179
1105
+ },
1106
+ {
1107
+ "epoch": 4.62,
1108
+ "learning_rate": 3.490404429603925e-05,
1109
+ "loss": 0.032,
1110
+ "step": 180
1111
+ },
1112
+ {
1113
+ "epoch": 4.64,
1114
+ "learning_rate": 3.484226971887456e-05,
1115
+ "loss": 0.0459,
1116
+ "step": 181
1117
+ },
1118
+ {
1119
+ "epoch": 4.67,
1120
+ "learning_rate": 3.478017834441319e-05,
1121
+ "loss": 0.0393,
1122
+ "step": 182
1123
+ },
1124
+ {
1125
+ "epoch": 4.69,
1126
+ "learning_rate": 3.4717771497949706e-05,
1127
+ "loss": 0.0295,
1128
+ "step": 183
1129
+ },
1130
+ {
1131
+ "epoch": 4.72,
1132
+ "learning_rate": 3.4655050511512236e-05,
1133
+ "loss": 0.0337,
1134
+ "step": 184
1135
+ },
1136
+ {
1137
+ "epoch": 4.74,
1138
+ "learning_rate": 3.459201672383392e-05,
1139
+ "loss": 0.0331,
1140
+ "step": 185
1141
+ },
1142
+ {
1143
+ "epoch": 4.77,
1144
+ "learning_rate": 3.452867148032449e-05,
1145
+ "loss": 0.0197,
1146
+ "step": 186
1147
+ },
1148
+ {
1149
+ "epoch": 4.79,
1150
+ "learning_rate": 3.4465016133041405e-05,
1151
+ "loss": 0.0245,
1152
+ "step": 187
1153
+ },
1154
+ {
1155
+ "epoch": 4.82,
1156
+ "learning_rate": 3.44010520406611e-05,
1157
+ "loss": 0.0272,
1158
+ "step": 188
1159
+ },
1160
+ {
1161
+ "epoch": 4.85,
1162
+ "learning_rate": 3.433678056844993e-05,
1163
+ "loss": 0.0427,
1164
+ "step": 189
1165
+ },
1166
+ {
1167
+ "epoch": 4.87,
1168
+ "learning_rate": 3.427220308823505e-05,
1169
+ "loss": 0.0439,
1170
+ "step": 190
1171
+ },
1172
+ {
1173
+ "epoch": 4.9,
1174
+ "learning_rate": 3.420732097837514e-05,
1175
+ "loss": 0.0313,
1176
+ "step": 191
1177
+ },
1178
+ {
1179
+ "epoch": 4.92,
1180
+ "learning_rate": 3.4142135623730954e-05,
1181
+ "loss": 0.0299,
1182
+ "step": 192
1183
+ },
1184
+ {
1185
+ "epoch": 4.95,
1186
+ "learning_rate": 3.4076648415635804e-05,
1187
+ "loss": 0.0363,
1188
+ "step": 193
1189
+ },
1190
+ {
1191
+ "epoch": 4.97,
1192
+ "learning_rate": 3.401086075186582e-05,
1193
+ "loss": 0.0394,
1194
+ "step": 194
1195
+ },
1196
+ {
1197
+ "epoch": 5.0,
1198
+ "learning_rate": 3.394477403661016e-05,
1199
+ "loss": 0.0246,
1200
+ "step": 195
1201
+ },
1202
+ {
1203
+ "epoch": 5.03,
1204
+ "learning_rate": 3.3878389680440995e-05,
1205
+ "loss": 0.0124,
1206
+ "step": 196
1207
+ },
1208
+ {
1209
+ "epoch": 5.05,
1210
+ "learning_rate": 3.3811709100283434e-05,
1211
+ "loss": 0.0155,
1212
+ "step": 197
1213
+ },
1214
+ {
1215
+ "epoch": 5.08,
1216
+ "learning_rate": 3.374473371938526e-05,
1217
+ "loss": 0.0143,
1218
+ "step": 198
1219
+ },
1220
+ {
1221
+ "epoch": 5.1,
1222
+ "learning_rate": 3.367746496728656e-05,
1223
+ "loss": 0.0179,
1224
+ "step": 199
1225
+ },
1226
+ {
1227
+ "epoch": 5.13,
1228
+ "learning_rate": 3.3609904279789235e-05,
1229
+ "loss": 0.0174,
1230
+ "step": 200
1231
+ },
1232
+ {
1233
+ "epoch": 5.15,
1234
+ "learning_rate": 3.3542053098926296e-05,
1235
+ "loss": 0.03,
1236
+ "step": 201
1237
+ },
1238
+ {
1239
+ "epoch": 5.18,
1240
+ "learning_rate": 3.347391287293115e-05,
1241
+ "loss": 0.0258,
1242
+ "step": 202
1243
+ },
1244
+ {
1245
+ "epoch": 5.21,
1246
+ "learning_rate": 3.3405485056206636e-05,
1247
+ "loss": 0.0297,
1248
+ "step": 203
1249
+ },
1250
+ {
1251
+ "epoch": 5.23,
1252
+ "learning_rate": 3.333677110929403e-05,
1253
+ "loss": 0.0498,
1254
+ "step": 204
1255
+ },
1256
+ {
1257
+ "epoch": 5.26,
1258
+ "learning_rate": 3.326777249884183e-05,
1259
+ "loss": 0.0205,
1260
+ "step": 205
1261
+ },
1262
+ {
1263
+ "epoch": 5.28,
1264
+ "learning_rate": 3.319849069757446e-05,
1265
+ "loss": 0.0185,
1266
+ "step": 206
1267
+ },
1268
+ {
1269
+ "epoch": 5.31,
1270
+ "learning_rate": 3.312892718426086e-05,
1271
+ "loss": 0.0164,
1272
+ "step": 207
1273
+ },
1274
+ {
1275
+ "epoch": 5.33,
1276
+ "learning_rate": 3.305908344368289e-05,
1277
+ "loss": 0.014,
1278
+ "step": 208
1279
+ },
1280
+ {
1281
+ "epoch": 5.36,
1282
+ "learning_rate": 3.298896096660367e-05,
1283
+ "loss": 0.0182,
1284
+ "step": 209
1285
+ },
1286
+ {
1287
+ "epoch": 5.38,
1288
+ "learning_rate": 3.291856124973575e-05,
1289
+ "loss": 0.0271,
1290
+ "step": 210
1291
+ },
1292
+ {
1293
+ "epoch": 5.41,
1294
+ "learning_rate": 3.284788579570912e-05,
1295
+ "loss": 0.0203,
1296
+ "step": 211
1297
+ },
1298
+ {
1299
+ "epoch": 5.44,
1300
+ "learning_rate": 3.277693611303922e-05,
1301
+ "loss": 0.0306,
1302
+ "step": 212
1303
+ },
1304
+ {
1305
+ "epoch": 5.46,
1306
+ "learning_rate": 3.2705713716094694e-05,
1307
+ "loss": 0.0174,
1308
+ "step": 213
1309
+ },
1310
+ {
1311
+ "epoch": 5.49,
1312
+ "learning_rate": 3.263422012506502e-05,
1313
+ "loss": 0.0152,
1314
+ "step": 214
1315
+ },
1316
+ {
1317
+ "epoch": 5.51,
1318
+ "learning_rate": 3.2562456865928184e-05,
1319
+ "loss": 0.0343,
1320
+ "step": 215
1321
+ },
1322
+ {
1323
+ "epoch": 5.54,
1324
+ "learning_rate": 3.249042547041799e-05,
1325
+ "loss": 0.0117,
1326
+ "step": 216
1327
+ },
1328
+ {
1329
+ "epoch": 5.56,
1330
+ "learning_rate": 3.241812747599143e-05,
1331
+ "loss": 0.025,
1332
+ "step": 217
1333
+ },
1334
+ {
1335
+ "epoch": 5.59,
1336
+ "learning_rate": 3.234556442579586e-05,
1337
+ "loss": 0.0264,
1338
+ "step": 218
1339
+ },
1340
+ {
1341
+ "epoch": 5.62,
1342
+ "learning_rate": 3.2272737868636056e-05,
1343
+ "loss": 0.0212,
1344
+ "step": 219
1345
+ },
1346
+ {
1347
+ "epoch": 5.64,
1348
+ "learning_rate": 3.219964935894114e-05,
1349
+ "loss": 0.0187,
1350
+ "step": 220
1351
+ },
1352
+ {
1353
+ "epoch": 5.67,
1354
+ "learning_rate": 3.2126300456731425e-05,
1355
+ "loss": 0.0292,
1356
+ "step": 221
1357
+ },
1358
+ {
1359
+ "epoch": 5.69,
1360
+ "learning_rate": 3.205269272758513e-05,
1361
+ "loss": 0.0223,
1362
+ "step": 222
1363
+ },
1364
+ {
1365
+ "epoch": 5.72,
1366
+ "learning_rate": 3.197882774260491e-05,
1367
+ "loss": 0.0192,
1368
+ "step": 223
1369
+ },
1370
+ {
1371
+ "epoch": 5.74,
1372
+ "learning_rate": 3.190470707838438e-05,
1373
+ "loss": 0.0209,
1374
+ "step": 224
1375
+ },
1376
+ {
1377
+ "epoch": 5.77,
1378
+ "learning_rate": 3.1830332316974427e-05,
1379
+ "loss": 0.0167,
1380
+ "step": 225
1381
+ },
1382
+ {
1383
+ "epoch": 5.79,
1384
+ "learning_rate": 3.1755705045849465e-05,
1385
+ "loss": 0.0223,
1386
+ "step": 226
1387
+ },
1388
+ {
1389
+ "epoch": 5.82,
1390
+ "learning_rate": 3.1680826857873534e-05,
1391
+ "loss": 0.0232,
1392
+ "step": 227
1393
+ },
1394
+ {
1395
+ "epoch": 5.85,
1396
+ "learning_rate": 3.160569935126632e-05,
1397
+ "loss": 0.0322,
1398
+ "step": 228
1399
+ },
1400
+ {
1401
+ "epoch": 5.87,
1402
+ "learning_rate": 3.153032412956901e-05,
1403
+ "loss": 0.0291,
1404
+ "step": 229
1405
+ },
1406
+ {
1407
+ "epoch": 5.9,
1408
+ "learning_rate": 3.145470280161011e-05,
1409
+ "loss": 0.0208,
1410
+ "step": 230
1411
+ },
1412
+ {
1413
+ "epoch": 5.92,
1414
+ "learning_rate": 3.1378836981471066e-05,
1415
+ "loss": 0.016,
1416
+ "step": 231
1417
+ },
1418
+ {
1419
+ "epoch": 5.95,
1420
+ "learning_rate": 3.130272828845184e-05,
1421
+ "loss": 0.0286,
1422
+ "step": 232
1423
+ },
1424
+ {
1425
+ "epoch": 5.95,
1426
+ "eval_test_accuracy": 80.76923076923077,
1427
+ "eval_test_average": 81.63979173281498,
1428
+ "eval_test_loss": 0.8692004680633545,
1429
+ "eval_test_recall": 82.51035269639921,
1430
+ "eval_test_runtime": 794.04,
1431
+ "eval_test_samples_per_second": 1.336,
1432
+ "eval_test_steps_per_second": 0.006,
1433
+ "step": 232
1434
+ },
1435
+ {
1436
+ "epoch": 5.95,
1437
+ "eval_val_accuracy": 82.46445497630332,
1438
+ "eval_val_average": 83.1463368864132,
1439
+ "eval_val_loss": 0.8709683418273926,
1440
+ "eval_val_recall": 83.82821879652307,
1441
+ "eval_val_runtime": 781.0898,
1442
+ "eval_val_samples_per_second": 1.348,
1443
+ "eval_val_steps_per_second": 0.006,
1444
+ "step": 232
1445
+ },
1446
+ {
1447
+ "epoch": 5.97,
1448
+ "learning_rate": 3.1226378347036334e-05,
1449
+ "loss": 0.0181,
1450
+ "step": 233
1451
+ },
1452
+ {
1453
+ "epoch": 6.0,
1454
+ "learning_rate": 3.114978878685771e-05,
1455
+ "loss": 0.0241,
1456
+ "step": 234
1457
+ },
1458
+ {
1459
+ "epoch": 6.03,
1460
+ "learning_rate": 3.1072961242663635e-05,
1461
+ "loss": 0.0094,
1462
+ "step": 235
1463
+ },
1464
+ {
1465
+ "epoch": 6.05,
1466
+ "learning_rate": 3.0995897354281347e-05,
1467
+ "loss": 0.0085,
1468
+ "step": 236
1469
+ },
1470
+ {
1471
+ "epoch": 6.08,
1472
+ "learning_rate": 3.091859876658269e-05,
1473
+ "loss": 0.0178,
1474
+ "step": 237
1475
+ },
1476
+ {
1477
+ "epoch": 6.1,
1478
+ "learning_rate": 3.084106712944899e-05,
1479
+ "loss": 0.013,
1480
+ "step": 238
1481
+ },
1482
+ {
1483
+ "epoch": 6.13,
1484
+ "learning_rate": 3.076330409773584e-05,
1485
+ "loss": 0.0132,
1486
+ "step": 239
1487
+ },
1488
+ {
1489
+ "epoch": 6.15,
1490
+ "learning_rate": 3.068531133123777e-05,
1491
+ "loss": 0.0136,
1492
+ "step": 240
1493
+ },
1494
+ {
1495
+ "epoch": 6.18,
1496
+ "learning_rate": 3.060709049465285e-05,
1497
+ "loss": 0.0089,
1498
+ "step": 241
1499
+ },
1500
+ {
1501
+ "epoch": 6.21,
1502
+ "learning_rate": 3.052864325754712e-05,
1503
+ "loss": 0.0155,
1504
+ "step": 242
1505
+ },
1506
+ {
1507
+ "epoch": 6.23,
1508
+ "learning_rate": 3.0449971294318977e-05,
1509
+ "loss": 0.0156,
1510
+ "step": 243
1511
+ },
1512
+ {
1513
+ "epoch": 6.26,
1514
+ "learning_rate": 3.0371076284163442e-05,
1515
+ "loss": 0.0181,
1516
+ "step": 244
1517
+ },
1518
+ {
1519
+ "epoch": 6.28,
1520
+ "learning_rate": 3.0291959911036293e-05,
1521
+ "loss": 0.0136,
1522
+ "step": 245
1523
+ },
1524
+ {
1525
+ "epoch": 6.31,
1526
+ "learning_rate": 3.021262386361814e-05,
1527
+ "loss": 0.0094,
1528
+ "step": 246
1529
+ },
1530
+ {
1531
+ "epoch": 6.33,
1532
+ "learning_rate": 3.013306983527839e-05,
1533
+ "loss": 0.0117,
1534
+ "step": 247
1535
+ },
1536
+ {
1537
+ "epoch": 6.36,
1538
+ "learning_rate": 3.0053299524039077e-05,
1539
+ "loss": 0.0126,
1540
+ "step": 248
1541
+ },
1542
+ {
1543
+ "epoch": 6.38,
1544
+ "learning_rate": 2.997331463253864e-05,
1545
+ "loss": 0.01,
1546
+ "step": 249
1547
+ },
1548
+ {
1549
+ "epoch": 6.41,
1550
+ "learning_rate": 2.9893116867995583e-05,
1551
+ "loss": 0.0129,
1552
+ "step": 250
1553
+ },
1554
+ {
1555
+ "epoch": 6.44,
1556
+ "learning_rate": 2.981270794217201e-05,
1557
+ "loss": 0.0068,
1558
+ "step": 251
1559
+ },
1560
+ {
1561
+ "epoch": 6.46,
1562
+ "learning_rate": 2.9732089571337126e-05,
1563
+ "loss": 0.0077,
1564
+ "step": 252
1565
+ },
1566
+ {
1567
+ "epoch": 6.49,
1568
+ "learning_rate": 2.9651263476230577e-05,
1569
+ "loss": 0.0066,
1570
+ "step": 253
1571
+ },
1572
+ {
1573
+ "epoch": 6.51,
1574
+ "learning_rate": 2.9570231382025732e-05,
1575
+ "loss": 0.0044,
1576
+ "step": 254
1577
+ },
1578
+ {
1579
+ "epoch": 6.54,
1580
+ "learning_rate": 2.9488995018292854e-05,
1581
+ "loss": 0.0123,
1582
+ "step": 255
1583
+ },
1584
+ {
1585
+ "epoch": 6.56,
1586
+ "learning_rate": 2.9407556118962192e-05,
1587
+ "loss": 0.0119,
1588
+ "step": 256
1589
+ },
1590
+ {
1591
+ "epoch": 6.59,
1592
+ "learning_rate": 2.932591642228696e-05,
1593
+ "loss": 0.0141,
1594
+ "step": 257
1595
+ },
1596
+ {
1597
+ "epoch": 6.62,
1598
+ "learning_rate": 2.924407767080627e-05,
1599
+ "loss": 0.0075,
1600
+ "step": 258
1601
+ },
1602
+ {
1603
+ "epoch": 6.64,
1604
+ "learning_rate": 2.9162041611307868e-05,
1605
+ "loss": 0.0106,
1606
+ "step": 259
1607
+ },
1608
+ {
1609
+ "epoch": 6.67,
1610
+ "learning_rate": 2.9079809994790937e-05,
1611
+ "loss": 0.0152,
1612
+ "step": 260
1613
+ },
1614
+ {
1615
+ "epoch": 6.69,
1616
+ "learning_rate": 2.899738457642866e-05,
1617
+ "loss": 0.0167,
1618
+ "step": 261
1619
+ },
1620
+ {
1621
+ "epoch": 6.72,
1622
+ "learning_rate": 2.891476711553077e-05,
1623
+ "loss": 0.0072,
1624
+ "step": 262
1625
+ },
1626
+ {
1627
+ "epoch": 6.74,
1628
+ "learning_rate": 2.883195937550602e-05,
1629
+ "loss": 0.0107,
1630
+ "step": 263
1631
+ },
1632
+ {
1633
+ "epoch": 6.77,
1634
+ "learning_rate": 2.8748963123824532e-05,
1635
+ "loss": 0.0088,
1636
+ "step": 264
1637
+ },
1638
+ {
1639
+ "epoch": 6.79,
1640
+ "learning_rate": 2.8665780131980054e-05,
1641
+ "loss": 0.0103,
1642
+ "step": 265
1643
+ },
1644
+ {
1645
+ "epoch": 6.82,
1646
+ "learning_rate": 2.858241217545218e-05,
1647
+ "loss": 0.0219,
1648
+ "step": 266
1649
+ },
1650
+ {
1651
+ "epoch": 6.85,
1652
+ "learning_rate": 2.8498861033668444e-05,
1653
+ "loss": 0.0107,
1654
+ "step": 267
1655
+ },
1656
+ {
1657
+ "epoch": 6.87,
1658
+ "learning_rate": 2.8415128489966308e-05,
1659
+ "loss": 0.0088,
1660
+ "step": 268
1661
+ },
1662
+ {
1663
+ "epoch": 6.9,
1664
+ "learning_rate": 2.8331216331555148e-05,
1665
+ "loss": 0.0171,
1666
+ "step": 269
1667
+ },
1668
+ {
1669
+ "epoch": 6.92,
1670
+ "learning_rate": 2.8247126349478073e-05,
1671
+ "loss": 0.0102,
1672
+ "step": 270
1673
+ },
1674
+ {
1675
+ "epoch": 6.95,
1676
+ "learning_rate": 2.8162860338573718e-05,
1677
+ "loss": 0.0172,
1678
+ "step": 271
1679
+ },
1680
+ {
1681
+ "epoch": 6.97,
1682
+ "learning_rate": 2.80784200974379e-05,
1683
+ "loss": 0.0089,
1684
+ "step": 272
1685
+ },
1686
+ {
1687
+ "epoch": 7.0,
1688
+ "learning_rate": 2.7993807428385285e-05,
1689
+ "loss": 0.0048,
1690
+ "step": 273
1691
+ },
1692
+ {
1693
+ "epoch": 7.03,
1694
+ "learning_rate": 2.790902413741085e-05,
1695
+ "loss": 0.0098,
1696
+ "step": 274
1697
+ },
1698
+ {
1699
+ "epoch": 7.05,
1700
+ "learning_rate": 2.7824072034151406e-05,
1701
+ "loss": 0.0252,
1702
+ "step": 275
1703
+ },
1704
+ {
1705
+ "epoch": 7.08,
1706
+ "learning_rate": 2.773895293184691e-05,
1707
+ "loss": 0.0053,
1708
+ "step": 276
1709
+ },
1710
+ {
1711
+ "epoch": 7.1,
1712
+ "learning_rate": 2.7653668647301797e-05,
1713
+ "loss": 0.0202,
1714
+ "step": 277
1715
+ },
1716
+ {
1717
+ "epoch": 7.13,
1718
+ "learning_rate": 2.756822100084621e-05,
1719
+ "loss": 0.012,
1720
+ "step": 278
1721
+ },
1722
+ {
1723
+ "epoch": 7.15,
1724
+ "learning_rate": 2.748261181629711e-05,
1725
+ "loss": 0.0082,
1726
+ "step": 279
1727
+ },
1728
+ {
1729
+ "epoch": 7.18,
1730
+ "learning_rate": 2.7396842920919384e-05,
1731
+ "loss": 0.0163,
1732
+ "step": 280
1733
+ },
1734
+ {
1735
+ "epoch": 7.21,
1736
+ "learning_rate": 2.7310916145386826e-05,
1737
+ "loss": 0.0186,
1738
+ "step": 281
1739
+ },
1740
+ {
1741
+ "epoch": 7.23,
1742
+ "learning_rate": 2.7224833323743064e-05,
1743
+ "loss": 0.0088,
1744
+ "step": 282
1745
+ },
1746
+ {
1747
+ "epoch": 7.26,
1748
+ "learning_rate": 2.7138596293362404e-05,
1749
+ "loss": 0.0137,
1750
+ "step": 283
1751
+ },
1752
+ {
1753
+ "epoch": 7.28,
1754
+ "learning_rate": 2.7052206894910653e-05,
1755
+ "loss": 0.0086,
1756
+ "step": 284
1757
+ },
1758
+ {
1759
+ "epoch": 7.31,
1760
+ "learning_rate": 2.696566697230577e-05,
1761
+ "loss": 0.0063,
1762
+ "step": 285
1763
+ },
1764
+ {
1765
+ "epoch": 7.33,
1766
+ "learning_rate": 2.6878978372678567e-05,
1767
+ "loss": 0.0114,
1768
+ "step": 286
1769
+ },
1770
+ {
1771
+ "epoch": 7.36,
1772
+ "learning_rate": 2.6792142946333227e-05,
1773
+ "loss": 0.0104,
1774
+ "step": 287
1775
+ },
1776
+ {
1777
+ "epoch": 7.38,
1778
+ "learning_rate": 2.670516254670788e-05,
1779
+ "loss": 0.0073,
1780
+ "step": 288
1781
+ },
1782
+ {
1783
+ "epoch": 7.41,
1784
+ "learning_rate": 2.6618039030334962e-05,
1785
+ "loss": 0.0112,
1786
+ "step": 289
1787
+ },
1788
+ {
1789
+ "epoch": 7.44,
1790
+ "learning_rate": 2.6530774256801666e-05,
1791
+ "loss": 0.004,
1792
+ "step": 290
1793
+ },
1794
+ {
1795
+ "epoch": 7.46,
1796
+ "learning_rate": 2.6443370088710197e-05,
1797
+ "loss": 0.0045,
1798
+ "step": 291
1799
+ },
1800
+ {
1801
+ "epoch": 7.49,
1802
+ "learning_rate": 2.6355828391638036e-05,
1803
+ "loss": 0.008,
1804
+ "step": 292
1805
+ },
1806
+ {
1807
+ "epoch": 7.51,
1808
+ "learning_rate": 2.6268151034098117e-05,
1809
+ "loss": 0.0069,
1810
+ "step": 293
1811
+ },
1812
+ {
1813
+ "epoch": 7.54,
1814
+ "learning_rate": 2.618033988749895e-05,
1815
+ "loss": 0.0086,
1816
+ "step": 294
1817
+ },
1818
+ {
1819
+ "epoch": 7.56,
1820
+ "learning_rate": 2.6092396826104674e-05,
1821
+ "loss": 0.0062,
1822
+ "step": 295
1823
+ },
1824
+ {
1825
+ "epoch": 7.59,
1826
+ "learning_rate": 2.6004323726995057e-05,
1827
+ "loss": 0.0085,
1828
+ "step": 296
1829
+ },
1830
+ {
1831
+ "epoch": 7.62,
1832
+ "learning_rate": 2.5916122470025414e-05,
1833
+ "loss": 0.0078,
1834
+ "step": 297
1835
+ },
1836
+ {
1837
+ "epoch": 7.64,
1838
+ "learning_rate": 2.5827794937786497e-05,
1839
+ "loss": 0.0051,
1840
+ "step": 298
1841
+ },
1842
+ {
1843
+ "epoch": 7.67,
1844
+ "learning_rate": 2.573934301556432e-05,
1845
+ "loss": 0.009,
1846
+ "step": 299
1847
+ },
1848
+ {
1849
+ "epoch": 7.69,
1850
+ "learning_rate": 2.5650768591299905e-05,
1851
+ "loss": 0.0063,
1852
+ "step": 300
1853
+ },
1854
+ {
1855
+ "epoch": 7.72,
1856
+ "learning_rate": 2.5562073555548984e-05,
1857
+ "loss": 0.0052,
1858
+ "step": 301
1859
+ },
1860
+ {
1861
+ "epoch": 7.74,
1862
+ "learning_rate": 2.5473259801441663e-05,
1863
+ "loss": 0.0064,
1864
+ "step": 302
1865
+ },
1866
+ {
1867
+ "epoch": 7.77,
1868
+ "learning_rate": 2.5384329224641994e-05,
1869
+ "loss": 0.0069,
1870
+ "step": 303
1871
+ },
1872
+ {
1873
+ "epoch": 7.79,
1874
+ "learning_rate": 2.5295283723307517e-05,
1875
+ "loss": 0.0129,
1876
+ "step": 304
1877
+ },
1878
+ {
1879
+ "epoch": 7.82,
1880
+ "learning_rate": 2.520612519804878e-05,
1881
+ "loss": 0.0032,
1882
+ "step": 305
1883
+ },
1884
+ {
1885
+ "epoch": 7.85,
1886
+ "learning_rate": 2.5116855551888715e-05,
1887
+ "loss": 0.0026,
1888
+ "step": 306
1889
+ },
1890
+ {
1891
+ "epoch": 7.87,
1892
+ "learning_rate": 2.5027476690222058e-05,
1893
+ "loss": 0.0063,
1894
+ "step": 307
1895
+ },
1896
+ {
1897
+ "epoch": 7.9,
1898
+ "learning_rate": 2.4937990520774664e-05,
1899
+ "loss": 0.0089,
1900
+ "step": 308
1901
+ },
1902
+ {
1903
+ "epoch": 7.92,
1904
+ "learning_rate": 2.4848398953562806e-05,
1905
+ "loss": 0.004,
1906
+ "step": 309
1907
+ },
1908
+ {
1909
+ "epoch": 7.95,
1910
+ "learning_rate": 2.4758703900852376e-05,
1911
+ "loss": 0.019,
1912
+ "step": 310
1913
+ },
1914
+ {
1915
+ "epoch": 7.97,
1916
+ "learning_rate": 2.4668907277118114e-05,
1917
+ "loss": 0.0107,
1918
+ "step": 311
1919
+ },
1920
+ {
1921
+ "epoch": 8.0,
1922
+ "learning_rate": 2.4579010999002683e-05,
1923
+ "loss": 0.015,
1924
+ "step": 312
1925
+ },
1926
+ {
1927
+ "epoch": 8.03,
1928
+ "learning_rate": 2.448901698527583e-05,
1929
+ "loss": 0.0033,
1930
+ "step": 313
1931
+ },
1932
+ {
1933
+ "epoch": 8.05,
1934
+ "learning_rate": 2.4398927156793376e-05,
1935
+ "loss": 0.0044,
1936
+ "step": 314
1937
+ },
1938
+ {
1939
+ "epoch": 8.08,
1940
+ "learning_rate": 2.4308743436456238e-05,
1941
+ "loss": 0.0022,
1942
+ "step": 315
1943
+ },
1944
+ {
1945
+ "epoch": 8.1,
1946
+ "learning_rate": 2.42184677491694e-05,
1947
+ "loss": 0.0025,
1948
+ "step": 316
1949
+ },
1950
+ {
1951
+ "epoch": 8.13,
1952
+ "learning_rate": 2.4128102021800794e-05,
1953
+ "loss": 0.0037,
1954
+ "step": 317
1955
+ },
1956
+ {
1957
+ "epoch": 8.15,
1958
+ "learning_rate": 2.4037648183140205e-05,
1959
+ "loss": 0.003,
1960
+ "step": 318
1961
+ },
1962
+ {
1963
+ "epoch": 8.18,
1964
+ "learning_rate": 2.3947108163858085e-05,
1965
+ "loss": 0.0044,
1966
+ "step": 319
1967
+ },
1968
+ {
1969
+ "epoch": 8.21,
1970
+ "learning_rate": 2.385648389646434e-05,
1971
+ "loss": 0.0094,
1972
+ "step": 320
1973
+ },
1974
+ {
1975
+ "epoch": 8.23,
1976
+ "learning_rate": 2.37657773152671e-05,
1977
+ "loss": 0.0162,
1978
+ "step": 321
1979
+ },
1980
+ {
1981
+ "epoch": 8.26,
1982
+ "learning_rate": 2.367499035633141e-05,
1983
+ "loss": 0.007,
1984
+ "step": 322
1985
+ },
1986
+ {
1987
+ "epoch": 8.28,
1988
+ "learning_rate": 2.358412495743794e-05,
1989
+ "loss": 0.0039,
1990
+ "step": 323
1991
+ },
1992
+ {
1993
+ "epoch": 8.31,
1994
+ "learning_rate": 2.3493183058041578e-05,
1995
+ "loss": 0.006,
1996
+ "step": 324
1997
+ },
1998
+ {
1999
+ "epoch": 8.33,
2000
+ "learning_rate": 2.340216659923008e-05,
2001
+ "loss": 0.0022,
2002
+ "step": 325
2003
+ },
2004
+ {
2005
+ "epoch": 8.36,
2006
+ "learning_rate": 2.33110775236826e-05,
2007
+ "loss": 0.0083,
2008
+ "step": 326
2009
+ },
2010
+ {
2011
+ "epoch": 8.38,
2012
+ "learning_rate": 2.321991777562826e-05,
2013
+ "loss": 0.0064,
2014
+ "step": 327
2015
+ },
2016
+ {
2017
+ "epoch": 8.41,
2018
+ "learning_rate": 2.312868930080462e-05,
2019
+ "loss": 0.0038,
2020
+ "step": 328
2021
+ },
2022
+ {
2023
+ "epoch": 8.44,
2024
+ "learning_rate": 2.303739404641617e-05,
2025
+ "loss": 0.0039,
2026
+ "step": 329
2027
+ },
2028
+ {
2029
+ "epoch": 8.46,
2030
+ "learning_rate": 2.2946033961092754e-05,
2031
+ "loss": 0.0039,
2032
+ "step": 330
2033
+ },
2034
+ {
2035
+ "epoch": 8.49,
2036
+ "learning_rate": 2.285461099484799e-05,
2037
+ "loss": 0.009,
2038
+ "step": 331
2039
+ },
2040
+ {
2041
+ "epoch": 8.51,
2042
+ "learning_rate": 2.2763127099037646e-05,
2043
+ "loss": 0.0046,
2044
+ "step": 332
2045
+ },
2046
+ {
2047
+ "epoch": 8.54,
2048
+ "learning_rate": 2.267158422631798e-05,
2049
+ "loss": 0.0055,
2050
+ "step": 333
2051
+ },
2052
+ {
2053
+ "epoch": 8.56,
2054
+ "learning_rate": 2.257998433060407e-05,
2055
+ "loss": 0.0048,
2056
+ "step": 334
2057
+ },
2058
+ {
2059
+ "epoch": 8.59,
2060
+ "learning_rate": 2.248832936702811e-05,
2061
+ "loss": 0.0032,
2062
+ "step": 335
2063
+ },
2064
+ {
2065
+ "epoch": 8.62,
2066
+ "learning_rate": 2.2396621291897666e-05,
2067
+ "loss": 0.0031,
2068
+ "step": 336
2069
+ },
2070
+ {
2071
+ "epoch": 8.64,
2072
+ "learning_rate": 2.2304862062653956e-05,
2073
+ "loss": 0.0036,
2074
+ "step": 337
2075
+ },
2076
+ {
2077
+ "epoch": 8.67,
2078
+ "learning_rate": 2.2213053637830016e-05,
2079
+ "loss": 0.0018,
2080
+ "step": 338
2081
+ },
2082
+ {
2083
+ "epoch": 8.69,
2084
+ "learning_rate": 2.2121197977008953e-05,
2085
+ "loss": 0.0057,
2086
+ "step": 339
2087
+ },
2088
+ {
2089
+ "epoch": 8.72,
2090
+ "learning_rate": 2.2029297040782063e-05,
2091
+ "loss": 0.0043,
2092
+ "step": 340
2093
+ },
2094
+ {
2095
+ "epoch": 8.74,
2096
+ "learning_rate": 2.1937352790707028e-05,
2097
+ "loss": 0.0063,
2098
+ "step": 341
2099
+ },
2100
+ {
2101
+ "epoch": 8.77,
2102
+ "learning_rate": 2.184536718926604e-05,
2103
+ "loss": 0.0068,
2104
+ "step": 342
2105
+ },
2106
+ {
2107
+ "epoch": 8.79,
2108
+ "learning_rate": 2.1753342199823894e-05,
2109
+ "loss": 0.0032,
2110
+ "step": 343
2111
+ },
2112
+ {
2113
+ "epoch": 8.82,
2114
+ "learning_rate": 2.166127978658608e-05,
2115
+ "loss": 0.003,
2116
+ "step": 344
2117
+ },
2118
+ {
2119
+ "epoch": 8.85,
2120
+ "learning_rate": 2.1569181914556904e-05,
2121
+ "loss": 0.0091,
2122
+ "step": 345
2123
+ },
2124
+ {
2125
+ "epoch": 8.87,
2126
+ "learning_rate": 2.147705054949748e-05,
2127
+ "loss": 0.0047,
2128
+ "step": 346
2129
+ },
2130
+ {
2131
+ "epoch": 8.9,
2132
+ "learning_rate": 2.1384887657883836e-05,
2133
+ "loss": 0.0019,
2134
+ "step": 347
2135
+ },
2136
+ {
2137
+ "epoch": 8.92,
2138
+ "learning_rate": 2.1292695206864887e-05,
2139
+ "loss": 0.0036,
2140
+ "step": 348
2141
+ },
2142
+ {
2143
+ "epoch": 8.92,
2144
+ "eval_test_accuracy": 83.89423076923077,
2145
+ "eval_test_average": 83.77504585208074,
2146
+ "eval_test_loss": 1.025055170059204,
2147
+ "eval_test_recall": 83.6558609349307,
2148
+ "eval_test_runtime": 778.8006,
2149
+ "eval_test_samples_per_second": 1.362,
2150
+ "eval_test_steps_per_second": 0.006,
2151
+ "step": 348
2152
+ },
2153
+ {
2154
+ "epoch": 8.92,
2155
+ "eval_val_accuracy": 83.88625592417061,
2156
+ "eval_val_average": 84.50143402474953,
2157
+ "eval_val_loss": 1.0246869325637817,
2158
+ "eval_val_recall": 85.11661212532843,
2159
+ "eval_val_runtime": 801.1127,
2160
+ "eval_val_samples_per_second": 1.314,
2161
+ "eval_val_steps_per_second": 0.006,
2162
+ "step": 348
2163
+ },
2164
+ {
2165
+ "epoch": 8.95,
2166
+ "learning_rate": 2.1200475164220488e-05,
2167
+ "loss": 0.0042,
2168
+ "step": 349
2169
+ },
2170
+ {
2171
+ "epoch": 8.97,
2172
+ "learning_rate": 2.11082294983194e-05,
2173
+ "loss": 0.0038,
2174
+ "step": 350
2175
+ },
2176
+ {
2177
+ "epoch": 9.0,
2178
+ "learning_rate": 2.1015960178077317e-05,
2179
+ "loss": 0.0019,
2180
+ "step": 351
2181
+ },
2182
+ {
2183
+ "epoch": 9.03,
2184
+ "learning_rate": 2.0923669172914796e-05,
2185
+ "loss": 0.0005,
2186
+ "step": 352
2187
+ },
2188
+ {
2189
+ "epoch": 9.05,
2190
+ "learning_rate": 2.0831358452715255e-05,
2191
+ "loss": 0.0036,
2192
+ "step": 353
2193
+ },
2194
+ {
2195
+ "epoch": 9.08,
2196
+ "learning_rate": 2.0739029987782903e-05,
2197
+ "loss": 0.001,
2198
+ "step": 354
2199
+ },
2200
+ {
2201
+ "epoch": 9.1,
2202
+ "learning_rate": 2.0646685748800716e-05,
2203
+ "loss": 0.0015,
2204
+ "step": 355
2205
+ },
2206
+ {
2207
+ "epoch": 9.13,
2208
+ "learning_rate": 2.055432770678833e-05,
2209
+ "loss": 0.0061,
2210
+ "step": 356
2211
+ },
2212
+ {
2213
+ "epoch": 9.15,
2214
+ "learning_rate": 2.0461957833060025e-05,
2215
+ "loss": 0.0025,
2216
+ "step": 357
2217
+ },
2218
+ {
2219
+ "epoch": 9.18,
2220
+ "learning_rate": 2.03695780991826e-05,
2221
+ "loss": 0.001,
2222
+ "step": 358
2223
+ },
2224
+ {
2225
+ "epoch": 9.21,
2226
+ "learning_rate": 2.0277190476933318e-05,
2227
+ "loss": 0.0037,
2228
+ "step": 359
2229
+ },
2230
+ {
2231
+ "epoch": 9.23,
2232
+ "learning_rate": 2.018479693825782e-05,
2233
+ "loss": 0.0011,
2234
+ "step": 360
2235
+ },
2236
+ {
2237
+ "epoch": 9.26,
2238
+ "learning_rate": 2.009239945522801e-05,
2239
+ "loss": 0.005,
2240
+ "step": 361
2241
+ },
2242
+ {
2243
+ "epoch": 9.28,
2244
+ "learning_rate": 2e-05,
2245
+ "loss": 0.0045,
2246
+ "step": 362
2247
+ },
2248
+ {
2249
+ "epoch": 9.31,
2250
+ "learning_rate": 1.9907600544771994e-05,
2251
+ "loss": 0.003,
2252
+ "step": 363
2253
+ },
2254
+ {
2255
+ "epoch": 9.33,
2256
+ "learning_rate": 1.9815203061742188e-05,
2257
+ "loss": 0.0032,
2258
+ "step": 364
2259
+ },
2260
+ {
2261
+ "epoch": 9.36,
2262
+ "learning_rate": 1.9722809523066685e-05,
2263
+ "loss": 0.0088,
2264
+ "step": 365
2265
+ },
2266
+ {
2267
+ "epoch": 9.38,
2268
+ "learning_rate": 1.9630421900817407e-05,
2269
+ "loss": 0.0009,
2270
+ "step": 366
2271
+ },
2272
+ {
2273
+ "epoch": 9.41,
2274
+ "learning_rate": 1.9538042166939982e-05,
2275
+ "loss": 0.0043,
2276
+ "step": 367
2277
+ },
2278
+ {
2279
+ "epoch": 9.44,
2280
+ "learning_rate": 1.9445672293211675e-05,
2281
+ "loss": 0.0029,
2282
+ "step": 368
2283
+ },
2284
+ {
2285
+ "epoch": 9.46,
2286
+ "learning_rate": 1.9353314251199298e-05,
2287
+ "loss": 0.0022,
2288
+ "step": 369
2289
+ },
2290
+ {
2291
+ "epoch": 9.49,
2292
+ "learning_rate": 1.9260970012217107e-05,
2293
+ "loss": 0.0012,
2294
+ "step": 370
2295
+ },
2296
+ {
2297
+ "epoch": 9.51,
2298
+ "learning_rate": 1.916864154728476e-05,
2299
+ "loss": 0.0037,
2300
+ "step": 371
2301
+ },
2302
+ {
2303
+ "epoch": 9.54,
2304
+ "learning_rate": 1.9076330827085214e-05,
2305
+ "loss": 0.006,
2306
+ "step": 372
2307
+ },
2308
+ {
2309
+ "epoch": 9.56,
2310
+ "learning_rate": 1.898403982192269e-05,
2311
+ "loss": 0.0051,
2312
+ "step": 373
2313
+ },
2314
+ {
2315
+ "epoch": 9.59,
2316
+ "learning_rate": 1.8891770501680602e-05,
2317
+ "loss": 0.0006,
2318
+ "step": 374
2319
+ },
2320
+ {
2321
+ "epoch": 9.62,
2322
+ "learning_rate": 1.879952483577952e-05,
2323
+ "loss": 0.002,
2324
+ "step": 375
2325
+ },
2326
+ {
2327
+ "epoch": 9.64,
2328
+ "learning_rate": 1.8707304793135117e-05,
2329
+ "loss": 0.0023,
2330
+ "step": 376
2331
+ },
2332
+ {
2333
+ "epoch": 9.67,
2334
+ "learning_rate": 1.861511234211617e-05,
2335
+ "loss": 0.0031,
2336
+ "step": 377
2337
+ },
2338
+ {
2339
+ "epoch": 9.69,
2340
+ "learning_rate": 1.8522949450502522e-05,
2341
+ "loss": 0.0057,
2342
+ "step": 378
2343
+ },
2344
+ {
2345
+ "epoch": 9.72,
2346
+ "learning_rate": 1.8430818085443106e-05,
2347
+ "loss": 0.0008,
2348
+ "step": 379
2349
+ },
2350
+ {
2351
+ "epoch": 9.74,
2352
+ "learning_rate": 1.8338720213413924e-05,
2353
+ "loss": 0.0014,
2354
+ "step": 380
2355
+ },
2356
+ {
2357
+ "epoch": 9.77,
2358
+ "learning_rate": 1.8246657800176116e-05,
2359
+ "loss": 0.001,
2360
+ "step": 381
2361
+ },
2362
+ {
2363
+ "epoch": 9.79,
2364
+ "learning_rate": 1.815463281073396e-05,
2365
+ "loss": 0.0013,
2366
+ "step": 382
2367
+ },
2368
+ {
2369
+ "epoch": 9.82,
2370
+ "learning_rate": 1.8062647209292975e-05,
2371
+ "loss": 0.0014,
2372
+ "step": 383
2373
+ },
2374
+ {
2375
+ "epoch": 9.85,
2376
+ "learning_rate": 1.7970702959217944e-05,
2377
+ "loss": 0.009,
2378
+ "step": 384
2379
+ },
2380
+ {
2381
+ "epoch": 9.87,
2382
+ "learning_rate": 1.7878802022991054e-05,
2383
+ "loss": 0.0038,
2384
+ "step": 385
2385
+ },
2386
+ {
2387
+ "epoch": 9.9,
2388
+ "learning_rate": 1.7786946362169987e-05,
2389
+ "loss": 0.0041,
2390
+ "step": 386
2391
+ },
2392
+ {
2393
+ "epoch": 9.92,
2394
+ "learning_rate": 1.769513793734605e-05,
2395
+ "loss": 0.0014,
2396
+ "step": 387
2397
+ },
2398
+ {
2399
+ "epoch": 9.95,
2400
+ "learning_rate": 1.760337870810234e-05,
2401
+ "loss": 0.0006,
2402
+ "step": 388
2403
+ },
2404
+ {
2405
+ "epoch": 9.97,
2406
+ "learning_rate": 1.75116706329719e-05,
2407
+ "loss": 0.0009,
2408
+ "step": 389
2409
+ },
2410
+ {
2411
+ "epoch": 10.0,
2412
+ "learning_rate": 1.742001566939594e-05,
2413
+ "loss": 0.0011,
2414
+ "step": 390
2415
+ },
2416
+ {
2417
+ "epoch": 10.03,
2418
+ "learning_rate": 1.7328415773682028e-05,
2419
+ "loss": 0.0002,
2420
+ "step": 391
2421
+ },
2422
+ {
2423
+ "epoch": 10.05,
2424
+ "learning_rate": 1.7236872900962364e-05,
2425
+ "loss": 0.0008,
2426
+ "step": 392
2427
+ },
2428
+ {
2429
+ "epoch": 10.08,
2430
+ "learning_rate": 1.7145389005152017e-05,
2431
+ "loss": 0.0005,
2432
+ "step": 393
2433
+ },
2434
+ {
2435
+ "epoch": 10.1,
2436
+ "learning_rate": 1.705396603890725e-05,
2437
+ "loss": 0.0007,
2438
+ "step": 394
2439
+ },
2440
+ {
2441
+ "epoch": 10.13,
2442
+ "learning_rate": 1.6962605953583833e-05,
2443
+ "loss": 0.0011,
2444
+ "step": 395
2445
+ },
2446
+ {
2447
+ "epoch": 10.15,
2448
+ "learning_rate": 1.687131069919538e-05,
2449
+ "loss": 0.0014,
2450
+ "step": 396
2451
+ },
2452
+ {
2453
+ "epoch": 10.18,
2454
+ "learning_rate": 1.678008222437174e-05,
2455
+ "loss": 0.0006,
2456
+ "step": 397
2457
+ },
2458
+ {
2459
+ "epoch": 10.21,
2460
+ "learning_rate": 1.66889224763174e-05,
2461
+ "loss": 0.0004,
2462
+ "step": 398
2463
+ },
2464
+ {
2465
+ "epoch": 10.23,
2466
+ "learning_rate": 1.659783340076992e-05,
2467
+ "loss": 0.002,
2468
+ "step": 399
2469
+ },
2470
+ {
2471
+ "epoch": 10.26,
2472
+ "learning_rate": 1.6506816941958425e-05,
2473
+ "loss": 0.0017,
2474
+ "step": 400
2475
+ },
2476
+ {
2477
+ "epoch": 10.28,
2478
+ "learning_rate": 1.6415875042562065e-05,
2479
+ "loss": 0.001,
2480
+ "step": 401
2481
+ },
2482
+ {
2483
+ "epoch": 10.31,
2484
+ "learning_rate": 1.6325009643668592e-05,
2485
+ "loss": 0.0005,
2486
+ "step": 402
2487
+ },
2488
+ {
2489
+ "epoch": 10.33,
2490
+ "learning_rate": 1.6234222684732908e-05,
2491
+ "loss": 0.0002,
2492
+ "step": 403
2493
+ },
2494
+ {
2495
+ "epoch": 10.36,
2496
+ "learning_rate": 1.6143516103535666e-05,
2497
+ "loss": 0.0002,
2498
+ "step": 404
2499
+ },
2500
+ {
2501
+ "epoch": 10.38,
2502
+ "learning_rate": 1.6052891836141925e-05,
2503
+ "loss": 0.0003,
2504
+ "step": 405
2505
+ },
2506
+ {
2507
+ "epoch": 10.41,
2508
+ "learning_rate": 1.59623518168598e-05,
2509
+ "loss": 0.0003,
2510
+ "step": 406
2511
+ },
2512
+ {
2513
+ "epoch": 10.44,
2514
+ "learning_rate": 1.5871897978199213e-05,
2515
+ "loss": 0.002,
2516
+ "step": 407
2517
+ },
2518
+ {
2519
+ "epoch": 10.46,
2520
+ "learning_rate": 1.578153225083061e-05,
2521
+ "loss": 0.0002,
2522
+ "step": 408
2523
+ },
2524
+ {
2525
+ "epoch": 10.49,
2526
+ "learning_rate": 1.569125656354377e-05,
2527
+ "loss": 0.003,
2528
+ "step": 409
2529
+ },
2530
+ {
2531
+ "epoch": 10.51,
2532
+ "learning_rate": 1.5601072843206634e-05,
2533
+ "loss": 0.0003,
2534
+ "step": 410
2535
+ },
2536
+ {
2537
+ "epoch": 10.54,
2538
+ "learning_rate": 1.5510983014724178e-05,
2539
+ "loss": 0.0005,
2540
+ "step": 411
2541
+ },
2542
+ {
2543
+ "epoch": 10.56,
2544
+ "learning_rate": 1.5420989000997324e-05,
2545
+ "loss": 0.0005,
2546
+ "step": 412
2547
+ },
2548
+ {
2549
+ "epoch": 10.59,
2550
+ "learning_rate": 1.53310927228819e-05,
2551
+ "loss": 0.0011,
2552
+ "step": 413
2553
+ },
2554
+ {
2555
+ "epoch": 10.62,
2556
+ "learning_rate": 1.524129609914763e-05,
2557
+ "loss": 0.0025,
2558
+ "step": 414
2559
+ },
2560
+ {
2561
+ "epoch": 10.64,
2562
+ "learning_rate": 1.5151601046437206e-05,
2563
+ "loss": 0.0005,
2564
+ "step": 415
2565
+ },
2566
+ {
2567
+ "epoch": 10.67,
2568
+ "learning_rate": 1.5062009479225336e-05,
2569
+ "loss": 0.0003,
2570
+ "step": 416
2571
+ },
2572
+ {
2573
+ "epoch": 10.69,
2574
+ "learning_rate": 1.4972523309777947e-05,
2575
+ "loss": 0.0006,
2576
+ "step": 417
2577
+ },
2578
+ {
2579
+ "epoch": 10.72,
2580
+ "learning_rate": 1.4883144448111288e-05,
2581
+ "loss": 0.0008,
2582
+ "step": 418
2583
+ },
2584
+ {
2585
+ "epoch": 10.74,
2586
+ "learning_rate": 1.4793874801951221e-05,
2587
+ "loss": 0.0007,
2588
+ "step": 419
2589
+ },
2590
+ {
2591
+ "epoch": 10.77,
2592
+ "learning_rate": 1.4704716276692483e-05,
2593
+ "loss": 0.0002,
2594
+ "step": 420
2595
+ },
2596
+ {
2597
+ "epoch": 10.79,
2598
+ "learning_rate": 1.4615670775358015e-05,
2599
+ "loss": 0.0013,
2600
+ "step": 421
2601
+ },
2602
+ {
2603
+ "epoch": 10.82,
2604
+ "learning_rate": 1.4526740198558345e-05,
2605
+ "loss": 0.0002,
2606
+ "step": 422
2607
+ },
2608
+ {
2609
+ "epoch": 10.85,
2610
+ "learning_rate": 1.443792644445102e-05,
2611
+ "loss": 0.0009,
2612
+ "step": 423
2613
+ },
2614
+ {
2615
+ "epoch": 10.87,
2616
+ "learning_rate": 1.43492314087001e-05,
2617
+ "loss": 0.002,
2618
+ "step": 424
2619
+ },
2620
+ {
2621
+ "epoch": 10.9,
2622
+ "learning_rate": 1.4260656984435683e-05,
2623
+ "loss": 0.0003,
2624
+ "step": 425
2625
+ },
2626
+ {
2627
+ "epoch": 10.92,
2628
+ "learning_rate": 1.417220506221351e-05,
2629
+ "loss": 0.0009,
2630
+ "step": 426
2631
+ },
2632
+ {
2633
+ "epoch": 10.95,
2634
+ "learning_rate": 1.4083877529974594e-05,
2635
+ "loss": 0.001,
2636
+ "step": 427
2637
+ },
2638
+ {
2639
+ "epoch": 10.97,
2640
+ "learning_rate": 1.3995676273004948e-05,
2641
+ "loss": 0.0002,
2642
+ "step": 428
2643
+ },
2644
+ {
2645
+ "epoch": 11.0,
2646
+ "learning_rate": 1.3907603173895327e-05,
2647
+ "loss": 0.0019,
2648
+ "step": 429
2649
+ },
2650
+ {
2651
+ "epoch": 11.03,
2652
+ "learning_rate": 1.3819660112501054e-05,
2653
+ "loss": 0.0002,
2654
+ "step": 430
2655
+ },
2656
+ {
2657
+ "epoch": 11.05,
2658
+ "learning_rate": 1.3731848965901891e-05,
2659
+ "loss": 0.0001,
2660
+ "step": 431
2661
+ },
2662
+ {
2663
+ "epoch": 11.08,
2664
+ "learning_rate": 1.364417160836197e-05,
2665
+ "loss": 0.0006,
2666
+ "step": 432
2667
+ },
2668
+ {
2669
+ "epoch": 11.1,
2670
+ "learning_rate": 1.355662991128981e-05,
2671
+ "loss": 0.0006,
2672
+ "step": 433
2673
+ },
2674
+ {
2675
+ "epoch": 11.13,
2676
+ "learning_rate": 1.3469225743198337e-05,
2677
+ "loss": 0.0001,
2678
+ "step": 434
2679
+ },
2680
+ {
2681
+ "epoch": 11.15,
2682
+ "learning_rate": 1.3381960969665041e-05,
2683
+ "loss": 0.0038,
2684
+ "step": 435
2685
+ },
2686
+ {
2687
+ "epoch": 11.18,
2688
+ "learning_rate": 1.329483745329213e-05,
2689
+ "loss": 0.0001,
2690
+ "step": 436
2691
+ },
2692
+ {
2693
+ "epoch": 11.21,
2694
+ "learning_rate": 1.3207857053666773e-05,
2695
+ "loss": 0.0004,
2696
+ "step": 437
2697
+ },
2698
+ {
2699
+ "epoch": 11.23,
2700
+ "learning_rate": 1.3121021627321438e-05,
2701
+ "loss": 0.0019,
2702
+ "step": 438
2703
+ },
2704
+ {
2705
+ "epoch": 11.26,
2706
+ "learning_rate": 1.303433302769423e-05,
2707
+ "loss": 0.0008,
2708
+ "step": 439
2709
+ },
2710
+ {
2711
+ "epoch": 11.28,
2712
+ "learning_rate": 1.2947793105089347e-05,
2713
+ "loss": 0.0002,
2714
+ "step": 440
2715
+ },
2716
+ {
2717
+ "epoch": 11.31,
2718
+ "learning_rate": 1.2861403706637594e-05,
2719
+ "loss": 0.0012,
2720
+ "step": 441
2721
+ },
2722
+ {
2723
+ "epoch": 11.33,
2724
+ "learning_rate": 1.2775166676256942e-05,
2725
+ "loss": 0.0007,
2726
+ "step": 442
2727
+ },
2728
+ {
2729
+ "epoch": 11.36,
2730
+ "learning_rate": 1.2689083854613178e-05,
2731
+ "loss": 0.0029,
2732
+ "step": 443
2733
+ },
2734
+ {
2735
+ "epoch": 11.38,
2736
+ "learning_rate": 1.260315707908062e-05,
2737
+ "loss": 0.0001,
2738
+ "step": 444
2739
+ },
2740
+ {
2741
+ "epoch": 11.41,
2742
+ "learning_rate": 1.2517388183702895e-05,
2743
+ "loss": 0.0001,
2744
+ "step": 445
2745
+ },
2746
+ {
2747
+ "epoch": 11.44,
2748
+ "learning_rate": 1.2431778999153796e-05,
2749
+ "loss": 0.0003,
2750
+ "step": 446
2751
+ },
2752
+ {
2753
+ "epoch": 11.46,
2754
+ "learning_rate": 1.2346331352698206e-05,
2755
+ "loss": 0.0002,
2756
+ "step": 447
2757
+ },
2758
+ {
2759
+ "epoch": 11.49,
2760
+ "learning_rate": 1.2261047068153098e-05,
2761
+ "loss": 0.0002,
2762
+ "step": 448
2763
+ },
2764
+ {
2765
+ "epoch": 11.51,
2766
+ "learning_rate": 1.21759279658486e-05,
2767
+ "loss": 0.0005,
2768
+ "step": 449
2769
+ },
2770
+ {
2771
+ "epoch": 11.54,
2772
+ "learning_rate": 1.2090975862589151e-05,
2773
+ "loss": 0.0001,
2774
+ "step": 450
2775
+ },
2776
+ {
2777
+ "epoch": 11.56,
2778
+ "learning_rate": 1.2006192571614723e-05,
2779
+ "loss": 0.0001,
2780
+ "step": 451
2781
+ },
2782
+ {
2783
+ "epoch": 11.59,
2784
+ "learning_rate": 1.1921579902562103e-05,
2785
+ "loss": 0.0006,
2786
+ "step": 452
2787
+ },
2788
+ {
2789
+ "epoch": 11.62,
2790
+ "learning_rate": 1.183713966142629e-05,
2791
+ "loss": 0.0001,
2792
+ "step": 453
2793
+ },
2794
+ {
2795
+ "epoch": 11.64,
2796
+ "learning_rate": 1.1752873650521934e-05,
2797
+ "loss": 0.0001,
2798
+ "step": 454
2799
+ },
2800
+ {
2801
+ "epoch": 11.67,
2802
+ "learning_rate": 1.166878366844486e-05,
2803
+ "loss": 0.0001,
2804
+ "step": 455
2805
+ },
2806
+ {
2807
+ "epoch": 11.69,
2808
+ "learning_rate": 1.1584871510033707e-05,
2809
+ "loss": 0.0001,
2810
+ "step": 456
2811
+ },
2812
+ {
2813
+ "epoch": 11.72,
2814
+ "learning_rate": 1.150113896633157e-05,
2815
+ "loss": 0.0001,
2816
+ "step": 457
2817
+ },
2818
+ {
2819
+ "epoch": 11.74,
2820
+ "learning_rate": 1.1417587824547822e-05,
2821
+ "loss": 0.0001,
2822
+ "step": 458
2823
+ },
2824
+ {
2825
+ "epoch": 11.77,
2826
+ "learning_rate": 1.1334219868019946e-05,
2827
+ "loss": 0.0004,
2828
+ "step": 459
2829
+ },
2830
+ {
2831
+ "epoch": 11.79,
2832
+ "learning_rate": 1.1251036876175476e-05,
2833
+ "loss": 0.0001,
2834
+ "step": 460
2835
+ },
2836
+ {
2837
+ "epoch": 11.82,
2838
+ "learning_rate": 1.1168040624493982e-05,
2839
+ "loss": 0.0012,
2840
+ "step": 461
2841
+ },
2842
+ {
2843
+ "epoch": 11.85,
2844
+ "learning_rate": 1.1085232884469236e-05,
2845
+ "loss": 0.0004,
2846
+ "step": 462
2847
+ },
2848
+ {
2849
+ "epoch": 11.87,
2850
+ "learning_rate": 1.1002615423571344e-05,
2851
+ "loss": 0.0006,
2852
+ "step": 463
2853
+ },
2854
+ {
2855
+ "epoch": 11.9,
2856
+ "learning_rate": 1.0920190005209066e-05,
2857
+ "loss": 0.0001,
2858
+ "step": 464
2859
+ },
2860
+ {
2861
+ "epoch": 11.9,
2862
+ "eval_test_accuracy": 84.85576923076923,
2863
+ "eval_test_average": 84.64341198207478,
2864
+ "eval_test_loss": 1.012276291847229,
2865
+ "eval_test_recall": 84.43105473338032,
2866
+ "eval_test_runtime": 790.3907,
2867
+ "eval_test_samples_per_second": 1.342,
2868
+ "eval_test_steps_per_second": 0.006,
2869
+ "step": 464
2870
+ },
2871
+ {
2872
+ "epoch": 11.9,
2873
+ "eval_val_accuracy": 85.30805687203792,
2874
+ "eval_val_average": 85.0317735118623,
2875
+ "eval_val_loss": 1.010198950767517,
2876
+ "eval_val_recall": 84.75549015168666,
2877
+ "eval_val_runtime": 789.112,
2878
+ "eval_val_samples_per_second": 1.334,
2879
+ "eval_val_steps_per_second": 0.006,
2880
+ "step": 464
2881
+ },
2882
+ {
2883
+ "epoch": 11.92,
2884
+ "learning_rate": 1.083795838869213e-05,
2885
+ "loss": 0.0001,
2886
+ "step": 465
2887
+ },
2888
+ {
2889
+ "epoch": 11.95,
2890
+ "learning_rate": 1.0755922329193739e-05,
2891
+ "loss": 0.0005,
2892
+ "step": 466
2893
+ },
2894
+ {
2895
+ "epoch": 11.97,
2896
+ "learning_rate": 1.0674083577713037e-05,
2897
+ "loss": 0.0001,
2898
+ "step": 467
2899
+ },
2900
+ {
2901
+ "epoch": 12.0,
2902
+ "learning_rate": 1.0592443881037816e-05,
2903
+ "loss": 0.0002,
2904
+ "step": 468
2905
+ },
2906
+ {
2907
+ "epoch": 12.03,
2908
+ "learning_rate": 1.051100498170715e-05,
2909
+ "loss": 0.0001,
2910
+ "step": 469
2911
+ },
2912
+ {
2913
+ "epoch": 12.05,
2914
+ "learning_rate": 1.0429768617974271e-05,
2915
+ "loss": 0.0001,
2916
+ "step": 470
2917
+ },
2918
+ {
2919
+ "epoch": 12.08,
2920
+ "learning_rate": 1.0348736523769423e-05,
2921
+ "loss": 0.0001,
2922
+ "step": 471
2923
+ },
2924
+ {
2925
+ "epoch": 12.1,
2926
+ "learning_rate": 1.0267910428662878e-05,
2927
+ "loss": 0.0001,
2928
+ "step": 472
2929
+ },
2930
+ {
2931
+ "epoch": 12.13,
2932
+ "learning_rate": 1.0187292057827993e-05,
2933
+ "loss": 0.0001,
2934
+ "step": 473
2935
+ },
2936
+ {
2937
+ "epoch": 12.15,
2938
+ "learning_rate": 1.0106883132004428e-05,
2939
+ "loss": 0.0001,
2940
+ "step": 474
2941
+ },
2942
+ {
2943
+ "epoch": 12.18,
2944
+ "learning_rate": 1.0026685367461364e-05,
2945
+ "loss": 0.0002,
2946
+ "step": 475
2947
+ },
2948
+ {
2949
+ "epoch": 12.21,
2950
+ "learning_rate": 9.946700475960933e-06,
2951
+ "loss": 0.0001,
2952
+ "step": 476
2953
+ },
2954
+ {
2955
+ "epoch": 12.23,
2956
+ "learning_rate": 9.866930164721615e-06,
2957
+ "loss": 0.0001,
2958
+ "step": 477
2959
+ },
2960
+ {
2961
+ "epoch": 12.26,
2962
+ "learning_rate": 9.787376136381866e-06,
2963
+ "loss": 0.0001,
2964
+ "step": 478
2965
+ },
2966
+ {
2967
+ "epoch": 12.28,
2968
+ "learning_rate": 9.70804008896371e-06,
2969
+ "loss": 0.0003,
2970
+ "step": 479
2971
+ },
2972
+ {
2973
+ "epoch": 12.31,
2974
+ "learning_rate": 9.628923715836558e-06,
2975
+ "loss": 0.0,
2976
+ "step": 480
2977
+ },
2978
+ {
2979
+ "epoch": 12.33,
2980
+ "learning_rate": 9.550028705681024e-06,
2981
+ "loss": 0.0,
2982
+ "step": 481
2983
+ },
2984
+ {
2985
+ "epoch": 12.36,
2986
+ "learning_rate": 9.471356742452881e-06,
2987
+ "loss": 0.0,
2988
+ "step": 482
2989
+ },
2990
+ {
2991
+ "epoch": 12.38,
2992
+ "learning_rate": 9.392909505347157e-06,
2993
+ "loss": 0.0005,
2994
+ "step": 483
2995
+ },
2996
+ {
2997
+ "epoch": 12.41,
2998
+ "learning_rate": 9.314688668762232e-06,
2999
+ "loss": 0.0007,
3000
+ "step": 484
3001
+ },
3002
+ {
3003
+ "epoch": 12.44,
3004
+ "learning_rate": 9.23669590226417e-06,
3005
+ "loss": 0.0001,
3006
+ "step": 485
3007
+ },
3008
+ {
3009
+ "epoch": 12.46,
3010
+ "learning_rate": 9.158932870551012e-06,
3011
+ "loss": 0.0001,
3012
+ "step": 486
3013
+ },
3014
+ {
3015
+ "epoch": 12.49,
3016
+ "learning_rate": 9.081401233417315e-06,
3017
+ "loss": 0.0001,
3018
+ "step": 487
3019
+ },
3020
+ {
3021
+ "epoch": 12.51,
3022
+ "learning_rate": 9.004102645718655e-06,
3023
+ "loss": 0.0001,
3024
+ "step": 488
3025
+ },
3026
+ {
3027
+ "epoch": 12.54,
3028
+ "learning_rate": 8.92703875733637e-06,
3029
+ "loss": 0.0001,
3030
+ "step": 489
3031
+ },
3032
+ {
3033
+ "epoch": 12.56,
3034
+ "learning_rate": 8.85021121314229e-06,
3035
+ "loss": 0.0001,
3036
+ "step": 490
3037
+ },
3038
+ {
3039
+ "epoch": 12.59,
3040
+ "learning_rate": 8.773621652963673e-06,
3041
+ "loss": 0.0,
3042
+ "step": 491
3043
+ },
3044
+ {
3045
+ "epoch": 12.62,
3046
+ "learning_rate": 8.697271711548163e-06,
3047
+ "loss": 0.0001,
3048
+ "step": 492
3049
+ },
3050
+ {
3051
+ "epoch": 12.64,
3052
+ "learning_rate": 8.621163018528942e-06,
3053
+ "loss": 0.0001,
3054
+ "step": 493
3055
+ },
3056
+ {
3057
+ "epoch": 12.67,
3058
+ "learning_rate": 8.545297198389896e-06,
3059
+ "loss": 0.0,
3060
+ "step": 494
3061
+ },
3062
+ {
3063
+ "epoch": 12.69,
3064
+ "learning_rate": 8.469675870431e-06,
3065
+ "loss": 0.0001,
3066
+ "step": 495
3067
+ },
3068
+ {
3069
+ "epoch": 12.72,
3070
+ "learning_rate": 8.394300648733688e-06,
3071
+ "loss": 0.0001,
3072
+ "step": 496
3073
+ },
3074
+ {
3075
+ "epoch": 12.74,
3076
+ "learning_rate": 8.319173142126473e-06,
3077
+ "loss": 0.0001,
3078
+ "step": 497
3079
+ },
3080
+ {
3081
+ "epoch": 12.77,
3082
+ "learning_rate": 8.24429495415054e-06,
3083
+ "loss": 0.0,
3084
+ "step": 498
3085
+ },
3086
+ {
3087
+ "epoch": 12.79,
3088
+ "learning_rate": 8.169667683025582e-06,
3089
+ "loss": 0.0001,
3090
+ "step": 499
3091
+ },
3092
+ {
3093
+ "epoch": 12.82,
3094
+ "learning_rate": 8.095292921615628e-06,
3095
+ "loss": 0.0,
3096
+ "step": 500
3097
+ },
3098
+ {
3099
+ "epoch": 12.85,
3100
+ "learning_rate": 8.021172257395092e-06,
3101
+ "loss": 0.0001,
3102
+ "step": 501
3103
+ },
3104
+ {
3105
+ "epoch": 12.87,
3106
+ "learning_rate": 7.947307272414874e-06,
3107
+ "loss": 0.0001,
3108
+ "step": 502
3109
+ },
3110
+ {
3111
+ "epoch": 12.9,
3112
+ "learning_rate": 7.873699543268572e-06,
3113
+ "loss": 0.0001,
3114
+ "step": 503
3115
+ },
3116
+ {
3117
+ "epoch": 12.92,
3118
+ "learning_rate": 7.800350641058867e-06,
3119
+ "loss": 0.0001,
3120
+ "step": 504
3121
+ },
3122
+ {
3123
+ "epoch": 12.95,
3124
+ "learning_rate": 7.727262131363949e-06,
3125
+ "loss": 0.0001,
3126
+ "step": 505
3127
+ },
3128
+ {
3129
+ "epoch": 12.97,
3130
+ "learning_rate": 7.654435574204145e-06,
3131
+ "loss": 0.0001,
3132
+ "step": 506
3133
+ },
3134
+ {
3135
+ "epoch": 13.0,
3136
+ "learning_rate": 7.581872524008574e-06,
3137
+ "loss": 0.0001,
3138
+ "step": 507
3139
+ },
3140
+ {
3141
+ "epoch": 13.03,
3142
+ "learning_rate": 7.509574529582022e-06,
3143
+ "loss": 0.0001,
3144
+ "step": 508
3145
+ },
3146
+ {
3147
+ "epoch": 13.05,
3148
+ "learning_rate": 7.437543134071823e-06,
3149
+ "loss": 0.0001,
3150
+ "step": 509
3151
+ },
3152
+ {
3153
+ "epoch": 13.08,
3154
+ "learning_rate": 7.365779874934987e-06,
3155
+ "loss": 0.0001,
3156
+ "step": 510
3157
+ },
3158
+ {
3159
+ "epoch": 13.1,
3160
+ "learning_rate": 7.294286283905314e-06,
3161
+ "loss": 0.0,
3162
+ "step": 511
3163
+ },
3164
+ {
3165
+ "epoch": 13.13,
3166
+ "learning_rate": 7.223063886960779e-06,
3167
+ "loss": 0.0,
3168
+ "step": 512
3169
+ },
3170
+ {
3171
+ "epoch": 13.15,
3172
+ "learning_rate": 7.152114204290879e-06,
3173
+ "loss": 0.0001,
3174
+ "step": 513
3175
+ },
3176
+ {
3177
+ "epoch": 13.18,
3178
+ "learning_rate": 7.081438750264258e-06,
3179
+ "loss": 0.0,
3180
+ "step": 514
3181
+ },
3182
+ {
3183
+ "epoch": 13.21,
3184
+ "learning_rate": 7.01103903339633e-06,
3185
+ "loss": 0.0,
3186
+ "step": 515
3187
+ },
3188
+ {
3189
+ "epoch": 13.23,
3190
+ "learning_rate": 6.940916556317119e-06,
3191
+ "loss": 0.0001,
3192
+ "step": 516
3193
+ },
3194
+ {
3195
+ "epoch": 13.26,
3196
+ "learning_rate": 6.87107281573915e-06,
3197
+ "loss": 0.0001,
3198
+ "step": 517
3199
+ },
3200
+ {
3201
+ "epoch": 13.28,
3202
+ "learning_rate": 6.801509302425553e-06,
3203
+ "loss": 0.0,
3204
+ "step": 518
3205
+ },
3206
+ {
3207
+ "epoch": 13.31,
3208
+ "learning_rate": 6.732227501158182e-06,
3209
+ "loss": 0.0,
3210
+ "step": 519
3211
+ },
3212
+ {
3213
+ "epoch": 13.33,
3214
+ "learning_rate": 6.6632288907059795e-06,
3215
+ "loss": 0.0003,
3216
+ "step": 520
3217
+ },
3218
+ {
3219
+ "epoch": 13.36,
3220
+ "learning_rate": 6.594514943793369e-06,
3221
+ "loss": 0.0,
3222
+ "step": 521
3223
+ },
3224
+ {
3225
+ "epoch": 13.38,
3226
+ "learning_rate": 6.526087127068857e-06,
3227
+ "loss": 0.0001,
3228
+ "step": 522
3229
+ },
3230
+ {
3231
+ "epoch": 13.41,
3232
+ "learning_rate": 6.457946901073704e-06,
3233
+ "loss": 0.0,
3234
+ "step": 523
3235
+ },
3236
+ {
3237
+ "epoch": 13.44,
3238
+ "learning_rate": 6.3900957202107695e-06,
3239
+ "loss": 0.0,
3240
+ "step": 524
3241
+ },
3242
+ {
3243
+ "epoch": 13.46,
3244
+ "learning_rate": 6.322535032713437e-06,
3245
+ "loss": 0.0,
3246
+ "step": 525
3247
+ },
3248
+ {
3249
+ "epoch": 13.49,
3250
+ "learning_rate": 6.255266280614747e-06,
3251
+ "loss": 0.0001,
3252
+ "step": 526
3253
+ },
3254
+ {
3255
+ "epoch": 13.51,
3256
+ "learning_rate": 6.188290899716569e-06,
3257
+ "loss": 0.0001,
3258
+ "step": 527
3259
+ },
3260
+ {
3261
+ "epoch": 13.54,
3262
+ "learning_rate": 6.1216103195590085e-06,
3263
+ "loss": 0.0001,
3264
+ "step": 528
3265
+ },
3266
+ {
3267
+ "epoch": 13.56,
3268
+ "learning_rate": 6.055225963389841e-06,
3269
+ "loss": 0.0001,
3270
+ "step": 529
3271
+ },
3272
+ {
3273
+ "epoch": 13.59,
3274
+ "learning_rate": 5.989139248134181e-06,
3275
+ "loss": 0.0,
3276
+ "step": 530
3277
+ },
3278
+ {
3279
+ "epoch": 13.62,
3280
+ "learning_rate": 5.923351584364201e-06,
3281
+ "loss": 0.0001,
3282
+ "step": 531
3283
+ },
3284
+ {
3285
+ "epoch": 13.64,
3286
+ "learning_rate": 5.857864376269051e-06,
3287
+ "loss": 0.0,
3288
+ "step": 532
3289
+ },
3290
+ {
3291
+ "epoch": 13.67,
3292
+ "learning_rate": 5.792679021624872e-06,
3293
+ "loss": 0.0001,
3294
+ "step": 533
3295
+ },
3296
+ {
3297
+ "epoch": 13.69,
3298
+ "learning_rate": 5.727796911764955e-06,
3299
+ "loss": 0.0,
3300
+ "step": 534
3301
+ },
3302
+ {
3303
+ "epoch": 13.72,
3304
+ "learning_rate": 5.6632194315500845e-06,
3305
+ "loss": 0.0001,
3306
+ "step": 535
3307
+ },
3308
+ {
3309
+ "epoch": 13.74,
3310
+ "learning_rate": 5.598947959338912e-06,
3311
+ "loss": 0.0001,
3312
+ "step": 536
3313
+ },
3314
+ {
3315
+ "epoch": 13.77,
3316
+ "learning_rate": 5.534983866958608e-06,
3317
+ "loss": 0.0,
3318
+ "step": 537
3319
+ },
3320
+ {
3321
+ "epoch": 13.79,
3322
+ "learning_rate": 5.471328519675521e-06,
3323
+ "loss": 0.0,
3324
+ "step": 538
3325
+ },
3326
+ {
3327
+ "epoch": 13.82,
3328
+ "learning_rate": 5.407983276166084e-06,
3329
+ "loss": 0.0,
3330
+ "step": 539
3331
+ },
3332
+ {
3333
+ "epoch": 13.85,
3334
+ "learning_rate": 5.344949488487776e-06,
3335
+ "loss": 0.0001,
3336
+ "step": 540
3337
+ },
3338
+ {
3339
+ "epoch": 13.87,
3340
+ "learning_rate": 5.2822285020503e-06,
3341
+ "loss": 0.0001,
3342
+ "step": 541
3343
+ },
3344
+ {
3345
+ "epoch": 13.9,
3346
+ "learning_rate": 5.219821655586821e-06,
3347
+ "loss": 0.0,
3348
+ "step": 542
3349
+ },
3350
+ {
3351
+ "epoch": 13.92,
3352
+ "learning_rate": 5.157730281125444e-06,
3353
+ "loss": 0.0,
3354
+ "step": 543
3355
+ },
3356
+ {
3357
+ "epoch": 13.95,
3358
+ "learning_rate": 5.095955703960746e-06,
3359
+ "loss": 0.0001,
3360
+ "step": 544
3361
+ },
3362
+ {
3363
+ "epoch": 13.97,
3364
+ "learning_rate": 5.034499242625504e-06,
3365
+ "loss": 0.0,
3366
+ "step": 545
3367
+ },
3368
+ {
3369
+ "epoch": 14.0,
3370
+ "learning_rate": 4.9733622088625335e-06,
3371
+ "loss": 0.0,
3372
+ "step": 546
3373
+ },
3374
+ {
3375
+ "epoch": 14.03,
3376
+ "learning_rate": 4.912545907596722e-06,
3377
+ "loss": 0.0,
3378
+ "step": 547
3379
+ },
3380
+ {
3381
+ "epoch": 14.05,
3382
+ "learning_rate": 4.852051636907144e-06,
3383
+ "loss": 0.0,
3384
+ "step": 548
3385
+ },
3386
+ {
3387
+ "epoch": 14.08,
3388
+ "learning_rate": 4.791880687999382e-06,
3389
+ "loss": 0.0,
3390
+ "step": 549
3391
+ },
3392
+ {
3393
+ "epoch": 14.1,
3394
+ "learning_rate": 4.732034345177941e-06,
3395
+ "loss": 0.0,
3396
+ "step": 550
3397
+ },
3398
+ {
3399
+ "epoch": 14.13,
3400
+ "learning_rate": 4.672513885818859e-06,
3401
+ "loss": 0.0,
3402
+ "step": 551
3403
+ },
3404
+ {
3405
+ "epoch": 14.15,
3406
+ "learning_rate": 4.613320580342422e-06,
3407
+ "loss": 0.0,
3408
+ "step": 552
3409
+ },
3410
+ {
3411
+ "epoch": 14.18,
3412
+ "learning_rate": 4.55445569218607e-06,
3413
+ "loss": 0.0001,
3414
+ "step": 553
3415
+ },
3416
+ {
3417
+ "epoch": 14.21,
3418
+ "learning_rate": 4.495920477777403e-06,
3419
+ "loss": 0.0001,
3420
+ "step": 554
3421
+ },
3422
+ {
3423
+ "epoch": 14.23,
3424
+ "learning_rate": 4.437716186507397e-06,
3425
+ "loss": 0.0,
3426
+ "step": 555
3427
+ },
3428
+ {
3429
+ "epoch": 14.26,
3430
+ "learning_rate": 4.379844060703693e-06,
3431
+ "loss": 0.0,
3432
+ "step": 556
3433
+ },
3434
+ {
3435
+ "epoch": 14.28,
3436
+ "learning_rate": 4.3223053356041315e-06,
3437
+ "loss": 0.0001,
3438
+ "step": 557
3439
+ },
3440
+ {
3441
+ "epoch": 14.31,
3442
+ "learning_rate": 4.265101239330336e-06,
3443
+ "loss": 0.0,
3444
+ "step": 558
3445
+ },
3446
+ {
3447
+ "epoch": 14.33,
3448
+ "learning_rate": 4.2082329928615494e-06,
3449
+ "loss": 0.0001,
3450
+ "step": 559
3451
+ },
3452
+ {
3453
+ "epoch": 14.36,
3454
+ "learning_rate": 4.151701810008524e-06,
3455
+ "loss": 0.0,
3456
+ "step": 560
3457
+ },
3458
+ {
3459
+ "epoch": 14.38,
3460
+ "learning_rate": 4.095508897387661e-06,
3461
+ "loss": 0.0001,
3462
+ "step": 561
3463
+ },
3464
+ {
3465
+ "epoch": 14.41,
3466
+ "learning_rate": 4.03965545439521e-06,
3467
+ "loss": 0.0001,
3468
+ "step": 562
3469
+ },
3470
+ {
3471
+ "epoch": 14.44,
3472
+ "learning_rate": 3.984142673181717e-06,
3473
+ "loss": 0.0,
3474
+ "step": 563
3475
+ },
3476
+ {
3477
+ "epoch": 14.46,
3478
+ "learning_rate": 3.9289717386265255e-06,
3479
+ "loss": 0.0,
3480
+ "step": 564
3481
+ },
3482
+ {
3483
+ "epoch": 14.49,
3484
+ "learning_rate": 3.8741438283125374e-06,
3485
+ "loss": 0.0001,
3486
+ "step": 565
3487
+ },
3488
+ {
3489
+ "epoch": 14.51,
3490
+ "learning_rate": 3.819660112501053e-06,
3491
+ "loss": 0.0001,
3492
+ "step": 566
3493
+ },
3494
+ {
3495
+ "epoch": 14.54,
3496
+ "learning_rate": 3.765521754106776e-06,
3497
+ "loss": 0.0,
3498
+ "step": 567
3499
+ },
3500
+ {
3501
+ "epoch": 14.56,
3502
+ "learning_rate": 3.711729908673034e-06,
3503
+ "loss": 0.0,
3504
+ "step": 568
3505
+ },
3506
+ {
3507
+ "epoch": 14.59,
3508
+ "learning_rate": 3.6582857243470706e-06,
3509
+ "loss": 0.0,
3510
+ "step": 569
3511
+ },
3512
+ {
3513
+ "epoch": 14.62,
3514
+ "learning_rate": 3.60519034185558e-06,
3515
+ "loss": 0.0,
3516
+ "step": 570
3517
+ },
3518
+ {
3519
+ "epoch": 14.64,
3520
+ "learning_rate": 3.552444894480318e-06,
3521
+ "loss": 0.0,
3522
+ "step": 571
3523
+ },
3524
+ {
3525
+ "epoch": 14.67,
3526
+ "learning_rate": 3.5000505080339565e-06,
3527
+ "loss": 0.0001,
3528
+ "step": 572
3529
+ },
3530
+ {
3531
+ "epoch": 14.69,
3532
+ "learning_rate": 3.448008300836003e-06,
3533
+ "loss": 0.0001,
3534
+ "step": 573
3535
+ },
3536
+ {
3537
+ "epoch": 14.72,
3538
+ "learning_rate": 3.3963193836889907e-06,
3539
+ "loss": 0.0,
3540
+ "step": 574
3541
+ },
3542
+ {
3543
+ "epoch": 14.74,
3544
+ "learning_rate": 3.344984859854703e-06,
3545
+ "loss": 0.0,
3546
+ "step": 575
3547
+ },
3548
+ {
3549
+ "epoch": 14.77,
3550
+ "learning_rate": 3.2940058250306927e-06,
3551
+ "loss": 0.0,
3552
+ "step": 576
3553
+ },
3554
+ {
3555
+ "epoch": 14.79,
3556
+ "learning_rate": 3.2433833673268358e-06,
3557
+ "loss": 0.0,
3558
+ "step": 577
3559
+ },
3560
+ {
3561
+ "epoch": 14.82,
3562
+ "learning_rate": 3.193118567242148e-06,
3563
+ "loss": 0.0,
3564
+ "step": 578
3565
+ },
3566
+ {
3567
+ "epoch": 14.85,
3568
+ "learning_rate": 3.1432124976416988e-06,
3569
+ "loss": 0.0001,
3570
+ "step": 579
3571
+ },
3572
+ {
3573
+ "epoch": 14.87,
3574
+ "learning_rate": 3.093666223733731e-06,
3575
+ "loss": 0.0,
3576
+ "step": 580
3577
+ },
3578
+ {
3579
+ "epoch": 14.87,
3580
+ "eval_test_accuracy": 85.57692307692307,
3581
+ "eval_test_average": 85.24300699300699,
3582
+ "eval_test_loss": 1.0223864316940308,
3583
+ "eval_test_recall": 84.9090909090909,
3584
+ "eval_test_runtime": 785.4649,
3585
+ "eval_test_samples_per_second": 1.351,
3586
+ "eval_test_steps_per_second": 0.006,
3587
+ "step": 580
3588
+ },
3589
+ {
3590
+ "epoch": 14.87,
3591
+ "eval_val_accuracy": 85.07109004739335,
3592
+ "eval_val_average": 84.80103442072331,
3593
+ "eval_val_loss": 1.0219820737838745,
3594
+ "eval_val_recall": 84.53097879405325,
3595
+ "eval_val_runtime": 794.4161,
3596
+ "eval_val_samples_per_second": 1.326,
3597
+ "eval_val_steps_per_second": 0.006,
3598
+ "step": 580
3599
+ },
3600
+ {
3601
+ "epoch": 14.9,
3602
+ "learning_rate": 3.0444808030468966e-06,
3603
+ "loss": 0.0001,
3604
+ "step": 581
3605
+ },
3606
+ {
3607
+ "epoch": 14.92,
3608
+ "learning_rate": 2.9956572854077205e-06,
3609
+ "loss": 0.0,
3610
+ "step": 582
3611
+ },
3612
+ {
3613
+ "epoch": 14.95,
3614
+ "learning_rate": 2.947196712918157e-06,
3615
+ "loss": 0.0,
3616
+ "step": 583
3617
+ },
3618
+ {
3619
+ "epoch": 14.97,
3620
+ "learning_rate": 2.89910011993338e-06,
3621
+ "loss": 0.0001,
3622
+ "step": 584
3623
+ },
3624
+ {
3625
+ "epoch": 15.0,
3626
+ "learning_rate": 2.8513685330396755e-06,
3627
+ "loss": 0.0,
3628
+ "step": 585
3629
+ },
3630
+ {
3631
+ "epoch": 15.03,
3632
+ "learning_rate": 2.804002971032551e-06,
3633
+ "loss": 0.0,
3634
+ "step": 586
3635
+ },
3636
+ {
3637
+ "epoch": 15.05,
3638
+ "learning_rate": 2.7570044448949886e-06,
3639
+ "loss": 0.0,
3640
+ "step": 587
3641
+ },
3642
+ {
3643
+ "epoch": 15.08,
3644
+ "learning_rate": 2.7103739577758426e-06,
3645
+ "loss": 0.0,
3646
+ "step": 588
3647
+ },
3648
+ {
3649
+ "epoch": 15.1,
3650
+ "learning_rate": 2.664112504968468e-06,
3651
+ "loss": 0.0001,
3652
+ "step": 589
3653
+ },
3654
+ {
3655
+ "epoch": 15.13,
3656
+ "learning_rate": 2.618221073889433e-06,
3657
+ "loss": 0.0,
3658
+ "step": 590
3659
+ },
3660
+ {
3661
+ "epoch": 15.15,
3662
+ "learning_rate": 2.5727006440574866e-06,
3663
+ "loss": 0.0001,
3664
+ "step": 591
3665
+ },
3666
+ {
3667
+ "epoch": 15.18,
3668
+ "learning_rate": 2.5275521870726107e-06,
3669
+ "loss": 0.0,
3670
+ "step": 592
3671
+ },
3672
+ {
3673
+ "epoch": 15.21,
3674
+ "learning_rate": 2.4827766665953147e-06,
3675
+ "loss": 0.0,
3676
+ "step": 593
3677
+ },
3678
+ {
3679
+ "epoch": 15.23,
3680
+ "learning_rate": 2.4383750383260417e-06,
3681
+ "loss": 0.0,
3682
+ "step": 594
3683
+ },
3684
+ {
3685
+ "epoch": 15.26,
3686
+ "learning_rate": 2.394348249984797e-06,
3687
+ "loss": 0.0,
3688
+ "step": 595
3689
+ },
3690
+ {
3691
+ "epoch": 15.28,
3692
+ "learning_rate": 2.3506972412908866e-06,
3693
+ "loss": 0.0,
3694
+ "step": 596
3695
+ },
3696
+ {
3697
+ "epoch": 15.31,
3698
+ "learning_rate": 2.3074229439428964e-06,
3699
+ "loss": 0.0001,
3700
+ "step": 597
3701
+ },
3702
+ {
3703
+ "epoch": 15.33,
3704
+ "learning_rate": 2.264526281598762e-06,
3705
+ "loss": 0.0,
3706
+ "step": 598
3707
+ },
3708
+ {
3709
+ "epoch": 15.36,
3710
+ "learning_rate": 2.2220081698561067e-06,
3711
+ "loss": 0.0001,
3712
+ "step": 599
3713
+ },
3714
+ {
3715
+ "epoch": 15.38,
3716
+ "learning_rate": 2.1798695162326444e-06,
3717
+ "loss": 0.0,
3718
+ "step": 600
3719
+ },
3720
+ {
3721
+ "epoch": 15.41,
3722
+ "learning_rate": 2.138111220146857e-06,
3723
+ "loss": 0.0,
3724
+ "step": 601
3725
+ },
3726
+ {
3727
+ "epoch": 15.44,
3728
+ "learning_rate": 2.0967341728987554e-06,
3729
+ "loss": 0.0,
3730
+ "step": 602
3731
+ },
3732
+ {
3733
+ "epoch": 15.46,
3734
+ "learning_rate": 2.055739257650895e-06,
3735
+ "loss": 0.0,
3736
+ "step": 603
3737
+ },
3738
+ {
3739
+ "epoch": 15.49,
3740
+ "learning_rate": 2.015127349409489e-06,
3741
+ "loss": 0.0001,
3742
+ "step": 604
3743
+ },
3744
+ {
3745
+ "epoch": 15.51,
3746
+ "learning_rate": 1.974899315005763e-06,
3747
+ "loss": 0.0001,
3748
+ "step": 605
3749
+ },
3750
+ {
3751
+ "epoch": 15.54,
3752
+ "learning_rate": 1.9350560130774234e-06,
3753
+ "loss": 0.0,
3754
+ "step": 606
3755
+ },
3756
+ {
3757
+ "epoch": 15.56,
3758
+ "learning_rate": 1.895598294050358e-06,
3759
+ "loss": 0.0001,
3760
+ "step": 607
3761
+ },
3762
+ {
3763
+ "epoch": 15.59,
3764
+ "learning_rate": 1.8565270001204693e-06,
3765
+ "loss": 0.0,
3766
+ "step": 608
3767
+ },
3768
+ {
3769
+ "epoch": 15.62,
3770
+ "learning_rate": 1.817842965235701e-06,
3771
+ "loss": 0.0001,
3772
+ "step": 609
3773
+ },
3774
+ {
3775
+ "epoch": 15.64,
3776
+ "learning_rate": 1.7795470150782312e-06,
3777
+ "loss": 0.0,
3778
+ "step": 610
3779
+ },
3780
+ {
3781
+ "epoch": 15.67,
3782
+ "learning_rate": 1.7416399670468687e-06,
3783
+ "loss": 0.0001,
3784
+ "step": 611
3785
+ },
3786
+ {
3787
+ "epoch": 15.69,
3788
+ "learning_rate": 1.7041226302395797e-06,
3789
+ "loss": 0.0,
3790
+ "step": 612
3791
+ },
3792
+ {
3793
+ "epoch": 15.72,
3794
+ "learning_rate": 1.6669958054362444e-06,
3795
+ "loss": 0.0,
3796
+ "step": 613
3797
+ },
3798
+ {
3799
+ "epoch": 15.74,
3800
+ "learning_rate": 1.6302602850815397e-06,
3801
+ "loss": 0.0,
3802
+ "step": 614
3803
+ },
3804
+ {
3805
+ "epoch": 15.77,
3806
+ "learning_rate": 1.5939168532680516e-06,
3807
+ "loss": 0.0,
3808
+ "step": 615
3809
+ },
3810
+ {
3811
+ "epoch": 15.79,
3812
+ "learning_rate": 1.55796628571951e-06,
3813
+ "loss": 0.0001,
3814
+ "step": 616
3815
+ },
3816
+ {
3817
+ "epoch": 15.82,
3818
+ "learning_rate": 1.5224093497742654e-06,
3819
+ "loss": 0.0,
3820
+ "step": 617
3821
+ },
3822
+ {
3823
+ "epoch": 15.85,
3824
+ "learning_rate": 1.487246804368876e-06,
3825
+ "loss": 0.0,
3826
+ "step": 618
3827
+ },
3828
+ {
3829
+ "epoch": 15.87,
3830
+ "learning_rate": 1.452479400021929e-06,
3831
+ "loss": 0.0001,
3832
+ "step": 619
3833
+ },
3834
+ {
3835
+ "epoch": 15.9,
3836
+ "learning_rate": 1.418107878818027e-06,
3837
+ "loss": 0.0,
3838
+ "step": 620
3839
+ },
3840
+ {
3841
+ "epoch": 15.92,
3842
+ "learning_rate": 1.3841329743919206e-06,
3843
+ "loss": 0.0,
3844
+ "step": 621
3845
+ },
3846
+ {
3847
+ "epoch": 15.95,
3848
+ "learning_rate": 1.3505554119128861e-06,
3849
+ "loss": 0.0,
3850
+ "step": 622
3851
+ },
3852
+ {
3853
+ "epoch": 15.97,
3854
+ "learning_rate": 1.3173759080692184e-06,
3855
+ "loss": 0.0,
3856
+ "step": 623
3857
+ },
3858
+ {
3859
+ "epoch": 16.0,
3860
+ "learning_rate": 1.2845951710529513e-06,
3861
+ "loss": 0.0,
3862
+ "step": 624
3863
+ },
3864
+ {
3865
+ "epoch": 16.03,
3866
+ "learning_rate": 1.2522139005447232e-06,
3867
+ "loss": 0.0,
3868
+ "step": 625
3869
+ },
3870
+ {
3871
+ "epoch": 16.05,
3872
+ "learning_rate": 1.2202327876988719e-06,
3873
+ "loss": 0.0,
3874
+ "step": 626
3875
+ },
3876
+ {
3877
+ "epoch": 16.08,
3878
+ "learning_rate": 1.1886525151286477e-06,
3879
+ "loss": 0.0,
3880
+ "step": 627
3881
+ },
3882
+ {
3883
+ "epoch": 16.1,
3884
+ "learning_rate": 1.157473756891674e-06,
3885
+ "loss": 0.0001,
3886
+ "step": 628
3887
+ },
3888
+ {
3889
+ "epoch": 16.13,
3890
+ "learning_rate": 1.1266971784755398e-06,
3891
+ "loss": 0.0,
3892
+ "step": 629
3893
+ },
3894
+ {
3895
+ "epoch": 16.15,
3896
+ "learning_rate": 1.0963234367836106e-06,
3897
+ "loss": 0.0001,
3898
+ "step": 630
3899
+ },
3900
+ {
3901
+ "epoch": 16.18,
3902
+ "learning_rate": 1.0663531801209826e-06,
3903
+ "loss": 0.0,
3904
+ "step": 631
3905
+ },
3906
+ {
3907
+ "epoch": 16.21,
3908
+ "learning_rate": 1.036787048180683e-06,
3909
+ "loss": 0.0,
3910
+ "step": 632
3911
+ },
3912
+ {
3913
+ "epoch": 16.23,
3914
+ "learning_rate": 1.007625672029977e-06,
3915
+ "loss": 0.0,
3916
+ "step": 633
3917
+ },
3918
+ {
3919
+ "epoch": 16.26,
3920
+ "learning_rate": 9.788696740969295e-07,
3921
+ "loss": 0.0,
3922
+ "step": 634
3923
+ },
3924
+ {
3925
+ "epoch": 16.28,
3926
+ "learning_rate": 9.50519668157095e-07,
3927
+ "loss": 0.0001,
3928
+ "step": 635
3929
+ },
3930
+ {
3931
+ "epoch": 16.31,
3932
+ "learning_rate": 9.225762593204379e-07,
3933
+ "loss": 0.0,
3934
+ "step": 636
3935
+ },
3936
+ {
3937
+ "epoch": 16.33,
3938
+ "learning_rate": 8.950400440184004e-07,
3939
+ "loss": 0.0,
3940
+ "step": 637
3941
+ },
3942
+ {
3943
+ "epoch": 16.36,
3944
+ "learning_rate": 8.679116099911855e-07,
3945
+ "loss": 0.0,
3946
+ "step": 638
3947
+ },
3948
+ {
3949
+ "epoch": 16.38,
3950
+ "learning_rate": 8.411915362751988e-07,
3951
+ "loss": 0.0,
3952
+ "step": 639
3953
+ },
3954
+ {
3955
+ "epoch": 16.41,
3956
+ "learning_rate": 8.148803931907023e-07,
3957
+ "loss": 0.0,
3958
+ "step": 640
3959
+ },
3960
+ {
3961
+ "epoch": 16.44,
3962
+ "learning_rate": 7.889787423296335e-07,
3963
+ "loss": 0.0,
3964
+ "step": 641
3965
+ },
3966
+ {
3967
+ "epoch": 16.46,
3968
+ "learning_rate": 7.634871365436192e-07,
3969
+ "loss": 0.0001,
3970
+ "step": 642
3971
+ },
3972
+ {
3973
+ "epoch": 16.49,
3974
+ "learning_rate": 7.384061199321757e-07,
3975
+ "loss": 0.0,
3976
+ "step": 643
3977
+ },
3978
+ {
3979
+ "epoch": 16.51,
3980
+ "learning_rate": 7.137362278311033e-07,
3981
+ "loss": 0.0,
3982
+ "step": 644
3983
+ },
3984
+ {
3985
+ "epoch": 16.54,
3986
+ "learning_rate": 6.894779868010415e-07,
3987
+ "loss": 0.0001,
3988
+ "step": 645
3989
+ },
3990
+ {
3991
+ "epoch": 16.56,
3992
+ "learning_rate": 6.656319146162516e-07,
3993
+ "loss": 0.0,
3994
+ "step": 646
3995
+ },
3996
+ {
3997
+ "epoch": 16.59,
3998
+ "learning_rate": 6.421985202535497e-07,
3999
+ "loss": 0.0001,
4000
+ "step": 647
4001
+ },
4002
+ {
4003
+ "epoch": 16.62,
4004
+ "learning_rate": 6.191783038814492e-07,
4005
+ "loss": 0.0,
4006
+ "step": 648
4007
+ },
4008
+ {
4009
+ "epoch": 16.64,
4010
+ "learning_rate": 5.965717568494844e-07,
4011
+ "loss": 0.0,
4012
+ "step": 649
4013
+ },
4014
+ {
4015
+ "epoch": 16.67,
4016
+ "learning_rate": 5.743793616777216e-07,
4017
+ "loss": 0.0,
4018
+ "step": 650
4019
+ },
4020
+ {
4021
+ "epoch": 16.69,
4022
+ "learning_rate": 5.526015920464689e-07,
4023
+ "loss": 0.0,
4024
+ "step": 651
4025
+ },
4026
+ {
4027
+ "epoch": 16.72,
4028
+ "learning_rate": 5.312389127861428e-07,
4029
+ "loss": 0.0,
4030
+ "step": 652
4031
+ },
4032
+ {
4033
+ "epoch": 16.74,
4034
+ "learning_rate": 5.102917798673779e-07,
4035
+ "loss": 0.0001,
4036
+ "step": 653
4037
+ },
4038
+ {
4039
+ "epoch": 16.77,
4040
+ "learning_rate": 4.89760640391268e-07,
4041
+ "loss": 0.0,
4042
+ "step": 654
4043
+ },
4044
+ {
4045
+ "epoch": 16.79,
4046
+ "learning_rate": 4.696459325798386e-07,
4047
+ "loss": 0.0,
4048
+ "step": 655
4049
+ },
4050
+ {
4051
+ "epoch": 16.82,
4052
+ "learning_rate": 4.499480857666849e-07,
4053
+ "loss": 0.0001,
4054
+ "step": 656
4055
+ },
4056
+ {
4057
+ "epoch": 16.85,
4058
+ "learning_rate": 4.306675203878219e-07,
4059
+ "loss": 0.0,
4060
+ "step": 657
4061
+ },
4062
+ {
4063
+ "epoch": 16.87,
4064
+ "learning_rate": 4.118046479726823e-07,
4065
+ "loss": 0.0,
4066
+ "step": 658
4067
+ },
4068
+ {
4069
+ "epoch": 16.9,
4070
+ "learning_rate": 3.9335987113537035e-07,
4071
+ "loss": 0.0,
4072
+ "step": 659
4073
+ },
4074
+ {
4075
+ "epoch": 16.92,
4076
+ "learning_rate": 3.75333583566031e-07,
4077
+ "loss": 0.0,
4078
+ "step": 660
4079
+ },
4080
+ {
4081
+ "epoch": 16.95,
4082
+ "learning_rate": 3.57726170022481e-07,
4083
+ "loss": 0.0,
4084
+ "step": 661
4085
+ },
4086
+ {
4087
+ "epoch": 16.97,
4088
+ "learning_rate": 3.4053800632196434e-07,
4089
+ "loss": 0.0001,
4090
+ "step": 662
4091
+ },
4092
+ {
4093
+ "epoch": 17.0,
4094
+ "learning_rate": 3.2376945933316086e-07,
4095
+ "loss": 0.0,
4096
+ "step": 663
4097
+ },
4098
+ {
4099
+ "epoch": 17.03,
4100
+ "learning_rate": 3.074208869683282e-07,
4101
+ "loss": 0.0,
4102
+ "step": 664
4103
+ },
4104
+ {
4105
+ "epoch": 17.05,
4106
+ "learning_rate": 2.914926381756855e-07,
4107
+ "loss": 0.0,
4108
+ "step": 665
4109
+ },
4110
+ {
4111
+ "epoch": 17.08,
4112
+ "learning_rate": 2.7598505293194855e-07,
4113
+ "loss": 0.0,
4114
+ "step": 666
4115
+ },
4116
+ {
4117
+ "epoch": 17.1,
4118
+ "learning_rate": 2.6089846223508853e-07,
4119
+ "loss": 0.0,
4120
+ "step": 667
4121
+ },
4122
+ {
4123
+ "epoch": 17.13,
4124
+ "learning_rate": 2.462331880972468e-07,
4125
+ "loss": 0.0,
4126
+ "step": 668
4127
+ },
4128
+ {
4129
+ "epoch": 17.15,
4130
+ "learning_rate": 2.3198954353788271e-07,
4131
+ "loss": 0.0,
4132
+ "step": 669
4133
+ },
4134
+ {
4135
+ "epoch": 17.18,
4136
+ "learning_rate": 2.1816783257708084e-07,
4137
+ "loss": 0.0,
4138
+ "step": 670
4139
+ },
4140
+ {
4141
+ "epoch": 17.21,
4142
+ "learning_rate": 2.0476835022906317e-07,
4143
+ "loss": 0.0,
4144
+ "step": 671
4145
+ },
4146
+ {
4147
+ "epoch": 17.23,
4148
+ "learning_rate": 1.9179138249589836e-07,
4149
+ "loss": 0.0,
4150
+ "step": 672
4151
+ },
4152
+ {
4153
+ "epoch": 17.26,
4154
+ "learning_rate": 1.792372063613823e-07,
4155
+ "loss": 0.0,
4156
+ "step": 673
4157
+ },
4158
+ {
4159
+ "epoch": 17.28,
4160
+ "learning_rate": 1.6710608978514509e-07,
4161
+ "loss": 0.0,
4162
+ "step": 674
4163
+ },
4164
+ {
4165
+ "epoch": 17.31,
4166
+ "learning_rate": 1.5539829169691989e-07,
4167
+ "loss": 0.0001,
4168
+ "step": 675
4169
+ },
4170
+ {
4171
+ "epoch": 17.33,
4172
+ "learning_rate": 1.4411406199102084e-07,
4173
+ "loss": 0.0001,
4174
+ "step": 676
4175
+ },
4176
+ {
4177
+ "epoch": 17.36,
4178
+ "learning_rate": 1.3325364152100063e-07,
4179
+ "loss": 0.0,
4180
+ "step": 677
4181
+ },
4182
+ {
4183
+ "epoch": 17.38,
4184
+ "learning_rate": 1.2281726209452782e-07,
4185
+ "loss": 0.0001,
4186
+ "step": 678
4187
+ },
4188
+ {
4189
+ "epoch": 17.41,
4190
+ "learning_rate": 1.1280514646841545e-07,
4191
+ "loss": 0.0,
4192
+ "step": 679
4193
+ },
4194
+ {
4195
+ "epoch": 17.44,
4196
+ "learning_rate": 1.0321750834388911e-07,
4197
+ "loss": 0.0,
4198
+ "step": 680
4199
+ },
4200
+ {
4201
+ "epoch": 17.46,
4202
+ "learning_rate": 9.405455236200844e-08,
4203
+ "loss": 0.0,
4204
+ "step": 681
4205
+ },
4206
+ {
4207
+ "epoch": 17.49,
4208
+ "learning_rate": 8.531647409931065e-08,
4209
+ "loss": 0.0,
4210
+ "step": 682
4211
+ },
4212
+ {
4213
+ "epoch": 17.51,
4214
+ "learning_rate": 7.700346006362714e-08,
4215
+ "loss": 0.0,
4216
+ "step": 683
4217
+ },
4218
+ {
4219
+ "epoch": 17.54,
4220
+ "learning_rate": 6.91156876901089e-08,
4221
+ "loss": 0.0,
4222
+ "step": 684
4223
+ },
4224
+ {
4225
+ "epoch": 17.56,
4226
+ "learning_rate": 6.165332533744072e-08,
4227
+ "loss": 0.0,
4228
+ "step": 685
4229
+ },
4230
+ {
4231
+ "epoch": 17.59,
4232
+ "learning_rate": 5.4616532284239576e-08,
4233
+ "loss": 0.0001,
4234
+ "step": 686
4235
+ },
4236
+ {
4237
+ "epoch": 17.62,
4238
+ "learning_rate": 4.800545872566176e-08,
4239
+ "loss": 0.0001,
4240
+ "step": 687
4241
+ },
4242
+ {
4243
+ "epoch": 17.64,
4244
+ "learning_rate": 4.182024577019439e-08,
4245
+ "loss": 0.0,
4246
+ "step": 688
4247
+ },
4248
+ {
4249
+ "epoch": 17.67,
4250
+ "learning_rate": 3.606102543664447e-08,
4251
+ "loss": 0.0,
4252
+ "step": 689
4253
+ },
4254
+ {
4255
+ "epoch": 17.69,
4256
+ "learning_rate": 3.072792065132113e-08,
4257
+ "loss": 0.0,
4258
+ "step": 690
4259
+ },
4260
+ {
4261
+ "epoch": 17.72,
4262
+ "learning_rate": 2.582104524541107e-08,
4263
+ "loss": 0.0,
4264
+ "step": 691
4265
+ },
4266
+ {
4267
+ "epoch": 17.74,
4268
+ "learning_rate": 2.1340503952551606e-08,
4269
+ "loss": 0.0,
4270
+ "step": 692
4271
+ },
4272
+ {
4273
+ "epoch": 17.77,
4274
+ "learning_rate": 1.7286392406588026e-08,
4275
+ "loss": 0.0001,
4276
+ "step": 693
4277
+ },
4278
+ {
4279
+ "epoch": 17.79,
4280
+ "learning_rate": 1.365879713954188e-08,
4281
+ "loss": 0.0,
4282
+ "step": 694
4283
+ },
4284
+ {
4285
+ "epoch": 17.82,
4286
+ "learning_rate": 1.0457795579756901e-08,
4287
+ "loss": 0.0,
4288
+ "step": 695
4289
+ },
4290
+ {
4291
+ "epoch": 17.85,
4292
+ "learning_rate": 7.683456050251447e-09,
4293
+ "loss": 0.0,
4294
+ "step": 696
4295
+ },
4296
+ {
4297
+ "epoch": 17.85,
4298
+ "eval_test_accuracy": 85.57692307692307,
4299
+ "eval_test_average": 85.16548761316201,
4300
+ "eval_test_loss": 1.0252264738082886,
4301
+ "eval_test_recall": 84.75405214940098,
4302
+ "eval_test_runtime": 782.1434,
4303
+ "eval_test_samples_per_second": 1.357,
4304
+ "eval_test_steps_per_second": 0.006,
4305
+ "step": 696
4306
+ },
4307
+ {
4308
+ "epoch": 17.85,
4309
+ "eval_val_accuracy": 85.07109004739335,
4310
+ "eval_val_average": 84.84065407207038,
4311
+ "eval_val_loss": 1.024724006652832,
4312
+ "eval_val_recall": 84.6102180967474,
4313
+ "eval_val_runtime": 782.6042,
4314
+ "eval_val_samples_per_second": 1.346,
4315
+ "eval_val_steps_per_second": 0.006,
4316
+ "step": 696
4317
+ },
4318
+ {
4319
+ "epoch": 17.87,
4320
+ "learning_rate": 5.335837767255214e-09,
4321
+ "loss": 0.0,
4322
+ "step": 697
4323
+ },
4324
+ {
4325
+ "epoch": 17.9,
4326
+ "learning_rate": 3.414990838945809e-09,
4327
+ "loss": 0.0,
4328
+ "step": 698
4329
+ },
4330
+ {
4331
+ "epoch": 17.92,
4332
+ "learning_rate": 1.920956264387375e-09,
4333
+ "loss": 0.0,
4334
+ "step": 699
4335
+ },
4336
+ {
4337
+ "epoch": 17.95,
4338
+ "learning_rate": 8.537659326424141e-10,
4339
+ "loss": 0.0,
4340
+ "step": 700
4341
+ },
4342
+ {
4343
+ "epoch": 17.97,
4344
+ "learning_rate": 2.1344262210565293e-10,
4345
+ "loss": 0.0,
4346
+ "step": 701
4347
+ },
4348
+ {
4349
+ "epoch": 18.0,
4350
+ "learning_rate": 0.0,
4351
+ "loss": 0.0,
4352
+ "step": 702
4353
+ },
4354
+ {
4355
+ "epoch": 18.0,
4356
+ "step": 702,
4357
+ "total_flos": 6.135775686914212e+17,
4358
+ "train_loss": 0.034780138895060625,
4359
+ "train_runtime": 14932.2475,
4360
+ "train_samples_per_second": 5.93,
4361
+ "train_steps_per_second": 0.047
4362
+ }
4363
+ ],
4364
+ "max_steps": 702,
4365
+ "num_train_epochs": 18,
4366
+ "total_flos": 6.135775686914212e+17,
4367
+ "trial_name": null,
4368
+ "trial_params": null
4369
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9cdc8f80afab5311b3222468462bc9782415f64fdf14cf1c41ffd61c871b7f
3
+ size 4600