BryanSagbay commited on
Commit
d030ab5
·
verified ·
1 Parent(s): e297508

Upload 10 files

Browse files
config.json ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dccuchile/albert-base-spanish",
3
+ "architectures": [
4
+ "AlbertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout_prob": 0.1,
9
+ "down_scale_factor": 1,
10
+ "embedding_size": 128,
11
+ "eos_token_id": 3,
12
+ "gap_size": 0,
13
+ "hidden_act": "gelu",
14
+ "hidden_dropout_prob": 0,
15
+ "hidden_size": 768,
16
+ "id2label": {
17
+ "0": "control de bienes",
18
+ "1": "capacitaciones",
19
+ "2": "aval",
20
+ "3": "adquisiciones informe rt",
21
+ "4": "adquisiciones",
22
+ "5": "erp crm",
23
+ "6": "control de bienes prestamos",
24
+ "7": "control de cambios",
25
+ "8": "erp bienestar",
26
+ "9": "erp centros de produccion",
27
+ "10": "erp gestion academica evaluacion docente",
28
+ "11": "erp conceder acceso",
29
+ "12": "erp eliminar accesos",
30
+ "13": "erp financiero",
31
+ "14": "erp gestion academica",
32
+ "15": "erp no reportes",
33
+ "16": "erp gestion de biblioteca",
34
+ "17": "erp gestion logistica",
35
+ "18": "erp internacionalizacion",
36
+ "19": "erp investigacion cientifica",
37
+ "20": "erp talento humano",
38
+ "21": "erp salud ocupacional",
39
+ "22": "erp planificacion",
40
+ "23": "erp otros",
41
+ "24": "erp odoo monica otros",
42
+ "25": "evea conceder acceso",
43
+ "26": "erp vinculacion",
44
+ "27": "erp votaciones",
45
+ "28": "evea acceso al servicio evea",
46
+ "29": "evea cursos",
47
+ "30": "evea modificacion de la informacion",
48
+ "31": "evea eliminar accesos",
49
+ "32": "evea encuesta cuestionario tareas examenes",
50
+ "33": "evea interrupcion del servicio",
51
+ "34": "evea matriculacion en cursos",
52
+ "35": "evea reportes",
53
+ "36": "evea nuevo",
54
+ "37": "evea otro",
55
+ "38": "evea plataforma",
56
+ "39": "evea problema con la configuracion",
57
+ "40": "firma electronica quipux",
58
+ "41": "encuesta",
59
+ "42": "evea sincronizacion",
60
+ "43": "evea sincronizacion proveedor",
61
+ "44": "evea respaldos",
62
+ "45": "infraestructura respaldo y recuperacion",
63
+ "46": "gestion",
64
+ "47": "infraestructura conectividad",
65
+ "48": "infraestructura mantenimiento preventivo",
66
+ "49": "infraestructura uso de infraestructura fisica",
67
+ "50": "office 365 conceder acceso",
68
+ "51": "informe",
69
+ "52": "office 365",
70
+ "53": "office 365 acceso al servicio",
71
+ "54": "office 365 bloqueo de servicio",
72
+ "55": "office 365 problema con el servicio",
73
+ "56": "office 365 creacion de cuenta",
74
+ "57": "office 365 eliminar accesos",
75
+ "58": "office 365 interrupcion del servicio",
76
+ "59": "office 365 otro",
77
+ "60": "poa",
78
+ "61": "pagina web",
79
+ "62": "otros",
80
+ "63": "oficio",
81
+ "64": "office 365 roles y permisos",
82
+ "65": "reportes",
83
+ "66": "proveedor",
84
+ "67": "redes sociales",
85
+ "68": "proyectos",
86
+ "69": "repositorio base digital",
87
+ "70": "soporte equipos mantenimiento de hardware",
88
+ "71": "revistas",
89
+ "72": "servicios conceder accesos",
90
+ "73": "servicios elimnar accesos",
91
+ "74": "soporte equipos garantia",
92
+ "75": "soporte erp gestion logistica",
93
+ "76": "soporte equipos mantenimiento de software",
94
+ "77": "soporte erp bienestar",
95
+ "78": "soporte erp financiero",
96
+ "79": "soporte erp gestion academica",
97
+ "80": "soporte infraestructura",
98
+ "81": "soporte evea sincronizacion",
99
+ "82": "soporte erp talento humano",
100
+ "83": "soporte evea plataforma",
101
+ "84": "soporte erp otros",
102
+ "85": "soporte seguridad",
103
+ "86": "soporte otros",
104
+ "87": "soporte pagina web",
105
+ "88": "soporte repositorio base digital",
106
+ "89": "soporte revistas",
107
+ "90": "soporte odoo monica otros",
108
+ "91": "soporte infraestructura conectividad",
109
+ "92": "soporte infraestructura respaldo y recuperacion",
110
+ "93": "soporte mantenimiento correctivo",
111
+ "94": "soporte office 365",
112
+ "95": "streaming eventos",
113
+ "96": "soporte streaming eventos",
114
+ "97": "soporte turniting",
115
+ "98": "soporte zoom",
116
+ "99": "sin datos",
117
+ "100": "turnitin",
118
+ "101": "zoom credenciales zoom",
119
+ "102": "zoom interrupcion del servicio",
120
+ "103": "zoom licenciamiento",
121
+ "104": "zoom otros",
122
+ "105": "zoom webinar",
123
+ "106": "zoom conceder accesos",
124
+ "107": "zoom eliminar accesos",
125
+ "108": "zoom videoconferencia",
126
+ "109": "reporte informe"
127
+ },
128
+ "initializer_range": 0.02,
129
+ "inner_group_num": 1,
130
+ "intermediate_size": 3072,
131
+ "label2id": {
132
+ "adquisiciones": 4,
133
+ "adquisiciones informe rt": 3,
134
+ "aval": 2,
135
+ "capacitaciones": 1,
136
+ "control de bienes": 0,
137
+ "control de bienes prestamos": 6,
138
+ "control de cambios": 7,
139
+ "encuesta": 41,
140
+ "erp bienestar": 8,
141
+ "erp centros de produccion": 9,
142
+ "erp conceder acceso": 11,
143
+ "erp crm": 5,
144
+ "erp eliminar accesos": 12,
145
+ "erp financiero": 13,
146
+ "erp gestion academica": 14,
147
+ "erp gestion academica evaluacion docente": 10,
148
+ "erp gestion de biblioteca": 16,
149
+ "erp gestion logistica": 17,
150
+ "erp internacionalizacion": 18,
151
+ "erp investigacion cientifica": 19,
152
+ "erp no reportes": 15,
153
+ "erp odoo monica otros": 24,
154
+ "erp otros": 23,
155
+ "erp planificacion": 22,
156
+ "erp salud ocupacional": 21,
157
+ "erp talento humano": 20,
158
+ "erp vinculacion": 26,
159
+ "erp votaciones": 27,
160
+ "evea acceso al servicio evea": 28,
161
+ "evea conceder acceso": 25,
162
+ "evea cursos": 29,
163
+ "evea eliminar accesos": 31,
164
+ "evea encuesta cuestionario tareas examenes": 32,
165
+ "evea interrupcion del servicio": 33,
166
+ "evea matriculacion en cursos": 34,
167
+ "evea modificacion de la informacion": 30,
168
+ "evea nuevo": 36,
169
+ "evea otro": 37,
170
+ "evea plataforma": 38,
171
+ "evea problema con la configuracion": 39,
172
+ "evea reportes": 35,
173
+ "evea respaldos": 44,
174
+ "evea sincronizacion": 42,
175
+ "evea sincronizacion proveedor": 43,
176
+ "firma electronica quipux": 40,
177
+ "gestion": 46,
178
+ "informe": 51,
179
+ "infraestructura conectividad": 47,
180
+ "infraestructura mantenimiento preventivo": 48,
181
+ "infraestructura respaldo y recuperacion": 45,
182
+ "infraestructura uso de infraestructura fisica": 49,
183
+ "office 365": 52,
184
+ "office 365 acceso al servicio": 53,
185
+ "office 365 bloqueo de servicio": 54,
186
+ "office 365 conceder acceso": 50,
187
+ "office 365 creacion de cuenta": 56,
188
+ "office 365 eliminar accesos": 57,
189
+ "office 365 interrupcion del servicio": 58,
190
+ "office 365 otro": 59,
191
+ "office 365 problema con el servicio": 55,
192
+ "office 365 roles y permisos": 64,
193
+ "oficio": 63,
194
+ "otros": 62,
195
+ "pagina web": 61,
196
+ "poa": 60,
197
+ "proveedor": 66,
198
+ "proyectos": 68,
199
+ "redes sociales": 67,
200
+ "reporte informe": 109,
201
+ "reportes": 65,
202
+ "repositorio base digital": 69,
203
+ "revistas": 71,
204
+ "servicios conceder accesos": 72,
205
+ "servicios elimnar accesos": 73,
206
+ "sin datos": 99,
207
+ "soporte equipos garantia": 74,
208
+ "soporte equipos mantenimiento de hardware": 70,
209
+ "soporte equipos mantenimiento de software": 76,
210
+ "soporte erp bienestar": 77,
211
+ "soporte erp financiero": 78,
212
+ "soporte erp gestion academica": 79,
213
+ "soporte erp gestion logistica": 75,
214
+ "soporte erp otros": 84,
215
+ "soporte erp talento humano": 82,
216
+ "soporte evea plataforma": 83,
217
+ "soporte evea sincronizacion": 81,
218
+ "soporte infraestructura": 80,
219
+ "soporte infraestructura conectividad": 91,
220
+ "soporte infraestructura respaldo y recuperacion": 92,
221
+ "soporte mantenimiento correctivo": 93,
222
+ "soporte odoo monica otros": 90,
223
+ "soporte office 365": 94,
224
+ "soporte otros": 86,
225
+ "soporte pagina web": 87,
226
+ "soporte repositorio base digital": 88,
227
+ "soporte revistas": 89,
228
+ "soporte seguridad": 85,
229
+ "soporte streaming eventos": 96,
230
+ "soporte turniting": 97,
231
+ "soporte zoom": 98,
232
+ "streaming eventos": 95,
233
+ "turnitin": 100,
234
+ "zoom conceder accesos": 106,
235
+ "zoom credenciales zoom": 101,
236
+ "zoom eliminar accesos": 107,
237
+ "zoom interrupcion del servicio": 102,
238
+ "zoom licenciamiento": 103,
239
+ "zoom otros": 104,
240
+ "zoom videoconferencia": 108,
241
+ "zoom webinar": 105
242
+ },
243
+ "layer_norm_eps": 1e-12,
244
+ "max_position_embeddings": 512,
245
+ "model_type": "albert",
246
+ "net_structure_type": 0,
247
+ "num_attention_heads": 12,
248
+ "num_hidden_groups": 1,
249
+ "num_hidden_layers": 12,
250
+ "num_memory_blocks": 0,
251
+ "pad_token_id": 0,
252
+ "position_embedding_type": "absolute",
253
+ "problem_type": "single_label_classification",
254
+ "torch_dtype": "float32",
255
+ "transformers_version": "4.39.3",
256
+ "type_vocab_size": 2,
257
+ "vocab_size": 31000
258
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909e97aade0be53fc202864814e872ee3509c2b036257c62995f0c7f7a9e2543
3
+ size 47588120
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41fba1428b748c8b47cca1be887635074f6905ff26550bbad0bb1cf40ba8728
3
+ size 95192333
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be4aae32274ac6eb63aa22c35c92225e8f4e902c4a18206cedebd97731c0d092
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d95796ee97736aa2d0fb68a4f67e3fddbc81fbce9ec77a81f9e8eb8c1259245
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[SEP]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c98d44f2c8ba92d861c8705ca4386454387ac7070fb00e9cd934f401387656
3
+ size 797897
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": true,
48
+ "eos_token": "[SEP]",
49
+ "keep_accents": true,
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 512,
52
+ "pad_token": "<pad>",
53
+ "remove_space": true,
54
+ "sep_token": "[SEP]",
55
+ "sp_model_kwargs": {},
56
+ "tokenizer_class": "AlbertTokenizer",
57
+ "unk_token": "<unk>"
58
+ }
trainer_state.json ADDED
@@ -0,0 +1,2174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9182948490230906,
3
+ "best_model_checkpoint": "./results/checkpoint-29910",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 29910,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02,
13
+ "grad_norm": 4.954967021942139,
14
+ "learning_rate": 4.9832831828819794e-05,
15
+ "loss": 4.7403,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.03,
20
+ "grad_norm": 9.918214797973633,
21
+ "learning_rate": 4.9665663657639585e-05,
22
+ "loss": 4.7281,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.05,
27
+ "grad_norm": 6.391179084777832,
28
+ "learning_rate": 4.949849548645938e-05,
29
+ "loss": 4.6786,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.07,
34
+ "grad_norm": 6.756315231323242,
35
+ "learning_rate": 4.9331327315279175e-05,
36
+ "loss": 4.6128,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.08,
41
+ "grad_norm": 8.407713890075684,
42
+ "learning_rate": 4.916415914409897e-05,
43
+ "loss": 4.4836,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "grad_norm": 8.354033470153809,
49
+ "learning_rate": 4.899699097291876e-05,
50
+ "loss": 4.3776,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.12,
55
+ "grad_norm": 7.996518611907959,
56
+ "learning_rate": 4.882982280173855e-05,
57
+ "loss": 4.2701,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.13,
62
+ "grad_norm": 14.100532531738281,
63
+ "learning_rate": 4.866265463055835e-05,
64
+ "loss": 4.1032,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.15,
69
+ "grad_norm": 10.907315254211426,
70
+ "learning_rate": 4.849548645937814e-05,
71
+ "loss": 3.952,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.17,
76
+ "grad_norm": 9.731605529785156,
77
+ "learning_rate": 4.8328318288197924e-05,
78
+ "loss": 3.732,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 0.18,
83
+ "grad_norm": 9.989665985107422,
84
+ "learning_rate": 4.816115011701772e-05,
85
+ "loss": 3.5489,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 0.2,
90
+ "grad_norm": 9.542133331298828,
91
+ "learning_rate": 4.7993981945837514e-05,
92
+ "loss": 3.3949,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 0.22,
97
+ "grad_norm": 11.988595008850098,
98
+ "learning_rate": 4.7826813774657305e-05,
99
+ "loss": 3.216,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 0.23,
104
+ "grad_norm": 13.553967475891113,
105
+ "learning_rate": 4.76596456034771e-05,
106
+ "loss": 2.9855,
107
+ "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.25,
111
+ "grad_norm": 42.20621109008789,
112
+ "learning_rate": 4.7492477432296895e-05,
113
+ "loss": 2.7659,
114
+ "step": 1500
115
+ },
116
+ {
117
+ "epoch": 0.27,
118
+ "grad_norm": 18.790130615234375,
119
+ "learning_rate": 4.732530926111669e-05,
120
+ "loss": 2.5604,
121
+ "step": 1600
122
+ },
123
+ {
124
+ "epoch": 0.28,
125
+ "grad_norm": 20.554113388061523,
126
+ "learning_rate": 4.715814108993648e-05,
127
+ "loss": 2.4376,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 0.3,
132
+ "grad_norm": 18.882707595825195,
133
+ "learning_rate": 4.699097291875627e-05,
134
+ "loss": 2.3501,
135
+ "step": 1800
136
+ },
137
+ {
138
+ "epoch": 0.32,
139
+ "grad_norm": 14.733109474182129,
140
+ "learning_rate": 4.682380474757606e-05,
141
+ "loss": 2.168,
142
+ "step": 1900
143
+ },
144
+ {
145
+ "epoch": 0.33,
146
+ "grad_norm": 17.430740356445312,
147
+ "learning_rate": 4.665663657639586e-05,
148
+ "loss": 2.0081,
149
+ "step": 2000
150
+ },
151
+ {
152
+ "epoch": 0.35,
153
+ "grad_norm": 21.797836303710938,
154
+ "learning_rate": 4.6489468405215644e-05,
155
+ "loss": 1.9554,
156
+ "step": 2100
157
+ },
158
+ {
159
+ "epoch": 0.37,
160
+ "grad_norm": 13.148958206176758,
161
+ "learning_rate": 4.632230023403544e-05,
162
+ "loss": 1.8524,
163
+ "step": 2200
164
+ },
165
+ {
166
+ "epoch": 0.38,
167
+ "grad_norm": 14.161394119262695,
168
+ "learning_rate": 4.6155132062855234e-05,
169
+ "loss": 1.793,
170
+ "step": 2300
171
+ },
172
+ {
173
+ "epoch": 0.4,
174
+ "grad_norm": 20.908519744873047,
175
+ "learning_rate": 4.5987963891675026e-05,
176
+ "loss": 1.6493,
177
+ "step": 2400
178
+ },
179
+ {
180
+ "epoch": 0.42,
181
+ "grad_norm": 15.107952117919922,
182
+ "learning_rate": 4.582079572049482e-05,
183
+ "loss": 1.5724,
184
+ "step": 2500
185
+ },
186
+ {
187
+ "epoch": 0.43,
188
+ "grad_norm": 18.561201095581055,
189
+ "learning_rate": 4.5653627549314615e-05,
190
+ "loss": 1.4915,
191
+ "step": 2600
192
+ },
193
+ {
194
+ "epoch": 0.45,
195
+ "grad_norm": 15.365275382995605,
196
+ "learning_rate": 4.548645937813441e-05,
197
+ "loss": 1.488,
198
+ "step": 2700
199
+ },
200
+ {
201
+ "epoch": 0.47,
202
+ "grad_norm": 16.04875946044922,
203
+ "learning_rate": 4.53192912069542e-05,
204
+ "loss": 1.4316,
205
+ "step": 2800
206
+ },
207
+ {
208
+ "epoch": 0.48,
209
+ "grad_norm": 13.593673706054688,
210
+ "learning_rate": 4.515212303577399e-05,
211
+ "loss": 1.456,
212
+ "step": 2900
213
+ },
214
+ {
215
+ "epoch": 0.5,
216
+ "grad_norm": 16.379798889160156,
217
+ "learning_rate": 4.498495486459378e-05,
218
+ "loss": 1.3006,
219
+ "step": 3000
220
+ },
221
+ {
222
+ "epoch": 0.52,
223
+ "grad_norm": 13.564205169677734,
224
+ "learning_rate": 4.481778669341358e-05,
225
+ "loss": 1.2661,
226
+ "step": 3100
227
+ },
228
+ {
229
+ "epoch": 0.53,
230
+ "grad_norm": 15.44586181640625,
231
+ "learning_rate": 4.4650618522233364e-05,
232
+ "loss": 1.2917,
233
+ "step": 3200
234
+ },
235
+ {
236
+ "epoch": 0.55,
237
+ "grad_norm": 12.80644416809082,
238
+ "learning_rate": 4.448345035105316e-05,
239
+ "loss": 1.1765,
240
+ "step": 3300
241
+ },
242
+ {
243
+ "epoch": 0.57,
244
+ "grad_norm": 19.545106887817383,
245
+ "learning_rate": 4.4316282179872954e-05,
246
+ "loss": 1.1622,
247
+ "step": 3400
248
+ },
249
+ {
250
+ "epoch": 0.59,
251
+ "grad_norm": 14.377379417419434,
252
+ "learning_rate": 4.414911400869275e-05,
253
+ "loss": 1.1047,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 0.6,
258
+ "grad_norm": 21.595245361328125,
259
+ "learning_rate": 4.398194583751254e-05,
260
+ "loss": 1.1384,
261
+ "step": 3600
262
+ },
263
+ {
264
+ "epoch": 0.62,
265
+ "grad_norm": 14.641448020935059,
266
+ "learning_rate": 4.3814777666332335e-05,
267
+ "loss": 1.0872,
268
+ "step": 3700
269
+ },
270
+ {
271
+ "epoch": 0.64,
272
+ "grad_norm": 13.082781791687012,
273
+ "learning_rate": 4.364760949515213e-05,
274
+ "loss": 1.0366,
275
+ "step": 3800
276
+ },
277
+ {
278
+ "epoch": 0.65,
279
+ "grad_norm": 18.576641082763672,
280
+ "learning_rate": 4.348044132397192e-05,
281
+ "loss": 1.0953,
282
+ "step": 3900
283
+ },
284
+ {
285
+ "epoch": 0.67,
286
+ "grad_norm": 9.915220260620117,
287
+ "learning_rate": 4.331327315279171e-05,
288
+ "loss": 1.001,
289
+ "step": 4000
290
+ },
291
+ {
292
+ "epoch": 0.69,
293
+ "grad_norm": 12.059024810791016,
294
+ "learning_rate": 4.31461049816115e-05,
295
+ "loss": 1.0585,
296
+ "step": 4100
297
+ },
298
+ {
299
+ "epoch": 0.7,
300
+ "grad_norm": 17.607337951660156,
301
+ "learning_rate": 4.29789368104313e-05,
302
+ "loss": 1.0179,
303
+ "step": 4200
304
+ },
305
+ {
306
+ "epoch": 0.72,
307
+ "grad_norm": 16.324430465698242,
308
+ "learning_rate": 4.2811768639251084e-05,
309
+ "loss": 0.9491,
310
+ "step": 4300
311
+ },
312
+ {
313
+ "epoch": 0.74,
314
+ "grad_norm": 19.5161075592041,
315
+ "learning_rate": 4.264460046807088e-05,
316
+ "loss": 0.9374,
317
+ "step": 4400
318
+ },
319
+ {
320
+ "epoch": 0.75,
321
+ "grad_norm": 20.448488235473633,
322
+ "learning_rate": 4.2477432296890674e-05,
323
+ "loss": 0.9146,
324
+ "step": 4500
325
+ },
326
+ {
327
+ "epoch": 0.77,
328
+ "grad_norm": 10.544804573059082,
329
+ "learning_rate": 4.231026412571047e-05,
330
+ "loss": 0.9187,
331
+ "step": 4600
332
+ },
333
+ {
334
+ "epoch": 0.79,
335
+ "grad_norm": 17.095731735229492,
336
+ "learning_rate": 4.214309595453026e-05,
337
+ "loss": 0.8732,
338
+ "step": 4700
339
+ },
340
+ {
341
+ "epoch": 0.8,
342
+ "grad_norm": 18.1314754486084,
343
+ "learning_rate": 4.197592778335005e-05,
344
+ "loss": 0.9072,
345
+ "step": 4800
346
+ },
347
+ {
348
+ "epoch": 0.82,
349
+ "grad_norm": 8.516233444213867,
350
+ "learning_rate": 4.180875961216985e-05,
351
+ "loss": 0.8264,
352
+ "step": 4900
353
+ },
354
+ {
355
+ "epoch": 0.84,
356
+ "grad_norm": 12.620676040649414,
357
+ "learning_rate": 4.164159144098964e-05,
358
+ "loss": 0.8425,
359
+ "step": 5000
360
+ },
361
+ {
362
+ "epoch": 0.85,
363
+ "grad_norm": 23.544219970703125,
364
+ "learning_rate": 4.147442326980943e-05,
365
+ "loss": 0.8371,
366
+ "step": 5100
367
+ },
368
+ {
369
+ "epoch": 0.87,
370
+ "grad_norm": 15.980536460876465,
371
+ "learning_rate": 4.130725509862922e-05,
372
+ "loss": 0.8257,
373
+ "step": 5200
374
+ },
375
+ {
376
+ "epoch": 0.89,
377
+ "grad_norm": 16.621524810791016,
378
+ "learning_rate": 4.114008692744902e-05,
379
+ "loss": 0.7705,
380
+ "step": 5300
381
+ },
382
+ {
383
+ "epoch": 0.9,
384
+ "grad_norm": 25.2496280670166,
385
+ "learning_rate": 4.0972918756268804e-05,
386
+ "loss": 0.7741,
387
+ "step": 5400
388
+ },
389
+ {
390
+ "epoch": 0.92,
391
+ "grad_norm": 12.541385650634766,
392
+ "learning_rate": 4.08057505850886e-05,
393
+ "loss": 0.7408,
394
+ "step": 5500
395
+ },
396
+ {
397
+ "epoch": 0.94,
398
+ "grad_norm": 30.975236892700195,
399
+ "learning_rate": 4.0638582413908394e-05,
400
+ "loss": 0.7417,
401
+ "step": 5600
402
+ },
403
+ {
404
+ "epoch": 0.95,
405
+ "grad_norm": 16.33625030517578,
406
+ "learning_rate": 4.0471414242728186e-05,
407
+ "loss": 0.766,
408
+ "step": 5700
409
+ },
410
+ {
411
+ "epoch": 0.97,
412
+ "grad_norm": 17.48399543762207,
413
+ "learning_rate": 4.030424607154798e-05,
414
+ "loss": 0.8336,
415
+ "step": 5800
416
+ },
417
+ {
418
+ "epoch": 0.99,
419
+ "grad_norm": 19.421096801757812,
420
+ "learning_rate": 4.013707790036777e-05,
421
+ "loss": 0.7135,
422
+ "step": 5900
423
+ },
424
+ {
425
+ "epoch": 1.0,
426
+ "eval_accuracy": 0.7995263469508584,
427
+ "eval_f1": 0.7955612032049123,
428
+ "eval_loss": 0.7165877223014832,
429
+ "eval_precision": 0.805591523931921,
430
+ "eval_recall": 0.7995263469508584,
431
+ "eval_runtime": 64.1068,
432
+ "eval_samples_per_second": 131.733,
433
+ "eval_steps_per_second": 8.236,
434
+ "step": 5982
435
+ },
436
+ {
437
+ "epoch": 1.0,
438
+ "grad_norm": 10.486939430236816,
439
+ "learning_rate": 3.996990972918757e-05,
440
+ "loss": 0.685,
441
+ "step": 6000
442
+ },
443
+ {
444
+ "epoch": 1.02,
445
+ "grad_norm": 19.489837646484375,
446
+ "learning_rate": 3.980274155800736e-05,
447
+ "loss": 0.6431,
448
+ "step": 6100
449
+ },
450
+ {
451
+ "epoch": 1.04,
452
+ "grad_norm": 8.935369491577148,
453
+ "learning_rate": 3.963557338682715e-05,
454
+ "loss": 0.6402,
455
+ "step": 6200
456
+ },
457
+ {
458
+ "epoch": 1.05,
459
+ "grad_norm": 10.298083305358887,
460
+ "learning_rate": 3.946840521564694e-05,
461
+ "loss": 0.6261,
462
+ "step": 6300
463
+ },
464
+ {
465
+ "epoch": 1.07,
466
+ "grad_norm": 18.606569290161133,
467
+ "learning_rate": 3.930123704446674e-05,
468
+ "loss": 0.5874,
469
+ "step": 6400
470
+ },
471
+ {
472
+ "epoch": 1.09,
473
+ "grad_norm": 12.412484169006348,
474
+ "learning_rate": 3.913406887328653e-05,
475
+ "loss": 0.5923,
476
+ "step": 6500
477
+ },
478
+ {
479
+ "epoch": 1.1,
480
+ "grad_norm": 9.3939847946167,
481
+ "learning_rate": 3.8966900702106316e-05,
482
+ "loss": 0.6091,
483
+ "step": 6600
484
+ },
485
+ {
486
+ "epoch": 1.12,
487
+ "grad_norm": 14.168825149536133,
488
+ "learning_rate": 3.8799732530926114e-05,
489
+ "loss": 0.6259,
490
+ "step": 6700
491
+ },
492
+ {
493
+ "epoch": 1.14,
494
+ "grad_norm": 18.846487045288086,
495
+ "learning_rate": 3.8632564359745906e-05,
496
+ "loss": 0.5543,
497
+ "step": 6800
498
+ },
499
+ {
500
+ "epoch": 1.15,
501
+ "grad_norm": 7.268430709838867,
502
+ "learning_rate": 3.84653961885657e-05,
503
+ "loss": 0.5615,
504
+ "step": 6900
505
+ },
506
+ {
507
+ "epoch": 1.17,
508
+ "grad_norm": 6.565930366516113,
509
+ "learning_rate": 3.829822801738549e-05,
510
+ "loss": 0.5725,
511
+ "step": 7000
512
+ },
513
+ {
514
+ "epoch": 1.19,
515
+ "grad_norm": 11.122172355651855,
516
+ "learning_rate": 3.813105984620529e-05,
517
+ "loss": 0.543,
518
+ "step": 7100
519
+ },
520
+ {
521
+ "epoch": 1.2,
522
+ "grad_norm": 15.909794807434082,
523
+ "learning_rate": 3.796389167502508e-05,
524
+ "loss": 0.5053,
525
+ "step": 7200
526
+ },
527
+ {
528
+ "epoch": 1.22,
529
+ "grad_norm": 17.935998916625977,
530
+ "learning_rate": 3.779672350384487e-05,
531
+ "loss": 0.5866,
532
+ "step": 7300
533
+ },
534
+ {
535
+ "epoch": 1.24,
536
+ "grad_norm": 7.46903657913208,
537
+ "learning_rate": 3.762955533266466e-05,
538
+ "loss": 0.5573,
539
+ "step": 7400
540
+ },
541
+ {
542
+ "epoch": 1.25,
543
+ "grad_norm": 10.208723068237305,
544
+ "learning_rate": 3.746238716148446e-05,
545
+ "loss": 0.511,
546
+ "step": 7500
547
+ },
548
+ {
549
+ "epoch": 1.27,
550
+ "grad_norm": 15.062224388122559,
551
+ "learning_rate": 3.729521899030425e-05,
552
+ "loss": 0.5211,
553
+ "step": 7600
554
+ },
555
+ {
556
+ "epoch": 1.29,
557
+ "grad_norm": 11.787239074707031,
558
+ "learning_rate": 3.7128050819124036e-05,
559
+ "loss": 0.5687,
560
+ "step": 7700
561
+ },
562
+ {
563
+ "epoch": 1.3,
564
+ "grad_norm": 20.22210693359375,
565
+ "learning_rate": 3.6960882647943834e-05,
566
+ "loss": 0.544,
567
+ "step": 7800
568
+ },
569
+ {
570
+ "epoch": 1.32,
571
+ "grad_norm": 22.17251205444336,
572
+ "learning_rate": 3.6793714476763626e-05,
573
+ "loss": 0.5223,
574
+ "step": 7900
575
+ },
576
+ {
577
+ "epoch": 1.34,
578
+ "grad_norm": 16.83318519592285,
579
+ "learning_rate": 3.662654630558342e-05,
580
+ "loss": 0.5043,
581
+ "step": 8000
582
+ },
583
+ {
584
+ "epoch": 1.35,
585
+ "grad_norm": 10.143548965454102,
586
+ "learning_rate": 3.645937813440321e-05,
587
+ "loss": 0.5181,
588
+ "step": 8100
589
+ },
590
+ {
591
+ "epoch": 1.37,
592
+ "grad_norm": 20.629831314086914,
593
+ "learning_rate": 3.629220996322301e-05,
594
+ "loss": 0.4886,
595
+ "step": 8200
596
+ },
597
+ {
598
+ "epoch": 1.39,
599
+ "grad_norm": 12.14686107635498,
600
+ "learning_rate": 3.61250417920428e-05,
601
+ "loss": 0.5667,
602
+ "step": 8300
603
+ },
604
+ {
605
+ "epoch": 1.4,
606
+ "grad_norm": 17.1881160736084,
607
+ "learning_rate": 3.595787362086259e-05,
608
+ "loss": 0.5211,
609
+ "step": 8400
610
+ },
611
+ {
612
+ "epoch": 1.42,
613
+ "grad_norm": 7.506267070770264,
614
+ "learning_rate": 3.579070544968238e-05,
615
+ "loss": 0.5356,
616
+ "step": 8500
617
+ },
618
+ {
619
+ "epoch": 1.44,
620
+ "grad_norm": 23.122560501098633,
621
+ "learning_rate": 3.562353727850217e-05,
622
+ "loss": 0.5044,
623
+ "step": 8600
624
+ },
625
+ {
626
+ "epoch": 1.45,
627
+ "grad_norm": 21.808191299438477,
628
+ "learning_rate": 3.545636910732197e-05,
629
+ "loss": 0.5059,
630
+ "step": 8700
631
+ },
632
+ {
633
+ "epoch": 1.47,
634
+ "grad_norm": 12.899435997009277,
635
+ "learning_rate": 3.5289200936141756e-05,
636
+ "loss": 0.5082,
637
+ "step": 8800
638
+ },
639
+ {
640
+ "epoch": 1.49,
641
+ "grad_norm": 11.228046417236328,
642
+ "learning_rate": 3.5122032764961554e-05,
643
+ "loss": 0.4466,
644
+ "step": 8900
645
+ },
646
+ {
647
+ "epoch": 1.5,
648
+ "grad_norm": 15.656624794006348,
649
+ "learning_rate": 3.4954864593781346e-05,
650
+ "loss": 0.4877,
651
+ "step": 9000
652
+ },
653
+ {
654
+ "epoch": 1.52,
655
+ "grad_norm": 14.958187103271484,
656
+ "learning_rate": 3.478769642260114e-05,
657
+ "loss": 0.4283,
658
+ "step": 9100
659
+ },
660
+ {
661
+ "epoch": 1.54,
662
+ "grad_norm": 27.727924346923828,
663
+ "learning_rate": 3.462052825142093e-05,
664
+ "loss": 0.504,
665
+ "step": 9200
666
+ },
667
+ {
668
+ "epoch": 1.55,
669
+ "grad_norm": 21.103147506713867,
670
+ "learning_rate": 3.445336008024073e-05,
671
+ "loss": 0.5081,
672
+ "step": 9300
673
+ },
674
+ {
675
+ "epoch": 1.57,
676
+ "grad_norm": 14.884688377380371,
677
+ "learning_rate": 3.428619190906052e-05,
678
+ "loss": 0.47,
679
+ "step": 9400
680
+ },
681
+ {
682
+ "epoch": 1.59,
683
+ "grad_norm": 26.825908660888672,
684
+ "learning_rate": 3.411902373788031e-05,
685
+ "loss": 0.4587,
686
+ "step": 9500
687
+ },
688
+ {
689
+ "epoch": 1.6,
690
+ "grad_norm": 23.39227867126465,
691
+ "learning_rate": 3.39518555667001e-05,
692
+ "loss": 0.4621,
693
+ "step": 9600
694
+ },
695
+ {
696
+ "epoch": 1.62,
697
+ "grad_norm": 15.503640174865723,
698
+ "learning_rate": 3.378468739551989e-05,
699
+ "loss": 0.5122,
700
+ "step": 9700
701
+ },
702
+ {
703
+ "epoch": 1.64,
704
+ "grad_norm": 13.298539161682129,
705
+ "learning_rate": 3.361751922433969e-05,
706
+ "loss": 0.4846,
707
+ "step": 9800
708
+ },
709
+ {
710
+ "epoch": 1.65,
711
+ "grad_norm": 17.961261749267578,
712
+ "learning_rate": 3.3450351053159476e-05,
713
+ "loss": 0.4576,
714
+ "step": 9900
715
+ },
716
+ {
717
+ "epoch": 1.67,
718
+ "grad_norm": 15.622933387756348,
719
+ "learning_rate": 3.3283182881979274e-05,
720
+ "loss": 0.4239,
721
+ "step": 10000
722
+ },
723
+ {
724
+ "epoch": 1.69,
725
+ "grad_norm": 15.286486625671387,
726
+ "learning_rate": 3.3116014710799066e-05,
727
+ "loss": 0.4478,
728
+ "step": 10100
729
+ },
730
+ {
731
+ "epoch": 1.71,
732
+ "grad_norm": 28.045799255371094,
733
+ "learning_rate": 3.294884653961886e-05,
734
+ "loss": 0.4457,
735
+ "step": 10200
736
+ },
737
+ {
738
+ "epoch": 1.72,
739
+ "grad_norm": 23.578136444091797,
740
+ "learning_rate": 3.278167836843865e-05,
741
+ "loss": 0.464,
742
+ "step": 10300
743
+ },
744
+ {
745
+ "epoch": 1.74,
746
+ "grad_norm": 12.858305931091309,
747
+ "learning_rate": 3.261451019725844e-05,
748
+ "loss": 0.4507,
749
+ "step": 10400
750
+ },
751
+ {
752
+ "epoch": 1.76,
753
+ "grad_norm": 18.197952270507812,
754
+ "learning_rate": 3.244734202607824e-05,
755
+ "loss": 0.4158,
756
+ "step": 10500
757
+ },
758
+ {
759
+ "epoch": 1.77,
760
+ "grad_norm": 5.134513854980469,
761
+ "learning_rate": 3.228017385489803e-05,
762
+ "loss": 0.4088,
763
+ "step": 10600
764
+ },
765
+ {
766
+ "epoch": 1.79,
767
+ "grad_norm": 2.1014363765716553,
768
+ "learning_rate": 3.211300568371782e-05,
769
+ "loss": 0.4524,
770
+ "step": 10700
771
+ },
772
+ {
773
+ "epoch": 1.81,
774
+ "grad_norm": 14.459040641784668,
775
+ "learning_rate": 3.194583751253761e-05,
776
+ "loss": 0.4637,
777
+ "step": 10800
778
+ },
779
+ {
780
+ "epoch": 1.82,
781
+ "grad_norm": 29.922468185424805,
782
+ "learning_rate": 3.177866934135741e-05,
783
+ "loss": 0.4302,
784
+ "step": 10900
785
+ },
786
+ {
787
+ "epoch": 1.84,
788
+ "grad_norm": 23.523460388183594,
789
+ "learning_rate": 3.1611501170177196e-05,
790
+ "loss": 0.4155,
791
+ "step": 11000
792
+ },
793
+ {
794
+ "epoch": 1.86,
795
+ "grad_norm": 11.668371200561523,
796
+ "learning_rate": 3.1444332998996994e-05,
797
+ "loss": 0.4238,
798
+ "step": 11100
799
+ },
800
+ {
801
+ "epoch": 1.87,
802
+ "grad_norm": 15.930005073547363,
803
+ "learning_rate": 3.1277164827816786e-05,
804
+ "loss": 0.4072,
805
+ "step": 11200
806
+ },
807
+ {
808
+ "epoch": 1.89,
809
+ "grad_norm": 18.61160659790039,
810
+ "learning_rate": 3.110999665663658e-05,
811
+ "loss": 0.4348,
812
+ "step": 11300
813
+ },
814
+ {
815
+ "epoch": 1.91,
816
+ "grad_norm": 27.475053787231445,
817
+ "learning_rate": 3.094282848545637e-05,
818
+ "loss": 0.4648,
819
+ "step": 11400
820
+ },
821
+ {
822
+ "epoch": 1.92,
823
+ "grad_norm": 6.477468013763428,
824
+ "learning_rate": 3.077566031427616e-05,
825
+ "loss": 0.4241,
826
+ "step": 11500
827
+ },
828
+ {
829
+ "epoch": 1.94,
830
+ "grad_norm": 26.99014663696289,
831
+ "learning_rate": 3.060849214309596e-05,
832
+ "loss": 0.4243,
833
+ "step": 11600
834
+ },
835
+ {
836
+ "epoch": 1.96,
837
+ "grad_norm": 16.152755737304688,
838
+ "learning_rate": 3.0441323971915747e-05,
839
+ "loss": 0.4186,
840
+ "step": 11700
841
+ },
842
+ {
843
+ "epoch": 1.97,
844
+ "grad_norm": 15.536150932312012,
845
+ "learning_rate": 3.0274155800735542e-05,
846
+ "loss": 0.3808,
847
+ "step": 11800
848
+ },
849
+ {
850
+ "epoch": 1.99,
851
+ "grad_norm": 23.708145141601562,
852
+ "learning_rate": 3.0106987629555333e-05,
853
+ "loss": 0.4365,
854
+ "step": 11900
855
+ },
856
+ {
857
+ "epoch": 2.0,
858
+ "eval_accuracy": 0.8680876258140912,
859
+ "eval_f1": 0.8628914936078326,
860
+ "eval_loss": 0.4633374810218811,
861
+ "eval_precision": 0.8684864554322808,
862
+ "eval_recall": 0.8680876258140912,
863
+ "eval_runtime": 64.0052,
864
+ "eval_samples_per_second": 131.942,
865
+ "eval_steps_per_second": 8.249,
866
+ "step": 11964
867
+ },
868
+ {
869
+ "epoch": 2.01,
870
+ "grad_norm": 10.474257469177246,
871
+ "learning_rate": 2.9939819458375128e-05,
872
+ "loss": 0.3853,
873
+ "step": 12000
874
+ },
875
+ {
876
+ "epoch": 2.02,
877
+ "grad_norm": 15.668170928955078,
878
+ "learning_rate": 2.977265128719492e-05,
879
+ "loss": 0.2858,
880
+ "step": 12100
881
+ },
882
+ {
883
+ "epoch": 2.04,
884
+ "grad_norm": 10.29902172088623,
885
+ "learning_rate": 2.960548311601471e-05,
886
+ "loss": 0.2803,
887
+ "step": 12200
888
+ },
889
+ {
890
+ "epoch": 2.06,
891
+ "grad_norm": 33.27579116821289,
892
+ "learning_rate": 2.9438314944834506e-05,
893
+ "loss": 0.2858,
894
+ "step": 12300
895
+ },
896
+ {
897
+ "epoch": 2.07,
898
+ "grad_norm": 13.799466133117676,
899
+ "learning_rate": 2.9271146773654294e-05,
900
+ "loss": 0.2793,
901
+ "step": 12400
902
+ },
903
+ {
904
+ "epoch": 2.09,
905
+ "grad_norm": 4.722692489624023,
906
+ "learning_rate": 2.9103978602474092e-05,
907
+ "loss": 0.2935,
908
+ "step": 12500
909
+ },
910
+ {
911
+ "epoch": 2.11,
912
+ "grad_norm": 8.643231391906738,
913
+ "learning_rate": 2.893681043129388e-05,
914
+ "loss": 0.2825,
915
+ "step": 12600
916
+ },
917
+ {
918
+ "epoch": 2.12,
919
+ "grad_norm": 10.378469467163086,
920
+ "learning_rate": 2.876964226011368e-05,
921
+ "loss": 0.2845,
922
+ "step": 12700
923
+ },
924
+ {
925
+ "epoch": 2.14,
926
+ "grad_norm": 9.1376953125,
927
+ "learning_rate": 2.8602474088933467e-05,
928
+ "loss": 0.2725,
929
+ "step": 12800
930
+ },
931
+ {
932
+ "epoch": 2.16,
933
+ "grad_norm": 10.372312545776367,
934
+ "learning_rate": 2.8435305917753262e-05,
935
+ "loss": 0.3067,
936
+ "step": 12900
937
+ },
938
+ {
939
+ "epoch": 2.17,
940
+ "grad_norm": 23.952699661254883,
941
+ "learning_rate": 2.8268137746573053e-05,
942
+ "loss": 0.2934,
943
+ "step": 13000
944
+ },
945
+ {
946
+ "epoch": 2.19,
947
+ "grad_norm": 2.125562906265259,
948
+ "learning_rate": 2.8100969575392848e-05,
949
+ "loss": 0.2535,
950
+ "step": 13100
951
+ },
952
+ {
953
+ "epoch": 2.21,
954
+ "grad_norm": 8.090828895568848,
955
+ "learning_rate": 2.793380140421264e-05,
956
+ "loss": 0.295,
957
+ "step": 13200
958
+ },
959
+ {
960
+ "epoch": 2.22,
961
+ "grad_norm": 13.274210929870605,
962
+ "learning_rate": 2.776663323303243e-05,
963
+ "loss": 0.2851,
964
+ "step": 13300
965
+ },
966
+ {
967
+ "epoch": 2.24,
968
+ "grad_norm": 5.6807732582092285,
969
+ "learning_rate": 2.7599465061852226e-05,
970
+ "loss": 0.2662,
971
+ "step": 13400
972
+ },
973
+ {
974
+ "epoch": 2.26,
975
+ "grad_norm": 11.885269165039062,
976
+ "learning_rate": 2.7432296890672014e-05,
977
+ "loss": 0.2969,
978
+ "step": 13500
979
+ },
980
+ {
981
+ "epoch": 2.27,
982
+ "grad_norm": 21.52318000793457,
983
+ "learning_rate": 2.7265128719491812e-05,
984
+ "loss": 0.2706,
985
+ "step": 13600
986
+ },
987
+ {
988
+ "epoch": 2.29,
989
+ "grad_norm": 21.661279678344727,
990
+ "learning_rate": 2.70979605483116e-05,
991
+ "loss": 0.2715,
992
+ "step": 13700
993
+ },
994
+ {
995
+ "epoch": 2.31,
996
+ "grad_norm": 27.985078811645508,
997
+ "learning_rate": 2.69307923771314e-05,
998
+ "loss": 0.3016,
999
+ "step": 13800
1000
+ },
1001
+ {
1002
+ "epoch": 2.32,
1003
+ "grad_norm": 11.431729316711426,
1004
+ "learning_rate": 2.6763624205951187e-05,
1005
+ "loss": 0.2501,
1006
+ "step": 13900
1007
+ },
1008
+ {
1009
+ "epoch": 2.34,
1010
+ "grad_norm": 5.3406901359558105,
1011
+ "learning_rate": 2.6596456034770982e-05,
1012
+ "loss": 0.2762,
1013
+ "step": 14000
1014
+ },
1015
+ {
1016
+ "epoch": 2.36,
1017
+ "grad_norm": 11.077746391296387,
1018
+ "learning_rate": 2.6429287863590773e-05,
1019
+ "loss": 0.2819,
1020
+ "step": 14100
1021
+ },
1022
+ {
1023
+ "epoch": 2.37,
1024
+ "grad_norm": 17.451330184936523,
1025
+ "learning_rate": 2.6262119692410565e-05,
1026
+ "loss": 0.3074,
1027
+ "step": 14200
1028
+ },
1029
+ {
1030
+ "epoch": 2.39,
1031
+ "grad_norm": 7.353370189666748,
1032
+ "learning_rate": 2.609495152123036e-05,
1033
+ "loss": 0.3068,
1034
+ "step": 14300
1035
+ },
1036
+ {
1037
+ "epoch": 2.41,
1038
+ "grad_norm": 12.055102348327637,
1039
+ "learning_rate": 2.592778335005015e-05,
1040
+ "loss": 0.2779,
1041
+ "step": 14400
1042
+ },
1043
+ {
1044
+ "epoch": 2.42,
1045
+ "grad_norm": 17.555917739868164,
1046
+ "learning_rate": 2.5760615178869946e-05,
1047
+ "loss": 0.2421,
1048
+ "step": 14500
1049
+ },
1050
+ {
1051
+ "epoch": 2.44,
1052
+ "grad_norm": 22.887771606445312,
1053
+ "learning_rate": 2.5593447007689734e-05,
1054
+ "loss": 0.3016,
1055
+ "step": 14600
1056
+ },
1057
+ {
1058
+ "epoch": 2.46,
1059
+ "grad_norm": 1.915899395942688,
1060
+ "learning_rate": 2.5426278836509533e-05,
1061
+ "loss": 0.2638,
1062
+ "step": 14700
1063
+ },
1064
+ {
1065
+ "epoch": 2.47,
1066
+ "grad_norm": 13.446496963500977,
1067
+ "learning_rate": 2.525911066532932e-05,
1068
+ "loss": 0.293,
1069
+ "step": 14800
1070
+ },
1071
+ {
1072
+ "epoch": 2.49,
1073
+ "grad_norm": 12.734638214111328,
1074
+ "learning_rate": 2.509194249414912e-05,
1075
+ "loss": 0.2668,
1076
+ "step": 14900
1077
+ },
1078
+ {
1079
+ "epoch": 2.51,
1080
+ "grad_norm": 15.557112693786621,
1081
+ "learning_rate": 2.4924774322968907e-05,
1082
+ "loss": 0.2691,
1083
+ "step": 15000
1084
+ },
1085
+ {
1086
+ "epoch": 2.52,
1087
+ "grad_norm": 10.383445739746094,
1088
+ "learning_rate": 2.4757606151788702e-05,
1089
+ "loss": 0.2204,
1090
+ "step": 15100
1091
+ },
1092
+ {
1093
+ "epoch": 2.54,
1094
+ "grad_norm": 7.19666862487793,
1095
+ "learning_rate": 2.4590437980608493e-05,
1096
+ "loss": 0.2447,
1097
+ "step": 15200
1098
+ },
1099
+ {
1100
+ "epoch": 2.56,
1101
+ "grad_norm": 17.903339385986328,
1102
+ "learning_rate": 2.442326980942829e-05,
1103
+ "loss": 0.2504,
1104
+ "step": 15300
1105
+ },
1106
+ {
1107
+ "epoch": 2.57,
1108
+ "grad_norm": 10.492616653442383,
1109
+ "learning_rate": 2.425610163824808e-05,
1110
+ "loss": 0.2256,
1111
+ "step": 15400
1112
+ },
1113
+ {
1114
+ "epoch": 2.59,
1115
+ "grad_norm": 11.051074028015137,
1116
+ "learning_rate": 2.408893346706787e-05,
1117
+ "loss": 0.259,
1118
+ "step": 15500
1119
+ },
1120
+ {
1121
+ "epoch": 2.61,
1122
+ "grad_norm": 23.400402069091797,
1123
+ "learning_rate": 2.3921765295887663e-05,
1124
+ "loss": 0.2487,
1125
+ "step": 15600
1126
+ },
1127
+ {
1128
+ "epoch": 2.62,
1129
+ "grad_norm": 20.601686477661133,
1130
+ "learning_rate": 2.3754597124707458e-05,
1131
+ "loss": 0.2338,
1132
+ "step": 15700
1133
+ },
1134
+ {
1135
+ "epoch": 2.64,
1136
+ "grad_norm": 12.519159317016602,
1137
+ "learning_rate": 2.358742895352725e-05,
1138
+ "loss": 0.2652,
1139
+ "step": 15800
1140
+ },
1141
+ {
1142
+ "epoch": 2.66,
1143
+ "grad_norm": 21.95683479309082,
1144
+ "learning_rate": 2.342026078234704e-05,
1145
+ "loss": 0.2306,
1146
+ "step": 15900
1147
+ },
1148
+ {
1149
+ "epoch": 2.67,
1150
+ "grad_norm": 24.98236656188965,
1151
+ "learning_rate": 2.3253092611166836e-05,
1152
+ "loss": 0.2475,
1153
+ "step": 16000
1154
+ },
1155
+ {
1156
+ "epoch": 2.69,
1157
+ "grad_norm": 6.362200736999512,
1158
+ "learning_rate": 2.3085924439986627e-05,
1159
+ "loss": 0.2646,
1160
+ "step": 16100
1161
+ },
1162
+ {
1163
+ "epoch": 2.71,
1164
+ "grad_norm": 14.293391227722168,
1165
+ "learning_rate": 2.2918756268806422e-05,
1166
+ "loss": 0.2404,
1167
+ "step": 16200
1168
+ },
1169
+ {
1170
+ "epoch": 2.72,
1171
+ "grad_norm": 11.405878067016602,
1172
+ "learning_rate": 2.2751588097626213e-05,
1173
+ "loss": 0.2651,
1174
+ "step": 16300
1175
+ },
1176
+ {
1177
+ "epoch": 2.74,
1178
+ "grad_norm": 15.082180976867676,
1179
+ "learning_rate": 2.258441992644601e-05,
1180
+ "loss": 0.281,
1181
+ "step": 16400
1182
+ },
1183
+ {
1184
+ "epoch": 2.76,
1185
+ "grad_norm": 27.33397674560547,
1186
+ "learning_rate": 2.2417251755265796e-05,
1187
+ "loss": 0.2492,
1188
+ "step": 16500
1189
+ },
1190
+ {
1191
+ "epoch": 2.77,
1192
+ "grad_norm": 10.052102088928223,
1193
+ "learning_rate": 2.225008358408559e-05,
1194
+ "loss": 0.2382,
1195
+ "step": 16600
1196
+ },
1197
+ {
1198
+ "epoch": 2.79,
1199
+ "grad_norm": 15.405964851379395,
1200
+ "learning_rate": 2.2082915412905383e-05,
1201
+ "loss": 0.2496,
1202
+ "step": 16700
1203
+ },
1204
+ {
1205
+ "epoch": 2.81,
1206
+ "grad_norm": 7.162382125854492,
1207
+ "learning_rate": 2.1915747241725178e-05,
1208
+ "loss": 0.2343,
1209
+ "step": 16800
1210
+ },
1211
+ {
1212
+ "epoch": 2.83,
1213
+ "grad_norm": 11.130888938903809,
1214
+ "learning_rate": 2.174857907054497e-05,
1215
+ "loss": 0.2474,
1216
+ "step": 16900
1217
+ },
1218
+ {
1219
+ "epoch": 2.84,
1220
+ "grad_norm": 8.277360916137695,
1221
+ "learning_rate": 2.158141089936476e-05,
1222
+ "loss": 0.2687,
1223
+ "step": 17000
1224
+ },
1225
+ {
1226
+ "epoch": 2.86,
1227
+ "grad_norm": 31.100744247436523,
1228
+ "learning_rate": 2.1414242728184556e-05,
1229
+ "loss": 0.2422,
1230
+ "step": 17100
1231
+ },
1232
+ {
1233
+ "epoch": 2.88,
1234
+ "grad_norm": 12.757442474365234,
1235
+ "learning_rate": 2.1247074557004347e-05,
1236
+ "loss": 0.2275,
1237
+ "step": 17200
1238
+ },
1239
+ {
1240
+ "epoch": 2.89,
1241
+ "grad_norm": 4.860738277435303,
1242
+ "learning_rate": 2.1079906385824142e-05,
1243
+ "loss": 0.2252,
1244
+ "step": 17300
1245
+ },
1246
+ {
1247
+ "epoch": 2.91,
1248
+ "grad_norm": 10.574835777282715,
1249
+ "learning_rate": 2.091273821464393e-05,
1250
+ "loss": 0.2114,
1251
+ "step": 17400
1252
+ },
1253
+ {
1254
+ "epoch": 2.93,
1255
+ "grad_norm": 13.01117992401123,
1256
+ "learning_rate": 2.0745570043463725e-05,
1257
+ "loss": 0.2407,
1258
+ "step": 17500
1259
+ },
1260
+ {
1261
+ "epoch": 2.94,
1262
+ "grad_norm": 4.970390319824219,
1263
+ "learning_rate": 2.0578401872283517e-05,
1264
+ "loss": 0.2509,
1265
+ "step": 17600
1266
+ },
1267
+ {
1268
+ "epoch": 2.96,
1269
+ "grad_norm": 18.95350456237793,
1270
+ "learning_rate": 2.041123370110331e-05,
1271
+ "loss": 0.2814,
1272
+ "step": 17700
1273
+ },
1274
+ {
1275
+ "epoch": 2.98,
1276
+ "grad_norm": 1.5296308994293213,
1277
+ "learning_rate": 2.0244065529923103e-05,
1278
+ "loss": 0.235,
1279
+ "step": 17800
1280
+ },
1281
+ {
1282
+ "epoch": 2.99,
1283
+ "grad_norm": 12.501904487609863,
1284
+ "learning_rate": 2.0076897358742898e-05,
1285
+ "loss": 0.2479,
1286
+ "step": 17900
1287
+ },
1288
+ {
1289
+ "epoch": 3.0,
1290
+ "eval_accuracy": 0.8965068087625814,
1291
+ "eval_f1": 0.8930257247589533,
1292
+ "eval_loss": 0.36622655391693115,
1293
+ "eval_precision": 0.8950199629292306,
1294
+ "eval_recall": 0.8965068087625814,
1295
+ "eval_runtime": 64.0862,
1296
+ "eval_samples_per_second": 131.776,
1297
+ "eval_steps_per_second": 8.239,
1298
+ "step": 17946
1299
+ },
1300
+ {
1301
+ "epoch": 3.01,
1302
+ "grad_norm": 19.13836097717285,
1303
+ "learning_rate": 1.990972918756269e-05,
1304
+ "loss": 0.2272,
1305
+ "step": 18000
1306
+ },
1307
+ {
1308
+ "epoch": 3.03,
1309
+ "grad_norm": 8.622084617614746,
1310
+ "learning_rate": 1.9742561016382484e-05,
1311
+ "loss": 0.131,
1312
+ "step": 18100
1313
+ },
1314
+ {
1315
+ "epoch": 3.04,
1316
+ "grad_norm": 32.99411392211914,
1317
+ "learning_rate": 1.9575392845202276e-05,
1318
+ "loss": 0.1477,
1319
+ "step": 18200
1320
+ },
1321
+ {
1322
+ "epoch": 3.06,
1323
+ "grad_norm": 5.467390060424805,
1324
+ "learning_rate": 1.9408224674022067e-05,
1325
+ "loss": 0.1439,
1326
+ "step": 18300
1327
+ },
1328
+ {
1329
+ "epoch": 3.08,
1330
+ "grad_norm": 2.5153982639312744,
1331
+ "learning_rate": 1.924105650284186e-05,
1332
+ "loss": 0.1405,
1333
+ "step": 18400
1334
+ },
1335
+ {
1336
+ "epoch": 3.09,
1337
+ "grad_norm": 20.424579620361328,
1338
+ "learning_rate": 1.907388833166165e-05,
1339
+ "loss": 0.1594,
1340
+ "step": 18500
1341
+ },
1342
+ {
1343
+ "epoch": 3.11,
1344
+ "grad_norm": 5.207544803619385,
1345
+ "learning_rate": 1.8906720160481445e-05,
1346
+ "loss": 0.1323,
1347
+ "step": 18600
1348
+ },
1349
+ {
1350
+ "epoch": 3.13,
1351
+ "grad_norm": 8.750362396240234,
1352
+ "learning_rate": 1.8739551989301237e-05,
1353
+ "loss": 0.1683,
1354
+ "step": 18700
1355
+ },
1356
+ {
1357
+ "epoch": 3.14,
1358
+ "grad_norm": 2.464329481124878,
1359
+ "learning_rate": 1.857238381812103e-05,
1360
+ "loss": 0.1388,
1361
+ "step": 18800
1362
+ },
1363
+ {
1364
+ "epoch": 3.16,
1365
+ "grad_norm": 3.784031867980957,
1366
+ "learning_rate": 1.8405215646940823e-05,
1367
+ "loss": 0.149,
1368
+ "step": 18900
1369
+ },
1370
+ {
1371
+ "epoch": 3.18,
1372
+ "grad_norm": 2.632542610168457,
1373
+ "learning_rate": 1.8238047475760618e-05,
1374
+ "loss": 0.1284,
1375
+ "step": 19000
1376
+ },
1377
+ {
1378
+ "epoch": 3.19,
1379
+ "grad_norm": 11.050533294677734,
1380
+ "learning_rate": 1.807087930458041e-05,
1381
+ "loss": 0.1525,
1382
+ "step": 19100
1383
+ },
1384
+ {
1385
+ "epoch": 3.21,
1386
+ "grad_norm": 7.363661766052246,
1387
+ "learning_rate": 1.7903711133400204e-05,
1388
+ "loss": 0.1481,
1389
+ "step": 19200
1390
+ },
1391
+ {
1392
+ "epoch": 3.23,
1393
+ "grad_norm": 9.882287979125977,
1394
+ "learning_rate": 1.7736542962219992e-05,
1395
+ "loss": 0.1231,
1396
+ "step": 19300
1397
+ },
1398
+ {
1399
+ "epoch": 3.24,
1400
+ "grad_norm": 24.93657684326172,
1401
+ "learning_rate": 1.7569374791039787e-05,
1402
+ "loss": 0.1332,
1403
+ "step": 19400
1404
+ },
1405
+ {
1406
+ "epoch": 3.26,
1407
+ "grad_norm": 2.2802133560180664,
1408
+ "learning_rate": 1.740220661985958e-05,
1409
+ "loss": 0.1425,
1410
+ "step": 19500
1411
+ },
1412
+ {
1413
+ "epoch": 3.28,
1414
+ "grad_norm": 1.5991661548614502,
1415
+ "learning_rate": 1.7235038448679374e-05,
1416
+ "loss": 0.1283,
1417
+ "step": 19600
1418
+ },
1419
+ {
1420
+ "epoch": 3.29,
1421
+ "grad_norm": 8.344457626342773,
1422
+ "learning_rate": 1.7067870277499165e-05,
1423
+ "loss": 0.1502,
1424
+ "step": 19700
1425
+ },
1426
+ {
1427
+ "epoch": 3.31,
1428
+ "grad_norm": 12.95904541015625,
1429
+ "learning_rate": 1.6900702106318957e-05,
1430
+ "loss": 0.1287,
1431
+ "step": 19800
1432
+ },
1433
+ {
1434
+ "epoch": 3.33,
1435
+ "grad_norm": 20.562625885009766,
1436
+ "learning_rate": 1.673353393513875e-05,
1437
+ "loss": 0.1422,
1438
+ "step": 19900
1439
+ },
1440
+ {
1441
+ "epoch": 3.34,
1442
+ "grad_norm": 4.20346736907959,
1443
+ "learning_rate": 1.6566365763958543e-05,
1444
+ "loss": 0.1082,
1445
+ "step": 20000
1446
+ },
1447
+ {
1448
+ "epoch": 3.36,
1449
+ "grad_norm": 25.636775970458984,
1450
+ "learning_rate": 1.6399197592778338e-05,
1451
+ "loss": 0.1416,
1452
+ "step": 20100
1453
+ },
1454
+ {
1455
+ "epoch": 3.38,
1456
+ "grad_norm": 23.23301887512207,
1457
+ "learning_rate": 1.6232029421598126e-05,
1458
+ "loss": 0.1497,
1459
+ "step": 20200
1460
+ },
1461
+ {
1462
+ "epoch": 3.39,
1463
+ "grad_norm": 22.21303939819336,
1464
+ "learning_rate": 1.606486125041792e-05,
1465
+ "loss": 0.1568,
1466
+ "step": 20300
1467
+ },
1468
+ {
1469
+ "epoch": 3.41,
1470
+ "grad_norm": 21.14128303527832,
1471
+ "learning_rate": 1.5897693079237712e-05,
1472
+ "loss": 0.139,
1473
+ "step": 20400
1474
+ },
1475
+ {
1476
+ "epoch": 3.43,
1477
+ "grad_norm": 22.63404083251953,
1478
+ "learning_rate": 1.5730524908057507e-05,
1479
+ "loss": 0.1518,
1480
+ "step": 20500
1481
+ },
1482
+ {
1483
+ "epoch": 3.44,
1484
+ "grad_norm": 13.030010223388672,
1485
+ "learning_rate": 1.55633567368773e-05,
1486
+ "loss": 0.1319,
1487
+ "step": 20600
1488
+ },
1489
+ {
1490
+ "epoch": 3.46,
1491
+ "grad_norm": 18.308670043945312,
1492
+ "learning_rate": 1.5396188565697094e-05,
1493
+ "loss": 0.1494,
1494
+ "step": 20700
1495
+ },
1496
+ {
1497
+ "epoch": 3.48,
1498
+ "grad_norm": 24.907419204711914,
1499
+ "learning_rate": 1.5229020394516885e-05,
1500
+ "loss": 0.1425,
1501
+ "step": 20800
1502
+ },
1503
+ {
1504
+ "epoch": 3.49,
1505
+ "grad_norm": 19.32282066345215,
1506
+ "learning_rate": 1.5061852223336678e-05,
1507
+ "loss": 0.1264,
1508
+ "step": 20900
1509
+ },
1510
+ {
1511
+ "epoch": 3.51,
1512
+ "grad_norm": 17.444271087646484,
1513
+ "learning_rate": 1.4894684052156472e-05,
1514
+ "loss": 0.14,
1515
+ "step": 21000
1516
+ },
1517
+ {
1518
+ "epoch": 3.53,
1519
+ "grad_norm": 1.832461953163147,
1520
+ "learning_rate": 1.4727515880976261e-05,
1521
+ "loss": 0.1438,
1522
+ "step": 21100
1523
+ },
1524
+ {
1525
+ "epoch": 3.54,
1526
+ "grad_norm": 10.410861015319824,
1527
+ "learning_rate": 1.4560347709796055e-05,
1528
+ "loss": 0.1393,
1529
+ "step": 21200
1530
+ },
1531
+ {
1532
+ "epoch": 3.56,
1533
+ "grad_norm": 3.6459202766418457,
1534
+ "learning_rate": 1.4393179538615848e-05,
1535
+ "loss": 0.1077,
1536
+ "step": 21300
1537
+ },
1538
+ {
1539
+ "epoch": 3.58,
1540
+ "grad_norm": 3.216399669647217,
1541
+ "learning_rate": 1.4226011367435641e-05,
1542
+ "loss": 0.1154,
1543
+ "step": 21400
1544
+ },
1545
+ {
1546
+ "epoch": 3.59,
1547
+ "grad_norm": 5.621729373931885,
1548
+ "learning_rate": 1.4058843196255434e-05,
1549
+ "loss": 0.1208,
1550
+ "step": 21500
1551
+ },
1552
+ {
1553
+ "epoch": 3.61,
1554
+ "grad_norm": 5.559453010559082,
1555
+ "learning_rate": 1.3891675025075226e-05,
1556
+ "loss": 0.1441,
1557
+ "step": 21600
1558
+ },
1559
+ {
1560
+ "epoch": 3.63,
1561
+ "grad_norm": 22.32745933532715,
1562
+ "learning_rate": 1.3724506853895019e-05,
1563
+ "loss": 0.1176,
1564
+ "step": 21700
1565
+ },
1566
+ {
1567
+ "epoch": 3.64,
1568
+ "grad_norm": 4.509443759918213,
1569
+ "learning_rate": 1.3557338682714812e-05,
1570
+ "loss": 0.1382,
1571
+ "step": 21800
1572
+ },
1573
+ {
1574
+ "epoch": 3.66,
1575
+ "grad_norm": 15.154895782470703,
1576
+ "learning_rate": 1.3390170511534605e-05,
1577
+ "loss": 0.1475,
1578
+ "step": 21900
1579
+ },
1580
+ {
1581
+ "epoch": 3.68,
1582
+ "grad_norm": 0.8804099559783936,
1583
+ "learning_rate": 1.3223002340354398e-05,
1584
+ "loss": 0.1325,
1585
+ "step": 22000
1586
+ },
1587
+ {
1588
+ "epoch": 3.69,
1589
+ "grad_norm": 1.9917913675308228,
1590
+ "learning_rate": 1.3055834169174188e-05,
1591
+ "loss": 0.1255,
1592
+ "step": 22100
1593
+ },
1594
+ {
1595
+ "epoch": 3.71,
1596
+ "grad_norm": 16.314374923706055,
1597
+ "learning_rate": 1.2888665997993981e-05,
1598
+ "loss": 0.1275,
1599
+ "step": 22200
1600
+ },
1601
+ {
1602
+ "epoch": 3.73,
1603
+ "grad_norm": 5.355242729187012,
1604
+ "learning_rate": 1.2721497826813775e-05,
1605
+ "loss": 0.1185,
1606
+ "step": 22300
1607
+ },
1608
+ {
1609
+ "epoch": 3.74,
1610
+ "grad_norm": 20.218473434448242,
1611
+ "learning_rate": 1.2554329655633568e-05,
1612
+ "loss": 0.1203,
1613
+ "step": 22400
1614
+ },
1615
+ {
1616
+ "epoch": 3.76,
1617
+ "grad_norm": 1.39955735206604,
1618
+ "learning_rate": 1.2387161484453361e-05,
1619
+ "loss": 0.1636,
1620
+ "step": 22500
1621
+ },
1622
+ {
1623
+ "epoch": 3.78,
1624
+ "grad_norm": 17.855899810791016,
1625
+ "learning_rate": 1.2219993313273154e-05,
1626
+ "loss": 0.1369,
1627
+ "step": 22600
1628
+ },
1629
+ {
1630
+ "epoch": 3.79,
1631
+ "grad_norm": 14.41054630279541,
1632
+ "learning_rate": 1.2052825142092947e-05,
1633
+ "loss": 0.1245,
1634
+ "step": 22700
1635
+ },
1636
+ {
1637
+ "epoch": 3.81,
1638
+ "grad_norm": 11.451350212097168,
1639
+ "learning_rate": 1.1885656970912739e-05,
1640
+ "loss": 0.1508,
1641
+ "step": 22800
1642
+ },
1643
+ {
1644
+ "epoch": 3.83,
1645
+ "grad_norm": 9.41112995147705,
1646
+ "learning_rate": 1.171848879973253e-05,
1647
+ "loss": 0.125,
1648
+ "step": 22900
1649
+ },
1650
+ {
1651
+ "epoch": 3.84,
1652
+ "grad_norm": 29.826963424682617,
1653
+ "learning_rate": 1.1551320628552324e-05,
1654
+ "loss": 0.1545,
1655
+ "step": 23000
1656
+ },
1657
+ {
1658
+ "epoch": 3.86,
1659
+ "grad_norm": 11.454690933227539,
1660
+ "learning_rate": 1.1384152457372117e-05,
1661
+ "loss": 0.1353,
1662
+ "step": 23100
1663
+ },
1664
+ {
1665
+ "epoch": 3.88,
1666
+ "grad_norm": 12.364923477172852,
1667
+ "learning_rate": 1.121698428619191e-05,
1668
+ "loss": 0.1346,
1669
+ "step": 23200
1670
+ },
1671
+ {
1672
+ "epoch": 3.9,
1673
+ "grad_norm": 1.8181456327438354,
1674
+ "learning_rate": 1.1049816115011702e-05,
1675
+ "loss": 0.1092,
1676
+ "step": 23300
1677
+ },
1678
+ {
1679
+ "epoch": 3.91,
1680
+ "grad_norm": 30.87436866760254,
1681
+ "learning_rate": 1.0882647943831495e-05,
1682
+ "loss": 0.1059,
1683
+ "step": 23400
1684
+ },
1685
+ {
1686
+ "epoch": 3.93,
1687
+ "grad_norm": 16.423452377319336,
1688
+ "learning_rate": 1.0715479772651288e-05,
1689
+ "loss": 0.1157,
1690
+ "step": 23500
1691
+ },
1692
+ {
1693
+ "epoch": 3.95,
1694
+ "grad_norm": 27.86665153503418,
1695
+ "learning_rate": 1.0548311601471081e-05,
1696
+ "loss": 0.1317,
1697
+ "step": 23600
1698
+ },
1699
+ {
1700
+ "epoch": 3.96,
1701
+ "grad_norm": 24.479764938354492,
1702
+ "learning_rate": 1.0381143430290873e-05,
1703
+ "loss": 0.1184,
1704
+ "step": 23700
1705
+ },
1706
+ {
1707
+ "epoch": 3.98,
1708
+ "grad_norm": 1.4079170227050781,
1709
+ "learning_rate": 1.0213975259110666e-05,
1710
+ "loss": 0.1303,
1711
+ "step": 23800
1712
+ },
1713
+ {
1714
+ "epoch": 4.0,
1715
+ "grad_norm": 4.259897232055664,
1716
+ "learning_rate": 1.0046807087930459e-05,
1717
+ "loss": 0.1322,
1718
+ "step": 23900
1719
+ },
1720
+ {
1721
+ "epoch": 4.0,
1722
+ "eval_accuracy": 0.9113084665482534,
1723
+ "eval_f1": 0.9092055511030135,
1724
+ "eval_loss": 0.3260073661804199,
1725
+ "eval_precision": 0.9099757491171729,
1726
+ "eval_recall": 0.9113084665482534,
1727
+ "eval_runtime": 64.1166,
1728
+ "eval_samples_per_second": 131.713,
1729
+ "eval_steps_per_second": 8.235,
1730
+ "step": 23928
1731
+ },
1732
+ {
1733
+ "epoch": 4.01,
1734
+ "grad_norm": 13.925552368164062,
1735
+ "learning_rate": 9.879638916750252e-06,
1736
+ "loss": 0.0687,
1737
+ "step": 24000
1738
+ },
1739
+ {
1740
+ "epoch": 4.03,
1741
+ "grad_norm": 0.18495211005210876,
1742
+ "learning_rate": 9.712470745570044e-06,
1743
+ "loss": 0.066,
1744
+ "step": 24100
1745
+ },
1746
+ {
1747
+ "epoch": 4.05,
1748
+ "grad_norm": 1.0808857679367065,
1749
+ "learning_rate": 9.545302574389837e-06,
1750
+ "loss": 0.0648,
1751
+ "step": 24200
1752
+ },
1753
+ {
1754
+ "epoch": 4.06,
1755
+ "grad_norm": 1.0073552131652832,
1756
+ "learning_rate": 9.378134403209628e-06,
1757
+ "loss": 0.071,
1758
+ "step": 24300
1759
+ },
1760
+ {
1761
+ "epoch": 4.08,
1762
+ "grad_norm": 15.166232109069824,
1763
+ "learning_rate": 9.210966232029422e-06,
1764
+ "loss": 0.0666,
1765
+ "step": 24400
1766
+ },
1767
+ {
1768
+ "epoch": 4.1,
1769
+ "grad_norm": 18.000640869140625,
1770
+ "learning_rate": 9.043798060849215e-06,
1771
+ "loss": 0.0778,
1772
+ "step": 24500
1773
+ },
1774
+ {
1775
+ "epoch": 4.11,
1776
+ "grad_norm": 1.214728593826294,
1777
+ "learning_rate": 8.876629889669008e-06,
1778
+ "loss": 0.07,
1779
+ "step": 24600
1780
+ },
1781
+ {
1782
+ "epoch": 4.13,
1783
+ "grad_norm": 1.982407808303833,
1784
+ "learning_rate": 8.7094617184888e-06,
1785
+ "loss": 0.0752,
1786
+ "step": 24700
1787
+ },
1788
+ {
1789
+ "epoch": 4.15,
1790
+ "grad_norm": 20.929153442382812,
1791
+ "learning_rate": 8.542293547308593e-06,
1792
+ "loss": 0.0785,
1793
+ "step": 24800
1794
+ },
1795
+ {
1796
+ "epoch": 4.16,
1797
+ "grad_norm": 0.8963820934295654,
1798
+ "learning_rate": 8.375125376128386e-06,
1799
+ "loss": 0.0524,
1800
+ "step": 24900
1801
+ },
1802
+ {
1803
+ "epoch": 4.18,
1804
+ "grad_norm": 3.5774483680725098,
1805
+ "learning_rate": 8.207957204948179e-06,
1806
+ "loss": 0.0692,
1807
+ "step": 25000
1808
+ },
1809
+ {
1810
+ "epoch": 4.2,
1811
+ "grad_norm": 3.7253074645996094,
1812
+ "learning_rate": 8.04078903376797e-06,
1813
+ "loss": 0.0641,
1814
+ "step": 25100
1815
+ },
1816
+ {
1817
+ "epoch": 4.21,
1818
+ "grad_norm": 1.2855291366577148,
1819
+ "learning_rate": 7.873620862587764e-06,
1820
+ "loss": 0.0699,
1821
+ "step": 25200
1822
+ },
1823
+ {
1824
+ "epoch": 4.23,
1825
+ "grad_norm": 1.9972455501556396,
1826
+ "learning_rate": 7.706452691407557e-06,
1827
+ "loss": 0.062,
1828
+ "step": 25300
1829
+ },
1830
+ {
1831
+ "epoch": 4.25,
1832
+ "grad_norm": 1.0809322595596313,
1833
+ "learning_rate": 7.539284520227349e-06,
1834
+ "loss": 0.058,
1835
+ "step": 25400
1836
+ },
1837
+ {
1838
+ "epoch": 4.26,
1839
+ "grad_norm": 3.876232862472534,
1840
+ "learning_rate": 7.3721163490471425e-06,
1841
+ "loss": 0.0693,
1842
+ "step": 25500
1843
+ },
1844
+ {
1845
+ "epoch": 4.28,
1846
+ "grad_norm": 6.069151878356934,
1847
+ "learning_rate": 7.204948177866934e-06,
1848
+ "loss": 0.0617,
1849
+ "step": 25600
1850
+ },
1851
+ {
1852
+ "epoch": 4.3,
1853
+ "grad_norm": 0.895815372467041,
1854
+ "learning_rate": 7.037780006686727e-06,
1855
+ "loss": 0.0623,
1856
+ "step": 25700
1857
+ },
1858
+ {
1859
+ "epoch": 4.31,
1860
+ "grad_norm": 0.4176822602748871,
1861
+ "learning_rate": 6.8706118355065195e-06,
1862
+ "loss": 0.0833,
1863
+ "step": 25800
1864
+ },
1865
+ {
1866
+ "epoch": 4.33,
1867
+ "grad_norm": 0.6760619878768921,
1868
+ "learning_rate": 6.703443664326313e-06,
1869
+ "loss": 0.0567,
1870
+ "step": 25900
1871
+ },
1872
+ {
1873
+ "epoch": 4.35,
1874
+ "grad_norm": 14.889734268188477,
1875
+ "learning_rate": 6.536275493146106e-06,
1876
+ "loss": 0.053,
1877
+ "step": 26000
1878
+ },
1879
+ {
1880
+ "epoch": 4.36,
1881
+ "grad_norm": 0.5385121703147888,
1882
+ "learning_rate": 6.369107321965897e-06,
1883
+ "loss": 0.0703,
1884
+ "step": 26100
1885
+ },
1886
+ {
1887
+ "epoch": 4.38,
1888
+ "grad_norm": 6.336006164550781,
1889
+ "learning_rate": 6.201939150785691e-06,
1890
+ "loss": 0.063,
1891
+ "step": 26200
1892
+ },
1893
+ {
1894
+ "epoch": 4.4,
1895
+ "grad_norm": 0.20758749544620514,
1896
+ "learning_rate": 6.034770979605484e-06,
1897
+ "loss": 0.0753,
1898
+ "step": 26300
1899
+ },
1900
+ {
1901
+ "epoch": 4.41,
1902
+ "grad_norm": 11.717066764831543,
1903
+ "learning_rate": 5.867602808425276e-06,
1904
+ "loss": 0.0598,
1905
+ "step": 26400
1906
+ },
1907
+ {
1908
+ "epoch": 4.43,
1909
+ "grad_norm": 26.475128173828125,
1910
+ "learning_rate": 5.7004346372450685e-06,
1911
+ "loss": 0.064,
1912
+ "step": 26500
1913
+ },
1914
+ {
1915
+ "epoch": 4.45,
1916
+ "grad_norm": 20.872194290161133,
1917
+ "learning_rate": 5.533266466064862e-06,
1918
+ "loss": 0.0708,
1919
+ "step": 26600
1920
+ },
1921
+ {
1922
+ "epoch": 4.46,
1923
+ "grad_norm": 1.2749828100204468,
1924
+ "learning_rate": 5.366098294884654e-06,
1925
+ "loss": 0.0705,
1926
+ "step": 26700
1927
+ },
1928
+ {
1929
+ "epoch": 4.48,
1930
+ "grad_norm": 6.7912702560424805,
1931
+ "learning_rate": 5.198930123704447e-06,
1932
+ "loss": 0.0742,
1933
+ "step": 26800
1934
+ },
1935
+ {
1936
+ "epoch": 4.5,
1937
+ "grad_norm": 10.904654502868652,
1938
+ "learning_rate": 5.03176195252424e-06,
1939
+ "loss": 0.0665,
1940
+ "step": 26900
1941
+ },
1942
+ {
1943
+ "epoch": 4.51,
1944
+ "grad_norm": 6.191511154174805,
1945
+ "learning_rate": 4.864593781344033e-06,
1946
+ "loss": 0.0549,
1947
+ "step": 27000
1948
+ },
1949
+ {
1950
+ "epoch": 4.53,
1951
+ "grad_norm": 2.479524850845337,
1952
+ "learning_rate": 4.697425610163825e-06,
1953
+ "loss": 0.0539,
1954
+ "step": 27100
1955
+ },
1956
+ {
1957
+ "epoch": 4.55,
1958
+ "grad_norm": 0.7285805940628052,
1959
+ "learning_rate": 4.5302574389836175e-06,
1960
+ "loss": 0.0662,
1961
+ "step": 27200
1962
+ },
1963
+ {
1964
+ "epoch": 4.56,
1965
+ "grad_norm": 4.313304901123047,
1966
+ "learning_rate": 4.363089267803411e-06,
1967
+ "loss": 0.0571,
1968
+ "step": 27300
1969
+ },
1970
+ {
1971
+ "epoch": 4.58,
1972
+ "grad_norm": 17.61699867248535,
1973
+ "learning_rate": 4.195921096623203e-06,
1974
+ "loss": 0.0634,
1975
+ "step": 27400
1976
+ },
1977
+ {
1978
+ "epoch": 4.6,
1979
+ "grad_norm": 1.3776081800460815,
1980
+ "learning_rate": 4.028752925442996e-06,
1981
+ "loss": 0.0526,
1982
+ "step": 27500
1983
+ },
1984
+ {
1985
+ "epoch": 4.61,
1986
+ "grad_norm": 0.36369597911834717,
1987
+ "learning_rate": 3.8615847542627886e-06,
1988
+ "loss": 0.0669,
1989
+ "step": 27600
1990
+ },
1991
+ {
1992
+ "epoch": 4.63,
1993
+ "grad_norm": 4.591643333435059,
1994
+ "learning_rate": 3.6944165830825813e-06,
1995
+ "loss": 0.0578,
1996
+ "step": 27700
1997
+ },
1998
+ {
1999
+ "epoch": 4.65,
2000
+ "grad_norm": 0.930225670337677,
2001
+ "learning_rate": 3.5272484119023737e-06,
2002
+ "loss": 0.0456,
2003
+ "step": 27800
2004
+ },
2005
+ {
2006
+ "epoch": 4.66,
2007
+ "grad_norm": 1.136043906211853,
2008
+ "learning_rate": 3.360080240722167e-06,
2009
+ "loss": 0.0617,
2010
+ "step": 27900
2011
+ },
2012
+ {
2013
+ "epoch": 4.68,
2014
+ "grad_norm": 0.6426201462745667,
2015
+ "learning_rate": 3.1929120695419596e-06,
2016
+ "loss": 0.0568,
2017
+ "step": 28000
2018
+ },
2019
+ {
2020
+ "epoch": 4.7,
2021
+ "grad_norm": 2.6884241104125977,
2022
+ "learning_rate": 3.025743898361752e-06,
2023
+ "loss": 0.0606,
2024
+ "step": 28100
2025
+ },
2026
+ {
2027
+ "epoch": 4.71,
2028
+ "grad_norm": 0.4525424838066101,
2029
+ "learning_rate": 2.8585757271815448e-06,
2030
+ "loss": 0.066,
2031
+ "step": 28200
2032
+ },
2033
+ {
2034
+ "epoch": 4.73,
2035
+ "grad_norm": 1.0276681184768677,
2036
+ "learning_rate": 2.6914075560013375e-06,
2037
+ "loss": 0.0444,
2038
+ "step": 28300
2039
+ },
2040
+ {
2041
+ "epoch": 4.75,
2042
+ "grad_norm": 7.886939525604248,
2043
+ "learning_rate": 2.5242393848211303e-06,
2044
+ "loss": 0.065,
2045
+ "step": 28400
2046
+ },
2047
+ {
2048
+ "epoch": 4.76,
2049
+ "grad_norm": 0.37203583121299744,
2050
+ "learning_rate": 2.357071213640923e-06,
2051
+ "loss": 0.0559,
2052
+ "step": 28500
2053
+ },
2054
+ {
2055
+ "epoch": 4.78,
2056
+ "grad_norm": 6.219501495361328,
2057
+ "learning_rate": 2.1899030424607154e-06,
2058
+ "loss": 0.07,
2059
+ "step": 28600
2060
+ },
2061
+ {
2062
+ "epoch": 4.8,
2063
+ "grad_norm": 8.10631275177002,
2064
+ "learning_rate": 2.022734871280508e-06,
2065
+ "loss": 0.0623,
2066
+ "step": 28700
2067
+ },
2068
+ {
2069
+ "epoch": 4.81,
2070
+ "grad_norm": 24.999059677124023,
2071
+ "learning_rate": 1.855566700100301e-06,
2072
+ "loss": 0.0701,
2073
+ "step": 28800
2074
+ },
2075
+ {
2076
+ "epoch": 4.83,
2077
+ "grad_norm": 3.5445597171783447,
2078
+ "learning_rate": 1.6883985289200935e-06,
2079
+ "loss": 0.0561,
2080
+ "step": 28900
2081
+ },
2082
+ {
2083
+ "epoch": 4.85,
2084
+ "grad_norm": 11.693018913269043,
2085
+ "learning_rate": 1.5212303577398863e-06,
2086
+ "loss": 0.062,
2087
+ "step": 29000
2088
+ },
2089
+ {
2090
+ "epoch": 4.86,
2091
+ "grad_norm": 17.059640884399414,
2092
+ "learning_rate": 1.354062186559679e-06,
2093
+ "loss": 0.0663,
2094
+ "step": 29100
2095
+ },
2096
+ {
2097
+ "epoch": 4.88,
2098
+ "grad_norm": 3.2128794193267822,
2099
+ "learning_rate": 1.1868940153794718e-06,
2100
+ "loss": 0.0541,
2101
+ "step": 29200
2102
+ },
2103
+ {
2104
+ "epoch": 4.9,
2105
+ "grad_norm": 1.6803439855575562,
2106
+ "learning_rate": 1.0197258441992646e-06,
2107
+ "loss": 0.0619,
2108
+ "step": 29300
2109
+ },
2110
+ {
2111
+ "epoch": 4.91,
2112
+ "grad_norm": 7.980160236358643,
2113
+ "learning_rate": 8.525576730190572e-07,
2114
+ "loss": 0.0649,
2115
+ "step": 29400
2116
+ },
2117
+ {
2118
+ "epoch": 4.93,
2119
+ "grad_norm": 0.3919593393802643,
2120
+ "learning_rate": 6.853895018388499e-07,
2121
+ "loss": 0.0753,
2122
+ "step": 29500
2123
+ },
2124
+ {
2125
+ "epoch": 4.95,
2126
+ "grad_norm": 2.870180368423462,
2127
+ "learning_rate": 5.182213306586426e-07,
2128
+ "loss": 0.0461,
2129
+ "step": 29600
2130
+ },
2131
+ {
2132
+ "epoch": 4.96,
2133
+ "grad_norm": 0.5204899907112122,
2134
+ "learning_rate": 3.510531594784353e-07,
2135
+ "loss": 0.0446,
2136
+ "step": 29700
2137
+ },
2138
+ {
2139
+ "epoch": 4.98,
2140
+ "grad_norm": 2.318403482437134,
2141
+ "learning_rate": 1.8388498829822804e-07,
2142
+ "loss": 0.0588,
2143
+ "step": 29800
2144
+ },
2145
+ {
2146
+ "epoch": 5.0,
2147
+ "grad_norm": 1.1591626405715942,
2148
+ "learning_rate": 1.6716817118020728e-08,
2149
+ "loss": 0.0589,
2150
+ "step": 29900
2151
+ },
2152
+ {
2153
+ "epoch": 5.0,
2154
+ "eval_accuracy": 0.9182948490230906,
2155
+ "eval_f1": 0.9165254517429693,
2156
+ "eval_loss": 0.3342040479183197,
2157
+ "eval_precision": 0.9170562701684628,
2158
+ "eval_recall": 0.9182948490230906,
2159
+ "eval_runtime": 63.9141,
2160
+ "eval_samples_per_second": 132.131,
2161
+ "eval_steps_per_second": 8.261,
2162
+ "step": 29910
2163
+ }
2164
+ ],
2165
+ "logging_steps": 100,
2166
+ "max_steps": 29910,
2167
+ "num_input_tokens_seen": 0,
2168
+ "num_train_epochs": 5,
2169
+ "save_steps": 500,
2170
+ "total_flos": 1.15579279766016e+16,
2171
+ "train_batch_size": 16,
2172
+ "trial_name": null,
2173
+ "trial_params": null
2174
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bdc897def407dfe2e4318176425168f13f8e1110eb62832d0ba83b7723ba91d
3
+ size 4856