hadifar commited on
Commit
b9e57d8
·
1 Parent(s): abfac89

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +340 -0
trainer_state.json ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 24450,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 4.897750511247444e-05,
13
+ "loss": 1.7982,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.12,
18
+ "learning_rate": 4.7955010224948876e-05,
19
+ "loss": 1.6538,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.18,
24
+ "learning_rate": 4.693251533742332e-05,
25
+ "loss": 1.5864,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.25,
30
+ "learning_rate": 4.591002044989775e-05,
31
+ "loss": 1.5518,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.31,
36
+ "learning_rate": 4.488752556237219e-05,
37
+ "loss": 1.5189,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.37,
42
+ "learning_rate": 4.386503067484663e-05,
43
+ "loss": 1.5078,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.43,
48
+ "learning_rate": 4.2842535787321066e-05,
49
+ "loss": 1.4769,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.49,
54
+ "learning_rate": 4.18200408997955e-05,
55
+ "loss": 1.4499,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.55,
60
+ "learning_rate": 4.079754601226994e-05,
61
+ "loss": 1.4412,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.61,
66
+ "learning_rate": 3.9775051124744376e-05,
67
+ "loss": 1.4154,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.67,
72
+ "learning_rate": 3.875255623721882e-05,
73
+ "loss": 1.4142,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.74,
78
+ "learning_rate": 3.773006134969325e-05,
79
+ "loss": 1.4084,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.8,
84
+ "learning_rate": 3.670756646216769e-05,
85
+ "loss": 1.3804,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.86,
90
+ "learning_rate": 3.568507157464213e-05,
91
+ "loss": 1.3813,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.92,
96
+ "learning_rate": 3.4662576687116566e-05,
97
+ "loss": 1.3566,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.98,
102
+ "learning_rate": 3.3640081799591e-05,
103
+ "loss": 1.3624,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 1.0,
108
+ "eval_accuracy": 0.7132048832822165,
109
+ "eval_loss": 1.3430438041687012,
110
+ "eval_runtime": 167.0718,
111
+ "eval_samples_per_second": 91.2,
112
+ "eval_steps_per_second": 11.402,
113
+ "step": 8150
114
+ },
115
+ {
116
+ "epoch": 1.04,
117
+ "learning_rate": 3.261758691206544e-05,
118
+ "loss": 1.3519,
119
+ "step": 8500
120
+ },
121
+ {
122
+ "epoch": 1.1,
123
+ "learning_rate": 3.159509202453988e-05,
124
+ "loss": 1.3203,
125
+ "step": 9000
126
+ },
127
+ {
128
+ "epoch": 1.17,
129
+ "learning_rate": 3.057259713701431e-05,
130
+ "loss": 1.3242,
131
+ "step": 9500
132
+ },
133
+ {
134
+ "epoch": 1.23,
135
+ "learning_rate": 2.9550102249488753e-05,
136
+ "loss": 1.3069,
137
+ "step": 10000
138
+ },
139
+ {
140
+ "epoch": 1.29,
141
+ "learning_rate": 2.8527607361963193e-05,
142
+ "loss": 1.2915,
143
+ "step": 10500
144
+ },
145
+ {
146
+ "epoch": 1.35,
147
+ "learning_rate": 2.7505112474437626e-05,
148
+ "loss": 1.3011,
149
+ "step": 11000
150
+ },
151
+ {
152
+ "epoch": 1.41,
153
+ "learning_rate": 2.6482617586912066e-05,
154
+ "loss": 1.299,
155
+ "step": 11500
156
+ },
157
+ {
158
+ "epoch": 1.47,
159
+ "learning_rate": 2.5460122699386503e-05,
160
+ "loss": 1.2907,
161
+ "step": 12000
162
+ },
163
+ {
164
+ "epoch": 1.53,
165
+ "learning_rate": 2.4437627811860943e-05,
166
+ "loss": 1.2647,
167
+ "step": 12500
168
+ },
169
+ {
170
+ "epoch": 1.6,
171
+ "learning_rate": 2.341513292433538e-05,
172
+ "loss": 1.2744,
173
+ "step": 13000
174
+ },
175
+ {
176
+ "epoch": 1.66,
177
+ "learning_rate": 2.239263803680982e-05,
178
+ "loss": 1.2614,
179
+ "step": 13500
180
+ },
181
+ {
182
+ "epoch": 1.72,
183
+ "learning_rate": 2.1370143149284256e-05,
184
+ "loss": 1.2614,
185
+ "step": 14000
186
+ },
187
+ {
188
+ "epoch": 1.78,
189
+ "learning_rate": 2.0347648261758693e-05,
190
+ "loss": 1.2551,
191
+ "step": 14500
192
+ },
193
+ {
194
+ "epoch": 1.84,
195
+ "learning_rate": 1.932515337423313e-05,
196
+ "loss": 1.2401,
197
+ "step": 15000
198
+ },
199
+ {
200
+ "epoch": 1.9,
201
+ "learning_rate": 1.8302658486707566e-05,
202
+ "loss": 1.2415,
203
+ "step": 15500
204
+ },
205
+ {
206
+ "epoch": 1.96,
207
+ "learning_rate": 1.7280163599182006e-05,
208
+ "loss": 1.2328,
209
+ "step": 16000
210
+ },
211
+ {
212
+ "epoch": 2.0,
213
+ "eval_accuracy": 0.7264976637836763,
214
+ "eval_loss": 1.2673941850662231,
215
+ "eval_runtime": 167.9128,
216
+ "eval_samples_per_second": 90.744,
217
+ "eval_steps_per_second": 11.345,
218
+ "step": 16300
219
+ },
220
+ {
221
+ "epoch": 2.02,
222
+ "learning_rate": 1.6257668711656443e-05,
223
+ "loss": 1.2239,
224
+ "step": 16500
225
+ },
226
+ {
227
+ "epoch": 2.09,
228
+ "learning_rate": 1.523517382413088e-05,
229
+ "loss": 1.2146,
230
+ "step": 17000
231
+ },
232
+ {
233
+ "epoch": 2.15,
234
+ "learning_rate": 1.4212678936605318e-05,
235
+ "loss": 1.2142,
236
+ "step": 17500
237
+ },
238
+ {
239
+ "epoch": 2.21,
240
+ "learning_rate": 1.3190184049079754e-05,
241
+ "loss": 1.2113,
242
+ "step": 18000
243
+ },
244
+ {
245
+ "epoch": 2.27,
246
+ "learning_rate": 1.2167689161554193e-05,
247
+ "loss": 1.2011,
248
+ "step": 18500
249
+ },
250
+ {
251
+ "epoch": 2.33,
252
+ "learning_rate": 1.1145194274028631e-05,
253
+ "loss": 1.2008,
254
+ "step": 19000
255
+ },
256
+ {
257
+ "epoch": 2.39,
258
+ "learning_rate": 1.0122699386503068e-05,
259
+ "loss": 1.1976,
260
+ "step": 19500
261
+ },
262
+ {
263
+ "epoch": 2.45,
264
+ "learning_rate": 9.100204498977506e-06,
265
+ "loss": 1.1867,
266
+ "step": 20000
267
+ },
268
+ {
269
+ "epoch": 2.52,
270
+ "learning_rate": 8.077709611451943e-06,
271
+ "loss": 1.1797,
272
+ "step": 20500
273
+ },
274
+ {
275
+ "epoch": 2.58,
276
+ "learning_rate": 7.05521472392638e-06,
277
+ "loss": 1.1877,
278
+ "step": 21000
279
+ },
280
+ {
281
+ "epoch": 2.64,
282
+ "learning_rate": 6.032719836400819e-06,
283
+ "loss": 1.1828,
284
+ "step": 21500
285
+ },
286
+ {
287
+ "epoch": 2.7,
288
+ "learning_rate": 5.0102249488752554e-06,
289
+ "loss": 1.175,
290
+ "step": 22000
291
+ },
292
+ {
293
+ "epoch": 2.76,
294
+ "learning_rate": 3.987730061349693e-06,
295
+ "loss": 1.1786,
296
+ "step": 22500
297
+ },
298
+ {
299
+ "epoch": 2.82,
300
+ "learning_rate": 2.965235173824131e-06,
301
+ "loss": 1.1747,
302
+ "step": 23000
303
+ },
304
+ {
305
+ "epoch": 2.88,
306
+ "learning_rate": 1.942740286298569e-06,
307
+ "loss": 1.1736,
308
+ "step": 23500
309
+ },
310
+ {
311
+ "epoch": 2.94,
312
+ "learning_rate": 9.202453987730062e-07,
313
+ "loss": 1.1753,
314
+ "step": 24000
315
+ },
316
+ {
317
+ "epoch": 3.0,
318
+ "eval_accuracy": 0.7363739433117852,
319
+ "eval_loss": 1.2128783464431763,
320
+ "eval_runtime": 167.207,
321
+ "eval_samples_per_second": 91.127,
322
+ "eval_steps_per_second": 11.393,
323
+ "step": 24450
324
+ },
325
+ {
326
+ "epoch": 3.0,
327
+ "step": 24450,
328
+ "total_flos": 1.0143417323453616e+17,
329
+ "train_loss": 1.316010577430023,
330
+ "train_runtime": 32228.2163,
331
+ "train_samples_per_second": 24.276,
332
+ "train_steps_per_second": 0.759
333
+ }
334
+ ],
335
+ "max_steps": 24450,
336
+ "num_train_epochs": 3,
337
+ "total_flos": 1.0143417323453616e+17,
338
+ "trial_name": null,
339
+ "trial_params": null
340
+ }