LamaDiab commited on
Commit
35823b8
·
verified ·
1 Parent(s): 5fb59af

Final training metrics

Browse files
Files changed (1) hide show
  1. training_metrics.json +414 -0
training_metrics.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metrics_history": [
3
+ {
4
+ "epoch": 0.05819366852886406,
5
+ "global_step": 500,
6
+ "eval_loss": 4.736684322357178,
7
+ "eval_cosine_accuracy": 0.9417916536331177
8
+ },
9
+ {
10
+ "epoch": 0.11638733705772812,
11
+ "global_step": 1000,
12
+ "eval_loss": 4.456507682800293,
13
+ "eval_cosine_accuracy": 0.9421085715293884
14
+ },
15
+ {
16
+ "epoch": 0.17458100558659218,
17
+ "global_step": 1500,
18
+ "eval_loss": 4.417118072509766,
19
+ "eval_cosine_accuracy": 0.9389393329620361
20
+ },
21
+ {
22
+ "epoch": 0.23277467411545624,
23
+ "global_step": 2000,
24
+ "eval_loss": 4.384617805480957,
25
+ "eval_cosine_accuracy": 0.9376716613769531
26
+ },
27
+ {
28
+ "epoch": 0.2909683426443203,
29
+ "global_step": 2500,
30
+ "eval_loss": 4.382342338562012,
31
+ "eval_cosine_accuracy": 0.929537296295166
32
+ },
33
+ {
34
+ "epoch": 0.34916201117318435,
35
+ "global_step": 3000,
36
+ "eval_loss": 4.398530006408691,
37
+ "eval_cosine_accuracy": 0.9236213564872742
38
+ },
39
+ {
40
+ "epoch": 0.4073556797020484,
41
+ "global_step": 3500,
42
+ "eval_loss": 4.428633689880371,
43
+ "eval_cosine_accuracy": 0.9207690954208374
44
+ },
45
+ {
46
+ "epoch": 0.4655493482309125,
47
+ "global_step": 4000,
48
+ "eval_loss": 4.413504600524902,
49
+ "eval_cosine_accuracy": 0.9240439534187317
50
+ },
51
+ {
52
+ "epoch": 0.5237430167597765,
53
+ "global_step": 4500,
54
+ "eval_loss": 4.584815979003906,
55
+ "eval_cosine_accuracy": 0.9183393120765686
56
+ },
57
+ {
58
+ "epoch": 0.5819366852886406,
59
+ "global_step": 5000,
60
+ "eval_loss": 4.731673240661621,
61
+ "eval_cosine_accuracy": 0.9231988191604614
62
+ },
63
+ {
64
+ "epoch": 0.6401303538175046,
65
+ "global_step": 5500,
66
+ "eval_loss": 4.590743064880371,
67
+ "eval_cosine_accuracy": 0.9202408790588379
68
+ },
69
+ {
70
+ "epoch": 0.6983240223463687,
71
+ "global_step": 6000,
72
+ "eval_loss": 4.662985324859619,
73
+ "eval_cosine_accuracy": 0.9188675284385681
74
+ },
75
+ {
76
+ "epoch": 0.7565176908752328,
77
+ "global_step": 6500,
78
+ "eval_loss": 4.784646511077881,
79
+ "eval_cosine_accuracy": 0.9199239611625671
80
+ },
81
+ {
82
+ "epoch": 0.8147113594040968,
83
+ "global_step": 7000,
84
+ "eval_loss": 4.925645351409912,
85
+ "eval_cosine_accuracy": 0.9054510593414307
86
+ },
87
+ {
88
+ "epoch": 0.8729050279329609,
89
+ "global_step": 7500,
90
+ "eval_loss": 4.843690395355225,
91
+ "eval_cosine_accuracy": 0.9056623578071594
92
+ },
93
+ {
94
+ "epoch": 0.931098696461825,
95
+ "global_step": 8000,
96
+ "eval_loss": 4.833749771118164,
97
+ "eval_cosine_accuracy": 0.907247006893158
98
+ },
99
+ {
100
+ "epoch": 0.9892923649906891,
101
+ "global_step": 8500,
102
+ "eval_loss": 4.809013366699219,
103
+ "eval_cosine_accuracy": 0.9006972312927246
104
+ },
105
+ {
106
+ "epoch": 1.047486033519553,
107
+ "global_step": 9000,
108
+ "eval_loss": 4.823788166046143,
109
+ "eval_cosine_accuracy": 0.8939362168312073
110
+ },
111
+ {
112
+ "epoch": 1.105679702048417,
113
+ "global_step": 9500,
114
+ "eval_loss": 4.753988265991211,
115
+ "eval_cosine_accuracy": 0.9032326340675354
116
+ },
117
+ {
118
+ "epoch": 1.1638733705772812,
119
+ "global_step": 10000,
120
+ "eval_loss": 4.775505065917969,
121
+ "eval_cosine_accuracy": 0.9001690149307251
122
+ },
123
+ {
124
+ "epoch": 1.2220670391061452,
125
+ "global_step": 10500,
126
+ "eval_loss": 4.801342010498047,
127
+ "eval_cosine_accuracy": 0.895415186882019
128
+ },
129
+ {
130
+ "epoch": 1.2802607076350094,
131
+ "global_step": 11000,
132
+ "eval_loss": 4.581014156341553,
133
+ "eval_cosine_accuracy": 0.9016479849815369
134
+ },
135
+ {
136
+ "epoch": 1.3384543761638734,
137
+ "global_step": 11500,
138
+ "eval_loss": 4.6524434089660645,
139
+ "eval_cosine_accuracy": 0.8949925899505615
140
+ },
141
+ {
142
+ "epoch": 1.3966480446927374,
143
+ "global_step": 12000,
144
+ "eval_loss": 4.835806369781494,
145
+ "eval_cosine_accuracy": 0.8828438520431519
146
+ },
147
+ {
148
+ "epoch": 1.4548417132216014,
149
+ "global_step": 12500,
150
+ "eval_loss": 4.768213272094727,
151
+ "eval_cosine_accuracy": 0.8947813510894775
152
+ },
153
+ {
154
+ "epoch": 1.5130353817504656,
155
+ "global_step": 13000,
156
+ "eval_loss": 4.828193664550781,
157
+ "eval_cosine_accuracy": 0.8974223732948303
158
+ },
159
+ {
160
+ "epoch": 1.5712290502793296,
161
+ "global_step": 13500,
162
+ "eval_loss": 4.6300153732299805,
163
+ "eval_cosine_accuracy": 0.8914008140563965
164
+ },
165
+ {
166
+ "epoch": 1.6294227188081938,
167
+ "global_step": 14000,
168
+ "eval_loss": 4.730409145355225,
169
+ "eval_cosine_accuracy": 0.892034649848938
170
+ },
171
+ {
172
+ "epoch": 1.6876163873370578,
173
+ "global_step": 14500,
174
+ "eval_loss": 4.478672981262207,
175
+ "eval_cosine_accuracy": 0.9077752232551575
176
+ },
177
+ {
178
+ "epoch": 1.7458100558659218,
179
+ "global_step": 15000,
180
+ "eval_loss": 4.705593109130859,
181
+ "eval_cosine_accuracy": 0.8841115832328796
182
+ },
183
+ {
184
+ "epoch": 1.8040037243947857,
185
+ "global_step": 15500,
186
+ "eval_loss": 4.561342716217041,
187
+ "eval_cosine_accuracy": 0.8960490226745605
188
+ },
189
+ {
190
+ "epoch": 1.86219739292365,
191
+ "global_step": 16000,
192
+ "eval_loss": 4.534769535064697,
193
+ "eval_cosine_accuracy": 0.9019649028778076
194
+ },
195
+ {
196
+ "epoch": 1.920391061452514,
197
+ "global_step": 16500,
198
+ "eval_loss": 4.725633144378662,
199
+ "eval_cosine_accuracy": 0.913057267665863
200
+ },
201
+ {
202
+ "epoch": 1.9785847299813781,
203
+ "global_step": 17000,
204
+ "eval_loss": 4.73307991027832,
205
+ "eval_cosine_accuracy": 0.8949925899505615
206
+ },
207
+ {
208
+ "epoch": 2.036778398510242,
209
+ "global_step": 17500,
210
+ "eval_loss": 4.846031188964844,
211
+ "eval_cosine_accuracy": 0.8875977396965027
212
+ },
213
+ {
214
+ "epoch": 2.094972067039106,
215
+ "global_step": 18000,
216
+ "eval_loss": 4.649144649505615,
217
+ "eval_cosine_accuracy": 0.8969997763633728
218
+ },
219
+ {
220
+ "epoch": 2.15316573556797,
221
+ "global_step": 18500,
222
+ "eval_loss": 4.538448810577393,
223
+ "eval_cosine_accuracy": 0.8948869705200195
224
+ },
225
+ {
226
+ "epoch": 2.211359404096834,
227
+ "global_step": 19000,
228
+ "eval_loss": 4.58160400390625,
229
+ "eval_cosine_accuracy": 0.9011197686195374
230
+ },
231
+ {
232
+ "epoch": 2.2695530726256985,
233
+ "global_step": 19500,
234
+ "eval_loss": 4.580519199371338,
235
+ "eval_cosine_accuracy": 0.9015423655509949
236
+ },
237
+ {
238
+ "epoch": 2.3277467411545625,
239
+ "global_step": 20000,
240
+ "eval_loss": 4.5440673828125,
241
+ "eval_cosine_accuracy": 0.8975279927253723
242
+ },
243
+ {
244
+ "epoch": 2.3859404096834265,
245
+ "global_step": 20500,
246
+ "eval_loss": 4.727934837341309,
247
+ "eval_cosine_accuracy": 0.8922459483146667
248
+ },
249
+ {
250
+ "epoch": 2.4441340782122905,
251
+ "global_step": 21000,
252
+ "eval_loss": 4.589358806610107,
253
+ "eval_cosine_accuracy": 0.9073526263237
254
+ },
255
+ {
256
+ "epoch": 2.5023277467411544,
257
+ "global_step": 21500,
258
+ "eval_loss": 4.702400207519531,
259
+ "eval_cosine_accuracy": 0.8977392911911011
260
+ },
261
+ {
262
+ "epoch": 2.560521415270019,
263
+ "global_step": 22000,
264
+ "eval_loss": 4.601651668548584,
265
+ "eval_cosine_accuracy": 0.9057680368423462
266
+ },
267
+ {
268
+ "epoch": 2.618715083798883,
269
+ "global_step": 22500,
270
+ "eval_loss": 4.483080863952637,
271
+ "eval_cosine_accuracy": 0.9074582457542419
272
+ },
273
+ {
274
+ "epoch": 2.676908752327747,
275
+ "global_step": 23000,
276
+ "eval_loss": 4.641151428222656,
277
+ "eval_cosine_accuracy": 0.8939362168312073
278
+ },
279
+ {
280
+ "epoch": 2.735102420856611,
281
+ "global_step": 23500,
282
+ "eval_loss": 4.60220193862915,
283
+ "eval_cosine_accuracy": 0.8949925899505615
284
+ },
285
+ {
286
+ "epoch": 2.793296089385475,
287
+ "global_step": 24000,
288
+ "eval_loss": 4.529559135437012,
289
+ "eval_cosine_accuracy": 0.8974223732948303
290
+ },
291
+ {
292
+ "epoch": 2.851489757914339,
293
+ "global_step": 24500,
294
+ "eval_loss": 4.752791881561279,
295
+ "eval_cosine_accuracy": 0.8923515677452087
296
+ },
297
+ {
298
+ "epoch": 2.9096834264432028,
299
+ "global_step": 25000,
300
+ "eval_loss": 4.698167324066162,
301
+ "eval_cosine_accuracy": 0.8986900448799133
302
+ },
303
+ {
304
+ "epoch": 2.967877094972067,
305
+ "global_step": 25500,
306
+ "eval_loss": 4.680306434631348,
307
+ "eval_cosine_accuracy": 0.8960490226745605
308
+ },
309
+ {
310
+ "epoch": 3.026070763500931,
311
+ "global_step": 26000,
312
+ "eval_loss": 4.683339595794678,
313
+ "eval_cosine_accuracy": 0.8993238806724548
314
+ },
315
+ {
316
+ "epoch": 3.084264432029795,
317
+ "global_step": 26500,
318
+ "eval_loss": 4.631614685058594,
319
+ "eval_cosine_accuracy": 0.896682858467102
320
+ },
321
+ {
322
+ "epoch": 3.142458100558659,
323
+ "global_step": 27000,
324
+ "eval_loss": 4.696747779846191,
325
+ "eval_cosine_accuracy": 0.8975279927253723
326
+ },
327
+ {
328
+ "epoch": 3.200651769087523,
329
+ "global_step": 27500,
330
+ "eval_loss": 4.715923309326172,
331
+ "eval_cosine_accuracy": 0.896682858467102
332
+ },
333
+ {
334
+ "epoch": 3.2588454376163876,
335
+ "global_step": 28000,
336
+ "eval_loss": 4.600804328918457,
337
+ "eval_cosine_accuracy": 0.9014367461204529
338
+ },
339
+ {
340
+ "epoch": 3.3170391061452515,
341
+ "global_step": 28500,
342
+ "eval_loss": 4.584753036499023,
343
+ "eval_cosine_accuracy": 0.9018592834472656
344
+ },
345
+ {
346
+ "epoch": 3.3752327746741155,
347
+ "global_step": 29000,
348
+ "eval_loss": 4.553245544433594,
349
+ "eval_cosine_accuracy": 0.9040777683258057
350
+ },
351
+ {
352
+ "epoch": 3.4334264432029795,
353
+ "global_step": 29500,
354
+ "eval_loss": 4.594734191894531,
355
+ "eval_cosine_accuracy": 0.9052398204803467
356
+ },
357
+ {
358
+ "epoch": 3.4916201117318435,
359
+ "global_step": 30000,
360
+ "eval_loss": 4.592144012451172,
361
+ "eval_cosine_accuracy": 0.9071413278579712
362
+ },
363
+ {
364
+ "epoch": 3.5498137802607075,
365
+ "global_step": 30500,
366
+ "eval_loss": 4.61123514175415,
367
+ "eval_cosine_accuracy": 0.9049229025840759
368
+ },
369
+ {
370
+ "epoch": 3.6080074487895715,
371
+ "global_step": 31000,
372
+ "eval_loss": 4.575016498565674,
373
+ "eval_cosine_accuracy": 0.9083033800125122
374
+ },
375
+ {
376
+ "epoch": 3.666201117318436,
377
+ "global_step": 31500,
378
+ "eval_loss": 4.594618320465088,
379
+ "eval_cosine_accuracy": 0.9043946862220764
380
+ },
381
+ {
382
+ "epoch": 3.7243947858473,
383
+ "global_step": 32000,
384
+ "eval_loss": 4.5688347816467285,
385
+ "eval_cosine_accuracy": 0.9060849547386169
386
+ },
387
+ {
388
+ "epoch": 3.782588454376164,
389
+ "global_step": 32500,
390
+ "eval_loss": 4.608202934265137,
391
+ "eval_cosine_accuracy": 0.9062961935997009
392
+ },
393
+ {
394
+ "epoch": 3.840782122905028,
395
+ "global_step": 33000,
396
+ "eval_loss": 4.594712734222412,
397
+ "eval_cosine_accuracy": 0.9059792757034302
398
+ },
399
+ {
400
+ "epoch": 3.898975791433892,
401
+ "global_step": 33500,
402
+ "eval_loss": 4.600465297698975,
403
+ "eval_cosine_accuracy": 0.9069300889968872
404
+ },
405
+ {
406
+ "epoch": 3.9571694599627563,
407
+ "global_step": 34000,
408
+ "eval_loss": 4.5989670753479,
409
+ "eval_cosine_accuracy": 0.9064018726348877
410
+ }
411
+ ],
412
+ "total_epochs": 4.0,
413
+ "total_steps": 34368
414
+ }