lemms commited on
Commit
5034c08
·
verified ·
1 Parent(s): 5fe38d7

Upload training_log.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_log.json +362 -0
training_log.json ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "step": 100,
4
+ "loss": 7.657252192497253,
5
+ "perplexity": 2115.935250373028,
6
+ "learning_rate": 0.00015150000000000019,
7
+ "step_time": 9.368009328842163,
8
+ "tokens_per_second": 109.30817466708918,
9
+ "memory_mb": 756.6015625
10
+ },
11
+ {
12
+ "step": 200,
13
+ "loss": 7.487581491470337,
14
+ "perplexity": 1785.7280665447065,
15
+ "learning_rate": 0.0002999999999999999,
16
+ "step_time": 10.94300365447998,
17
+ "tokens_per_second": 93.57577063229641,
18
+ "memory_mb": 730.99609375
19
+ },
20
+ {
21
+ "step": 300,
22
+ "loss": 7.163171410560608,
23
+ "perplexity": 1290.9987344513497,
24
+ "learning_rate": 0.00029794904665665113,
25
+ "step_time": 8.949450016021729,
26
+ "tokens_per_second": 114.42043904002891,
27
+ "memory_mb": 733.89453125
28
+ },
29
+ {
30
+ "step": 400,
31
+ "loss": 6.677380561828613,
32
+ "perplexity": 794.2359329078598,
33
+ "learning_rate": 0.00029185850380610337,
34
+ "step_time": 8.572781324386597,
35
+ "tokens_per_second": 119.44781527169884,
36
+ "memory_mb": 827.69140625
37
+ },
38
+ {
39
+ "step": 500,
40
+ "loss": 6.8126842975616455,
41
+ "perplexity": 909.3083881764071,
42
+ "learning_rate": 0.0002819134295109075,
43
+ "step_time": 8.801953315734863,
44
+ "tokens_per_second": 116.33781312716582,
45
+ "memory_mb": 849.21484375
46
+ },
47
+ {
48
+ "step": 600,
49
+ "loss": 6.901162624359131,
50
+ "perplexity": 993.4290292468936,
51
+ "learning_rate": 0.0002684159998210713,
52
+ "step_time": 8.660848379135132,
53
+ "tokens_per_second": 118.23322094714423,
54
+ "memory_mb": 734.16015625
55
+ },
56
+ {
57
+ "step": 700,
58
+ "loss": 6.6995872259140015,
59
+ "perplexity": 812.0705542959239,
60
+ "learning_rate": 0.0002517763273076916,
61
+ "step_time": 9.10423469543457,
62
+ "tokens_per_second": 112.47513209578146,
63
+ "memory_mb": 734.64453125
64
+ },
65
+ {
66
+ "step": 800,
67
+ "loss": 6.729204297065735,
68
+ "perplexity": 836.4814103649661,
69
+ "learning_rate": 0.00023250000000000793,
70
+ "step_time": 8.646829605102539,
71
+ "tokens_per_second": 118.42490794495734,
72
+ "memory_mb": 733.84765625
73
+ },
74
+ {
75
+ "step": 900,
76
+ "loss": 6.801577568054199,
77
+ "perplexity": 899.2648246887062,
78
+ "learning_rate": 0.00021117271934897237,
79
+ "step_time": 8.66753602027893,
80
+ "tokens_per_second": 118.14199532649263,
81
+ "memory_mb": 839.50390625
82
+ },
83
+ {
84
+ "step": 1000,
85
+ "loss": 6.5149312019348145,
86
+ "perplexity": 675.1475110828569,
87
+ "learning_rate": 0.00018844250398504186,
88
+ "step_time": 9.189276933670044,
89
+ "tokens_per_second": 111.43423006961567,
90
+ "memory_mb": 734.55078125
91
+ },
92
+ {
93
+ "step": 1100,
94
+ "loss": 6.551132082939148,
95
+ "perplexity": 700.0362244642821,
96
+ "learning_rate": 0.00016500000000000537,
97
+ "step_time": 8.517168760299683,
98
+ "tokens_per_second": 120.22774572379963,
99
+ "memory_mb": 913.80078125
100
+ },
101
+ {
102
+ "step": 1200,
103
+ "loss": 6.6684452295303345,
104
+ "perplexity": 787.170782663513,
105
+ "learning_rate": 0.00014155749601496882,
106
+ "step_time": 9.400139570236206,
107
+ "tokens_per_second": 108.93455276369572,
108
+ "memory_mb": 968.5859375
109
+ },
110
+ {
111
+ "step": 1300,
112
+ "loss": 6.406905889511108,
113
+ "perplexity": 606.0156976890383,
114
+ "learning_rate": 0.00011882728065103813,
115
+ "step_time": 8.490540981292725,
116
+ "tokens_per_second": 120.6048003603289,
117
+ "memory_mb": 979.359375
118
+ },
119
+ {
120
+ "step": 1400,
121
+ "loss": 6.3421419858932495,
122
+ "perplexity": 568.011682273887,
123
+ "learning_rate": 9.750000000000261e-05,
124
+ "step_time": 10.675879955291748,
125
+ "tokens_per_second": 95.91715196201983,
126
+ "memory_mb": 733.6328125
127
+ },
128
+ {
129
+ "step": 1500,
130
+ "loss": 6.335531115531921,
131
+ "perplexity": 564.2690154518974,
132
+ "learning_rate": 7.822367269231907e-05,
133
+ "step_time": 8.945564270019531,
134
+ "tokens_per_second": 114.47014062957085,
135
+ "memory_mb": 994.3515625
136
+ },
137
+ {
138
+ "step": 1600,
139
+ "loss": 6.629261136054993,
140
+ "perplexity": 756.9227010883292,
141
+ "learning_rate": 6.158400017893925e-05,
142
+ "step_time": 10.422486782073975,
143
+ "tokens_per_second": 98.2491051714468,
144
+ "memory_mb": 730.56640625
145
+ },
146
+ {
147
+ "step": 1700,
148
+ "loss": 6.302608251571655,
149
+ "perplexity": 545.9941446328027,
150
+ "learning_rate": 4.808657048910149e-05,
151
+ "step_time": 9.476832628250122,
152
+ "tokens_per_second": 108.05297932006208,
153
+ "memory_mb": 745.43359375
154
+ },
155
+ {
156
+ "step": 1800,
157
+ "loss": 6.266754984855652,
158
+ "perplexity": 526.765240241726,
159
+ "learning_rate": 3.8141496193902704e-05,
160
+ "step_time": 8.648972511291504,
161
+ "tokens_per_second": 118.3955664864394,
162
+ "memory_mb": 1009.6484375
163
+ },
164
+ {
165
+ "step": 1900,
166
+ "loss": 6.480456352233887,
167
+ "perplexity": 652.268542568135,
168
+ "learning_rate": 3.2050953343351995e-05,
169
+ "step_time": 8.593879699707031,
170
+ "tokens_per_second": 119.15456531639704,
171
+ "memory_mb": 735.796875
172
+ },
173
+ {
174
+ "step": 2000,
175
+ "loss": 5.975515604019165,
176
+ "perplexity": 393.6710271356782,
177
+ "learning_rate": 2.9999999999999997e-05,
178
+ "step_time": 8.94594669342041,
179
+ "tokens_per_second": 114.46524723349116,
180
+ "memory_mb": 729.84375
181
+ },
182
+ {
183
+ "step": 2100,
184
+ "loss": 6.554613709449768,
185
+ "perplexity": 702.4777368926921,
186
+ "learning_rate": 3.205095334333285e-05,
187
+ "step_time": 9.199656009674072,
188
+ "tokens_per_second": 111.3085096794047,
189
+ "memory_mb": 752.64453125
190
+ },
191
+ {
192
+ "step": 2200,
193
+ "loss": 6.471360206604004,
194
+ "perplexity": 646.3623155888199,
195
+ "learning_rate": 3.814149619382671e-05,
196
+ "step_time": 8.654725313186646,
197
+ "tokens_per_second": 118.3168688715975,
198
+ "memory_mb": 735.9375
199
+ },
200
+ {
201
+ "step": 2300,
202
+ "loss": 6.382450699806213,
203
+ "perplexity": 591.3752163574818,
204
+ "learning_rate": 4.808657048893273e-05,
205
+ "step_time": 8.75070834159851,
206
+ "tokens_per_second": 117.01909834340836,
207
+ "memory_mb": 776.53125
208
+ },
209
+ {
210
+ "step": 2400,
211
+ "loss": 5.957324385643005,
212
+ "perplexity": 386.5744152212925,
213
+ "learning_rate": 6.158400017864459e-05,
214
+ "step_time": 8.642782926559448,
215
+ "tokens_per_second": 118.4803562349376,
216
+ "memory_mb": 923.15234375
217
+ },
218
+ {
219
+ "step": 2500,
220
+ "loss": 6.218142509460449,
221
+ "perplexity": 501.7703322170689,
222
+ "learning_rate": 7.822367269186924e-05,
223
+ "step_time": 9.567925691604614,
224
+ "tokens_per_second": 107.0242425585004,
225
+ "memory_mb": 1050.1875
226
+ },
227
+ {
228
+ "step": 2600,
229
+ "loss": 6.314482092857361,
230
+ "perplexity": 552.515834581567,
231
+ "learning_rate": 9.749999999937299e-05,
232
+ "step_time": 8.212730169296265,
233
+ "tokens_per_second": 124.68448115199003,
234
+ "memory_mb": 738.69921875
235
+ },
236
+ {
237
+ "step": 2700,
238
+ "loss": 6.240906238555908,
239
+ "perplexity": 513.3234937600264,
240
+ "learning_rate": 0.00011882728065020969,
241
+ "step_time": 8.88506555557251,
242
+ "tokens_per_second": 115.24957172181702,
243
+ "memory_mb": 739.125
244
+ },
245
+ {
246
+ "step": 2800,
247
+ "loss": 6.308051347732544,
248
+ "perplexity": 548.9741461252174,
249
+ "learning_rate": 0.0001415574960139285,
250
+ "step_time": 9.43014669418335,
251
+ "tokens_per_second": 108.58791842884247,
252
+ "memory_mb": 732.40234375
253
+ },
254
+ {
255
+ "step": 2900,
256
+ "loss": 6.410398960113525,
257
+ "perplexity": 608.136254778883,
258
+ "learning_rate": 0.00016499999999874617,
259
+ "step_time": 8.674461603164673,
260
+ "tokens_per_second": 118.0476722182294,
261
+ "memory_mb": 733.71484375
262
+ },
263
+ {
264
+ "step": 3000,
265
+ "loss": 6.160744071006775,
266
+ "perplexity": 473.7804700263767,
267
+ "learning_rate": 0.0001884425039835638,
268
+ "step_time": 9.056138277053833,
269
+ "tokens_per_second": 113.07247843096432,
270
+ "memory_mb": 803.75390625
271
+ },
272
+ {
273
+ "step": 3100,
274
+ "loss": 6.270610332489014,
275
+ "perplexity": 528.8000232415734,
276
+ "learning_rate": 0.0002111727193472824,
277
+ "step_time": 9.650378227233887,
278
+ "tokens_per_second": 106.10983071214939,
279
+ "memory_mb": 790.328125
280
+ },
281
+ {
282
+ "step": 3200,
283
+ "loss": 6.4413875341415405,
284
+ "perplexity": 627.2765639068664,
285
+ "learning_rate": 0.00023249999999811922,
286
+ "step_time": 8.436410665512085,
287
+ "tokens_per_second": 121.37863371043517,
288
+ "memory_mb": 940.3671875
289
+ },
290
+ {
291
+ "step": 3300,
292
+ "loss": 6.163665413856506,
293
+ "perplexity": 475.1665688640195,
294
+ "learning_rate": 0.0002517763273056231,
295
+ "step_time": 10.011188507080078,
296
+ "tokens_per_second": 102.28555773131335,
297
+ "memory_mb": 747.43359375
298
+ },
299
+ {
300
+ "step": 3400,
301
+ "loss": 6.067382216453552,
302
+ "perplexity": 431.5494984476535,
303
+ "learning_rate": 0.00026841599981884787,
304
+ "step_time": 8.671327114105225,
305
+ "tokens_per_second": 118.0903437876665,
306
+ "memory_mb": 970.37109375
307
+ },
308
+ {
309
+ "step": 3500,
310
+ "loss": 6.14486300945282,
311
+ "perplexity": 466.3157638357051,
312
+ "learning_rate": 0.0002819134295085615,
313
+ "step_time": 8.194751024246216,
314
+ "tokens_per_second": 124.95803679333764,
315
+ "memory_mb": 736.890625
316
+ },
317
+ {
318
+ "step": 3600,
319
+ "loss": 6.448354721069336,
320
+ "perplexity": 631.6621769355031,
321
+ "learning_rate": 0.00029185850380367053,
322
+ "step_time": 8.76004934310913,
323
+ "tokens_per_second": 116.89431872955184,
324
+ "memory_mb": 781.22265625
325
+ },
326
+ {
327
+ "step": 3700,
328
+ "loss": 6.149544358253479,
329
+ "perplexity": 468.5038682213576,
330
+ "learning_rate": 0.00029794904665416755,
331
+ "step_time": 11.995165824890137,
332
+ "tokens_per_second": 85.36772354369505,
333
+ "memory_mb": 836.3046875
334
+ },
335
+ {
336
+ "step": 3800,
337
+ "loss": 6.083824634552002,
338
+ "perplexity": 438.70387214992155,
339
+ "learning_rate": 0.0002999999999975028,
340
+ "step_time": 8.194983720779419,
341
+ "tokens_per_second": 124.95448861033346,
342
+ "memory_mb": 766.9453125
343
+ },
344
+ {
345
+ "step": 3900,
346
+ "loss": 6.295181512832642,
347
+ "perplexity": 541.954209109435,
348
+ "learning_rate": 0.0002979490466541723,
349
+ "step_time": 9.617033004760742,
350
+ "tokens_per_second": 106.47774625428518,
351
+ "memory_mb": 837.29296875
352
+ },
353
+ {
354
+ "step": 4000,
355
+ "loss": 5.752694249153137,
356
+ "perplexity": 315.038309582537,
357
+ "learning_rate": 0.0002918585038036804,
358
+ "step_time": 10.153574228286743,
359
+ "tokens_per_second": 100.85118569845567,
360
+ "memory_mb": 729.46484375
361
+ }
362
+ ]