[
  {
    "step": 100,
    "loss": 7.657252192497253,
    "perplexity": 2115.935250373028,
    "learning_rate": 0.00015150000000000019,
    "step_time": 9.368009328842163,
    "tokens_per_second": 109.30817466708918,
    "memory_mb": 756.6015625
  },
  {
    "step": 200,
    "loss": 7.487581491470337,
    "perplexity": 1785.7280665447065,
    "learning_rate": 0.0002999999999999999,
    "step_time": 10.94300365447998,
    "tokens_per_second": 93.57577063229641,
    "memory_mb": 730.99609375
  },
  {
    "step": 300,
    "loss": 7.163171410560608,
    "perplexity": 1290.9987344513497,
    "learning_rate": 0.00029794904665665113,
    "step_time": 8.949450016021729,
    "tokens_per_second": 114.42043904002891,
    "memory_mb": 733.89453125
  },
  {
    "step": 400,
    "loss": 6.677380561828613,
    "perplexity": 794.2359329078598,
    "learning_rate": 0.00029185850380610337,
    "step_time": 8.572781324386597,
    "tokens_per_second": 119.44781527169884,
    "memory_mb": 827.69140625
  },
  {
    "step": 500,
    "loss": 6.8126842975616455,
    "perplexity": 909.3083881764071,
    "learning_rate": 0.0002819134295109075,
    "step_time": 8.801953315734863,
    "tokens_per_second": 116.33781312716582,
    "memory_mb": 849.21484375
  },
  {
    "step": 600,
    "loss": 6.901162624359131,
    "perplexity": 993.4290292468936,
    "learning_rate": 0.0002684159998210713,
    "step_time": 8.660848379135132,
    "tokens_per_second": 118.23322094714423,
    "memory_mb": 734.16015625
  },
  {
    "step": 700,
    "loss": 6.6995872259140015,
    "perplexity": 812.0705542959239,
    "learning_rate": 0.0002517763273076916,
    "step_time": 9.10423469543457,
    "tokens_per_second": 112.47513209578146,
    "memory_mb": 734.64453125
  },
  {
    "step": 800,
    "loss": 6.729204297065735,
    "perplexity": 836.4814103649661,
    "learning_rate": 0.00023250000000000793,
    "step_time": 8.646829605102539,
    "tokens_per_second": 118.42490794495734,
    "memory_mb": 733.84765625
  },
  {
    "step": 900,
    "loss": 6.801577568054199,
    "perplexity": 899.2648246887062,
    "learning_rate": 0.00021117271934897237,
    "step_time": 8.66753602027893,
    "tokens_per_second": 118.14199532649263,
    "memory_mb": 839.50390625
  },
  {
    "step": 1000,
    "loss": 6.5149312019348145,
    "perplexity": 675.1475110828569,
    "learning_rate": 0.00018844250398504186,
    "step_time": 9.189276933670044,
    "tokens_per_second": 111.43423006961567,
    "memory_mb": 734.55078125
  },
  {
    "step": 1100,
    "loss": 6.551132082939148,
    "perplexity": 700.0362244642821,
    "learning_rate": 0.00016500000000000537,
    "step_time": 8.517168760299683,
    "tokens_per_second": 120.22774572379963,
    "memory_mb": 913.80078125
  },
  {
    "step": 1200,
    "loss": 6.6684452295303345,
    "perplexity": 787.170782663513,
    "learning_rate": 0.00014155749601496882,
    "step_time": 9.400139570236206,
    "tokens_per_second": 108.93455276369572,
    "memory_mb": 968.5859375
  },
  {
    "step": 1300,
    "loss": 6.406905889511108,
    "perplexity": 606.0156976890383,
    "learning_rate": 0.00011882728065103813,
    "step_time": 8.490540981292725,
    "tokens_per_second": 120.6048003603289,
    "memory_mb": 979.359375
  },
  {
    "step": 1400,
    "loss": 6.3421419858932495,
    "perplexity": 568.011682273887,
    "learning_rate": 9.750000000000261e-05,
    "step_time": 10.675879955291748,
    "tokens_per_second": 95.91715196201983,
    "memory_mb": 733.6328125
  },
  {
    "step": 1500,
    "loss": 6.335531115531921,
    "perplexity": 564.2690154518974,
    "learning_rate": 7.822367269231907e-05,
    "step_time": 8.945564270019531,
    "tokens_per_second": 114.47014062957085,
    "memory_mb": 994.3515625
  },
  {
    "step": 1600,
    "loss": 6.629261136054993,
    "perplexity": 756.9227010883292,
    "learning_rate": 6.158400017893925e-05,
    "step_time": 10.422486782073975,
    "tokens_per_second": 98.2491051714468,
    "memory_mb": 730.56640625
  },
  {
    "step": 1700,
    "loss": 6.302608251571655,
    "perplexity": 545.9941446328027,
    "learning_rate": 4.808657048910149e-05,
    "step_time": 9.476832628250122,
    "tokens_per_second": 108.05297932006208,
    "memory_mb": 745.43359375
  },
  {
    "step": 1800,
    "loss": 6.266754984855652,
    "perplexity": 526.765240241726,
    "learning_rate": 3.8141496193902704e-05,
    "step_time": 8.648972511291504,
    "tokens_per_second": 118.3955664864394,
    "memory_mb": 1009.6484375
  },
  {
    "step": 1900,
    "loss": 6.480456352233887,
    "perplexity": 652.268542568135,
    "learning_rate": 3.2050953343351995e-05,
    "step_time": 8.593879699707031,
    "tokens_per_second": 119.15456531639704,
    "memory_mb": 735.796875
  },
  {
    "step": 2000,
    "loss": 5.975515604019165,
    "perplexity": 393.6710271356782,
    "learning_rate": 2.9999999999999997e-05,
    "step_time": 8.94594669342041,
    "tokens_per_second": 114.46524723349116,
    "memory_mb": 729.84375
  },
  {
    "step": 2100,
    "loss": 6.554613709449768,
    "perplexity": 702.4777368926921,
    "learning_rate": 3.205095334333285e-05,
    "step_time": 9.199656009674072,
    "tokens_per_second": 111.3085096794047,
    "memory_mb": 752.64453125
  },
  {
    "step": 2200,
    "loss": 6.471360206604004,
    "perplexity": 646.3623155888199,
    "learning_rate": 3.814149619382671e-05,
    "step_time": 8.654725313186646,
    "tokens_per_second": 118.3168688715975,
    "memory_mb": 735.9375
  },
  {
    "step": 2300,
    "loss": 6.382450699806213,
    "perplexity": 591.3752163574818,
    "learning_rate": 4.808657048893273e-05,
    "step_time": 8.75070834159851,
    "tokens_per_second": 117.01909834340836,
    "memory_mb": 776.53125
  },
  {
    "step": 2400,
    "loss": 5.957324385643005,
    "perplexity": 386.5744152212925,
    "learning_rate": 6.158400017864459e-05,
    "step_time": 8.642782926559448,
    "tokens_per_second": 118.4803562349376,
    "memory_mb": 923.15234375
  },
  {
    "step": 2500,
    "loss": 6.218142509460449,
    "perplexity": 501.7703322170689,
    "learning_rate": 7.822367269186924e-05,
    "step_time": 9.567925691604614,
    "tokens_per_second": 107.0242425585004,
    "memory_mb": 1050.1875
  },
  {
    "step": 2600,
    "loss": 6.314482092857361,
    "perplexity": 552.515834581567,
    "learning_rate": 9.749999999937299e-05,
    "step_time": 8.212730169296265,
    "tokens_per_second": 124.68448115199003,
    "memory_mb": 738.69921875
  },
  {
    "step": 2700,
    "loss": 6.240906238555908,
    "perplexity": 513.3234937600264,
    "learning_rate": 0.00011882728065020969,
    "step_time": 8.88506555557251,
    "tokens_per_second": 115.24957172181702,
    "memory_mb": 739.125
  },
  {
    "step": 2800,
    "loss": 6.308051347732544,
    "perplexity": 548.9741461252174,
    "learning_rate": 0.0001415574960139285,
    "step_time": 9.43014669418335,
    "tokens_per_second": 108.58791842884247,
    "memory_mb": 732.40234375
  },
  {
    "step": 2900,
    "loss": 6.410398960113525,
    "perplexity": 608.136254778883,
    "learning_rate": 0.00016499999999874617,
    "step_time": 8.674461603164673,
    "tokens_per_second": 118.0476722182294,
    "memory_mb": 733.71484375
  },
  {
    "step": 3000,
    "loss": 6.160744071006775,
    "perplexity": 473.7804700263767,
    "learning_rate": 0.0001884425039835638,
    "step_time": 9.056138277053833,
    "tokens_per_second": 113.07247843096432,
    "memory_mb": 803.75390625
  },
  {
    "step": 3100,
    "loss": 6.270610332489014,
    "perplexity": 528.8000232415734,
    "learning_rate": 0.0002111727193472824,
    "step_time": 9.650378227233887,
    "tokens_per_second": 106.10983071214939,
    "memory_mb": 790.328125
  },
  {
    "step": 3200,
    "loss": 6.4413875341415405,
    "perplexity": 627.2765639068664,
    "learning_rate": 0.00023249999999811922,
    "step_time": 8.436410665512085,
    "tokens_per_second": 121.37863371043517,
    "memory_mb": 940.3671875
  },
  {
    "step": 3300,
    "loss": 6.163665413856506,
    "perplexity": 475.1665688640195,
    "learning_rate": 0.0002517763273056231,
    "step_time": 10.011188507080078,
    "tokens_per_second": 102.28555773131335,
    "memory_mb": 747.43359375
  },
  {
    "step": 3400,
    "loss": 6.067382216453552,
    "perplexity": 431.5494984476535,
    "learning_rate": 0.00026841599981884787,
    "step_time": 8.671327114105225,
    "tokens_per_second": 118.0903437876665,
    "memory_mb": 970.37109375
  },
  {
    "step": 3500,
    "loss": 6.14486300945282,
    "perplexity": 466.3157638357051,
    "learning_rate": 0.0002819134295085615,
    "step_time": 8.194751024246216,
    "tokens_per_second": 124.95803679333764,
    "memory_mb": 736.890625
  },
  {
    "step": 3600,
    "loss": 6.448354721069336,
    "perplexity": 631.6621769355031,
    "learning_rate": 0.00029185850380367053,
    "step_time": 8.76004934310913,
    "tokens_per_second": 116.89431872955184,
    "memory_mb": 781.22265625
  },
  {
    "step": 3700,
    "loss": 6.149544358253479,
    "perplexity": 468.5038682213576,
    "learning_rate": 0.00029794904665416755,
    "step_time": 11.995165824890137,
    "tokens_per_second": 85.36772354369505,
    "memory_mb": 836.3046875
  },
  {
    "step": 3800,
    "loss": 6.083824634552002,
    "perplexity": 438.70387214992155,
    "learning_rate": 0.0002999999999975028,
    "step_time": 8.194983720779419,
    "tokens_per_second": 124.95448861033346,
    "memory_mb": 766.9453125
  },
  {
    "step": 3900,
    "loss": 6.295181512832642,
    "perplexity": 541.954209109435,
    "learning_rate": 0.0002979490466541723,
    "step_time": 9.617033004760742,
    "tokens_per_second": 106.47774625428518,
    "memory_mb": 837.29296875
  },
  {
    "step": 4000,
    "loss": 5.752694249153137,
    "perplexity": 315.038309582537,
    "learning_rate": 0.0002918585038036804,
    "step_time": 10.153574228286743,
    "tokens_per_second": 100.85118569845567,
    "memory_mb": 729.46484375
  }
]