File size: 46,961 Bytes
07662b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
timestamp,event,step,epoch,key,value
1774091071.9870658,train_step,10,1,train/step_loss,1.5507386380975896
1774091071.9870658,train_step,10,1,train/step_real_loss,1.2866992354393005
1774091071.9870658,train_step,10,1,train/lr,4.761904761904762e-05
1774091071.9870658,train_step,10,1,train/step_canary_loss,10.0
1774091071.9870658,train_step,10,1,perf/step_duration_sec,8.244300998747349
1774091071.9870658,train_step,10,1,perf/samples_per_sec,8.00553012438873
1774091071.9870658,train_step,10,1,perf/tokens_per_sec,6440.812872803661
1774091071.9870658,train_step,10,1,perf/logical_batch_size,66.0
1774091071.9870658,train_step,10,1,perf/logical_token_count,53100.0
1774091071.9870658,train_step,10,1,perf/gradient_accumulation_steps,8.0
1774091071.9870658,train_step,10,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091071.9870658,train_step,10,1,system/cuda_max_memory_allocated_gb,82.16860580444336
1774091154.380765,train_step,20,1,train/step_loss,1.4504926845208923
1774091154.380765,train_step,20,1,train/step_real_loss,1.2006134390830994
1774091154.380765,train_step,20,1,train/lr,9.523809523809524e-05
1774091154.380765,train_step,20,1,train/step_canary_loss,6.78125
1774091154.380765,train_step,20,1,perf/step_duration_sec,8.244972918182611
1774091154.380765,train_step,20,1,perf/samples_per_sec,8.126163744242886
1774091154.380765,train_step,20,1,perf/tokens_per_sec,6422.095078472549
1774091154.380765,train_step,20,1,perf/logical_batch_size,67.0
1774091154.380765,train_step,20,1,perf/logical_token_count,52950.0
1774091154.380765,train_step,20,1,perf/gradient_accumulation_steps,8.0
1774091154.380765,train_step,20,1,system/cuda_memory_allocated_gb,17.911073684692383
1774091154.380765,train_step,20,1,system/cuda_max_memory_allocated_gb,82.16860580444336
1774091235.6533117,train_step,30,1,train/step_loss,0.9861254319548607
1774091235.6533117,train_step,30,1,train/step_real_loss,0.9861254319548607
1774091235.6533117,train_step,30,1,train/lr,9.98706541985615e-05
1774091235.6533117,train_step,30,1,perf/step_duration_sec,7.973596462979913
1774091235.6533117,train_step,30,1,perf/samples_per_sec,8.026490968929942
1774091235.6533117,train_step,30,1,perf/tokens_per_sec,7148.9697609673985
1774091235.6533117,train_step,30,1,perf/logical_batch_size,64.0
1774091235.6533117,train_step,30,1,perf/logical_token_count,57003.0
1774091235.6533117,train_step,30,1,perf/gradient_accumulation_steps,8.0
1774091235.6533117,train_step,30,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091235.6533117,train_step,30,1,system/cuda_max_memory_allocated_gb,82.16860580444336
1774091318.662035,train_step,40,1,train/step_loss,1.0557814985513687
1774091318.662035,train_step,40,1,train/step_real_loss,1.0557814985513687
1774091318.662035,train_step,40,1,train/lr,9.942439201095397e-05
1774091318.662035,train_step,40,1,perf/step_duration_sec,8.119743634015322
1774091318.662035,train_step,40,1,perf/samples_per_sec,7.882022251526572
1774091318.662035,train_step,40,1,perf/tokens_per_sec,6340.347961767047
1774091318.662035,train_step,40,1,perf/logical_batch_size,64.0
1774091318.662035,train_step,40,1,perf/logical_token_count,51482.0
1774091318.662035,train_step,40,1,perf/gradient_accumulation_steps,8.0
1774091318.662035,train_step,40,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091318.662035,train_step,40,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091402.0365882,train_step,50,1,train/step_loss,1.017115435233483
1774091402.0365882,train_step,50,1,train/step_real_loss,0.9500000476837158
1774091402.0365882,train_step,50,1,train/lr,9.866246608261724e-05
1774091402.0365882,train_step,50,1,train/step_canary_loss,5.3125
1774091402.0365882,train_step,50,1,perf/step_duration_sec,8.119036318734288
1774091402.0365882,train_step,50,1,perf/samples_per_sec,8.005876245437603
1774091402.0365882,train_step,50,1,perf/tokens_per_sec,6307.645142790008
1774091402.0365882,train_step,50,1,perf/logical_batch_size,65.0
1774091402.0365882,train_step,50,1,perf/logical_token_count,51212.0
1774091402.0365882,train_step,50,1,perf/gradient_accumulation_steps,8.0
1774091402.0365882,train_step,50,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091402.0365882,train_step,50,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091429.1958544,eval_step,50,1,eval/loss,0.8033588177513802
1774091429.1958544,eval_step,50,1,eval/duration_sec,27.156014366075397
1774091511.9092546,train_step,60,1,train/step_loss,1.2237938749256418
1774091511.9092546,train_step,60,1,train/step_real_loss,0.9667061120271683
1774091511.9092546,train_step,60,1,train/lr,9.7589742682592e-05
1774091511.9092546,train_step,60,1,train/step_canary_loss,6.708333333333333
1774091511.9092546,train_step,60,1,perf/step_duration_sec,8.495171755552292
1774091511.9092546,train_step,60,1,perf/samples_per_sec,7.886832889071372
1774091511.9092546,train_step,60,1,perf/tokens_per_sec,6228.008276044599
1774091511.9092546,train_step,60,1,perf/logical_batch_size,67.0
1774091511.9092546,train_step,60,1,perf/logical_token_count,52908.0
1774091511.9092546,train_step,60,1,perf/gradient_accumulation_steps,8.0
1774091511.9092546,train_step,60,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091511.9092546,train_step,60,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091593.5344725,train_step,70,1,train/step_loss,1.0743033794256358
1774091593.5344725,train_step,70,1,train/step_real_loss,0.9387456253170967
1774091593.5344725,train_step,70,1,train/lr,9.621307308142384e-05
1774091593.5344725,train_step,70,1,train/step_canary_loss,9.75
1774091593.5344725,train_step,70,1,perf/step_duration_sec,8.257235972210765
1774091593.5344725,train_step,70,1,perf/samples_per_sec,7.8718835478062665
1774091593.5344725,train_step,70,1,perf/tokens_per_sec,6568.057420488078
1774091593.5344725,train_step,70,1,perf/logical_batch_size,65.0
1774091593.5344725,train_step,70,1,perf/logical_token_count,54234.0
1774091593.5344725,train_step,70,1,perf/gradient_accumulation_steps,8.0
1774091593.5344725,train_step,70,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091593.5344725,train_step,70,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091676.5485113,train_step,80,1,train/step_loss,0.8626528829336166
1774091676.5485113,train_step,80,1,train/step_real_loss,0.8626528829336166
1774091676.5485113,train_step,80,1,train/lr,9.454124979346391e-05
1774091676.5485113,train_step,80,1,perf/step_duration_sec,8.386640733107924
1774091676.5485113,train_step,80,1,perf/samples_per_sec,7.631184169765056
1774091676.5485113,train_step,80,1,perf/tokens_per_sec,6658.446662625317
1774091676.5485113,train_step,80,1,perf/logical_batch_size,64.0
1774091676.5485113,train_step,80,1,perf/logical_token_count,55842.0
1774091676.5485113,train_step,80,1,perf/gradient_accumulation_steps,8.0
1774091676.5485113,train_step,80,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091676.5485113,train_step,80,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091757.879373,train_step,90,1,train/step_loss,0.9340124875307083
1774091757.879373,train_step,90,1,train/step_real_loss,0.9340124875307083
1774091757.879373,train_step,90,1,train/lr,9.258495042083221e-05
1774091757.879373,train_step,90,1,perf/step_duration_sec,8.493970146402717
1774091757.879373,train_step,90,1,perf/samples_per_sec,7.534756880103312
1774091757.879373,train_step,90,1,perf/tokens_per_sec,5785.162786428072
1774091757.879373,train_step,90,1,perf/logical_batch_size,64.0
1774091757.879373,train_step,90,1,perf/logical_token_count,49139.0
1774091757.879373,train_step,90,1,perf/gradient_accumulation_steps,8.0
1774091757.879373,train_step,90,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091757.879373,train_step,90,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091839.6918592,train_step,100,1,train/step_loss,0.9577739440477812
1774091839.6918592,train_step,100,1,train/step_real_loss,0.880453996360302
1774091839.6918592,train_step,100,1,train/lr,9.035666945770107e-05
1774091839.6918592,train_step,100,1,train/step_canary_loss,5.90625
1774091839.6918592,train_step,100,1,perf/step_duration_sec,8.234961070120335
1774091839.6918592,train_step,100,1,perf/samples_per_sec,7.893176354633353
1774091839.6918592,train_step,100,1,perf/tokens_per_sec,6771.616711381143
1774091839.6918592,train_step,100,1,perf/logical_batch_size,65.0
1774091839.6918592,train_step,100,1,perf/logical_token_count,55764.0
1774091839.6918592,train_step,100,1,perf/gradient_accumulation_steps,8.0
1774091839.6918592,train_step,100,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091839.6918592,train_step,100,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774091866.8768253,eval_step,100,1,eval/loss,0.7754902612717671
1774091866.8768253,eval_step,100,1,eval/duration_sec,27.182465851306915
1774091950.1670244,train_step,110,1,train/step_loss,0.9567192720644402
1774091950.1670244,train_step,110,1,train/step_real_loss,0.860762245953083
1774091950.1670244,train_step,110,1,train/lr,8.787063849045118e-05
1774091950.1670244,train_step,110,1,train/step_canary_loss,4.02734375
1774091950.1670244,train_step,110,1,perf/step_duration_sec,8.036363879218698
1774091950.1670244,train_step,110,1,perf/samples_per_sec,8.21266943507498
1774091950.1670244,train_step,110,1,perf/tokens_per_sec,6478.054102878841
1774091950.1670244,train_step,110,1,perf/logical_batch_size,66.0
1774091950.1670244,train_step,110,1,perf/logical_token_count,52060.0
1774091950.1670244,train_step,110,1,perf/gradient_accumulation_steps,8.0
1774091950.1670244,train_step,110,1,system/cuda_memory_allocated_gb,17.816345691680908
1774091950.1670244,train_step,110,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092031.8751383,train_step,120,1,train/step_loss,0.9170611575245857
1774092031.8751383,train_step,120,1,train/step_real_loss,0.9170611575245857
1774092031.8751383,train_step,120,1,train/lr,8.5142735303366e-05
1774092031.8751383,train_step,120,1,perf/step_duration_sec,7.79720464348793
1774092031.8751383,train_step,120,1,perf/samples_per_sec,8.208069805305357
1774092031.8751383,train_step,120,1,perf/tokens_per_sec,6600.4423832818775
1774092031.8751383,train_step,120,1,perf/logical_batch_size,64.0
1774092031.8751383,train_step,120,1,perf/logical_token_count,51465.0
1774092031.8751383,train_step,120,1,perf/gradient_accumulation_steps,8.0
1774092031.8751383,train_step,120,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092031.8751383,train_step,120,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092114.4179726,train_step,130,1,train/step_loss,0.9158061426697355
1774092114.4179726,train_step,130,1,train/step_real_loss,0.8711828961968422
1774092114.4179726,train_step,130,1,train/lr,8.219038247038819e-05
1774092114.4179726,train_step,130,1,train/step_canary_loss,2.34375
1774092114.4179726,train_step,130,1,perf/step_duration_sec,8.22744246572256
1774092114.4179726,train_step,130,1,perf/samples_per_sec,8.021933945447975
1774092114.4179726,train_step,130,1,perf/tokens_per_sec,6342.311139506375
1774092114.4179726,train_step,130,1,perf/logical_batch_size,66.0
1774092114.4179726,train_step,130,1,perf/logical_token_count,52181.0
1774092114.4179726,train_step,130,1,perf/gradient_accumulation_steps,8.0
1774092114.4179726,train_step,130,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092114.4179726,train_step,130,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092196.3902268,train_step,140,1,train/step_loss,0.9856266433542425
1774092196.3902268,train_step,140,1,train/step_real_loss,0.8489470109343529
1774092196.3902268,train_step,140,1,train/lr,7.903243608061246e-05
1774092196.3902268,train_step,140,1,train/step_canary_loss,5.359375
1774092196.3902268,train_step,140,1,perf/step_duration_sec,8.333684524521232
1774092196.3902268,train_step,140,1,perf/samples_per_sec,7.9196662419605675
1774092196.3902268,train_step,140,1,perf/tokens_per_sec,6461.007714123103
1774092196.3902268,train_step,140,1,perf/logical_batch_size,66.0
1774092196.3902268,train_step,140,1,perf/logical_token_count,53844.0
1774092196.3902268,train_step,140,1,perf/gradient_accumulation_steps,8.0
1774092196.3902268,train_step,140,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092196.3902268,train_step,140,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092278.982639,train_step,150,1,train/step_loss,0.94221530854702
1774092278.982639,train_step,150,1,train/step_real_loss,0.94221530854702
1774092278.982639,train_step,150,1,train/lr,7.568906530820282e-05
1774092278.982639,train_step,150,1,perf/step_duration_sec,8.388992108404636
1774092278.982639,train_step,150,1,perf/samples_per_sec,7.629045202686584
1774092278.982639,train_step,150,1,perf/tokens_per_sec,6081.421861022835
1774092278.982639,train_step,150,1,perf/logical_batch_size,64.0
1774092278.982639,train_step,150,1,perf/logical_token_count,51017.0
1774092278.982639,train_step,150,1,perf/gradient_accumulation_steps,8.0
1774092278.982639,train_step,150,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092278.982639,train_step,150,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092306.1516783,eval_step,150,1,eval/loss,0.7622188641422096
1774092306.1516783,eval_step,150,1,eval/duration_sec,27.166858203709126
1774092388.1349308,train_step,160,1,train/step_loss,0.9667639210820198
1774092388.1349308,train_step,160,1,train/step_real_loss,0.9667639210820198
1774092388.1349308,train_step,160,1,train/lr,7.21816235958972e-05
1774092388.1349308,train_step,160,1,perf/step_duration_sec,8.235355000942945
1774092388.1349308,train_step,160,1,perf/samples_per_sec,7.771371117902267
1774092388.1349308,train_step,160,1,perf/tokens_per_sec,6008.24129552819
1774092388.1349308,train_step,160,1,perf/logical_batch_size,64.0
1774092388.1349308,train_step,160,1,perf/logical_token_count,49480.0
1774092388.1349308,train_step,160,1,perf/gradient_accumulation_steps,8.0
1774092388.1349308,train_step,160,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092388.1349308,train_step,160,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092469.9282126,train_step,170,1,train/step_loss,0.9262635037302971
1774092469.9282126,train_step,170,1,train/step_real_loss,0.9262635037302971
1774092469.9282126,train_step,170,1,train/lr,6.853251227482479e-05
1774092469.9282126,train_step,170,1,perf/step_duration_sec,7.9527543764561415
1774092469.9282126,train_step,170,1,perf/samples_per_sec,8.047526299752175
1774092469.9282126,train_step,170,1,perf/tokens_per_sec,6280.842791759705
1774092469.9282126,train_step,170,1,perf/logical_batch_size,64.0
1774092469.9282126,train_step,170,1,perf/logical_token_count,49950.0
1774092469.9282126,train_step,170,1,perf/gradient_accumulation_steps,8.0
1774092469.9282126,train_step,170,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092469.9282126,train_step,170,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092551.943189,train_step,180,1,train/step_loss,0.8705217910535408
1774092551.943189,train_step,180,1,train/step_real_loss,0.8467002063989639
1774092551.943189,train_step,180,1,train/lr,6.476503749166904e-05
1774092551.943189,train_step,180,1,train/step_canary_loss,1.6328125
1774092551.943189,train_step,180,1,perf/step_duration_sec,8.540382800623775
1774092551.943189,train_step,180,1,perf/samples_per_sec,7.7279908337574135
1774092551.943189,train_step,180,1,perf/tokens_per_sec,6385.076790236751
1774092551.943189,train_step,180,1,perf/logical_batch_size,66.0
1774092551.943189,train_step,180,1,perf/logical_token_count,54531.0
1774092551.943189,train_step,180,1,perf/gradient_accumulation_steps,8.0
1774092551.943189,train_step,180,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092551.943189,train_step,180,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092634.1860914,train_step,190,1,train/step_loss,0.8329041105050307
1774092634.1860914,train_step,190,1,train/step_real_loss,0.830781526863575
1774092634.1860914,train_step,190,1,train/lr,6.090326135695403e-05
1774092634.1860914,train_step,190,1,train/step_canary_loss,0.96875
1774092634.1860914,train_step,190,1,perf/step_duration_sec,7.938739079982042
1774092634.1860914,train_step,190,1,perf/samples_per_sec,8.18769824088324
1774092634.1860914,train_step,190,1,perf/tokens_per_sec,6963.07051322375
1774092634.1860914,train_step,190,1,perf/logical_batch_size,65.0
1774092634.1860914,train_step,190,1,perf/logical_token_count,55278.0
1774092634.1860914,train_step,190,1,perf/gradient_accumulation_steps,8.0
1774092634.1860914,train_step,190,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092634.1860914,train_step,190,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092717.69117,train_step,200,1,train/step_loss,0.9225348509274997
1774092717.69117,train_step,200,1,train/step_real_loss,0.8622424304485321
1774092717.69117,train_step,200,1,train/lr,5.697184826514057e-05
1774092717.69117,train_step,200,1,train/step_canary_loss,4.78125
1774092717.69117,train_step,200,1,perf/step_duration_sec,8.257816776633263
1774092717.69117,train_step,200,1,perf/samples_per_sec,7.871329887571168
1774092717.69117,train_step,200,1,perf/tokens_per_sec,6772.4922352662325
1774092717.69117,train_step,200,1,perf/logical_batch_size,65.0
1774092717.69117,train_step,200,1,perf/logical_token_count,55926.0
1774092717.69117,train_step,200,1,perf/gradient_accumulation_steps,8.0
1774092717.69117,train_step,200,1,system/cuda_memory_allocated_gb,17.816345691680908
1774092717.69117,train_step,200,1,system/cuda_max_memory_allocated_gb,88.38552379608154
1774092744.8800561,eval_step,200,1,eval/loss,0.7524948388166811
1774092744.8800561,eval_step,200,1,eval/duration_sec,27.18629032932222
1774092830.2611096,train_epoch,207,1,train/epoch_loss,1.036294524908722
1774092830.2611096,train_epoch,207,1,train/epoch_real_loss,0.957591299003593
1774092830.2611096,train_epoch,207,1,train/epoch_canary_loss,5.154934052784185
1774092830.2611096,train_epoch,207,1,perf/epoch_duration_sec,1814.8832574244589
1774092830.2611096,train_epoch,207,1,perf/epoch_samples_per_sec,29.762244915220545
1774092830.2611096,train_epoch,207,1,perf/epoch_tokens_per_sec,24175.732417227537
1774092830.2611096,train_epoch,207,1,perf/epoch_samples,54015.0
1774092830.2611096,train_epoch,207,1,perf/epoch_tokens,43876132.0
1774092830.2611096,train_epoch,207,1,system/cuda_epoch_peak_memory_gb,88.38552379608154
1774092830.2611096,train_epoch,207,1,eval/loss,0.7516984458284817
1774092830.2611096,train_epoch,207,1,eval/duration_sec,27.281295781955123
1774092840.935869,audit_epoch,207,1,audit/delta,1e-05
1774092840.935869,audit_epoch,207,1,audit/num_canaries,500.0
1774092840.935869,audit_epoch,207,1,audit/num_members,250.0
1774092840.935869,audit_epoch,207,1,audit/paper_guess_fraction,0.2
1774092840.935869,audit_epoch,207,1,audit/paper_guess_steps,20.0
1774092840.935869,audit_epoch,207,1,audit/loss/auc,0.998072
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon/0.05,3.4791953936219215
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon/0.01,3.023197554051876
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.05/num_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.05/correct_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.01/num_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/loss/empirical_epsilon_details/0.01/correct_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/embedding/auc,0.96
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon/0.05,3.4791953936219215
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon/0.01,3.023197554051876
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.05/num_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.05/correct_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.01/num_guesses,100.0
1774092840.935869,audit_epoch,207,1,audit/embedding/empirical_epsilon_details/0.01/correct_guesses,100.0
1774092840.935869,audit_epoch,207,1,perf/audit_duration_sec,6.3129072319716215
1774092866.0795767,train_step,210,2,train/step_loss,0.8786039505944108
1774092866.0795767,train_step,210,2,train/step_real_loss,0.8291560411453247
1774092866.0795767,train_step,210,2,train/lr,5.29959073680547e-05
1774092866.0795767,train_step,210,2,train/step_canary_loss,2.4609375
1774092866.0795767,train_step,210,2,perf/step_duration_sec,8.105620637536049
1774092866.0795767,train_step,210,2,perf/samples_per_sec,8.142498020985931
1774092866.0795767,train_step,210,2,perf/tokens_per_sec,6755.805933775599
1774092866.0795767,train_step,210,2,perf/logical_batch_size,66.0
1774092866.0795767,train_step,210,2,perf/logical_token_count,54760.0
1774092866.0795767,train_step,210,2,perf/gradient_accumulation_steps,8.0
1774092866.0795767,train_step,210,2,system/cuda_memory_allocated_gb,17.911073684692383
1774092866.0795767,train_step,210,2,system/cuda_max_memory_allocated_gb,75.98184061050415
1774092947.7881637,train_step,220,2,train/step_loss,0.8338463682394761
1774092947.7881637,train_step,220,2,train/step_real_loss,0.8286867365241051
1774092947.7881637,train_step,220,2,train/lr,4.9000832207739676e-05
1774092947.7881637,train_step,220,2,train/step_canary_loss,1.1640625
1774092947.7881637,train_step,220,2,perf/step_duration_sec,8.100939376279712
1774092947.7881637,train_step,220,2,perf/samples_per_sec,8.023760823383757
1774092947.7881637,train_step,220,2,perf/tokens_per_sec,6522.083124668936
1774092947.7881637,train_step,220,2,perf/logical_batch_size,65.0
1774092947.7881637,train_step,220,2,perf/logical_token_count,52835.0
1774092947.7881637,train_step,220,2,perf/gradient_accumulation_steps,8.0
1774092947.7881637,train_step,220,2,system/cuda_memory_allocated_gb,17.816345691680908
1774092947.7881637,train_step,220,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093029.3670137,train_step,230,2,train/step_loss,0.7926744809517494
1774093029.3670137,train_step,230,2,train/step_real_loss,0.8034120723605156
1774093029.3670137,train_step,230,2,train/lr,4.501213853296425e-05
1774093029.3670137,train_step,230,2,train/step_canary_loss,0.10546875
1774093029.3670137,train_step,230,2,perf/step_duration_sec,8.09845926053822
1774093029.3670137,train_step,230,2,perf/samples_per_sec,8.026218063073905
1774093029.3670137,train_step,230,2,perf/tokens_per_sec,6708.683497950849
1774093029.3670137,train_step,230,2,perf/logical_batch_size,65.0
1774093029.3670137,train_step,230,2,perf/logical_token_count,54330.0
1774093029.3670137,train_step,230,2,perf/gradient_accumulation_steps,8.0
1774093029.3670137,train_step,230,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093029.3670137,train_step,230,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093111.4853563,train_step,240,2,train/step_loss,0.8129148510369387
1774093111.4853563,train_step,240,2,train/step_real_loss,0.7906041964888573
1774093111.4853563,train_step,240,2,train/lr,4.1055301335220955e-05
1774093111.4853563,train_step,240,2,train/step_canary_loss,1.52685546875
1774093111.4853563,train_step,240,2,perf/step_duration_sec,8.505769751966
1774093111.4853563,train_step,240,2,perf/samples_per_sec,7.75943881913156
1774093111.4853563,train_step,240,2,perf/tokens_per_sec,6383.901937558238
1774093111.4853563,train_step,240,2,perf/logical_batch_size,66.0
1774093111.4853563,train_step,240,2,perf/logical_token_count,54300.0
1774093111.4853563,train_step,240,2,perf/gradient_accumulation_steps,8.0
1774093111.4853563,train_step,240,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093111.4853563,train_step,240,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093192.88336,train_step,250,2,train/step_loss,0.8410789425556476
1774093192.88336,train_step,250,2,train/step_real_loss,0.850802831351757
1774093192.88336,train_step,250,2,train/lr,3.715559214503298e-05
1774093192.88336,train_step,250,2,train/step_canary_loss,0.21875
1774093192.88336,train_step,250,2,perf/step_duration_sec,8.232771871611476
1774093192.88336,train_step,250,2,perf/samples_per_sec,7.895275250385015
1774093192.88336,train_step,250,2,perf/tokens_per_sec,6654.745309889904
1774093192.88336,train_step,250,2,perf/logical_batch_size,65.0
1774093192.88336,train_step,250,2,perf/logical_token_count,54787.0
1774093192.88336,train_step,250,2,perf/gradient_accumulation_steps,8.0
1774093192.88336,train_step,250,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093192.88336,train_step,250,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093220.0732572,eval_step,250,2,eval/loss,0.748522541389383
1774093220.0732572,eval_step,250,2,eval/duration_sec,27.187080297619104
1774093302.2765276,train_step,260,2,train/step_loss,0.8724462010643699
1774093302.2765276,train_step,260,2,train/step_real_loss,0.8488068357110023
1774093302.2765276,train_step,260,2,train/lr,3.33379176277258e-05
1774093302.2765276,train_step,260,2,train/step_canary_loss,1.62890625
1774093302.2765276,train_step,260,2,perf/step_duration_sec,8.470736568793654
1774093302.2765276,train_step,260,2,perf/samples_per_sec,7.7915302245551095
1774093302.2765276,train_step,260,2,perf/tokens_per_sec,6277.49423773815
1774093302.2765276,train_step,260,2,perf/logical_batch_size,66.0
1774093302.2765276,train_step,260,2,perf/logical_token_count,53175.0
1774093302.2765276,train_step,260,2,perf/gradient_accumulation_steps,8.0
1774093302.2765276,train_step,260,2,system/cuda_memory_allocated_gb,17.911073684692383
1774093302.2765276,train_step,260,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093384.391679,train_step,270,2,train/step_loss,0.8640658855438232
1774093384.391679,train_step,270,2,train/step_real_loss,0.7926059737801552
1774093384.391679,train_step,270,2,train/lr,2.962666050951997e-05
1774093384.391679,train_step,270,2,train/step_canary_loss,5.4375
1774093384.391679,train_step,270,2,perf/step_duration_sec,7.947357261553407
1774093384.391679,train_step,270,2,perf/samples_per_sec,8.178819431516908
1774093384.391679,train_step,270,2,perf/tokens_per_sec,6407.161314659091
1774093384.391679,train_step,270,2,perf/logical_batch_size,65.0
1774093384.391679,train_step,270,2,perf/logical_token_count,50920.0
1774093384.391679,train_step,270,2,perf/gradient_accumulation_steps,8.0
1774093384.391679,train_step,270,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093384.391679,train_step,270,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093468.0585487,train_step,280,2,train/step_loss,0.7910019425245431
1774093468.0585487,train_step,280,2,train/step_real_loss,0.8018507286906242
1774093468.0585487,train_step,280,2,train/lr,2.604552384991855e-05
1774093468.0585487,train_step,280,2,train/step_canary_loss,0.0966796875
1774093468.0585487,train_step,280,2,perf/step_duration_sec,8.62639987282455
1774093468.0585487,train_step,280,2,perf/samples_per_sec,7.53500892124967
1774093468.0585487,train_step,280,2,perf/tokens_per_sec,6275.039506402552
1774093468.0585487,train_step,280,2,perf/logical_batch_size,65.0
1774093468.0585487,train_step,280,2,perf/logical_token_count,54131.0
1774093468.0585487,train_step,280,2,perf/gradient_accumulation_steps,8.0
1774093468.0585487,train_step,280,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093468.0585487,train_step,280,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093550.7132335,train_step,290,2,train/step_loss,0.8430270507480159
1774093550.7132335,train_step,290,2,train/step_real_loss,0.8613760396838188
1774093550.7132335,train_step,290,2,train/lr,2.2617379654990623e-05
1774093550.7132335,train_step,290,2,train/step_canary_loss,0.255859375
1774093550.7132335,train_step,290,2,perf/step_duration_sec,8.335046991705894
1774093550.7132335,train_step,290,2,perf/samples_per_sec,7.9183716739300705
1774093550.7132335,train_step,290,2,perf/tokens_per_sec,6261.392413496007
1774093550.7132335,train_step,290,2,perf/logical_batch_size,66.0
1774093550.7132335,train_step,290,2,perf/logical_token_count,52189.0
1774093550.7132335,train_step,290,2,perf/gradient_accumulation_steps,8.0
1774093550.7132335,train_step,290,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093550.7132335,train_step,290,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093632.5524132,train_step,300,2,train/step_loss,0.7691241015087474
1774093632.5524132,train_step,300,2,train/step_real_loss,0.784599058330059
1774093632.5524132,train_step,300,2,train/lr,1.936412279842705e-05
1774093632.5524132,train_step,300,2,train/step_canary_loss,0.27392578125
1774093632.5524132,train_step,300,2,perf/step_duration_sec,8.091827008873224
1774093632.5524132,train_step,300,2,perf/samples_per_sec,8.156378025336753
1774093632.5524132,train_step,300,2,perf/tokens_per_sec,5984.062677922065
1774093632.5524132,train_step,300,2,perf/logical_batch_size,66.0
1774093632.5524132,train_step,300,2,perf/logical_token_count,48422.0
1774093632.5524132,train_step,300,2,perf/gradient_accumulation_steps,8.0
1774093632.5524132,train_step,300,2,system/cuda_memory_allocated_gb,17.911073684692383
1774093632.5524132,train_step,300,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093659.719083,eval_step,300,2,eval/loss,0.7462242173737493
1774093659.719083,eval_step,300,2,eval/duration_sec,27.16416385397315
1774093742.5612013,train_step,310,2,train/step_loss,0.7514625552928809
1774093742.5612013,train_step,310,2,train/step_real_loss,0.7710395082831383
1774093742.5612013,train_step,310,2,train/lr,1.6306531183346385e-05
1774093742.5612013,train_step,310,2,train/step_canary_loss,0.125
1774093742.5612013,train_step,310,2,perf/step_duration_sec,8.240195842459798
1774093742.5612013,train_step,310,2,perf/samples_per_sec,8.009518373327666
1774093742.5612013,train_step,310,2,perf/tokens_per_sec,6369.751520897312
1774093742.5612013,train_step,310,2,perf/logical_batch_size,66.0
1774093742.5612013,train_step,310,2,perf/logical_token_count,52488.0
1774093742.5612013,train_step,310,2,perf/gradient_accumulation_steps,8.0
1774093742.5612013,train_step,310,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093742.5612013,train_step,310,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093824.76255,train_step,320,2,train/step_loss,0.8317307600608239
1774093824.76255,train_step,320,2,train/step_real_loss,0.8416290208697319
1774093824.76255,train_step,320,2,train/lr,1.3464133037968912e-05
1774093824.76255,train_step,320,2,train/step_canary_loss,0.1982421875
1774093824.76255,train_step,320,2,perf/step_duration_sec,7.953533122316003
1774093824.76255,train_step,320,2,perf/samples_per_sec,8.172468637569782
1774093824.76255,train_step,320,2,perf/tokens_per_sec,6226.666720107783
1774093824.76255,train_step,320,2,perf/logical_batch_size,65.0
1774093824.76255,train_step,320,2,perf/logical_token_count,49524.0
1774093824.76255,train_step,320,2,perf/gradient_accumulation_steps,8.0
1774093824.76255,train_step,320,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093824.76255,train_step,320,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093907.919859,train_step,330,2,train/step_loss,0.8719267258277306
1774093907.919859,train_step,330,2,train/step_real_loss,0.8635779246687889
1774093907.919859,train_step,330,2,train/lr,1.0855082192715294e-05
1774093907.919859,train_step,330,2,train/step_canary_loss,1.40625
1774093907.919859,train_step,330,2,perf/step_duration_sec,8.101611092686653
1774093907.919859,train_step,330,2,perf/samples_per_sec,8.023095561656334
1774093907.919859,train_step,330,2,perf/tokens_per_sec,5623.572827524039
1774093907.919859,train_step,330,2,perf/logical_batch_size,65.0
1774093907.919859,train_step,330,2,perf/logical_token_count,45560.0
1774093907.919859,train_step,330,2,perf/gradient_accumulation_steps,8.0
1774093907.919859,train_step,330,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093907.919859,train_step,330,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774093989.7239172,train_step,340,2,train/step_loss,0.8476224361043988
1774093989.7239172,train_step,340,2,train/step_real_loss,0.8228105828166008
1774093989.7239172,train_step,340,2,train/lr,8.49604213531004e-06
1774093989.7239172,train_step,340,2,train/step_canary_loss,1.6416015625
1774093989.7239172,train_step,340,2,perf/step_duration_sec,8.274557610973716
1774093989.7239172,train_step,340,2,perf/samples_per_sec,7.976257233676254
1774093989.7239172,train_step,340,2,perf/tokens_per_sec,6674.435371234426
1774093989.7239172,train_step,340,2,perf/logical_batch_size,66.0
1774093989.7239172,train_step,340,2,perf/logical_token_count,55228.0
1774093989.7239172,train_step,340,2,perf/gradient_accumulation_steps,8.0
1774093989.7239172,train_step,340,2,system/cuda_memory_allocated_gb,17.816345691680908
1774093989.7239172,train_step,340,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094071.9116077,train_step,350,2,train/step_loss,0.8573094416355741
1774094071.9116077,train_step,350,2,train/step_real_loss,0.816773310303688
1774094071.9116077,train_step,350,2,train/lr,6.402079584406673e-06
1774094071.9116077,train_step,350,2,train/step_canary_loss,1.376171875
1774094071.9116077,train_step,350,2,perf/step_duration_sec,8.637357847765088
1774094071.9116077,train_step,350,2,perf/samples_per_sec,7.988554048140279
1774094071.9116077,train_step,350,2,perf/tokens_per_sec,6212.547974249382
1774094071.9116077,train_step,350,2,perf/logical_batch_size,69.0
1774094071.9116077,train_step,350,2,perf/logical_token_count,53660.0
1774094071.9116077,train_step,350,2,perf/gradient_accumulation_steps,8.0
1774094071.9116077,train_step,350,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094071.9116077,train_step,350,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094099.091823,eval_step,350,2,eval/loss,0.7456388631942629
1774094099.091823,eval_step,350,2,eval/duration_sec,27.17722495831549
1774094181.5654633,train_step,360,2,train/step_loss,0.8446246770712046
1774094181.5654633,train_step,360,2,train/step_real_loss,0.8453097268939018
1774094181.5654633,train_step,360,2,train/lr,4.586568261458729e-06
1774094181.5654633,train_step,360,2,train/step_canary_loss,0.80078125
1774094181.5654633,train_step,360,2,perf/step_duration_sec,7.976690696552396
1774094181.5654633,train_step,360,2,perf/samples_per_sec,8.148742689508275
1774094181.5654633,train_step,360,2,perf/tokens_per_sec,6948.119478163339
1774094181.5654633,train_step,360,2,perf/logical_batch_size,65.0
1774094181.5654633,train_step,360,2,perf/logical_token_count,55423.0
1774094181.5654633,train_step,360,2,perf/gradient_accumulation_steps,8.0
1774094181.5654633,train_step,360,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094181.5654633,train_step,360,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094263.5117958,train_step,370,2,train/step_loss,0.8721330687403679
1774094263.5117958,train_step,370,2,train/step_real_loss,0.8721330687403679
1774094263.5117958,train_step,370,2,train/lr,3.06110347542643e-06
1774094263.5117958,train_step,370,2,perf/step_duration_sec,8.37641635723412
1774094263.5117958,train_step,370,2,perf/samples_per_sec,7.640498904370688
1774094263.5117958,train_step,370,2,perf/tokens_per_sec,5830.058812420977
1774094263.5117958,train_step,370,2,perf/logical_batch_size,64.0
1774094263.5117958,train_step,370,2,perf/logical_token_count,48835.0
1774094263.5117958,train_step,370,2,perf/gradient_accumulation_steps,8.0
1774094263.5117958,train_step,370,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094263.5117958,train_step,370,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094346.1630964,train_step,380,2,train/step_loss,0.81929341004445
1774094346.1630964,train_step,380,2,train/step_real_loss,0.8280055150389671
1774094346.1630964,train_step,380,2,train/lr,1.8354280658494649e-06
1774094346.1630964,train_step,380,2,train/step_canary_loss,0.26171875
1774094346.1630964,train_step,380,2,perf/step_duration_sec,7.9715049508959055
1774094346.1630964,train_step,380,2,perf/samples_per_sec,8.154043734576712
1774094346.1630964,train_step,380,2,perf/tokens_per_sec,6787.676898315027
1774094346.1630964,train_step,380,2,perf/logical_batch_size,65.0
1774094346.1630964,train_step,380,2,perf/logical_token_count,54108.0
1774094346.1630964,train_step,380,2,perf/gradient_accumulation_steps,8.0
1774094346.1630964,train_step,380,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094346.1630964,train_step,380,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094429.2324018,train_step,390,2,train/step_loss,0.8554338663816452
1774094429.2324018,train_step,390,2,train/step_real_loss,0.8554338663816452
1774094429.2324018,train_step,390,2,train/lr,9.17370177272775e-07
1774094429.2324018,train_step,390,2,perf/step_duration_sec,8.24701151996851
1774094429.2324018,train_step,390,2,perf/samples_per_sec,7.760386880148844
1774094429.2324018,train_step,390,2,perf/tokens_per_sec,6142.34621563781
1774094429.2324018,train_step,390,2,perf/logical_batch_size,64.0
1774094429.2324018,train_step,390,2,perf/logical_token_count,50656.0
1774094429.2324018,train_step,390,2,perf/gradient_accumulation_steps,8.0
1774094429.2324018,train_step,390,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094429.2324018,train_step,390,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094512.2904398,train_step,400,2,train/step_loss,0.7998756021261215
1774094512.2904398,train_step,400,2,train/step_real_loss,0.7998756021261215
1774094512.2904398,train_step,400,2,train/lr,3.127932624475638e-07
1774094512.2904398,train_step,400,2,perf/step_duration_sec,7.943094424903393
1774094512.2904398,train_step,400,2,perf/samples_per_sec,8.0573132555677
1774094512.2904398,train_step,400,2,perf/tokens_per_sec,6869.86671452841
1774094512.2904398,train_step,400,2,perf/logical_batch_size,64.0
1774094512.2904398,train_step,400,2,perf/logical_token_count,54568.0
1774094512.2904398,train_step,400,2,perf/gradient_accumulation_steps,8.0
1774094512.2904398,train_step,400,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094512.2904398,train_step,400,2,system/cuda_max_memory_allocated_gb,82.1704511642456
1774094539.4831598,eval_step,400,2,eval/loss,0.7455017728538349
1774094539.4831598,eval_step,400,2,eval/duration_sec,27.190383709967136
1774094621.4459202,train_step,410,2,train/step_loss,0.854436350591255
1774094621.4459202,train_step,410,2,train/step_real_loss,0.8517185375094414
1774094621.4459202,train_step,410,2,train/lr,2.5558633627303928e-08
1774094621.4459202,train_step,410,2,train/step_canary_loss,0.94140625
1774094621.4459202,train_step,410,2,perf/step_duration_sec,8.378037076443434
1774094621.4459202,train_step,410,2,perf/samples_per_sec,7.877740262760654
1774094621.4459202,train_step,410,2,perf/tokens_per_sec,6647.619184760483
1774094621.4459202,train_step,410,2,perf/logical_batch_size,66.0
1774094621.4459202,train_step,410,2,perf/logical_token_count,55694.0
1774094621.4459202,train_step,410,2,perf/gradient_accumulation_steps,8.0
1774094621.4459202,train_step,410,2,system/cuda_memory_allocated_gb,17.816345691680908
1774094621.4459202,train_step,410,2,system/cuda_max_memory_allocated_gb,88.38552379608154
1774094682.1799755,train_epoch,414,2,train/epoch_loss,0.840315752633972
1774094682.1799755,train_epoch,414,2,train/epoch_real_loss,0.8362452851041504
1774094682.1799755,train_epoch,414,2,train/epoch_canary_loss,1.0832555509625612
1774094682.1799755,train_epoch,414,2,perf/epoch_duration_sec,1814.1021996028721
1774094682.1799755,train_epoch,414,2,perf/epoch_samples_per_sec,29.765136722628178
1774094682.1799755,train_epoch,414,2,perf/epoch_tokens_per_sec,24186.19579955584
1774094682.1799755,train_epoch,414,2,perf/epoch_samples,53997.0
1774094682.1799755,train_epoch,414,2,perf/epoch_tokens,43876231.0
1774094682.1799755,train_epoch,414,2,system/cuda_epoch_peak_memory_gb,88.38552379608154
1774094682.1799755,train_epoch,414,2,eval/loss,0.7454980848164394
1774094682.1799755,train_epoch,414,2,eval/duration_sec,27.13153049722314
1774094693.161264,audit_epoch,414,2,audit/delta,1e-05
1774094693.161264,audit_epoch,414,2,audit/num_canaries,500.0
1774094693.161264,audit_epoch,414,2,audit/num_members,250.0
1774094693.161264,audit_epoch,414,2,audit/paper_guess_fraction,0.2
1774094693.161264,audit_epoch,414,2,audit/paper_guess_steps,20.0
1774094693.161264,audit_epoch,414,2,audit/loss/auc,1.0
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon/0.05,3.4791953936219215
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon/0.01,3.023197554051876
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.05/num_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.05/correct_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.01/num_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/loss/empirical_epsilon_details/0.01/correct_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/embedding/auc,0.916224
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon/0.05,3.4791953936219215
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon/0.01,3.023197554051876
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.05/num_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.05/correct_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.01/num_guesses,100.0
1774094693.161264,audit_epoch,414,2,audit/embedding/empirical_epsilon_details/0.01/correct_guesses,100.0
1774094693.161264,audit_epoch,414,2,perf/audit_duration_sec,6.722778998315334
1774094704.3854914,audit_final,414,2,audit/delta,1e-05
1774094704.3854914,audit_final,414,2,audit/num_canaries,500.0
1774094704.3854914,audit_final,414,2,audit/num_members,250.0
1774094704.3854914,audit_final,414,2,audit/paper_guess_fraction,0.2
1774094704.3854914,audit_final,414,2,audit/paper_guess_steps,20.0
1774094704.3854914,audit_final,414,2,audit/loss/auc,1.0
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon/0.05,3.4791953936219215
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon/0.01,3.023197554051876
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.05/num_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.05/correct_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.01/num_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/loss/empirical_epsilon_details/0.01/correct_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/embedding/auc,0.916224
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon/0.05,3.4791953936219215
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon/0.01,3.023197554051876
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.05/epsilon,3.4791953936219215
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.05/num_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.05/correct_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.01/epsilon,3.023197554051876
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.01/num_guesses,100.0
1774094704.3854914,audit_final,414,2,audit/embedding/empirical_epsilon_details/0.01/correct_guesses,100.0
1774094704.914049,energy_final,414,,energy/codecarbon/duration,3820.65162669681
1774094704.914049,energy_final,414,,energy/codecarbon/emissions,0.5568149811716148
1774094704.914049,energy_final,414,,energy/codecarbon/emissions_rate,0.0001457382236267942
1774094704.914049,energy_final,414,,energy/codecarbon/cpu_power,179.3439937196561
1774094704.914049,energy_final,414,,energy/codecarbon/gpu_power,2316.658494927477
1774094704.914049,energy_final,414,,energy/codecarbon/ram_power,70.0
1774094704.914049,energy_final,414,,energy/codecarbon/cpu_energy,0.18368298514637418
1774094704.914049,energy_final,414,,energy/codecarbon/gpu_energy,2.457235179675422
1774094704.914049,energy_final,414,,energy/codecarbon/ram_energy,0.07169108754482326
1774094704.914049,energy_final,414,,energy/codecarbon/energy_consumed,2.7126092523666183
1774094704.914049,energy_final,414,,energy/codecarbon/water_consumed,0.0
1774094704.914049,energy_final,414,,energy/codecarbon/cpu_count,224.0
1774094704.914049,energy_final,414,,energy/codecarbon/gpu_count,4.0
1774094704.914049,energy_final,414,,energy/codecarbon/longitude,-121.9552
1774094704.914049,energy_final,414,,energy/codecarbon/latitude,37.3541
1774094704.914049,energy_final,414,,energy/codecarbon/ram_total_size,2015.5625190734863
1774094704.914049,energy_final,414,,energy/codecarbon/cpu_utilization_percent,2.2497114375655825
1774094704.914049,energy_final,414,,energy/codecarbon/gpu_utilization_percent,96.45330535152151
1774094704.914049,energy_final,414,,energy/codecarbon/ram_utilization_percent,2.3057974816369358
1774094704.914049,energy_final,414,,energy/codecarbon/ram_used_gb,46.78091819539025
1774094704.914049,energy_final,414,,energy/codecarbon/pue,1.0
1774094704.914049,energy_final,414,,energy/codecarbon/wue,0.0