Nadav committed on
Commit
63659f2
·
1 Parent(s): d59ea5f

Training in progress, step 40000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a06ffce25ec404e52367f5cc0b644052826374a7fdd783f9655fcd285df66d6d
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e0468f83252fbd857b959ddace7a9f33b4b96fb87a6baf73ead5e873aabb0c
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c329e242cb4a3f96564fe11ec3eccc7274448a9d37d936eb1f9c1b5bcd37ec91
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc7e4def826ce0471f39109f9fdb3314c8bc2030700c1f3907e774f1c01f478
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3050f6cd4e244bcfafaa04a6d1b955b9b5bab22bee349f2d0379131ff0874e3f
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d1c2df1a9b702c1ab654e9e5e408f92b9742bb1e3ca9287f5761ef0b320b84
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a980a1351c683c05bfb6454932005b70f1b4871d3735c6a11b55d7963483b96
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c80c9fc8381c1a21d068329ef7b88e8e389828d1de4a65d282cc16135a4f7204
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf786375f854cc04cfc07ff0949c0fe8ff8ba67bd06f47c9df99e37ce7c9a9a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3321618f4e6ed92cd7e76689475cb3f46497297ac6a6a8342c9bad133a001350
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.6125576939824087,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -414,11 +414,147 @@
414
  "eval_samples_per_second": 62.361,
415
  "eval_steps_per_second": 0.985,
416
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  }
418
  ],
419
  "max_steps": 100000,
420
  "num_train_epochs": 9,
421
- "total_flos": 1.4128888515849526e+21,
422
  "trial_name": null,
423
  "trial_params": null
424
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.483410258643212,
5
+ "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
414
  "eval_samples_per_second": 62.361,
415
  "eval_steps_per_second": 0.985,
416
  "step": 30000
417
+ },
418
+ {
419
+ "epoch": 2.66,
420
+ "learning_rate": 8.150097177421913e-05,
421
+ "loss": 0.4223,
422
+ "step": 30500
423
+ },
424
+ {
425
+ "epoch": 2.7,
426
+ "learning_rate": 8.091455446965055e-05,
427
+ "loss": 0.4215,
428
+ "step": 31000
429
+ },
430
+ {
431
+ "epoch": 2.74,
432
+ "learning_rate": 8.032183951740807e-05,
433
+ "loss": 0.4197,
434
+ "step": 31500
435
+ },
436
+ {
437
+ "epoch": 2.79,
438
+ "learning_rate": 7.972297316103718e-05,
439
+ "loss": 0.4206,
440
+ "step": 32000
441
+ },
442
+ {
443
+ "epoch": 2.83,
444
+ "learning_rate": 7.911931879453039e-05,
445
+ "loss": 0.4195,
446
+ "step": 32500
447
+ },
448
+ {
449
+ "epoch": 2.87,
450
+ "learning_rate": 7.850860595403895e-05,
451
+ "loss": 0.4202,
452
+ "step": 33000
453
+ },
454
+ {
455
+ "epoch": 2.92,
456
+ "learning_rate": 7.789218909766887e-05,
457
+ "loss": 0.4183,
458
+ "step": 33500
459
+ },
460
+ {
461
+ "epoch": 2.96,
462
+ "learning_rate": 7.727022031705582e-05,
463
+ "loss": 0.418,
464
+ "step": 34000
465
+ },
466
+ {
467
+ "epoch": 3.0,
468
+ "learning_rate": 7.664411309317395e-05,
469
+ "loss": 0.4179,
470
+ "step": 34500
471
+ },
472
+ {
473
+ "epoch": 3.05,
474
+ "learning_rate": 7.601151251252289e-05,
475
+ "loss": 0.4168,
476
+ "step": 35000
477
+ },
478
+ {
479
+ "epoch": 3.05,
480
+ "eval_loss": 0.3975510597229004,
481
+ "eval_runtime": 48.348,
482
+ "eval_samples_per_second": 103.417,
483
+ "eval_steps_per_second": 1.634,
484
+ "step": 35000
485
+ },
486
+ {
487
+ "epoch": 3.09,
488
+ "learning_rate": 7.537382403643505e-05,
489
+ "loss": 0.4172,
490
+ "step": 35500
491
+ },
492
+ {
493
+ "epoch": 3.14,
494
+ "learning_rate": 7.473120500500002e-05,
495
+ "loss": 0.4165,
496
+ "step": 36000
497
+ },
498
+ {
499
+ "epoch": 3.18,
500
+ "learning_rate": 7.408511341338508e-05,
501
+ "loss": 0.417,
502
+ "step": 36500
503
+ },
504
+ {
505
+ "epoch": 3.22,
506
+ "learning_rate": 7.343311918291752e-05,
507
+ "loss": 0.4172,
508
+ "step": 37000
509
+ },
510
+ {
511
+ "epoch": 3.27,
512
+ "learning_rate": 7.277667323698597e-05,
513
+ "loss": 0.4163,
514
+ "step": 37500
515
+ },
516
+ {
517
+ "epoch": 3.31,
518
+ "learning_rate": 7.211726318817271e-05,
519
+ "loss": 0.4136,
520
+ "step": 38000
521
+ },
522
+ {
523
+ "epoch": 3.35,
524
+ "learning_rate": 7.145240886444487e-05,
525
+ "loss": 0.4144,
526
+ "step": 38500
527
+ },
528
+ {
529
+ "epoch": 3.4,
530
+ "learning_rate": 7.078359153588611e-05,
531
+ "loss": 0.414,
532
+ "step": 39000
533
+ },
534
+ {
535
+ "epoch": 3.44,
536
+ "learning_rate": 7.011097622316453e-05,
537
+ "loss": 0.4119,
538
+ "step": 39500
539
+ },
540
+ {
541
+ "epoch": 3.48,
542
+ "learning_rate": 6.943472888404397e-05,
543
+ "loss": 0.413,
544
+ "step": 40000
545
+ },
546
+ {
547
+ "epoch": 3.48,
548
+ "eval_loss": 0.39355719089508057,
549
+ "eval_runtime": 61.3446,
550
+ "eval_samples_per_second": 81.507,
551
+ "eval_steps_per_second": 1.288,
552
+ "step": 40000
553
  }
554
  ],
555
  "max_steps": 100000,
556
  "num_train_epochs": 9,
557
+ "total_flos": 1.8838402100126674e+21,
558
  "trial_name": null,
559
  "trial_params": null
560
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c329e242cb4a3f96564fe11ec3eccc7274448a9d37d936eb1f9c1b5bcd37ec91
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc7e4def826ce0471f39109f9fdb3314c8bc2030700c1f3907e774f1c01f478
3
  size 449471589