schnell commited on
Commit
f820086
·
1 Parent(s): 93f6f6d

Training in progress, epoch 12

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:427ddde5949f77db8935be38c6362c73ffac529ca902634168abcb77846282d4
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef2ed6e9c5769612ee83bbcff1434945866238417fae08918c197f74b46418b8
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:896681ae078ffd1d4e4769bf2d6e9344fd51fb18220bfe249c5a5fc74a830917
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd9765584191118ad624ada4294aa4c7c4b446e8275a08afe002751786a580c3
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac1e8edaf46acd0fd7646a0bb25c1e4b46b0ab04d87474b4fe175e85e479d6d4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85a3eb859005f1a2e2b3cdf3c05d91017c106fb92e1c7a7c1999929d8f053b63
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f35c7bd008e8e8a41d0ab633d161ba4bfa16e5435c68d6d7e739be6d290d478
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b7fc01a18c41ee4d5f6a3a44379550533c02e5ae590743297a4e13668b3d82
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1b3125518446f5f519d5932574d9e19db8e3320097f9a530448ee7f4f0d7d71
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb87d42e1d664298b59b2f6667ddacc7adf88687ba59853e16a623f40650b2d6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08fe68888b57c19aba30af213d1336bce889e7e0465c4a58939b446db8405c5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd0be3c29b40bb10e427fd603007010cb5d019dfe3909eb92cb96939b7adcda3
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.0,
5
- "global_step": 764203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9273,11 +9273,854 @@
9273
  "eval_samples_per_second": 969.38,
9274
  "eval_steps_per_second": 40.391,
9275
  "step": 764203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9276
  }
9277
  ],
9278
  "max_steps": 972622,
9279
  "num_train_epochs": 14,
9280
- "total_flos": 4.6996799356928e+18,
9281
  "trial_name": null,
9282
  "trial_params": null
9283
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.0,
5
+ "global_step": 833676,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9273
  "eval_samples_per_second": 969.38,
9274
  "eval_steps_per_second": 40.391,
9275
  "step": 764203
9276
+ },
9277
+ {
9278
+ "epoch": 11.0,
9279
+ "learning_rate": 2.1648362490198828e-05,
9280
+ "loss": 2.7987,
9281
+ "step": 764500
9282
+ },
9283
+ {
9284
+ "epoch": 11.01,
9285
+ "learning_rate": 2.15965396019296e-05,
9286
+ "loss": 2.7989,
9287
+ "step": 765000
9288
+ },
9289
+ {
9290
+ "epoch": 11.02,
9291
+ "learning_rate": 2.1544612860176862e-05,
9292
+ "loss": 2.7949,
9293
+ "step": 765500
9294
+ },
9295
+ {
9296
+ "epoch": 11.03,
9297
+ "learning_rate": 2.149268611842413e-05,
9298
+ "loss": 2.8003,
9299
+ "step": 766000
9300
+ },
9301
+ {
9302
+ "epoch": 11.03,
9303
+ "learning_rate": 2.1440759376671392e-05,
9304
+ "loss": 2.7988,
9305
+ "step": 766500
9306
+ },
9307
+ {
9308
+ "epoch": 11.04,
9309
+ "learning_rate": 2.1388936488402165e-05,
9310
+ "loss": 2.796,
9311
+ "step": 767000
9312
+ },
9313
+ {
9314
+ "epoch": 11.05,
9315
+ "learning_rate": 2.1337009746649427e-05,
9316
+ "loss": 2.7986,
9317
+ "step": 767500
9318
+ },
9319
+ {
9320
+ "epoch": 11.05,
9321
+ "learning_rate": 2.128508300489669e-05,
9322
+ "loss": 2.7976,
9323
+ "step": 768000
9324
+ },
9325
+ {
9326
+ "epoch": 11.06,
9327
+ "learning_rate": 2.1233156263143956e-05,
9328
+ "loss": 2.7985,
9329
+ "step": 768500
9330
+ },
9331
+ {
9332
+ "epoch": 11.07,
9333
+ "learning_rate": 2.118122952139122e-05,
9334
+ "loss": 2.7967,
9335
+ "step": 769000
9336
+ },
9337
+ {
9338
+ "epoch": 11.08,
9339
+ "learning_rate": 2.112930277963849e-05,
9340
+ "loss": 2.7995,
9341
+ "step": 769500
9342
+ },
9343
+ {
9344
+ "epoch": 11.08,
9345
+ "learning_rate": 2.1077479891369256e-05,
9346
+ "loss": 2.7985,
9347
+ "step": 770000
9348
+ },
9349
+ {
9350
+ "epoch": 11.09,
9351
+ "learning_rate": 2.1025553149616524e-05,
9352
+ "loss": 2.7961,
9353
+ "step": 770500
9354
+ },
9355
+ {
9356
+ "epoch": 11.1,
9357
+ "learning_rate": 2.0973626407863786e-05,
9358
+ "loss": 2.7965,
9359
+ "step": 771000
9360
+ },
9361
+ {
9362
+ "epoch": 11.11,
9363
+ "learning_rate": 2.0921699666111054e-05,
9364
+ "loss": 2.7944,
9365
+ "step": 771500
9366
+ },
9367
+ {
9368
+ "epoch": 11.11,
9369
+ "learning_rate": 2.0869772924358315e-05,
9370
+ "loss": 2.7952,
9371
+ "step": 772000
9372
+ },
9373
+ {
9374
+ "epoch": 11.12,
9375
+ "learning_rate": 2.081784618260558e-05,
9376
+ "loss": 2.7922,
9377
+ "step": 772500
9378
+ },
9379
+ {
9380
+ "epoch": 11.13,
9381
+ "learning_rate": 2.0765919440852845e-05,
9382
+ "loss": 2.7979,
9383
+ "step": 773000
9384
+ },
9385
+ {
9386
+ "epoch": 11.13,
9387
+ "learning_rate": 2.0714096552583618e-05,
9388
+ "loss": 2.7943,
9389
+ "step": 773500
9390
+ },
9391
+ {
9392
+ "epoch": 11.14,
9393
+ "learning_rate": 2.066216981083088e-05,
9394
+ "loss": 2.7962,
9395
+ "step": 774000
9396
+ },
9397
+ {
9398
+ "epoch": 11.15,
9399
+ "learning_rate": 2.0610243069078145e-05,
9400
+ "loss": 2.7974,
9401
+ "step": 774500
9402
+ },
9403
+ {
9404
+ "epoch": 11.16,
9405
+ "learning_rate": 2.055831632732541e-05,
9406
+ "loss": 2.7969,
9407
+ "step": 775000
9408
+ },
9409
+ {
9410
+ "epoch": 11.16,
9411
+ "learning_rate": 2.050649343905618e-05,
9412
+ "loss": 2.7929,
9413
+ "step": 775500
9414
+ },
9415
+ {
9416
+ "epoch": 11.17,
9417
+ "learning_rate": 2.0454566697303444e-05,
9418
+ "loss": 2.7957,
9419
+ "step": 776000
9420
+ },
9421
+ {
9422
+ "epoch": 11.18,
9423
+ "learning_rate": 2.040263995555071e-05,
9424
+ "loss": 2.7974,
9425
+ "step": 776500
9426
+ },
9427
+ {
9428
+ "epoch": 11.18,
9429
+ "learning_rate": 2.0350713213797977e-05,
9430
+ "loss": 2.8017,
9431
+ "step": 777000
9432
+ },
9433
+ {
9434
+ "epoch": 11.19,
9435
+ "learning_rate": 2.029878647204524e-05,
9436
+ "loss": 2.7986,
9437
+ "step": 777500
9438
+ },
9439
+ {
9440
+ "epoch": 11.2,
9441
+ "learning_rate": 2.0246859730292507e-05,
9442
+ "loss": 2.7928,
9443
+ "step": 778000
9444
+ },
9445
+ {
9446
+ "epoch": 11.21,
9447
+ "learning_rate": 2.019493298853977e-05,
9448
+ "loss": 2.7981,
9449
+ "step": 778500
9450
+ },
9451
+ {
9452
+ "epoch": 11.21,
9453
+ "learning_rate": 2.014311010027054e-05,
9454
+ "loss": 2.7985,
9455
+ "step": 779000
9456
+ },
9457
+ {
9458
+ "epoch": 11.22,
9459
+ "learning_rate": 2.0091183358517803e-05,
9460
+ "loss": 2.7979,
9461
+ "step": 779500
9462
+ },
9463
+ {
9464
+ "epoch": 11.23,
9465
+ "learning_rate": 2.0039256616765068e-05,
9466
+ "loss": 2.8037,
9467
+ "step": 780000
9468
+ },
9469
+ {
9470
+ "epoch": 11.23,
9471
+ "learning_rate": 1.9987329875012333e-05,
9472
+ "loss": 2.7996,
9473
+ "step": 780500
9474
+ },
9475
+ {
9476
+ "epoch": 11.24,
9477
+ "learning_rate": 1.9935403133259598e-05,
9478
+ "loss": 2.7954,
9479
+ "step": 781000
9480
+ },
9481
+ {
9482
+ "epoch": 11.25,
9483
+ "learning_rate": 1.9883476391506863e-05,
9484
+ "loss": 2.8006,
9485
+ "step": 781500
9486
+ },
9487
+ {
9488
+ "epoch": 11.26,
9489
+ "learning_rate": 1.9831549649754128e-05,
9490
+ "loss": 2.7932,
9491
+ "step": 782000
9492
+ },
9493
+ {
9494
+ "epoch": 11.26,
9495
+ "learning_rate": 1.9779622908001393e-05,
9496
+ "loss": 2.7921,
9497
+ "step": 782500
9498
+ },
9499
+ {
9500
+ "epoch": 11.27,
9501
+ "learning_rate": 1.972790387321567e-05,
9502
+ "loss": 2.7929,
9503
+ "step": 783000
9504
+ },
9505
+ {
9506
+ "epoch": 11.28,
9507
+ "learning_rate": 1.9675977131462932e-05,
9508
+ "loss": 2.7967,
9509
+ "step": 783500
9510
+ },
9511
+ {
9512
+ "epoch": 11.28,
9513
+ "learning_rate": 1.9624050389710197e-05,
9514
+ "loss": 2.7964,
9515
+ "step": 784000
9516
+ },
9517
+ {
9518
+ "epoch": 11.29,
9519
+ "learning_rate": 1.957212364795746e-05,
9520
+ "loss": 2.7967,
9521
+ "step": 784500
9522
+ },
9523
+ {
9524
+ "epoch": 11.3,
9525
+ "learning_rate": 1.9520196906204726e-05,
9526
+ "loss": 2.7948,
9527
+ "step": 785000
9528
+ },
9529
+ {
9530
+ "epoch": 11.31,
9531
+ "learning_rate": 1.94683740179355e-05,
9532
+ "loss": 2.7907,
9533
+ "step": 785500
9534
+ },
9535
+ {
9536
+ "epoch": 11.31,
9537
+ "learning_rate": 1.941644727618276e-05,
9538
+ "loss": 2.7975,
9539
+ "step": 786000
9540
+ },
9541
+ {
9542
+ "epoch": 11.32,
9543
+ "learning_rate": 1.936452053443003e-05,
9544
+ "loss": 2.7976,
9545
+ "step": 786500
9546
+ },
9547
+ {
9548
+ "epoch": 11.33,
9549
+ "learning_rate": 1.931259379267729e-05,
9550
+ "loss": 2.797,
9551
+ "step": 787000
9552
+ },
9553
+ {
9554
+ "epoch": 11.34,
9555
+ "learning_rate": 1.926066705092456e-05,
9556
+ "loss": 2.7934,
9557
+ "step": 787500
9558
+ },
9559
+ {
9560
+ "epoch": 11.34,
9561
+ "learning_rate": 1.920874030917182e-05,
9562
+ "loss": 2.7977,
9563
+ "step": 788000
9564
+ },
9565
+ {
9566
+ "epoch": 11.35,
9567
+ "learning_rate": 1.9156917420902594e-05,
9568
+ "loss": 2.7952,
9569
+ "step": 788500
9570
+ },
9571
+ {
9572
+ "epoch": 11.36,
9573
+ "learning_rate": 1.9104990679149855e-05,
9574
+ "loss": 2.7913,
9575
+ "step": 789000
9576
+ },
9577
+ {
9578
+ "epoch": 11.36,
9579
+ "learning_rate": 1.905306393739712e-05,
9580
+ "loss": 2.7928,
9581
+ "step": 789500
9582
+ },
9583
+ {
9584
+ "epoch": 11.37,
9585
+ "learning_rate": 1.9001137195644385e-05,
9586
+ "loss": 2.7925,
9587
+ "step": 790000
9588
+ },
9589
+ {
9590
+ "epoch": 11.38,
9591
+ "learning_rate": 1.894921045389165e-05,
9592
+ "loss": 2.7978,
9593
+ "step": 790500
9594
+ },
9595
+ {
9596
+ "epoch": 11.39,
9597
+ "learning_rate": 1.8897283712138915e-05,
9598
+ "loss": 2.7974,
9599
+ "step": 791000
9600
+ },
9601
+ {
9602
+ "epoch": 11.39,
9603
+ "learning_rate": 1.884535697038618e-05,
9604
+ "loss": 2.7968,
9605
+ "step": 791500
9606
+ },
9607
+ {
9608
+ "epoch": 11.4,
9609
+ "learning_rate": 1.8793430228633445e-05,
9610
+ "loss": 2.7926,
9611
+ "step": 792000
9612
+ },
9613
+ {
9614
+ "epoch": 11.41,
9615
+ "learning_rate": 1.8741607340364214e-05,
9616
+ "loss": 2.7963,
9617
+ "step": 792500
9618
+ },
9619
+ {
9620
+ "epoch": 11.41,
9621
+ "learning_rate": 1.8689680598611483e-05,
9622
+ "loss": 2.7893,
9623
+ "step": 793000
9624
+ },
9625
+ {
9626
+ "epoch": 11.42,
9627
+ "learning_rate": 1.863785771034225e-05,
9628
+ "loss": 2.7877,
9629
+ "step": 793500
9630
+ },
9631
+ {
9632
+ "epoch": 11.43,
9633
+ "learning_rate": 1.8585930968589517e-05,
9634
+ "loss": 2.796,
9635
+ "step": 794000
9636
+ },
9637
+ {
9638
+ "epoch": 11.44,
9639
+ "learning_rate": 1.853400422683678e-05,
9640
+ "loss": 2.7906,
9641
+ "step": 794500
9642
+ },
9643
+ {
9644
+ "epoch": 11.44,
9645
+ "learning_rate": 1.8482077485084047e-05,
9646
+ "loss": 2.7987,
9647
+ "step": 795000
9648
+ },
9649
+ {
9650
+ "epoch": 11.45,
9651
+ "learning_rate": 1.843015074333131e-05,
9652
+ "loss": 2.7931,
9653
+ "step": 795500
9654
+ },
9655
+ {
9656
+ "epoch": 11.46,
9657
+ "learning_rate": 1.837832785506208e-05,
9658
+ "loss": 2.7866,
9659
+ "step": 796000
9660
+ },
9661
+ {
9662
+ "epoch": 11.46,
9663
+ "learning_rate": 1.8326401113309343e-05,
9664
+ "loss": 2.7934,
9665
+ "step": 796500
9666
+ },
9667
+ {
9668
+ "epoch": 11.47,
9669
+ "learning_rate": 1.8274474371556608e-05,
9670
+ "loss": 2.7929,
9671
+ "step": 797000
9672
+ },
9673
+ {
9674
+ "epoch": 11.48,
9675
+ "learning_rate": 1.8222547629803873e-05,
9676
+ "loss": 2.7929,
9677
+ "step": 797500
9678
+ },
9679
+ {
9680
+ "epoch": 11.49,
9681
+ "learning_rate": 1.8170620888051138e-05,
9682
+ "loss": 2.7977,
9683
+ "step": 798000
9684
+ },
9685
+ {
9686
+ "epoch": 11.49,
9687
+ "learning_rate": 1.8118694146298403e-05,
9688
+ "loss": 2.7898,
9689
+ "step": 798500
9690
+ },
9691
+ {
9692
+ "epoch": 11.5,
9693
+ "learning_rate": 1.8066871258029172e-05,
9694
+ "loss": 2.787,
9695
+ "step": 799000
9696
+ },
9697
+ {
9698
+ "epoch": 11.51,
9699
+ "learning_rate": 1.8014944516276437e-05,
9700
+ "loss": 2.7933,
9701
+ "step": 799500
9702
+ },
9703
+ {
9704
+ "epoch": 11.52,
9705
+ "learning_rate": 1.7963017774523702e-05,
9706
+ "loss": 2.7907,
9707
+ "step": 800000
9708
+ },
9709
+ {
9710
+ "epoch": 11.52,
9711
+ "learning_rate": 1.791109103277097e-05,
9712
+ "loss": 2.7924,
9713
+ "step": 800500
9714
+ },
9715
+ {
9716
+ "epoch": 11.53,
9717
+ "learning_rate": 1.7859164291018232e-05,
9718
+ "loss": 2.7948,
9719
+ "step": 801000
9720
+ },
9721
+ {
9722
+ "epoch": 11.54,
9723
+ "learning_rate": 1.7807237549265497e-05,
9724
+ "loss": 2.7944,
9725
+ "step": 801500
9726
+ },
9727
+ {
9728
+ "epoch": 11.54,
9729
+ "learning_rate": 1.775531080751276e-05,
9730
+ "loss": 2.795,
9731
+ "step": 802000
9732
+ },
9733
+ {
9734
+ "epoch": 11.55,
9735
+ "learning_rate": 1.7703487919243535e-05,
9736
+ "loss": 2.7972,
9737
+ "step": 802500
9738
+ },
9739
+ {
9740
+ "epoch": 11.56,
9741
+ "learning_rate": 1.7651561177490796e-05,
9742
+ "loss": 2.7953,
9743
+ "step": 803000
9744
+ },
9745
+ {
9746
+ "epoch": 11.57,
9747
+ "learning_rate": 1.759963443573806e-05,
9748
+ "loss": 2.7893,
9749
+ "step": 803500
9750
+ },
9751
+ {
9752
+ "epoch": 11.57,
9753
+ "learning_rate": 1.7547707693985326e-05,
9754
+ "loss": 2.7897,
9755
+ "step": 804000
9756
+ },
9757
+ {
9758
+ "epoch": 11.58,
9759
+ "learning_rate": 1.749578095223259e-05,
9760
+ "loss": 2.792,
9761
+ "step": 804500
9762
+ },
9763
+ {
9764
+ "epoch": 11.59,
9765
+ "learning_rate": 1.7443854210479856e-05,
9766
+ "loss": 2.791,
9767
+ "step": 805000
9768
+ },
9769
+ {
9770
+ "epoch": 11.59,
9771
+ "learning_rate": 1.7392031322210625e-05,
9772
+ "loss": 2.7957,
9773
+ "step": 805500
9774
+ },
9775
+ {
9776
+ "epoch": 11.6,
9777
+ "learning_rate": 1.734010458045789e-05,
9778
+ "loss": 2.7909,
9779
+ "step": 806000
9780
+ },
9781
+ {
9782
+ "epoch": 11.61,
9783
+ "learning_rate": 1.7288177838705155e-05,
9784
+ "loss": 2.7908,
9785
+ "step": 806500
9786
+ },
9787
+ {
9788
+ "epoch": 11.62,
9789
+ "learning_rate": 1.723625109695242e-05,
9790
+ "loss": 2.7928,
9791
+ "step": 807000
9792
+ },
9793
+ {
9794
+ "epoch": 11.62,
9795
+ "learning_rate": 1.7184324355199685e-05,
9796
+ "loss": 2.7933,
9797
+ "step": 807500
9798
+ },
9799
+ {
9800
+ "epoch": 11.63,
9801
+ "learning_rate": 1.713239761344695e-05,
9802
+ "loss": 2.7879,
9803
+ "step": 808000
9804
+ },
9805
+ {
9806
+ "epoch": 11.64,
9807
+ "learning_rate": 1.7080470871694215e-05,
9808
+ "loss": 2.7841,
9809
+ "step": 808500
9810
+ },
9811
+ {
9812
+ "epoch": 11.64,
9813
+ "learning_rate": 1.7028647983424984e-05,
9814
+ "loss": 2.7956,
9815
+ "step": 809000
9816
+ },
9817
+ {
9818
+ "epoch": 11.65,
9819
+ "learning_rate": 1.697672124167225e-05,
9820
+ "loss": 2.789,
9821
+ "step": 809500
9822
+ },
9823
+ {
9824
+ "epoch": 11.66,
9825
+ "learning_rate": 1.6924794499919514e-05,
9826
+ "loss": 2.7915,
9827
+ "step": 810000
9828
+ },
9829
+ {
9830
+ "epoch": 11.67,
9831
+ "learning_rate": 1.687286775816678e-05,
9832
+ "loss": 2.7944,
9833
+ "step": 810500
9834
+ },
9835
+ {
9836
+ "epoch": 11.67,
9837
+ "learning_rate": 1.6820941016414044e-05,
9838
+ "loss": 2.7884,
9839
+ "step": 811000
9840
+ },
9841
+ {
9842
+ "epoch": 11.68,
9843
+ "learning_rate": 1.676901427466131e-05,
9844
+ "loss": 2.786,
9845
+ "step": 811500
9846
+ },
9847
+ {
9848
+ "epoch": 11.69,
9849
+ "learning_rate": 1.6717087532908574e-05,
9850
+ "loss": 2.791,
9851
+ "step": 812000
9852
+ },
9853
+ {
9854
+ "epoch": 11.7,
9855
+ "learning_rate": 1.666516079115584e-05,
9856
+ "loss": 2.7934,
9857
+ "step": 812500
9858
+ },
9859
+ {
9860
+ "epoch": 11.7,
9861
+ "learning_rate": 1.661333790288661e-05,
9862
+ "loss": 2.7899,
9863
+ "step": 813000
9864
+ },
9865
+ {
9866
+ "epoch": 11.71,
9867
+ "learning_rate": 1.6561515014617378e-05,
9868
+ "loss": 2.7997,
9869
+ "step": 813500
9870
+ },
9871
+ {
9872
+ "epoch": 11.72,
9873
+ "learning_rate": 1.6509588272864643e-05,
9874
+ "loss": 2.7874,
9875
+ "step": 814000
9876
+ },
9877
+ {
9878
+ "epoch": 11.72,
9879
+ "learning_rate": 1.6457661531111908e-05,
9880
+ "loss": 2.788,
9881
+ "step": 814500
9882
+ },
9883
+ {
9884
+ "epoch": 11.73,
9885
+ "learning_rate": 1.6405734789359173e-05,
9886
+ "loss": 2.7897,
9887
+ "step": 815000
9888
+ },
9889
+ {
9890
+ "epoch": 11.74,
9891
+ "learning_rate": 1.6353911901089942e-05,
9892
+ "loss": 2.7917,
9893
+ "step": 815500
9894
+ },
9895
+ {
9896
+ "epoch": 11.75,
9897
+ "learning_rate": 1.6301985159337207e-05,
9898
+ "loss": 2.7895,
9899
+ "step": 816000
9900
+ },
9901
+ {
9902
+ "epoch": 11.75,
9903
+ "learning_rate": 1.6250058417584475e-05,
9904
+ "loss": 2.7891,
9905
+ "step": 816500
9906
+ },
9907
+ {
9908
+ "epoch": 11.76,
9909
+ "learning_rate": 1.6198131675831737e-05,
9910
+ "loss": 2.7911,
9911
+ "step": 817000
9912
+ },
9913
+ {
9914
+ "epoch": 11.77,
9915
+ "learning_rate": 1.614630878756251e-05,
9916
+ "loss": 2.7892,
9917
+ "step": 817500
9918
+ },
9919
+ {
9920
+ "epoch": 11.77,
9921
+ "learning_rate": 1.609438204580977e-05,
9922
+ "loss": 2.7903,
9923
+ "step": 818000
9924
+ },
9925
+ {
9926
+ "epoch": 11.78,
9927
+ "learning_rate": 1.6042559157540545e-05,
9928
+ "loss": 2.7909,
9929
+ "step": 818500
9930
+ },
9931
+ {
9932
+ "epoch": 11.79,
9933
+ "learning_rate": 1.5990632415787806e-05,
9934
+ "loss": 2.7937,
9935
+ "step": 819000
9936
+ },
9937
+ {
9938
+ "epoch": 11.8,
9939
+ "learning_rate": 1.5938705674035074e-05,
9940
+ "loss": 2.7903,
9941
+ "step": 819500
9942
+ },
9943
+ {
9944
+ "epoch": 11.8,
9945
+ "learning_rate": 1.5886778932282336e-05,
9946
+ "loss": 2.7915,
9947
+ "step": 820000
9948
+ },
9949
+ {
9950
+ "epoch": 11.81,
9951
+ "learning_rate": 1.58348521905296e-05,
9952
+ "loss": 2.7842,
9953
+ "step": 820500
9954
+ },
9955
+ {
9956
+ "epoch": 11.82,
9957
+ "learning_rate": 1.5782925448776866e-05,
9958
+ "loss": 2.7848,
9959
+ "step": 821000
9960
+ },
9961
+ {
9962
+ "epoch": 11.82,
9963
+ "learning_rate": 1.573099870702413e-05,
9964
+ "loss": 2.7886,
9965
+ "step": 821500
9966
+ },
9967
+ {
9968
+ "epoch": 11.83,
9969
+ "learning_rate": 1.5679071965271395e-05,
9970
+ "loss": 2.7883,
9971
+ "step": 822000
9972
+ },
9973
+ {
9974
+ "epoch": 11.84,
9975
+ "learning_rate": 1.562714522351866e-05,
9976
+ "loss": 2.787,
9977
+ "step": 822500
9978
+ },
9979
+ {
9980
+ "epoch": 11.85,
9981
+ "learning_rate": 1.5575218481765925e-05,
9982
+ "loss": 2.7858,
9983
+ "step": 823000
9984
+ },
9985
+ {
9986
+ "epoch": 11.85,
9987
+ "learning_rate": 1.552329174001319e-05,
9988
+ "loss": 2.7867,
9989
+ "step": 823500
9990
+ },
9991
+ {
9992
+ "epoch": 11.86,
9993
+ "learning_rate": 1.5471468851743963e-05,
9994
+ "loss": 2.7886,
9995
+ "step": 824000
9996
+ },
9997
+ {
9998
+ "epoch": 11.87,
9999
+ "learning_rate": 1.5419542109991225e-05,
10000
+ "loss": 2.7878,
10001
+ "step": 824500
10002
+ },
10003
+ {
10004
+ "epoch": 11.88,
10005
+ "learning_rate": 1.536761536823849e-05,
10006
+ "loss": 2.7858,
10007
+ "step": 825000
10008
+ },
10009
+ {
10010
+ "epoch": 11.88,
10011
+ "learning_rate": 1.5315688626485755e-05,
10012
+ "loss": 2.7834,
10013
+ "step": 825500
10014
+ },
10015
+ {
10016
+ "epoch": 11.89,
10017
+ "learning_rate": 1.5263865738216528e-05,
10018
+ "loss": 2.7869,
10019
+ "step": 826000
10020
+ },
10021
+ {
10022
+ "epoch": 11.9,
10023
+ "learning_rate": 1.5211938996463789e-05,
10024
+ "loss": 2.7848,
10025
+ "step": 826500
10026
+ },
10027
+ {
10028
+ "epoch": 11.9,
10029
+ "learning_rate": 1.5160012254711054e-05,
10030
+ "loss": 2.7913,
10031
+ "step": 827000
10032
+ },
10033
+ {
10034
+ "epoch": 11.91,
10035
+ "learning_rate": 1.510808551295832e-05,
10036
+ "loss": 2.7855,
10037
+ "step": 827500
10038
+ },
10039
+ {
10040
+ "epoch": 11.92,
10041
+ "learning_rate": 1.5056158771205584e-05,
10042
+ "loss": 2.7869,
10043
+ "step": 828000
10044
+ },
10045
+ {
10046
+ "epoch": 11.93,
10047
+ "learning_rate": 1.5004335882936355e-05,
10048
+ "loss": 2.7922,
10049
+ "step": 828500
10050
+ },
10051
+ {
10052
+ "epoch": 11.93,
10053
+ "learning_rate": 1.4952409141183618e-05,
10054
+ "loss": 2.7892,
10055
+ "step": 829000
10056
+ },
10057
+ {
10058
+ "epoch": 11.94,
10059
+ "learning_rate": 1.4900482399430885e-05,
10060
+ "loss": 2.7889,
10061
+ "step": 829500
10062
+ },
10063
+ {
10064
+ "epoch": 11.95,
10065
+ "learning_rate": 1.4848555657678148e-05,
10066
+ "loss": 2.7905,
10067
+ "step": 830000
10068
+ },
10069
+ {
10070
+ "epoch": 11.95,
10071
+ "learning_rate": 1.479673276940892e-05,
10072
+ "loss": 2.786,
10073
+ "step": 830500
10074
+ },
10075
+ {
10076
+ "epoch": 11.96,
10077
+ "learning_rate": 1.4744806027656183e-05,
10078
+ "loss": 2.7905,
10079
+ "step": 831000
10080
+ },
10081
+ {
10082
+ "epoch": 11.97,
10083
+ "learning_rate": 1.469287928590345e-05,
10084
+ "loss": 2.7838,
10085
+ "step": 831500
10086
+ },
10087
+ {
10088
+ "epoch": 11.98,
10089
+ "learning_rate": 1.4640952544150712e-05,
10090
+ "loss": 2.786,
10091
+ "step": 832000
10092
+ },
10093
+ {
10094
+ "epoch": 11.98,
10095
+ "learning_rate": 1.4589129655881484e-05,
10096
+ "loss": 2.7855,
10097
+ "step": 832500
10098
+ },
10099
+ {
10100
+ "epoch": 11.99,
10101
+ "learning_rate": 1.4537202914128747e-05,
10102
+ "loss": 2.7836,
10103
+ "step": 833000
10104
+ },
10105
+ {
10106
+ "epoch": 12.0,
10107
+ "learning_rate": 1.4485276172376014e-05,
10108
+ "loss": 2.7894,
10109
+ "step": 833500
10110
+ },
10111
+ {
10112
+ "epoch": 12.0,
10113
+ "eval_accuracy": 0.5055095615843279,
10114
+ "eval_loss": 2.6172478199005127,
10115
+ "eval_runtime": 556.0647,
10116
+ "eval_samples_per_second": 969.198,
10117
+ "eval_steps_per_second": 40.384,
10118
+ "step": 833676
10119
  }
10120
  ],
10121
  "max_steps": 972622,
10122
  "num_train_epochs": 14,
10123
+ "total_flos": 5.126958960674341e+18,
10124
  "trial_name": null,
10125
  "trial_params": null
10126
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf9217f1422e2ba20adfa234e66d641852a206c5630b921ecc80d6c9999bdad2
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c098f75814bf0d67b7b2990383c442d386c5dedd85129e03b179a63a78cf10e
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691c58d07abd0364109c6890a1a42b4785eb7fdcf6aab10cc7829acc674dcedd
3
- size 251425
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3853a5a36f581057de1289aa6295c5ae41dbb1a1d29e8d6a716b1229b4dd2ec3
3
+ size 273994