mohammadmahdinouri committed
Commit c9f29c5 · verified · 1 Parent(s): c6b76bc

Training in progress, step 76000, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a67b2c60c5b42d0ad22d6b38771528b94fc53ceec628d0597d6fa521952a684c
+oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af
 size 304481530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6916c88bb66f81e6f1308f6aadeffdb932cc73012f17c967d2f81582f0d6ec4
+oid sha256:9513e34a098ccfc4d0eeceb95099c5472b4ff0a71cffb25d876aad974cab2486
 size 402029570
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1caa66015d3956d30ec507257de058a8c2fd4bde8e3572a38d393062e23e25fa
+oid sha256:c3691082114682896d0f28ee5b4c8f41d4639d4efe6c895c755146048ab7c832
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43374ebce165dffb63c7f0a02b8a1fb69d9d2182c0805086854a706ff35de8db
+oid sha256:6c20256e223141e6700101ae515de5a6287d380eaea8a4346e1c56536ce67dcb
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23f5704701def73bff9de54ed2bc9c44e464b4fd7bf79cf9e15b571b97700de5
+oid sha256:a5392970d44236e7a431111e07b6640793728800da16c52d461401ef3040338a
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2ec98dcfff897ba38371ec424fd9cb0533d296496a8ad5f5af6ba3e2b631320
+oid sha256:756f7cda01c1bba0353fe356cfd74ccb32f9626ff6708219372ea8a4c1ba35dc
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83d418122fbb0fa369cfecb2f66848d24fc6c35ef433b91965b2ecce9163409e
+oid sha256:178aedfc2920966f379dc01376957ddc00b6df6f84cf67e8abd741361412b63d
 size 1064
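
Each pointer diff above follows the Git LFS pointer-file layout (version line, oid sha256 line, size line in bytes, per https://git-lfs.github.com/spec/v1). Only the oid changes between the step-75000 and step-76000 checkpoints; the sizes stay identical, presumably because the serialized tensors keep the same shapes. As a minimal illustrative sketch only (not part of this repository; parse_lfs_pointer is a made-up helper name), such a pointer can be read like this:

# Sketch: parse a Git LFS pointer file of the form shown in the diffs above.
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")  # each line is "key value"
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:cc06a7f089f926af24a4dafd2fc5c68a00957b0501ae37664b3613577e08b3af\n"
    "size 304481530\n"
)
info = parse_lfs_pointer(pointer)
print(info["oid"])        # sha256:cc06a7f0...
print(int(info["size"]))  # 304481530 bytes (~290 MiB)
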
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11109860223145246,
+  "epoch": 0.11257991692787182,
   "eval_steps": 500,
-  "global_step": 75000,
+  "global_step": 76000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26258,6 +26258,356 @@
       "learning_rate": 0.00048160271404356535,
       "loss": 16.4454,
       "step": 75000
+    },
+    {
+      "epoch": 0.11112822852538085,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.0004815977751085578,
+      "loss": 16.438,
+      "step": 75020
+    },
+    {
+      "epoch": 0.11115785481930923,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004815928361735502,
+      "loss": 16.4092,
+      "step": 75040
+    },
+    {
+      "epoch": 0.11118748111323762,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815878972385427,
+      "loss": 16.364,
+      "step": 75060
+    },
+    {
+      "epoch": 0.111217107407166,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004815829583035351,
+      "loss": 16.3725,
+      "step": 75080
+    },
+    {
+      "epoch": 0.11124673370109439,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048157801936852754,
+      "loss": 16.4758,
+      "step": 75100
+    },
+    {
+      "epoch": 0.11127635999502278,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00048157308043352,
+      "loss": 16.3516,
+      "step": 75120
+    },
+    {
+      "epoch": 0.11130598628895116,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048156814149851243,
+      "loss": 16.4029,
+      "step": 75140
+    },
+    {
+      "epoch": 0.11133561258287955,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815632025635048,
+      "loss": 16.4283,
+      "step": 75160
+    },
+    {
+      "epoch": 0.11136523887680795,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004815582636284973,
+      "loss": 16.3905,
+      "step": 75180
+    },
+    {
+      "epoch": 0.11139486517073634,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004815533246934897,
+      "loss": 16.3731,
+      "step": 75200
+    },
+    {
+      "epoch": 0.11142449146466472,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048154838575848217,
+      "loss": 16.4243,
+      "step": 75220
+    },
+    {
+      "epoch": 0.11145411775859311,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048154344682347456,
+      "loss": 16.3272,
+      "step": 75240
+    },
+    {
+      "epoch": 0.1114837440525215,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048153850788846696,
+      "loss": 16.3074,
+      "step": 75260
+    },
+    {
+      "epoch": 0.11151337034644988,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.00048153356895345946,
+      "loss": 16.4066,
+      "step": 75280
+    },
+    {
+      "epoch": 0.11154299664037827,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048152863001845185,
+      "loss": 16.3641,
+      "step": 75300
+    },
+    {
+      "epoch": 0.11157262293430666,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004815236910834443,
+      "loss": 16.3674,
+      "step": 75320
+    },
+    {
+      "epoch": 0.11160224922823504,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004815187521484367,
+      "loss": 16.3299,
+      "step": 75340
+    },
+    {
+      "epoch": 0.11163187552216343,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004815138132134292,
+      "loss": 16.4177,
+      "step": 75360
+    },
+    {
+      "epoch": 0.11166150181609182,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004815088742784216,
+      "loss": 16.3523,
+      "step": 75380
+    },
+    {
+      "epoch": 0.1116911281100202,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048150393534341404,
+      "loss": 16.3449,
+      "step": 75400
+    },
+    {
+      "epoch": 0.11172075440394859,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814989964084065,
+      "loss": 16.3556,
+      "step": 75420
+    },
+    {
+      "epoch": 0.11175038069787697,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048149405747339893,
+      "loss": 16.4188,
+      "step": 75440
+    },
+    {
+      "epoch": 0.11178000699180536,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004814891185383913,
+      "loss": 16.3877,
+      "step": 75460
+    },
+    {
+      "epoch": 0.11180963328573375,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814841796033838,
+      "loss": 16.3696,
+      "step": 75480
+    },
+    {
+      "epoch": 0.11183925957966215,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004814792406683762,
+      "loss": 16.3543,
+      "step": 75500
+    },
+    {
+      "epoch": 0.11186888587359053,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048147430173336867,
+      "loss": 16.4576,
+      "step": 75520
+    },
+    {
+      "epoch": 0.11189851216751892,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048146936279836106,
+      "loss": 16.3673,
+      "step": 75540
+    },
+    {
+      "epoch": 0.11192813846144731,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004814644238633535,
+      "loss": 16.4263,
+      "step": 75560
+    },
+    {
+      "epoch": 0.1119577647553757,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048145948492834596,
+      "loss": 16.3437,
+      "step": 75580
+    },
+    {
+      "epoch": 0.11198739104930408,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048145454599333835,
+      "loss": 16.3618,
+      "step": 75600
+    },
+    {
+      "epoch": 0.11201701734323247,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004814496070583308,
+      "loss": 16.3418,
+      "step": 75620
+    },
+    {
+      "epoch": 0.11204664363716085,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004814446681233232,
+      "loss": 16.3916,
+      "step": 75640
+    },
+    {
+      "epoch": 0.11207626993108924,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004814397291883157,
+      "loss": 16.3521,
+      "step": 75660
+    },
+    {
+      "epoch": 0.11210589622501763,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004814347902533081,
+      "loss": 16.3464,
+      "step": 75680
+    },
+    {
+      "epoch": 0.11213552251894601,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048142985131830054,
+      "loss": 16.3965,
+      "step": 75700
+    },
+    {
+      "epoch": 0.1121651488128744,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.000481424912383293,
+      "loss": 16.379,
+      "step": 75720
+    },
+    {
+      "epoch": 0.11219477510680279,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048141997344828543,
+      "loss": 16.3858,
+      "step": 75740
+    },
+    {
+      "epoch": 0.11222440140073117,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004814150345132778,
+      "loss": 16.3445,
+      "step": 75760
+    },
+    {
+      "epoch": 0.11225402769465956,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004814100955782703,
+      "loss": 16.2839,
+      "step": 75780
+    },
+    {
+      "epoch": 0.11228365398858794,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004814051566432627,
+      "loss": 16.3684,
+      "step": 75800
+    },
+    {
+      "epoch": 0.11231328028251635,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048140021770825517,
+      "loss": 16.3344,
+      "step": 75820
+    },
+    {
+      "epoch": 0.11234290657644473,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048139527877324756,
+      "loss": 16.3251,
+      "step": 75840
+    },
+    {
+      "epoch": 0.11237253287037312,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048139033983824,
+      "loss": 16.3266,
+      "step": 75860
+    },
+    {
+      "epoch": 0.1124021591643015,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048138540090323246,
+      "loss": 16.3339,
+      "step": 75880
+    },
+    {
+      "epoch": 0.11243178545822989,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004813804619682249,
+      "loss": 16.3593,
+      "step": 75900
+    },
+    {
+      "epoch": 0.11246141175215828,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004813755230332173,
+      "loss": 16.3293,
+      "step": 75920
+    },
+    {
+      "epoch": 0.11249103804608666,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004813705840982097,
+      "loss": 16.3553,
+      "step": 75940
+    },
+    {
+      "epoch": 0.11252066434001505,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004813656451632022,
+      "loss": 16.375,
+      "step": 75960
+    },
+    {
+      "epoch": 0.11255029063394344,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004813607062281946,
+      "loss": 16.3077,
+      "step": 75980
+    },
+    {
+      "epoch": 0.11257991692787182,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048135576729318704,
+      "loss": 16.3442,
+      "step": 76000
     }
   ],
   "logging_steps": 20,
@@ -26277,7 +26627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.514317317520595e+19,
+  "total_flos": 5.587853722915124e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null