Wilsonwin commited on
Commit
2ff0dd3
·
verified ·
1 Parent(s): 3988a03

Training in progress, step 11838, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbe911786fb4b3454d02608c237b36cc20b52333d42fa68272921c094a01a632
3
  size 328277848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:321e47c1dc832a190631f95fac9772b430fc73140d3d5243eca49fa4976c0528
3
  size 328277848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e156617695722c4ccab8876c70abb964581f51616c0cec63d83f236c2f3130
3
  size 318646859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2507c13555041b38460e977baf58f0396d26b9357458953b37b5599c0ee7222
3
  size 318646859
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:116f0b85bffdc97adeb264e8dbd65d6acc7d514e82a48ea5ea50bd5091784a48
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b172ef1a2b23540cb3d53eed9b6dcd9ee9e06553bb8c4f5a46142cb0fe60689
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9428957594188208,
6
  "eval_steps": 500,
7
- "global_step": 11500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -8249,6 +8249,237 @@
8249
  "eval_samples_per_second": 246.128,
8250
  "eval_steps_per_second": 5.169,
8251
  "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8252
  }
8253
  ],
8254
  "logging_steps": 10,
@@ -8263,12 +8494,12 @@
8263
  "should_evaluate": false,
8264
  "should_log": false,
8265
  "should_save": true,
8266
- "should_training_stop": false
8267
  },
8268
  "attributes": {}
8269
  }
8270
  },
8271
- "total_flos": 3.846232536325816e+17,
8272
  "train_batch_size": 48,
8273
  "trial_name": null,
8274
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
+ "global_step": 11838,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
8249
  "eval_samples_per_second": 246.128,
8250
  "eval_steps_per_second": 5.169,
8251
  "step": 11500
8252
+ },
8253
+ {
8254
+ "epoch": 1.9445852339922283,
8255
+ "grad_norm": 0.4352650046348572,
8256
+ "learning_rate": 8.270650415620584e-07,
8257
+ "loss": 4.2965538024902346,
8258
+ "step": 11510
8259
+ },
8260
+ {
8261
+ "epoch": 1.9462747085656362,
8262
+ "grad_norm": 0.43585142493247986,
8263
+ "learning_rate": 7.775944524542055e-07,
8264
+ "loss": 4.270129776000976,
8265
+ "step": 11520
8266
+ },
8267
+ {
8268
+ "epoch": 1.9479641831390437,
8269
+ "grad_norm": 0.4469541907310486,
8270
+ "learning_rate": 7.296455308872406e-07,
8271
+ "loss": 4.283909606933594,
8272
+ "step": 11530
8273
+ },
8274
+ {
8275
+ "epoch": 1.9496536577124515,
8276
+ "grad_norm": 0.4361380636692047,
8277
+ "learning_rate": 6.832187658113441e-07,
8278
+ "loss": 4.296160125732422,
8279
+ "step": 11540
8280
+ },
8281
+ {
8282
+ "epoch": 1.9513431322858592,
8283
+ "grad_norm": 0.44409504532814026,
8284
+ "learning_rate": 6.383146306547626e-07,
8285
+ "loss": 4.304541778564453,
8286
+ "step": 11550
8287
+ },
8288
+ {
8289
+ "epoch": 1.9530326068592667,
8290
+ "grad_norm": 0.45062074065208435,
8291
+ "learning_rate": 5.949335833189628e-07,
8292
+ "loss": 4.3281913757324215,
8293
+ "step": 11560
8294
+ },
8295
+ {
8296
+ "epoch": 1.9547220814326745,
8297
+ "grad_norm": 0.45208507776260376,
8298
+ "learning_rate": 5.530760661741018e-07,
8299
+ "loss": 4.3035846710205075,
8300
+ "step": 11570
8301
+ },
8302
+ {
8303
+ "epoch": 1.9564115560060822,
8304
+ "grad_norm": 0.44333794713020325,
8305
+ "learning_rate": 5.127425060543478e-07,
8306
+ "loss": 4.278887939453125,
8307
+ "step": 11580
8308
+ },
8309
+ {
8310
+ "epoch": 1.9581010305794897,
8311
+ "grad_norm": 0.44367748498916626,
8312
+ "learning_rate": 4.7393331425364943e-07,
8313
+ "loss": 4.281793594360352,
8314
+ "step": 11590
8315
+ },
8316
+ {
8317
+ "epoch": 1.9597905051528974,
8318
+ "grad_norm": 0.4411092698574066,
8319
+ "learning_rate": 4.3664888652144017e-07,
8320
+ "loss": 4.278807067871094,
8321
+ "step": 11600
8322
+ },
8323
+ {
8324
+ "epoch": 1.9614799797263052,
8325
+ "grad_norm": 0.44609910249710083,
8326
+ "learning_rate": 4.008896030587072e-07,
8327
+ "loss": 4.270274353027344,
8328
+ "step": 11610
8329
+ },
8330
+ {
8331
+ "epoch": 1.9631694542997127,
8332
+ "grad_norm": 0.43740522861480713,
8333
+ "learning_rate": 3.6665582851406195e-07,
8334
+ "loss": 4.296014785766602,
8335
+ "step": 11620
8336
+ },
8337
+ {
8338
+ "epoch": 1.9648589288731204,
8339
+ "grad_norm": 0.44448962807655334,
8340
+ "learning_rate": 3.3394791198000927e-07,
8341
+ "loss": 4.282284927368164,
8342
+ "step": 11630
8343
+ },
8344
+ {
8345
+ "epoch": 1.9665484034465281,
8346
+ "grad_norm": 0.45065152645111084,
8347
+ "learning_rate": 3.027661869893672e-07,
8348
+ "loss": 4.2820892333984375,
8349
+ "step": 11640
8350
+ },
8351
+ {
8352
+ "epoch": 1.9682378780199357,
8353
+ "grad_norm": 0.4398045539855957,
8354
+ "learning_rate": 2.731109715119861e-07,
8355
+ "loss": 4.281244277954102,
8356
+ "step": 11650
8357
+ },
8358
+ {
8359
+ "epoch": 1.9699273525933436,
8360
+ "grad_norm": 0.4467960000038147,
8361
+ "learning_rate": 2.4498256795135173e-07,
8362
+ "loss": 4.307322311401367,
8363
+ "step": 11660
8364
+ },
8365
+ {
8366
+ "epoch": 1.9716168271667511,
8367
+ "grad_norm": 0.4327242970466614,
8368
+ "learning_rate": 2.183812631415871e-07,
8369
+ "loss": 4.275672149658203,
8370
+ "step": 11670
8371
+ },
8372
+ {
8373
+ "epoch": 1.9733063017401589,
8374
+ "grad_norm": 0.43306484818458557,
8375
+ "learning_rate": 1.933073283445219e-07,
8376
+ "loss": 4.291437149047852,
8377
+ "step": 11680
8378
+ },
8379
+ {
8380
+ "epoch": 1.9749957763135666,
8381
+ "grad_norm": 0.4464097023010254,
8382
+ "learning_rate": 1.697610192469112e-07,
8383
+ "loss": 4.312542343139649,
8384
+ "step": 11690
8385
+ },
8386
+ {
8387
+ "epoch": 1.976685250886974,
8388
+ "grad_norm": 0.4436480700969696,
8389
+ "learning_rate": 1.4774257595783766e-07,
8390
+ "loss": 4.300673294067383,
8391
+ "step": 11700
8392
+ },
8393
+ {
8394
+ "epoch": 1.9783747254603818,
8395
+ "grad_norm": 0.44450485706329346,
8396
+ "learning_rate": 1.272522230062467e-07,
8397
+ "loss": 4.290340805053711,
8398
+ "step": 11710
8399
+ },
8400
+ {
8401
+ "epoch": 1.9800642000337896,
8402
+ "grad_norm": 0.4362986981868744,
8403
+ "learning_rate": 1.0829016933869838e-07,
8404
+ "loss": 4.2894245147705075,
8405
+ "step": 11720
8406
+ },
8407
+ {
8408
+ "epoch": 1.981753674607197,
8409
+ "grad_norm": 0.43450725078582764,
8410
+ "learning_rate": 9.085660831715247e-08,
8411
+ "loss": 4.298795700073242,
8412
+ "step": 11730
8413
+ },
8414
+ {
8415
+ "epoch": 1.9834431491806048,
8416
+ "grad_norm": 0.44246765971183777,
8417
+ "learning_rate": 7.495171771710328e-08,
8418
+ "loss": 4.293585968017578,
8419
+ "step": 11740
8420
+ },
8421
+ {
8422
+ "epoch": 1.9851326237540126,
8423
+ "grad_norm": 0.43929263949394226,
8424
+ "learning_rate": 6.057565972568123e-08,
8425
+ "loss": 4.293174743652344,
8426
+ "step": 11750
8427
+ },
8428
+ {
8429
+ "epoch": 1.98682209832742,
8430
+ "grad_norm": 0.4450415372848511,
8431
+ "learning_rate": 4.772858094005405e-08,
8432
+ "loss": 4.3004913330078125,
8433
+ "step": 11760
8434
+ },
8435
+ {
8436
+ "epoch": 1.9885115729008278,
8437
+ "grad_norm": 0.4472520053386688,
8438
+ "learning_rate": 3.641061236591136e-08,
8439
+ "loss": 4.2836250305175785,
8440
+ "step": 11770
8441
+ },
8442
+ {
8443
+ "epoch": 1.9902010474742355,
8444
+ "grad_norm": 0.44302183389663696,
8445
+ "learning_rate": 2.6621869416099118e-08,
8446
+ "loss": 4.290175247192383,
8447
+ "step": 11780
8448
+ },
8449
+ {
8450
+ "epoch": 1.991890522047643,
8451
+ "grad_norm": 0.4414844512939453,
8452
+ "learning_rate": 1.8362451909520458e-08,
8453
+ "loss": 4.286873245239258,
8454
+ "step": 11790
8455
+ },
8456
+ {
8457
+ "epoch": 1.993579996621051,
8458
+ "grad_norm": 0.44598934054374695,
8459
+ "learning_rate": 1.16324440700033e-08,
8460
+ "loss": 4.297615051269531,
8461
+ "step": 11800
8462
+ },
8463
+ {
8464
+ "epoch": 1.9952694711944585,
8465
+ "grad_norm": 0.4523853361606598,
8466
+ "learning_rate": 6.431914525567572e-09,
8467
+ "loss": 4.289733505249023,
8468
+ "step": 11810
8469
+ },
8470
+ {
8471
+ "epoch": 1.996958945767866,
8472
+ "grad_norm": 0.4494129419326782,
8473
+ "learning_rate": 2.760916307625871e-09,
8474
+ "loss": 4.304800415039063,
8475
+ "step": 11820
8476
+ },
8477
+ {
8478
+ "epoch": 1.998648420341274,
8479
+ "grad_norm": 0.4344528913497925,
8480
+ "learning_rate": 6.194868504838524e-10,
8481
+ "loss": 4.279055786132813,
8482
+ "step": 11830
8483
  }
8484
  ],
8485
  "logging_steps": 10,
 
8494
  "should_evaluate": false,
8495
  "should_log": false,
8496
  "should_save": true,
8497
+ "should_training_stop": true
8498
  },
8499
  "attributes": {}
8500
  }
8501
  },
8502
+ "total_flos": 3.959258038224814e+17,
8503
  "train_batch_size": 48,
8504
  "trial_name": null,
8505
  "trial_params": null