FormlessAI commited on
Commit
0015fa0
·
verified ·
1 Parent(s): eef38eb

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1500999ffdde982cb75dcaac931db3ae514ebeccf285ed4d6ce30206824f84e1
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f79687bb9f5f0706366f2c8be19c15c67471263d18f90d20b5060477db9fc88
3
  size 1037269336
last-checkpoint/global_step5450/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f78bf83e85325413df0f2bbacc777613cf49c82d2596e1692688e54d70978f84
3
+ size 781993445
last-checkpoint/global_step5450/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7163f01fcaee0f145ef1c75eff4454c619bcde5defdbee79ee575da430be5511
3
+ size 781993509
last-checkpoint/global_step5450/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48a871b3a94e17f81203820fec74d78f54c3ec465668d40c6c5bdb43c82c175d
3
+ size 781993509
last-checkpoint/global_step5450/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6c79cb7ac7b4c8bd9401c92c69b89682dbdaf4e12f4225e20205f83e52d77c
3
+ size 781993509
last-checkpoint/global_step5450/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37adf013650b50908706c5990607180032ad85ba97a40654a79dfd22b7b740e5
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5350
 
1
+ global_step5450
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:704aa408c70df7490b4abe5a20c7e5618fa72a104a517eb0305fcdbf58fc3623
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09517ccbebacf6bd023c1ab9d33afd5ec868b9be2770425bd6ebefa3839d5f4d
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd995fd505f81fa381a72edaafcd078b5a1afa9f4db01b8612d064a32d1c8579
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d62fb500bd7f639c86a4805d99914de20d8c185a99a488bb6ea36449fa573a0
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cb3928ede9d05ff204c0b223ef4aaf387da2b05e6ef0c832d041064f2b83a74
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:763a3f897c1e33a97ed5b1f4dd7ab1bdca39ada5f60f258f0e9cd8f218878aaa
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660ed3228c426f6f1703fda8186c724de56c8eb1cc8b587eacd8b1ec37ef97c3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:074a5f66e9ddd88b37d69172a271f48d50878d6d7b7fdbdb1735f35f2e0a2b15
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d046f42dd649480d7077992c7a5dfc15f777cb5e409b629b7885023f2c1d52e
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aab12b223f7247afaaf46de482c72204945729f45b93a867c6ad025ed23f245
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.81765878200531,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7777293211222561,
6
  "eval_steps": 50,
7
- "global_step": 5350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -8354,6 +8354,162 @@
8354
  "eval_samples_per_second": 172.536,
8355
  "eval_steps_per_second": 10.819,
8356
  "step": 5350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8357
  }
8358
  ],
8359
  "logging_steps": 5,
@@ -8382,7 +8538,7 @@
8382
  "attributes": {}
8383
  }
8384
  },
8385
- "total_flos": 1.394682414789296e+18,
8386
  "train_batch_size": 4,
8387
  "trial_name": null,
8388
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.8068690299987793,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7922663177787469,
6
  "eval_steps": 50,
7
+ "global_step": 5450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
8354
  "eval_samples_per_second": 172.536,
8355
  "eval_steps_per_second": 10.819,
8356
  "step": 5350
8357
+ },
8358
+ {
8359
+ "epoch": 0.7784561709550807,
8360
+ "grad_norm": 2.200019598007202,
8361
+ "learning_rate": 4.5272915415458025e-05,
8362
+ "loss": 1.8298,
8363
+ "step": 5355
8364
+ },
8365
+ {
8366
+ "epoch": 0.7791830207879052,
8367
+ "grad_norm": 2.1728880405426025,
8368
+ "learning_rate": 4.5193801886732545e-05,
8369
+ "loss": 1.8708,
8370
+ "step": 5360
8371
+ },
8372
+ {
8373
+ "epoch": 0.7799098706207298,
8374
+ "grad_norm": 2.5079801082611084,
8375
+ "learning_rate": 4.511470070524542e-05,
8376
+ "loss": 2.0338,
8377
+ "step": 5365
8378
+ },
8379
+ {
8380
+ "epoch": 0.7806367204535543,
8381
+ "grad_norm": 2.2738819122314453,
8382
+ "learning_rate": 4.5035612070133724e-05,
8383
+ "loss": 2.0264,
8384
+ "step": 5370
8385
+ },
8386
+ {
8387
+ "epoch": 0.7813635702863788,
8388
+ "grad_norm": 2.4879889488220215,
8389
+ "learning_rate": 4.495653618050305e-05,
8390
+ "loss": 2.0748,
8391
+ "step": 5375
8392
+ },
8393
+ {
8394
+ "epoch": 0.7820904201192034,
8395
+ "grad_norm": 2.4474239349365234,
8396
+ "learning_rate": 4.487747323542682e-05,
8397
+ "loss": 2.1098,
8398
+ "step": 5380
8399
+ },
8400
+ {
8401
+ "epoch": 0.7828172699520279,
8402
+ "grad_norm": 2.3970248699188232,
8403
+ "learning_rate": 4.4798423433945934e-05,
8404
+ "loss": 1.9963,
8405
+ "step": 5385
8406
+ },
8407
+ {
8408
+ "epoch": 0.7835441197848525,
8409
+ "grad_norm": 2.433213472366333,
8410
+ "learning_rate": 4.4719386975068136e-05,
8411
+ "loss": 1.8989,
8412
+ "step": 5390
8413
+ },
8414
+ {
8415
+ "epoch": 0.784270969617677,
8416
+ "grad_norm": 2.2355504035949707,
8417
+ "learning_rate": 4.464036405776766e-05,
8418
+ "loss": 1.9435,
8419
+ "step": 5395
8420
+ },
8421
+ {
8422
+ "epoch": 0.7849978194505015,
8423
+ "grad_norm": 2.4685556888580322,
8424
+ "learning_rate": 4.45613548809846e-05,
8425
+ "loss": 2.0392,
8426
+ "step": 5400
8427
+ },
8428
+ {
8429
+ "epoch": 0.7849978194505015,
8430
+ "eval_loss": 1.8167221546173096,
8431
+ "eval_runtime": 21.1854,
8432
+ "eval_samples_per_second": 155.815,
8433
+ "eval_steps_per_second": 9.771,
8434
+ "step": 5400
8435
+ },
8436
+ {
8437
+ "epoch": 0.7857246692833261,
8438
+ "grad_norm": 2.497147560119629,
8439
+ "learning_rate": 4.4482359643624416e-05,
8440
+ "loss": 2.0125,
8441
+ "step": 5405
8442
+ },
8443
+ {
8444
+ "epoch": 0.7864515191161506,
8445
+ "grad_norm": 2.2153327465057373,
8446
+ "learning_rate": 4.440337854455758e-05,
8447
+ "loss": 1.9912,
8448
+ "step": 5410
8449
+ },
8450
+ {
8451
+ "epoch": 0.7871783689489752,
8452
+ "grad_norm": 2.377063751220703,
8453
+ "learning_rate": 4.4324411782618886e-05,
8454
+ "loss": 2.0526,
8455
+ "step": 5415
8456
+ },
8457
+ {
8458
+ "epoch": 0.7879052187817996,
8459
+ "grad_norm": 2.868448257446289,
8460
+ "learning_rate": 4.424545955660708e-05,
8461
+ "loss": 2.0737,
8462
+ "step": 5420
8463
+ },
8464
+ {
8465
+ "epoch": 0.7886320686146242,
8466
+ "grad_norm": 2.2389824390411377,
8467
+ "learning_rate": 4.416652206528426e-05,
8468
+ "loss": 2.0863,
8469
+ "step": 5425
8470
+ },
8471
+ {
8472
+ "epoch": 0.7893589184474488,
8473
+ "grad_norm": 2.287515878677368,
8474
+ "learning_rate": 4.4087599507375526e-05,
8475
+ "loss": 2.1026,
8476
+ "step": 5430
8477
+ },
8478
+ {
8479
+ "epoch": 0.7900857682802733,
8480
+ "grad_norm": 2.3255653381347656,
8481
+ "learning_rate": 4.4008692081568266e-05,
8482
+ "loss": 1.9035,
8483
+ "step": 5435
8484
+ },
8485
+ {
8486
+ "epoch": 0.7908126181130979,
8487
+ "grad_norm": 2.2820541858673096,
8488
+ "learning_rate": 4.3929799986511875e-05,
8489
+ "loss": 2.0517,
8490
+ "step": 5440
8491
+ },
8492
+ {
8493
+ "epoch": 0.7915394679459223,
8494
+ "grad_norm": 2.6271772384643555,
8495
+ "learning_rate": 4.3850923420817075e-05,
8496
+ "loss": 2.0402,
8497
+ "step": 5445
8498
+ },
8499
+ {
8500
+ "epoch": 0.7922663177787469,
8501
+ "grad_norm": 2.2059221267700195,
8502
+ "learning_rate": 4.3772062583055546e-05,
8503
+ "loss": 2.1433,
8504
+ "step": 5450
8505
+ },
8506
+ {
8507
+ "epoch": 0.7922663177787469,
8508
+ "eval_loss": 1.8068690299987793,
8509
+ "eval_runtime": 19.1691,
8510
+ "eval_samples_per_second": 172.204,
8511
+ "eval_steps_per_second": 10.799,
8512
+ "step": 5450
8513
  }
8514
  ],
8515
  "logging_steps": 5,
 
8538
  "attributes": {}
8539
  }
8540
  },
8541
+ "total_flos": 1.4208114924453888e+18,
8542
  "train_batch_size": 4,
8543
  "trial_name": null,
8544
  "trial_params": null