FormlessAI commited on
Commit
2641d98
·
verified ·
1 Parent(s): a5216cc

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f79687bb9f5f0706366f2c8be19c15c67471263d18f90d20b5060477db9fc88
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67f14b217337322ca15f39ff83faddad85eaa2b699e839156e4b5a8f9a547f46
3
  size 1037269336
last-checkpoint/global_step5550/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c87cdd188b7d8338c5735456cd0b5d5f76da1ae45c65ae64203c43bb2cc5cbb
3
+ size 781993445
last-checkpoint/global_step5550/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd4e113b5fb8155b35a8a0c4e6fc10ed855790f08ce29e733f8d66fda5af32d
3
+ size 781993509
last-checkpoint/global_step5550/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae567e275bbee69ba582162f258fe56cae2d7f50794f0cc5549cee9c1092a4b0
3
+ size 781993509
last-checkpoint/global_step5550/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd59427a0336d2178aac48cec15bb26146284b95ac46e9b37d8119c87145dc57
3
+ size 781993509
last-checkpoint/global_step5550/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67e00afbb6ddc69255a0727f2ff587593f7d4cec424b3a448da9f5de9671f71e
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step5450
 
1
+ global_step5550
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09517ccbebacf6bd023c1ab9d33afd5ec868b9be2770425bd6ebefa3839d5f4d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c363c9d6409891b8a0c216d60fd16b304f275f82249966d3bc42689f8ffeca4
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d62fb500bd7f639c86a4805d99914de20d8c185a99a488bb6ea36449fa573a0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12da4effb035daeb8df40f07293059c0cd2a4fed6029443b6a20828e64db1c2a
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:763a3f897c1e33a97ed5b1f4dd7ab1bdca39ada5f60f258f0e9cd8f218878aaa
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e949608ee1f454e74a01438a4b477930ebee2355ab0167ac452e85c9078851f2
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:074a5f66e9ddd88b37d69172a271f48d50878d6d7b7fdbdb1735f35f2e0a2b15
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a78f5e77cc25088c5130959b2bf3c1ee05d44c7e4aef6524adc45fb65662182
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aab12b223f7247afaaf46de482c72204945729f45b93a867c6ad025ed23f245
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a52d6724c4ae1e5cf5a59cbc21dfbb5e7c37003b46ec99ab94837465b1b4c4d
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 1.8068690299987793,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7922663177787469,
6
  "eval_steps": 50,
7
- "global_step": 5450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -8510,6 +8510,162 @@
8510
  "eval_samples_per_second": 172.204,
8511
  "eval_steps_per_second": 10.799,
8512
  "step": 5450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8513
  }
8514
  ],
8515
  "logging_steps": 5,
@@ -8538,7 +8694,7 @@
8538
  "attributes": {}
8539
  }
8540
  },
8541
- "total_flos": 1.4208114924453888e+18,
8542
  "train_batch_size": 4,
8543
  "trial_name": null,
8544
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 1.7976654767990112,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8068033144352377,
6
  "eval_steps": 50,
7
+ "global_step": 5550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
8510
  "eval_samples_per_second": 172.204,
8511
  "eval_steps_per_second": 10.799,
8512
  "step": 5450
8513
+ },
8514
+ {
8515
+ "epoch": 0.7929931676115715,
8516
+ "grad_norm": 2.3272275924682617,
8517
+ "learning_rate": 4.369321767175934e-05,
8518
+ "loss": 1.8659,
8519
+ "step": 5455
8520
+ },
8521
+ {
8522
+ "epoch": 0.793720017444396,
8523
+ "grad_norm": 2.3955276012420654,
8524
+ "learning_rate": 4.3614388885420454e-05,
8525
+ "loss": 1.9935,
8526
+ "step": 5460
8527
+ },
8528
+ {
8529
+ "epoch": 0.7944468672772206,
8530
+ "grad_norm": 2.2696611881256104,
8531
+ "learning_rate": 4.353557642249021e-05,
8532
+ "loss": 2.107,
8533
+ "step": 5465
8534
+ },
8535
+ {
8536
+ "epoch": 0.795173717110045,
8537
+ "grad_norm": 2.432871103286743,
8538
+ "learning_rate": 4.345678048137896e-05,
8539
+ "loss": 2.1197,
8540
+ "step": 5470
8541
+ },
8542
+ {
8543
+ "epoch": 0.7959005669428696,
8544
+ "grad_norm": 2.0708091259002686,
8545
+ "learning_rate": 4.337800126045534e-05,
8546
+ "loss": 2.096,
8547
+ "step": 5475
8548
+ },
8549
+ {
8550
+ "epoch": 0.7966274167756942,
8551
+ "grad_norm": 2.519308567047119,
8552
+ "learning_rate": 4.3299238958045964e-05,
8553
+ "loss": 1.914,
8554
+ "step": 5480
8555
+ },
8556
+ {
8557
+ "epoch": 0.7973542666085187,
8558
+ "grad_norm": 2.087770938873291,
8559
+ "learning_rate": 4.3220493772434835e-05,
8560
+ "loss": 1.9734,
8561
+ "step": 5485
8562
+ },
8563
+ {
8564
+ "epoch": 0.7980811164413432,
8565
+ "grad_norm": 2.3013477325439453,
8566
+ "learning_rate": 4.314176590186285e-05,
8567
+ "loss": 2.0952,
8568
+ "step": 5490
8569
+ },
8570
+ {
8571
+ "epoch": 0.7988079662741677,
8572
+ "grad_norm": 2.3956501483917236,
8573
+ "learning_rate": 4.306305554452735e-05,
8574
+ "loss": 2.1661,
8575
+ "step": 5495
8576
+ },
8577
+ {
8578
+ "epoch": 0.7995348161069923,
8579
+ "grad_norm": 2.290743827819824,
8580
+ "learning_rate": 4.298436289858153e-05,
8581
+ "loss": 1.9764,
8582
+ "step": 5500
8583
+ },
8584
+ {
8585
+ "epoch": 0.7995348161069923,
8586
+ "eval_loss": 1.8048888444900513,
8587
+ "eval_runtime": 20.7967,
8588
+ "eval_samples_per_second": 158.727,
8589
+ "eval_steps_per_second": 9.953,
8590
+ "step": 5500
8591
+ },
8592
+ {
8593
+ "epoch": 0.8002616659398168,
8594
+ "grad_norm": 2.141601324081421,
8595
+ "learning_rate": 4.2905688162134085e-05,
8596
+ "loss": 1.8667,
8597
+ "step": 5505
8598
+ },
8599
+ {
8600
+ "epoch": 0.8009885157726414,
8601
+ "grad_norm": 2.3627877235412598,
8602
+ "learning_rate": 4.2827031533248535e-05,
8603
+ "loss": 2.1887,
8604
+ "step": 5510
8605
+ },
8606
+ {
8607
+ "epoch": 0.8017153656054659,
8608
+ "grad_norm": 2.5023484230041504,
8609
+ "learning_rate": 4.2748393209942855e-05,
8610
+ "loss": 2.13,
8611
+ "step": 5515
8612
+ },
8613
+ {
8614
+ "epoch": 0.8024422154382904,
8615
+ "grad_norm": 2.540010690689087,
8616
+ "learning_rate": 4.266977339018893e-05,
8617
+ "loss": 2.1042,
8618
+ "step": 5520
8619
+ },
8620
+ {
8621
+ "epoch": 0.803169065271115,
8622
+ "grad_norm": 2.3447046279907227,
8623
+ "learning_rate": 4.259117227191208e-05,
8624
+ "loss": 2.1636,
8625
+ "step": 5525
8626
+ },
8627
+ {
8628
+ "epoch": 0.8038959151039395,
8629
+ "grad_norm": 2.090090751647949,
8630
+ "learning_rate": 4.251259005299049e-05,
8631
+ "loss": 1.8241,
8632
+ "step": 5530
8633
+ },
8634
+ {
8635
+ "epoch": 0.8046227649367641,
8636
+ "grad_norm": 2.4929826259613037,
8637
+ "learning_rate": 4.243402693125484e-05,
8638
+ "loss": 2.0696,
8639
+ "step": 5535
8640
+ },
8641
+ {
8642
+ "epoch": 0.8053496147695886,
8643
+ "grad_norm": 1.9764723777770996,
8644
+ "learning_rate": 4.235548310448767e-05,
8645
+ "loss": 1.9418,
8646
+ "step": 5540
8647
+ },
8648
+ {
8649
+ "epoch": 0.8060764646024131,
8650
+ "grad_norm": 2.151935338973999,
8651
+ "learning_rate": 4.2276958770423e-05,
8652
+ "loss": 1.9833,
8653
+ "step": 5545
8654
+ },
8655
+ {
8656
+ "epoch": 0.8068033144352377,
8657
+ "grad_norm": 2.3030054569244385,
8658
+ "learning_rate": 4.2198454126745694e-05,
8659
+ "loss": 2.0953,
8660
+ "step": 5550
8661
+ },
8662
+ {
8663
+ "epoch": 0.8068033144352377,
8664
+ "eval_loss": 1.7976654767990112,
8665
+ "eval_runtime": 19.0973,
8666
+ "eval_samples_per_second": 172.852,
8667
+ "eval_steps_per_second": 10.839,
8668
+ "step": 5550
8669
  }
8670
  ],
8671
  "logging_steps": 5,
 
8694
  "attributes": {}
8695
  }
8696
  },
8697
+ "total_flos": 1.4465642591598674e+18,
8698
  "train_batch_size": 4,
8699
  "trial_name": null,
8700
  "trial_params": null