irishprancer commited on
Commit
f9287f2
·
verified ·
1 Parent(s): 52087f4

Training in progress, step 3600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5ae9b29eb37fb42fbf9eb5c115694ca371fc0e26d4460d51c1e57b3b7f723c9
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56adc46736dfdc18a58910a686e2a779cba708bf6c825f8800fef1e78b71d1bc
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9da6009db3b3db494405891115b35bd636e8f9d306e763d8bed169ece55bbae
3
  size 2897966906
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3deb754b02aaaeec6deedc193d54436001e2674d2c2998d170671b9e84137f76
3
  size 2897966906
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:141d455bf1ffc585ca3b0d4600374ccf796a7b5d1e74d5bec5dfdbb48f5b8730
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06599328356df05dcd7620b70dec8196479b5dd136c51c39b6857ed431a006a3
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c2119f62c9bb82318a2a55e4166f17944514b5630048a532b95bcfb2f08059
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d555d4e386572ab2daf72a4552c1fc6dfd64262cf0a36378fd375ab0aa6f30eb
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9126400947570801,
3
- "best_model_checkpoint": "./output/checkpoint-3450",
4
- "epoch": 0.18417680973734785,
5
  "eval_steps": 150,
6
- "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2606,6 +2606,119 @@
2606
  "eval_samples_per_second": 10.938,
2607
  "eval_steps_per_second": 10.938,
2608
  "step": 3450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "logging_steps": 10,
@@ -2625,7 +2738,7 @@
2625
  "attributes": {}
2626
  }
2627
  },
2628
- "total_flos": 9.690903503673754e+17,
2629
  "train_batch_size": 32,
2630
  "trial_name": null,
2631
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9102906584739685,
3
+ "best_model_checkpoint": "./output/checkpoint-3600",
4
+ "epoch": 0.19218449711723254,
5
  "eval_steps": 150,
6
+ "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 10.938,
2607
  "eval_steps_per_second": 10.938,
2608
  "step": 3450
2609
+ },
2610
+ {
2611
+ "epoch": 0.1847106555626735,
2612
+ "grad_norm": 3.9905812740325928,
2613
+ "learning_rate": 5.021124391867238e-06,
2614
+ "loss": 0.9259,
2615
+ "step": 3460
2616
+ },
2617
+ {
2618
+ "epoch": 0.18524450138799914,
2619
+ "grad_norm": 3.8994193077087402,
2620
+ "learning_rate": 4.961427693487651e-06,
2621
+ "loss": 0.9445,
2622
+ "step": 3470
2623
+ },
2624
+ {
2625
+ "epoch": 0.18577834721332478,
2626
+ "grad_norm": 3.8168013095855713,
2627
+ "learning_rate": 4.901986630230546e-06,
2628
+ "loss": 0.9217,
2629
+ "step": 3480
2630
+ },
2631
+ {
2632
+ "epoch": 0.18631219303865043,
2633
+ "grad_norm": 3.907036781311035,
2634
+ "learning_rate": 4.842803645485225e-06,
2635
+ "loss": 0.9357,
2636
+ "step": 3490
2637
+ },
2638
+ {
2639
+ "epoch": 0.18684603886397608,
2640
+ "grad_norm": 3.967823028564453,
2641
+ "learning_rate": 4.783881172032377e-06,
2642
+ "loss": 0.9106,
2643
+ "step": 3500
2644
+ },
2645
+ {
2646
+ "epoch": 0.18737988468930172,
2647
+ "grad_norm": 4.027585506439209,
2648
+ "learning_rate": 4.7252216319441075e-06,
2649
+ "loss": 0.932,
2650
+ "step": 3510
2651
+ },
2652
+ {
2653
+ "epoch": 0.18791373051462737,
2654
+ "grad_norm": 4.006540775299072,
2655
+ "learning_rate": 4.666827436484353e-06,
2656
+ "loss": 0.8955,
2657
+ "step": 3520
2658
+ },
2659
+ {
2660
+ "epoch": 0.18844757633995302,
2661
+ "grad_norm": 3.8036961555480957,
2662
+ "learning_rate": 4.6087009860097786e-06,
2663
+ "loss": 0.9312,
2664
+ "step": 3530
2665
+ },
2666
+ {
2667
+ "epoch": 0.18898142216527866,
2668
+ "grad_norm": 3.7361538410186768,
2669
+ "learning_rate": 4.550844669871094e-06,
2670
+ "loss": 0.9386,
2671
+ "step": 3540
2672
+ },
2673
+ {
2674
+ "epoch": 0.1895152679906043,
2675
+ "grad_norm": 4.001733779907227,
2676
+ "learning_rate": 4.493260866314849e-06,
2677
+ "loss": 0.94,
2678
+ "step": 3550
2679
+ },
2680
+ {
2681
+ "epoch": 0.19004911381592995,
2682
+ "grad_norm": 3.9928550720214844,
2683
+ "learning_rate": 4.435951942385668e-06,
2684
+ "loss": 0.9325,
2685
+ "step": 3560
2686
+ },
2687
+ {
2688
+ "epoch": 0.1905829596412556,
2689
+ "grad_norm": 3.8929483890533447,
2690
+ "learning_rate": 4.378920253828951e-06,
2691
+ "loss": 0.9225,
2692
+ "step": 3570
2693
+ },
2694
+ {
2695
+ "epoch": 0.19111680546658125,
2696
+ "grad_norm": 3.993201971054077,
2697
+ "learning_rate": 4.3221681449940384e-06,
2698
+ "loss": 0.9303,
2699
+ "step": 3580
2700
+ },
2701
+ {
2702
+ "epoch": 0.1916506512919069,
2703
+ "grad_norm": 4.079780101776123,
2704
+ "learning_rate": 4.2656979487378346e-06,
2705
+ "loss": 0.9439,
2706
+ "step": 3590
2707
+ },
2708
+ {
2709
+ "epoch": 0.19218449711723254,
2710
+ "grad_norm": 3.9021401405334473,
2711
+ "learning_rate": 4.2095119863289336e-06,
2712
+ "loss": 0.9037,
2713
+ "step": 3600
2714
+ },
2715
+ {
2716
+ "epoch": 0.19218449711723254,
2717
+ "eval_loss": 0.9102906584739685,
2718
+ "eval_runtime": 45.8489,
2719
+ "eval_samples_per_second": 10.905,
2720
+ "eval_steps_per_second": 10.905,
2721
+ "step": 3600
2722
  }
2723
  ],
2724
  "logging_steps": 10,
 
2738
  "attributes": {}
2739
  }
2740
  },
2741
+ "total_flos": 1.0113807646103962e+18,
2742
  "train_batch_size": 32,
2743
  "trial_name": null,
2744
  "trial_params": null