error577 committed on
Commit
292c7b5
·
verified ·
1 Parent(s): 6a5c8eb

Training in progress, step 380, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0ec5ec60bddcdb7cd4495c196d4c736d2acc5422a89cead9c05237947944583
3
  size 838906392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee5fba5b390fc66cb1e3f32ce959e31a91eb2761e2b006e4624b977adaa6779
3
  size 838906392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9b36d851f6c92c79d874462b221f2fb965d98ad5f1628c620d0acb8014d249a
3
  size 639365221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a54a6c36243501fd43d201d1a8f3acc70b557a919ecd49d532133252e27fc165
3
  size 639365221
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d0c1565ab0ac65993469070bc840463dcbb0cba12990df8bdd934c33cb2d7ab
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a40a2f0b1c0334b5def0fdf5c80618a0bea05d5e20573d789bb7328ef2eae4
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6263112efb882a4c9ac90f3e55cc6def45a59dd01f17967ec210c01a6df4033
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d56bff5d768bcbc6b1abc460eaac5a172bfe59f1beb9abb05741a0af1e0d811
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 360,
3
- "best_metric": 0.3793332874774933,
4
- "best_model_checkpoint": "miner_id_24/checkpoint-360",
5
- "epoch": 0.6380150642445724,
6
  "eval_steps": 20,
7
- "global_step": 360,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2680,6 +2680,154 @@
2680
  "eval_samples_per_second": 3.474,
2681
  "eval_steps_per_second": 0.906,
2682
  "step": 360
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2683
  }
2684
  ],
2685
  "logging_steps": 1,
@@ -2708,7 +2856,7 @@
2708
  "attributes": {}
2709
  }
2710
  },
2711
- "total_flos": 9.165376189366272e+17,
2712
  "train_batch_size": 4,
2713
  "trial_name": null,
2714
  "trial_params": null
 
1
  {
2
+ "best_global_step": 380,
3
+ "best_metric": 0.3527662754058838,
4
+ "best_model_checkpoint": "miner_id_24/checkpoint-380",
5
+ "epoch": 0.6734603455914931,
6
  "eval_steps": 20,
7
+ "global_step": 380,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2680
  "eval_samples_per_second": 3.474,
2681
  "eval_steps_per_second": 0.906,
2682
  "step": 360
2683
+ },
2684
+ {
2685
+ "epoch": 0.6397873283119184,
2686
+ "grad_norm": 0.19102731347084045,
2687
+ "learning_rate": 0.00017938268614966994,
2688
+ "loss": 0.2565,
2689
+ "step": 361
2690
+ },
2691
+ {
2692
+ "epoch": 0.6415595923792645,
2693
+ "grad_norm": 0.27359429001808167,
2694
+ "learning_rate": 0.00017926896034306332,
2695
+ "loss": 0.3371,
2696
+ "step": 362
2697
+ },
2698
+ {
2699
+ "epoch": 0.6433318564466105,
2700
+ "grad_norm": 0.1932365894317627,
2701
+ "learning_rate": 0.00017915495800101594,
2702
+ "loss": 0.2547,
2703
+ "step": 363
2704
+ },
2705
+ {
2706
+ "epoch": 0.6451041205139566,
2707
+ "grad_norm": 0.19281119108200073,
2708
+ "learning_rate": 0.00017904067952123303,
2709
+ "loss": 0.218,
2710
+ "step": 364
2711
+ },
2712
+ {
2713
+ "epoch": 0.6468763845813026,
2714
+ "grad_norm": 0.3079637289047241,
2715
+ "learning_rate": 0.00017892612530238334,
2716
+ "loss": 0.4615,
2717
+ "step": 365
2718
+ },
2719
+ {
2720
+ "epoch": 0.6486486486486487,
2721
+ "grad_norm": 0.23658387362957,
2722
+ "learning_rate": 0.0001788112957440974,
2723
+ "loss": 0.4055,
2724
+ "step": 366
2725
+ },
2726
+ {
2727
+ "epoch": 0.6504209127159947,
2728
+ "grad_norm": 0.27093634009361267,
2729
+ "learning_rate": 0.00017869619124696634,
2730
+ "loss": 0.409,
2731
+ "step": 367
2732
+ },
2733
+ {
2734
+ "epoch": 0.6521931767833408,
2735
+ "grad_norm": 0.23031426966190338,
2736
+ "learning_rate": 0.00017858081221254048,
2737
+ "loss": 0.3424,
2738
+ "step": 368
2739
+ },
2740
+ {
2741
+ "epoch": 0.6539654408506868,
2742
+ "grad_norm": 0.22337500751018524,
2743
+ "learning_rate": 0.00017846515904332782,
2744
+ "loss": 0.3182,
2745
+ "step": 369
2746
+ },
2747
+ {
2748
+ "epoch": 0.6557377049180327,
2749
+ "grad_norm": 0.2885172963142395,
2750
+ "learning_rate": 0.00017834923214279268,
2751
+ "loss": 0.3405,
2752
+ "step": 370
2753
+ },
2754
+ {
2755
+ "epoch": 0.6575099689853788,
2756
+ "grad_norm": 0.26560068130493164,
2757
+ "learning_rate": 0.00017823303191535442,
2758
+ "loss": 0.3328,
2759
+ "step": 371
2760
+ },
2761
+ {
2762
+ "epoch": 0.6592822330527248,
2763
+ "grad_norm": 0.18252065777778625,
2764
+ "learning_rate": 0.00017811655876638578,
2765
+ "loss": 0.2438,
2766
+ "step": 372
2767
+ },
2768
+ {
2769
+ "epoch": 0.6610544971200709,
2770
+ "grad_norm": 0.19787971675395966,
2771
+ "learning_rate": 0.00017799981310221173,
2772
+ "loss": 0.2515,
2773
+ "step": 373
2774
+ },
2775
+ {
2776
+ "epoch": 0.6628267611874169,
2777
+ "grad_norm": 0.2631565034389496,
2778
+ "learning_rate": 0.00017788279533010785,
2779
+ "loss": 0.3755,
2780
+ "step": 374
2781
+ },
2782
+ {
2783
+ "epoch": 0.664599025254763,
2784
+ "grad_norm": 0.24500946700572968,
2785
+ "learning_rate": 0.00017776550585829896,
2786
+ "loss": 0.3366,
2787
+ "step": 375
2788
+ },
2789
+ {
2790
+ "epoch": 0.666371289322109,
2791
+ "grad_norm": 0.3467278778553009,
2792
+ "learning_rate": 0.00017764794509595786,
2793
+ "loss": 0.5403,
2794
+ "step": 376
2795
+ },
2796
+ {
2797
+ "epoch": 0.6681435533894551,
2798
+ "grad_norm": 0.2552179992198944,
2799
+ "learning_rate": 0.00017753011345320366,
2800
+ "loss": 0.3533,
2801
+ "step": 377
2802
+ },
2803
+ {
2804
+ "epoch": 0.6699158174568011,
2805
+ "grad_norm": 0.3037780225276947,
2806
+ "learning_rate": 0.00017741201134110042,
2807
+ "loss": 0.5212,
2808
+ "step": 378
2809
+ },
2810
+ {
2811
+ "epoch": 0.671688081524147,
2812
+ "grad_norm": 0.29784807562828064,
2813
+ "learning_rate": 0.0001772936391716559,
2814
+ "loss": 0.4664,
2815
+ "step": 379
2816
+ },
2817
+ {
2818
+ "epoch": 0.6734603455914931,
2819
+ "grad_norm": 0.22430609166622162,
2820
+ "learning_rate": 0.00017717499735781983,
2821
+ "loss": 0.2937,
2822
+ "step": 380
2823
+ },
2824
+ {
2825
+ "epoch": 0.6734603455914931,
2826
+ "eval_loss": 0.3527662754058838,
2827
+ "eval_runtime": 13.1985,
2828
+ "eval_samples_per_second": 3.485,
2829
+ "eval_steps_per_second": 0.909,
2830
+ "step": 380
2831
  }
2832
  ],
2833
  "logging_steps": 1,
 
2856
  "attributes": {}
2857
  }
2858
  },
2859
+ "total_flos": 9.674563755442176e+17,
2860
  "train_batch_size": 4,
2861
  "trial_name": null,
2862
  "trial_params": null