moos124 committed on
Commit
424e467
·
verified ·
1 Parent(s): 52dff6b

Training in progress, step 3790, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0b22f2b8e16dd03b7c3e672c42ea1e3e5cfbb3c1c023ba5236c52f0aca78a99
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38de148b42235201b6f25e82304ce8313ab40648d17c6d2773a002faf625cfb2
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21f36ca82f6b28cc096c00a59bbce7315559b2897c76766b0aab03a207982d9e
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17e147d862df4e75cb4a34bfc407d6115443fcf13052e77ce4269ec3ecb3a800
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8eac438107aca69f51de74f4ea98552a5795b41511b19696d4efdb3761312b6
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b08ed07c30748809e170fd825eca3191e635e294121c3c9b7fbbf0d0e06fd62
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:779aef313e0c4c3e3c6e3d8d0f56512aa049be75c927e8009ad0e60d38c476e3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd6961a16775d2dfd539e613859a399e48d4390d84444f6f9175e36534a47b5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8064,
6
  "eval_steps": 500,
7
- "global_step": 3780,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3798,6 +3798,16 @@
3798
  "mean_token_accuracy": 0.774232342839241,
3799
  "num_tokens": 17611723.0,
3800
  "step": 3780
 
 
 
 
 
 
 
 
 
 
3801
  }
3802
  ],
3803
  "logging_steps": 10,
@@ -3817,7 +3827,7 @@
3817
  "attributes": {}
3818
  }
3819
  },
3820
- "total_flos": 8.3374429502592e+16,
3821
  "train_batch_size": 4,
3822
  "trial_name": null,
3823
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8085333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 3790,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3798
  "mean_token_accuracy": 0.774232342839241,
3799
  "num_tokens": 17611723.0,
3800
  "step": 3780
3801
+ },
3802
+ {
3803
+ "entropy": 0.9692328073084354,
3804
+ "epoch": 0.8085333333333333,
3805
+ "grad_norm": 0.24602019786834717,
3806
+ "learning_rate": 6.775887153189233e-05,
3807
+ "loss": 1.06738224029541,
3808
+ "mean_token_accuracy": 0.7612074792385102,
3809
+ "num_tokens": 17657838.0,
3810
+ "step": 3790
3811
  }
3812
  ],
3813
  "logging_steps": 10,
 
3827
  "attributes": {}
3828
  }
3829
  },
3830
+ "total_flos": 8.36086750049833e+16,
3831
  "train_batch_size": 4,
3832
  "trial_name": null,
3833
  "trial_params": null