moos124 committed on
Commit
a3b85bb
·
verified ·
1 Parent(s): 7b1e5b1

Training in progress, step 3780, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55c1a161977e9ae31c5e7068aa29473c8a56d1e4487ac3f26b676df25a23cf6a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b22f2b8e16dd03b7c3e672c42ea1e3e5cfbb3c1c023ba5236c52f0aca78a99
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d110d205e7820b84308ba24c1b34260b2fc80a37fe7d91d947d9f51077ecbb74
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21f36ca82f6b28cc096c00a59bbce7315559b2897c76766b0aab03a207982d9e
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f36ee8ffd6e236e454a94766e95e7bbeff897da092727caafc81f8a4d30246d6
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8eac438107aca69f51de74f4ea98552a5795b41511b19696d4efdb3761312b6
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e0ba4e5eaa10dc017da6a4a30c7d09ec7aae6a3438aa93ce0f4d5ab71016c7e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779aef313e0c4c3e3c6e3d8d0f56512aa049be75c927e8009ad0e60d38c476e3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8042666666666667,
6
  "eval_steps": 500,
7
- "global_step": 3770,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3788,6 +3788,16 @@
3788
  "mean_token_accuracy": 0.774385878443718,
3789
  "num_tokens": 17563524.0,
3790
  "step": 3770
 
 
 
 
 
 
 
 
 
 
3791
  }
3792
  ],
3793
  "logging_steps": 10,
@@ -3807,7 +3817,7 @@
3807
  "attributes": {}
3808
  }
3809
  },
3810
- "total_flos": 8.315729753880883e+16,
3811
  "train_batch_size": 4,
3812
  "trial_name": null,
3813
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8064,
6
  "eval_steps": 500,
7
+ "global_step": 3780,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3788
  "mean_token_accuracy": 0.774385878443718,
3789
  "num_tokens": 17563524.0,
3790
  "step": 3770
3791
+ },
3792
+ {
3793
+ "entropy": 0.9020247898995877,
3794
+ "epoch": 0.8064,
3795
+ "grad_norm": 0.25860196352005005,
3796
+ "learning_rate": 6.79205518062118e-05,
3797
+ "loss": 0.9976821899414062,
3798
+ "mean_token_accuracy": 0.774232342839241,
3799
+ "num_tokens": 17611723.0,
3800
+ "step": 3780
3801
  }
3802
  ],
3803
  "logging_steps": 10,
 
3817
  "attributes": {}
3818
  }
3819
  },
3820
+ "total_flos": 8.3374429502592e+16,
3821
  "train_batch_size": 4,
3822
  "trial_name": null,
3823
  "trial_params": null