moos124 committed on
Commit
c062f9f
·
verified ·
1 Parent(s): 9cf4b4d

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbfebfb020a6922ac7c187ec43e6f2446a7f9d245a376eb38de42ed46b1590e1
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04487b11e0f640f09f4ed23c72c8385649b1fd08438d57fbbedbb6a798d2571
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b904d73ce7aa570ef7776576e060395a7cc4149d14b35769204c250bd553f164
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f281ecd379973caf012ddda74df4aea97dbcc4553957903055ef1afe1230483
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09728db8e28af893fcd6887250b0e05bc3ac00085eefbd783ad496394a300ca7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27f52121e1217ecba835bb0ac8d5ca1dcd4c515954315ed60934935e9ad009f5
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0d97838d356de43e090e5798f1ba163245c9f1fea822a25f9209f3bf9aa6e5c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb511b5274f88689b93046fff8a536b8af17a3a3dca64503b335e8a38d5a613b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.027733333333333332,
6
  "eval_steps": 500,
7
- "global_step": 130,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,6 +148,16 @@
148
  "mean_token_accuracy": 0.7144808873534203,
149
  "num_tokens": 599533.0,
150
  "step": 130
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 10,
@@ -167,7 +177,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 2892863914924032.0,
171
  "train_batch_size": 4,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.029866666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "mean_token_accuracy": 0.7144808873534203,
149
  "num_tokens": 599533.0,
150
  "step": 130
151
+ },
152
+ {
153
+ "entropy": 1.241466723382473,
154
+ "epoch": 0.029866666666666666,
155
+ "grad_norm": 0.2866012752056122,
156
+ "learning_rate": 4.633333333333333e-05,
157
+ "loss": 1.275578498840332,
158
+ "mean_token_accuracy": 0.7148015096783638,
159
+ "num_tokens": 657261.0,
160
+ "step": 140
161
  }
162
  ],
163
  "logging_steps": 10,
 
177
  "attributes": {}
178
  }
179
  },
180
+ "total_flos": 3151360355911680.0,
181
  "train_batch_size": 4,
182
  "trial_name": null,
183
  "trial_params": null