moos124 committed on
Commit
850c173
·
verified ·
1 Parent(s): 32cf3ce

Training in progress, step 190, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b13cb2ba5b381ba323f547a1af4df6e9946f6fc7deed924d55d4409d3c442139
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a57fa937ff04bdb0d277f6c926d885d7554dc6ea5ebd9ae35e7036ef13db5bb6
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1222b74ab01701bb0e3675bc733feedb9c3434c7221420b22d2e5af7b7777f93
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ffdc5b361590cff028df75ea2c8649214b0bc354fec669fe97155a74a342cb7
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6db7479ac2562551b5835210b51f514e7bcc3c860874d079bc84238128a76ad
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cdf1de3f2f16aa745528237922430f0dcc6c568ae39f40d63fc3ac92475d5cb
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49640debfc36158f546fab6fec8b882ce2c7918646e5b9b6e88ed52c84825de3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7aba7a3fce4d4f19edab460710dd14fcd7492540cd4a3fd9914056f5f980e21
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0384,
6
  "eval_steps": 500,
7
- "global_step": 180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -198,6 +198,16 @@
198
  "mean_token_accuracy": 0.7424697011709214,
199
  "num_tokens": 838647.0,
200
  "step": 180
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 10,
@@ -217,7 +227,7 @@
217
  "attributes": {}
218
  }
219
  },
220
- "total_flos": 4013859270902784.0,
221
  "train_batch_size": 4,
222
  "trial_name": null,
223
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.04053333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 190,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
198
  "mean_token_accuracy": 0.7424697011709214,
199
  "num_tokens": 838647.0,
200
  "step": 180
201
+ },
202
+ {
203
+ "entropy": 1.0386934965848922,
204
+ "epoch": 0.04053333333333333,
205
+ "grad_norm": 0.2709059417247772,
206
+ "learning_rate": 6.3e-05,
207
+ "loss": 1.1370309829711913,
208
+ "mean_token_accuracy": 0.7532300829887391,
209
+ "num_tokens": 884497.0,
210
+ "step": 190
211
  }
212
  ],
213
  "logging_steps": 10,
 
227
  "attributes": {}
228
  }
229
  },
230
+ "total_flos": 4231622065492992.0,
231
  "train_batch_size": 4,
232
  "trial_name": null,
233
  "trial_params": null