moos124 committed on
Commit
44c1799
·
verified ·
1 Parent(s): 32d1359

Training in progress, step 180, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d74edd1550f3054e6c6139caf7d16e4fba4e83f275ff94e54f95511292c6eef0
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b13cb2ba5b381ba323f547a1af4df6e9946f6fc7deed924d55d4409d3c442139
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80f329ce846ef5208cb4db50950c0159fb4339bbdfa9deca86421fcb3a3e22fe
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1222b74ab01701bb0e3675bc733feedb9c3434c7221420b22d2e5af7b7777f93
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05ed2df097c924d834b0115a6978c7176f079a672434526c0cd830d074b234e6
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6db7479ac2562551b5835210b51f514e7bcc3c860874d079bc84238128a76ad
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e24a40a25924ca0c915ae0e90b167f67b5630f335c6534762576ecaa3d8bf48
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49640debfc36158f546fab6fec8b882ce2c7918646e5b9b6e88ed52c84825de3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.03626666666666667,
6
  "eval_steps": 500,
7
- "global_step": 170,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -188,6 +188,16 @@
188
  "mean_token_accuracy": 0.7382922798395157,
189
  "num_tokens": 794488.0,
190
  "step": 170
 
 
 
 
 
 
 
 
 
 
191
  }
192
  ],
193
  "logging_steps": 10,
@@ -207,7 +217,7 @@
207
  "attributes": {}
208
  }
209
  },
210
- "total_flos": 3810848905328640.0,
211
  "train_batch_size": 4,
212
  "trial_name": null,
213
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0384,
6
  "eval_steps": 500,
7
+ "global_step": 180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
188
  "mean_token_accuracy": 0.7382922798395157,
189
  "num_tokens": 794488.0,
190
  "step": 170
191
+ },
192
+ {
193
+ "entropy": 1.0789968609809875,
194
+ "epoch": 0.0384,
195
+ "grad_norm": 0.2265262007713318,
196
+ "learning_rate": 5.966666666666667e-05,
197
+ "loss": 1.1242941856384276,
198
+ "mean_token_accuracy": 0.7424697011709214,
199
+ "num_tokens": 838647.0,
200
+ "step": 180
201
  }
202
  ],
203
  "logging_steps": 10,
 
217
  "attributes": {}
218
  }
219
  },
220
+ "total_flos": 4013859270902784.0,
221
  "train_batch_size": 4,
222
  "trial_name": null,
223
  "trial_params": null