Reggie committed on
Commit
8f590ef
·
verified ·
1 Parent(s): 4adeb12

Training in progress, step 1400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73a10ff5eb3169db4be2445ff7800df6ca56baf41d13df8beabc8dbd49758ae1
3
  size 69782384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:451ebae1b7f378f28693bb6805c9d2a93630612c77e99bdb73fa9c9edfe30db6
3
  size 69782384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eae897c01400a9b8db73e8dccb20a5cf751422c61f8a0f8ea7fa2d6cb6a973b
3
  size 139790651
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f91cf4feaebc2ba5b127eb9e0ec545a5d00e2dbe0d14025bd5b07dfcf7a65438
3
  size 139790651
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e0747b8c0f8a4ac9c7c69f279af311715abe07e1d91c1c29f20fdd8f88956b7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf7b0988afb384dddecf96a63fb78047758fa9ead514308fc2fc5f66072f899
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e82b50b2ea56f39b6747507c52624e7d162a635e82738b894c21d143ffb08c1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85998f717c2646507a73441adc7d553a890ecbe1f11ba2e9fab5ee2dce5ce5f0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 1200,
3
- "best_metric": 2.21370530128479,
4
- "best_model_checkpoint": "./outputs/checkpoint-1200",
5
- "epoch": 1.3029315960912053,
6
  "eval_steps": 200,
7
- "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -224,6 +224,42 @@
224
  "eval_samples_per_second": 81.833,
225
  "eval_steps_per_second": 2.583,
226
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  }
228
  ],
229
  "logging_steps": 50,
@@ -252,7 +288,7 @@
252
  "attributes": {}
253
  }
254
  },
255
- "total_flos": 3.361352657491722e+17,
256
  "train_batch_size": 32,
257
  "trial_name": null,
258
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1400,
3
+ "best_metric": 2.2031383514404297,
4
+ "best_model_checkpoint": "./outputs/checkpoint-1400",
5
+ "epoch": 1.520086862106406,
6
  "eval_steps": 200,
7
+ "global_step": 1400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
224
  "eval_samples_per_second": 81.833,
225
  "eval_steps_per_second": 2.583,
226
  "step": 1200
227
+ },
228
+ {
229
+ "epoch": 1.3572204125950054,
230
+ "grad_norm": 0.824234664440155,
231
+ "learning_rate": 0.00011813720269038712,
232
+ "loss": 2.1931,
233
+ "step": 1250
234
+ },
235
+ {
236
+ "epoch": 1.4115092290988056,
237
+ "grad_norm": 0.8461474776268005,
238
+ "learning_rate": 0.00011241613230933439,
239
+ "loss": 2.1755,
240
+ "step": 1300
241
+ },
242
+ {
243
+ "epoch": 1.4657980456026058,
244
+ "grad_norm": 0.8048629760742188,
245
+ "learning_rate": 0.00010665345121389123,
246
+ "loss": 2.186,
247
+ "step": 1350
248
+ },
249
+ {
250
+ "epoch": 1.520086862106406,
251
+ "grad_norm": 0.7822412848472595,
252
+ "learning_rate": 0.00010086847212328548,
253
+ "loss": 2.1949,
254
+ "step": 1400
255
+ },
256
+ {
257
+ "epoch": 1.520086862106406,
258
+ "eval_loss": 2.2031383514404297,
259
+ "eval_runtime": 7.3421,
260
+ "eval_samples_per_second": 81.993,
261
+ "eval_steps_per_second": 2.588,
262
+ "step": 1400
263
  }
264
  ],
265
  "logging_steps": 50,
 
288
  "attributes": {}
289
  }
290
  },
291
+ "total_flos": 3.921549791849349e+17,
292
  "train_batch_size": 32,
293
  "trial_name": null,
294
  "trial_params": null