Reggie committed on
Commit
70d76c1
·
verified ·
1 Parent(s): ab365fd

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:550d11d2e0d45d6a0d008261bef62de4360b487f887046f596e045ccd6f0a6eb
3
  size 69782384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec94902aeef1da66fe3799f460774d7e11ff3ca1348ec1fd836a52e60e3a304
3
  size 69782384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5388aad62ba3c6868bf12ad11bc022fae098f88cd35d07a7860da2b8bdeef209
3
  size 139790651
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:605837a2951e9cd7a6fa40ee23c3d5a25507180cd69e9506a92611de6b726860
3
  size 139790651
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc5c5eb75b44b8144a33c4463d137bd72be57e280cc3f9d5750a61e28416daac
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff888a4ec4f2fab2bfd92e943e0a38d0b8f4e4883309963373b6809f52c08918
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4597266b92ceab6117e6a70e4971c3980afac249cc4bfe8f85b293a0771ae1e4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59f59a7859ef7b5a7545b424e0b16de3a0435ca89fcc924c7795332a2791a7e
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1800,
3
  "best_metric": 2.1885855197906494,
4
  "best_model_checkpoint": "./outputs/checkpoint-1800",
5
- "epoch": 1.9543973941368078,
6
  "eval_steps": 200,
7
- "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -332,6 +332,42 @@
332
  "eval_samples_per_second": 81.85,
333
  "eval_steps_per_second": 2.583,
334
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  }
336
  ],
337
  "logging_steps": 50,
@@ -346,7 +382,7 @@
346
  "early_stopping_threshold": 0.0
347
  },
348
  "attributes": {
349
- "early_stopping_patience_counter": 0
350
  }
351
  },
352
  "TrainerControl": {
@@ -360,7 +396,7 @@
360
  "attributes": {}
361
  }
362
  },
363
- "total_flos": 5.041672846318633e+17,
364
  "train_batch_size": 32,
365
  "trial_name": null,
366
  "trial_params": null
 
2
  "best_global_step": 1800,
3
  "best_metric": 2.1885855197906494,
4
  "best_model_checkpoint": "./outputs/checkpoint-1800",
5
+ "epoch": 2.1715526601520088,
6
  "eval_steps": 200,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
332
  "eval_samples_per_second": 81.85,
333
  "eval_steps_per_second": 2.583,
334
  "step": 1800
335
+ },
336
+ {
337
+ "epoch": 2.008686210640608,
338
+ "grad_norm": 0.9358265399932861,
339
+ "learning_rate": 5.097252214851365e-05,
340
+ "loss": 2.165,
341
+ "step": 1850
342
+ },
343
+ {
344
+ "epoch": 2.0629750271444083,
345
+ "grad_norm": 1.0949631929397583,
346
+ "learning_rate": 4.601121531384579e-05,
347
+ "loss": 2.1348,
348
+ "step": 1900
349
+ },
350
+ {
351
+ "epoch": 2.1172638436482085,
352
+ "grad_norm": 1.3909953832626343,
353
+ "learning_rate": 4.1230843398675555e-05,
354
+ "loss": 2.1399,
355
+ "step": 1950
356
+ },
357
+ {
358
+ "epoch": 2.1715526601520088,
359
+ "grad_norm": 1.2448049783706665,
360
+ "learning_rate": 3.664742706752925e-05,
361
+ "loss": 2.1417,
362
+ "step": 2000
363
+ },
364
+ {
365
+ "epoch": 2.1715526601520088,
366
+ "eval_loss": 2.194857120513916,
367
+ "eval_runtime": 7.3786,
368
+ "eval_samples_per_second": 81.587,
369
+ "eval_steps_per_second": 2.575,
370
+ "step": 2000
371
  }
372
  ],
373
  "logging_steps": 50,
 
382
  "early_stopping_threshold": 0.0
383
  },
384
  "attributes": {
385
+ "early_stopping_patience_counter": 1
386
  }
387
  },
388
  "TrainerControl": {
 
396
  "attributes": {}
397
  }
398
  },
399
+ "total_flos": 5.602045311097897e+17,
400
  "train_batch_size": 32,
401
  "trial_name": null,
402
  "trial_params": null