Reggie committed on
Commit
28552cf
·
verified ·
1 Parent(s): cf342f2

Training in progress, step 2200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec94902aeef1da66fe3799f460774d7e11ff3ca1348ec1fd836a52e60e3a304
3
  size 69782384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a227244b79f305f140948c1079fcdc545f071391bb7e4ff9cf542a898d157d1c
3
  size 69782384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:605837a2951e9cd7a6fa40ee23c3d5a25507180cd69e9506a92611de6b726860
3
  size 139790651
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e0d7d55f9a230dad78340581c7686c5db109d5d23cb37fd0af07013b1770c4
3
  size 139790651
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff888a4ec4f2fab2bfd92e943e0a38d0b8f4e4883309963373b6809f52c08918
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92bebe68c4903b16edae13a87b639062f86297d20f12bfc3a43205a7d64356a6
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59f59a7859ef7b5a7545b424e0b16de3a0435ca89fcc924c7795332a2791a7e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42a9c19735c8da4979dd9d9844a70622260a91c03b4b752eeed5aefa51d6bbfb
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1800,
3
  "best_metric": 2.1885855197906494,
4
  "best_model_checkpoint": "./outputs/checkpoint-1800",
5
- "epoch": 2.1715526601520088,
6
  "eval_steps": 200,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -368,6 +368,42 @@
368
  "eval_samples_per_second": 81.587,
369
  "eval_steps_per_second": 2.575,
370
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  }
372
  ],
373
  "logging_steps": 50,
@@ -382,7 +418,7 @@
382
  "early_stopping_threshold": 0.0
383
  },
384
  "attributes": {
385
- "early_stopping_patience_counter": 1
386
  }
387
  },
388
  "TrainerControl": {
@@ -396,7 +432,7 @@
396
  "attributes": {}
397
  }
398
  },
399
- "total_flos": 5.602045311097897e+17,
400
  "train_batch_size": 32,
401
  "trial_name": null,
402
  "trial_params": null
 
2
  "best_global_step": 1800,
3
  "best_metric": 2.1885855197906494,
4
  "best_model_checkpoint": "./outputs/checkpoint-1800",
5
+ "epoch": 2.3887079261672097,
6
  "eval_steps": 200,
7
+ "global_step": 2200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
368
  "eval_samples_per_second": 81.587,
369
  "eval_steps_per_second": 2.575,
370
  "step": 2000
371
+ },
372
+ {
373
+ "epoch": 2.225841476655809,
374
+ "grad_norm": 1.2955018281936646,
375
+ "learning_rate": 3.227632691927414e-05,
376
+ "loss": 2.1308,
377
+ "step": 2050
378
+ },
379
+ {
380
+ "epoch": 2.2801302931596092,
381
+ "grad_norm": 1.0340995788574219,
382
+ "learning_rate": 2.8132192008487768e-05,
383
+ "loss": 2.129,
384
+ "step": 2100
385
+ },
386
+ {
387
+ "epoch": 2.3344191096634095,
388
+ "grad_norm": 1.2585651874542236,
389
+ "learning_rate": 2.4228910751455625e-05,
390
+ "loss": 2.1403,
391
+ "step": 2150
392
+ },
393
+ {
394
+ "epoch": 2.3887079261672097,
395
+ "grad_norm": 1.2523435354232788,
396
+ "learning_rate": 2.0579564381328775e-05,
397
+ "loss": 2.1197,
398
+ "step": 2200
399
+ },
400
+ {
401
+ "epoch": 2.3887079261672097,
402
+ "eval_loss": 2.190831422805786,
403
+ "eval_runtime": 7.3454,
404
+ "eval_samples_per_second": 81.956,
405
+ "eval_steps_per_second": 2.587,
406
+ "step": 2200
407
  }
408
  ],
409
  "logging_steps": 50,
 
418
  "early_stopping_threshold": 0.0
419
  },
420
  "attributes": {
421
+ "early_stopping_patience_counter": 2
422
  }
423
  },
424
  "TrainerControl": {
 
432
  "attributes": {}
433
  }
434
  },
435
+ "total_flos": 6.162730083190702e+17,
436
  "train_batch_size": 32,
437
  "trial_name": null,
438
  "trial_params": null