advy committed on
Commit
d2a3ec5
·
verified ·
1 Parent(s): 6106582

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:addd3dd183f9c0fb2f62b3ff32005cb369d656dac8b5af5b044a7fce8f21a53f
3
  size 41977360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60d80833c27cee255799a3bfbda672d7114700cf2a4ca494036a68c03d65f71a
3
  size 41977360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d1503ba5046d07c21f93b4575d1b083dbc181a049aa2f247b683f047bd09e7a
3
  size 84100858
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff91055c1d115ea31ce5e367e6b60d51fcc58fd188c250c59669def6ed92fdf7
3
  size 84100858
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aec629a82fc6a705e145bf37ce628a81a3dd7365c4e459a322e17229e377107e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb3a36c4212e103c674b3f32de687c7d3da32dcbbb2bf6a69db439d5bbf72e15
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:718aa1d234ba2748901ce4d93b9b9f5911dad7fc2a60aef63bb9baa629751755
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c8659615615a93a70aac05e323342ecdd678d5af9aa183dc0739a5db1bba97
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7326057553291321,
3
- "best_model_checkpoint": "./results/phi2-mentalchat16k/checkpoint-1800",
4
- "epoch": 2.6925953627524306,
5
  "eval_steps": 100,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -410,6 +410,50 @@
410
  "eval_samples_per_second": 12.168,
411
  "eval_steps_per_second": 3.042,
412
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  }
414
  ],
415
  "logging_steps": 50,
@@ -438,7 +482,7 @@
438
  "attributes": {}
439
  }
440
  },
441
- "total_flos": 1.097538439389696e+17,
442
  "train_batch_size": 4,
443
  "trial_name": null,
444
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7297702431678772,
3
+ "best_model_checkpoint": "./results/phi2-mentalchat16k/checkpoint-2000",
4
+ "epoch": 2.9917726252804786,
5
  "eval_steps": 100,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
410
  "eval_samples_per_second": 12.168,
411
  "eval_steps_per_second": 3.042,
412
  "step": 1800
413
+ },
414
+ {
415
+ "epoch": 2.767389678384443,
416
+ "grad_norm": 0.30385613441467285,
417
+ "learning_rate": 4.569041677996858e-05,
418
+ "loss": 0.7131,
419
+ "step": 1850
420
+ },
421
+ {
422
+ "epoch": 2.842183994016455,
423
+ "grad_norm": 0.344064325094223,
424
+ "learning_rate": 4.0702796629261964e-05,
425
+ "loss": 0.6994,
426
+ "step": 1900
427
+ },
428
+ {
429
+ "epoch": 2.842183994016455,
430
+ "eval_loss": 0.7319425344467163,
431
+ "eval_runtime": 38.8045,
432
+ "eval_samples_per_second": 12.164,
433
+ "eval_steps_per_second": 3.041,
434
+ "step": 1900
435
+ },
436
+ {
437
+ "epoch": 2.9169783096484667,
438
+ "grad_norm": 0.3289170563220978,
439
+ "learning_rate": 3.593305083535229e-05,
440
+ "loss": 0.7064,
441
+ "step": 1950
442
+ },
443
+ {
444
+ "epoch": 2.9917726252804786,
445
+ "grad_norm": 0.32823771238327026,
446
+ "learning_rate": 3.139870476601171e-05,
447
+ "loss": 0.6995,
448
+ "step": 2000
449
+ },
450
+ {
451
+ "epoch": 2.9917726252804786,
452
+ "eval_loss": 0.7297702431678772,
453
+ "eval_runtime": 38.8084,
454
+ "eval_samples_per_second": 12.162,
455
+ "eval_steps_per_second": 3.041,
456
+ "step": 2000
457
  }
458
  ],
459
  "logging_steps": 50,
 
482
  "attributes": {}
483
  }
484
  },
485
+ "total_flos": 1.219471678929408e+17,
486
  "train_batch_size": 4,
487
  "trial_name": null,
488
  "trial_params": null