FredericFan commited on
Commit
f993fd2
·
verified ·
1 Parent(s): f993d4f

Training in progress, step 17000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08df383ddf8539b1c156d553a75182fefc1bd9cb8d1be80a3bb2d48549c72268
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7142eae13c2e07731af54b545ac51bda5556ea3f592995cf80a6a611f7a001
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4942b168045dd01e1507ab88999a6fa88c39bef7244ce53f7c7f5c00a6c5e0d5
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8599a0264032f7e486689cdb52a108f56cf68a52b701dfa3ff59464f8da7ad06
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7de248f1039c222833b4241b4257bf2afda51b5d7ba778b660b2c2050b7e97
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c552c5778e7213237c153e06b58720168ca8baccd54cd6b284f96928b35e9241
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c923446f6a9040a7fff312fb9744a6058c7cd7ec45e59a8a878177b2e96073c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:570514209dc9b87d23b36d54c5948989ae9b03cae5fc9a097914a9ef458ad51f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08243728429079056,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-15000",
4
- "epoch": 1.32,
5
  "eval_steps": 500,
6
- "global_step": 16500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2581,6 +2581,84 @@
2581
  "eval_samples_per_second": 22.705,
2582
  "eval_steps_per_second": 5.676,
2583
  "step": 16500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2584
  }
2585
  ],
2586
  "logging_steps": 50,
@@ -2600,7 +2678,7 @@
2600
  "attributes": {}
2601
  }
2602
  },
2603
- "total_flos": 4.019122077696e+16,
2604
  "train_batch_size": 4,
2605
  "trial_name": null,
2606
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08219126611948013,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-17000",
4
+ "epoch": 1.3599999999999999,
5
  "eval_steps": 500,
6
+ "global_step": 17000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2581
  "eval_samples_per_second": 22.705,
2582
  "eval_steps_per_second": 5.676,
2583
  "step": 16500
2584
+ },
2585
+ {
2586
+ "epoch": 1.324,
2587
+ "grad_norm": 0.10573418438434601,
2588
+ "learning_rate": 1.0144800000000002e-05,
2589
+ "loss": 0.058,
2590
+ "step": 16550
2591
+ },
2592
+ {
2593
+ "epoch": 1.328,
2594
+ "grad_norm": 0.1370289921760559,
2595
+ "learning_rate": 1.00848e-05,
2596
+ "loss": 0.0552,
2597
+ "step": 16600
2598
+ },
2599
+ {
2600
+ "epoch": 1.332,
2601
+ "grad_norm": 0.15583133697509766,
2602
+ "learning_rate": 1.0024800000000001e-05,
2603
+ "loss": 0.0505,
2604
+ "step": 16650
2605
+ },
2606
+ {
2607
+ "epoch": 1.336,
2608
+ "grad_norm": 0.12616191804409027,
2609
+ "learning_rate": 9.9648e-06,
2610
+ "loss": 0.0563,
2611
+ "step": 16700
2612
+ },
2613
+ {
2614
+ "epoch": 1.34,
2615
+ "grad_norm": 0.1533484160900116,
2616
+ "learning_rate": 9.9048e-06,
2617
+ "loss": 0.0564,
2618
+ "step": 16750
2619
+ },
2620
+ {
2621
+ "epoch": 1.3439999999999999,
2622
+ "grad_norm": 0.11609365046024323,
2623
+ "learning_rate": 9.844800000000001e-06,
2624
+ "loss": 0.0538,
2625
+ "step": 16800
2626
+ },
2627
+ {
2628
+ "epoch": 1.3479999999999999,
2629
+ "grad_norm": 0.12423556298017502,
2630
+ "learning_rate": 9.7848e-06,
2631
+ "loss": 0.0519,
2632
+ "step": 16850
2633
+ },
2634
+ {
2635
+ "epoch": 1.3519999999999999,
2636
+ "grad_norm": 0.13122333586215973,
2637
+ "learning_rate": 9.7248e-06,
2638
+ "loss": 0.0518,
2639
+ "step": 16900
2640
+ },
2641
+ {
2642
+ "epoch": 1.3559999999999999,
2643
+ "grad_norm": 0.17409804463386536,
2644
+ "learning_rate": 9.664800000000001e-06,
2645
+ "loss": 0.0544,
2646
+ "step": 16950
2647
+ },
2648
+ {
2649
+ "epoch": 1.3599999999999999,
2650
+ "grad_norm": 0.13961808383464813,
2651
+ "learning_rate": 9.6048e-06,
2652
+ "loss": 0.0523,
2653
+ "step": 17000
2654
+ },
2655
+ {
2656
+ "epoch": 1.3599999999999999,
2657
+ "eval_loss": 0.08219126611948013,
2658
+ "eval_runtime": 88.051,
2659
+ "eval_samples_per_second": 22.714,
2660
+ "eval_steps_per_second": 5.679,
2661
+ "step": 17000
2662
  }
2663
  ],
2664
  "logging_steps": 50,
 
2678
  "attributes": {}
2679
  }
2680
  },
2681
+ "total_flos": 4.140913655808e+16,
2682
  "train_batch_size": 4,
2683
  "trial_name": null,
2684
  "trial_params": null