moos124 committed on
Commit
b3305ad
·
verified ·
1 Parent(s): b1b48db

Training in progress, step 680, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0531f8ac4ffd361df2a883371a501814336cb53edb4dea0066d7ab4aa93e3f53
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fa0dae9180b8e9cf91b61b9b0567019ae5925eeaa05eadbc254e528f7a2648
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d2483c8508d4b9ec6d70bd368be4872d1ea2ed1409a7140667d2e3e3ce1b3b9
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929396c762c2458f69fce4eee4ff4f8bdbe90788d9ee70d2fce722708de74c60
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf717f19fdf97da383af66ee53d68fb9633f6ad6fa65078ba8a8bc9e9a19c857
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d875597fcb96668619d5dfe31a66ddf6368794a8fc9781b7cc0c3d1a9465e866
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36844094c1f7cad7a42b80920bb25d88c570bab17b2a4fd3b3b6930500e65ec2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ead9e97463c05478511a6c80978c58c03ec8da4f5aacb879ebc6eb4dc33296d
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14293333333333333,
6
  "eval_steps": 500,
7
- "global_step": 670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -688,6 +688,16 @@
688
  "mean_token_accuracy": 0.7364178076386452,
689
  "num_tokens": 3101650.0,
690
  "step": 670
 
 
 
 
 
 
 
 
 
 
691
  }
692
  ],
693
  "logging_steps": 10,
@@ -707,7 +717,7 @@
707
  "attributes": {}
708
  }
709
  },
710
- "total_flos": 1.46852905944576e+16,
711
  "train_batch_size": 4,
712
  "trial_name": null,
713
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.14506666666666668,
6
  "eval_steps": 500,
7
+ "global_step": 680,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
688
  "mean_token_accuracy": 0.7364178076386452,
689
  "num_tokens": 3101650.0,
690
  "step": 670
691
+ },
692
+ {
693
+ "entropy": 0.8284098848700523,
694
+ "epoch": 0.14506666666666668,
695
+ "grad_norm": 0.3110564053058624,
696
+ "learning_rate": 9.957035847679749e-05,
697
+ "loss": 0.905357551574707,
698
+ "mean_token_accuracy": 0.7894617035984993,
699
+ "num_tokens": 3143357.0,
700
+ "step": 680
701
  }
702
  ],
703
  "logging_steps": 10,
 
717
  "attributes": {}
718
  }
719
  },
720
+ "total_flos": 1.4888787600940032e+16,
721
  "train_batch_size": 4,
722
  "trial_name": null,
723
  "trial_params": null