FormlessAI commited on
Commit
ea25282
·
verified ·
1 Parent(s): eb34db6

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d6ee2f3cbc4cdb6322476898ecd624062b3c89295c346cbe39f5dde5c2f1ca
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e89cd6eb023a8692705065f92f0d5371ce91e0df62544eb097bf03d18a65d84
3
  size 98088784
last-checkpoint/global_step2700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02b7fa48200a5a042da0464ddaf341e8d30efd19d0e7b003acd0e140fc773067
3
+ size 73939813
last-checkpoint/global_step2700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772bd63266afa3802bdd244de18d28b742842cc53dec6c297b70d6eaac8e06b5
3
+ size 73939813
last-checkpoint/global_step2700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:974d834923240a6423062503234126d5259de91b219571e9a4e038fcc8cf1b1f
3
+ size 73939877
last-checkpoint/global_step2700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3537ad743b04b039461b68e9164c6804f256bfe23970460856f048a4790810c
3
+ size 73939877
last-checkpoint/global_step2700/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c8843c00c3514b22c11b1cbcc95febc90df3000d919ac4c3992920b30483c41
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2650
 
1
+ global_step2700
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:848a3bab324e767a79a267a7d989130506e36a874e0e4127d7de190f8481ff36
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede0b1fc36335323175e2edd53261702567c37590048e71947e3c426e85b5358
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20080be7211d83966edb6b3c0a1d6111047cd73e94f53eb27565c9307a2a212e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c225af08c27371f3ad7414b25e230715ecb3f8cb70b10c4259ad6c0fda5ccd6
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33e203ff29a80c4fd7eec0f580181ca0e442129d3db97917e3c8f0d40bfecd90
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88a43fa2cddcb58d4826c017c03a1d3e5471c2689be34ccc5b5029741e5921b
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c0a80ff898e8e7e6862710635ce5d434aff7f8563552950e7cbca15d4174c7f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df0f25af541c09f9f007e452faad4d72697d4a9c57c257b511f80692e8b902e
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c1fb4651c4fe532f8496df418c3c94914ae6844908aef425566dd85e1002e1
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eaf8009039a6f74bcd21b41a171fcf3e5ca003f7176f1eba8b4e8308150dea7
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6307803988456726,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.597672994488671,
6
  "eval_steps": 50,
7
- "global_step": 2650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4142,6 +4142,84 @@
4142
  "eval_samples_per_second": 125.519,
4143
  "eval_steps_per_second": 15.698,
4144
  "step": 2650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4145
  }
4146
  ],
4147
  "logging_steps": 5,
@@ -4170,7 +4248,7 @@
4170
  "attributes": {}
4171
  }
4172
  },
4173
- "total_flos": 1.3663956002574172e+18,
4174
  "train_batch_size": 2,
4175
  "trial_name": null,
4176
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6300840377807617,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.6466625842008575,
6
  "eval_steps": 50,
7
+ "global_step": 2700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4142
  "eval_samples_per_second": 125.519,
4143
  "eval_steps_per_second": 15.698,
4144
  "step": 2650
4145
+ },
4146
+ {
4147
+ "epoch": 2.60257195345989,
4148
+ "grad_norm": 0.2412237972021103,
4149
+ "learning_rate": 5.170746606940992e-05,
4150
+ "loss": 0.671,
4151
+ "step": 2655
4152
+ },
4153
+ {
4154
+ "epoch": 2.6074709124311086,
4155
+ "grad_norm": 0.22563929855823517,
4156
+ "learning_rate": 5.153028819149837e-05,
4157
+ "loss": 0.6472,
4158
+ "step": 2660
4159
+ },
4160
+ {
4161
+ "epoch": 2.612369871402327,
4162
+ "grad_norm": 0.21387755870819092,
4163
+ "learning_rate": 5.135315006687393e-05,
4164
+ "loss": 0.6689,
4165
+ "step": 2665
4166
+ },
4167
+ {
4168
+ "epoch": 2.6172688303735456,
4169
+ "grad_norm": 0.21877914667129517,
4170
+ "learning_rate": 5.117605351590485e-05,
4171
+ "loss": 0.6697,
4172
+ "step": 2670
4173
+ },
4174
+ {
4175
+ "epoch": 2.6221677893447644,
4176
+ "grad_norm": 0.236678346991539,
4177
+ "learning_rate": 5.099900035853222e-05,
4178
+ "loss": 0.6506,
4179
+ "step": 2675
4180
+ },
4181
+ {
4182
+ "epoch": 2.6270667483159826,
4183
+ "grad_norm": 0.2781834602355957,
4184
+ "learning_rate": 5.082199241425118e-05,
4185
+ "loss": 0.6589,
4186
+ "step": 2680
4187
+ },
4188
+ {
4189
+ "epoch": 2.6319657072872014,
4190
+ "grad_norm": 0.24861498177051544,
4191
+ "learning_rate": 5.064503150209222e-05,
4192
+ "loss": 0.6571,
4193
+ "step": 2685
4194
+ },
4195
+ {
4196
+ "epoch": 2.63686466625842,
4197
+ "grad_norm": 0.2387225478887558,
4198
+ "learning_rate": 5.046811944060252e-05,
4199
+ "loss": 0.6723,
4200
+ "step": 2690
4201
+ },
4202
+ {
4203
+ "epoch": 2.641763625229639,
4204
+ "grad_norm": 0.20970548689365387,
4205
+ "learning_rate": 5.029125804782722e-05,
4206
+ "loss": 0.6585,
4207
+ "step": 2695
4208
+ },
4209
+ {
4210
+ "epoch": 2.6466625842008575,
4211
+ "grad_norm": 0.2293289601802826,
4212
+ "learning_rate": 5.01144491412908e-05,
4213
+ "loss": 0.6658,
4214
+ "step": 2700
4215
+ },
4216
+ {
4217
+ "epoch": 2.6466625842008575,
4218
+ "eval_loss": 0.6300840377807617,
4219
+ "eval_runtime": 15.5653,
4220
+ "eval_samples_per_second": 125.857,
4221
+ "eval_steps_per_second": 15.74,
4222
+ "step": 2700
4223
  }
4224
  ],
4225
  "logging_steps": 5,
 
4248
  "attributes": {}
4249
  }
4250
  },
4251
+ "total_flos": 1.3915592955642511e+18,
4252
  "train_batch_size": 2,
4253
  "trial_name": null,
4254
  "trial_params": null