WafaaFraih commited on
Commit
d0f30b5
·
verified ·
1 Parent(s): 4366350

Training in progress, step 375, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e60dc13a9be0f38c3273ec0396322e8df046ab7dc8a41881063db63515dfb185
3
  size 25205016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b286c2a7b81b7310d1f8db2e18dc5eac7bf25c8623555bfda3247681643782
3
  size 25205016
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04dabc213e593408cdafbc438c3a0a6e16453b239fbffcb809bf8cfb9c6908dd
3
  size 13061323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1fc30dd04182b1f8e8a996db59c1422f4398cda9daec3cb19101e38c2c2f54
3
  size 13061323
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c730066fb7aa74695b6df8b6564b44490e40e603f80cd15c4b77e785da187d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9daf34a6f5cec182719ab04edb21954bac15f15a7e3ae3e9fc6e999b4ea1160f
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbfdebb582e3efa85be96dfd074d121a404370a5e4c9114efc541665f084d3a8
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1d6e49e1244c2136fab7f3074345ca5593ed632d4d0d6d129c6fe8e480d66c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcf9aea5557374939f32266171ea84daa512daedb5e609b80ef3a7dd04f0e6f2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de59d3546d424e9956bea9e19bc51e16199139902a3c29741a9536eb04acc24f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.4,
6
  "eval_steps": 50,
7
- "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -140,6 +140,35 @@
140
  "eval_samples_per_second": 8.4,
141
  "eval_steps_per_second": 1.064,
142
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
  ],
145
  "logging_steps": 25,
@@ -154,12 +183,12 @@
154
  "should_evaluate": false,
155
  "should_log": false,
156
  "should_save": true,
157
- "should_training_stop": false
158
  },
159
  "attributes": {}
160
  }
161
  },
162
- "total_flos": 5544301156761600.0,
163
  "train_batch_size": 4,
164
  "trial_name": null,
165
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 50,
7
+ "global_step": 375,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
140
  "eval_samples_per_second": 8.4,
141
  "eval_steps_per_second": 1.064,
142
  "step": 300
143
+ },
144
+ {
145
+ "epoch": 2.6,
146
+ "grad_norm": 2.8476767539978027,
147
+ "learning_rate": 1.0336662707363287e-06,
148
+ "loss": 6.9329,
149
+ "step": 325
150
+ },
151
+ {
152
+ "epoch": 2.8,
153
+ "grad_norm": 3.1517913341522217,
154
+ "learning_rate": 2.921771798838069e-07,
155
+ "loss": 6.861,
156
+ "step": 350
157
+ },
158
+ {
159
+ "epoch": 2.8,
160
+ "eval_loss": 6.814030170440674,
161
+ "eval_runtime": 36.1954,
162
+ "eval_samples_per_second": 8.288,
163
+ "eval_steps_per_second": 1.05,
164
+ "step": 350
165
+ },
166
+ {
167
+ "epoch": 3.0,
168
+ "grad_norm": 3.607140064239502,
169
+ "learning_rate": 3.3703469648760367e-09,
170
+ "loss": 6.8769,
171
+ "step": 375
172
  }
173
  ],
174
  "logging_steps": 25,
 
183
  "should_evaluate": false,
184
  "should_log": false,
185
  "should_save": true,
186
+ "should_training_stop": true
187
  },
188
  "attributes": {}
189
  }
190
  },
191
+ "total_flos": 6930376445952000.0,
192
  "train_batch_size": 4,
193
  "trial_name": null,
194
  "trial_params": null