SystemAdmin123 committed on
Commit
a840c32
·
verified ·
1 Parent(s): 632b175

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfb6c282582daa46607f42fb293fdbf3819e8d75d52dd0d1b623400802d28b35
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27d9594ab02f9adc827e9cc100409e429b6fa6e5da458f22196243beabd0e12
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfedfce98860d74e92fcd3e31dca7238b9b93ac3bee488a9a090a1706464f3a0
3
  size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3184cf8f88bad2cfb5a68fc4094b566ab6b6cce219f98681e60f0f6402fd93e
3
  size 255265850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b63e3edd0c0cc48f086057ce6e75022d83d87a3b00734c15ba24422849770cd
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e44ef27abe50e5bba1d9636856695c0706b4a69481203dbe05866fc8428b12b
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:186e88827d2475291b71f4a0a4127f7fbf5706df899ca40fe1878eb7c8301c05
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c36c84de099f12bcc525eb47423becd04c47e16e865404a5529083e8a6215c3a
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb0e11d33e42a9adcc5c976e37e059307e91eb6ae74c969ff1b3eb2f755782d6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd11d413bc67bf01de9a1a006e9e7655be307353028b25f5b3c299e5b6b7a44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.5,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 147.399,
145
  "eval_steps_per_second": 2.357,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -163,7 +206,7 @@
163
  "attributes": {}
164
  }
165
  },
166
- "total_flos": 9728424859926528.0,
167
  "train_batch_size": 32,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.666666666666668,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 147.399,
145
  "eval_steps_per_second": 2.357,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 13.333333333333334,
150
+ "grad_norm": 4.0,
151
+ "learning_rate": 0.00017541066097768963,
152
+ "loss": 3.6203,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 14.166666666666666,
157
+ "grad_norm": 4.21875,
158
+ "learning_rate": 0.00017167825131684513,
159
+ "loss": 3.533,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 15.0,
164
+ "grad_norm": 4.59375,
165
+ "learning_rate": 0.00016772815716257412,
166
+ "loss": 3.4567,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 15.833333333333334,
171
+ "grad_norm": 5.125,
172
+ "learning_rate": 0.00016357237482099684,
173
+ "loss": 3.3726,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 16.666666666666668,
178
+ "grad_norm": 4.03125,
179
+ "learning_rate": 0.00015922352526649803,
180
+ "loss": 3.3007,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 16.666666666666668,
185
+ "eval_loss": 3.2971582412719727,
186
+ "eval_runtime": 10.605,
187
+ "eval_samples_per_second": 141.537,
188
+ "eval_steps_per_second": 2.263,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
206
  "attributes": {}
207
  }
208
  },
209
+ "total_flos": 1.2971581575790592e+16,
210
  "train_batch_size": 32,
211
  "trial_name": null,
212
  "trial_params": null