3N3G commited on
Commit
4e055b2
·
verified ·
1 Parent(s): 4cc000b

Training in progress, step 32, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59b3ab8b1df8c7dbb70c1aa738717d9974dfc7364347cd05afb11ed8108a092d
3
  size 4969539560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:943608aa5c6d1a33cd3de6c65e5e3dc4364cc2718c2e96c2431f1f8af7ed45a8
3
  size 4969539560
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24fd6512970980552052be46e5e1ffd23abb94389499198cd04131d5c72db1ea
3
  size 1912795688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0f8c786480fb81dbef237ecccb6d214b6a308947b049039952c10df566011d7
3
  size 1912795688
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.0,
6
  "eval_steps": 100,
7
- "global_step": 16,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -120,6 +120,118 @@
120
  "learning_rate": 6.986255778798252e-08,
121
  "loss": 0.7012,
122
  "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "logging_steps": 1,
@@ -134,12 +246,12 @@
134
  "should_evaluate": false,
135
  "should_log": false,
136
  "should_save": true,
137
- "should_training_stop": false
138
  },
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 5366131440353280.0,
143
  "train_batch_size": 1,
144
  "trial_name": null,
145
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 8.0,
6
  "eval_steps": 100,
7
+ "global_step": 32,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
120
  "learning_rate": 6.986255778798252e-08,
121
  "loss": 0.7012,
122
  "step": 16
123
+ },
124
+ {
125
+ "epoch": 4.290909090909091,
126
+ "grad_norm": 9.836618423461914,
127
+ "learning_rate": 6.501344202803413e-08,
128
+ "loss": 0.777,
129
+ "step": 17
130
+ },
131
+ {
132
+ "epoch": 4.581818181818182,
133
+ "grad_norm": 11.242887496948242,
134
+ "learning_rate": 6.003840142464886e-08,
135
+ "loss": 0.8631,
136
+ "step": 18
137
+ },
138
+ {
139
+ "epoch": 4.872727272727273,
140
+ "grad_norm": 10.001364707946777,
141
+ "learning_rate": 5.5e-08,
142
+ "loss": 0.7819,
143
+ "step": 19
144
+ },
145
+ {
146
+ "epoch": 5.0,
147
+ "grad_norm": 10.092758178710938,
148
+ "learning_rate": 4.996159857535115e-08,
149
+ "loss": 0.7722,
150
+ "step": 20
151
+ },
152
+ {
153
+ "epoch": 5.290909090909091,
154
+ "grad_norm": 9.45466423034668,
155
+ "learning_rate": 4.498655797196585e-08,
156
+ "loss": 0.7416,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 5.581818181818182,
161
+ "grad_norm": 10.496912956237793,
162
+ "learning_rate": 4.0137442212017494e-08,
163
+ "loss": 0.8161,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 5.872727272727273,
168
+ "grad_norm": 10.202836036682129,
169
+ "learning_rate": 3.5475231739709885e-08,
170
+ "loss": 0.823,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 6.0,
175
+ "grad_norm": 12.920607566833496,
176
+ "learning_rate": 3.105855655680986e-08,
177
+ "loss": 0.8315,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 6.290909090909091,
182
+ "grad_norm": 10.253811836242676,
183
+ "learning_rate": 2.6942958916356994e-08,
184
+ "loss": 0.8316,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 6.581818181818182,
189
+ "grad_norm": 9.783924102783203,
190
+ "learning_rate": 2.3180194846605363e-08,
191
+ "loss": 0.7542,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 6.872727272727273,
196
+ "grad_norm": 10.855210304260254,
197
+ "learning_rate": 1.981758328893866e-08,
198
+ "loss": 0.8357,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 7.0,
203
+ "grad_norm": 10.147912979125977,
204
+ "learning_rate": 1.6897411034727217e-08,
205
+ "loss": 0.7426,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 7.290909090909091,
210
+ "grad_norm": 10.078908920288086,
211
+ "learning_rate": 1.4456400944391145e-08,
212
+ "loss": 0.7832,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 7.581818181818182,
217
+ "grad_norm": 10.833037376403809,
218
+ "learning_rate": 1.2525250136123459e-08,
219
+ "loss": 0.7954,
220
+ "step": 30
221
+ },
222
+ {
223
+ "epoch": 7.872727272727273,
224
+ "grad_norm": 9.931336402893066,
225
+ "learning_rate": 1.1128243951817936e-08,
226
+ "loss": 0.7893,
227
+ "step": 31
228
+ },
229
+ {
230
+ "epoch": 8.0,
231
+ "grad_norm": 10.87130355834961,
232
+ "learning_rate": 1.0282950554804083e-08,
233
+ "loss": 0.9104,
234
+ "step": 32
235
  }
236
  ],
237
  "logging_steps": 1,
 
246
  "should_evaluate": false,
247
  "should_log": false,
248
  "should_save": true,
249
+ "should_training_stop": true
250
  },
251
  "attributes": {}
252
  }
253
  },
254
+ "total_flos": 1.073226288070656e+16,
255
  "train_batch_size": 1,
256
  "trial_name": null,
257
  "trial_params": null