3N3G commited on
Commit
edc307b
·
verified ·
1 Parent(s): 4ce2f57

Training in progress, step 28, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92ba3a3db349e135630227b69d3b515f4496a179bcceedee16fae37b44a13c45
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302a39c9d5329aabe20f61d80d89f05070bb802acbec752413fd637b7e66bd79
3
  size 4967215360
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f848578fbc87f766e993623cf533a89e3b6f9ebe7feb0b3b27cd2a05ddeb760
3
  size 3077766632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e7bd6a8c24f94c6c5ddc25d7185ca414e65c6937283d89755268e46f5613f88
3
  size 3077766632
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.0,
6
  "eval_steps": 14,
7
- "global_step": 14,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -114,6 +114,112 @@
114
  "eval_samples_per_second": 12.13,
115
  "eval_steps_per_second": 3.154,
116
  "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
  ],
119
  "logging_steps": 1,
@@ -128,12 +234,12 @@
128
  "should_evaluate": false,
129
  "should_log": false,
130
  "should_save": true,
131
- "should_training_stop": false
132
  },
133
  "attributes": {}
134
  }
135
  },
136
- "total_flos": 2778037092352.0,
137
  "train_batch_size": 1,
138
  "trial_name": null,
139
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
  "eval_steps": 14,
7
+ "global_step": 28,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
114
  "eval_samples_per_second": 12.13,
115
  "eval_steps_per_second": 3.154,
116
  "step": 14
117
+ },
118
+ {
119
+ "epoch": 2.142857142857143,
120
+ "grad_norm": 0.7105587071696532,
121
+ "learning_rate": 6.343215915635762e-05,
122
+ "loss": 0.3543,
123
+ "step": 15
124
+ },
125
+ {
126
+ "epoch": 2.2857142857142856,
127
+ "grad_norm": 0.627398851675487,
128
+ "learning_rate": 5.782557337881911e-05,
129
+ "loss": 0.3089,
130
+ "step": 16
131
+ },
132
+ {
133
+ "epoch": 2.4285714285714284,
134
+ "grad_norm": 0.6105424817514371,
135
+ "learning_rate": 5.2174426621180906e-05,
136
+ "loss": 0.2729,
137
+ "step": 17
138
+ },
139
+ {
140
+ "epoch": 2.571428571428571,
141
+ "grad_norm": 0.6952762206191616,
142
+ "learning_rate": 4.6567840843642384e-05,
143
+ "loss": 0.2858,
144
+ "step": 18
145
+ },
146
+ {
147
+ "epoch": 2.7142857142857144,
148
+ "grad_norm": 0.46310196187720826,
149
+ "learning_rate": 4.109423525312738e-05,
150
+ "loss": 0.2778,
151
+ "step": 19
152
+ },
153
+ {
154
+ "epoch": 2.857142857142857,
155
+ "grad_norm": 0.9186572654489482,
156
+ "learning_rate": 3.583993187957173e-05,
157
+ "loss": 0.2704,
158
+ "step": 20
159
+ },
160
+ {
161
+ "epoch": 3.0,
162
+ "grad_norm": 0.4188753131719169,
163
+ "learning_rate": 3.088779422594514e-05,
164
+ "loss": 0.2588,
165
+ "step": 21
166
+ },
167
+ {
168
+ "epoch": 3.142857142857143,
169
+ "grad_norm": 0.6042841569424611,
170
+ "learning_rate": 2.6315920461308964e-05,
171
+ "loss": 0.2056,
172
+ "step": 22
173
+ },
174
+ {
175
+ "epoch": 3.2857142857142856,
176
+ "grad_norm": 0.5170326076545962,
177
+ "learning_rate": 2.219641176603649e-05,
178
+ "loss": 0.1908,
179
+ "step": 23
180
+ },
181
+ {
182
+ "epoch": 3.4285714285714284,
183
+ "grad_norm": 0.4239245143510279,
184
+ "learning_rate": 1.8594235253127375e-05,
185
+ "loss": 0.1778,
186
+ "step": 24
187
+ },
188
+ {
189
+ "epoch": 3.571428571428571,
190
+ "grad_norm": 0.44150441663060497,
191
+ "learning_rate": 1.556619939802615e-05,
192
+ "loss": 0.179,
193
+ "step": 25
194
+ },
195
+ {
196
+ "epoch": 3.7142857142857144,
197
+ "grad_norm": 0.666599351157649,
198
+ "learning_rate": 1.3160058135028691e-05,
199
+ "loss": 0.173,
200
+ "step": 26
201
+ },
202
+ {
203
+ "epoch": 3.857142857142857,
204
+ "grad_norm": 0.5574526628587583,
205
+ "learning_rate": 1.1413757749211602e-05,
206
+ "loss": 0.1666,
207
+ "step": 27
208
+ },
209
+ {
210
+ "epoch": 4.0,
211
+ "grad_norm": 0.45628437147964007,
212
+ "learning_rate": 1.0354838440848503e-05,
213
+ "loss": 0.1653,
214
+ "step": 28
215
+ },
216
+ {
217
+ "epoch": 4.0,
218
+ "eval_loss": 0.46450117230415344,
219
+ "eval_runtime": 4.2935,
220
+ "eval_samples_per_second": 11.645,
221
+ "eval_steps_per_second": 3.028,
222
+ "step": 28
223
  }
224
  ],
225
  "logging_steps": 1,
 
234
  "should_evaluate": false,
235
  "should_log": false,
236
  "should_save": true,
237
+ "should_training_stop": true
238
  },
239
  "attributes": {}
240
  }
241
  },
242
+ "total_flos": 5574381731840.0,
243
  "train_batch_size": 1,
244
  "trial_name": null,
245
  "trial_params": null