NairaRahim commited on
Commit
8f513a9
·
verified ·
1 Parent(s): df5c0cb

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa9f769a1a8a8b6a773996d7c24b349e4672595d1b4ff0b77824a8db6f68871c
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf382415b8b4e319747d4c583bb2c22fdcb33f73e65a25fb65dde51f022f0b3a
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f7703588a46eff1240aab8e93209d8f65d0aae932e6049646bf5f7ac4a0b063
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb172aa8a3695c37694e8b016fb08371ff016bf0005d26a5ad71d0066147ef06
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:516b7135de753fb3094aa8b4ec4118118441d7163be5041898012aee7e4ea239
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda2382d9098c76a47d94697adf0e77400f7b6a3f4a525cb2f195f30c9813189
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39665badb31ad7d6b4ab988a13e0c580a201e6ed151e19101e4c789a0afc002a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2590224ad91e1be553a4c5db4bec4d60b1f52733b47fba11c73ce6465c9447e8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 35.558197021484375,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-1305",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 1305,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -106,6 +106,105 @@
106
  "eval_samples_per_second": 26.451,
107
  "eval_steps_per_second": 3.324,
108
  "step": 1305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  }
110
  ],
111
  "logging_steps": 100,
@@ -134,7 +233,7 @@
134
  "attributes": {}
135
  }
136
  },
137
- "total_flos": 1407310903047168.0,
138
  "train_batch_size": 8,
139
  "trial_name": null,
140
  "trial_params": null
 
1
  {
2
+ "best_metric": 35.211631774902344,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-2610",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 2610,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
106
  "eval_samples_per_second": 26.451,
107
  "eval_steps_per_second": 3.324,
108
  "step": 1305
109
+ },
110
+ {
111
+ "epoch": 1.0727969348659003,
112
+ "grad_norm": 3.0501019954681396,
113
+ "learning_rate": 4.932998084291188e-05,
114
+ "loss": 34.3557,
115
+ "step": 1400
116
+ },
117
+ {
118
+ "epoch": 1.1494252873563218,
119
+ "grad_norm": 3.027714252471924,
120
+ "learning_rate": 4.928208812260537e-05,
121
+ "loss": 34.2442,
122
+ "step": 1500
123
+ },
124
+ {
125
+ "epoch": 1.2260536398467432,
126
+ "grad_norm": 3.693758249282837,
127
+ "learning_rate": 4.923419540229886e-05,
128
+ "loss": 33.5375,
129
+ "step": 1600
130
+ },
131
+ {
132
+ "epoch": 1.3026819923371646,
133
+ "grad_norm": 3.7679357528686523,
134
+ "learning_rate": 4.9186302681992344e-05,
135
+ "loss": 33.7891,
136
+ "step": 1700
137
+ },
138
+ {
139
+ "epoch": 1.3793103448275863,
140
+ "grad_norm": 3.2367331981658936,
141
+ "learning_rate": 4.9138409961685824e-05,
142
+ "loss": 33.4964,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 1.4559386973180077,
147
+ "grad_norm": 3.6876628398895264,
148
+ "learning_rate": 4.909051724137931e-05,
149
+ "loss": 34.7739,
150
+ "step": 1900
151
+ },
152
+ {
153
+ "epoch": 1.5325670498084292,
154
+ "grad_norm": 1.9550260305404663,
155
+ "learning_rate": 4.90426245210728e-05,
156
+ "loss": 34.2552,
157
+ "step": 2000
158
+ },
159
+ {
160
+ "epoch": 1.6091954022988506,
161
+ "grad_norm": 4.955118656158447,
162
+ "learning_rate": 4.8994731800766285e-05,
163
+ "loss": 33.9766,
164
+ "step": 2100
165
+ },
166
+ {
167
+ "epoch": 1.685823754789272,
168
+ "grad_norm": 6.145394802093506,
169
+ "learning_rate": 4.894683908045977e-05,
170
+ "loss": 34.1676,
171
+ "step": 2200
172
+ },
173
+ {
174
+ "epoch": 1.7624521072796935,
175
+ "grad_norm": 6.15125846862793,
176
+ "learning_rate": 4.889894636015326e-05,
177
+ "loss": 34.3084,
178
+ "step": 2300
179
+ },
180
+ {
181
+ "epoch": 1.839080459770115,
182
+ "grad_norm": 2.647857427597046,
183
+ "learning_rate": 4.8851053639846746e-05,
184
+ "loss": 34.6449,
185
+ "step": 2400
186
+ },
187
+ {
188
+ "epoch": 1.9157088122605364,
189
+ "grad_norm": 4.066762447357178,
190
+ "learning_rate": 4.880316091954023e-05,
191
+ "loss": 34.1318,
192
+ "step": 2500
193
+ },
194
+ {
195
+ "epoch": 1.9923371647509578,
196
+ "grad_norm": 5.785406589508057,
197
+ "learning_rate": 4.875526819923372e-05,
198
+ "loss": 34.1303,
199
+ "step": 2600
200
+ },
201
+ {
202
+ "epoch": 2.0,
203
+ "eval_loss": 35.211631774902344,
204
+ "eval_runtime": 49.3338,
205
+ "eval_samples_per_second": 26.452,
206
+ "eval_steps_per_second": 3.324,
207
+ "step": 2610
208
  }
209
  ],
210
  "logging_steps": 100,
 
233
  "attributes": {}
234
  }
235
  },
236
+ "total_flos": 2814621806094336.0,
237
  "train_batch_size": 8,
238
  "trial_name": null,
239
  "trial_params": null