error577 commited on
Commit
3fff18f
·
verified ·
1 Parent(s): 10e3563

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f7f2a15b6de18ff56263960023f120d936179ecf9ff8822fb4dbf6bf7949d0e
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7af36e198ceb1d302cb562bbb3dc9a7375d049e35455f558af1c5f3575bf1d7
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da793d08115ae91a604aac5fc0bcd54cc608654003d8c32e73f87846089873ac
3
  size 170920532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe6d668006fb535878145ee06b5142e4d3416d48a3a22d19887946d60b1ed74
3
  size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1b98aafa34eb44815b8c81dd5a5c9ec149b14b0ce72824e63c08d5dafe68f81
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5510a5ba9c2306cb1cc1948c25f245584ae6bb11cae37474b32d1e5b9d035c3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa1e27bc8c4f6272ee858bf97369b9e68f7265e3e9a72207bbd5098643e86719
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d93420319c4318ff13366855f16b6ec61d99b866bdf2a20293a1621b040b36f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2687623732677425,
5
  "eval_steps": 20,
6
- "global_step": 280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2087,6 +2087,154 @@
2087
  "eval_samples_per_second": 3.965,
2088
  "eval_steps_per_second": 3.965,
2089
  "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2090
  }
2091
  ],
2092
  "logging_steps": 1,
@@ -2101,12 +2249,12 @@
2101
  "should_evaluate": false,
2102
  "should_log": false,
2103
  "should_save": true,
2104
- "should_training_stop": false
2105
  },
2106
  "attributes": {}
2107
  }
2108
  },
2109
- "total_flos": 1.132444992405504e+17,
2110
  "train_batch_size": 1,
2111
  "trial_name": null,
2112
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.28795968564400987,
5
  "eval_steps": 20,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2087
  "eval_samples_per_second": 3.965,
2088
  "eval_steps_per_second": 3.965,
2089
  "step": 280
2090
+ },
2091
+ {
2092
+ "epoch": 0.2697222388865559,
2093
+ "grad_norm": 1.7310993671417236,
2094
+ "learning_rate": 3.166200546578718e-06,
2095
+ "loss": 2.5755,
2096
+ "step": 281
2097
+ },
2098
+ {
2099
+ "epoch": 0.2706821045053692,
2100
+ "grad_norm": 1.7038969993591309,
2101
+ "learning_rate": 2.8427160470641253e-06,
2102
+ "loss": 2.5515,
2103
+ "step": 282
2104
+ },
2105
+ {
2106
+ "epoch": 0.2716419701241826,
2107
+ "grad_norm": 1.7379988431930542,
2108
+ "learning_rate": 2.5365011072835117e-06,
2109
+ "loss": 2.624,
2110
+ "step": 283
2111
+ },
2112
+ {
2113
+ "epoch": 0.27260183574299596,
2114
+ "grad_norm": 1.8372347354888916,
2115
+ "learning_rate": 2.2475916629177415e-06,
2116
+ "loss": 2.6323,
2117
+ "step": 284
2118
+ },
2119
+ {
2120
+ "epoch": 0.27356170136180935,
2121
+ "grad_norm": 1.8558735847473145,
2122
+ "learning_rate": 1.9760216187710787e-06,
2123
+ "loss": 2.5233,
2124
+ "step": 285
2125
+ },
2126
+ {
2127
+ "epoch": 0.2745215669806227,
2128
+ "grad_norm": 1.8913716077804565,
2129
+ "learning_rate": 1.7218228447922867e-06,
2130
+ "loss": 2.4645,
2131
+ "step": 286
2132
+ },
2133
+ {
2134
+ "epoch": 0.2754814325994361,
2135
+ "grad_norm": 2.0636091232299805,
2136
+ "learning_rate": 1.4850251723345196e-06,
2137
+ "loss": 2.6357,
2138
+ "step": 287
2139
+ },
2140
+ {
2141
+ "epoch": 0.2764412982182494,
2142
+ "grad_norm": 1.8926513195037842,
2143
+ "learning_rate": 1.2656563906545902e-06,
2144
+ "loss": 2.4288,
2145
+ "step": 288
2146
+ },
2147
+ {
2148
+ "epoch": 0.2774011638370628,
2149
+ "grad_norm": 2.2932565212249756,
2150
+ "learning_rate": 1.0637422436516274e-06,
2151
+ "loss": 2.5419,
2152
+ "step": 289
2153
+ },
2154
+ {
2155
+ "epoch": 0.27836102945587615,
2156
+ "grad_norm": 2.0671229362487793,
2157
+ "learning_rate": 8.793064268460604e-07,
2158
+ "loss": 2.6396,
2159
+ "step": 290
2160
+ },
2161
+ {
2162
+ "epoch": 0.27932089507468955,
2163
+ "grad_norm": 2.107043743133545,
2164
+ "learning_rate": 7.123705845987093e-07,
2165
+ "loss": 2.6949,
2166
+ "step": 291
2167
+ },
2168
+ {
2169
+ "epoch": 0.2802807606935029,
2170
+ "grad_norm": 2.4163978099823,
2171
+ "learning_rate": 5.629543075708176e-07,
2172
+ "loss": 2.6468,
2173
+ "step": 292
2174
+ },
2175
+ {
2176
+ "epoch": 0.2812406263123163,
2177
+ "grad_norm": 2.3940374851226807,
2178
+ "learning_rate": 4.310751304249738e-07,
2179
+ "loss": 2.8079,
2180
+ "step": 293
2181
+ },
2182
+ {
2183
+ "epoch": 0.2822004919311296,
2184
+ "grad_norm": 2.288954973220825,
2185
+ "learning_rate": 3.167485297673411e-07,
2186
+ "loss": 2.5619,
2187
+ "step": 294
2188
+ },
2189
+ {
2190
+ "epoch": 0.283160357549943,
2191
+ "grad_norm": 2.4529731273651123,
2192
+ "learning_rate": 2.1998792233142714e-07,
2193
+ "loss": 2.539,
2194
+ "step": 295
2195
+ },
2196
+ {
2197
+ "epoch": 0.28412022316875635,
2198
+ "grad_norm": 2.7025234699249268,
2199
+ "learning_rate": 1.4080466340349316e-07,
2200
+ "loss": 2.7581,
2201
+ "step": 296
2202
+ },
2203
+ {
2204
+ "epoch": 0.28508008878756974,
2205
+ "grad_norm": 2.71325421333313,
2206
+ "learning_rate": 7.92080454900701e-08,
2207
+ "loss": 2.4698,
2208
+ "step": 297
2209
+ },
2210
+ {
2211
+ "epoch": 0.2860399544063831,
2212
+ "grad_norm": 3.07059907913208,
2213
+ "learning_rate": 3.5205297227380855e-08,
2214
+ "loss": 2.6203,
2215
+ "step": 298
2216
+ },
2217
+ {
2218
+ "epoch": 0.2869998200251965,
2219
+ "grad_norm": 3.6069223880767822,
2220
+ "learning_rate": 8.801582533035644e-09,
2221
+ "loss": 2.6736,
2222
+ "step": 299
2223
+ },
2224
+ {
2225
+ "epoch": 0.28795968564400987,
2226
+ "grad_norm": 4.671467304229736,
2227
+ "learning_rate": 0.0,
2228
+ "loss": 2.4599,
2229
+ "step": 300
2230
+ },
2231
+ {
2232
+ "epoch": 0.28795968564400987,
2233
+ "eval_loss": 2.5693867206573486,
2234
+ "eval_runtime": 86.0535,
2235
+ "eval_samples_per_second": 3.963,
2236
+ "eval_steps_per_second": 3.963,
2237
+ "step": 300
2238
  }
2239
  ],
2240
  "logging_steps": 1,
 
2249
  "should_evaluate": false,
2250
  "should_log": false,
2251
  "should_save": true,
2252
+ "should_training_stop": true
2253
  },
2254
  "attributes": {}
2255
  }
2256
  },
2257
+ "total_flos": 1.205066715365376e+17,
2258
  "train_batch_size": 1,
2259
  "trial_name": null,
2260
  "trial_params": null