irishprancer committed on
Commit
c5d8453
·
verified ·
1 Parent(s): 509b415

Training in progress, step 3450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415b012a16d8a3069af7b2d00a549e623ca759b20310f8dd3bc8182b71b9f1ce
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d85d797097afce863d924a902fd673ba283872940210e3c933a64dde7a4be42
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fb0dddd1ba0510c292970530f8d534b9d2c07fb94582bd515965a0add9b808f
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f59ddc5d1d02f125383197f9e87f354af36a0ab9f9c34a9101568ff6028781
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46d43b62f9cc9036b40877de2b586a52a3533079415fdaa5852bb39f1d0f3f9c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8298cd2038dcf4bc8c0ba6dfa0d230a23246f758d069f7c77f9c04f77b6d8f8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85714b7c0ed500123430db079e03a8a9d980fe0b19bc329292c309cdba264050
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1ec9215b291496d6c7959ac8b8fcef926d7c6ecd1d84b37dbf2c985bef91a3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.717534065246582,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 143.47826086956522,
5
  "eval_steps": 150,
6
- "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3157,6 +3157,151 @@
3157
  "EMA_steps_per_second": 25.181,
3158
  "epoch": 143.47826086956522,
3159
  "step": 3300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3160
  }
3161
  ],
3162
  "logging_steps": 10,
@@ -3176,7 +3321,7 @@
3176
  "attributes": {}
3177
  }
3178
  },
3179
- "total_flos": 8.51203574828974e+16,
3180
  "train_batch_size": 4,
3181
  "trial_name": null,
3182
  "trial_params": null
 
1
  {
2
  "best_metric": 0.717534065246582,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 150.0,
5
  "eval_steps": 150,
6
+ "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3157
  "EMA_steps_per_second": 25.181,
3158
  "epoch": 143.47826086956522,
3159
  "step": 3300
3160
+ },
3161
+ {
3162
+ "epoch": 143.91304347826087,
3163
+ "grad_norm": 1.8851526975631714,
3164
+ "learning_rate": 3.7921521115670724e-06,
3165
+ "loss": 0.2538,
3166
+ "step": 3310
3167
+ },
3168
+ {
3169
+ "epoch": 144.34782608695653,
3170
+ "grad_norm": 1.569898247718811,
3171
+ "learning_rate": 3.7921134038575663e-06,
3172
+ "loss": 0.2145,
3173
+ "step": 3320
3174
+ },
3175
+ {
3176
+ "epoch": 144.7826086956522,
3177
+ "grad_norm": 1.718190312385559,
3178
+ "learning_rate": 3.79207394481317e-06,
3179
+ "loss": 0.2708,
3180
+ "step": 3330
3181
+ },
3182
+ {
3183
+ "epoch": 145.2173913043478,
3184
+ "grad_norm": 2.9095687866210938,
3185
+ "learning_rate": 3.7920337344495226e-06,
3186
+ "loss": 0.2084,
3187
+ "step": 3340
3188
+ },
3189
+ {
3190
+ "epoch": 145.65217391304347,
3191
+ "grad_norm": 1.8533018827438354,
3192
+ "learning_rate": 3.791992772782563e-06,
3193
+ "loss": 0.2381,
3194
+ "step": 3350
3195
+ },
3196
+ {
3197
+ "epoch": 146.08695652173913,
3198
+ "grad_norm": 1.9780678749084473,
3199
+ "learning_rate": 3.791951059828527e-06,
3200
+ "loss": 0.2651,
3201
+ "step": 3360
3202
+ },
3203
+ {
3204
+ "epoch": 146.52173913043478,
3205
+ "grad_norm": 1.834191083908081,
3206
+ "learning_rate": 3.791908595603947e-06,
3207
+ "loss": 0.2269,
3208
+ "step": 3370
3209
+ },
3210
+ {
3211
+ "epoch": 146.95652173913044,
3212
+ "grad_norm": 1.6292699575424194,
3213
+ "learning_rate": 3.7918653801256568e-06,
3214
+ "loss": 0.2159,
3215
+ "step": 3380
3216
+ },
3217
+ {
3218
+ "epoch": 147.3913043478261,
3219
+ "grad_norm": 1.5715214014053345,
3220
+ "learning_rate": 3.791821413410784e-06,
3221
+ "loss": 0.2288,
3222
+ "step": 3390
3223
+ },
3224
+ {
3225
+ "epoch": 147.82608695652175,
3226
+ "grad_norm": 1.5430243015289307,
3227
+ "learning_rate": 3.791776695476756e-06,
3228
+ "loss": 0.2538,
3229
+ "step": 3400
3230
+ },
3231
+ {
3232
+ "epoch": 148.2608695652174,
3233
+ "grad_norm": 1.466277837753296,
3234
+ "learning_rate": 3.791731226341297e-06,
3235
+ "loss": 0.2156,
3236
+ "step": 3410
3237
+ },
3238
+ {
3239
+ "epoch": 148.69565217391303,
3240
+ "grad_norm": 1.8279281854629517,
3241
+ "learning_rate": 3.7916850060224308e-06,
3242
+ "loss": 0.2498,
3243
+ "step": 3420
3244
+ },
3245
+ {
3246
+ "epoch": 149.1304347826087,
3247
+ "grad_norm": 1.7966867685317993,
3248
+ "learning_rate": 3.791638034538477e-06,
3249
+ "loss": 0.2716,
3250
+ "step": 3430
3251
+ },
3252
+ {
3253
+ "epoch": 149.56521739130434,
3254
+ "grad_norm": 2.2440056800842285,
3255
+ "learning_rate": 3.7915903119080527e-06,
3256
+ "loss": 0.265,
3257
+ "step": 3440
3258
+ },
3259
+ {
3260
+ "epoch": 150.0,
3261
+ "grad_norm": 3.2762231826782227,
3262
+ "learning_rate": 3.7915418381500747e-06,
3263
+ "loss": 0.2208,
3264
+ "step": 3450
3265
+ },
3266
+ {
3267
+ "epoch": 150.0,
3268
+ "eval_loss": 0.9505823850631714,
3269
+ "eval_runtime": 0.4422,
3270
+ "eval_samples_per_second": 22.615,
3271
+ "eval_steps_per_second": 22.615,
3272
+ "step": 3450
3273
+ },
3274
+ {
3275
+ "Start_State_loss": 0.7309322357177734,
3276
+ "Start_State_runtime": 0.4072,
3277
+ "Start_State_samples_per_second": 24.558,
3278
+ "Start_State_steps_per_second": 24.558,
3279
+ "epoch": 150.0,
3280
+ "step": 3450
3281
+ },
3282
+ {
3283
+ "Raw_Model_loss": 0.9505823850631714,
3284
+ "Raw_Model_runtime": 0.4153,
3285
+ "Raw_Model_samples_per_second": 24.076,
3286
+ "Raw_Model_steps_per_second": 24.076,
3287
+ "epoch": 150.0,
3288
+ "step": 3450
3289
+ },
3290
+ {
3291
+ "SWA_loss": 0.8045159578323364,
3292
+ "SWA_runtime": 0.402,
3293
+ "SWA_samples_per_second": 24.876,
3294
+ "SWA_steps_per_second": 24.876,
3295
+ "epoch": 150.0,
3296
+ "step": 3450
3297
+ },
3298
+ {
3299
+ "EMA_loss": 0.7316843271255493,
3300
+ "EMA_runtime": 0.4,
3301
+ "EMA_samples_per_second": 25.002,
3302
+ "EMA_steps_per_second": 25.002,
3303
+ "epoch": 150.0,
3304
+ "step": 3450
3305
  }
3306
  ],
3307
  "logging_steps": 10,
 
3321
  "attributes": {}
3322
  }
3323
  },
3324
+ "total_flos": 8.89909420608553e+16,
3325
  "train_batch_size": 4,
3326
  "trial_name": null,
3327
  "trial_params": null