irishprancer commited on
Commit
275ac59
·
verified ·
1 Parent(s): 20a1cc6

Training in progress, step 3450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:561dd93e7200e620b3a628a98f78b77e49214dc77820bd53e01765ece4e79aaf
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad88d566a2de22280de9f59b5c145992a9fd5fe8fe840eb9fb608784af8061f4
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2fbfde6e57429b12b8a8888f9861f253842d30f7969950972c18e96feefbcc7
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1414fec916d0b25beb8d137f45e63474d39eeed8c60eddd52432b5ce01f3b4b6
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c26e3773f4be8664a2594f025c73a5f9434f857a45f46fc072657f1fdefb7000
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7a2430b39e5e44cb64bbbf1f8e636e91b380491fea68813c5289acab7fa99d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a0ab9d5b0198a16acdaf1c9f1e4c57811cdaac3c11a1070ce9660ad9c246b9d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94e1d8202f6c1b191d74521b1a79cc0e162b70d269a8fc663a163d38b719239
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 143.47826086956522,
5
  "eval_steps": 150,
6
- "global_step": 3300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3197,6 +3197,151 @@
3197
  "EMA_steps_per_second": 24.887,
3198
  "epoch": 143.47826086956522,
3199
  "step": 3300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3200
  }
3201
  ],
3202
  "logging_steps": 10,
@@ -3216,7 +3361,7 @@
3216
  "attributes": {}
3217
  }
3218
  },
3219
- "total_flos": 8.51203574828974e+16,
3220
  "train_batch_size": 4,
3221
  "trial_name": null,
3222
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 150.0,
5
  "eval_steps": 150,
6
+ "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3197
  "EMA_steps_per_second": 24.887,
3198
  "epoch": 143.47826086956522,
3199
  "step": 3300
3200
+ },
3201
+ {
3202
+ "epoch": 143.91304347826087,
3203
+ "grad_norm": 1.9059531688690186,
3204
+ "learning_rate": 3.909079208908198e-06,
3205
+ "loss": 0.2535,
3206
+ "step": 3310
3207
+ },
3208
+ {
3209
+ "epoch": 144.34782608695653,
3210
+ "grad_norm": 1.500815510749817,
3211
+ "learning_rate": 3.909039307686411e-06,
3212
+ "loss": 0.2141,
3213
+ "step": 3320
3214
+ },
3215
+ {
3216
+ "epoch": 144.7826086956522,
3217
+ "grad_norm": 1.7481781244277954,
3218
+ "learning_rate": 3.908998631963098e-06,
3219
+ "loss": 0.2706,
3220
+ "step": 3330
3221
+ },
3222
+ {
3223
+ "epoch": 145.2173913043478,
3224
+ "grad_norm": 2.9067223072052,
3225
+ "learning_rate": 3.908957181754379e-06,
3226
+ "loss": 0.2078,
3227
+ "step": 3340
3228
+ },
3229
+ {
3230
+ "epoch": 145.65217391304347,
3231
+ "grad_norm": 1.8537293672561646,
3232
+ "learning_rate": 3.908914957076686e-06,
3233
+ "loss": 0.2382,
3234
+ "step": 3350
3235
+ },
3236
+ {
3237
+ "epoch": 146.08695652173913,
3238
+ "grad_norm": 2.053541421890259,
3239
+ "learning_rate": 3.908871957946754e-06,
3240
+ "loss": 0.265,
3241
+ "step": 3360
3242
+ },
3243
+ {
3244
+ "epoch": 146.52173913043478,
3245
+ "grad_norm": 2.026669979095459,
3246
+ "learning_rate": 3.908828184381628e-06,
3247
+ "loss": 0.2265,
3248
+ "step": 3370
3249
+ },
3250
+ {
3251
+ "epoch": 146.95652173913044,
3252
+ "grad_norm": 1.6259890794754028,
3253
+ "learning_rate": 3.908783636398657e-06,
3254
+ "loss": 0.2153,
3255
+ "step": 3380
3256
+ },
3257
+ {
3258
+ "epoch": 147.3913043478261,
3259
+ "grad_norm": 1.7665131092071533,
3260
+ "learning_rate": 3.908738314015499e-06,
3261
+ "loss": 0.2287,
3262
+ "step": 3390
3263
+ },
3264
+ {
3265
+ "epoch": 147.82608695652175,
3266
+ "grad_norm": 1.5578436851501465,
3267
+ "learning_rate": 3.908692217250118e-06,
3268
+ "loss": 0.2535,
3269
+ "step": 3400
3270
+ },
3271
+ {
3272
+ "epoch": 148.2608695652174,
3273
+ "grad_norm": 1.5355435609817505,
3274
+ "learning_rate": 3.908645346120786e-06,
3275
+ "loss": 0.2154,
3276
+ "step": 3410
3277
+ },
3278
+ {
3279
+ "epoch": 148.69565217391303,
3280
+ "grad_norm": 1.8538081645965576,
3281
+ "learning_rate": 3.908597700646081e-06,
3282
+ "loss": 0.2498,
3283
+ "step": 3420
3284
+ },
3285
+ {
3286
+ "epoch": 149.1304347826087,
3287
+ "grad_norm": 1.8780725002288818,
3288
+ "learning_rate": 3.908549280844888e-06,
3289
+ "loss": 0.2714,
3290
+ "step": 3430
3291
+ },
3292
+ {
3293
+ "epoch": 149.56521739130434,
3294
+ "grad_norm": 2.210402250289917,
3295
+ "learning_rate": 3.908500086736398e-06,
3296
+ "loss": 0.2647,
3297
+ "step": 3440
3298
+ },
3299
+ {
3300
+ "epoch": 150.0,
3301
+ "grad_norm": 3.242107391357422,
3302
+ "learning_rate": 3.908450118340112e-06,
3303
+ "loss": 0.2203,
3304
+ "step": 3450
3305
+ },
3306
+ {
3307
+ "epoch": 150.0,
3308
+ "eval_loss": 0.9529827237129211,
3309
+ "eval_runtime": 0.4227,
3310
+ "eval_samples_per_second": 23.657,
3311
+ "eval_steps_per_second": 23.657,
3312
+ "step": 3450
3313
+ },
3314
+ {
3315
+ "Start_State_loss": 0.861186683177948,
3316
+ "Start_State_runtime": 0.4313,
3317
+ "Start_State_samples_per_second": 23.184,
3318
+ "Start_State_steps_per_second": 23.184,
3319
+ "epoch": 150.0,
3320
+ "step": 3450
3321
+ },
3322
+ {
3323
+ "Raw_Model_loss": 0.9529827237129211,
3324
+ "Raw_Model_runtime": 0.4191,
3325
+ "Raw_Model_samples_per_second": 23.858,
3326
+ "Raw_Model_steps_per_second": 23.858,
3327
+ "epoch": 150.0,
3328
+ "step": 3450
3329
+ },
3330
+ {
3331
+ "SWA_loss": 0.7833188772201538,
3332
+ "SWA_runtime": 0.4121,
3333
+ "SWA_samples_per_second": 24.265,
3334
+ "SWA_steps_per_second": 24.265,
3335
+ "epoch": 150.0,
3336
+ "step": 3450
3337
+ },
3338
+ {
3339
+ "EMA_loss": 0.8595975637435913,
3340
+ "EMA_runtime": 0.4017,
3341
+ "EMA_samples_per_second": 24.892,
3342
+ "EMA_steps_per_second": 24.892,
3343
+ "epoch": 150.0,
3344
+ "step": 3450
3345
  }
3346
  ],
3347
  "logging_steps": 10,
 
3361
  "attributes": {}
3362
  }
3363
  },
3364
+ "total_flos": 8.89909420608553e+16,
3365
  "train_batch_size": 4,
3366
  "trial_name": null,
3367
  "trial_params": null