irishprancer commited on
Commit
bf5f84a
·
verified ·
1 Parent(s): 7cc7813

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4c395680d89b01821da1bb33984f1c2e9553029f87090f8cd3c027b66de846e
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89096aa0386bd07d2645f68b9a29bafea399f79871d40185060fa68039636b1f
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99f6515a9eba4ffb6aed2e9196810686af54d3882ecef9ebccbe475775dec4c1
3
  size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da854ce99198fc748d7dc94ec6e70f668228ef91b0f8c29002a2372a254620c5
3
  size 2897966842
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2aba1e092bb5e9c7cb4142b16fd16e351b46865a4a17fbe78a8e97a303f189e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2acbf6096e071e41d5582e116888a7c6ecc44cbafa614aca8be409b37f1ebc9a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba021163979d3718a3a614ea92e798f442b4a1460b3153e40b61917eeda84568
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f72d12e08f2981b12196a00ff48fe5cac0ba4d9d1aa54f91464a195ecde87c8
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.2168010473251343,
3
- "best_model_checkpoint": "./output/checkpoint-4200",
4
- "epoch": 0.2776125322228832,
5
  "eval_steps": 150,
6
- "global_step": 4200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3171,6 +3171,232 @@
3171
  "eval_samples_per_second": 9.742,
3172
  "eval_steps_per_second": 9.742,
3173
  "step": 4200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3174
  }
3175
  ],
3176
  "logging_steps": 10,
@@ -3190,7 +3416,7 @@
3190
  "attributes": {}
3191
  }
3192
  },
3193
- "total_flos": 2.9003778620227584e+17,
3194
  "train_batch_size": 4,
3195
  "trial_name": null,
3196
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.2155283689498901,
3
+ "best_model_checkpoint": "./output/checkpoint-4500",
4
+ "epoch": 0.297441998810232,
5
  "eval_steps": 150,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3171
  "eval_samples_per_second": 9.742,
3172
  "eval_steps_per_second": 9.742,
3173
  "step": 4200
3174
+ },
3175
+ {
3176
+ "epoch": 0.2782735144424615,
3177
+ "grad_norm": 11.23477840423584,
3178
+ "learning_rate": 1.4037255773076804e-06,
3179
+ "loss": 1.0421,
3180
+ "step": 4210
3181
+ },
3182
+ {
3183
+ "epoch": 0.2789344966620398,
3184
+ "grad_norm": 10.921051979064941,
3185
+ "learning_rate": 1.3691524078729481e-06,
3186
+ "loss": 1.055,
3187
+ "step": 4220
3188
+ },
3189
+ {
3190
+ "epoch": 0.27959547888161806,
3191
+ "grad_norm": 7.342863082885742,
3192
+ "learning_rate": 1.3349825379277099e-06,
3193
+ "loss": 1.2973,
3194
+ "step": 4230
3195
+ },
3196
+ {
3197
+ "epoch": 0.2802564611011964,
3198
+ "grad_norm": 11.837105751037598,
3199
+ "learning_rate": 1.3012173720614862e-06,
3200
+ "loss": 1.2177,
3201
+ "step": 4240
3202
+ },
3203
+ {
3204
+ "epoch": 0.2809174433207747,
3205
+ "grad_norm": 13.415239334106445,
3206
+ "learning_rate": 1.267858298227995e-06,
3207
+ "loss": 1.1455,
3208
+ "step": 4250
3209
+ },
3210
+ {
3211
+ "epoch": 0.28157842554035295,
3212
+ "grad_norm": 11.301210403442383,
3213
+ "learning_rate": 1.2349066876881063e-06,
3214
+ "loss": 1.1602,
3215
+ "step": 4260
3216
+ },
3217
+ {
3218
+ "epoch": 0.2822394077599313,
3219
+ "grad_norm": 5.907723903656006,
3220
+ "learning_rate": 1.202363894953462e-06,
3221
+ "loss": 1.1053,
3222
+ "step": 4270
3223
+ },
3224
+ {
3225
+ "epoch": 0.28290038997950956,
3226
+ "grad_norm": 12.926289558410645,
3227
+ "learning_rate": 1.1702312577308133e-06,
3228
+ "loss": 1.2056,
3229
+ "step": 4280
3230
+ },
3231
+ {
3232
+ "epoch": 0.28356137219908784,
3233
+ "grad_norm": 10.026867866516113,
3234
+ "learning_rate": 1.1385100968670189e-06,
3235
+ "loss": 1.1685,
3236
+ "step": 4290
3237
+ },
3238
+ {
3239
+ "epoch": 0.2842223544186661,
3240
+ "grad_norm": 12.193798065185547,
3241
+ "learning_rate": 1.107201716294762e-06,
3242
+ "loss": 1.1253,
3243
+ "step": 4300
3244
+ },
3245
+ {
3246
+ "epoch": 0.28488333663824444,
3247
+ "grad_norm": 6.5807294845581055,
3248
+ "learning_rate": 1.076307402978938e-06,
3249
+ "loss": 1.1252,
3250
+ "step": 4310
3251
+ },
3252
+ {
3253
+ "epoch": 0.2855443188578227,
3254
+ "grad_norm": 11.568461418151855,
3255
+ "learning_rate": 1.0458284268637652e-06,
3256
+ "loss": 1.2131,
3257
+ "step": 4320
3258
+ },
3259
+ {
3260
+ "epoch": 0.286205301077401,
3261
+ "grad_norm": 5.46840238571167,
3262
+ "learning_rate": 1.0157660408205728e-06,
3263
+ "loss": 1.0678,
3264
+ "step": 4330
3265
+ },
3266
+ {
3267
+ "epoch": 0.28686628329697933,
3268
+ "grad_norm": 13.20085334777832,
3269
+ "learning_rate": 9.861214805963042e-07,
3270
+ "loss": 1.1974,
3271
+ "step": 4340
3272
+ },
3273
+ {
3274
+ "epoch": 0.2875272655165576,
3275
+ "grad_norm": 13.585931777954102,
3276
+ "learning_rate": 9.568959647627223e-07,
3277
+ "loss": 1.1664,
3278
+ "step": 4350
3279
+ },
3280
+ {
3281
+ "epoch": 0.2875272655165576,
3282
+ "eval_loss": 1.21638822555542,
3283
+ "eval_runtime": 51.7738,
3284
+ "eval_samples_per_second": 9.677,
3285
+ "eval_steps_per_second": 9.677,
3286
+ "step": 4350
3287
+ },
3288
+ {
3289
+ "epoch": 0.2881882477361359,
3290
+ "grad_norm": 7.628300189971924,
3291
+ "learning_rate": 9.280906946663111e-07,
3292
+ "loss": 1.0584,
3293
+ "step": 4360
3294
+ },
3295
+ {
3296
+ "epoch": 0.2888492299557142,
3297
+ "grad_norm": 8.380716323852539,
3298
+ "learning_rate": 8.997068543789051e-07,
3299
+ "loss": 1.1137,
3300
+ "step": 4370
3301
+ },
3302
+ {
3303
+ "epoch": 0.2895102121752925,
3304
+ "grad_norm": 12.071667671203613,
3305
+ "learning_rate": 8.717456106490042e-07,
3306
+ "loss": 1.0887,
3307
+ "step": 4380
3308
+ },
3309
+ {
3310
+ "epoch": 0.29017119439487077,
3311
+ "grad_norm": 6.33940315246582,
3312
+ "learning_rate": 8.442081128538243e-07,
3313
+ "loss": 1.0145,
3314
+ "step": 4390
3315
+ },
3316
+ {
3317
+ "epoch": 0.29083217661444905,
3318
+ "grad_norm": 9.972112655639648,
3319
+ "learning_rate": 8.170954929520389e-07,
3320
+ "loss": 1.1362,
3321
+ "step": 4400
3322
+ },
3323
+ {
3324
+ "epoch": 0.2914931588340274,
3325
+ "grad_norm": 12.998346328735352,
3326
+ "learning_rate": 7.904088654372622e-07,
3327
+ "loss": 1.148,
3328
+ "step": 4410
3329
+ },
3330
+ {
3331
+ "epoch": 0.29215414105360565,
3332
+ "grad_norm": 5.646799087524414,
3333
+ "learning_rate": 7.641493272922243e-07,
3334
+ "loss": 1.1281,
3335
+ "step": 4420
3336
+ },
3337
+ {
3338
+ "epoch": 0.29281512327318393,
3339
+ "grad_norm": 10.702962875366211,
3340
+ "learning_rate": 7.383179579436903e-07,
3341
+ "loss": 1.1785,
3342
+ "step": 4430
3343
+ },
3344
+ {
3345
+ "epoch": 0.29347610549276226,
3346
+ "grad_norm": 5.956870079040527,
3347
+ "learning_rate": 7.129158192180766e-07,
3348
+ "loss": 1.1568,
3349
+ "step": 4440
3350
+ },
3351
+ {
3352
+ "epoch": 0.29413708771234054,
3353
+ "grad_norm": 11.048665046691895,
3354
+ "learning_rate": 6.879439552978142e-07,
3355
+ "loss": 1.0652,
3356
+ "step": 4450
3357
+ },
3358
+ {
3359
+ "epoch": 0.2947980699319188,
3360
+ "grad_norm": 5.649775505065918,
3361
+ "learning_rate": 6.634033926784221e-07,
3362
+ "loss": 1.1235,
3363
+ "step": 4460
3364
+ },
3365
+ {
3366
+ "epoch": 0.29545905215149715,
3367
+ "grad_norm": 11.055773735046387,
3368
+ "learning_rate": 6.392951401263069e-07,
3369
+ "loss": 1.285,
3370
+ "step": 4470
3371
+ },
3372
+ {
3373
+ "epoch": 0.2961200343710754,
3374
+ "grad_norm": 7.027043342590332,
3375
+ "learning_rate": 6.156201886373113e-07,
3376
+ "loss": 1.209,
3377
+ "step": 4480
3378
+ },
3379
+ {
3380
+ "epoch": 0.2967810165906537,
3381
+ "grad_norm": 11.43958854675293,
3382
+ "learning_rate": 5.923795113959569e-07,
3383
+ "loss": 1.2139,
3384
+ "step": 4490
3385
+ },
3386
+ {
3387
+ "epoch": 0.297441998810232,
3388
+ "grad_norm": 11.668280601501465,
3389
+ "learning_rate": 5.695740637354591e-07,
3390
+ "loss": 1.2407,
3391
+ "step": 4500
3392
+ },
3393
+ {
3394
+ "epoch": 0.297441998810232,
3395
+ "eval_loss": 1.2155283689498901,
3396
+ "eval_runtime": 48.0067,
3397
+ "eval_samples_per_second": 10.436,
3398
+ "eval_steps_per_second": 10.436,
3399
+ "step": 4500
3400
  }
3401
  ],
3402
  "logging_steps": 10,
 
3416
  "attributes": {}
3417
  }
3418
  },
3419
+ "total_flos": 3.104694939554611e+17,
3420
  "train_batch_size": 4,
3421
  "trial_name": null,
3422
  "trial_params": null