Sabbir772 commited on
Commit
22ad1da
·
verified ·
1 Parent(s): 4bdb542

Training in progress, epoch 29, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1bdcc22122be16bcad201b13438cbcd5bb3a61bbd2cb3d243f13927651c8ef3
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66c8955128e5e62b623b11b9ae6effa8174d3e5b88cc5a8d94a8e6d659abc1b
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66bba534d6f1ca378d37f9424710e1fbb0cf4f775c1e889d437a4390c2e6da59
3
  size 1980541387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c77cbc69914cf82936274255b687c22dd295cf06c93e14ff29417415459cea06
3
  size 1980541387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c33bcc6689ffa514c871162fcc88c5e26610e3e356b556757408394db2158e3
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed1a06b153dad4a8a660e42029973a714386f051e63eb7e369425dfe3df9276
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37380fd84c1c4a4c2909f470440f6cf70cc0f0dbedd46d88c29bfc45ff95dfcc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f1547202e5461888783dd093e6ac1ad6ae74788ba3d5b6af2761bd28f88426a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 28.0,
6
  "eval_steps": 500,
7
- "global_step": 43092,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3242,6 +3242,126 @@
3242
  "eval_samples_per_second": 22.068,
3243
  "eval_steps_per_second": 2.759,
3244
  "step": 43092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3245
  }
3246
  ],
3247
  "logging_steps": 100,
@@ -3261,7 +3381,7 @@
3261
  "attributes": {}
3262
  }
3263
  },
3264
- "total_flos": 6.352643822557594e+16,
3265
  "train_batch_size": 8,
3266
  "trial_name": null,
3267
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 29.0,
6
  "eval_steps": 500,
7
+ "global_step": 44631,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3242
  "eval_samples_per_second": 22.068,
3243
  "eval_steps_per_second": 2.759,
3244
  "step": 43092
3245
+ },
3246
+ {
3247
+ "epoch": 28.00519818063678,
3248
+ "grad_norm": 4.655136585235596,
3249
+ "learning_rate": 3.3257526532380337e-06,
3250
+ "loss": 1.4119,
3251
+ "step": 43100
3252
+ },
3253
+ {
3254
+ "epoch": 28.07017543859649,
3255
+ "grad_norm": 5.602964878082275,
3256
+ "learning_rate": 3.2174572233051767e-06,
3257
+ "loss": 1.3656,
3258
+ "step": 43200
3259
+ },
3260
+ {
3261
+ "epoch": 28.135152696556204,
3262
+ "grad_norm": 3.6727871894836426,
3263
+ "learning_rate": 3.10916179337232e-06,
3264
+ "loss": 1.4041,
3265
+ "step": 43300
3266
+ },
3267
+ {
3268
+ "epoch": 28.20012995451592,
3269
+ "grad_norm": 5.562687397003174,
3270
+ "learning_rate": 3.000866363439463e-06,
3271
+ "loss": 1.4439,
3272
+ "step": 43400
3273
+ },
3274
+ {
3275
+ "epoch": 28.265107212475634,
3276
+ "grad_norm": 3.7093451023101807,
3277
+ "learning_rate": 2.892570933506606e-06,
3278
+ "loss": 1.3836,
3279
+ "step": 43500
3280
+ },
3281
+ {
3282
+ "epoch": 28.33008447043535,
3283
+ "grad_norm": 6.225944519042969,
3284
+ "learning_rate": 2.7842755035737496e-06,
3285
+ "loss": 1.4018,
3286
+ "step": 43600
3287
+ },
3288
+ {
3289
+ "epoch": 28.395061728395063,
3290
+ "grad_norm": 3.9284849166870117,
3291
+ "learning_rate": 2.6759800736408926e-06,
3292
+ "loss": 1.4189,
3293
+ "step": 43700
3294
+ },
3295
+ {
3296
+ "epoch": 28.460038986354775,
3297
+ "grad_norm": 4.287786483764648,
3298
+ "learning_rate": 2.5676846437080356e-06,
3299
+ "loss": 1.4119,
3300
+ "step": 43800
3301
+ },
3302
+ {
3303
+ "epoch": 28.52501624431449,
3304
+ "grad_norm": 5.376986980438232,
3305
+ "learning_rate": 2.459389213775179e-06,
3306
+ "loss": 1.4671,
3307
+ "step": 43900
3308
+ },
3309
+ {
3310
+ "epoch": 28.589993502274204,
3311
+ "grad_norm": 4.01196813583374,
3312
+ "learning_rate": 2.351093783842322e-06,
3313
+ "loss": 1.413,
3314
+ "step": 44000
3315
+ },
3316
+ {
3317
+ "epoch": 28.65497076023392,
3318
+ "grad_norm": 9.470341682434082,
3319
+ "learning_rate": 2.242798353909465e-06,
3320
+ "loss": 1.4883,
3321
+ "step": 44100
3322
+ },
3323
+ {
3324
+ "epoch": 28.719948018193634,
3325
+ "grad_norm": 3.921780586242676,
3326
+ "learning_rate": 2.1345029239766084e-06,
3327
+ "loss": 1.4371,
3328
+ "step": 44200
3329
+ },
3330
+ {
3331
+ "epoch": 28.784925276153345,
3332
+ "grad_norm": 6.419370651245117,
3333
+ "learning_rate": 2.0262074940437514e-06,
3334
+ "loss": 1.468,
3335
+ "step": 44300
3336
+ },
3337
+ {
3338
+ "epoch": 28.84990253411306,
3339
+ "grad_norm": 3.46016263961792,
3340
+ "learning_rate": 1.9179120641108944e-06,
3341
+ "loss": 1.4476,
3342
+ "step": 44400
3343
+ },
3344
+ {
3345
+ "epoch": 28.914879792072774,
3346
+ "grad_norm": 5.6550822257995605,
3347
+ "learning_rate": 1.8096166341780376e-06,
3348
+ "loss": 1.4096,
3349
+ "step": 44500
3350
+ },
3351
+ {
3352
+ "epoch": 28.97985705003249,
3353
+ "grad_norm": 4.346546173095703,
3354
+ "learning_rate": 1.701321204245181e-06,
3355
+ "loss": 1.4135,
3356
+ "step": 44600
3357
+ },
3358
+ {
3359
+ "epoch": 29.0,
3360
+ "eval_loss": 1.3682384490966797,
3361
+ "eval_runtime": 61.7947,
3362
+ "eval_samples_per_second": 22.138,
3363
+ "eval_steps_per_second": 2.767,
3364
+ "step": 44631
3365
  }
3366
  ],
3367
  "logging_steps": 100,
 
3381
  "attributes": {}
3382
  }
3383
  },
3384
+ "total_flos": 6.774142686776525e+16,
3385
  "train_batch_size": 8,
3386
  "trial_name": null,
3387
  "trial_params": null