Sabbir772 commited on
Commit
d0a98cb
·
verified ·
1 Parent(s): 22ad1da

Training in progress, epoch 30, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c66c8955128e5e62b623b11b9ae6effa8174d3e5b88cc5a8d94a8e6d659abc1b
3
  size 990185320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa0d3a9c1506002f928fecb048a5cd100d53c2f5b5dbd9403c23027a7acb60c
3
  size 990185320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77cbc69914cf82936274255b687c22dd295cf06c93e14ff29417415459cea06
3
  size 1980541387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fef191fd03c6714ffbb4b03d87e8dd4d98235051bcb5b2f7eb2e8394c2e9665
3
  size 1980541387
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ed1a06b153dad4a8a660e42029973a714386f051e63eb7e369425dfe3df9276
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9044f2a8508562d34a024d3e8ad4386288255ee124dc5623e57819532eef2e88
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f1547202e5461888783dd093e6ac1ad6ae74788ba3d5b6af2761bd28f88426a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecea6d0fd3948e5b4ccfc315e9a77fbe98506ece50162f5206e2243ebb2a7de9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 29.0,
6
  "eval_steps": 500,
7
- "global_step": 44631,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3362,6 +3362,119 @@
3362
  "eval_samples_per_second": 22.138,
3363
  "eval_steps_per_second": 2.767,
3364
  "step": 44631
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3365
  }
3366
  ],
3367
  "logging_steps": 100,
@@ -3376,12 +3489,12 @@
3376
  "should_evaluate": false,
3377
  "should_log": false,
3378
  "should_save": true,
3379
- "should_training_stop": false
3380
  },
3381
  "attributes": {}
3382
  }
3383
  },
3384
- "total_flos": 6.774142686776525e+16,
3385
  "train_batch_size": 8,
3386
  "trial_name": null,
3387
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 30.0,
6
  "eval_steps": 500,
7
+ "global_step": 46170,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3362
  "eval_samples_per_second": 22.138,
3363
  "eval_steps_per_second": 2.767,
3364
  "step": 44631
3365
+ },
3366
+ {
3367
+ "epoch": 29.044834307992204,
3368
+ "grad_norm": 5.871100425720215,
3369
+ "learning_rate": 1.593025774312324e-06,
3370
+ "loss": 1.4208,
3371
+ "step": 44700
3372
+ },
3373
+ {
3374
+ "epoch": 29.109811565951915,
3375
+ "grad_norm": 4.591679096221924,
3376
+ "learning_rate": 1.4847303443794673e-06,
3377
+ "loss": 1.3698,
3378
+ "step": 44800
3379
+ },
3380
+ {
3381
+ "epoch": 29.17478882391163,
3382
+ "grad_norm": 4.2391157150268555,
3383
+ "learning_rate": 1.3764349144466105e-06,
3384
+ "loss": 1.411,
3385
+ "step": 44900
3386
+ },
3387
+ {
3388
+ "epoch": 29.239766081871345,
3389
+ "grad_norm": 5.3565239906311035,
3390
+ "learning_rate": 1.2681394845137535e-06,
3391
+ "loss": 1.4736,
3392
+ "step": 45000
3393
+ },
3394
+ {
3395
+ "epoch": 29.30474333983106,
3396
+ "grad_norm": 3.925321578979492,
3397
+ "learning_rate": 1.1598440545808967e-06,
3398
+ "loss": 1.4591,
3399
+ "step": 45100
3400
+ },
3401
+ {
3402
+ "epoch": 29.369720597790774,
3403
+ "grad_norm": 4.0369462966918945,
3404
+ "learning_rate": 1.05154862464804e-06,
3405
+ "loss": 1.4353,
3406
+ "step": 45200
3407
+ },
3408
+ {
3409
+ "epoch": 29.43469785575049,
3410
+ "grad_norm": 6.875803470611572,
3411
+ "learning_rate": 9.43253194715183e-07,
3412
+ "loss": 1.4348,
3413
+ "step": 45300
3414
+ },
3415
+ {
3416
+ "epoch": 29.4996751137102,
3417
+ "grad_norm": 5.557791233062744,
3418
+ "learning_rate": 8.349577647823262e-07,
3419
+ "loss": 1.3936,
3420
+ "step": 45400
3421
+ },
3422
+ {
3423
+ "epoch": 29.564652371669915,
3424
+ "grad_norm": 2.878941059112549,
3425
+ "learning_rate": 7.266623348494695e-07,
3426
+ "loss": 1.4049,
3427
+ "step": 45500
3428
+ },
3429
+ {
3430
+ "epoch": 29.62962962962963,
3431
+ "grad_norm": 4.448305130004883,
3432
+ "learning_rate": 6.183669049166126e-07,
3433
+ "loss": 1.4117,
3434
+ "step": 45600
3435
+ },
3436
+ {
3437
+ "epoch": 29.694606887589345,
3438
+ "grad_norm": 4.321474075317383,
3439
+ "learning_rate": 5.100714749837557e-07,
3440
+ "loss": 1.4495,
3441
+ "step": 45700
3442
+ },
3443
+ {
3444
+ "epoch": 29.75958414554906,
3445
+ "grad_norm": 5.657812118530273,
3446
+ "learning_rate": 4.0177604505089883e-07,
3447
+ "loss": 1.3955,
3448
+ "step": 45800
3449
+ },
3450
+ {
3451
+ "epoch": 29.82456140350877,
3452
+ "grad_norm": 4.73406457901001,
3453
+ "learning_rate": 2.93480615118042e-07,
3454
+ "loss": 1.4954,
3455
+ "step": 45900
3456
+ },
3457
+ {
3458
+ "epoch": 29.889538661468485,
3459
+ "grad_norm": 3.9184389114379883,
3460
+ "learning_rate": 1.851851851851852e-07,
3461
+ "loss": 1.463,
3462
+ "step": 46000
3463
+ },
3464
+ {
3465
+ "epoch": 29.9545159194282,
3466
+ "grad_norm": 3.517953872680664,
3467
+ "learning_rate": 7.688975525232836e-08,
3468
+ "loss": 1.4204,
3469
+ "step": 46100
3470
+ },
3471
+ {
3472
+ "epoch": 30.0,
3473
+ "eval_loss": 1.3695261478424072,
3474
+ "eval_runtime": 62.0405,
3475
+ "eval_samples_per_second": 22.05,
3476
+ "eval_steps_per_second": 2.756,
3477
+ "step": 46170
3478
  }
3479
  ],
3480
  "logging_steps": 100,
 
3489
  "should_evaluate": false,
3490
  "should_log": false,
3491
  "should_save": true,
3492
+ "should_training_stop": true
3493
  },
3494
  "attributes": {}
3495
  }
3496
  },
3497
+ "total_flos": 7.195641550995456e+16,
3498
  "train_batch_size": 8,
3499
  "trial_name": null,
3500
  "trial_params": null