Training in progress, epoch 4, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +403 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25ef5980a866c075243cbca6296c7973e976c78a14321fe2714e84fa4887656f
 size 110385904

 version https://git-lfs.github.com/spec/v1
+oid sha256:116bc00da50d25dcc272533129d200b856bacec241e236b6db2a9ecf18c05922
 size 110385904

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:89d5648ac686d3b7617b3067c5ba0c213c7a760c4922f82dd2c2c53e72d473aa
 size 220436730

 version https://git-lfs.github.com/spec/v1
+oid sha256:7df46f260a7759a7c30b2145c459383d33624e387eb96c8208346b53716d81a4
 size 220436730

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4928b39aef16b4ccfdae8117738f87968b9461ea10d0859ffd4b43ce42030e6e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fea20947238c1f9d5daa925821c4146833bd2c976bc44d7f6f2f755645070f2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d1e3c127b2fc073a228b2fd82f0458974e79c45f15454c1c54a51df347dead6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:64e1e8324d63cee4f88909dbf92a51081cdf1093a4acda403ce37f08bc679a7e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.0,
   "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/16_label_check_point/checkpoint-563",
-  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 2249,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1611,6 +1611,405 @@
       "eval_samples_per_second": 192.275,
       "eval_steps_per_second": 6.009,
       "step": 2249
     }
   ],
   "logging_steps": 10,
@@ -1625,12 +2024,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.154959575454581e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.0,
   "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/16_label_check_point/checkpoint-563",
+  "epoch": 4.997333333333334,
   "eval_steps": 500,
+  "global_step": 2810,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 192.275,
       "eval_steps_per_second": 6.009,
       "step": 2249
+    },
+    {
+      "epoch": 4.001777777777778,
+      "grad_norm": 0.0036902178544551134,
+      "learning_rate": 1.1071569790431002e-05,
+      "loss": 0.0003,
+      "step": 2250
+    },
+    {
+      "epoch": 4.019555555555556,
+      "grad_norm": 0.00998405460268259,
+      "learning_rate": 1.0873863187030448e-05,
+      "loss": 0.002,
+      "step": 2260
+    },
+    {
+      "epoch": 4.037333333333334,
+      "grad_norm": 23.953229904174805,
+      "learning_rate": 1.0676156583629894e-05,
+      "loss": 0.0187,
+      "step": 2270
+    },
+    {
+      "epoch": 4.0551111111111116,
+      "grad_norm": 0.008150537498295307,
+      "learning_rate": 1.0478449980229341e-05,
+      "loss": 0.0153,
+      "step": 2280
+    },
+    {
+      "epoch": 4.072888888888889,
+      "grad_norm": 0.5894471406936646,
+      "learning_rate": 1.0280743376828787e-05,
+      "loss": 0.0002,
+      "step": 2290
+    },
+    {
+      "epoch": 4.0906666666666665,
+      "grad_norm": 0.07007890194654465,
+      "learning_rate": 1.0083036773428232e-05,
+      "loss": 0.0367,
+      "step": 2300
+    },
+    {
+      "epoch": 4.108444444444444,
+      "grad_norm": 0.07020383328199387,
+      "learning_rate": 9.885330170027678e-06,
+      "loss": 0.0017,
+      "step": 2310
+    },
+    {
+      "epoch": 4.126222222222222,
+      "grad_norm": 0.0013433824060484767,
+      "learning_rate": 9.687623566627125e-06,
+      "loss": 0.0176,
+      "step": 2320
+    },
+    {
+      "epoch": 4.144,
+      "grad_norm": 0.0036678831093013287,
+      "learning_rate": 9.489916963226571e-06,
+      "loss": 0.001,
+      "step": 2330
+    },
+    {
+      "epoch": 4.161777777777778,
+      "grad_norm": 20.646207809448242,
+      "learning_rate": 9.29221035982602e-06,
+      "loss": 0.0071,
+      "step": 2340
+    },
+    {
+      "epoch": 4.179555555555556,
+      "grad_norm": 0.004499041475355625,
+      "learning_rate": 9.094503756425466e-06,
+      "loss": 0.0004,
+      "step": 2350
+    },
+    {
+      "epoch": 4.197333333333333,
+      "grad_norm": 0.0007168107549659908,
+      "learning_rate": 8.896797153024912e-06,
+      "loss": 0.0025,
+      "step": 2360
+    },
+    {
+      "epoch": 4.215111111111111,
+      "grad_norm": 0.015021364204585552,
+      "learning_rate": 8.699090549624359e-06,
+      "loss": 0.0277,
+      "step": 2370
+    },
+    {
+      "epoch": 4.232888888888889,
+      "grad_norm": 0.006119410507380962,
+      "learning_rate": 8.501383946223804e-06,
+      "loss": 0.0009,
+      "step": 2380
+    },
+    {
+      "epoch": 4.250666666666667,
+      "grad_norm": 0.0018322835676372051,
+      "learning_rate": 8.30367734282325e-06,
+      "loss": 0.0009,
+      "step": 2390
+    },
+    {
+      "epoch": 4.2684444444444445,
+      "grad_norm": 0.0025883447378873825,
+      "learning_rate": 8.105970739422696e-06,
+      "loss": 0.0179,
+      "step": 2400
+    },
+    {
+      "epoch": 4.286222222222222,
+      "grad_norm": 0.010295086540281773,
+      "learning_rate": 7.908264136022143e-06,
+      "loss": 0.0002,
+      "step": 2410
+    },
+    {
+      "epoch": 4.304,
+      "grad_norm": 0.27159199118614197,
+      "learning_rate": 7.710557532621591e-06,
+      "loss": 0.0003,
+      "step": 2420
+    },
+    {
+      "epoch": 4.321777777777778,
+      "grad_norm": 0.014537914656102657,
+      "learning_rate": 7.5128509292210365e-06,
+      "loss": 0.0108,
+      "step": 2430
+    },
+    {
+      "epoch": 4.339555555555555,
+      "grad_norm": 0.001482433988712728,
+      "learning_rate": 7.315144325820483e-06,
+      "loss": 0.0003,
+      "step": 2440
+    },
+    {
+      "epoch": 4.357333333333333,
+      "grad_norm": 0.0015277402708306909,
+      "learning_rate": 7.11743772241993e-06,
+      "loss": 0.0496,
+      "step": 2450
+    },
+    {
+      "epoch": 4.375111111111111,
+      "grad_norm": 0.005141290370374918,
+      "learning_rate": 6.919731119019375e-06,
+      "loss": 0.0001,
+      "step": 2460
+    },
+    {
+      "epoch": 4.392888888888889,
+      "grad_norm": 27.9423770904541,
+      "learning_rate": 6.722024515618821e-06,
+      "loss": 0.0393,
+      "step": 2470
+    },
+    {
+      "epoch": 4.410666666666667,
+      "grad_norm": 0.010187560692429543,
+      "learning_rate": 6.524317912218268e-06,
+      "loss": 0.0001,
+      "step": 2480
+    },
+    {
+      "epoch": 4.428444444444445,
+      "grad_norm": 0.002554529346525669,
+      "learning_rate": 6.326611308817715e-06,
+      "loss": 0.0031,
+      "step": 2490
+    },
+    {
+      "epoch": 4.4462222222222225,
+      "grad_norm": 4.20240592956543,
+      "learning_rate": 6.1289047054171615e-06,
+      "loss": 0.0016,
+      "step": 2500
+    },
+    {
+      "epoch": 4.464,
+      "grad_norm": 0.013741197995841503,
+      "learning_rate": 5.931198102016608e-06,
+      "loss": 0.0003,
+      "step": 2510
+    },
+    {
+      "epoch": 4.481777777777777,
+      "grad_norm": 0.043951794505119324,
+      "learning_rate": 5.7334914986160535e-06,
+      "loss": 0.0125,
+      "step": 2520
+    },
+    {
+      "epoch": 4.499555555555555,
+      "grad_norm": 0.11376281827688217,
+      "learning_rate": 5.535784895215501e-06,
+      "loss": 0.003,
+      "step": 2530
+    },
+    {
+      "epoch": 4.517333333333333,
+      "grad_norm": 0.05734412372112274,
+      "learning_rate": 5.338078291814947e-06,
+      "loss": 0.0009,
+      "step": 2540
+    },
+    {
+      "epoch": 4.535111111111111,
+      "grad_norm": 0.0010632964549586177,
+      "learning_rate": 5.140371688414394e-06,
+      "loss": 0.0196,
+      "step": 2550
+    },
+    {
+      "epoch": 4.552888888888889,
+      "grad_norm": 0.0036729658022522926,
+      "learning_rate": 4.942665085013839e-06,
+      "loss": 0.0071,
+      "step": 2560
+    },
+    {
+      "epoch": 4.570666666666667,
+      "grad_norm": 5.985267162322998,
+      "learning_rate": 4.744958481613286e-06,
+      "loss": 0.0311,
+      "step": 2570
+    },
+    {
+      "epoch": 4.588444444444445,
+      "grad_norm": 0.996809720993042,
+      "learning_rate": 4.547251878212733e-06,
+      "loss": 0.0012,
+      "step": 2580
+    },
+    {
+      "epoch": 4.606222222222222,
+      "grad_norm": 0.11869648844003677,
+      "learning_rate": 4.349545274812179e-06,
+      "loss": 0.0003,
+      "step": 2590
+    },
+    {
+      "epoch": 4.624,
+      "grad_norm": 0.006143218372017145,
+      "learning_rate": 4.151838671411625e-06,
+      "loss": 0.0013,
+      "step": 2600
+    },
+    {
+      "epoch": 4.641777777777778,
+      "grad_norm": 0.024631284177303314,
+      "learning_rate": 3.954132068011071e-06,
+      "loss": 0.0388,
+      "step": 2610
+    },
+    {
+      "epoch": 4.6595555555555555,
+      "grad_norm": 0.0017836794722825289,
+      "learning_rate": 3.7564254646105183e-06,
+      "loss": 0.0015,
+      "step": 2620
+    },
+    {
+      "epoch": 4.677333333333333,
+      "grad_norm": 0.003801500890403986,
+      "learning_rate": 3.558718861209965e-06,
+      "loss": 0.0003,
+      "step": 2630
+    },
+    {
+      "epoch": 4.695111111111111,
+      "grad_norm": 0.004178278613835573,
+      "learning_rate": 3.3610122578094107e-06,
+      "loss": 0.001,
+      "step": 2640
+    },
+    {
+      "epoch": 4.712888888888889,
+      "grad_norm": 0.0044832993298769,
+      "learning_rate": 3.1633056544088575e-06,
+      "loss": 0.0194,
+      "step": 2650
+    },
+    {
+      "epoch": 4.730666666666667,
+      "grad_norm": 0.0029173328075557947,
+      "learning_rate": 2.965599051008304e-06,
+      "loss": 0.0302,
+      "step": 2660
+    },
+    {
+      "epoch": 4.748444444444445,
+      "grad_norm": 0.0005038917297497392,
+      "learning_rate": 2.7678924476077504e-06,
+      "loss": 0.011,
+      "step": 2670
+    },
+    {
+      "epoch": 4.766222222222222,
+      "grad_norm": 0.01968969963490963,
+      "learning_rate": 2.570185844207197e-06,
+      "loss": 0.0002,
+      "step": 2680
+    },
+    {
+      "epoch": 4.784,
+      "grad_norm": 0.02507755346596241,
+      "learning_rate": 2.372479240806643e-06,
+      "loss": 0.0012,
+      "step": 2690
+    },
+    {
+      "epoch": 4.801777777777778,
+      "grad_norm": 6.288967609405518,
+      "learning_rate": 2.1747726374060897e-06,
+      "loss": 0.0195,
+      "step": 2700
+    },
+    {
+      "epoch": 4.819555555555556,
+      "grad_norm": 0.19547367095947266,
+      "learning_rate": 1.9770660340055357e-06,
+      "loss": 0.0034,
+      "step": 2710
+    },
+    {
+      "epoch": 4.8373333333333335,
+      "grad_norm": 0.007160472217947245,
+      "learning_rate": 1.7793594306049826e-06,
+      "loss": 0.0008,
+      "step": 2720
+    },
+    {
+      "epoch": 4.855111111111111,
+      "grad_norm": 0.1027381643652916,
+      "learning_rate": 1.5816528272044288e-06,
+      "loss": 0.0006,
+      "step": 2730
+    },
+    {
+      "epoch": 4.872888888888889,
+      "grad_norm": 0.6163949966430664,
+      "learning_rate": 1.3839462238038752e-06,
+      "loss": 0.0001,
+      "step": 2740
+    },
+    {
+      "epoch": 4.890666666666666,
+      "grad_norm": 0.0065285759046673775,
+      "learning_rate": 1.1862396204033214e-06,
+      "loss": 0.0,
+      "step": 2750
+    },
+    {
+      "epoch": 4.908444444444444,
+      "grad_norm": 2.0664429664611816,
+      "learning_rate": 9.885330170027678e-07,
+      "loss": 0.0033,
+      "step": 2760
+    },
+    {
+      "epoch": 4.926222222222222,
+      "grad_norm": 0.04097575694322586,
+      "learning_rate": 7.908264136022144e-07,
+      "loss": 0.001,
+      "step": 2770
+    },
+    {
+      "epoch": 4.944,
+      "grad_norm": 0.0015862607397139072,
+      "learning_rate": 5.931198102016607e-07,
+      "loss": 0.0001,
+      "step": 2780
+    },
+    {
+      "epoch": 4.961777777777778,
+      "grad_norm": 0.0021847274620085955,
+      "learning_rate": 3.954132068011072e-07,
+      "loss": 0.0006,
+      "step": 2790
+    },
+    {
+      "epoch": 4.979555555555556,
+      "grad_norm": 0.00527564063668251,
+      "learning_rate": 1.977066034005536e-07,
+      "loss": 0.0243,
+      "step": 2800
+    },
+    {
+      "epoch": 4.997333333333334,
+      "grad_norm": 0.0007434898870997131,
+      "learning_rate": 0.0,
+      "loss": 0.0525,
+      "step": 2810
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.94051665811918e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null