Training in progress, step 18000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e1f6084c2fd12874836176a807971d304a89f7ecfc63e2081a9bd54f224b13b
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:13fbe4723123a9c016392f22f5c5a607f137024e3a3211fa73da181d0f6cd1aa
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:729c0d767d06adf4295f1acf80d3c9a43aee84e3de6cc9a899725bd2d9ba998b
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:3aa6efd41ace1816d77bf0b60c121855a1169e94c3066ee2c4a8939be056cb68
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6775411b7c96ce112db0ff86dbc4c7f4f5876ba69512e78981d49611b5ed959e
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:98e45d3c16114f00517a9e754366d6be11045def442e0374684988d3ee13c529
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c2e00f40f2b965358ee58725a6039af41eeb8a8f4527ae152ec5dad618307fd
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:773184c6d03f9fc1dff724dd2ebc3487575db231883b47dc4663fdc68f33bddb
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37ee15f1c9ceef9e456d1af53da3ed0fd0ec244051b974379f15c285ed42f8b7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9a97caacfd2ffecaa53d612d1aaec198c719ff4db983e8469e19a70730a6af9
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e79d7f1dfea25dc4809dc0e5c220d70f3b690693b546131b59ad7f9ed9b129c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee307f509a475bceeb88f57a12c9dbe31c5cc43a16b915e7c00fca8b909b56f5
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84d957adbd57639a95ced1440a685d29db26c75001a9b3061d2f7af9b9a721b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:add33ce1c647f1ad24436fdd2c7095ade5081fad618777000690c7e187278b49
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.033149548337403904,
   "eval_steps": 500,
-  "global_step": 17000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5958,6 +5958,356 @@
       "learning_rate": 0.000494636149601328,
       "loss": 20.0712,
       "step": 17000
     }
   ],
   "logging_steps": 20,
@@ -5977,7 +6327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2497927616331776e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.035099521769015894,
   "eval_steps": 500,
+  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000494636149601328,
       "loss": 20.0712,
       "step": 17000
+    },
+    {
+      "epoch": 0.03318854780603614,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004946296475704186,
+      "loss": 20.1068,
+      "step": 17020
+    },
+    {
+      "epoch": 0.03322754727466838,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004946231455395093,
+      "loss": 20.0552,
+      "step": 17040
+    },
+    {
+      "epoch": 0.03326654674330062,
+      "grad_norm": 12.5,
+      "learning_rate": 0.0004946166435085999,
+      "loss": 20.0382,
+      "step": 17060
+    },
+    {
+      "epoch": 0.033305546211932865,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004946101414776906,
+      "loss": 20.1285,
+      "step": 17080
+    },
+    {
+      "epoch": 0.0333445456805651,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004946036394467813,
+      "loss": 20.0373,
+      "step": 17100
+    },
+    {
+      "epoch": 0.03338354514919734,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004945971374158719,
+      "loss": 20.1946,
+      "step": 17120
+    },
+    {
+      "epoch": 0.033422544617829585,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.0004945906353849625,
+      "loss": 20.1412,
+      "step": 17140
+    },
+    {
+      "epoch": 0.03346154408646182,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004945841333540531,
+      "loss": 20.078,
+      "step": 17160
+    },
+    {
+      "epoch": 0.03350054355509406,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004945776313231438,
+      "loss": 20.0913,
+      "step": 17180
+    },
+    {
+      "epoch": 0.033539543023726304,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004945711292922344,
+      "loss": 20.1428,
+      "step": 17200
+    },
+    {
+      "epoch": 0.03357854249235854,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.0004945646272613251,
+      "loss": 20.0407,
+      "step": 17220
+    },
+    {
+      "epoch": 0.03361754196099078,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004945581252304157,
+      "loss": 20.1396,
+      "step": 17240
+    },
+    {
+      "epoch": 0.03365654142962302,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004945516231995064,
+      "loss": 20.0334,
+      "step": 17260
+    },
+    {
+      "epoch": 0.03369554089825526,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004945451211685971,
+      "loss": 19.9909,
+      "step": 17280
+    },
+    {
+      "epoch": 0.0337345403668875,
+      "grad_norm": 12.4375,
+      "learning_rate": 0.0004945386191376876,
+      "loss": 20.0374,
+      "step": 17300
+    },
+    {
+      "epoch": 0.03377353983551974,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004945321171067783,
+      "loss": 20.0703,
+      "step": 17320
+    },
+    {
+      "epoch": 0.033812539304151984,
+      "grad_norm": 11.375,
+      "learning_rate": 0.0004945256150758689,
+      "loss": 19.9489,
+      "step": 17340
+    },
+    {
+      "epoch": 0.03385153877278422,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004945191130449596,
+      "loss": 19.9904,
+      "step": 17360
+    },
+    {
+      "epoch": 0.03389053824141646,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004945126110140502,
+      "loss": 19.9895,
+      "step": 17380
+    },
+    {
+      "epoch": 0.0339295377100487,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004945061089831409,
+      "loss": 20.0525,
+      "step": 17400
+    },
+    {
+      "epoch": 0.03396853717868094,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004944996069522316,
+      "loss": 20.0451,
+      "step": 17420
+    },
+    {
+      "epoch": 0.03400753664731318,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004944931049213222,
+      "loss": 20.0506,
+      "step": 17440
+    },
+    {
+      "epoch": 0.03404653611594542,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004944866028904128,
+      "loss": 19.9625,
+      "step": 17460
+    },
+    {
+      "epoch": 0.03408553558457766,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004944801008595034,
+      "loss": 19.995,
+      "step": 17480
+    },
+    {
+      "epoch": 0.0341245350532099,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004944735988285941,
+      "loss": 20.1062,
+      "step": 17500
+    },
+    {
+      "epoch": 0.03416353452184214,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004944670967976847,
+      "loss": 19.9454,
+      "step": 17520
+    },
+    {
+      "epoch": 0.034202533990474376,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004944605947667754,
+      "loss": 19.8752,
+      "step": 17540
+    },
+    {
+      "epoch": 0.03424153345910662,
+      "grad_norm": 10.375,
+      "learning_rate": 0.000494454092735866,
+      "loss": 19.9649,
+      "step": 17560
+    },
+    {
+      "epoch": 0.03428053292773886,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004944475907049567,
+      "loss": 19.9261,
+      "step": 17580
+    },
+    {
+      "epoch": 0.0343195323963711,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004944410886740474,
+      "loss": 19.909,
+      "step": 17600
+    },
+    {
+      "epoch": 0.03435853186500334,
+      "grad_norm": 10.75,
+      "learning_rate": 0.000494434586643138,
+      "loss": 19.9778,
+      "step": 17620
+    },
+    {
+      "epoch": 0.03439753133363558,
+      "grad_norm": 11.5,
+      "learning_rate": 0.0004944280846122287,
+      "loss": 19.9709,
+      "step": 17640
+    },
+    {
+      "epoch": 0.03443653080226782,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004944215825813193,
+      "loss": 19.9898,
+      "step": 17660
+    },
+    {
+      "epoch": 0.03447553027090006,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.00049441508055041,
+      "loss": 19.9979,
+      "step": 17680
+    },
+    {
+      "epoch": 0.0345145297395323,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004944085785195005,
+      "loss": 19.9206,
+      "step": 17700
+    },
+    {
+      "epoch": 0.03455352920816454,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004944020764885912,
+      "loss": 19.9701,
+      "step": 17720
+    },
+    {
+      "epoch": 0.034592528676796776,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004943955744576818,
+      "loss": 19.9937,
+      "step": 17740
+    },
+    {
+      "epoch": 0.03463152814542902,
+      "grad_norm": 12.5625,
+      "learning_rate": 0.0004943890724267725,
+      "loss": 20.0349,
+      "step": 17760
+    },
+    {
+      "epoch": 0.03467052761406126,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004943825703958632,
+      "loss": 19.8582,
+      "step": 17780
+    },
+    {
+      "epoch": 0.034709527082693495,
+      "grad_norm": 12.125,
+      "learning_rate": 0.0004943760683649538,
+      "loss": 19.9185,
+      "step": 17800
+    },
+    {
+      "epoch": 0.03474852655132574,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004943695663340445,
+      "loss": 19.9073,
+      "step": 17820
+    },
+    {
+      "epoch": 0.03478752601995798,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004943630643031351,
+      "loss": 19.8189,
+      "step": 17840
+    },
+    {
+      "epoch": 0.03482652548859022,
+      "grad_norm": 12.375,
+      "learning_rate": 0.0004943565622722258,
+      "loss": 20.0152,
+      "step": 17860
+    },
+    {
+      "epoch": 0.034865524957222456,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004943500602413164,
+      "loss": 19.9768,
+      "step": 17880
+    },
+    {
+      "epoch": 0.0349045244258547,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004943435582104071,
+      "loss": 19.9124,
+      "step": 17900
+    },
+    {
+      "epoch": 0.03494352389448694,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004943370561794977,
+      "loss": 19.8925,
+      "step": 17920
+    },
+    {
+      "epoch": 0.034982523363119175,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004943305541485883,
+      "loss": 19.9456,
+      "step": 17940
+    },
+    {
+      "epoch": 0.03502152283175142,
+      "grad_norm": 10.625,
+      "learning_rate": 0.000494324052117679,
+      "loss": 19.8603,
+      "step": 17960
+    },
+    {
+      "epoch": 0.03506052230038366,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004943175500867696,
+      "loss": 19.909,
+      "step": 17980
+    },
+    {
+      "epoch": 0.035099521769015894,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004943110480558603,
+      "loss": 19.8528,
+      "step": 18000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.3232995623550058e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null