Training in progress, step 19000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13fbe4723123a9c016392f22f5c5a607f137024e3a3211fa73da181d0f6cd1aa
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:01e4827000f30108c5db6d9ab6168d6e7dfecf37eef3edc1465363ee9ea8e490
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3aa6efd41ace1816d77bf0b60c121855a1169e94c3066ee2c4a8939be056cb68
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:795bb5905ce658a665a647e1035b68562ea8227998cfd6cdd93e835459408e5d
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98e45d3c16114f00517a9e754366d6be11045def442e0374684988d3ee13c529
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6872023f654a65ebb855f875663f2550ec7c7270f37183aedc09afdf3151f71c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:773184c6d03f9fc1dff724dd2ebc3487575db231883b47dc4663fdc68f33bddb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d749134b574c8d566f1f7b1e5e174cfc46c406c32210d882ffb530c2f402814
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9a97caacfd2ffecaa53d612d1aaec198c719ff4db983e8469e19a70730a6af9
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7b2317285b7aac6485bde8423b9bd42301b29e0cd0b6a3f299d06ddf3270099
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee307f509a475bceeb88f57a12c9dbe31c5cc43a16b915e7c00fca8b909b56f5
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a512863aeac154eb9ea09654b5c57fb002e6788836adf8be9c2844cb710adf1
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:add33ce1c647f1ad24436fdd2c7095ade5081fad618777000690c7e187278b49
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b73090e5ff4d77e40aae33305c58d2deda13e4f4510f1c076acf40a9f8a97bef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.035099521769015894,
   "eval_steps": 500,
-  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6308,6 +6308,356 @@
       "learning_rate": 0.0004943110480558603,
       "loss": 19.8528,
       "step": 18000
     }
   ],
   "logging_steps": 20,
@@ -6327,7 +6677,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3232995623550058e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.03704949520062789,
   "eval_steps": 500,
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004943110480558603,
       "loss": 19.8528,
       "step": 18000
+    },
+    {
+      "epoch": 0.035138521237648136,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004943045460249509,
+      "loss": 19.9118,
+      "step": 18020
+    },
+    {
+      "epoch": 0.03517752070628038,
+      "grad_norm": 12.0625,
+      "learning_rate": 0.0004942980439940416,
+      "loss": 19.7998,
+      "step": 18040
+    },
+    {
+      "epoch": 0.03521652017491262,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004942915419631321,
+      "loss": 19.8966,
+      "step": 18060
+    },
+    {
+      "epoch": 0.035255519643544855,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004942850399322228,
+      "loss": 19.8631,
+      "step": 18080
+    },
+    {
+      "epoch": 0.0352945191121771,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004942785379013135,
+      "loss": 19.8226,
+      "step": 18100
+    },
+    {
+      "epoch": 0.03533351858080934,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004942720358704041,
+      "loss": 19.8466,
+      "step": 18120
+    },
+    {
+      "epoch": 0.035372518049441574,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004942655338394948,
+      "loss": 19.8142,
+      "step": 18140
+    },
+    {
+      "epoch": 0.035411517518073816,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004942590318085854,
+      "loss": 19.8257,
+      "step": 18160
+    },
+    {
+      "epoch": 0.03545051698670606,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004942525297776761,
+      "loss": 19.7939,
+      "step": 18180
+    },
+    {
+      "epoch": 0.035489516455338294,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004942460277467667,
+      "loss": 19.8764,
+      "step": 18200
+    },
+    {
+      "epoch": 0.035528515923970536,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004942395257158573,
+      "loss": 19.8394,
+      "step": 18220
+    },
+    {
+      "epoch": 0.03556751539260278,
+      "grad_norm": 10.375,
+      "learning_rate": 0.000494233023684948,
+      "loss": 19.7666,
+      "step": 18240
+    },
+    {
+      "epoch": 0.03560651486123501,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004942265216540386,
+      "loss": 19.8165,
+      "step": 18260
+    },
+    {
+      "epoch": 0.035645514329867255,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004942200196231293,
+      "loss": 19.9201,
+      "step": 18280
+    },
+    {
+      "epoch": 0.0356845137984995,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004942135175922199,
+      "loss": 19.8705,
+      "step": 18300
+    },
+    {
+      "epoch": 0.03572351326713174,
+      "grad_norm": 10.9375,
+      "learning_rate": 0.0004942070155613106,
+      "loss": 19.7906,
+      "step": 18320
+    },
+    {
+      "epoch": 0.035762512735763974,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004942005135304012,
+      "loss": 19.7983,
+      "step": 18340
+    },
+    {
+      "epoch": 0.035801512204396216,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004941940114994919,
+      "loss": 19.7921,
+      "step": 18360
+    },
+    {
+      "epoch": 0.03584051167302846,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004941875094685825,
+      "loss": 19.6906,
+      "step": 18380
+    },
+    {
+      "epoch": 0.03587951114166069,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004941810074376732,
+      "loss": 19.8046,
+      "step": 18400
+    },
+    {
+      "epoch": 0.035918510610292935,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004941745054067638,
+      "loss": 19.8274,
+      "step": 18420
+    },
+    {
+      "epoch": 0.03595751007892518,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004941680033758544,
+      "loss": 19.7977,
+      "step": 18440
+    },
+    {
+      "epoch": 0.03599650954755741,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004941615013449451,
+      "loss": 19.7892,
+      "step": 18460
+    },
+    {
+      "epoch": 0.036035509016189654,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004941549993140357,
+      "loss": 19.6819,
+      "step": 18480
+    },
+    {
+      "epoch": 0.036074508484821896,
+      "grad_norm": 12.3125,
+      "learning_rate": 0.0004941484972831264,
+      "loss": 19.7501,
+      "step": 18500
+    },
+    {
+      "epoch": 0.03611350795345413,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.000494141995252217,
+      "loss": 19.7792,
+      "step": 18520
+    },
+    {
+      "epoch": 0.03615250742208637,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004941354932213077,
+      "loss": 19.7753,
+      "step": 18540
+    },
+    {
+      "epoch": 0.036191506890718615,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004941289911903983,
+      "loss": 19.8244,
+      "step": 18560
+    },
+    {
+      "epoch": 0.03623050635935086,
+      "grad_norm": 10.75,
+      "learning_rate": 0.000494122489159489,
+      "loss": 19.7036,
+      "step": 18580
+    },
+    {
+      "epoch": 0.03626950582798309,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004941159871285797,
+      "loss": 19.7776,
+      "step": 18600
+    },
+    {
+      "epoch": 0.036308505296615334,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.0004941094850976703,
+      "loss": 19.8757,
+      "step": 18620
+    },
+    {
+      "epoch": 0.036347504765247576,
+      "grad_norm": 9.625,
+      "learning_rate": 0.000494102983066761,
+      "loss": 19.8473,
+      "step": 18640
+    },
+    {
+      "epoch": 0.03638650423387981,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.0004940964810358516,
+      "loss": 19.6959,
+      "step": 18660
+    },
+    {
+      "epoch": 0.03642550370251205,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004940899790049422,
+      "loss": 19.7052,
+      "step": 18680
+    },
+    {
+      "epoch": 0.036464503171144295,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004940834769740328,
+      "loss": 19.7221,
+      "step": 18700
+    },
+    {
+      "epoch": 0.03650350263977653,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004940769749431235,
+      "loss": 19.636,
+      "step": 18720
+    },
+    {
+      "epoch": 0.03654250210840877,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004940704729122142,
+      "loss": 19.7428,
+      "step": 18740
+    },
+    {
+      "epoch": 0.036581501577041015,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004940639708813048,
+      "loss": 19.7076,
+      "step": 18760
+    },
+    {
+      "epoch": 0.03662050104567325,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004940574688503955,
+      "loss": 19.6721,
+      "step": 18780
+    },
+    {
+      "epoch": 0.03665950051430549,
+      "grad_norm": 12.4375,
+      "learning_rate": 0.0004940509668194861,
+      "loss": 19.7135,
+      "step": 18800
+    },
+    {
+      "epoch": 0.036698499982937734,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004940444647885768,
+      "loss": 19.591,
+      "step": 18820
+    },
+    {
+      "epoch": 0.036737499451569976,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004940379627576673,
+      "loss": 19.595,
+      "step": 18840
+    },
+    {
+      "epoch": 0.03677649892020221,
+      "grad_norm": 10.375,
+      "learning_rate": 0.000494031460726758,
+      "loss": 19.7223,
+      "step": 18860
+    },
+    {
+      "epoch": 0.03681549838883445,
+      "grad_norm": 12.4375,
+      "learning_rate": 0.0004940249586958486,
+      "loss": 19.6684,
+      "step": 18880
+    },
+    {
+      "epoch": 0.036854497857466695,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004940184566649393,
+      "loss": 19.6797,
+      "step": 18900
+    },
+    {
+      "epoch": 0.03689349732609893,
+      "grad_norm": 10.0,
+      "learning_rate": 0.00049401195463403,
+      "loss": 19.712,
+      "step": 18920
+    },
+    {
+      "epoch": 0.03693249679473117,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004940054526031206,
+      "loss": 19.5788,
+      "step": 18940
+    },
+    {
+      "epoch": 0.036971496263363414,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004939989505722113,
+      "loss": 19.6803,
+      "step": 18960
+    },
+    {
+      "epoch": 0.03701049573199565,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004939924485413019,
+      "loss": 19.705,
+      "step": 18980
+    },
+    {
+      "epoch": 0.03704949520062789,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004939859465103925,
+      "loss": 19.6594,
+      "step": 19000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.3968203395446604e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null