Training in progress, step 7500, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a619d173a81b959c06c6819e63784a3964cf704234614a53b41a95f8c4ce423b
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:85ef9658a737c10a3387f46921cffde7ab5a025ce78736b7108c955a7faeac8a
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc07de972c404bd9d03e65d6a0a8bb8a57f33213d57e57b51c19d276698a2990
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:3eb6ae8fa6329d3a9b6361b73791d9116263499067936e1da0e69f22ad24064a
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fde34039d7b04934a891fddf8651f7147686cc194dd14ef9c544d9f194e3db54
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea728a187b8c68044774e601a8e864fe3e690cd5a58b87c25fc28b6ccafe83e8
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5d095629c4afecfa399dffed86284dc4231689f617f0e254b3490299c477dd5
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe1466545cc900a21ebf0c37b4514ebc0bde5d4f73811c54ed7c2486869a9cb1
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6404ff16418ff06858ba815c4899c94a4c015e7870eab3f1b01051d9d511b73
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:4318c358ed565a24eac21beca64897e6b4960e90756779dce640fed08ea3eccd
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b933704c82ebaf750aa2519cd157aa39099844e58ed4ac2bed0623c91353a70d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:a07179b14e6d25f457c0ab61baeffe9e5158660fa3b0e9f67490cfa9f8da1124
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfcd8a09e8e46c589c8638cc20283a9b31e9d60ec45a6122361751489d45607f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7985ffec4b6d44038e4e914003aaad7f6fef0f867a7890a2fafa6d483c9c9580
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01364981402128396,
   "eval_steps": 500,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4908,6 +4908,356 @@
       "learning_rate": 0.0004978871650560052,
       "loss": 17.2572,
       "step": 7000
     }
   ],
   "logging_steps": 10,
@@ -4927,7 +5277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.557487423730588e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014624800737089957,
   "eval_steps": 500,
+  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004978871650560052,
       "loss": 17.2572,
       "step": 7000
+    },
+    {
+      "epoch": 0.01366931375560008,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004978839140405506,
+      "loss": 17.2407,
+      "step": 7010
+    },
+    {
+      "epoch": 0.0136888134899162,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004978806630250959,
+      "loss": 17.2084,
+      "step": 7020
+    },
+    {
+      "epoch": 0.01370831322423232,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004978774120096412,
+      "loss": 17.2807,
+      "step": 7030
+    },
+    {
+      "epoch": 0.01372781295854844,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004978741609941866,
+      "loss": 17.3416,
+      "step": 7040
+    },
+    {
+      "epoch": 0.01374731269286456,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004978709099787319,
+      "loss": 17.2057,
+      "step": 7050
+    },
+    {
+      "epoch": 0.01376681242718068,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004978676589632772,
+      "loss": 17.309,
+      "step": 7060
+    },
+    {
+      "epoch": 0.013786312161496799,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004978644079478225,
+      "loss": 17.2685,
+      "step": 7070
+    },
+    {
+      "epoch": 0.01380581189581292,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004978611569323679,
+      "loss": 17.2111,
+      "step": 7080
+    },
+    {
+      "epoch": 0.013825311630129039,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004978579059169132,
+      "loss": 17.1583,
+      "step": 7090
+    },
+    {
+      "epoch": 0.01384481136444516,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004978546549014585,
+      "loss": 17.275,
+      "step": 7100
+    },
+    {
+      "epoch": 0.01386431109876128,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004978514038860039,
+      "loss": 17.2723,
+      "step": 7110
+    },
+    {
+      "epoch": 0.013883810833077399,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.0004978481528705492,
+      "loss": 17.181,
+      "step": 7120
+    },
+    {
+      "epoch": 0.01390331056739352,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004978449018550945,
+      "loss": 17.2703,
+      "step": 7130
+    },
+    {
+      "epoch": 0.013922810301709639,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004978416508396397,
+      "loss": 17.1655,
+      "step": 7140
+    },
+    {
+      "epoch": 0.01394231003602576,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004978383998241851,
+      "loss": 17.0151,
+      "step": 7150
+    },
+    {
+      "epoch": 0.01396180977034188,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004978351488087304,
+      "loss": 17.1762,
+      "step": 7160
+    },
+    {
+      "epoch": 0.013981309504657998,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004978318977932757,
+      "loss": 17.1404,
+      "step": 7170
+    },
+    {
+      "epoch": 0.01400080923897412,
+      "grad_norm": 7.125,
+      "learning_rate": 0.000497828646777821,
+      "loss": 17.2528,
+      "step": 7180
+    },
+    {
+      "epoch": 0.014020308973290239,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004978253957623664,
+      "loss": 17.2482,
+      "step": 7190
+    },
+    {
+      "epoch": 0.014039808707606358,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.0004978221447469117,
+      "loss": 17.2275,
+      "step": 7200
+    },
+    {
+      "epoch": 0.014059308441922479,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.000497818893731457,
+      "loss": 17.215,
+      "step": 7210
+    },
+    {
+      "epoch": 0.014078808176238598,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004978156427160024,
+      "loss": 17.1656,
+      "step": 7220
+    },
+    {
+      "epoch": 0.01409830791055472,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004978123917005477,
+      "loss": 17.259,
+      "step": 7230
+    },
+    {
+      "epoch": 0.014117807644870839,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.000497809140685093,
+      "loss": 17.1892,
+      "step": 7240
+    },
+    {
+      "epoch": 0.014137307379186958,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004978058896696383,
+      "loss": 17.158,
+      "step": 7250
+    },
+    {
+      "epoch": 0.014156807113503079,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004978026386541837,
+      "loss": 17.1097,
+      "step": 7260
+    },
+    {
+      "epoch": 0.014176306847819198,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.000497799387638729,
+      "loss": 17.2728,
+      "step": 7270
+    },
+    {
+      "epoch": 0.01419580658213532,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004977961366232743,
+      "loss": 17.218,
+      "step": 7280
+    },
+    {
+      "epoch": 0.014215306316451438,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004977928856078195,
+      "loss": 17.2388,
+      "step": 7290
+    },
+    {
+      "epoch": 0.014234806050767558,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004977896345923649,
+      "loss": 17.1839,
+      "step": 7300
+    },
+    {
+      "epoch": 0.014254305785083679,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004977863835769102,
+      "loss": 17.0956,
+      "step": 7310
+    },
+    {
+      "epoch": 0.014273805519399798,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004977831325614555,
+      "loss": 17.2038,
+      "step": 7320
+    },
+    {
+      "epoch": 0.014293305253715917,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004977798815460009,
+      "loss": 17.2453,
+      "step": 7330
+    },
+    {
+      "epoch": 0.014312804988032038,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004977766305305462,
+      "loss": 17.2322,
+      "step": 7340
+    },
+    {
+      "epoch": 0.014332304722348158,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004977733795150915,
+      "loss": 17.1341,
+      "step": 7350
+    },
+    {
+      "epoch": 0.014351804456664279,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004977701284996368,
+      "loss": 17.2389,
+      "step": 7360
+    },
+    {
+      "epoch": 0.014371304190980398,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004977668774841822,
+      "loss": 17.2391,
+      "step": 7370
+    },
+    {
+      "epoch": 0.014390803925296517,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004977636264687275,
+      "loss": 17.1651,
+      "step": 7380
+    },
+    {
+      "epoch": 0.014410303659612638,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004977603754532728,
+      "loss": 17.2448,
+      "step": 7390
+    },
+    {
+      "epoch": 0.014429803393928757,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004977571244378182,
+      "loss": 17.168,
+      "step": 7400
+    },
+    {
+      "epoch": 0.014449303128244878,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004977538734223635,
+      "loss": 17.2225,
+      "step": 7410
+    },
+    {
+      "epoch": 0.014468802862560998,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004977506224069088,
+      "loss": 17.1799,
+      "step": 7420
+    },
+    {
+      "epoch": 0.014488302596877117,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004977473713914541,
+      "loss": 17.1958,
+      "step": 7430
+    },
+    {
+      "epoch": 0.014507802331193238,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004977441203759995,
+      "loss": 17.1678,
+      "step": 7440
+    },
+    {
+      "epoch": 0.014527302065509357,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004977408693605448,
+      "loss": 17.1689,
+      "step": 7450
+    },
+    {
+      "epoch": 0.014546801799825477,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004977376183450901,
+      "loss": 17.1743,
+      "step": 7460
+    },
+    {
+      "epoch": 0.014566301534141598,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004977343673296354,
+      "loss": 17.2665,
+      "step": 7470
+    },
+    {
+      "epoch": 0.014585801268457717,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004977311163141807,
+      "loss": 17.1666,
+      "step": 7480
+    },
+    {
+      "epoch": 0.014605301002773838,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.000497727865298726,
+      "loss": 17.1159,
+      "step": 7490
+    },
+    {
+      "epoch": 0.014624800737089957,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004977246142832713,
+      "loss": 17.2182,
+      "step": 7500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.6687308508994994e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null