Training in progress, step 7500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c76cf6da384756da592c5c50d0169c8d71834422387d445a1df34766e643d9bb
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c46250bd52dcffd2137953f30321a4ed3d622b1bca6be15bc5f8f084e4fc31f
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68bf75b9ec958e92f211abfe579212c04190172b49bdfbcf136eea9aae980133
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:71e649d70bbaea3f3c60f2aa0818a879521dffa0038d58ed1695489f8bca966b
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:012c45d165b3369856a4591817420a71a07d3d2cd37f890b655313517015a2fd
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:7105421fba4235e8fc90f3dbc4569b85e884f75c3232217a25f8f5042cf8247a
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c87a18ccc821b756f8fecf0a1e33873b3617702f02d6f52c0042644b36bee0d
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1a5b64fb90c999b23793906d64020914f128f72d1523c4f0f8e8ea53ab2425c
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1826322013853692,
   "eval_steps": 500,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5027,6 +5027,364 @@
       "eval_samples_per_second": 274.439,
       "eval_steps_per_second": 5.763,
       "step": 7000
     }
   ],
   "logging_steps": 10,
@@ -5046,7 +5404,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3411768424608563e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2671059300557528,
   "eval_steps": 500,
+  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 274.439,
       "eval_steps_per_second": 5.763,
       "step": 7000
+    },
+    {
+      "epoch": 1.184321675958777,
+      "grad_norm": 0.4751238524913788,
+      "learning_rate": 0.000145689605353268,
+      "loss": 4.423115158081055,
+      "step": 7010
+    },
+    {
+      "epoch": 1.1860111505321844,
+      "grad_norm": 0.5007295608520508,
+      "learning_rate": 0.00014521082727641152,
+      "loss": 4.394451522827149,
+      "step": 7020
+    },
+    {
+      "epoch": 1.1877006251055922,
+      "grad_norm": 0.48377716541290283,
+      "learning_rate": 0.0001447320980362472,
+      "loss": 4.4401298522949215,
+      "step": 7030
+    },
+    {
+      "epoch": 1.189390099679,
+      "grad_norm": 0.5163672566413879,
+      "learning_rate": 0.00014425342251452679,
+      "loss": 4.39234504699707,
+      "step": 7040
+    },
+    {
+      "epoch": 1.1910795742524076,
+      "grad_norm": 0.4772108793258667,
+      "learning_rate": 0.00014377480559245434,
+      "loss": 4.4317176818847654,
+      "step": 7050
+    },
+    {
+      "epoch": 1.1927690488258151,
+      "grad_norm": 0.5122771263122559,
+      "learning_rate": 0.00014329625215063629,
+      "loss": 4.436024856567383,
+      "step": 7060
+    },
+    {
+      "epoch": 1.1944585233992229,
+      "grad_norm": 0.4967247545719147,
+      "learning_rate": 0.00014281776706903177,
+      "loss": 4.399545288085937,
+      "step": 7070
+    },
+    {
+      "epoch": 1.1961479979726306,
+      "grad_norm": 0.5011969208717346,
+      "learning_rate": 0.0001423393552269028,
+      "loss": 4.415679550170898,
+      "step": 7080
+    },
+    {
+      "epoch": 1.1978374725460381,
+      "grad_norm": 0.5074766874313354,
+      "learning_rate": 0.00014186102150276454,
+      "loss": 4.4365581512451175,
+      "step": 7090
+    },
+    {
+      "epoch": 1.1995269471194459,
+      "grad_norm": 0.48712876439094543,
+      "learning_rate": 0.00014138277077433567,
+      "loss": 4.423890686035156,
+      "step": 7100
+    },
+    {
+      "epoch": 1.2012164216928536,
+      "grad_norm": 0.5180738568305969,
+      "learning_rate": 0.00014090460791848827,
+      "loss": 4.416677474975586,
+      "step": 7110
+    },
+    {
+      "epoch": 1.202905896266261,
+      "grad_norm": 0.45985448360443115,
+      "learning_rate": 0.00014042653781119868,
+      "loss": 4.396572494506836,
+      "step": 7120
+    },
+    {
+      "epoch": 1.2045953708396688,
+      "grad_norm": 0.4647870361804962,
+      "learning_rate": 0.0001399485653274973,
+      "loss": 4.408332824707031,
+      "step": 7130
+    },
+    {
+      "epoch": 1.2062848454130766,
+      "grad_norm": 0.47191551327705383,
+      "learning_rate": 0.00013947069534141904,
+      "loss": 4.4233543395996096,
+      "step": 7140
+    },
+    {
+      "epoch": 1.207974319986484,
+      "grad_norm": 0.481503427028656,
+      "learning_rate": 0.00013899293272595355,
+      "loss": 4.440040969848633,
+      "step": 7150
+    },
+    {
+      "epoch": 1.2096637945598918,
+      "grad_norm": 0.47193533182144165,
+      "learning_rate": 0.0001385152823529957,
+      "loss": 4.411207962036133,
+      "step": 7160
+    },
+    {
+      "epoch": 1.2113532691332995,
+      "grad_norm": 0.4883231520652771,
+      "learning_rate": 0.00013803774909329567,
+      "loss": 4.404615020751953,
+      "step": 7170
+    },
+    {
+      "epoch": 1.2130427437067073,
+      "grad_norm": 0.47556227445602417,
+      "learning_rate": 0.0001375603378164095,
+      "loss": 4.381325149536133,
+      "step": 7180
+    },
+    {
+      "epoch": 1.2147322182801148,
+      "grad_norm": 0.47802919149398804,
+      "learning_rate": 0.00013708305339064933,
+      "loss": 4.414505386352539,
+      "step": 7190
+    },
+    {
+      "epoch": 1.2164216928535225,
+      "grad_norm": 0.4807351529598236,
+      "learning_rate": 0.00013660590068303373,
+      "loss": 4.441463470458984,
+      "step": 7200
+    },
+    {
+      "epoch": 1.2181111674269303,
+      "grad_norm": 0.5178191065788269,
+      "learning_rate": 0.00013612888455923804,
+      "loss": 4.416116714477539,
+      "step": 7210
+    },
+    {
+      "epoch": 1.219800642000338,
+      "grad_norm": 0.48551276326179504,
+      "learning_rate": 0.0001356520098835449,
+      "loss": 4.4417884826660154,
+      "step": 7220
+    },
+    {
+      "epoch": 1.2214901165737455,
+      "grad_norm": 0.4903479218482971,
+      "learning_rate": 0.00013517528151879457,
+      "loss": 4.411055374145508,
+      "step": 7230
+    },
+    {
+      "epoch": 1.2231795911471532,
+      "grad_norm": 0.475599080324173,
+      "learning_rate": 0.0001346987043263352,
+      "loss": 4.41358642578125,
+      "step": 7240
+    },
+    {
+      "epoch": 1.224869065720561,
+      "grad_norm": 0.5013647079467773,
+      "learning_rate": 0.00013422228316597356,
+      "loss": 4.4313819885253904,
+      "step": 7250
+    },
+    {
+      "epoch": 1.2265585402939685,
+      "grad_norm": 0.4796617031097412,
+      "learning_rate": 0.00013374602289592508,
+      "loss": 4.431335830688477,
+      "step": 7260
+    },
+    {
+      "epoch": 1.2282480148673762,
+      "grad_norm": 0.5207120180130005,
+      "learning_rate": 0.00013326992837276494,
+      "loss": 4.4034477233886715,
+      "step": 7270
+    },
+    {
+      "epoch": 1.229937489440784,
+      "grad_norm": 0.48880499601364136,
+      "learning_rate": 0.0001327940044513777,
+      "loss": 4.412507629394531,
+      "step": 7280
+    },
+    {
+      "epoch": 1.2316269640141915,
+      "grad_norm": 0.4934345483779907,
+      "learning_rate": 0.00013231825598490854,
+      "loss": 4.4090087890625,
+      "step": 7290
+    },
+    {
+      "epoch": 1.2333164385875992,
+      "grad_norm": 0.4781767427921295,
+      "learning_rate": 0.0001318426878247133,
+      "loss": 4.418943786621094,
+      "step": 7300
+    },
+    {
+      "epoch": 1.235005913161007,
+      "grad_norm": 0.5096566081047058,
+      "learning_rate": 0.00013136730482030928,
+      "loss": 4.422417449951172,
+      "step": 7310
+    },
+    {
+      "epoch": 1.2366953877344147,
+      "grad_norm": 0.4950323700904846,
+      "learning_rate": 0.0001308921118193257,
+      "loss": 4.429974746704102,
+      "step": 7320
+    },
+    {
+      "epoch": 1.2383848623078222,
+      "grad_norm": 0.513830304145813,
+      "learning_rate": 0.00013041711366745408,
+      "loss": 4.422944641113281,
+      "step": 7330
+    },
+    {
+      "epoch": 1.24007433688123,
+      "grad_norm": 0.48782795667648315,
+      "learning_rate": 0.00012994231520839934,
+      "loss": 4.427762222290039,
+      "step": 7340
+    },
+    {
+      "epoch": 1.2417638114546377,
+      "grad_norm": 0.4940618872642517,
+      "learning_rate": 0.0001294677212838297,
+      "loss": 4.397305297851562,
+      "step": 7350
+    },
+    {
+      "epoch": 1.2434532860280454,
+      "grad_norm": 0.47247400879859924,
+      "learning_rate": 0.00012899333673332795,
+      "loss": 4.438782501220703,
+      "step": 7360
+    },
+    {
+      "epoch": 1.245142760601453,
+      "grad_norm": 0.508912205696106,
+      "learning_rate": 0.00012851916639434164,
+      "loss": 4.381727600097657,
+      "step": 7370
+    },
+    {
+      "epoch": 1.2468322351748606,
+      "grad_norm": 0.47959408164024353,
+      "learning_rate": 0.00012804521510213407,
+      "loss": 4.4016876220703125,
+      "step": 7380
+    },
+    {
+      "epoch": 1.2485217097482684,
+      "grad_norm": 0.4996829330921173,
+      "learning_rate": 0.00012757148768973483,
+      "loss": 4.403145217895508,
+      "step": 7390
+    },
+    {
+      "epoch": 1.2502111843216759,
+      "grad_norm": 0.4780764877796173,
+      "learning_rate": 0.00012709798898789042,
+      "loss": 4.438173294067383,
+      "step": 7400
+    },
+    {
+      "epoch": 1.2519006588950836,
+      "grad_norm": 0.4783228039741516,
+      "learning_rate": 0.00012662472382501524,
+      "loss": 4.408795928955078,
+      "step": 7410
+    },
+    {
+      "epoch": 1.2535901334684914,
+      "grad_norm": 0.5030134320259094,
+      "learning_rate": 0.0001261516970271422,
+      "loss": 4.420645523071289,
+      "step": 7420
+    },
+    {
+      "epoch": 1.2552796080418989,
+      "grad_norm": 0.4808700978755951,
+      "learning_rate": 0.0001256789134178735,
+      "loss": 4.436854553222656,
+      "step": 7430
+    },
+    {
+      "epoch": 1.2569690826153066,
+      "grad_norm": 0.493521511554718,
+      "learning_rate": 0.00012520637781833144,
+      "loss": 4.406418991088867,
+      "step": 7440
+    },
+    {
+      "epoch": 1.2586585571887143,
+      "grad_norm": 0.4652424454689026,
+      "learning_rate": 0.0001247340950471094,
+      "loss": 4.417532348632813,
+      "step": 7450
+    },
+    {
+      "epoch": 1.2603480317621218,
+      "grad_norm": 0.5171105265617371,
+      "learning_rate": 0.0001242620699202224,
+      "loss": 4.399831390380859,
+      "step": 7460
+    },
+    {
+      "epoch": 1.2620375063355296,
+      "grad_norm": 0.49303922057151794,
+      "learning_rate": 0.00012379030725105837,
+      "loss": 4.401795959472656,
+      "step": 7470
+    },
+    {
+      "epoch": 1.2637269809089373,
+      "grad_norm": 0.501957893371582,
+      "learning_rate": 0.00012331881185032872,
+      "loss": 4.388158798217773,
+      "step": 7480
+    },
+    {
+      "epoch": 1.265416455482345,
+      "grad_norm": 0.5417965650558472,
+      "learning_rate": 0.00012284758852601962,
+      "loss": 4.410275650024414,
+      "step": 7490
+    },
+    {
+      "epoch": 1.2671059300557528,
+      "grad_norm": 0.5049046277999878,
+      "learning_rate": 0.00012237664208334263,
+      "loss": 4.402982330322265,
+      "step": 7500
+    },
+    {
+      "epoch": 1.2671059300557528,
+      "eval_loss": 4.3625264167785645,
+      "eval_runtime": 3.7028,
+      "eval_samples_per_second": 270.066,
+      "eval_steps_per_second": 5.671,
+      "step": 7500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.5084052528902963e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null