Training in progress, step 7500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34c8f104effe1a88e833bb692c7b75c569bc83b156fc0482dcf0ed735fda2945
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:152c34cc1ef8eea86d84f7b0351d9f983b40e24507e8054571349aacd4aba343
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26c334859cc6eb4b1ef4006976a7f325a89208371148b26da8caf2a6573930ff
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:8012a529a81b3f92efa4c79d19d5460d546f7ff16907210ecdb6456891de9745
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2f6f65c0c5e2316b09e8cb46abab96e8f2ae754bdffd662e804a33277263cd9
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:b50695bbf99bef39c4d13662a35b1f845a2b2c6b19490939ad9cc39127e32ab1
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c87a18ccc821b756f8fecf0a1e33873b3617702f02d6f52c0042644b36bee0d
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1a5b64fb90c999b23793906d64020914f128f72d1523c4f0f8e8ea53ab2425c
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.1826322013853692,
   "eval_steps": 500,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5027,6 +5027,364 @@
       "eval_samples_per_second": 245.6,
       "eval_steps_per_second": 5.158,
       "step": 7000
     }
   ],
   "logging_steps": 10,
@@ -5046,7 +5404,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3411768424608563e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2671059300557528,
   "eval_steps": 500,
+  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 245.6,
       "eval_steps_per_second": 5.158,
       "step": 7000
+    },
+    {
+      "epoch": 1.184321675958777,
+      "grad_norm": 0.49278581142425537,
+      "learning_rate": 0.000145689605353268,
+      "loss": 4.424203109741211,
+      "step": 7010
+    },
+    {
+      "epoch": 1.1860111505321844,
+      "grad_norm": 0.49644234776496887,
+      "learning_rate": 0.00014521082727641152,
+      "loss": 4.395336151123047,
+      "step": 7020
+    },
+    {
+      "epoch": 1.1877006251055922,
+      "grad_norm": 0.483456552028656,
+      "learning_rate": 0.0001447320980362472,
+      "loss": 4.440347671508789,
+      "step": 7030
+    },
+    {
+      "epoch": 1.189390099679,
+      "grad_norm": 0.5150992274284363,
+      "learning_rate": 0.00014425342251452679,
+      "loss": 4.393960571289062,
+      "step": 7040
+    },
+    {
+      "epoch": 1.1910795742524076,
+      "grad_norm": 0.47316014766693115,
+      "learning_rate": 0.00014377480559245434,
+      "loss": 4.433261108398438,
+      "step": 7050
+    },
+    {
+      "epoch": 1.1927690488258151,
+      "grad_norm": 0.5043189525604248,
+      "learning_rate": 0.00014329625215063629,
+      "loss": 4.437650680541992,
+      "step": 7060
+    },
+    {
+      "epoch": 1.1944585233992229,
+      "grad_norm": 0.49998390674591064,
+      "learning_rate": 0.00014281776706903177,
+      "loss": 4.40019416809082,
+      "step": 7070
+    },
+    {
+      "epoch": 1.1961479979726306,
+      "grad_norm": 0.5133141279220581,
+      "learning_rate": 0.0001423393552269028,
+      "loss": 4.417116928100586,
+      "step": 7080
+    },
+    {
+      "epoch": 1.1978374725460381,
+      "grad_norm": 0.513031005859375,
+      "learning_rate": 0.00014186102150276454,
+      "loss": 4.438409805297852,
+      "step": 7090
+    },
+    {
+      "epoch": 1.1995269471194459,
+      "grad_norm": 0.4915519058704376,
+      "learning_rate": 0.00014138277077433567,
+      "loss": 4.4253074645996096,
+      "step": 7100
+    },
+    {
+      "epoch": 1.2012164216928536,
+      "grad_norm": 0.5202800035476685,
+      "learning_rate": 0.00014090460791848827,
+      "loss": 4.41809310913086,
+      "step": 7110
+    },
+    {
+      "epoch": 1.202905896266261,
+      "grad_norm": 0.49077826738357544,
+      "learning_rate": 0.00014042653781119868,
+      "loss": 4.397499465942383,
+      "step": 7120
+    },
+    {
+      "epoch": 1.2045953708396688,
+      "grad_norm": 0.4648706912994385,
+      "learning_rate": 0.0001399485653274973,
+      "loss": 4.408271026611328,
+      "step": 7130
+    },
+    {
+      "epoch": 1.2062848454130766,
+      "grad_norm": 0.4614482820034027,
+      "learning_rate": 0.00013947069534141904,
+      "loss": 4.425214004516602,
+      "step": 7140
+    },
+    {
+      "epoch": 1.207974319986484,
+      "grad_norm": 0.4744400084018707,
+      "learning_rate": 0.00013899293272595355,
+      "loss": 4.440077590942383,
+      "step": 7150
+    },
+    {
+      "epoch": 1.2096637945598918,
+      "grad_norm": 0.46556323766708374,
+      "learning_rate": 0.0001385152823529957,
+      "loss": 4.412957382202149,
+      "step": 7160
+    },
+    {
+      "epoch": 1.2113532691332995,
+      "grad_norm": 0.4939349591732025,
+      "learning_rate": 0.00013803774909329567,
+      "loss": 4.405846023559571,
+      "step": 7170
+    },
+    {
+      "epoch": 1.2130427437067073,
+      "grad_norm": 0.47055721282958984,
+      "learning_rate": 0.0001375603378164095,
+      "loss": 4.382000350952149,
+      "step": 7180
+    },
+    {
+      "epoch": 1.2147322182801148,
+      "grad_norm": 0.47987523674964905,
+      "learning_rate": 0.00013708305339064933,
+      "loss": 4.415153121948242,
+      "step": 7190
+    },
+    {
+      "epoch": 1.2164216928535225,
+      "grad_norm": 0.4784037470817566,
+      "learning_rate": 0.00013660590068303373,
+      "loss": 4.4415229797363285,
+      "step": 7200
+    },
+    {
+      "epoch": 1.2181111674269303,
+      "grad_norm": 0.500056803226471,
+      "learning_rate": 0.00013612888455923804,
+      "loss": 4.416479873657226,
+      "step": 7210
+    },
+    {
+      "epoch": 1.219800642000338,
+      "grad_norm": 0.4778987169265747,
+      "learning_rate": 0.0001356520098835449,
+      "loss": 4.442354583740235,
+      "step": 7220
+    },
+    {
+      "epoch": 1.2214901165737455,
+      "grad_norm": 0.5005702376365662,
+      "learning_rate": 0.00013517528151879457,
+      "loss": 4.411639404296875,
+      "step": 7230
+    },
+    {
+      "epoch": 1.2231795911471532,
+      "grad_norm": 0.4689568281173706,
+      "learning_rate": 0.0001346987043263352,
+      "loss": 4.414199447631836,
+      "step": 7240
+    },
+    {
+      "epoch": 1.224869065720561,
+      "grad_norm": 0.4993502199649811,
+      "learning_rate": 0.00013422228316597356,
+      "loss": 4.432155609130859,
+      "step": 7250
+    },
+    {
+      "epoch": 1.2265585402939685,
+      "grad_norm": 0.4782608151435852,
+      "learning_rate": 0.00013374602289592508,
+      "loss": 4.431560897827149,
+      "step": 7260
+    },
+    {
+      "epoch": 1.2282480148673762,
+      "grad_norm": 0.5125144124031067,
+      "learning_rate": 0.00013326992837276494,
+      "loss": 4.405394744873047,
+      "step": 7270
+    },
+    {
+      "epoch": 1.229937489440784,
+      "grad_norm": 0.48408523201942444,
+      "learning_rate": 0.0001327940044513777,
+      "loss": 4.4137004852294925,
+      "step": 7280
+    },
+    {
+      "epoch": 1.2316269640141915,
+      "grad_norm": 0.4888753294944763,
+      "learning_rate": 0.00013231825598490854,
+      "loss": 4.409386062622071,
+      "step": 7290
+    },
+    {
+      "epoch": 1.2333164385875992,
+      "grad_norm": 0.47923538088798523,
+      "learning_rate": 0.0001318426878247133,
+      "loss": 4.4191631317138675,
+      "step": 7300
+    },
+    {
+      "epoch": 1.235005913161007,
+      "grad_norm": 0.46775540709495544,
+      "learning_rate": 0.00013136730482030928,
+      "loss": 4.423541259765625,
+      "step": 7310
+    },
+    {
+      "epoch": 1.2366953877344147,
+      "grad_norm": 0.48620909452438354,
+      "learning_rate": 0.0001308921118193257,
+      "loss": 4.431262969970703,
+      "step": 7320
+    },
+    {
+      "epoch": 1.2383848623078222,
+      "grad_norm": 0.5028111338615417,
+      "learning_rate": 0.00013041711366745408,
+      "loss": 4.423612976074219,
+      "step": 7330
+    },
+    {
+      "epoch": 1.24007433688123,
+      "grad_norm": 0.4982888996601105,
+      "learning_rate": 0.00012994231520839934,
+      "loss": 4.428596878051758,
+      "step": 7340
+    },
+    {
+      "epoch": 1.2417638114546377,
+      "grad_norm": 0.5141102075576782,
+      "learning_rate": 0.0001294677212838297,
+      "loss": 4.398578262329101,
+      "step": 7350
+    },
+    {
+      "epoch": 1.2434532860280454,
+      "grad_norm": 0.48103561997413635,
+      "learning_rate": 0.00012899333673332795,
+      "loss": 4.439675140380859,
+      "step": 7360
+    },
+    {
+      "epoch": 1.245142760601453,
+      "grad_norm": 0.5084096789360046,
+      "learning_rate": 0.00012851916639434164,
+      "loss": 4.3824302673339846,
+      "step": 7370
+    },
+    {
+      "epoch": 1.2468322351748606,
+      "grad_norm": 0.4776511788368225,
+      "learning_rate": 0.00012804521510213407,
+      "loss": 4.402749633789062,
+      "step": 7380
+    },
+    {
+      "epoch": 1.2485217097482684,
+      "grad_norm": 0.499318391084671,
+      "learning_rate": 0.00012757148768973483,
+      "loss": 4.405498886108399,
+      "step": 7390
+    },
+    {
+      "epoch": 1.2502111843216759,
+      "grad_norm": 0.4898117184638977,
+      "learning_rate": 0.00012709798898789042,
+      "loss": 4.4396411895751955,
+      "step": 7400
+    },
+    {
+      "epoch": 1.2519006588950836,
+      "grad_norm": 0.4777224361896515,
+      "learning_rate": 0.00012662472382501524,
+      "loss": 4.409711074829102,
+      "step": 7410
+    },
+    {
+      "epoch": 1.2535901334684914,
+      "grad_norm": 0.48530757427215576,
+      "learning_rate": 0.0001261516970271422,
+      "loss": 4.4214935302734375,
+      "step": 7420
+    },
+    {
+      "epoch": 1.2552796080418989,
+      "grad_norm": 0.48434415459632874,
+      "learning_rate": 0.0001256789134178735,
+      "loss": 4.438081741333008,
+      "step": 7430
+    },
+    {
+      "epoch": 1.2569690826153066,
+      "grad_norm": 0.4974631071090698,
+      "learning_rate": 0.00012520637781833144,
+      "loss": 4.407797622680664,
+      "step": 7440
+    },
+    {
+      "epoch": 1.2586585571887143,
+      "grad_norm": 0.4732743799686432,
+      "learning_rate": 0.0001247340950471094,
+      "loss": 4.418028259277344,
+      "step": 7450
+    },
+    {
+      "epoch": 1.2603480317621218,
+      "grad_norm": 0.5003547072410583,
+      "learning_rate": 0.0001242620699202224,
+      "loss": 4.400883483886719,
+      "step": 7460
+    },
+    {
+      "epoch": 1.2620375063355296,
+      "grad_norm": 0.49987900257110596,
+      "learning_rate": 0.00012379030725105837,
+      "loss": 4.402442169189453,
+      "step": 7470
+    },
+    {
+      "epoch": 1.2637269809089373,
+      "grad_norm": 0.49416637420654297,
+      "learning_rate": 0.00012331881185032872,
+      "loss": 4.388990020751953,
+      "step": 7480
+    },
+    {
+      "epoch": 1.265416455482345,
+      "grad_norm": 0.5343226194381714,
+      "learning_rate": 0.00012284758852601962,
+      "loss": 4.411848449707032,
+      "step": 7490
+    },
+    {
+      "epoch": 1.2671059300557528,
+      "grad_norm": 0.5128340125083923,
+      "learning_rate": 0.00012237664208334263,
+      "loss": 4.403173446655273,
+      "step": 7500
+    },
+    {
+      "epoch": 1.2671059300557528,
+      "eval_loss": 4.382744789123535,
+      "eval_runtime": 3.7472,
+      "eval_samples_per_second": 266.869,
+      "eval_steps_per_second": 5.604,
+      "step": 7500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.5084052528902963e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null