Training in progress, step 7000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:539ec21ed2f2d5401b90d0d0b28a43621343b47ec158a5dc912ef7d73a069cdf
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:34c8f104effe1a88e833bb692c7b75c569bc83b156fc0482dcf0ed735fda2945
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c631e1446372f309276121049f5c8b7603bed555765afc41b0a5db7f194949eb
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:26c334859cc6eb4b1ef4006976a7f325a89208371148b26da8caf2a6573930ff
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59ab6babcc58d5a8a0338e2999283607960e6faa29d71e8d0c3f11e2480b272d
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2f6f65c0c5e2316b09e8cb46abab96e8f2ae754bdffd662e804a33277263cd9
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad5a3c7ee6384cdea60f7a41957135fc1d6a8e0bdd3b9a0dd5c4c46f69d638ec
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c87a18ccc821b756f8fecf0a1e33873b3617702f02d6f52c0042644b36bee0d
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0981584727149856,
   "eval_steps": 500,
-  "global_step": 6500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4669,6 +4669,364 @@
       "eval_samples_per_second": 248.518,
       "eval_steps_per_second": 5.219,
       "step": 6500
     }
   ],
   "logging_steps": 10,
@@ -4688,7 +5046,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1739484320314163e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.1826322013853692,
   "eval_steps": 500,
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 248.518,
       "eval_steps_per_second": 5.219,
       "step": 6500
+    },
+    {
+      "epoch": 1.0998479472883933,
+      "grad_norm": 0.4943171739578247,
+      "learning_rate": 0.0001695828863325459,
+      "loss": 4.467470932006836,
+      "step": 6510
+    },
+    {
+      "epoch": 1.101537421861801,
+      "grad_norm": 0.474933385848999,
+      "learning_rate": 0.00016910788818067434,
+      "loss": 4.4371185302734375,
+      "step": 6520
+    },
+    {
+      "epoch": 1.1032268964352085,
+      "grad_norm": 0.5118041634559631,
+      "learning_rate": 0.0001686326951796907,
+      "loss": 4.451096725463867,
+      "step": 6530
+    },
+    {
+      "epoch": 1.1049163710086163,
+      "grad_norm": 0.5289651155471802,
+      "learning_rate": 0.00016815731217528667,
+      "loss": 4.448075485229492,
+      "step": 6540
+    },
+    {
+      "epoch": 1.106605845582024,
+      "grad_norm": 0.5182890295982361,
+      "learning_rate": 0.00016768174401509143,
+      "loss": 4.467396926879883,
+      "step": 6550
+    },
+    {
+      "epoch": 1.1082953201554318,
+      "grad_norm": 0.5209820866584778,
+      "learning_rate": 0.0001672059955486223,
+      "loss": 4.459186172485351,
+      "step": 6560
+    },
+    {
+      "epoch": 1.1099847947288393,
+      "grad_norm": 0.48584309220314026,
+      "learning_rate": 0.000166730071627235,
+      "loss": 4.46546516418457,
+      "step": 6570
+    },
+    {
+      "epoch": 1.111674269302247,
+      "grad_norm": 0.5017306804656982,
+      "learning_rate": 0.00016625397710407487,
+      "loss": 4.452592086791992,
+      "step": 6580
+    },
+    {
+      "epoch": 1.1133637438756547,
+      "grad_norm": 0.46485376358032227,
+      "learning_rate": 0.00016577771683402647,
+      "loss": 4.46324348449707,
+      "step": 6590
+    },
+    {
+      "epoch": 1.1150532184490622,
+      "grad_norm": 0.5154596567153931,
+      "learning_rate": 0.00016530129567366483,
+      "loss": 4.457768249511719,
+      "step": 6600
+    },
+    {
+      "epoch": 1.11674269302247,
+      "grad_norm": 0.49490463733673096,
+      "learning_rate": 0.0001648247184812054,
+      "loss": 4.427638244628906,
+      "step": 6610
+    },
+    {
+      "epoch": 1.1184321675958777,
+      "grad_norm": 0.4721022844314575,
+      "learning_rate": 0.00016434799011645507,
+      "loss": 4.4389793395996096,
+      "step": 6620
+    },
+    {
+      "epoch": 1.1201216421692854,
+      "grad_norm": 0.4648183286190033,
+      "learning_rate": 0.00016387111544076193,
+      "loss": 4.460124969482422,
+      "step": 6630
+    },
+    {
+      "epoch": 1.121811116742693,
+      "grad_norm": 0.5035665035247803,
+      "learning_rate": 0.00016339409931696625,
+      "loss": 4.439287185668945,
+      "step": 6640
+    },
+    {
+      "epoch": 1.1235005913161007,
+      "grad_norm": 0.4910880923271179,
+      "learning_rate": 0.00016291694660935065,
+      "loss": 4.456634140014648,
+      "step": 6650
+    },
+    {
+      "epoch": 1.1251900658895084,
+      "grad_norm": 0.48906245827674866,
+      "learning_rate": 0.00016243966218359047,
+      "loss": 4.428804016113281,
+      "step": 6660
+    },
+    {
+      "epoch": 1.126879540462916,
+      "grad_norm": 0.5756556391716003,
+      "learning_rate": 0.00016196225090670435,
+      "loss": 4.411157608032227,
+      "step": 6670
+    },
+    {
+      "epoch": 1.1285690150363237,
+      "grad_norm": 0.49011167883872986,
+      "learning_rate": 0.0001614847176470043,
+      "loss": 4.435109329223633,
+      "step": 6680
+    },
+    {
+      "epoch": 1.1302584896097314,
+      "grad_norm": 0.4775542616844177,
+      "learning_rate": 0.00016100706727404645,
+      "loss": 4.428675842285156,
+      "step": 6690
+    },
+    {
+      "epoch": 1.131947964183139,
+      "grad_norm": 0.5201391577720642,
+      "learning_rate": 0.00016052930465858094,
+      "loss": 4.4389808654785154,
+      "step": 6700
+    },
+    {
+      "epoch": 1.1336374387565467,
+      "grad_norm": 0.49004724621772766,
+      "learning_rate": 0.00016005143467250267,
+      "loss": 4.459021377563476,
+      "step": 6710
+    },
+    {
+      "epoch": 1.1353269133299544,
+      "grad_norm": 0.49011871218681335,
+      "learning_rate": 0.00015957346218880124,
+      "loss": 4.455972290039062,
+      "step": 6720
+    },
+    {
+      "epoch": 1.1370163879033621,
+      "grad_norm": 0.5173168182373047,
+      "learning_rate": 0.0001590953920815117,
+      "loss": 4.443459701538086,
+      "step": 6730
+    },
+    {
+      "epoch": 1.1387058624767696,
+      "grad_norm": 0.47700756788253784,
+      "learning_rate": 0.00015861722922566436,
+      "loss": 4.435110473632813,
+      "step": 6740
+    },
+    {
+      "epoch": 1.1403953370501774,
+      "grad_norm": 0.5626063942909241,
+      "learning_rate": 0.00015813897849723544,
+      "loss": 4.432453536987305,
+      "step": 6750
+    },
+    {
+      "epoch": 1.142084811623585,
+      "grad_norm": 0.49542316794395447,
+      "learning_rate": 0.0001576606447730972,
+      "loss": 4.4374950408935545,
+      "step": 6760
+    },
+    {
+      "epoch": 1.1437742861969928,
+      "grad_norm": 0.5116281509399414,
+      "learning_rate": 0.0001571822329309682,
+      "loss": 4.423119354248047,
+      "step": 6770
+    },
+    {
+      "epoch": 1.1454637607704004,
+      "grad_norm": 0.4868847131729126,
+      "learning_rate": 0.00015670374784936371,
+      "loss": 4.4402107238769535,
+      "step": 6780
+    },
+    {
+      "epoch": 1.147153235343808,
+      "grad_norm": 0.4938635230064392,
+      "learning_rate": 0.00015622519440754566,
+      "loss": 4.424631881713867,
+      "step": 6790
+    },
+    {
+      "epoch": 1.1488427099172158,
+      "grad_norm": 0.5740174651145935,
+      "learning_rate": 0.0001557465774854732,
+      "loss": 4.450838470458985,
+      "step": 6800
+    },
+    {
+      "epoch": 1.1505321844906233,
+      "grad_norm": 0.4828670918941498,
+      "learning_rate": 0.0001552679019637528,
+      "loss": 4.438276290893555,
+      "step": 6810
+    },
+    {
+      "epoch": 1.152221659064031,
+      "grad_norm": 0.4659689664840698,
+      "learning_rate": 0.00015478917272358848,
+      "loss": 4.426282501220703,
+      "step": 6820
+    },
+    {
+      "epoch": 1.1539111336374388,
+      "grad_norm": 0.4927656352519989,
+      "learning_rate": 0.000154310394646732,
+      "loss": 4.464373016357422,
+      "step": 6830
+    },
+    {
+      "epoch": 1.1556006082108463,
+      "grad_norm": 0.5161291360855103,
+      "learning_rate": 0.00015383157261543318,
+      "loss": 4.416297531127929,
+      "step": 6840
+    },
+    {
+      "epoch": 1.157290082784254,
+      "grad_norm": 0.4933563768863678,
+      "learning_rate": 0.00015335271151239,
+      "loss": 4.420982742309571,
+      "step": 6850
+    },
+    {
+      "epoch": 1.1589795573576618,
+      "grad_norm": 0.4847005307674408,
+      "learning_rate": 0.00015287381622069892,
+      "loss": 4.416022872924804,
+      "step": 6860
+    },
+    {
+      "epoch": 1.1606690319310695,
+      "grad_norm": 0.4981960654258728,
+      "learning_rate": 0.00015239489162380504,
+      "loss": 4.422767639160156,
+      "step": 6870
+    },
+    {
+      "epoch": 1.162358506504477,
+      "grad_norm": 0.5001937747001648,
+      "learning_rate": 0.0001519159426054522,
+      "loss": 4.4368339538574215,
+      "step": 6880
+    },
+    {
+      "epoch": 1.1640479810778848,
+      "grad_norm": 0.5044972896575928,
+      "learning_rate": 0.0001514369740496334,
+      "loss": 4.411078643798828,
+      "step": 6890
+    },
+    {
+      "epoch": 1.1657374556512925,
+      "grad_norm": 0.4734691083431244,
+      "learning_rate": 0.00015095799084054073,
+      "loss": 4.438079071044922,
+      "step": 6900
+    },
+    {
+      "epoch": 1.1674269302247002,
+      "grad_norm": 0.49377161264419556,
+      "learning_rate": 0.00015047899786251587,
+      "loss": 4.442370986938476,
+      "step": 6910
+    },
+    {
+      "epoch": 1.1691164047981077,
+      "grad_norm": 0.5010132193565369,
+      "learning_rate": 0.00015,
+      "loss": 4.442108917236328,
+      "step": 6920
+    },
+    {
+      "epoch": 1.1708058793715155,
+      "grad_norm": 0.5035766959190369,
+      "learning_rate": 0.0001495210021374841,
+      "loss": 4.430604553222656,
+      "step": 6930
+    },
+    {
+      "epoch": 1.1724953539449232,
+      "grad_norm": 0.4899141788482666,
+      "learning_rate": 0.00014904200915945927,
+      "loss": 4.435578918457031,
+      "step": 6940
+    },
+    {
+      "epoch": 1.1741848285183307,
+      "grad_norm": 0.4718686044216156,
+      "learning_rate": 0.00014856302595036663,
+      "loss": 4.429093551635742,
+      "step": 6950
+    },
+    {
+      "epoch": 1.1758743030917385,
+      "grad_norm": 0.4881162941455841,
+      "learning_rate": 0.00014808405739454776,
+      "loss": 4.408749008178711,
+      "step": 6960
+    },
+    {
+      "epoch": 1.1775637776651462,
+      "grad_norm": 0.46740713715553284,
+      "learning_rate": 0.00014760510837619493,
+      "loss": 4.419464492797852,
+      "step": 6970
+    },
+    {
+      "epoch": 1.1792532522385537,
+      "grad_norm": 0.4737609922885895,
+      "learning_rate": 0.00014712618377930105,
+      "loss": 4.421468353271484,
+      "step": 6980
+    },
+    {
+      "epoch": 1.1809427268119614,
+      "grad_norm": 0.4975055754184723,
+      "learning_rate": 0.00014664728848760996,
+      "loss": 4.422280502319336,
+      "step": 6990
+    },
+    {
+      "epoch": 1.1826322013853692,
+      "grad_norm": 0.4839191734790802,
+      "learning_rate": 0.00014616842738456682,
+      "loss": 4.395424652099609,
+      "step": 7000
+    },
+    {
+      "epoch": 1.1826322013853692,
+      "eval_loss": 4.412718772888184,
+      "eval_runtime": 4.0717,
+      "eval_samples_per_second": 245.6,
+      "eval_steps_per_second": 5.158,
+      "step": 7000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.3411768424608563e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null