Training in progress, step 10670, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +504 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb9f3b8a1efbafead71f0b4f3c0934de3258af1a9bfad9aecc866f7dc032377f
 size 738367848

 version https://git-lfs.github.com/spec/v1
+oid sha256:3745bf4a87f162fa13cf355e199ab846b0247a904a1213366e15055a6bf2f43a
 size 738367848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:090bb424ceafc5d15809a08a16a11eb78b1ac54f4b1366d08eb4391ce4040896
 size 1476823354

 version https://git-lfs.github.com/spec/v1
+oid sha256:7d1ed18d6fe47a085d91a34b52f0b0e9d63181b84ee4c405b93d5c8e09294fad
 size 1476823354

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c383f9e8151a96a9b2b8c275978c19aa387d72a92b0fa7ffae9836fb29ad4e1
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e73f3d2830bcf37e3d736f23b27e8ce733b3473d4cab28360690d45ce9f8fbaa
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c68bb140caa20e97fbacbd7b5bfac9f50a34da20ffb8898607809de5338939b7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:0272fe6cd9dc84504ed48d7b132ccc945c2d44d9831efb836fdb17160ecec1c1
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84b2b03148e735c06e96e9718897d377bb259c4fc8d0d7eac4359e0df9fd59c3
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b990e0a8c37f6931ac353c659c83107fdc7ef191bb09facaeb9644d6874f096
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb82703e57f841f914dfb29dc3442d88d2c174cf8ce56f91f0c9c5f2849c5754
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c914e450f9b754db2d81a37a948ec5a8925105ae3c9bf9546862109977afdeb
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58d69503f206ccaaa32432817fe07a0b2fe6f226f63d9d38c4bb47f2804049c1
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d26231e7958cdd63301effbce7fbbecf14c30b13c5cf7b6c8e00c1f8efd5317
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e0fdfa3710c3ff050391030ae78220221b31a31e2ceea64687f7a428110d141
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0d7d94f216995135aa7d9b310b13d4e41fe010a347e4abe474e09fb0b16e836
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a7b71b6ee08059a6838f5a634279837dec0a6f331500089354f5a30e88cd0b0
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c3ac9632d99d55f1b6c9a1a327344e69abc94e4998731b976e138a7dfbb679e
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b12210119d32a8e51c4b367e898fd3bee7de6dc9d70f1258ce5df806569ea8a4
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f742e8fccfd64912166b4c2a12a9757a7223df0c62ca19cb8c5ef3c4dd1595a8
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cc7c9d1278bb28457a3d5ee38d783399f75651e4b34a536f0367d5f3082f3b4
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:a60d80d2f480f1553b178ffde691b3d8251b3da0f0ce4460a958add33beb93ab
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.372071227741332,
   "eval_steps": 250,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7647,6 +7647,507 @@
       "eval_spearman_manhattan": 0.7421316928799319,
       "eval_steps_per_second": 7.269,
       "step": 10000
     }
   ],
   "logging_steps": 10,
@@ -7661,7 +8162,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.0,
   "eval_steps": 250,
+  "global_step": 10670,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_spearman_manhattan": 0.7421316928799319,
       "eval_steps_per_second": 7.269,
       "step": 10000
+    },
+    {
+      "epoch": 9.381443298969073,
+      "grad_norm": 0.4994644522666931,
+      "learning_rate": 9.926708547496669e-06,
+      "loss": 0.0226,
+      "step": 10010
+    },
+    {
+      "epoch": 9.390815370196814,
+      "grad_norm": 1.5270389318466187,
+      "learning_rate": 9.926635329262401e-06,
+      "loss": 0.0211,
+      "step": 10020
+    },
+    {
+      "epoch": 9.400187441424555,
+      "grad_norm": 0.47197312116622925,
+      "learning_rate": 9.92656211102813e-06,
+      "loss": 0.0235,
+      "step": 10030
+    },
+    {
+      "epoch": 9.409559512652296,
+      "grad_norm": 1.132454752922058,
+      "learning_rate": 9.926488892793863e-06,
+      "loss": 0.023,
+      "step": 10040
+    },
+    {
+      "epoch": 9.418931583880038,
+      "grad_norm": 0.7693812251091003,
+      "learning_rate": 9.926415674559593e-06,
+      "loss": 0.0247,
+      "step": 10050
+    },
+    {
+      "epoch": 9.42830365510778,
+      "grad_norm": 0.42411306500434875,
+      "learning_rate": 9.926342456325324e-06,
+      "loss": 0.0234,
+      "step": 10060
+    },
+    {
+      "epoch": 9.43767572633552,
+      "grad_norm": 0.9110538959503174,
+      "learning_rate": 9.926269238091055e-06,
+      "loss": 0.0256,
+      "step": 10070
+    },
+    {
+      "epoch": 9.447047797563261,
+      "grad_norm": 0.6932746171951294,
+      "learning_rate": 9.926196019856786e-06,
+      "loss": 0.0288,
+      "step": 10080
+    },
+    {
+      "epoch": 9.456419868791002,
+      "grad_norm": 0.6196317076683044,
+      "learning_rate": 9.926122801622516e-06,
+      "loss": 0.0239,
+      "step": 10090
+    },
+    {
+      "epoch": 9.465791940018745,
+      "grad_norm": 0.6985231637954712,
+      "learning_rate": 9.926049583388247e-06,
+      "loss": 0.0194,
+      "step": 10100
+    },
+    {
+      "epoch": 9.475164011246486,
+      "grad_norm": 0.8828220963478088,
+      "learning_rate": 9.925976365153978e-06,
+      "loss": 0.0282,
+      "step": 10110
+    },
+    {
+      "epoch": 9.484536082474227,
+      "grad_norm": 0.3887142241001129,
+      "learning_rate": 9.92590314691971e-06,
+      "loss": 0.0231,
+      "step": 10120
+    },
+    {
+      "epoch": 9.493908153701968,
+      "grad_norm": 0.696250855922699,
+      "learning_rate": 9.925829928685441e-06,
+      "loss": 0.0241,
+      "step": 10130
+    },
+    {
+      "epoch": 9.503280224929709,
+      "grad_norm": 0.9591291546821594,
+      "learning_rate": 9.925756710451172e-06,
+      "loss": 0.0237,
+      "step": 10140
+    },
+    {
+      "epoch": 9.512652296157452,
+      "grad_norm": 0.6247865557670593,
+      "learning_rate": 9.925683492216903e-06,
+      "loss": 0.0225,
+      "step": 10150
+    },
+    {
+      "epoch": 9.522024367385193,
+      "grad_norm": 0.8061539530754089,
+      "learning_rate": 9.925610273982633e-06,
+      "loss": 0.0248,
+      "step": 10160
+    },
+    {
+      "epoch": 9.531396438612934,
+      "grad_norm": 0.5681460499763489,
+      "learning_rate": 9.925537055748364e-06,
+      "loss": 0.0216,
+      "step": 10170
+    },
+    {
+      "epoch": 9.540768509840674,
+      "grad_norm": 0.7798430323600769,
+      "learning_rate": 9.925463837514095e-06,
+      "loss": 0.0205,
+      "step": 10180
+    },
+    {
+      "epoch": 9.550140581068415,
+      "grad_norm": 0.633307695388794,
+      "learning_rate": 9.925390619279827e-06,
+      "loss": 0.0257,
+      "step": 10190
+    },
+    {
+      "epoch": 9.559512652296158,
+      "grad_norm": 0.5352799892425537,
+      "learning_rate": 9.925317401045558e-06,
+      "loss": 0.0214,
+      "step": 10200
+    },
+    {
+      "epoch": 9.5688847235239,
+      "grad_norm": 1.4367021322250366,
+      "learning_rate": 9.925244182811287e-06,
+      "loss": 0.0245,
+      "step": 10210
+    },
+    {
+      "epoch": 9.57825679475164,
+      "grad_norm": 0.6616729497909546,
+      "learning_rate": 9.92517096457702e-06,
+      "loss": 0.0168,
+      "step": 10220
+    },
+    {
+      "epoch": 9.587628865979381,
+      "grad_norm": 0.5232043862342834,
+      "learning_rate": 9.92509774634275e-06,
+      "loss": 0.0229,
+      "step": 10230
+    },
+    {
+      "epoch": 9.597000937207122,
+      "grad_norm": 0.5471720099449158,
+      "learning_rate": 9.925024528108481e-06,
+      "loss": 0.0244,
+      "step": 10240
+    },
+    {
+      "epoch": 9.606373008434865,
+      "grad_norm": 0.8130425214767456,
+      "learning_rate": 9.924951309874212e-06,
+      "loss": 0.0243,
+      "step": 10250
+    },
+    {
+      "epoch": 9.606373008434865,
+      "eval_loss": 0.037354420870542526,
+      "eval_pearson_cosine": 0.7731273770332336,
+      "eval_pearson_dot": 0.7302557826042175,
+      "eval_pearson_euclidean": 0.7300422191619873,
+      "eval_pearson_manhattan": 0.7321226596832275,
+      "eval_runtime": 25.5048,
+      "eval_samples_per_second": 58.813,
+      "eval_spearman_cosine": 0.7727287355752905,
+      "eval_spearman_dot": 0.7305929253470385,
+      "eval_spearman_euclidean": 0.7346168467659768,
+      "eval_spearman_manhattan": 0.7364009847987945,
+      "eval_steps_per_second": 7.371,
+      "step": 10250
+    },
+    {
+      "epoch": 9.615745079662606,
+      "grad_norm": 0.497060626745224,
+      "learning_rate": 9.924878091639943e-06,
+      "loss": 0.0217,
+      "step": 10260
+    },
+    {
+      "epoch": 9.625117150890347,
+      "grad_norm": 0.985636830329895,
+      "learning_rate": 9.924804873405673e-06,
+      "loss": 0.0238,
+      "step": 10270
+    },
+    {
+      "epoch": 9.634489222118088,
+      "grad_norm": 0.8833957314491272,
+      "learning_rate": 9.924731655171404e-06,
+      "loss": 0.0215,
+      "step": 10280
+    },
+    {
+      "epoch": 9.643861293345829,
+      "grad_norm": 0.7223436832427979,
+      "learning_rate": 9.924658436937137e-06,
+      "loss": 0.0257,
+      "step": 10290
+    },
+    {
+      "epoch": 9.653233364573572,
+      "grad_norm": 1.0917994976043701,
+      "learning_rate": 9.924585218702867e-06,
+      "loss": 0.0272,
+      "step": 10300
+    },
+    {
+      "epoch": 9.662605435801312,
+      "grad_norm": 0.79998779296875,
+      "learning_rate": 9.924512000468598e-06,
+      "loss": 0.0232,
+      "step": 10310
+    },
+    {
+      "epoch": 9.671977507029053,
+      "grad_norm": 0.9708638191223145,
+      "learning_rate": 9.924438782234329e-06,
+      "loss": 0.0214,
+      "step": 10320
+    },
+    {
+      "epoch": 9.681349578256794,
+      "grad_norm": 0.5575175881385803,
+      "learning_rate": 9.92436556400006e-06,
+      "loss": 0.0256,
+      "step": 10330
+    },
+    {
+      "epoch": 9.690721649484535,
+      "grad_norm": 1.2645318508148193,
+      "learning_rate": 9.92429234576579e-06,
+      "loss": 0.0276,
+      "step": 10340
+    },
+    {
+      "epoch": 9.700093720712278,
+      "grad_norm": 0.6546396017074585,
+      "learning_rate": 9.924219127531521e-06,
+      "loss": 0.024,
+      "step": 10350
+    },
+    {
+      "epoch": 9.70946579194002,
+      "grad_norm": 0.8439049124717712,
+      "learning_rate": 9.924145909297252e-06,
+      "loss": 0.0259,
+      "step": 10360
+    },
+    {
+      "epoch": 9.71883786316776,
+      "grad_norm": 0.9637166261672974,
+      "learning_rate": 9.924072691062984e-06,
+      "loss": 0.0225,
+      "step": 10370
+    },
+    {
+      "epoch": 9.728209934395501,
+      "grad_norm": 0.6104253530502319,
+      "learning_rate": 9.923999472828713e-06,
+      "loss": 0.0254,
+      "step": 10380
+    },
+    {
+      "epoch": 9.737582005623242,
+      "grad_norm": 0.5664217472076416,
+      "learning_rate": 9.923926254594444e-06,
+      "loss": 0.0192,
+      "step": 10390
+    },
+    {
+      "epoch": 9.746954076850985,
+      "grad_norm": 0.6904122233390808,
+      "learning_rate": 9.923853036360176e-06,
+      "loss": 0.0213,
+      "step": 10400
+    },
+    {
+      "epoch": 9.756326148078726,
+      "grad_norm": 1.0864416360855103,
+      "learning_rate": 9.923779818125907e-06,
+      "loss": 0.0254,
+      "step": 10410
+    },
+    {
+      "epoch": 9.765698219306467,
+      "grad_norm": 0.791348397731781,
+      "learning_rate": 9.923706599891638e-06,
+      "loss": 0.0264,
+      "step": 10420
+    },
+    {
+      "epoch": 9.775070290534208,
+      "grad_norm": 0.7972745895385742,
+      "learning_rate": 9.923633381657369e-06,
+      "loss": 0.0206,
+      "step": 10430
+    },
+    {
+      "epoch": 9.784442361761949,
+      "grad_norm": 0.6930385231971741,
+      "learning_rate": 9.9235601634231e-06,
+      "loss": 0.0283,
+      "step": 10440
+    },
+    {
+      "epoch": 9.793814432989691,
+      "grad_norm": 0.5096721053123474,
+      "learning_rate": 9.92348694518883e-06,
+      "loss": 0.0263,
+      "step": 10450
+    },
+    {
+      "epoch": 9.803186504217432,
+      "grad_norm": 0.7492228150367737,
+      "learning_rate": 9.923413726954561e-06,
+      "loss": 0.0237,
+      "step": 10460
+    },
+    {
+      "epoch": 9.812558575445173,
+      "grad_norm": 0.8097043037414551,
+      "learning_rate": 9.923340508720293e-06,
+      "loss": 0.0225,
+      "step": 10470
+    },
+    {
+      "epoch": 9.821930646672914,
+      "grad_norm": 0.45464569330215454,
+      "learning_rate": 9.923267290486024e-06,
+      "loss": 0.0175,
+      "step": 10480
+    },
+    {
+      "epoch": 9.831302717900655,
+      "grad_norm": 0.6172147393226624,
+      "learning_rate": 9.923194072251753e-06,
+      "loss": 0.0272,
+      "step": 10490
+    },
+    {
+      "epoch": 9.840674789128398,
+      "grad_norm": 0.9826374650001526,
+      "learning_rate": 9.923120854017486e-06,
+      "loss": 0.0233,
+      "step": 10500
+    },
+    {
+      "epoch": 9.840674789128398,
+      "eval_loss": 0.03700366988778114,
+      "eval_pearson_cosine": 0.7760223746299744,
+      "eval_pearson_dot": 0.7342942953109741,
+      "eval_pearson_euclidean": 0.7316151857376099,
+      "eval_pearson_manhattan": 0.7336723804473877,
+      "eval_runtime": 22.135,
+      "eval_samples_per_second": 67.766,
+      "eval_spearman_cosine": 0.7753394120917871,
+      "eval_spearman_dot": 0.7356003834746606,
+      "eval_spearman_euclidean": 0.7371167930939387,
+      "eval_spearman_manhattan": 0.7388623589601665,
+      "eval_steps_per_second": 8.493,
+      "step": 10500
+    },
+    {
+      "epoch": 9.850046860356139,
+      "grad_norm": 0.5944278240203857,
+      "learning_rate": 9.923047635783216e-06,
+      "loss": 0.0245,
+      "step": 10510
+    },
+    {
+      "epoch": 9.85941893158388,
+      "grad_norm": 0.4207167625427246,
+      "learning_rate": 9.922974417548947e-06,
+      "loss": 0.0236,
+      "step": 10520
+    },
+    {
+      "epoch": 9.868791002811621,
+      "grad_norm": 1.185616374015808,
+      "learning_rate": 9.922901199314678e-06,
+      "loss": 0.025,
+      "step": 10530
+    },
+    {
+      "epoch": 9.878163074039362,
+      "grad_norm": 0.6041834354400635,
+      "learning_rate": 9.92282798108041e-06,
+      "loss": 0.0229,
+      "step": 10540
+    },
+    {
+      "epoch": 9.887535145267105,
+      "grad_norm": 1.3135936260223389,
+      "learning_rate": 9.92275476284614e-06,
+      "loss": 0.022,
+      "step": 10550
+    },
+    {
+      "epoch": 9.896907216494846,
+      "grad_norm": 0.7592184543609619,
+      "learning_rate": 9.92268154461187e-06,
+      "loss": 0.0251,
+      "step": 10560
+    },
+    {
+      "epoch": 9.906279287722587,
+      "grad_norm": 0.5679847002029419,
+      "learning_rate": 9.922608326377603e-06,
+      "loss": 0.0218,
+      "step": 10570
+    },
+    {
+      "epoch": 9.915651358950328,
+      "grad_norm": 1.1727142333984375,
+      "learning_rate": 9.922535108143333e-06,
+      "loss": 0.0266,
+      "step": 10580
+    },
+    {
+      "epoch": 9.925023430178069,
+      "grad_norm": 1.2769267559051514,
+      "learning_rate": 9.922461889909064e-06,
+      "loss": 0.0237,
+      "step": 10590
+    },
+    {
+      "epoch": 9.934395501405811,
+      "grad_norm": 0.6604001522064209,
+      "learning_rate": 9.922388671674795e-06,
+      "loss": 0.0206,
+      "step": 10600
+    },
+    {
+      "epoch": 9.943767572633552,
+      "grad_norm": 0.8065370321273804,
+      "learning_rate": 9.922315453440526e-06,
+      "loss": 0.0272,
+      "step": 10610
+    },
+    {
+      "epoch": 9.953139643861293,
+      "grad_norm": 1.0085433721542358,
+      "learning_rate": 9.922242235206256e-06,
+      "loss": 0.019,
+      "step": 10620
+    },
+    {
+      "epoch": 9.962511715089034,
+      "grad_norm": 0.9662045240402222,
+      "learning_rate": 9.922169016971987e-06,
+      "loss": 0.0218,
+      "step": 10630
+    },
+    {
+      "epoch": 9.971883786316775,
+      "grad_norm": 0.49303632974624634,
+      "learning_rate": 9.922095798737718e-06,
+      "loss": 0.0223,
+      "step": 10640
+    },
+    {
+      "epoch": 9.981255857544518,
+      "grad_norm": 0.7215604186058044,
+      "learning_rate": 9.92202258050345e-06,
+      "loss": 0.0259,
+      "step": 10650
+    },
+    {
+      "epoch": 9.990627928772259,
+      "grad_norm": 0.6104753017425537,
+      "learning_rate": 9.92194936226918e-06,
+      "loss": 0.0232,
+      "step": 10660
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 1.011549949645996,
+      "learning_rate": 9.92187614403491e-06,
+      "loss": 0.0234,
+      "step": 10670
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }