Training in progress, step 2500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86977870d3df332c5c975d8b4f0d570e4557c1d4fd4364b77a5fac955fe62c58
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb64855cd768e7aa0f6f46d54fdfee34da708569cc56da521bb521f1101f672a
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2be2d6cc85c202403ae7a604b614b11bb028322263b6e955934cf8a2d4ef8092
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b80bf474cf78a89650cb9274c36cb3774d0d787508f786edf03b70cad49b53c
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a141ddada80b12146ad2875b480471ca4604a84a507446df6ce95668765adaf4
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:e85fbbb21d9891a877eaefba7c40e5005f7303b4635375bbc6e0c808069fd11f
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a6e444c46ec49de792e4afbe9af4aa4613bca60425da2b0ac2cae225e516fcc
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c5f5054d1fb89b5c064db193ff9ee8b30b57ffe17a11e00d28cfa91ea00081e
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.33789491468153404,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
       "eval_samples_per_second": 270.591,
       "eval_steps_per_second": 5.682,
       "step": 2000
     }
   ],
   "logging_steps": 10,
@@ -1466,7 +1824,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.6891364171776e+16,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.42236864335191754,
   "eval_steps": 500,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 270.591,
       "eval_steps_per_second": 5.682,
       "step": 2000
+    },
+    {
+      "epoch": 0.3395843892549417,
+      "grad_norm": 0.9857544302940369,
+      "learning_rate": 0.0002999993805131495,
+      "loss": 5.475189208984375,
+      "step": 2010
+    },
+    {
+      "epoch": 0.3412738638283494,
+      "grad_norm": 0.8797745704650879,
+      "learning_rate": 0.00029999723908369233,
+      "loss": 5.471670150756836,
+      "step": 2020
+    },
+    {
+      "epoch": 0.3429633384017571,
+      "grad_norm": 0.8447170257568359,
+      "learning_rate": 0.0002999935680854744,
+      "loss": 5.436410522460937,
+      "step": 2030
+    },
+    {
+      "epoch": 0.3446528129751647,
+      "grad_norm": 0.87137770652771,
+      "learning_rate": 0.00029998836755593,
+      "loss": 5.40980339050293,
+      "step": 2040
+    },
+    {
+      "epoch": 0.3463422875485724,
+      "grad_norm": 1.1437028646469116,
+      "learning_rate": 0.00029998163754809044,
+      "loss": 5.396385192871094,
+      "step": 2050
+    },
+    {
+      "epoch": 0.34803176212198006,
+      "grad_norm": 0.7306678295135498,
+      "learning_rate": 0.0002999733781305839,
+      "loss": 5.408660888671875,
+      "step": 2060
+    },
+    {
+      "epoch": 0.34972123669538774,
+      "grad_norm": 0.7920585870742798,
+      "learning_rate": 0.00029996358938763406,
+      "loss": 5.393876647949218,
+      "step": 2070
+    },
+    {
+      "epoch": 0.3514107112687954,
+      "grad_norm": 0.8976436257362366,
+      "learning_rate": 0.0002999522714190599,
+      "loss": 5.399116897583008,
+      "step": 2080
+    },
+    {
+      "epoch": 0.3531001858422031,
+      "grad_norm": 0.8801947832107544,
+      "learning_rate": 0.0002999394243402743,
+      "loss": 5.40356330871582,
+      "step": 2090
+    },
+    {
+      "epoch": 0.3547896604156107,
+      "grad_norm": 0.9378096461296082,
+      "learning_rate": 0.00029992504828228283,
+      "loss": 5.37578010559082,
+      "step": 2100
+    },
+    {
+      "epoch": 0.3564791349890184,
+      "grad_norm": 0.760123074054718,
+      "learning_rate": 0.00029990914339168286,
+      "loss": 5.383629989624024,
+      "step": 2110
+    },
+    {
+      "epoch": 0.3581686095624261,
+      "grad_norm": 0.8094545006752014,
+      "learning_rate": 0.00029989170983066126,
+      "loss": 5.359802627563477,
+      "step": 2120
+    },
+    {
+      "epoch": 0.35985808413583376,
+      "grad_norm": 0.9137438535690308,
+      "learning_rate": 0.0002998727477769937,
+      "loss": 5.361555862426758,
+      "step": 2130
+    },
+    {
+      "epoch": 0.36154755870924143,
+      "grad_norm": 0.7989398241043091,
+      "learning_rate": 0.0002998522574240421,
+      "loss": 5.354197692871094,
+      "step": 2140
+    },
+    {
+      "epoch": 0.3632370332826491,
+      "grad_norm": 0.8207266330718994,
+      "learning_rate": 0.00029983023898075305,
+      "loss": 5.331578063964844,
+      "step": 2150
+    },
+    {
+      "epoch": 0.3649265078560568,
+      "grad_norm": 0.9194368124008179,
+      "learning_rate": 0.00029980669267165545,
+      "loss": 5.32526741027832,
+      "step": 2160
+    },
+    {
+      "epoch": 0.3666159824294644,
+      "grad_norm": 0.80011385679245,
+      "learning_rate": 0.0002997816187368584,
+      "loss": 5.341778182983399,
+      "step": 2170
+    },
+    {
+      "epoch": 0.3683054570028721,
+      "grad_norm": 0.7985261678695679,
+      "learning_rate": 0.00029975501743204866,
+      "loss": 5.31537094116211,
+      "step": 2180
+    },
+    {
+      "epoch": 0.3699949315762798,
+      "grad_norm": 0.7046862244606018,
+      "learning_rate": 0.00029972688902848803,
+      "loss": 5.3185478210449215,
+      "step": 2190
+    },
+    {
+      "epoch": 0.37168440614968745,
+      "grad_norm": 0.833369791507721,
+      "learning_rate": 0.0002996972338130106,
+      "loss": 5.297074890136718,
+      "step": 2200
+    },
+    {
+      "epoch": 0.37337388072309513,
+      "grad_norm": 0.9138798117637634,
+      "learning_rate": 0.00029966605208801996,
+      "loss": 5.29405403137207,
+      "step": 2210
+    },
+    {
+      "epoch": 0.3750633552965028,
+      "grad_norm": 0.8588988780975342,
+      "learning_rate": 0.0002996333441714859,
+      "loss": 5.285926437377929,
+      "step": 2220
+    },
+    {
+      "epoch": 0.37675282986991043,
+      "grad_norm": 0.7140660285949707,
+      "learning_rate": 0.00029959911039694127,
+      "loss": 5.305549621582031,
+      "step": 2230
+    },
+    {
+      "epoch": 0.3784423044433181,
+      "grad_norm": 0.7165802717208862,
+      "learning_rate": 0.00029956335111347855,
+      "loss": 5.268837356567383,
+      "step": 2240
+    },
+    {
+      "epoch": 0.3801317790167258,
+      "grad_norm": 0.8172394037246704,
+      "learning_rate": 0.0002995260666857463,
+      "loss": 5.283020782470703,
+      "step": 2250
+    },
+    {
+      "epoch": 0.38182125359013347,
+      "grad_norm": 0.7977796792984009,
+      "learning_rate": 0.00029948725749394563,
+      "loss": 5.262269973754883,
+      "step": 2260
+    },
+    {
+      "epoch": 0.38351072816354115,
+      "grad_norm": 0.7707539200782776,
+      "learning_rate": 0.00029944692393382586,
+      "loss": 5.270823669433594,
+      "step": 2270
+    },
+    {
+      "epoch": 0.3852002027369488,
+      "grad_norm": 0.76548171043396,
+      "learning_rate": 0.000299405066416681,
+      "loss": 5.270006942749023,
+      "step": 2280
+    },
+    {
+      "epoch": 0.3868896773103565,
+      "grad_norm": 0.8181013464927673,
+      "learning_rate": 0.0002993616853693452,
+      "loss": 5.2521240234375,
+      "step": 2290
+    },
+    {
+      "epoch": 0.38857915188376413,
+      "grad_norm": 0.6938267350196838,
+      "learning_rate": 0.0002993167812341886,
+      "loss": 5.245725631713867,
+      "step": 2300
+    },
+    {
+      "epoch": 0.3902686264571718,
+      "grad_norm": 0.7547310590744019,
+      "learning_rate": 0.0002992703544691127,
+      "loss": 5.216205596923828,
+      "step": 2310
+    },
+    {
+      "epoch": 0.3919581010305795,
+      "grad_norm": 0.8312097787857056,
+      "learning_rate": 0.00029922240554754577,
+      "loss": 5.221836090087891,
+      "step": 2320
+    },
+    {
+      "epoch": 0.39364757560398717,
+      "grad_norm": 0.8383576273918152,
+      "learning_rate": 0.00029917293495843793,
+      "loss": 5.21032485961914,
+      "step": 2330
+    },
+    {
+      "epoch": 0.39533705017739484,
+      "grad_norm": 0.6876690983772278,
+      "learning_rate": 0.0002991219432062562,
+      "loss": 5.247097778320312,
+      "step": 2340
+    },
+    {
+      "epoch": 0.3970265247508025,
+      "grad_norm": 0.7683764696121216,
+      "learning_rate": 0.0002990694308109795,
+      "loss": 5.248017883300781,
+      "step": 2350
+    },
+    {
+      "epoch": 0.39871599932421015,
+      "grad_norm": 0.7274552583694458,
+      "learning_rate": 0.0002990153983080932,
+      "loss": 5.210857009887695,
+      "step": 2360
+    },
+    {
+      "epoch": 0.4004054738976178,
+      "grad_norm": 0.7037548422813416,
+      "learning_rate": 0.0002989598462485835,
+      "loss": 5.223086929321289,
+      "step": 2370
+    },
+    {
+      "epoch": 0.4020949484710255,
+      "grad_norm": 0.7380816340446472,
+      "learning_rate": 0.00029890277519893215,
+      "loss": 5.203308486938477,
+      "step": 2380
+    },
+    {
+      "epoch": 0.4037844230444332,
+      "grad_norm": 0.6980042457580566,
+      "learning_rate": 0.0002988441857411106,
+      "loss": 5.187635803222657,
+      "step": 2390
+    },
+    {
+      "epoch": 0.40547389761784086,
+      "grad_norm": 0.7107545137405396,
+      "learning_rate": 0.0002987840784725737,
+      "loss": 5.192476654052735,
+      "step": 2400
+    },
+    {
+      "epoch": 0.40716337219124854,
+      "grad_norm": 0.7168161869049072,
+      "learning_rate": 0.0002987224540062542,
+      "loss": 5.191188812255859,
+      "step": 2410
+    },
+    {
+      "epoch": 0.4088528467646562,
+      "grad_norm": 0.8272731900215149,
+      "learning_rate": 0.00029865931297055605,
+      "loss": 5.169822692871094,
+      "step": 2420
+    },
+    {
+      "epoch": 0.41054232133806384,
+      "grad_norm": 0.8275768160820007,
+      "learning_rate": 0.00029859465600934814,
+      "loss": 5.191945266723633,
+      "step": 2430
+    },
+    {
+      "epoch": 0.4122317959114715,
+      "grad_norm": 0.7465378642082214,
+      "learning_rate": 0.0002985284837819577,
+      "loss": 5.1748401641845705,
+      "step": 2440
+    },
+    {
+      "epoch": 0.4139212704848792,
+      "grad_norm": 0.7874022126197815,
+      "learning_rate": 0.0002984607969631636,
+      "loss": 5.167286682128906,
+      "step": 2450
+    },
+    {
+      "epoch": 0.4156107450582869,
+      "grad_norm": 0.7480391263961792,
+      "learning_rate": 0.00029839159624318954,
+      "loss": 5.167938232421875,
+      "step": 2460
+    },
+    {
+      "epoch": 0.41730021963169456,
+      "grad_norm": 0.7812421917915344,
+      "learning_rate": 0.00029832088232769694,
+      "loss": 5.159024810791015,
+      "step": 2470
+    },
+    {
+      "epoch": 0.41898969420510224,
+      "grad_norm": 0.7070655226707458,
+      "learning_rate": 0.0002982486559377776,
+      "loss": 5.166957092285156,
+      "step": 2480
+    },
+    {
+      "epoch": 0.42067916877850986,
+      "grad_norm": 0.6980714797973633,
+      "learning_rate": 0.0002981749178099467,
+      "loss": 5.127694702148437,
+      "step": 2490
+    },
+    {
+      "epoch": 0.42236864335191754,
+      "grad_norm": 0.661147952079773,
+      "learning_rate": 0.000298099668696135,
+      "loss": 5.172074890136718,
+      "step": 2500
+    },
+    {
+      "epoch": 0.42236864335191754,
+      "eval_loss": 5.1130194664001465,
+      "eval_runtime": 3.6713,
+      "eval_samples_per_second": 272.384,
+      "eval_steps_per_second": 5.72,
+      "step": 2500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.361420521472e+16,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null