Training in progress, step 200, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/config.json +1 -1
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +2 -2
last-checkpoint/scheduler.pt +2 -2
last-checkpoint/tokenizer.json +1 -1
last-checkpoint/trainer_state.json +21 -469
last-checkpoint/training_args.bin +2 -2

last-checkpoint/config.json CHANGED Viewed

@@ -18,7 +18,7 @@
   "position_embedding_type": "absolute",
   "tokenizer_class": "XLMRobertaTokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.52.4",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 250037

   "position_embedding_type": "absolute",
   "tokenizer_class": "XLMRobertaTokenizer",
   "torch_dtype": "float32",
+  "transformers_version": "4.55.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 250037

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48393895744f0281dcc35ce037f939cbf0fcb9343f0e49a9b5a5800b72bf3aec
 size 471641972

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b047a0e786eb1d688011b6ef8ace7e38cf8666e860bf3a1d20a6db01c28377e
 size 471641972

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5927d7bb5b738cb043c868545306c06435eb54da996000898577927f1d970803
-size 943405434

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b5493e50fc6dd02e0330bd689be29ebdf17ddd1e1450ea8d752f0381e29d578
+size 943408715

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81805f5d9be60e795c66f48836696944f5979ee35820fd897c90082596563348
-size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb53e956d37160bedc46d0adb3a1f88aaddc065e5d5a137bf434121ef2313991
+size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7fa458e54f1f79beba7dd8cf425c5f35ff1c5b914a484c940ab3f4fb17abf3b
-size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f8d82f2b63956f7a657676c6c14dd16c65381bf4daf3a0e2823620944d3d859
+size 1465

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20bb9e7979b312d46ef417756bf9907e1943529b027576f3a9458ded2caffcb3
 size 17082898

 version https://git-lfs.github.com/spec/v1
+oid sha256:6247be075973430c70e5980bd5af624466882ca896e8994078f6707661e890ca
 size 17082898

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,492 +2,44 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08563368930082611,
   "eval_steps": 1000,
-  "global_step": 3400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0012593189603062663,
-      "grad_norm": 7.101168155670166,
-      "learning_rate": 4.993829337094499e-05,
-      "loss": 8.8771,
       "step": 50
     },
     {
-      "epoch": 0.0025186379206125326,
-      "grad_norm": 9.807716369628906,
-      "learning_rate": 4.987532742292968e-05,
-      "loss": 7.4158,
       "step": 100
     },
     {
-      "epoch": 0.003777956880918799,
-      "grad_norm": 9.843791961669922,
-      "learning_rate": 4.981236147491437e-05,
-      "loss": 6.9685,
       "step": 150
     },
     {
-      "epoch": 0.005037275841225065,
-      "grad_norm": 8.359281539916992,
-      "learning_rate": 4.9749395526899055e-05,
-      "loss": 6.5252,
       "step": 200
-    },
-    {
-      "epoch": 0.006296594801531332,
-      "grad_norm": 8.851263046264648,
-      "learning_rate": 4.968642957888374e-05,
-      "loss": 6.1479,
-      "step": 250
-    },
-    {
-      "epoch": 0.007555913761837598,
-      "grad_norm": 9.075284957885742,
-      "learning_rate": 4.962346363086843e-05,
-      "loss": 6.074,
-      "step": 300
-    },
-    {
-      "epoch": 0.008815232722143865,
-      "grad_norm": 9.121552467346191,
-      "learning_rate": 4.956049768285311e-05,
-      "loss": 5.8874,
-      "step": 350
-    },
-    {
-      "epoch": 0.01007455168245013,
-      "grad_norm": 9.67652416229248,
-      "learning_rate": 4.9497531734837806e-05,
-      "loss": 5.7364,
-      "step": 400
-    },
-    {
-      "epoch": 0.011333870642756398,
-      "grad_norm": 8.584100723266602,
-      "learning_rate": 4.943456578682249e-05,
-      "loss": 5.6092,
-      "step": 450
-    },
-    {
-      "epoch": 0.012593189603062663,
-      "grad_norm": 9.832197189331055,
-      "learning_rate": 4.937159983880718e-05,
-      "loss": 5.5687,
-      "step": 500
-    },
-    {
-      "epoch": 0.01385250856336893,
-      "grad_norm": 10.394134521484375,
-      "learning_rate": 4.930863389079186e-05,
-      "loss": 5.3992,
-      "step": 550
-    },
-    {
-      "epoch": 0.015111827523675197,
-      "grad_norm": 8.916543960571289,
-      "learning_rate": 4.924566794277655e-05,
-      "loss": 5.386,
-      "step": 600
-    },
-    {
-      "epoch": 0.016371146483981462,
-      "grad_norm": 9.233148574829102,
-      "learning_rate": 4.9182701994761235e-05,
-      "loss": 5.3173,
-      "step": 650
-    },
-    {
-      "epoch": 0.01763046544428773,
-      "grad_norm": 9.256299018859863,
-      "learning_rate": 4.911973604674592e-05,
-      "loss": 5.2057,
-      "step": 700
-    },
-    {
-      "epoch": 0.018889784404593997,
-      "grad_norm": 8.488754272460938,
-      "learning_rate": 4.9056770098730606e-05,
-      "loss": 5.0931,
-      "step": 750
-    },
-    {
-      "epoch": 0.02014910336490026,
-      "grad_norm": 10.237517356872559,
-      "learning_rate": 4.899380415071529e-05,
-      "loss": 5.0613,
-      "step": 800
-    },
-    {
-      "epoch": 0.021408422325206528,
-      "grad_norm": 9.483453750610352,
-      "learning_rate": 4.893083820269998e-05,
-      "loss": 5.0304,
-      "step": 850
-    },
-    {
-      "epoch": 0.022667741285512796,
-      "grad_norm": 8.838459014892578,
-      "learning_rate": 4.886787225468467e-05,
-      "loss": 4.9694,
-      "step": 900
-    },
-    {
-      "epoch": 0.02392706024581906,
-      "grad_norm": 8.221150398254395,
-      "learning_rate": 4.880490630666936e-05,
-      "loss": 4.9632,
-      "step": 950
-    },
-    {
-      "epoch": 0.025186379206125327,
-      "grad_norm": 10.936125755310059,
-      "learning_rate": 4.874194035865404e-05,
-      "loss": 4.8524,
-      "step": 1000
-    },
-    {
-      "epoch": 0.026445698166431594,
-      "grad_norm": 9.897261619567871,
-      "learning_rate": 4.867897441063873e-05,
-      "loss": 4.8278,
-      "step": 1050
-    },
-    {
-      "epoch": 0.02770501712673786,
-      "grad_norm": 10.429959297180176,
-      "learning_rate": 4.8616008462623414e-05,
-      "loss": 4.7999,
-      "step": 1100
-    },
-    {
-      "epoch": 0.028964336087044126,
-      "grad_norm": 9.011711120605469,
-      "learning_rate": 4.855304251460811e-05,
-      "loss": 4.7221,
-      "step": 1150
-    },
-    {
-      "epoch": 0.030223655047350393,
-      "grad_norm": 8.87881088256836,
-      "learning_rate": 4.849007656659279e-05,
-      "loss": 4.7191,
-      "step": 1200
-    },
-    {
-      "epoch": 0.03148297400765666,
-      "grad_norm": 9.68855094909668,
-      "learning_rate": 4.842711061857748e-05,
-      "loss": 4.6946,
-      "step": 1250
-    },
-    {
-      "epoch": 0.032742292967962924,
-      "grad_norm": 11.467870712280273,
-      "learning_rate": 4.8364144670562164e-05,
-      "loss": 4.6298,
-      "step": 1300
-    },
-    {
-      "epoch": 0.03400161192826919,
-      "grad_norm": 10.919625282287598,
-      "learning_rate": 4.830117872254685e-05,
-      "loss": 4.6974,
-      "step": 1350
-    },
-    {
-      "epoch": 0.03526093088857546,
-      "grad_norm": 10.439410209655762,
-      "learning_rate": 4.8238212774531536e-05,
-      "loss": 4.5912,
-      "step": 1400
-    },
-    {
-      "epoch": 0.036520249848881726,
-      "grad_norm": 9.361510276794434,
-      "learning_rate": 4.817524682651622e-05,
-      "loss": 4.5724,
-      "step": 1450
-    },
-    {
-      "epoch": 0.037779568809187994,
-      "grad_norm": 11.82630443572998,
-      "learning_rate": 4.811228087850091e-05,
-      "loss": 4.4983,
-      "step": 1500
-    },
-    {
-      "epoch": 0.039038887769494254,
-      "grad_norm": 9.679414749145508,
-      "learning_rate": 4.8049314930485594e-05,
-      "loss": 4.5179,
-      "step": 1550
-    },
-    {
-      "epoch": 0.04029820672980052,
-      "grad_norm": 9.39099407196045,
-      "learning_rate": 4.798634898247028e-05,
-      "loss": 4.4314,
-      "step": 1600
-    },
-    {
-      "epoch": 0.04155752569010679,
-      "grad_norm": 9.744073867797852,
-      "learning_rate": 4.792338303445497e-05,
-      "loss": 4.5181,
-      "step": 1650
-    },
-    {
-      "epoch": 0.042816844650413056,
-      "grad_norm": 9.849005699157715,
-      "learning_rate": 4.786041708643966e-05,
-      "loss": 4.3855,
-      "step": 1700
-    },
-    {
-      "epoch": 0.044076163610719324,
-      "grad_norm": 9.082205772399902,
-      "learning_rate": 4.7797451138424344e-05,
-      "loss": 4.4138,
-      "step": 1750
-    },
-    {
-      "epoch": 0.04533548257102559,
-      "grad_norm": 8.309534072875977,
-      "learning_rate": 4.773448519040903e-05,
-      "loss": 4.4054,
-      "step": 1800
-    },
-    {
-      "epoch": 0.04659480153133186,
-      "grad_norm": 8.576716423034668,
-      "learning_rate": 4.7671519242393715e-05,
-      "loss": 4.3585,
-      "step": 1850
-    },
-    {
-      "epoch": 0.04785412049163812,
-      "grad_norm": 9.32400131225586,
-      "learning_rate": 4.760855329437841e-05,
-      "loss": 4.3509,
-      "step": 1900
-    },
-    {
-      "epoch": 0.049113439451944386,
-      "grad_norm": 8.500265121459961,
-      "learning_rate": 4.7545587346363094e-05,
-      "loss": 4.319,
-      "step": 1950
-    },
-    {
-      "epoch": 0.050372758412250654,
-      "grad_norm": 10.848652839660645,
-      "learning_rate": 4.748262139834777e-05,
-      "loss": 4.3335,
-      "step": 2000
-    },
-    {
-      "epoch": 0.05163207737255692,
-      "grad_norm": 9.254430770874023,
-      "learning_rate": 4.741965545033246e-05,
-      "loss": 4.3304,
-      "step": 2050
-    },
-    {
-      "epoch": 0.05289139633286319,
-      "grad_norm": 8.308243751525879,
-      "learning_rate": 4.7356689502317145e-05,
-      "loss": 4.3164,
-      "step": 2100
-    },
-    {
-      "epoch": 0.054150715293169456,
-      "grad_norm": 8.871552467346191,
-      "learning_rate": 4.729372355430183e-05,
-      "loss": 4.3035,
-      "step": 2150
-    },
-    {
-      "epoch": 0.05541003425347572,
-      "grad_norm": 8.991899490356445,
-      "learning_rate": 4.723075760628652e-05,
-      "loss": 4.2802,
-      "step": 2200
-    },
-    {
-      "epoch": 0.056669353213781984,
-      "grad_norm": 9.697725296020508,
-      "learning_rate": 4.716779165827121e-05,
-      "loss": 4.2517,
-      "step": 2250
-    },
-    {
-      "epoch": 0.05792867217408825,
-      "grad_norm": 9.194120407104492,
-      "learning_rate": 4.7104825710255895e-05,
-      "loss": 4.2075,
-      "step": 2300
-    },
-    {
-      "epoch": 0.05918799113439452,
-      "grad_norm": 8.314126968383789,
-      "learning_rate": 4.704185976224058e-05,
-      "loss": 4.2392,
-      "step": 2350
-    },
-    {
-      "epoch": 0.060447310094700786,
-      "grad_norm": 10.024466514587402,
-      "learning_rate": 4.697889381422527e-05,
-      "loss": 4.2244,
-      "step": 2400
-    },
-    {
-      "epoch": 0.06170662905500705,
-      "grad_norm": 8.209784507751465,
-      "learning_rate": 4.691592786620996e-05,
-      "loss": 4.1021,
-      "step": 2450
-    },
-    {
-      "epoch": 0.06296594801531331,
-      "grad_norm": 8.149948120117188,
-      "learning_rate": 4.6852961918194645e-05,
-      "loss": 4.0721,
-      "step": 2500
-    },
-    {
-      "epoch": 0.06422526697561959,
-      "grad_norm": 8.410172462463379,
-      "learning_rate": 4.678999597017933e-05,
-      "loss": 4.0433,
-      "step": 2550
-    },
-    {
-      "epoch": 0.06548458593592585,
-      "grad_norm": 10.219852447509766,
-      "learning_rate": 4.672703002216402e-05,
-      "loss": 4.0822,
-      "step": 2600
-    },
-    {
-      "epoch": 0.06674390489623212,
-      "grad_norm": 8.766775131225586,
-      "learning_rate": 4.66640640741487e-05,
-      "loss": 4.0743,
-      "step": 2650
-    },
-    {
-      "epoch": 0.06800322385653838,
-      "grad_norm": 9.098891258239746,
-      "learning_rate": 4.660109812613339e-05,
-      "loss": 4.076,
-      "step": 2700
-    },
-    {
-      "epoch": 0.06926254281684464,
-      "grad_norm": 8.314194679260254,
-      "learning_rate": 4.6538132178118074e-05,
-      "loss": 4.0504,
-      "step": 2750
-    },
-    {
-      "epoch": 0.07052186177715092,
-      "grad_norm": 8.34795093536377,
-      "learning_rate": 4.647516623010276e-05,
-      "loss": 4.0558,
-      "step": 2800
-    },
-    {
-      "epoch": 0.07178118073745718,
-      "grad_norm": 7.737346172332764,
-      "learning_rate": 4.6412200282087446e-05,
-      "loss": 4.0603,
-      "step": 2850
-    },
-    {
-      "epoch": 0.07304049969776345,
-      "grad_norm": 7.735106945037842,
-      "learning_rate": 4.634923433407213e-05,
-      "loss": 3.9872,
-      "step": 2900
-    },
-    {
-      "epoch": 0.07429981865806971,
-      "grad_norm": 10.933419227600098,
-      "learning_rate": 4.6286268386056825e-05,
-      "loss": 3.913,
-      "step": 2950
-    },
-    {
-      "epoch": 0.07555913761837599,
-      "grad_norm": 10.639062881469727,
-      "learning_rate": 4.622330243804151e-05,
-      "loss": 3.9741,
-      "step": 3000
-    },
-    {
-      "epoch": 0.07681845657868225,
-      "grad_norm": 9.638007164001465,
-      "learning_rate": 4.6160336490026196e-05,
-      "loss": 3.871,
-      "step": 3050
-    },
-    {
-      "epoch": 0.07807777553898851,
-      "grad_norm": 11.29076099395752,
-      "learning_rate": 4.609737054201088e-05,
-      "loss": 3.9504,
-      "step": 3100
-    },
-    {
-      "epoch": 0.07933709449929478,
-      "grad_norm": 8.069476127624512,
-      "learning_rate": 4.603440459399557e-05,
-      "loss": 3.9563,
-      "step": 3150
-    },
-    {
-      "epoch": 0.08059641345960104,
-      "grad_norm": 9.49170207977295,
-      "learning_rate": 4.597143864598026e-05,
-      "loss": 3.8954,
-      "step": 3200
-    },
-    {
-      "epoch": 0.08185573241990732,
-      "grad_norm": 7.927274227142334,
-      "learning_rate": 4.5908472697964946e-05,
-      "loss": 3.9952,
-      "step": 3250
-    },
-    {
-      "epoch": 0.08311505138021358,
-      "grad_norm": 9.124794006347656,
-      "learning_rate": 4.584550674994963e-05,
-      "loss": 3.8936,
-      "step": 3300
-    },
-    {
-      "epoch": 0.08437437034051985,
-      "grad_norm": 8.415815353393555,
-      "learning_rate": 4.578254080193432e-05,
-      "loss": 3.9416,
-      "step": 3350
-    },
-    {
-      "epoch": 0.08563368930082611,
-      "grad_norm": 7.427456378936768,
-      "learning_rate": 4.5719574853919e-05,
-      "loss": 3.8502,
-      "step": 3400
     }
   ],
   "logging_steps": 50,
-  "max_steps": 39704,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 200,
@@ -503,8 +55,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 1399136881240896.0,
-  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0025186379206125326,
   "eval_steps": 1000,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0006296594801531332,
+      "grad_norm": 8.148825645446777,
+      "learning_rate": 4.99691466854725e-05,
+      "loss": 8.805,
       "step": 50
     },
     {
+      "epoch": 0.0012593189603062663,
+      "grad_norm": 9.406821250915527,
+      "learning_rate": 4.993766371146484e-05,
+      "loss": 7.6597,
       "step": 100
     },
     {
+      "epoch": 0.0018889784404593996,
+      "grad_norm": 12.482063293457031,
+      "learning_rate": 4.9906180737457186e-05,
+      "loss": 7.2093,
       "step": 150
     },
     {
+      "epoch": 0.0025186379206125326,
+      "grad_norm": 15.39159870147705,
+      "learning_rate": 4.987469776344953e-05,
+      "loss": 6.813,
       "step": 200
     }
   ],
   "logging_steps": 50,
+  "max_steps": 79408,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 62742348630528.0,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce79bc8c825400c8dd4f732cbde41f2404d4382fd9092537ed185c22c52c8b94
-size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:fba024ed71d2a66548909fa4f3d039c74313393df3397391d5c42a5b3a406964
+size 5841