models4world
/

checkpoint-100

Safetensors

qwen2

Model card Files Files and versions

xet

Community

models4world commited on Feb 9, 2025

Commit

a4a6ce9

verified ·

1 Parent(s): fa9429d

Upload trainer_state.json with huggingface_hub

Browse files

Files changed (1) hide show

trainer_state.json +733 -0

trainer_state.json ADDED Viewed

	@@ -0,0 +1,733 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.26628895184136,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0226628895184136,
+      "grad_norm": 5.715946698612309,
+      "learning_rate": 3.7037037037037036e-08,
+      "loss": 1.039,
+      "step": 1
+    },
+    {
+      "epoch": 0.0453257790368272,
+      "grad_norm": 5.918098634610158,
+      "learning_rate": 7.407407407407407e-08,
+      "loss": 1.0345,
+      "step": 2
+    },
+    {
+      "epoch": 0.0679886685552408,
+      "grad_norm": 5.967358491879423,
+      "learning_rate": 1.111111111111111e-07,
+      "loss": 1.0568,
+      "step": 3
+    },
+    {
+      "epoch": 0.0906515580736544,
+      "grad_norm": 6.076151471056227,
+      "learning_rate": 1.4814814814814815e-07,
+      "loss": 1.0407,
+      "step": 4
+    },
+    {
+      "epoch": 0.11331444759206799,
+      "grad_norm": 5.698276915195162,
+      "learning_rate": 1.8518518518518516e-07,
+      "loss": 1.0355,
+      "step": 5
+    },
+    {
+      "epoch": 0.1359773371104816,
+      "grad_norm": 5.524873495595531,
+      "learning_rate": 2.222222222222222e-07,
+      "loss": 1.0329,
+      "step": 6
+    },
+    {
+      "epoch": 0.15864022662889518,
+      "grad_norm": 5.663139068043792,
+      "learning_rate": 2.5925925925925923e-07,
+      "loss": 1.013,
+      "step": 7
+    },
+    {
+      "epoch": 0.1813031161473088,
+      "grad_norm": 5.483842003291619,
+      "learning_rate": 2.962962962962963e-07,
+      "loss": 1.0285,
+      "step": 8
+    },
+    {
+      "epoch": 0.20396600566572237,
+      "grad_norm": 5.501921058157795,
+      "learning_rate": 3.333333333333333e-07,
+      "loss": 1.0181,
+      "step": 9
+    },
+    {
+      "epoch": 0.22662889518413598,
+      "grad_norm": 5.691661611678567,
+      "learning_rate": 3.703703703703703e-07,
+      "loss": 1.0046,
+      "step": 10
+    },
+    {
+      "epoch": 0.24929178470254956,
+      "grad_norm": 5.490524973688248,
+      "learning_rate": 4.0740740740740737e-07,
+      "loss": 1.0291,
+      "step": 11
+    },
+    {
+      "epoch": 0.2719546742209632,
+      "grad_norm": 4.885236117260528,
+      "learning_rate": 4.444444444444444e-07,
+      "loss": 1.0084,
+      "step": 12
+    },
+    {
+      "epoch": 0.29461756373937675,
+      "grad_norm": 5.256688897749667,
+      "learning_rate": 4.814814814814814e-07,
+      "loss": 0.9945,
+      "step": 13
+    },
+    {
+      "epoch": 0.31728045325779036,
+      "grad_norm": 5.026023661790397,
+      "learning_rate": 5.185185185185185e-07,
+      "loss": 0.9936,
+      "step": 14
+    },
+    {
+      "epoch": 0.33994334277620397,
+      "grad_norm": 4.979666180740075,
+      "learning_rate": 5.555555555555555e-07,
+      "loss": 0.9997,
+      "step": 15
+    },
+    {
+      "epoch": 0.3626062322946176,
+      "grad_norm": 4.741351636847691,
+      "learning_rate": 5.925925925925926e-07,
+      "loss": 0.9904,
+      "step": 16
+    },
+    {
+      "epoch": 0.38526912181303113,
+      "grad_norm": 4.429638197959212,
+      "learning_rate": 6.296296296296296e-07,
+      "loss": 0.9779,
+      "step": 17
+    },
+    {
+      "epoch": 0.40793201133144474,
+      "grad_norm": 4.2702651723674006,
+      "learning_rate": 6.666666666666666e-07,
+      "loss": 0.9373,
+      "step": 18
+    },
+    {
+      "epoch": 0.43059490084985835,
+      "grad_norm": 4.371215055008036,
+      "learning_rate": 7.037037037037037e-07,
+      "loss": 0.9616,
+      "step": 19
+    },
+    {
+      "epoch": 0.45325779036827196,
+      "grad_norm": 4.300078040900759,
+      "learning_rate": 7.407407407407406e-07,
+      "loss": 0.9581,
+      "step": 20
+    },
+    {
+      "epoch": 0.47592067988668557,
+      "grad_norm": 4.242855799180736,
+      "learning_rate": 7.777777777777778e-07,
+      "loss": 0.9454,
+      "step": 21
+    },
+    {
+      "epoch": 0.4985835694050991,
+      "grad_norm": 3.4536592234259555,
+      "learning_rate": 8.148148148148147e-07,
+      "loss": 0.9274,
+      "step": 22
+    },
+    {
+      "epoch": 0.5212464589235127,
+      "grad_norm": 3.3525795982748203,
+      "learning_rate": 8.518518518518518e-07,
+      "loss": 0.8833,
+      "step": 23
+    },
+    {
+      "epoch": 0.5439093484419264,
+      "grad_norm": 3.110575381958802,
+      "learning_rate": 8.888888888888888e-07,
+      "loss": 0.9066,
+      "step": 24
+    },
+    {
+      "epoch": 0.56657223796034,
+      "grad_norm": 3.18785930927135,
+      "learning_rate": 9.259259259259259e-07,
+      "loss": 0.8896,
+      "step": 25
+    },
+    {
+      "epoch": 0.5892351274787535,
+      "grad_norm": 3.0188412291205684,
+      "learning_rate": 9.629629629629628e-07,
+      "loss": 0.9068,
+      "step": 26
+    },
+    {
+      "epoch": 0.6118980169971672,
+      "grad_norm": 3.0072699515749344,
+      "learning_rate": 1e-06,
+      "loss": 0.8959,
+      "step": 27
+    },
+    {
+      "epoch": 0.6345609065155807,
+      "grad_norm": 3.050779999599616,
+      "learning_rate": 9.999560724782173e-07,
+      "loss": 0.8648,
+      "step": 28
+    },
+    {
+      "epoch": 0.6572237960339944,
+      "grad_norm": 3.034749793056673,
+      "learning_rate": 9.998242976313776e-07,
+      "loss": 0.8763,
+      "step": 29
+    },
+    {
+      "epoch": 0.6798866855524079,
+      "grad_norm": 2.6230160618361897,
+      "learning_rate": 9.996046986136508e-07,
+      "loss": 0.8439,
+      "step": 30
+    },
+    {
+      "epoch": 0.7025495750708215,
+      "grad_norm": 2.619746810094255,
+      "learning_rate": 9.992973140107996e-07,
+      "loss": 0.8395,
+      "step": 31
+    },
+    {
+      "epoch": 0.7252124645892352,
+      "grad_norm": 2.2660982887250496,
+      "learning_rate": 9.989021978333994e-07,
+      "loss": 0.8407,
+      "step": 32
+    },
+    {
+      "epoch": 0.7478753541076487,
+      "grad_norm": 1.92948640709938,
+      "learning_rate": 9.984194195073478e-07,
+      "loss": 0.8175,
+      "step": 33
+    },
+    {
+      "epoch": 0.7705382436260623,
+      "grad_norm": 1.8673042037436878,
+      "learning_rate": 9.97849063861667e-07,
+      "loss": 0.7963,
+      "step": 34
+    },
+    {
+      "epoch": 0.7932011331444759,
+      "grad_norm": 1.841378707582655,
+      "learning_rate": 9.971912311135967e-07,
+      "loss": 0.8177,
+      "step": 35
+    },
+    {
+      "epoch": 0.8158640226628895,
+      "grad_norm": 1.6212101538356403,
+      "learning_rate": 9.964460368509865e-07,
+      "loss": 0.8036,
+      "step": 36
+    },
+    {
+      "epoch": 0.8385269121813032,
+      "grad_norm": 1.6148282593388759,
+      "learning_rate": 9.956136120119856e-07,
+      "loss": 0.7945,
+      "step": 37
+    },
+    {
+      "epoch": 0.8611898016997167,
+      "grad_norm": 1.5660870386151309,
+      "learning_rate": 9.946941028620347e-07,
+      "loss": 0.7919,
+      "step": 38
+    },
+    {
+      "epoch": 0.8838526912181303,
+      "grad_norm": 1.5162976532167538,
+      "learning_rate": 9.936876709681666e-07,
+      "loss": 0.7965,
+      "step": 39
+    },
+    {
+      "epoch": 0.9065155807365439,
+      "grad_norm": 1.4779616090178773,
+      "learning_rate": 9.92594493170617e-07,
+      "loss": 0.7872,
+      "step": 40
+    },
+    {
+      "epoch": 0.9291784702549575,
+      "grad_norm": 1.4588545367417372,
+      "learning_rate": 9.914147615517526e-07,
+      "loss": 0.7933,
+      "step": 41
+    },
+    {
+      "epoch": 0.9518413597733711,
+      "grad_norm": 1.2450088034935203,
+      "learning_rate": 9.901486834023181e-07,
+      "loss": 0.7401,
+      "step": 42
+    },
+    {
+      "epoch": 0.9745042492917847,
+      "grad_norm": 1.1159548060929454,
+      "learning_rate": 9.887964811850157e-07,
+      "loss": 0.7496,
+      "step": 43
+    },
+    {
+      "epoch": 0.9971671388101983,
+      "grad_norm": 1.0418410473138606,
+      "learning_rate": 9.87358392495415e-07,
+      "loss": 0.7568,
+      "step": 44
+    },
+    {
+      "epoch": 1.019830028328612,
+      "grad_norm": 2.1594760368768195,
+      "learning_rate": 9.858346700202048e-07,
+      "loss": 1.3469,
+      "step": 45
+    },
+    {
+      "epoch": 1.0424929178470255,
+      "grad_norm": 0.9706954495224399,
+      "learning_rate": 9.842255814927944e-07,
+      "loss": 0.7412,
+      "step": 46
+    },
+    {
+      "epoch": 1.065155807365439,
+      "grad_norm": 0.9479843401943371,
+      "learning_rate": 9.825314096462684e-07,
+      "loss": 0.712,
+      "step": 47
+    },
+    {
+      "epoch": 1.0878186968838528,
+      "grad_norm": 0.8785518016295425,
+      "learning_rate": 9.807524521637102e-07,
+      "loss": 0.721,
+      "step": 48
+    },
+    {
+      "epoch": 1.1104815864022664,
+      "grad_norm": 0.9083971698155864,
+      "learning_rate": 9.788890216258938e-07,
+      "loss": 0.7405,
+      "step": 49
+    },
+    {
+      "epoch": 1.13314447592068,
+      "grad_norm": 0.9052818651846114,
+      "learning_rate": 9.769414454563615e-07,
+      "loss": 0.7223,
+      "step": 50
+    },
+    {
+      "epoch": 1.1558073654390935,
+      "grad_norm": 0.8244297426454674,
+      "learning_rate": 9.749100658638914e-07,
+      "loss": 0.7113,
+      "step": 51
+    },
+    {
+      "epoch": 1.178470254957507,
+      "grad_norm": 0.7448472800310213,
+      "learning_rate": 9.72795239782369e-07,
+      "loss": 0.7001,
+      "step": 52
+    },
+    {
+      "epoch": 1.2011331444759206,
+      "grad_norm": 0.8936397991398377,
+      "learning_rate": 9.705973388080692e-07,
+      "loss": 0.6924,
+      "step": 53
+    },
+    {
+      "epoch": 1.2237960339943343,
+      "grad_norm": 0.7188466048624885,
+      "learning_rate": 9.68316749134364e-07,
+      "loss": 0.7005,
+      "step": 54
+    },
+    {
+      "epoch": 1.246458923512748,
+      "grad_norm": 0.6923178573722074,
+      "learning_rate": 9.659538714838633e-07,
+      "loss": 0.6983,
+      "step": 55
+    },
+    {
+      "epoch": 1.2691218130311614,
+      "grad_norm": 0.6963394168232236,
+      "learning_rate": 9.63509121038005e-07,
+      "loss": 0.6932,
+      "step": 56
+    },
+    {
+      "epoch": 1.291784702549575,
+      "grad_norm": 0.6743675615821408,
+      "learning_rate": 9.609829273641032e-07,
+      "loss": 0.6789,
+      "step": 57
+    },
+    {
+      "epoch": 1.3144475920679888,
+      "grad_norm": 0.6786035246894967,
+      "learning_rate": 9.583757343398684e-07,
+      "loss": 0.6628,
+      "step": 58
+    },
+    {
+      "epoch": 1.3371104815864023,
+      "grad_norm": 0.7270460673039131,
+      "learning_rate": 9.55688000075414e-07,
+      "loss": 0.6831,
+      "step": 59
+    },
+    {
+      "epoch": 1.3597733711048159,
+      "grad_norm": 0.6841455902480504,
+      "learning_rate": 9.529201968327616e-07,
+      "loss": 0.6951,
+      "step": 60
+    },
+    {
+      "epoch": 1.3824362606232294,
+      "grad_norm": 0.6153616879449294,
+      "learning_rate": 9.500728109428603e-07,
+      "loss": 0.676,
+      "step": 61
+    },
+    {
+      "epoch": 1.405099150141643,
+      "grad_norm": 0.6177487537567523,
+      "learning_rate": 9.47146342720133e-07,
+      "loss": 0.6842,
+      "step": 62
+    },
+    {
+      "epoch": 1.4277620396600565,
+      "grad_norm": 0.5753559089127149,
+      "learning_rate": 9.441413063745659e-07,
+      "loss": 0.6408,
+      "step": 63
+    },
+    {
+      "epoch": 1.4504249291784703,
+      "grad_norm": 0.620464077741966,
+      "learning_rate": 9.410582299213572e-07,
+      "loss": 0.6952,
+      "step": 64
+    },
+    {
+      "epoch": 1.4730878186968839,
+      "grad_norm": 0.587732312757755,
+      "learning_rate": 9.378976550881392e-07,
+      "loss": 0.6897,
+      "step": 65
+    },
+    {
+      "epoch": 1.4957507082152974,
+      "grad_norm": 0.6133303288545134,
+      "learning_rate": 9.346601372197913e-07,
+      "loss": 0.6319,
+      "step": 66
+    },
+    {
+      "epoch": 1.5184135977337112,
+      "grad_norm": 0.5975684805854956,
+      "learning_rate": 9.313462451808599e-07,
+      "loss": 0.7085,
+      "step": 67
+    },
+    {
+      "epoch": 1.5410764872521248,
+      "grad_norm": 0.5691716789827311,
+      "learning_rate": 9.279565612556042e-07,
+      "loss": 0.6799,
+      "step": 68
+    },
+    {
+      "epoch": 1.5637393767705383,
+      "grad_norm": 0.5623581760482004,
+      "learning_rate": 9.24491681045682e-07,
+      "loss": 0.6627,
+      "step": 69
+    },
+    {
+      "epoch": 1.5864022662889519,
+      "grad_norm": 0.5545018113642449,
+      "learning_rate": 9.209522133654968e-07,
+      "loss": 0.6673,
+      "step": 70
+    },
+    {
+      "epoch": 1.6090651558073654,
+      "grad_norm": 0.6223379664208608,
+      "learning_rate": 9.17338780135223e-07,
+      "loss": 0.6682,
+      "step": 71
+    },
+    {
+      "epoch": 1.631728045325779,
+      "grad_norm": 0.5484348938274137,
+      "learning_rate": 9.136520162715286e-07,
+      "loss": 0.6459,
+      "step": 72
+    },
+    {
+      "epoch": 1.6543909348441925,
+      "grad_norm": 0.598633459691356,
+      "learning_rate": 9.098925695760131e-07,
+      "loss": 0.6663,
+      "step": 73
+    },
+    {
+      "epoch": 1.677053824362606,
+      "grad_norm": 0.6063642708751795,
+      "learning_rate": 9.060611006213832e-07,
+      "loss": 0.6471,
+      "step": 74
+    },
+    {
+      "epoch": 1.6997167138810199,
+      "grad_norm": 0.5310843433827631,
+      "learning_rate": 9.021582826353824e-07,
+      "loss": 0.6422,
+      "step": 75
+    },
+    {
+      "epoch": 1.7223796033994334,
+      "grad_norm": 0.5899701772442509,
+      "learning_rate": 8.981848013824993e-07,
+      "loss": 0.6616,
+      "step": 76
+    },
+    {
+      "epoch": 1.7450424929178472,
+      "grad_norm": 0.6774981304086599,
+      "learning_rate": 8.94141355043471e-07,
+      "loss": 0.6442,
+      "step": 77
+    },
+    {
+      "epoch": 1.7677053824362607,
+      "grad_norm": 0.5555862881849043,
+      "learning_rate": 8.90028654092606e-07,
+      "loss": 0.6427,
+      "step": 78
+    },
+    {
+      "epoch": 1.7903682719546743,
+      "grad_norm": 0.5521769324318557,
+      "learning_rate": 8.858474211729469e-07,
+      "loss": 0.6308,
+      "step": 79
+    },
+    {
+      "epoch": 1.8130311614730878,
+      "grad_norm": 0.5094008328024741,
+      "learning_rate": 8.815983909692941e-07,
+      "loss": 0.6375,
+      "step": 80
+    },
+    {
+      "epoch": 1.8356940509915014,
+      "grad_norm": 0.47949684902186096,
+      "learning_rate": 8.77282310079115e-07,
+      "loss": 0.6124,
+      "step": 81
+    },
+    {
+      "epoch": 1.858356940509915,
+      "grad_norm": 0.5457213358478963,
+      "learning_rate": 8.72899936881359e-07,
+      "loss": 0.676,
+      "step": 82
+    },
+    {
+      "epoch": 1.8810198300283285,
+      "grad_norm": 0.5475114660934921,
+      "learning_rate": 8.684520414032023e-07,
+      "loss": 0.6462,
+      "step": 83
+    },
+    {
+      "epoch": 1.903682719546742,
+      "grad_norm": 0.5780771596548755,
+      "learning_rate": 8.639394051847471e-07,
+      "loss": 0.629,
+      "step": 84
+    },
+    {
+      "epoch": 1.9263456090651558,
+      "grad_norm": 0.5153044368837152,
+      "learning_rate": 8.593628211416963e-07,
+      "loss": 0.6607,
+      "step": 85
+    },
+    {
+      "epoch": 1.9490084985835694,
+      "grad_norm": 0.5078347714748787,
+      "learning_rate": 8.547230934260311e-07,
+      "loss": 0.653,
+      "step": 86
+    },
+    {
+      "epoch": 1.9716713881019832,
+      "grad_norm": 0.5090369208403657,
+      "learning_rate": 8.500210372847126e-07,
+      "loss": 0.6555,
+      "step": 87
+    },
+    {
+      "epoch": 1.9943342776203967,
+      "grad_norm": 0.521639825746896,
+      "learning_rate": 8.45257478916435e-07,
+      "loss": 0.6187,
+      "step": 88
+    },
+    {
+      "epoch": 2.0169971671388103,
+      "grad_norm": 1.4410319682327064,
+      "learning_rate": 8.404332553264546e-07,
+      "loss": 1.1825,
+      "step": 89
+    },
+    {
+      "epoch": 2.039660056657224,
+      "grad_norm": 0.5362038209234066,
+      "learning_rate": 8.355492141795184e-07,
+      "loss": 0.6046,
+      "step": 90
+    },
+    {
+      "epoch": 2.0623229461756374,
+      "grad_norm": 0.5295224430525873,
+      "learning_rate": 8.306062136509219e-07,
+      "loss": 0.607,
+      "step": 91
+    },
+    {
+      "epoch": 2.084985835694051,
+      "grad_norm": 0.5438204610049183,
+      "learning_rate": 8.256051222757187e-07,
+      "loss": 0.6425,
+      "step": 92
+    },
+    {
+      "epoch": 2.1076487252124645,
+      "grad_norm": 0.5637438849056178,
+      "learning_rate": 8.2054681879611e-07,
+      "loss": 0.6472,
+      "step": 93
+    },
+    {
+      "epoch": 2.130311614730878,
+      "grad_norm": 0.4915626737833171,
+      "learning_rate": 8.154321920070412e-07,
+      "loss": 0.6366,
+      "step": 94
+    },
+    {
+      "epoch": 2.1529745042492916,
+      "grad_norm": 0.5445000714826581,
+      "learning_rate": 8.102621406000308e-07,
+      "loss": 0.6302,
+      "step": 95
+    },
+    {
+      "epoch": 2.1756373937677056,
+      "grad_norm": 0.544017574639994,
+      "learning_rate": 8.050375730052621e-07,
+      "loss": 0.6016,
+      "step": 96
+    },
+    {
+      "epoch": 2.198300283286119,
+      "grad_norm": 0.7667138278664033,
+      "learning_rate": 7.997594072319625e-07,
+      "loss": 0.6476,
+      "step": 97
+    },
+    {
+      "epoch": 2.2209631728045327,
+      "grad_norm": 0.5723261101431134,
+      "learning_rate": 7.944285707070997e-07,
+      "loss": 0.5982,
+      "step": 98
+    },
+    {
+      "epoch": 2.2436260623229463,
+      "grad_norm": 0.5198427284810859,
+      "learning_rate": 7.890460001124241e-07,
+      "loss": 0.6373,
+      "step": 99
+    },
+    {
+      "epoch": 2.26628895184136,
+      "grad_norm": 0.5082652201383684,
+      "learning_rate": 7.83612641219884e-07,
+      "loss": 0.5894,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 264,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 208143843852288.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}