{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.012,
"eval_steps": 500,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 8e-05,
"grad_norm": 0.18695563077926636,
"learning_rate": 4e-05,
"loss": 0.626,
"step": 1
},
{
"epoch": 0.00016,
"grad_norm": 0.16322645545005798,
"learning_rate": 8e-05,
"loss": 0.6748,
"step": 2
},
{
"epoch": 0.00024,
"grad_norm": 0.20551565289497375,
"learning_rate": 0.00012,
"loss": 0.8631,
"step": 3
},
{
"epoch": 0.00032,
"grad_norm": 0.19189168512821198,
"learning_rate": 0.00016,
"loss": 0.7236,
"step": 4
},
{
"epoch": 0.0004,
"grad_norm": 0.17240828275680542,
"learning_rate": 0.0002,
"loss": 0.6456,
"step": 5
},
{
"epoch": 0.00048,
"grad_norm": 0.1921045035123825,
"learning_rate": 0.00016,
"loss": 0.7794,
"step": 6
},
{
"epoch": 0.00056,
"grad_norm": 0.17362892627716064,
"learning_rate": 0.00012,
"loss": 0.5114,
"step": 7
},
{
"epoch": 0.00064,
"grad_norm": 0.22235442698001862,
"learning_rate": 8e-05,
"loss": 0.8547,
"step": 8
},
{
"epoch": 0.00072,
"grad_norm": 0.17304766178131104,
"learning_rate": 4e-05,
"loss": 0.7826,
"step": 9
},
{
"epoch": 0.0008,
"grad_norm": 0.20972397923469543,
"learning_rate": 0.0,
"loss": 0.6725,
"step": 10
},
{
"epoch": 0.00088,
"grad_norm": 0.18230140209197998,
"learning_rate": 0.0,
"loss": 0.8175,
"step": 11
},
{
"epoch": 0.00096,
"grad_norm": 0.2014843076467514,
"learning_rate": 0.00019988795518207283,
"loss": 1.0219,
"step": 12
},
{
"epoch": 0.00104,
"grad_norm": 0.24639324843883514,
"learning_rate": 0.0001998719487795118,
"loss": 0.8582,
"step": 13
},
{
"epoch": 0.00112,
"grad_norm": 0.1707516610622406,
"learning_rate": 0.0001998559423769508,
"loss": 0.5871,
"step": 14
},
{
"epoch": 0.0012,
"grad_norm": 0.17754444479942322,
"learning_rate": 0.00019983993597438976,
"loss": 0.8023,
"step": 15
},
{
"epoch": 0.00128,
"grad_norm": 0.2088427096605301,
"learning_rate": 0.00019982392957182873,
"loss": 0.5597,
"step": 16
},
{
"epoch": 0.00136,
"grad_norm": 0.2147207260131836,
"learning_rate": 0.0001998079231692677,
"loss": 0.9279,
"step": 17
},
{
"epoch": 0.00144,
"grad_norm": 0.2416459172964096,
"learning_rate": 0.0001997919167667067,
"loss": 0.7524,
"step": 18
},
{
"epoch": 0.00152,
"grad_norm": 0.18660244345664978,
"learning_rate": 0.00019977591036414566,
"loss": 0.723,
"step": 19
},
{
"epoch": 0.0016,
"grad_norm": 0.24767373502254486,
"learning_rate": 0.00019975990396158463,
"loss": 0.8528,
"step": 20
},
{
"epoch": 0.00168,
"grad_norm": 0.2119741290807724,
"learning_rate": 0.00019974389755902363,
"loss": 0.8555,
"step": 21
},
{
"epoch": 0.00176,
"grad_norm": 0.19749240577220917,
"learning_rate": 0.0001997278911564626,
"loss": 0.8141,
"step": 22
},
{
"epoch": 0.00184,
"grad_norm": 0.15635022521018982,
"learning_rate": 0.00019971188475390156,
"loss": 1.0032,
"step": 23
},
{
"epoch": 0.00192,
"grad_norm": 0.28329262137413025,
"learning_rate": 0.00019969587835134053,
"loss": 0.8104,
"step": 24
},
{
"epoch": 0.002,
"grad_norm": 0.2267996221780777,
"learning_rate": 0.00019967987194877953,
"loss": 0.5932,
"step": 25
},
{
"epoch": 0.00208,
"grad_norm": 0.2392159253358841,
"learning_rate": 0.0001996638655462185,
"loss": 0.7813,
"step": 26
},
{
"epoch": 0.00216,
"grad_norm": 0.29656457901000977,
"learning_rate": 0.00019964785914365746,
"loss": 0.7647,
"step": 27
},
{
"epoch": 0.00224,
"grad_norm": 0.276050329208374,
"learning_rate": 0.00019963185274109646,
"loss": 0.7368,
"step": 28
},
{
"epoch": 0.00232,
"grad_norm": 0.26816362142562866,
"learning_rate": 0.00019961584633853543,
"loss": 1.0461,
"step": 29
},
{
"epoch": 0.0024,
"grad_norm": 0.2567765414714813,
"learning_rate": 0.0001995998399359744,
"loss": 1.0064,
"step": 30
},
{
"epoch": 0.00248,
"grad_norm": 0.28481513261795044,
"learning_rate": 0.00019958383353341336,
"loss": 0.6283,
"step": 31
},
{
"epoch": 0.00256,
"grad_norm": 0.19182950258255005,
"learning_rate": 0.00019956782713085236,
"loss": 0.5184,
"step": 32
},
{
"epoch": 0.00264,
"grad_norm": 0.2858627438545227,
"learning_rate": 0.00019955182072829133,
"loss": 0.5853,
"step": 33
},
{
"epoch": 0.00272,
"grad_norm": 0.23260071873664856,
"learning_rate": 0.0001995358143257303,
"loss": 0.5486,
"step": 34
},
{
"epoch": 0.0028,
"grad_norm": 0.2574014365673065,
"learning_rate": 0.00019951980792316926,
"loss": 0.7127,
"step": 35
},
{
"epoch": 0.00288,
"grad_norm": 0.27332785725593567,
"learning_rate": 0.00019950380152060826,
"loss": 0.9821,
"step": 36
},
{
"epoch": 0.00296,
"grad_norm": 0.2918913960456848,
"learning_rate": 0.00019948779511804723,
"loss": 0.853,
"step": 37
},
{
"epoch": 0.00304,
"grad_norm": 0.22690187394618988,
"learning_rate": 0.0001994717887154862,
"loss": 0.6959,
"step": 38
},
{
"epoch": 0.00312,
"grad_norm": 0.24837082624435425,
"learning_rate": 0.00019945578231292518,
"loss": 0.7622,
"step": 39
},
{
"epoch": 0.0032,
"grad_norm": 0.24773573875427246,
"learning_rate": 0.00019943977591036416,
"loss": 0.9853,
"step": 40
},
{
"epoch": 0.00328,
"grad_norm": 0.2665715515613556,
"learning_rate": 0.00019942376950780313,
"loss": 0.7365,
"step": 41
},
{
"epoch": 0.00336,
"grad_norm": 0.2815437912940979,
"learning_rate": 0.0001994077631052421,
"loss": 0.9859,
"step": 42
},
{
"epoch": 0.00344,
"grad_norm": 0.23276300728321075,
"learning_rate": 0.00019939175670268108,
"loss": 0.7499,
"step": 43
},
{
"epoch": 0.00352,
"grad_norm": 0.2659528851509094,
"learning_rate": 0.00019937575030012006,
"loss": 0.7896,
"step": 44
},
{
"epoch": 0.0036,
"grad_norm": 0.2777968943119049,
"learning_rate": 0.00019935974389755903,
"loss": 0.7405,
"step": 45
},
{
"epoch": 0.00368,
"grad_norm": 0.2703694999217987,
"learning_rate": 0.000199343737494998,
"loss": 0.8549,
"step": 46
},
{
"epoch": 0.00376,
"grad_norm": 0.2913441061973572,
"learning_rate": 0.00019932773109243698,
"loss": 0.7648,
"step": 47
},
{
"epoch": 0.00384,
"grad_norm": 0.21126149594783783,
"learning_rate": 0.00019931172468987596,
"loss": 0.6442,
"step": 48
},
{
"epoch": 0.00392,
"grad_norm": 0.35344573855400085,
"learning_rate": 0.00019929571828731493,
"loss": 1.0157,
"step": 49
},
{
"epoch": 0.004,
"grad_norm": 0.211960569024086,
"learning_rate": 0.0001992797118847539,
"loss": 1.0145,
"step": 50
},
{
"epoch": 0.00408,
"grad_norm": 0.17948386073112488,
"learning_rate": 0.00019926370548219288,
"loss": 0.4476,
"step": 51
},
{
"epoch": 0.00416,
"grad_norm": 0.18907713890075684,
"learning_rate": 0.00019924769907963185,
"loss": 0.8988,
"step": 52
},
{
"epoch": 0.00424,
"grad_norm": 0.9255684614181519,
"learning_rate": 0.00019923169267707086,
"loss": 1.3091,
"step": 53
},
{
"epoch": 0.00432,
"grad_norm": 0.23128096759319305,
"learning_rate": 0.0001992156862745098,
"loss": 0.6738,
"step": 54
},
{
"epoch": 0.0044,
"grad_norm": 0.32358431816101074,
"learning_rate": 0.00019919967987194878,
"loss": 1.0512,
"step": 55
},
{
"epoch": 0.00448,
"grad_norm": 0.21004758775234222,
"learning_rate": 0.00019918367346938775,
"loss": 1.1109,
"step": 56
},
{
"epoch": 0.00456,
"grad_norm": 0.17308218777179718,
"learning_rate": 0.00019916766706682676,
"loss": 0.7186,
"step": 57
},
{
"epoch": 0.00464,
"grad_norm": 0.1969563513994217,
"learning_rate": 0.0001991516606642657,
"loss": 0.8231,
"step": 58
},
{
"epoch": 0.00472,
"grad_norm": 0.20930427312850952,
"learning_rate": 0.00019913565426170468,
"loss": 0.7483,
"step": 59
},
{
"epoch": 0.0048,
"grad_norm": 0.2239973247051239,
"learning_rate": 0.00019911964785914368,
"loss": 0.9065,
"step": 60
},
{
"epoch": 0.00488,
"grad_norm": 0.21532970666885376,
"learning_rate": 0.00019910364145658266,
"loss": 0.7133,
"step": 61
},
{
"epoch": 0.00496,
"grad_norm": 0.22679661214351654,
"learning_rate": 0.0001990876350540216,
"loss": 0.8632,
"step": 62
},
{
"epoch": 0.00504,
"grad_norm": 0.18961389362812042,
"learning_rate": 0.00019907162865146058,
"loss": 0.7713,
"step": 63
},
{
"epoch": 0.00512,
"grad_norm": 0.3985270857810974,
"learning_rate": 0.00019905562224889958,
"loss": 1.1621,
"step": 64
},
{
"epoch": 0.0052,
"grad_norm": 0.1857418268918991,
"learning_rate": 0.00019903961584633856,
"loss": 0.7665,
"step": 65
},
{
"epoch": 0.00528,
"grad_norm": 0.21082746982574463,
"learning_rate": 0.0001990236094437775,
"loss": 0.8936,
"step": 66
},
{
"epoch": 0.00536,
"grad_norm": 0.2598806619644165,
"learning_rate": 0.0001990076030412165,
"loss": 0.8367,
"step": 67
},
{
"epoch": 0.00544,
"grad_norm": 0.21064138412475586,
"learning_rate": 0.00019899159663865548,
"loss": 0.7481,
"step": 68
},
{
"epoch": 0.00552,
"grad_norm": 0.17963984608650208,
"learning_rate": 0.00019897559023609445,
"loss": 0.857,
"step": 69
},
{
"epoch": 0.0056,
"grad_norm": 0.2018403857946396,
"learning_rate": 0.0001989595838335334,
"loss": 0.6117,
"step": 70
},
{
"epoch": 0.00568,
"grad_norm": 0.2090141773223877,
"learning_rate": 0.0001989435774309724,
"loss": 0.4831,
"step": 71
},
{
"epoch": 0.00576,
"grad_norm": 0.19442321360111237,
"learning_rate": 0.00019892757102841138,
"loss": 0.6887,
"step": 72
},
{
"epoch": 0.00584,
"grad_norm": 0.20884303748607635,
"learning_rate": 0.00019891156462585035,
"loss": 0.8775,
"step": 73
},
{
"epoch": 0.00592,
"grad_norm": 0.23718436062335968,
"learning_rate": 0.00019889555822328933,
"loss": 0.9292,
"step": 74
},
{
"epoch": 0.006,
"grad_norm": 0.2717212438583374,
"learning_rate": 0.0001988795518207283,
"loss": 0.66,
"step": 75
},
{
"epoch": 0.00608,
"grad_norm": 0.2522720396518707,
"learning_rate": 0.00019886354541816728,
"loss": 0.7293,
"step": 76
},
{
"epoch": 0.00616,
"grad_norm": 0.22638511657714844,
"learning_rate": 0.00019884753901560625,
"loss": 0.9605,
"step": 77
},
{
"epoch": 0.00624,
"grad_norm": 0.25353768467903137,
"learning_rate": 0.00019883153261304523,
"loss": 0.7305,
"step": 78
},
{
"epoch": 0.00632,
"grad_norm": 0.26388710737228394,
"learning_rate": 0.0001988155262104842,
"loss": 1.0473,
"step": 79
},
{
"epoch": 0.0064,
"grad_norm": 0.22967277467250824,
"learning_rate": 0.00019879951980792318,
"loss": 0.8024,
"step": 80
},
{
"epoch": 0.00648,
"grad_norm": 0.21001595258712769,
"learning_rate": 0.00019878351340536215,
"loss": 0.6549,
"step": 81
},
{
"epoch": 0.00656,
"grad_norm": 0.22154393792152405,
"learning_rate": 0.00019876750700280113,
"loss": 0.8322,
"step": 82
},
{
"epoch": 0.00664,
"grad_norm": 0.2273344248533249,
"learning_rate": 0.0001987515006002401,
"loss": 0.8533,
"step": 83
},
{
"epoch": 0.00672,
"grad_norm": 0.2042098492383957,
"learning_rate": 0.00019873549419767908,
"loss": 1.0104,
"step": 84
},
{
"epoch": 0.0068,
"grad_norm": 0.18789270520210266,
"learning_rate": 0.00019871948779511805,
"loss": 0.8703,
"step": 85
},
{
"epoch": 0.00688,
"grad_norm": 0.16704747080802917,
"learning_rate": 0.00019870348139255703,
"loss": 0.6079,
"step": 86
},
{
"epoch": 0.00696,
"grad_norm": 0.20875659584999084,
"learning_rate": 0.000198687474989996,
"loss": 0.8806,
"step": 87
},
{
"epoch": 0.00704,
"grad_norm": 0.17773783206939697,
"learning_rate": 0.000198671468587435,
"loss": 0.6195,
"step": 88
},
{
"epoch": 0.00712,
"grad_norm": 0.20498760044574738,
"learning_rate": 0.00019865546218487395,
"loss": 0.8146,
"step": 89
},
{
"epoch": 0.0072,
"grad_norm": 0.1688094437122345,
"learning_rate": 0.00019863945578231293,
"loss": 1.1415,
"step": 90
},
{
"epoch": 0.00728,
"grad_norm": 0.22424210608005524,
"learning_rate": 0.0001986234493797519,
"loss": 0.8,
"step": 91
},
{
"epoch": 0.00736,
"grad_norm": 0.21771728992462158,
"learning_rate": 0.0001986074429771909,
"loss": 0.5614,
"step": 92
},
{
"epoch": 0.00744,
"grad_norm": 0.2241130769252777,
"learning_rate": 0.00019859143657462985,
"loss": 0.8084,
"step": 93
},
{
"epoch": 0.00752,
"grad_norm": 0.1654769629240036,
"learning_rate": 0.00019857543017206883,
"loss": 0.687,
"step": 94
},
{
"epoch": 0.0076,
"grad_norm": 0.16390787065029144,
"learning_rate": 0.0001985594237695078,
"loss": 0.5289,
"step": 95
},
{
"epoch": 0.00768,
"grad_norm": 0.259437620639801,
"learning_rate": 0.0001985434173669468,
"loss": 0.5644,
"step": 96
},
{
"epoch": 0.00776,
"grad_norm": 0.20152436196804047,
"learning_rate": 0.00019852741096438575,
"loss": 0.6532,
"step": 97
},
{
"epoch": 0.00784,
"grad_norm": 0.22755707800388336,
"learning_rate": 0.00019851140456182473,
"loss": 0.7435,
"step": 98
},
{
"epoch": 0.00792,
"grad_norm": 0.21967531740665436,
"learning_rate": 0.00019849539815926373,
"loss": 0.7607,
"step": 99
},
{
"epoch": 0.008,
"grad_norm": 0.18391412496566772,
"learning_rate": 0.0001984793917567027,
"loss": 0.7239,
"step": 100
},
{
"epoch": 0.00808,
"grad_norm": 0.2660037577152252,
"learning_rate": 0.00019846338535414165,
"loss": 0.7299,
"step": 101
},
{
"epoch": 0.00816,
"grad_norm": 0.1816340982913971,
"learning_rate": 0.00019844737895158062,
"loss": 0.7276,
"step": 102
},
{
"epoch": 0.00824,
"grad_norm": 0.21206796169281006,
"learning_rate": 0.00019843137254901963,
"loss": 0.7689,
"step": 103
},
{
"epoch": 0.00832,
"grad_norm": 0.18705548346042633,
"learning_rate": 0.0001984153661464586,
"loss": 0.7199,
"step": 104
},
{
"epoch": 0.0084,
"grad_norm": 0.2467879205942154,
"learning_rate": 0.00019839935974389755,
"loss": 1.0206,
"step": 105
},
{
"epoch": 0.00848,
"grad_norm": 0.2145715057849884,
"learning_rate": 0.00019838335334133655,
"loss": 0.8011,
"step": 106
},
{
"epoch": 0.00856,
"grad_norm": 0.23377610743045807,
"learning_rate": 0.00019836734693877553,
"loss": 0.9455,
"step": 107
},
{
"epoch": 0.00864,
"grad_norm": 0.13857395946979523,
"learning_rate": 0.0001983513405362145,
"loss": 0.603,
"step": 108
},
{
"epoch": 0.00872,
"grad_norm": 0.21066828072071075,
"learning_rate": 0.00019833533413365345,
"loss": 0.7147,
"step": 109
},
{
"epoch": 0.0088,
"grad_norm": 0.22423389554023743,
"learning_rate": 0.00019831932773109245,
"loss": 0.7619,
"step": 110
},
{
"epoch": 0.00888,
"grad_norm": 0.20110934972763062,
"learning_rate": 0.00019830332132853143,
"loss": 0.6215,
"step": 111
},
{
"epoch": 0.00896,
"grad_norm": 0.22843226790428162,
"learning_rate": 0.0001982873149259704,
"loss": 0.833,
"step": 112
},
{
"epoch": 0.00904,
"grad_norm": 0.171301007270813,
"learning_rate": 0.00019827130852340938,
"loss": 0.9602,
"step": 113
},
{
"epoch": 0.00912,
"grad_norm": 0.21754777431488037,
"learning_rate": 0.00019825530212084835,
"loss": 0.8931,
"step": 114
},
{
"epoch": 0.0092,
"grad_norm": 0.16314199566841125,
"learning_rate": 0.00019823929571828732,
"loss": 0.6414,
"step": 115
},
{
"epoch": 0.00928,
"grad_norm": 0.17339545488357544,
"learning_rate": 0.0001982232893157263,
"loss": 0.8579,
"step": 116
},
{
"epoch": 0.00936,
"grad_norm": 0.2185641974210739,
"learning_rate": 0.00019820728291316527,
"loss": 0.5762,
"step": 117
},
{
"epoch": 0.00944,
"grad_norm": 0.23066163063049316,
"learning_rate": 0.00019819127651060425,
"loss": 0.7929,
"step": 118
},
{
"epoch": 0.00952,
"grad_norm": 0.16946138441562653,
"learning_rate": 0.00019817527010804322,
"loss": 0.6734,
"step": 119
},
{
"epoch": 0.0096,
"grad_norm": 0.16290231049060822,
"learning_rate": 0.0001981592637054822,
"loss": 0.4331,
"step": 120
},
{
"epoch": 0.00968,
"grad_norm": 0.14785629510879517,
"learning_rate": 0.00019814325730292117,
"loss": 0.5846,
"step": 121
},
{
"epoch": 0.00976,
"grad_norm": 0.15986767411231995,
"learning_rate": 0.00019812725090036015,
"loss": 0.7937,
"step": 122
},
{
"epoch": 0.00984,
"grad_norm": 0.22597737610340118,
"learning_rate": 0.00019811124449779912,
"loss": 0.645,
"step": 123
},
{
"epoch": 0.00992,
"grad_norm": 0.16873855888843536,
"learning_rate": 0.0001980952380952381,
"loss": 0.783,
"step": 124
},
{
"epoch": 0.01,
"grad_norm": 0.24884037673473358,
"learning_rate": 0.00019807923169267707,
"loss": 0.806,
"step": 125
},
{
"epoch": 0.01008,
"grad_norm": 0.1921387016773224,
"learning_rate": 0.00019806322529011605,
"loss": 0.7133,
"step": 126
},
{
"epoch": 0.01016,
"grad_norm": 0.1714552938938141,
"learning_rate": 0.00019804721888755505,
"loss": 0.8823,
"step": 127
},
{
"epoch": 0.01024,
"grad_norm": 0.17558862268924713,
"learning_rate": 0.000198031212484994,
"loss": 0.5438,
"step": 128
},
{
"epoch": 0.01032,
"grad_norm": 0.20176133513450623,
"learning_rate": 0.00019801520608243297,
"loss": 0.6564,
"step": 129
},
{
"epoch": 0.0104,
"grad_norm": 0.16648930311203003,
"learning_rate": 0.00019799919967987195,
"loss": 0.8251,
"step": 130
},
{
"epoch": 0.01048,
"grad_norm": 0.15654760599136353,
"learning_rate": 0.00019798319327731095,
"loss": 0.7868,
"step": 131
},
{
"epoch": 0.01056,
"grad_norm": 0.1604606807231903,
"learning_rate": 0.0001979671868747499,
"loss": 0.9408,
"step": 132
},
{
"epoch": 0.01064,
"grad_norm": 0.1831110417842865,
"learning_rate": 0.00019795118047218887,
"loss": 0.6789,
"step": 133
},
{
"epoch": 0.01072,
"grad_norm": 0.19563564658164978,
"learning_rate": 0.00019793517406962787,
"loss": 0.6197,
"step": 134
},
{
"epoch": 0.0108,
"grad_norm": 0.15682204067707062,
"learning_rate": 0.00019791916766706685,
"loss": 0.3705,
"step": 135
},
{
"epoch": 0.01088,
"grad_norm": 0.20387424528598785,
"learning_rate": 0.0001979031612645058,
"loss": 0.6203,
"step": 136
},
{
"epoch": 0.01096,
"grad_norm": 0.18805289268493652,
"learning_rate": 0.00019788715486194477,
"loss": 0.5732,
"step": 137
},
{
"epoch": 0.01104,
"grad_norm": 0.190113365650177,
"learning_rate": 0.00019787114845938377,
"loss": 0.4919,
"step": 138
},
{
"epoch": 0.01112,
"grad_norm": 0.22532878816127777,
"learning_rate": 0.00019785514205682275,
"loss": 0.4651,
"step": 139
},
{
"epoch": 0.0112,
"grad_norm": 0.23364323377609253,
"learning_rate": 0.0001978391356542617,
"loss": 0.9228,
"step": 140
},
{
"epoch": 0.01128,
"grad_norm": 0.18550938367843628,
"learning_rate": 0.0001978231292517007,
"loss": 0.7556,
"step": 141
},
{
"epoch": 0.01136,
"grad_norm": 0.21325847506523132,
"learning_rate": 0.00019780712284913967,
"loss": 0.6726,
"step": 142
},
{
"epoch": 0.01144,
"grad_norm": 0.21966691315174103,
"learning_rate": 0.00019779111644657865,
"loss": 0.7203,
"step": 143
},
{
"epoch": 0.01152,
"grad_norm": 0.17671513557434082,
"learning_rate": 0.0001977751100440176,
"loss": 0.7607,
"step": 144
},
{
"epoch": 0.0116,
"grad_norm": 0.21979670226573944,
"learning_rate": 0.0001977591036414566,
"loss": 0.738,
"step": 145
},
{
"epoch": 0.01168,
"grad_norm": 0.24346943199634552,
"learning_rate": 0.00019774309723889557,
"loss": 0.9575,
"step": 146
},
{
"epoch": 0.01176,
"grad_norm": 0.17305152118206024,
"learning_rate": 0.00019772709083633455,
"loss": 0.7207,
"step": 147
},
{
"epoch": 0.01184,
"grad_norm": 0.17260083556175232,
"learning_rate": 0.0001977110844337735,
"loss": 0.7765,
"step": 148
},
{
"epoch": 0.01192,
"grad_norm": 0.20756393671035767,
"learning_rate": 0.0001976950780312125,
"loss": 0.7864,
"step": 149
},
{
"epoch": 0.012,
"grad_norm": 0.20012526214122772,
"learning_rate": 0.00019767907162865147,
"loss": 0.6435,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 12500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.5678754959040512e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}