phi3-dpo-m1 / trainer_state.json
ludekcizinsky's picture
Upload folder using huggingface_hub
cae622e verified
Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN, "... is not valid JSON
{
"best_metric": 21.83156394958496,
"best_model_checkpoint": "./output/checkpoints/2024-05-27_09-04-31/checkpoint-100",
"epoch": 1.0,
"eval_steps": 100,
"global_step": 198,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.025252525252525252,
"grad_norm": 26.353445053100586,
"learning_rate": 4.000000000000001e-06,
"logits/chosen": 0.14427797496318817,
"logits/rejected": -0.5873457193374634,
"logps/chosen": -0.901843249797821,
"logps/rejected": -1.3607301712036133,
"loss": 24.9998,
"rewards/accuracies": 0.21250000596046448,
"rewards/chosen": -1.578416777192615e-05,
"rewards/margins": 2.430938138786587e-06,
"rewards/rejected": -1.8215103409602307e-05,
"step": 5
},
{
"epoch": 0.050505050505050504,
"grad_norm": NaN,
"learning_rate": 8.000000000000001e-06,
"logits/chosen": -0.10329052060842514,
"logits/rejected": -0.4683811664581299,
"logps/chosen": -0.9063997268676758,
"logps/rejected": -1.461859107017517,
"loss": 24.9337,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.00036737616755999625,
"rewards/margins": 0.0006723683327436447,
"rewards/rejected": -0.0010397445876151323,
"step": 10
},
{
"epoch": 0.07575757575757576,
"grad_norm": 13.749723434448242,
"learning_rate": 1.3000000000000001e-05,
"logits/chosen": -0.2425023317337036,
"logits/rejected": -0.6693668365478516,
"logps/chosen": -0.8707982897758484,
"logps/rejected": -1.1566194295883179,
"loss": 24.9041,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.0014928742311894894,
"rewards/margins": 0.0009821585845202208,
"rewards/rejected": -0.00247503281570971,
"step": 15
},
{
"epoch": 0.10101010101010101,
"grad_norm": 25.53832244873047,
"learning_rate": 1.8e-05,
"logits/chosen": -0.46215763688087463,
"logits/rejected": -0.9008939862251282,
"logps/chosen": -0.959465503692627,
"logps/rejected": -1.5446056127548218,
"loss": 24.2631,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.0067633287981152534,
"rewards/margins": 0.007808461785316467,
"rewards/rejected": -0.014571788720786572,
"step": 20
},
{
"epoch": 0.12626262626262627,
"grad_norm": 45.06657791137695,
"learning_rate": 1.9985985720017786e-05,
"logits/chosen": -0.04087737202644348,
"logits/rejected": -0.5188297033309937,
"logps/chosen": -0.9965022802352905,
"logps/rejected": -1.3733254671096802,
"loss": 24.1692,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.014551234431564808,
"rewards/margins": 0.009625318460166454,
"rewards/rejected": -0.024176552891731262,
"step": 25
},
{
"epoch": 0.15151515151515152,
"grad_norm": 28.255924224853516,
"learning_rate": 1.9900485105144544e-05,
"logits/chosen": -0.14505064487457275,
"logits/rejected": -0.5278365015983582,
"logps/chosen": -1.0397828817367554,
"logps/rejected": -1.44753897190094,
"loss": 24.1349,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.018694132566452026,
"rewards/margins": 0.01500606257468462,
"rewards/rejected": -0.03370019793510437,
"step": 30
},
{
"epoch": 0.17676767676767677,
"grad_norm": NaN,
"learning_rate": 1.9776556239997146e-05,
"logits/chosen": -0.4809038043022156,
"logits/rejected": -0.9093053936958313,
"logps/chosen": -1.3904650211334229,
"logps/rejected": -2.406257390975952,
"loss": 23.5774,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04243111237883568,
"rewards/margins": 0.052741266787052155,
"rewards/rejected": -0.09517236799001694,
"step": 35
},
{
"epoch": 0.20202020202020202,
"grad_norm": 46.83095932006836,
"learning_rate": 1.955324742088516e-05,
"logits/chosen": -0.6266540288925171,
"logits/rejected": -1.0290076732635498,
"logps/chosen": -1.2514160871505737,
"logps/rejected": -2.1771531105041504,
"loss": 22.3291,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.03719799965620041,
"rewards/margins": 0.04375718533992767,
"rewards/rejected": -0.08095519244670868,
"step": 40
},
{
"epoch": 0.22727272727272727,
"grad_norm": 76.44580841064453,
"learning_rate": 1.9255590665712214e-05,
"logits/chosen": -0.6130943894386292,
"logits/rejected": -1.143413782119751,
"logps/chosen": -1.5433876514434814,
"logps/rejected": -2.6532750129699707,
"loss": 21.656,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.05903216451406479,
"rewards/margins": 0.05665038153529167,
"rewards/rejected": -0.11568254232406616,
"step": 45
},
{
"epoch": 0.25252525252525254,
"grad_norm": 78.35297393798828,
"learning_rate": 1.8965472436868288e-05,
"logits/chosen": -0.757357656955719,
"logits/rejected": -1.0666834115982056,
"logps/chosen": -1.3742765188217163,
"logps/rejected": -3.0053694248199463,
"loss": 22.6627,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.05383248254656792,
"rewards/margins": 0.09586457908153534,
"rewards/rejected": -0.14969706535339355,
"step": 50
},
{
"epoch": 0.2777777777777778,
"grad_norm": 82.29180145263672,
"learning_rate": 1.8540204424421264e-05,
"logits/chosen": -0.8564749956130981,
"logits/rejected": -1.3737789392471313,
"logps/chosen": -1.733337163925171,
"logps/rejected": -3.3698067665100098,
"loss": 19.3611,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.08400858938694,
"rewards/margins": 0.10341653972864151,
"rewards/rejected": -0.18742512166500092,
"step": 55
},
{
"epoch": 0.30303030303030304,
"grad_norm": 128.23907470703125,
"learning_rate": 1.804847246055326e-05,
"logits/chosen": -0.9640189409255981,
"logits/rejected": -1.1732914447784424,
"logps/chosen": -2.538499593734741,
"logps/rejected": -3.0090465545654297,
"loss": 29.7881,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.14390432834625244,
"rewards/margins": 0.02762184664607048,
"rewards/rejected": -0.17152616381645203,
"step": 60
},
{
"epoch": 0.3282828282828283,
"grad_norm": 49.566158294677734,
"learning_rate": 1.7494103438361252e-05,
"logits/chosen": -0.7158849239349365,
"logits/rejected": -1.0623328685760498,
"logps/chosen": -1.4396604299545288,
"logps/rejected": -2.000624179840088,
"loss": 21.9915,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.06708293408155441,
"rewards/margins": 0.03939032554626465,
"rewards/rejected": -0.10647325217723846,
"step": 65
},
{
"epoch": 0.35353535353535354,
"grad_norm": 76.89603424072266,
"learning_rate": 1.6881411722458688e-05,
"logits/chosen": -0.8769875764846802,
"logits/rejected": -1.1103827953338623,
"logps/chosen": -2.1615917682647705,
"logps/rejected": -3.0439255237579346,
"loss": 21.986,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.12244679778814316,
"rewards/margins": 0.06274138391017914,
"rewards/rejected": -0.1851881742477417,
"step": 70
},
{
"epoch": 0.3787878787878788,
"grad_norm": 134.9673309326172,
"learning_rate": 1.6215165572528598e-05,
"logits/chosen": -1.343915581703186,
"logits/rejected": -1.401227355003357,
"logps/chosen": -2.2325069904327393,
"logps/rejected": -3.132831573486328,
"loss": 21.2106,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.13722343742847443,
"rewards/margins": 0.06202084943652153,
"rewards/rejected": -0.19924426078796387,
"step": 75
},
{
"epoch": 0.40404040404040403,
"grad_norm": 137.21859741210938,
"learning_rate": 1.5500550034448415e-05,
"logits/chosen": -1.3024094104766846,
"logits/rejected": -1.5494719743728638,
"logps/chosen": -2.4210548400878906,
"logps/rejected": -3.3495230674743652,
"loss": 23.1095,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.15112502872943878,
"rewards/margins": 0.07425413280725479,
"rewards/rejected": -0.22537918388843536,
"step": 80
},
{
"epoch": 0.4292929292929293,
"grad_norm": 351.0603942871094,
"learning_rate": 1.5050862598575474e-05,
"logits/chosen": -1.311993956565857,
"logits/rejected": -1.6289136409759521,
"logps/chosen": -2.586198091506958,
"logps/rejected": -5.161986827850342,
"loss": 25.0728,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16497337818145752,
"rewards/margins": 0.17613837122917175,
"rewards/rejected": -0.3411117494106293,
"step": 85
},
{
"epoch": 0.45454545454545453,
"grad_norm": 376.21038818359375,
"learning_rate": 1.4270564388663761e-05,
"logits/chosen": -1.4695305824279785,
"logits/rejected": -1.5699679851531982,
"logps/chosen": -3.0274829864501953,
"logps/rejected": -3.7816379070281982,
"loss": 24.3757,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.20180432498455048,
"rewards/margins": 0.06594176590442657,
"rewards/rejected": -0.26774606108665466,
"step": 90
},
{
"epoch": 0.4797979797979798,
"grad_norm": 83.94548034667969,
"learning_rate": 1.3457030606163564e-05,
"logits/chosen": -1.542257308959961,
"logits/rejected": -1.640545129776001,
"logps/chosen": -3.1931662559509277,
"logps/rejected": -4.362542152404785,
"loss": 21.7905,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.21774420142173767,
"rewards/margins": 0.09037742763757706,
"rewards/rejected": -0.30812162160873413,
"step": 95
},
{
"epoch": 0.5050505050505051,
"grad_norm": 174.58786010742188,
"learning_rate": 1.2616592559684408e-05,
"logits/chosen": -1.5426051616668701,
"logits/rejected": -1.7211687564849854,
"logps/chosen": -2.798499345779419,
"logps/rejected": -3.3964920043945312,
"loss": 25.8166,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.17160701751708984,
"rewards/margins": 0.05040215328335762,
"rewards/rejected": -0.22200918197631836,
"step": 100
},
{
"epoch": 0.5050505050505051,
"eval_logits/chosen": -1.919495940208435,
"eval_logits/rejected": -2.218794584274292,
"eval_logps/chosen": -2.5173401832580566,
"eval_logps/rejected": -3.3597702980041504,
"eval_loss": 21.83156394958496,
"eval_rewards/accuracies": 0.6421874761581421,
"eval_rewards/chosen": -0.15560917556285858,
"eval_rewards/margins": 0.05931411311030388,
"eval_rewards/rejected": -0.21492330729961395,
"eval_runtime": 256.4168,
"eval_samples_per_second": 2.496,
"eval_steps_per_second": 0.156,
"step": 100
},
{
"epoch": 0.5303030303030303,
"grad_norm": 94.69363403320312,
"learning_rate": 1.1755790939673208e-05,
"logits/chosen": -1.6892818212509155,
"logits/rejected": -1.860984206199646,
"logps/chosen": -2.6088526248931885,
"logps/rejected": -3.893810272216797,
"loss": 24.6292,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.1631762683391571,
"rewards/margins": 0.0721951425075531,
"rewards/rejected": -0.235371395945549,
"step": 105
},
{
"epoch": 0.5555555555555556,
"grad_norm": 299.636962890625,
"learning_rate": 1.088132491563602e-05,
"logits/chosen": -1.6523587703704834,
"logits/rejected": -1.648794412612915,
"logps/chosen": -2.3819022178649902,
"logps/rejected": -3.91084623336792,
"loss": 24.6609,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.14854103326797485,
"rewards/margins": 0.08199040591716766,
"rewards/rejected": -0.23053142428398132,
"step": 110
},
{
"epoch": 0.5808080808080808,
"grad_norm": 179.6541748046875,
"learning_rate": 1e-05,
"logits/chosen": -1.7479238510131836,
"logits/rejected": -1.8762273788452148,
"logps/chosen": -2.4850611686706543,
"logps/rejected": -3.9139976501464844,
"loss": 21.9825,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.1599404662847519,
"rewards/margins": 0.0621558353304863,
"rewards/rejected": -0.2220962941646576,
"step": 115
},
{
"epoch": 0.6060606060606061,
"grad_norm": 416.4597473144531,
"learning_rate": 9.118675084363986e-06,
"logits/chosen": -1.6893389225006104,
"logits/rejected": -1.9248136281967163,
"logps/chosen": -2.431549549102783,
"logps/rejected": -3.4075489044189453,
"loss": 23.7008,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.1548345983028412,
"rewards/margins": 0.03470990061759949,
"rewards/rejected": -0.18954448401927948,
"step": 120
},
{
"epoch": 0.6313131313131313,
"grad_norm": 106.20417022705078,
"learning_rate": 8.244209060326794e-06,
"logits/chosen": -1.6689144372940063,
"logits/rejected": -1.932077407836914,
"logps/chosen": -2.1763813495635986,
"logps/rejected": -4.08168888092041,
"loss": 20.9314,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.13834789395332336,
"rewards/margins": 0.07367957383394241,
"rewards/rejected": -0.21202746033668518,
"step": 125
},
{
"epoch": 0.6565656565656566,
"grad_norm": 628.0269775390625,
"learning_rate": 7.383407440315595e-06,
"logits/chosen": -1.7707713842391968,
"logits/rejected": -1.8211300373077393,
"logps/chosen": -2.591797351837158,
"logps/rejected": -4.223265647888184,
"loss": 19.9625,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.1714746505022049,
"rewards/margins": 0.10012316703796387,
"rewards/rejected": -0.27159780263900757,
"step": 130
},
{
"epoch": 0.6818181818181818,
"grad_norm": 166.4376220703125,
"learning_rate": 6.542969393836436e-06,
"logits/chosen": -1.6975538730621338,
"logits/rejected": -1.7919883728027344,
"logps/chosen": -2.655794858932495,
"logps/rejected": -3.9739787578582764,
"loss": 19.885,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.17945000529289246,
"rewards/margins": 0.08590926975011826,
"rewards/rejected": -0.2653592824935913,
"step": 135
},
{
"epoch": 0.7070707070707071,
"grad_norm": 924.48388671875,
"learning_rate": 5.729435611336239e-06,
"logits/chosen": -1.6683040857315063,
"logits/rejected": -1.8297067880630493,
"logps/chosen": -3.389685869216919,
"logps/rejected": -4.693975925445557,
"loss": 21.4041,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.22584636509418488,
"rewards/margins": 0.08828467130661011,
"rewards/rejected": -0.3141310513019562,
"step": 140
},
{
"epoch": 0.7323232323232324,
"grad_norm": 208.90626525878906,
"learning_rate": 4.949137401424527e-06,
"logits/chosen": -1.690625786781311,
"logits/rejected": -1.8179527521133423,
"logps/chosen": -3.1737165451049805,
"logps/rejected": -4.919283866882324,
"loss": 19.995,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.22326549887657166,
"rewards/margins": 0.10102611780166626,
"rewards/rejected": -0.3242916166782379,
"step": 145
},
{
"epoch": 0.7575757575757576,
"grad_norm": 243.00192260742188,
"learning_rate": 4.208147417604665e-06,
"logits/chosen": -1.6386387348175049,
"logits/rejected": -1.7950681447982788,
"logps/chosen": -3.373720645904541,
"logps/rejected": -4.483418941497803,
"loss": 20.3863,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.24290914833545685,
"rewards/margins": 0.07580031454563141,
"rewards/rejected": -0.31870946288108826,
"step": 150
},
{
"epoch": 0.7828282828282829,
"grad_norm": 205.0689697265625,
"learning_rate": 3.51223239798274e-06,
"logits/chosen": -1.7644588947296143,
"logits/rejected": -1.792384147644043,
"logps/chosen": -2.8454086780548096,
"logps/rejected": -4.108365058898926,
"loss": 22.1816,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.19682423770427704,
"rewards/margins": 0.07537179440259933,
"rewards/rejected": -0.2721960246562958,
"step": 155
},
{
"epoch": 0.8080808080808081,
"grad_norm": 202.64425659179688,
"learning_rate": 2.8668082857562006e-06,
"logits/chosen": -1.7155227661132812,
"logits/rejected": -1.7265026569366455,
"logps/chosen": -3.2442708015441895,
"logps/rejected": -5.168461799621582,
"loss": 20.5007,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.22376994788646698,
"rewards/margins": 0.11927111446857452,
"rewards/rejected": -0.3430410623550415,
"step": 160
},
{
"epoch": 0.8333333333333334,
"grad_norm": 146.06727600097656,
"learning_rate": 2.2768980797561125e-06,
"logits/chosen": -1.5448095798492432,
"logits/rejected": -1.6818040609359741,
"logps/chosen": -3.1757941246032715,
"logps/rejected": -4.661167144775391,
"loss": 23.3162,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.22800321877002716,
"rewards/margins": 0.10849568992853165,
"rewards/rejected": -0.336498886346817,
"step": 165
},
{
"epoch": 0.8585858585858586,
"grad_norm": 266.4602966308594,
"learning_rate": 1.7470927430702277e-06,
"logits/chosen": -1.77353036403656,
"logits/rejected": -1.8091161251068115,
"logps/chosen": -3.679595470428467,
"logps/rejected": -5.641579627990723,
"loss": 21.0313,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.2708187699317932,
"rewards/margins": 0.11891458183526993,
"rewards/rejected": -0.38973334431648254,
"step": 170
},
{
"epoch": 0.8838383838383839,
"grad_norm": 191.99391174316406,
"learning_rate": 1.281515473974614e-06,
"logits/chosen": -1.7262178659439087,
"logits/rejected": -1.7621949911117554,
"logps/chosen": -3.6915946006774902,
"logps/rejected": -4.522196292877197,
"loss": 23.1575,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.2737148106098175,
"rewards/margins": 0.06611393392086029,
"rewards/rejected": -0.339828759431839,
"step": 175
},
{
"epoch": 0.9090909090909091,
"grad_norm": 445.5780334472656,
"learning_rate": 8.837896172345827e-07,
"logits/chosen": -1.7799314260482788,
"logits/rejected": -1.758079171180725,
"logps/chosen": -3.744454860687256,
"logps/rejected": -5.533487319946289,
"loss": 22.4579,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.26133713126182556,
"rewards/margins": 0.10725338757038116,
"rewards/rejected": -0.36859050393104553,
"step": 180
},
{
"epoch": 0.9343434343434344,
"grad_norm": 351.77313232421875,
"learning_rate": 5.570104655044428e-07,
"logits/chosen": -1.8014914989471436,
"logits/rejected": -1.8869857788085938,
"logps/chosen": -3.1039249897003174,
"logps/rejected": -4.702515602111816,
"loss": 23.8499,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.22470612823963165,
"rewards/margins": 0.12236537039279938,
"rewards/rejected": -0.34707149863243103,
"step": 185
},
{
"epoch": 0.9595959595959596,
"grad_norm": 203.9517059326172,
"learning_rate": 3.0372117028111825e-07,
"logits/chosen": -1.650368332862854,
"logits/rejected": -1.7378900051116943,
"logps/chosen": -3.5179672241210938,
"logps/rejected": -4.001964092254639,
"loss": 25.7021,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.26367172598838806,
"rewards/margins": 0.021851424127817154,
"rewards/rejected": -0.2855231761932373,
"step": 190
},
{
"epoch": 0.9848484848484849,
"grad_norm": 358.923095703125,
"learning_rate": 1.2589294988404887e-07,
"logits/chosen": -1.6349338293075562,
"logits/rejected": -1.7700506448745728,
"logps/chosen": -3.084740161895752,
"logps/rejected": -4.999522686004639,
"loss": 18.5645,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.22231082618236542,
"rewards/margins": 0.12103237211704254,
"rewards/rejected": -0.34334319829940796,
"step": 195
},
{
"epoch": 1.0,
"step": 198,
"total_flos": 0.0,
"train_loss": 22.75462433786103,
"train_runtime": 3251.7686,
"train_samples_per_second": 0.973,
"train_steps_per_second": 0.061
}
],
"logging_steps": 5,
"max_steps": 198,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}