Table2Latex-RL / trainer_state.json
LLLHHH's picture
Upload folder using huggingface_hub
026a164 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9973045822102425,
"eval_steps": 500,
"global_step": 185,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 3001.0,
"completions/mean_length": 1635.390625,
"completions/min_length": 880.0,
"epoch": 0.005390835579514825,
"grad_norm": 0.07817294615231643,
"kl": 0.0,
"learning_rate": 2.127659574468085e-08,
"loss": 0.01464410312473774,
"memory(GiB)": 53.08,
"reward": 1.3704201579093933,
"reward_std": 0.19254888594150543,
"rewards/Table2LatexAcc/mean": 0.5549997389316559,
"rewards/Table2LatexAcc/std": 0.2269514873623848,
"rewards/Table2Latexform/mean": 0.815420389175415,
"rewards/Table2Latexform/std": 0.27713412046432495,
"step": 1,
"train_speed(iter/s)": 0.003012
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2923.875,
"completions/mean_length": 1629.7890625,
"completions/min_length": 886.0,
"epoch": 0.026954177897574125,
"grad_norm": 0.07213148347345341,
"kl": 1.5087425708770752e-05,
"learning_rate": 1.0638297872340425e-07,
"loss": 0.028215568512678146,
"memory(GiB)": 74.0,
"reward": 1.3842923939228058,
"reward_std": 0.18567332532256842,
"rewards/Table2LatexAcc/mean": 0.5712194591760635,
"rewards/Table2LatexAcc/std": 0.19849798548966646,
"rewards/Table2Latexform/mean": 0.8130729347467422,
"rewards/Table2Latexform/std": 0.2439529187977314,
"step": 5,
"train_speed(iter/s)": 0.003096
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2889.2,
"completions/mean_length": 1723.5890625,
"completions/min_length": 962.5,
"epoch": 0.05390835579514825,
"grad_norm": 0.06916726038351133,
"kl": 1.736283302307129e-05,
"learning_rate": 2.127659574468085e-07,
"loss": 0.019673459231853485,
"memory(GiB)": 74.0,
"reward": 1.3981751084327698,
"reward_std": 0.16928213015198706,
"rewards/Table2LatexAcc/mean": 0.573980861902237,
"rewards/Table2LatexAcc/std": 0.19604488760232924,
"rewards/Table2Latexform/mean": 0.8241942763328552,
"rewards/Table2Latexform/std": 0.23927551954984666,
"step": 10,
"train_speed(iter/s)": 0.003101
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2915.4,
"completions/mean_length": 1641.03125,
"completions/min_length": 704.9,
"epoch": 0.08086253369272237,
"grad_norm": 0.07279863906405569,
"kl": 2.1731853485107423e-05,
"learning_rate": 3.1914893617021275e-07,
"loss": 0.02381864786148071,
"memory(GiB)": 74.0,
"reward": 1.379032826423645,
"reward_std": 0.15062467977404595,
"rewards/Table2LatexAcc/mean": 0.5421488165855408,
"rewards/Table2LatexAcc/std": 0.19123097956180574,
"rewards/Table2Latexform/mean": 0.8368840157985687,
"rewards/Table2Latexform/std": 0.21790579557418824,
"step": 15,
"train_speed(iter/s)": 0.003068
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2961.5,
"completions/mean_length": 1699.853125,
"completions/min_length": 861.8,
"epoch": 0.1078167115902965,
"grad_norm": 0.07154949573308267,
"kl": 2.1332502365112303e-05,
"learning_rate": 4.25531914893617e-07,
"loss": 0.027176868915557862,
"memory(GiB)": 74.0,
"reward": 1.3628795862197876,
"reward_std": 0.19128143787384033,
"rewards/Table2LatexAcc/mean": 0.5719542324542999,
"rewards/Table2LatexAcc/std": 0.1954931139945984,
"rewards/Table2Latexform/mean": 0.7909253478050232,
"rewards/Table2Latexform/std": 0.278898648917675,
"step": 20,
"train_speed(iter/s)": 0.003057
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2639.0,
"completions/mean_length": 1594.8921875,
"completions/min_length": 790.5,
"epoch": 0.1347708894878706,
"grad_norm": 0.13131531927012402,
"kl": 2.13623046875e-05,
"learning_rate": 5.319148936170212e-07,
"loss": 0.01629452407360077,
"memory(GiB)": 74.0,
"reward": 1.4447253465652465,
"reward_std": 0.15758238062262536,
"rewards/Table2LatexAcc/mean": 0.6045001387596131,
"rewards/Table2LatexAcc/std": 0.18096636980772018,
"rewards/Table2Latexform/mean": 0.840225213766098,
"rewards/Table2Latexform/std": 0.22630088329315184,
"step": 25,
"train_speed(iter/s)": 0.003093
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2905.0,
"completions/mean_length": 1642.709375,
"completions/min_length": 804.4,
"epoch": 0.16172506738544473,
"grad_norm": 0.06595082858040417,
"kl": 2.499222755432129e-05,
"learning_rate": 6.382978723404255e-07,
"loss": 0.027088361978530883,
"memory(GiB)": 74.0,
"reward": 1.3934171557426454,
"reward_std": 0.17848547250032426,
"rewards/Table2LatexAcc/mean": 0.5714545011520386,
"rewards/Table2LatexAcc/std": 0.1947036311030388,
"rewards/Table2Latexform/mean": 0.8219626545906067,
"rewards/Table2Latexform/std": 0.26306993812322615,
"step": 30,
"train_speed(iter/s)": 0.003089
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2934.1,
"completions/mean_length": 1610.1421875,
"completions/min_length": 755.4,
"epoch": 0.18867924528301888,
"grad_norm": 0.06925838510518537,
"kl": 4.082918167114258e-05,
"learning_rate": 7.446808510638297e-07,
"loss": 0.026965773105621337,
"memory(GiB)": 74.0,
"reward": 1.3997071743011475,
"reward_std": 0.1628158211708069,
"rewards/Table2LatexAcc/mean": 0.5788642525672912,
"rewards/Table2LatexAcc/std": 0.1913457229733467,
"rewards/Table2Latexform/mean": 0.8208428978919983,
"rewards/Table2Latexform/std": 0.24103213250637054,
"step": 35,
"train_speed(iter/s)": 0.003093
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2921.4,
"completions/mean_length": 1588.996875,
"completions/min_length": 877.3,
"epoch": 0.215633423180593,
"grad_norm": 0.07126416986934427,
"kl": 7.665157318115234e-05,
"learning_rate": 8.51063829787234e-07,
"loss": 0.019620102643966675,
"memory(GiB)": 74.0,
"reward": 1.3808103442192077,
"reward_std": 0.16285659074783326,
"rewards/Table2LatexAcc/mean": 0.575913542509079,
"rewards/Table2LatexAcc/std": 0.1952654466032982,
"rewards/Table2Latexform/mean": 0.8048967957496643,
"rewards/Table2Latexform/std": 0.26039574593305587,
"step": 40,
"train_speed(iter/s)": 0.003098
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2788.6,
"completions/mean_length": 1593.7375,
"completions/min_length": 759.5,
"epoch": 0.24258760107816713,
"grad_norm": 0.08624615635734913,
"kl": 0.0001492023468017578,
"learning_rate": 9.574468085106384e-07,
"loss": 0.015057304501533508,
"memory(GiB)": 74.0,
"reward": 1.4499380350112916,
"reward_std": 0.13701159432530402,
"rewards/Table2LatexAcc/mean": 0.6036670506000519,
"rewards/Table2LatexAcc/std": 0.19481946676969528,
"rewards/Table2Latexform/mean": 0.8462709665298462,
"rewards/Table2Latexform/std": 0.2081604614853859,
"step": 45,
"train_speed(iter/s)": 0.00312
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2874.3,
"completions/mean_length": 1671.671875,
"completions/min_length": 803.0,
"epoch": 0.2695417789757412,
"grad_norm": 0.0716967712923244,
"kl": 0.00020017623901367188,
"learning_rate": 9.99971193595054e-07,
"loss": 0.01770862340927124,
"memory(GiB)": 74.0,
"reward": 1.4406983852386475,
"reward_std": 0.13970830887556077,
"rewards/Table2LatexAcc/mean": 0.5882811903953552,
"rewards/Table2LatexAcc/std": 0.1866762012243271,
"rewards/Table2Latexform/mean": 0.8524171948432923,
"rewards/Table2Latexform/std": 0.20919820815324783,
"step": 50,
"train_speed(iter/s)": 0.003122
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2952.4,
"completions/mean_length": 1598.51875,
"completions/min_length": 714.2,
"epoch": 0.29649595687331537,
"grad_norm": 0.06459857928181523,
"kl": 0.000313568115234375,
"learning_rate": 9.99795166473852e-07,
"loss": 0.028602027893066408,
"memory(GiB)": 74.0,
"reward": 1.4819077610969544,
"reward_std": 0.13568009808659554,
"rewards/Table2LatexAcc/mean": 0.6212433338165283,
"rewards/Table2LatexAcc/std": 0.2183626562356949,
"rewards/Table2Latexform/mean": 0.860664427280426,
"rewards/Table2Latexform/std": 0.22936906069517135,
"step": 55,
"train_speed(iter/s)": 0.003116
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2717.5,
"completions/mean_length": 1575.8484375,
"completions/min_length": 791.8,
"epoch": 0.32345013477088946,
"grad_norm": 0.0685119094996376,
"kl": 0.0005132675170898438,
"learning_rate": 9.994591720616975e-07,
"loss": 0.009688837081193924,
"memory(GiB)": 74.0,
"reward": 1.4809726119041442,
"reward_std": 0.12747596204280853,
"rewards/Table2LatexAcc/mean": 0.6219225466251374,
"rewards/Table2LatexAcc/std": 0.18778605610132218,
"rewards/Table2Latexform/mean": 0.8590500473976135,
"rewards/Table2Latexform/std": 0.2048894114792347,
"step": 60,
"train_speed(iter/s)": 0.003124
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2847.6,
"completions/mean_length": 1657.3984375,
"completions/min_length": 848.6,
"epoch": 0.3504043126684636,
"grad_norm": 0.08157608763386034,
"kl": 0.0006221771240234375,
"learning_rate": 9.98963317898878e-07,
"loss": 0.019288820028305054,
"memory(GiB)": 74.0,
"reward": 1.5079341650009155,
"reward_std": 0.14110046178102492,
"rewards/Table2LatexAcc/mean": 0.634680551290512,
"rewards/Table2LatexAcc/std": 0.20506853014230728,
"rewards/Table2Latexform/mean": 0.8732536375522614,
"rewards/Table2Latexform/std": 0.2030529037117958,
"step": 65,
"train_speed(iter/s)": 0.003131
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2604.5,
"completions/mean_length": 1594.03125,
"completions/min_length": 874.2,
"epoch": 0.37735849056603776,
"grad_norm": 0.08207408816722973,
"kl": 0.0008758544921875,
"learning_rate": 9.983077626913043e-07,
"loss": 0.01205739676952362,
"memory(GiB)": 74.0,
"reward": 1.507494068145752,
"reward_std": 0.11759327277541161,
"rewards/Table2LatexAcc/mean": 0.6351809322834014,
"rewards/Table2LatexAcc/std": 0.20378359854221345,
"rewards/Table2Latexform/mean": 0.8723131835460662,
"rewards/Table2Latexform/std": 0.19805027171969414,
"step": 70,
"train_speed(iter/s)": 0.003146
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2689.8,
"completions/mean_length": 1616.209375,
"completions/min_length": 859.7,
"epoch": 0.40431266846361186,
"grad_norm": 0.07397791319392964,
"kl": 0.0009979248046875,
"learning_rate": 9.974927162597145e-07,
"loss": 0.00553036704659462,
"memory(GiB)": 74.0,
"reward": 1.4614445567131042,
"reward_std": 0.09695540629327297,
"rewards/Table2LatexAcc/mean": 0.5970049917697906,
"rewards/Table2LatexAcc/std": 0.19226298183202745,
"rewards/Table2Latexform/mean": 0.8644395887851715,
"rewards/Table2Latexform/std": 0.19864091277122498,
"step": 75,
"train_speed(iter/s)": 0.003149
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2567.9,
"completions/mean_length": 1566.4984375,
"completions/min_length": 893.1,
"epoch": 0.431266846361186,
"grad_norm": 0.07175621361462335,
"kl": 0.0010894775390625,
"learning_rate": 9.965184394725169e-07,
"loss": 0.0031857024878263474,
"memory(GiB)": 74.0,
"reward": 1.519572389125824,
"reward_std": 0.11443859413266182,
"rewards/Table2LatexAcc/mean": 0.6457596719264984,
"rewards/Table2LatexAcc/std": 0.19394133985042572,
"rewards/Table2Latexform/mean": 0.8738127529621125,
"rewards/Table2Latexform/std": 0.2086488611996174,
"step": 80,
"train_speed(iter/s)": 0.003163
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2495.6,
"completions/mean_length": 1533.6078125,
"completions/min_length": 824.4,
"epoch": 0.4582210242587601,
"grad_norm": 0.07293410493568253,
"kl": 0.0012256622314453125,
"learning_rate": 9.953852441622956e-07,
"loss": 0.010935479402542114,
"memory(GiB)": 74.0,
"reward": 1.5418180227279663,
"reward_std": 0.09861706346273422,
"rewards/Table2LatexAcc/mean": 0.6385594129562377,
"rewards/Table2LatexAcc/std": 0.20701712965965272,
"rewards/Table2Latexform/mean": 0.9032586097717286,
"rewards/Table2Latexform/std": 0.13576763048768042,
"step": 85,
"train_speed(iter/s)": 0.003179
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2680.3,
"completions/mean_length": 1574.1953125,
"completions/min_length": 785.9,
"epoch": 0.48517520215633425,
"grad_norm": 0.06793751628944666,
"kl": 0.0012157440185546875,
"learning_rate": 9.940934930260036e-07,
"loss": 5.354555323719978e-05,
"memory(GiB)": 74.0,
"reward": 1.4896148085594176,
"reward_std": 0.09992180205881596,
"rewards/Table2LatexAcc/mean": 0.6215297818183899,
"rewards/Table2LatexAcc/std": 0.19945850372314453,
"rewards/Table2Latexform/mean": 0.8680850267410278,
"rewards/Table2Latexform/std": 0.21053530871868134,
"step": 90,
"train_speed(iter/s)": 0.003182
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2833.6,
"completions/mean_length": 1614.984375,
"completions/min_length": 836.7,
"epoch": 0.5121293800539084,
"grad_norm": 0.07800355989359384,
"kl": 0.001270294189453125,
"learning_rate": 9.92643599508875e-07,
"loss": 0.01619407832622528,
"memory(GiB)": 74.0,
"reward": 1.4949531078338623,
"reward_std": 0.13312736451625823,
"rewards/Table2LatexAcc/mean": 0.6362012684345245,
"rewards/Table2LatexAcc/std": 0.20503575205802918,
"rewards/Table2Latexform/mean": 0.8587518692016601,
"rewards/Table2Latexform/std": 0.21175305247306825,
"step": 95,
"train_speed(iter/s)": 0.003179
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2672.2,
"completions/mean_length": 1541.909375,
"completions/min_length": 850.3,
"epoch": 0.5390835579514824,
"grad_norm": 0.06742732491254022,
"kl": 0.001406097412109375,
"learning_rate": 9.910360276720974e-07,
"loss": 0.011617515981197358,
"memory(GiB)": 74.0,
"reward": 1.5225663423538207,
"reward_std": 0.12018043175339699,
"rewards/Table2LatexAcc/mean": 0.634308785200119,
"rewards/Table2LatexAcc/std": 0.19708103239536284,
"rewards/Table2Latexform/mean": 0.8882575571537018,
"rewards/Table2Latexform/std": 0.1708666443824768,
"step": 100,
"train_speed(iter/s)": 0.003187
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2863.4,
"completions/mean_length": 1613.728125,
"completions/min_length": 945.5,
"epoch": 0.5660377358490566,
"grad_norm": 0.0656531441071073,
"kl": 0.0012493133544921875,
"learning_rate": 9.89271292044279e-07,
"loss": 0.016812124848365785,
"memory(GiB)": 74.0,
"reward": 1.494718039035797,
"reward_std": 0.13982294127345085,
"rewards/Table2LatexAcc/mean": 0.6318223595619201,
"rewards/Table2LatexAcc/std": 0.2267067864537239,
"rewards/Table2Latexform/mean": 0.862895667552948,
"rewards/Table2Latexform/std": 0.21887822449207306,
"step": 105,
"train_speed(iter/s)": 0.003185
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2608.4,
"completions/mean_length": 1581.09375,
"completions/min_length": 790.2,
"epoch": 0.5929919137466307,
"grad_norm": 0.06582315254735907,
"kl": 0.001549530029296875,
"learning_rate": 9.873499574567681e-07,
"loss": 0.010095475614070893,
"memory(GiB)": 74.0,
"reward": 1.4990519642829896,
"reward_std": 0.10162455774843693,
"rewards/Table2LatexAcc/mean": 0.6363059639930725,
"rewards/Table2LatexAcc/std": 0.19157345294952394,
"rewards/Table2Latexform/mean": 0.862746000289917,
"rewards/Table2Latexform/std": 0.20280475318431854,
"step": 110,
"train_speed(iter/s)": 0.003195
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2667.1,
"completions/mean_length": 1623.0609375,
"completions/min_length": 862.5,
"epoch": 0.6199460916442049,
"grad_norm": 0.0676327840344494,
"kl": 0.0012561798095703125,
"learning_rate": 9.852726388628688e-07,
"loss": 0.009667134284973145,
"memory(GiB)": 74.0,
"reward": 1.499183714389801,
"reward_std": 0.11013109833002091,
"rewards/Table2LatexAcc/mean": 0.6425846576690674,
"rewards/Table2LatexAcc/std": 0.20786909610033036,
"rewards/Table2Latexform/mean": 0.8565990567207337,
"rewards/Table2Latexform/std": 0.22757124677300453,
"step": 115,
"train_speed(iter/s)": 0.003199
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2648.9,
"completions/mean_length": 1617.8234375,
"completions/min_length": 878.5,
"epoch": 0.6469002695417789,
"grad_norm": 0.05968134371530777,
"kl": 0.00138702392578125,
"learning_rate": 9.830400011410156e-07,
"loss": 0.003092067874968052,
"memory(GiB)": 74.0,
"reward": 1.4849407434463502,
"reward_std": 0.08951778598129749,
"rewards/Table2LatexAcc/mean": 0.6164660751819611,
"rewards/Table2LatexAcc/std": 0.204762826859951,
"rewards/Table2Latexform/mean": 0.8684746503829956,
"rewards/Table2Latexform/std": 0.20679847225546838,
"step": 120,
"train_speed(iter/s)": 0.003203
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2692.2,
"completions/mean_length": 1548.7328125,
"completions/min_length": 787.5,
"epoch": 0.6738544474393531,
"grad_norm": 0.08102589945740883,
"kl": 0.0015228271484375,
"learning_rate": 9.806527588819692e-07,
"loss": 0.010635277628898621,
"memory(GiB)": 74.0,
"reward": 1.4484204292297362,
"reward_std": 0.12051350250840187,
"rewards/Table2LatexAcc/mean": 0.5983371019363404,
"rewards/Table2LatexAcc/std": 0.19683932662010192,
"rewards/Table2Latexform/mean": 0.8500832915306091,
"rewards/Table2Latexform/std": 0.22092146053910255,
"step": 125,
"train_speed(iter/s)": 0.003201
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2778.3,
"completions/mean_length": 1625.8203125,
"completions/min_length": 975.0,
"epoch": 0.7008086253369272,
"grad_norm": 0.06581718834736765,
"kl": 0.0013751983642578125,
"learning_rate": 9.781116761600992e-07,
"loss": 0.008332135528326035,
"memory(GiB)": 74.0,
"reward": 1.4899320960044862,
"reward_std": 0.1020436353981495,
"rewards/Table2LatexAcc/mean": 0.6282051384449006,
"rewards/Table2LatexAcc/std": 0.18501487672328948,
"rewards/Table2Latexform/mean": 0.8617269277572632,
"rewards/Table2Latexform/std": 0.21871328055858613,
"step": 130,
"train_speed(iter/s)": 0.003199
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2638.1,
"completions/mean_length": 1558.1703125,
"completions/min_length": 627.1,
"epoch": 0.7277628032345014,
"grad_norm": 0.08042871559451785,
"kl": 0.0016143798828125,
"learning_rate": 9.75417566288832e-07,
"loss": 0.022313964366912842,
"memory(GiB)": 74.0,
"reward": 1.4969127774238586,
"reward_std": 0.09756124764680862,
"rewards/Table2LatexAcc/mean": 0.6259892284870148,
"rewards/Table2LatexAcc/std": 0.18883997797966004,
"rewards/Table2Latexform/mean": 0.8709235429763794,
"rewards/Table2Latexform/std": 0.20598914995789527,
"step": 135,
"train_speed(iter/s)": 0.003199
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2594.9,
"completions/mean_length": 1566.1609375,
"completions/min_length": 858.8,
"epoch": 0.7547169811320755,
"grad_norm": 0.06451190019619368,
"kl": 0.00159912109375,
"learning_rate": 9.725712915603353e-07,
"loss": 0.00471530370414257,
"memory(GiB)": 74.0,
"reward": 1.4983545541763306,
"reward_std": 0.10673168860375881,
"rewards/Table2LatexAcc/mean": 0.6402543127536774,
"rewards/Table2LatexAcc/std": 0.20193217247724532,
"rewards/Table2Latexform/mean": 0.8581002414226532,
"rewards/Table2Latexform/std": 0.2166273184120655,
"step": 140,
"train_speed(iter/s)": 0.003204
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2585.7,
"completions/mean_length": 1566.3171875,
"completions/min_length": 783.5,
"epoch": 0.7816711590296496,
"grad_norm": 0.06842850537031975,
"kl": 0.0016510009765625,
"learning_rate": 9.69573762969529e-07,
"loss": 0.008447134494781494,
"memory(GiB)": 74.0,
"reward": 1.5043591618537904,
"reward_std": 0.10398341864347457,
"rewards/Table2LatexAcc/mean": 0.6323516488075256,
"rewards/Table2LatexAcc/std": 0.19334442913532257,
"rewards/Table2Latexform/mean": 0.8720075249671936,
"rewards/Table2Latexform/std": 0.18240121901035308,
"step": 145,
"train_speed(iter/s)": 0.00321
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2677.8,
"completions/mean_length": 1609.378125,
"completions/min_length": 905.7,
"epoch": 0.8086253369272237,
"grad_norm": 0.06551621158024094,
"kl": 0.0015777587890625,
"learning_rate": 9.664259399225067e-07,
"loss": 0.005352784693241119,
"memory(GiB)": 74.0,
"reward": 1.5480861902236938,
"reward_std": 0.0993690624833107,
"rewards/Table2LatexAcc/mean": 0.6449747204780578,
"rewards/Table2LatexAcc/std": 0.1948181599378586,
"rewards/Table2Latexform/mean": 0.9031114995479583,
"rewards/Table2Latexform/std": 0.15625113472342492,
"step": 150,
"train_speed(iter/s)": 0.003209
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2549.0,
"completions/mean_length": 1590.425,
"completions/min_length": 906.1,
"epoch": 0.8355795148247979,
"grad_norm": 0.062419199774521504,
"kl": 0.001617431640625,
"learning_rate": 9.631288299294624e-07,
"loss": 0.005914273858070374,
"memory(GiB)": 74.0,
"reward": 1.5300285577774049,
"reward_std": 0.07754914276301861,
"rewards/Table2LatexAcc/mean": 0.6536247074604035,
"rewards/Table2LatexAcc/std": 0.1888583406805992,
"rewards/Table2Latexform/mean": 0.8764038562774659,
"rewards/Table2Latexform/std": 0.19668345972895623,
"step": 155,
"train_speed(iter/s)": 0.003214
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2675.8,
"completions/mean_length": 1594.9125,
"completions/min_length": 904.8,
"epoch": 0.862533692722372,
"grad_norm": 0.07381311561133726,
"kl": 0.001567840576171875,
"learning_rate": 9.596834882822218e-07,
"loss": 0.0008831036277115345,
"memory(GiB)": 74.0,
"reward": 1.5059723734855652,
"reward_std": 0.11190913170576096,
"rewards/Table2LatexAcc/mean": 0.6299772620201111,
"rewards/Table2LatexAcc/std": 0.18921414837241174,
"rewards/Table2Latexform/mean": 0.8759951233863831,
"rewards/Table2Latexform/std": 0.18999719768762588,
"step": 160,
"train_speed(iter/s)": 0.003217
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2556.4,
"completions/mean_length": 1544.603125,
"completions/min_length": 762.6,
"epoch": 0.889487870619946,
"grad_norm": 0.05908311708682426,
"kl": 0.00150909423828125,
"learning_rate": 9.560910177164787e-07,
"loss": 0.007628290355205536,
"memory(GiB)": 74.0,
"reward": 1.5502776145935058,
"reward_std": 0.07942587062716484,
"rewards/Table2LatexAcc/mean": 0.6583487272262574,
"rewards/Table2LatexAcc/std": 0.18620822578668594,
"rewards/Table2Latexform/mean": 0.8919288635253906,
"rewards/Table2Latexform/std": 0.18032970726490022,
"step": 165,
"train_speed(iter/s)": 0.003222
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2511.0,
"completions/mean_length": 1593.021875,
"completions/min_length": 835.0,
"epoch": 0.9164420485175202,
"grad_norm": 0.059058153054454235,
"kl": 0.00181427001953125,
"learning_rate": 9.523525680588476e-07,
"loss": 0.008848436921834946,
"memory(GiB)": 74.0,
"reward": 1.5144242644309998,
"reward_std": 0.09105739071965217,
"rewards/Table2LatexAcc/mean": 0.6321313917636872,
"rewards/Table2LatexAcc/std": 0.18138092905282974,
"rewards/Table2Latexform/mean": 0.8822928845882416,
"rewards/Table2Latexform/std": 0.19216497614979744,
"step": 170,
"train_speed(iter/s)": 0.003226
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2540.9,
"completions/mean_length": 1593.3265625,
"completions/min_length": 730.9,
"epoch": 0.9433962264150944,
"grad_norm": 0.060705012485582154,
"kl": 0.00139312744140625,
"learning_rate": 9.484693358588434e-07,
"loss": 0.007192098349332809,
"memory(GiB)": 74.0,
"reward": 1.5356804728507996,
"reward_std": 0.09475091025233269,
"rewards/Table2LatexAcc/mean": 0.6415718376636506,
"rewards/Table2LatexAcc/std": 0.1903410866856575,
"rewards/Table2Latexform/mean": 0.8941086232662201,
"rewards/Table2Latexform/std": 0.1649520058184862,
"step": 175,
"train_speed(iter/s)": 0.003229
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2650.3,
"completions/mean_length": 1586.671875,
"completions/min_length": 818.4,
"epoch": 0.9703504043126685,
"grad_norm": 0.07391935117978014,
"kl": 0.001525115966796875,
"learning_rate": 9.444425640059076e-07,
"loss": 0.007059115171432495,
"memory(GiB)": 74.0,
"reward": 1.5181043028831482,
"reward_std": 0.09545421227812767,
"rewards/Table2LatexAcc/mean": 0.638035798072815,
"rewards/Table2LatexAcc/std": 0.20127029120922088,
"rewards/Table2Latexform/mean": 0.8800684928894043,
"rewards/Table2Latexform/std": 0.18569674119353294,
"step": 180,
"train_speed(iter/s)": 0.00323
},
{
"clip_ratio": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 2459.1,
"completions/mean_length": 1533.0296875,
"completions/min_length": 935.5,
"epoch": 0.9973045822102425,
"grad_norm": 0.07964238807994012,
"kl": 0.00167999267578125,
"learning_rate": 9.402735413316011e-07,
"loss": -0.00023833760060369967,
"memory(GiB)": 74.0,
"reward": 1.5326952815055848,
"reward_std": 0.08919371329247952,
"rewards/Table2LatexAcc/mean": 0.6511692225933075,
"rewards/Table2LatexAcc/std": 0.1804724305868149,
"rewards/Table2Latexform/mean": 0.8815260589122772,
"rewards/Table2Latexform/std": 0.19189485386013985,
"step": 185,
"train_speed(iter/s)": 0.003237
}
],
"logging_steps": 5,
"max_steps": 925,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}