| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9973045822102425, | |
| "eval_steps": 500, | |
| "global_step": 185, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3001.0, | |
| "completions/mean_length": 1635.390625, | |
| "completions/min_length": 880.0, | |
| "epoch": 0.005390835579514825, | |
| "grad_norm": 0.07817294615231643, | |
| "kl": 0.0, | |
| "learning_rate": 2.127659574468085e-08, | |
| "loss": 0.01464410312473774, | |
| "memory(GiB)": 53.08, | |
| "reward": 1.3704201579093933, | |
| "reward_std": 0.19254888594150543, | |
| "rewards/Table2LatexAcc/mean": 0.5549997389316559, | |
| "rewards/Table2LatexAcc/std": 0.2269514873623848, | |
| "rewards/Table2Latexform/mean": 0.815420389175415, | |
| "rewards/Table2Latexform/std": 0.27713412046432495, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.003012 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2923.875, | |
| "completions/mean_length": 1629.7890625, | |
| "completions/min_length": 886.0, | |
| "epoch": 0.026954177897574125, | |
| "grad_norm": 0.07213148347345341, | |
| "kl": 1.5087425708770752e-05, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "loss": 0.028215568512678146, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3842923939228058, | |
| "reward_std": 0.18567332532256842, | |
| "rewards/Table2LatexAcc/mean": 0.5712194591760635, | |
| "rewards/Table2LatexAcc/std": 0.19849798548966646, | |
| "rewards/Table2Latexform/mean": 0.8130729347467422, | |
| "rewards/Table2Latexform/std": 0.2439529187977314, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.003096 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2889.2, | |
| "completions/mean_length": 1723.5890625, | |
| "completions/min_length": 962.5, | |
| "epoch": 0.05390835579514825, | |
| "grad_norm": 0.06916726038351133, | |
| "kl": 1.736283302307129e-05, | |
| "learning_rate": 2.127659574468085e-07, | |
| "loss": 0.019673459231853485, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3981751084327698, | |
| "reward_std": 0.16928213015198706, | |
| "rewards/Table2LatexAcc/mean": 0.573980861902237, | |
| "rewards/Table2LatexAcc/std": 0.19604488760232924, | |
| "rewards/Table2Latexform/mean": 0.8241942763328552, | |
| "rewards/Table2Latexform/std": 0.23927551954984666, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.003101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2915.4, | |
| "completions/mean_length": 1641.03125, | |
| "completions/min_length": 704.9, | |
| "epoch": 0.08086253369272237, | |
| "grad_norm": 0.07279863906405569, | |
| "kl": 2.1731853485107423e-05, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "loss": 0.02381864786148071, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.379032826423645, | |
| "reward_std": 0.15062467977404595, | |
| "rewards/Table2LatexAcc/mean": 0.5421488165855408, | |
| "rewards/Table2LatexAcc/std": 0.19123097956180574, | |
| "rewards/Table2Latexform/mean": 0.8368840157985687, | |
| "rewards/Table2Latexform/std": 0.21790579557418824, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.003068 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2961.5, | |
| "completions/mean_length": 1699.853125, | |
| "completions/min_length": 861.8, | |
| "epoch": 0.1078167115902965, | |
| "grad_norm": 0.07154949573308267, | |
| "kl": 2.1332502365112303e-05, | |
| "learning_rate": 4.25531914893617e-07, | |
| "loss": 0.027176868915557862, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3628795862197876, | |
| "reward_std": 0.19128143787384033, | |
| "rewards/Table2LatexAcc/mean": 0.5719542324542999, | |
| "rewards/Table2LatexAcc/std": 0.1954931139945984, | |
| "rewards/Table2Latexform/mean": 0.7909253478050232, | |
| "rewards/Table2Latexform/std": 0.278898648917675, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.003057 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2639.0, | |
| "completions/mean_length": 1594.8921875, | |
| "completions/min_length": 790.5, | |
| "epoch": 0.1347708894878706, | |
| "grad_norm": 0.13131531927012402, | |
| "kl": 2.13623046875e-05, | |
| "learning_rate": 5.319148936170212e-07, | |
| "loss": 0.01629452407360077, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4447253465652465, | |
| "reward_std": 0.15758238062262536, | |
| "rewards/Table2LatexAcc/mean": 0.6045001387596131, | |
| "rewards/Table2LatexAcc/std": 0.18096636980772018, | |
| "rewards/Table2Latexform/mean": 0.840225213766098, | |
| "rewards/Table2Latexform/std": 0.22630088329315184, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.003093 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2905.0, | |
| "completions/mean_length": 1642.709375, | |
| "completions/min_length": 804.4, | |
| "epoch": 0.16172506738544473, | |
| "grad_norm": 0.06595082858040417, | |
| "kl": 2.499222755432129e-05, | |
| "learning_rate": 6.382978723404255e-07, | |
| "loss": 0.027088361978530883, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3934171557426454, | |
| "reward_std": 0.17848547250032426, | |
| "rewards/Table2LatexAcc/mean": 0.5714545011520386, | |
| "rewards/Table2LatexAcc/std": 0.1947036311030388, | |
| "rewards/Table2Latexform/mean": 0.8219626545906067, | |
| "rewards/Table2Latexform/std": 0.26306993812322615, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.003089 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2934.1, | |
| "completions/mean_length": 1610.1421875, | |
| "completions/min_length": 755.4, | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 0.06925838510518537, | |
| "kl": 4.082918167114258e-05, | |
| "learning_rate": 7.446808510638297e-07, | |
| "loss": 0.026965773105621337, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3997071743011475, | |
| "reward_std": 0.1628158211708069, | |
| "rewards/Table2LatexAcc/mean": 0.5788642525672912, | |
| "rewards/Table2LatexAcc/std": 0.1913457229733467, | |
| "rewards/Table2Latexform/mean": 0.8208428978919983, | |
| "rewards/Table2Latexform/std": 0.24103213250637054, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.003093 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2921.4, | |
| "completions/mean_length": 1588.996875, | |
| "completions/min_length": 877.3, | |
| "epoch": 0.215633423180593, | |
| "grad_norm": 0.07126416986934427, | |
| "kl": 7.665157318115234e-05, | |
| "learning_rate": 8.51063829787234e-07, | |
| "loss": 0.019620102643966675, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.3808103442192077, | |
| "reward_std": 0.16285659074783326, | |
| "rewards/Table2LatexAcc/mean": 0.575913542509079, | |
| "rewards/Table2LatexAcc/std": 0.1952654466032982, | |
| "rewards/Table2Latexform/mean": 0.8048967957496643, | |
| "rewards/Table2Latexform/std": 0.26039574593305587, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.003098 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2788.6, | |
| "completions/mean_length": 1593.7375, | |
| "completions/min_length": 759.5, | |
| "epoch": 0.24258760107816713, | |
| "grad_norm": 0.08624615635734913, | |
| "kl": 0.0001492023468017578, | |
| "learning_rate": 9.574468085106384e-07, | |
| "loss": 0.015057304501533508, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4499380350112916, | |
| "reward_std": 0.13701159432530402, | |
| "rewards/Table2LatexAcc/mean": 0.6036670506000519, | |
| "rewards/Table2LatexAcc/std": 0.19481946676969528, | |
| "rewards/Table2Latexform/mean": 0.8462709665298462, | |
| "rewards/Table2Latexform/std": 0.2081604614853859, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2874.3, | |
| "completions/mean_length": 1671.671875, | |
| "completions/min_length": 803.0, | |
| "epoch": 0.2695417789757412, | |
| "grad_norm": 0.0716967712923244, | |
| "kl": 0.00020017623901367188, | |
| "learning_rate": 9.99971193595054e-07, | |
| "loss": 0.01770862340927124, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4406983852386475, | |
| "reward_std": 0.13970830887556077, | |
| "rewards/Table2LatexAcc/mean": 0.5882811903953552, | |
| "rewards/Table2LatexAcc/std": 0.1866762012243271, | |
| "rewards/Table2Latexform/mean": 0.8524171948432923, | |
| "rewards/Table2Latexform/std": 0.20919820815324783, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.003122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2952.4, | |
| "completions/mean_length": 1598.51875, | |
| "completions/min_length": 714.2, | |
| "epoch": 0.29649595687331537, | |
| "grad_norm": 0.06459857928181523, | |
| "kl": 0.000313568115234375, | |
| "learning_rate": 9.99795166473852e-07, | |
| "loss": 0.028602027893066408, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4819077610969544, | |
| "reward_std": 0.13568009808659554, | |
| "rewards/Table2LatexAcc/mean": 0.6212433338165283, | |
| "rewards/Table2LatexAcc/std": 0.2183626562356949, | |
| "rewards/Table2Latexform/mean": 0.860664427280426, | |
| "rewards/Table2Latexform/std": 0.22936906069517135, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.003116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2717.5, | |
| "completions/mean_length": 1575.8484375, | |
| "completions/min_length": 791.8, | |
| "epoch": 0.32345013477088946, | |
| "grad_norm": 0.0685119094996376, | |
| "kl": 0.0005132675170898438, | |
| "learning_rate": 9.994591720616975e-07, | |
| "loss": 0.009688837081193924, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4809726119041442, | |
| "reward_std": 0.12747596204280853, | |
| "rewards/Table2LatexAcc/mean": 0.6219225466251374, | |
| "rewards/Table2LatexAcc/std": 0.18778605610132218, | |
| "rewards/Table2Latexform/mean": 0.8590500473976135, | |
| "rewards/Table2Latexform/std": 0.2048894114792347, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.003124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2847.6, | |
| "completions/mean_length": 1657.3984375, | |
| "completions/min_length": 848.6, | |
| "epoch": 0.3504043126684636, | |
| "grad_norm": 0.08157608763386034, | |
| "kl": 0.0006221771240234375, | |
| "learning_rate": 9.98963317898878e-07, | |
| "loss": 0.019288820028305054, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5079341650009155, | |
| "reward_std": 0.14110046178102492, | |
| "rewards/Table2LatexAcc/mean": 0.634680551290512, | |
| "rewards/Table2LatexAcc/std": 0.20506853014230728, | |
| "rewards/Table2Latexform/mean": 0.8732536375522614, | |
| "rewards/Table2Latexform/std": 0.2030529037117958, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.003131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2604.5, | |
| "completions/mean_length": 1594.03125, | |
| "completions/min_length": 874.2, | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 0.08207408816722973, | |
| "kl": 0.0008758544921875, | |
| "learning_rate": 9.983077626913043e-07, | |
| "loss": 0.01205739676952362, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.507494068145752, | |
| "reward_std": 0.11759327277541161, | |
| "rewards/Table2LatexAcc/mean": 0.6351809322834014, | |
| "rewards/Table2LatexAcc/std": 0.20378359854221345, | |
| "rewards/Table2Latexform/mean": 0.8723131835460662, | |
| "rewards/Table2Latexform/std": 0.19805027171969414, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.003146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2689.8, | |
| "completions/mean_length": 1616.209375, | |
| "completions/min_length": 859.7, | |
| "epoch": 0.40431266846361186, | |
| "grad_norm": 0.07397791319392964, | |
| "kl": 0.0009979248046875, | |
| "learning_rate": 9.974927162597145e-07, | |
| "loss": 0.00553036704659462, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4614445567131042, | |
| "reward_std": 0.09695540629327297, | |
| "rewards/Table2LatexAcc/mean": 0.5970049917697906, | |
| "rewards/Table2LatexAcc/std": 0.19226298183202745, | |
| "rewards/Table2Latexform/mean": 0.8644395887851715, | |
| "rewards/Table2Latexform/std": 0.19864091277122498, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.003149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2567.9, | |
| "completions/mean_length": 1566.4984375, | |
| "completions/min_length": 893.1, | |
| "epoch": 0.431266846361186, | |
| "grad_norm": 0.07175621361462335, | |
| "kl": 0.0010894775390625, | |
| "learning_rate": 9.965184394725169e-07, | |
| "loss": 0.0031857024878263474, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.519572389125824, | |
| "reward_std": 0.11443859413266182, | |
| "rewards/Table2LatexAcc/mean": 0.6457596719264984, | |
| "rewards/Table2LatexAcc/std": 0.19394133985042572, | |
| "rewards/Table2Latexform/mean": 0.8738127529621125, | |
| "rewards/Table2Latexform/std": 0.2086488611996174, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2495.6, | |
| "completions/mean_length": 1533.6078125, | |
| "completions/min_length": 824.4, | |
| "epoch": 0.4582210242587601, | |
| "grad_norm": 0.07293410493568253, | |
| "kl": 0.0012256622314453125, | |
| "learning_rate": 9.953852441622956e-07, | |
| "loss": 0.010935479402542114, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5418180227279663, | |
| "reward_std": 0.09861706346273422, | |
| "rewards/Table2LatexAcc/mean": 0.6385594129562377, | |
| "rewards/Table2LatexAcc/std": 0.20701712965965272, | |
| "rewards/Table2Latexform/mean": 0.9032586097717286, | |
| "rewards/Table2Latexform/std": 0.13576763048768042, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2680.3, | |
| "completions/mean_length": 1574.1953125, | |
| "completions/min_length": 785.9, | |
| "epoch": 0.48517520215633425, | |
| "grad_norm": 0.06793751628944666, | |
| "kl": 0.0012157440185546875, | |
| "learning_rate": 9.940934930260036e-07, | |
| "loss": 5.354555323719978e-05, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4896148085594176, | |
| "reward_std": 0.09992180205881596, | |
| "rewards/Table2LatexAcc/mean": 0.6215297818183899, | |
| "rewards/Table2LatexAcc/std": 0.19945850372314453, | |
| "rewards/Table2Latexform/mean": 0.8680850267410278, | |
| "rewards/Table2Latexform/std": 0.21053530871868134, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.003182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2833.6, | |
| "completions/mean_length": 1614.984375, | |
| "completions/min_length": 836.7, | |
| "epoch": 0.5121293800539084, | |
| "grad_norm": 0.07800355989359384, | |
| "kl": 0.001270294189453125, | |
| "learning_rate": 9.92643599508875e-07, | |
| "loss": 0.01619407832622528, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4949531078338623, | |
| "reward_std": 0.13312736451625823, | |
| "rewards/Table2LatexAcc/mean": 0.6362012684345245, | |
| "rewards/Table2LatexAcc/std": 0.20503575205802918, | |
| "rewards/Table2Latexform/mean": 0.8587518692016601, | |
| "rewards/Table2Latexform/std": 0.21175305247306825, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2672.2, | |
| "completions/mean_length": 1541.909375, | |
| "completions/min_length": 850.3, | |
| "epoch": 0.5390835579514824, | |
| "grad_norm": 0.06742732491254022, | |
| "kl": 0.001406097412109375, | |
| "learning_rate": 9.910360276720974e-07, | |
| "loss": 0.011617515981197358, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5225663423538207, | |
| "reward_std": 0.12018043175339699, | |
| "rewards/Table2LatexAcc/mean": 0.634308785200119, | |
| "rewards/Table2LatexAcc/std": 0.19708103239536284, | |
| "rewards/Table2Latexform/mean": 0.8882575571537018, | |
| "rewards/Table2Latexform/std": 0.1708666443824768, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.003187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2863.4, | |
| "completions/mean_length": 1613.728125, | |
| "completions/min_length": 945.5, | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 0.0656531441071073, | |
| "kl": 0.0012493133544921875, | |
| "learning_rate": 9.89271292044279e-07, | |
| "loss": 0.016812124848365785, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.494718039035797, | |
| "reward_std": 0.13982294127345085, | |
| "rewards/Table2LatexAcc/mean": 0.6318223595619201, | |
| "rewards/Table2LatexAcc/std": 0.2267067864537239, | |
| "rewards/Table2Latexform/mean": 0.862895667552948, | |
| "rewards/Table2Latexform/std": 0.21887822449207306, | |
| "step": 105, | |
| "train_speed(iter/s)": 0.003185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2608.4, | |
| "completions/mean_length": 1581.09375, | |
| "completions/min_length": 790.2, | |
| "epoch": 0.5929919137466307, | |
| "grad_norm": 0.06582315254735907, | |
| "kl": 0.001549530029296875, | |
| "learning_rate": 9.873499574567681e-07, | |
| "loss": 0.010095475614070893, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4990519642829896, | |
| "reward_std": 0.10162455774843693, | |
| "rewards/Table2LatexAcc/mean": 0.6363059639930725, | |
| "rewards/Table2LatexAcc/std": 0.19157345294952394, | |
| "rewards/Table2Latexform/mean": 0.862746000289917, | |
| "rewards/Table2Latexform/std": 0.20280475318431854, | |
| "step": 110, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2667.1, | |
| "completions/mean_length": 1623.0609375, | |
| "completions/min_length": 862.5, | |
| "epoch": 0.6199460916442049, | |
| "grad_norm": 0.0676327840344494, | |
| "kl": 0.0012561798095703125, | |
| "learning_rate": 9.852726388628688e-07, | |
| "loss": 0.009667134284973145, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.499183714389801, | |
| "reward_std": 0.11013109833002091, | |
| "rewards/Table2LatexAcc/mean": 0.6425846576690674, | |
| "rewards/Table2LatexAcc/std": 0.20786909610033036, | |
| "rewards/Table2Latexform/mean": 0.8565990567207337, | |
| "rewards/Table2Latexform/std": 0.22757124677300453, | |
| "step": 115, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2648.9, | |
| "completions/mean_length": 1617.8234375, | |
| "completions/min_length": 878.5, | |
| "epoch": 0.6469002695417789, | |
| "grad_norm": 0.05968134371530777, | |
| "kl": 0.00138702392578125, | |
| "learning_rate": 9.830400011410156e-07, | |
| "loss": 0.003092067874968052, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4849407434463502, | |
| "reward_std": 0.08951778598129749, | |
| "rewards/Table2LatexAcc/mean": 0.6164660751819611, | |
| "rewards/Table2LatexAcc/std": 0.204762826859951, | |
| "rewards/Table2Latexform/mean": 0.8684746503829956, | |
| "rewards/Table2Latexform/std": 0.20679847225546838, | |
| "step": 120, | |
| "train_speed(iter/s)": 0.003203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2692.2, | |
| "completions/mean_length": 1548.7328125, | |
| "completions/min_length": 787.5, | |
| "epoch": 0.6738544474393531, | |
| "grad_norm": 0.08102589945740883, | |
| "kl": 0.0015228271484375, | |
| "learning_rate": 9.806527588819692e-07, | |
| "loss": 0.010635277628898621, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4484204292297362, | |
| "reward_std": 0.12051350250840187, | |
| "rewards/Table2LatexAcc/mean": 0.5983371019363404, | |
| "rewards/Table2LatexAcc/std": 0.19683932662010192, | |
| "rewards/Table2Latexform/mean": 0.8500832915306091, | |
| "rewards/Table2Latexform/std": 0.22092146053910255, | |
| "step": 125, | |
| "train_speed(iter/s)": 0.003201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2778.3, | |
| "completions/mean_length": 1625.8203125, | |
| "completions/min_length": 975.0, | |
| "epoch": 0.7008086253369272, | |
| "grad_norm": 0.06581718834736765, | |
| "kl": 0.0013751983642578125, | |
| "learning_rate": 9.781116761600992e-07, | |
| "loss": 0.008332135528326035, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4899320960044862, | |
| "reward_std": 0.1020436353981495, | |
| "rewards/Table2LatexAcc/mean": 0.6282051384449006, | |
| "rewards/Table2LatexAcc/std": 0.18501487672328948, | |
| "rewards/Table2Latexform/mean": 0.8617269277572632, | |
| "rewards/Table2Latexform/std": 0.21871328055858613, | |
| "step": 130, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2638.1, | |
| "completions/mean_length": 1558.1703125, | |
| "completions/min_length": 627.1, | |
| "epoch": 0.7277628032345014, | |
| "grad_norm": 0.08042871559451785, | |
| "kl": 0.0016143798828125, | |
| "learning_rate": 9.75417566288832e-07, | |
| "loss": 0.022313964366912842, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4969127774238586, | |
| "reward_std": 0.09756124764680862, | |
| "rewards/Table2LatexAcc/mean": 0.6259892284870148, | |
| "rewards/Table2LatexAcc/std": 0.18883997797966004, | |
| "rewards/Table2Latexform/mean": 0.8709235429763794, | |
| "rewards/Table2Latexform/std": 0.20598914995789527, | |
| "step": 135, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2594.9, | |
| "completions/mean_length": 1566.1609375, | |
| "completions/min_length": 858.8, | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.06451190019619368, | |
| "kl": 0.00159912109375, | |
| "learning_rate": 9.725712915603353e-07, | |
| "loss": 0.00471530370414257, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.4983545541763306, | |
| "reward_std": 0.10673168860375881, | |
| "rewards/Table2LatexAcc/mean": 0.6402543127536774, | |
| "rewards/Table2LatexAcc/std": 0.20193217247724532, | |
| "rewards/Table2Latexform/mean": 0.8581002414226532, | |
| "rewards/Table2Latexform/std": 0.2166273184120655, | |
| "step": 140, | |
| "train_speed(iter/s)": 0.003204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2585.7, | |
| "completions/mean_length": 1566.3171875, | |
| "completions/min_length": 783.5, | |
| "epoch": 0.7816711590296496, | |
| "grad_norm": 0.06842850537031975, | |
| "kl": 0.0016510009765625, | |
| "learning_rate": 9.69573762969529e-07, | |
| "loss": 0.008447134494781494, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5043591618537904, | |
| "reward_std": 0.10398341864347457, | |
| "rewards/Table2LatexAcc/mean": 0.6323516488075256, | |
| "rewards/Table2LatexAcc/std": 0.19334442913532257, | |
| "rewards/Table2Latexform/mean": 0.8720075249671936, | |
| "rewards/Table2Latexform/std": 0.18240121901035308, | |
| "step": 145, | |
| "train_speed(iter/s)": 0.00321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2677.8, | |
| "completions/mean_length": 1609.378125, | |
| "completions/min_length": 905.7, | |
| "epoch": 0.8086253369272237, | |
| "grad_norm": 0.06551621158024094, | |
| "kl": 0.0015777587890625, | |
| "learning_rate": 9.664259399225067e-07, | |
| "loss": 0.005352784693241119, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5480861902236938, | |
| "reward_std": 0.0993690624833107, | |
| "rewards/Table2LatexAcc/mean": 0.6449747204780578, | |
| "rewards/Table2LatexAcc/std": 0.1948181599378586, | |
| "rewards/Table2Latexform/mean": 0.9031114995479583, | |
| "rewards/Table2Latexform/std": 0.15625113472342492, | |
| "step": 150, | |
| "train_speed(iter/s)": 0.003209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2549.0, | |
| "completions/mean_length": 1590.425, | |
| "completions/min_length": 906.1, | |
| "epoch": 0.8355795148247979, | |
| "grad_norm": 0.062419199774521504, | |
| "kl": 0.001617431640625, | |
| "learning_rate": 9.631288299294624e-07, | |
| "loss": 0.005914273858070374, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5300285577774049, | |
| "reward_std": 0.07754914276301861, | |
| "rewards/Table2LatexAcc/mean": 0.6536247074604035, | |
| "rewards/Table2LatexAcc/std": 0.1888583406805992, | |
| "rewards/Table2Latexform/mean": 0.8764038562774659, | |
| "rewards/Table2Latexform/std": 0.19668345972895623, | |
| "step": 155, | |
| "train_speed(iter/s)": 0.003214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2675.8, | |
| "completions/mean_length": 1594.9125, | |
| "completions/min_length": 904.8, | |
| "epoch": 0.862533692722372, | |
| "grad_norm": 0.07381311561133726, | |
| "kl": 0.001567840576171875, | |
| "learning_rate": 9.596834882822218e-07, | |
| "loss": 0.0008831036277115345, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5059723734855652, | |
| "reward_std": 0.11190913170576096, | |
| "rewards/Table2LatexAcc/mean": 0.6299772620201111, | |
| "rewards/Table2LatexAcc/std": 0.18921414837241174, | |
| "rewards/Table2Latexform/mean": 0.8759951233863831, | |
| "rewards/Table2Latexform/std": 0.18999719768762588, | |
| "step": 160, | |
| "train_speed(iter/s)": 0.003217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2556.4, | |
| "completions/mean_length": 1544.603125, | |
| "completions/min_length": 762.6, | |
| "epoch": 0.889487870619946, | |
| "grad_norm": 0.05908311708682426, | |
| "kl": 0.00150909423828125, | |
| "learning_rate": 9.560910177164787e-07, | |
| "loss": 0.007628290355205536, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5502776145935058, | |
| "reward_std": 0.07942587062716484, | |
| "rewards/Table2LatexAcc/mean": 0.6583487272262574, | |
| "rewards/Table2LatexAcc/std": 0.18620822578668594, | |
| "rewards/Table2Latexform/mean": 0.8919288635253906, | |
| "rewards/Table2Latexform/std": 0.18032970726490022, | |
| "step": 165, | |
| "train_speed(iter/s)": 0.003222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2511.0, | |
| "completions/mean_length": 1593.021875, | |
| "completions/min_length": 835.0, | |
| "epoch": 0.9164420485175202, | |
| "grad_norm": 0.059058153054454235, | |
| "kl": 0.00181427001953125, | |
| "learning_rate": 9.523525680588476e-07, | |
| "loss": 0.008848436921834946, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5144242644309998, | |
| "reward_std": 0.09105739071965217, | |
| "rewards/Table2LatexAcc/mean": 0.6321313917636872, | |
| "rewards/Table2LatexAcc/std": 0.18138092905282974, | |
| "rewards/Table2Latexform/mean": 0.8822928845882416, | |
| "rewards/Table2Latexform/std": 0.19216497614979744, | |
| "step": 170, | |
| "train_speed(iter/s)": 0.003226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2540.9, | |
| "completions/mean_length": 1593.3265625, | |
| "completions/min_length": 730.9, | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 0.060705012485582154, | |
| "kl": 0.00139312744140625, | |
| "learning_rate": 9.484693358588434e-07, | |
| "loss": 0.007192098349332809, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5356804728507996, | |
| "reward_std": 0.09475091025233269, | |
| "rewards/Table2LatexAcc/mean": 0.6415718376636506, | |
| "rewards/Table2LatexAcc/std": 0.1903410866856575, | |
| "rewards/Table2Latexform/mean": 0.8941086232662201, | |
| "rewards/Table2Latexform/std": 0.1649520058184862, | |
| "step": 175, | |
| "train_speed(iter/s)": 0.003229 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2650.3, | |
| "completions/mean_length": 1586.671875, | |
| "completions/min_length": 818.4, | |
| "epoch": 0.9703504043126685, | |
| "grad_norm": 0.07391935117978014, | |
| "kl": 0.001525115966796875, | |
| "learning_rate": 9.444425640059076e-07, | |
| "loss": 0.007059115171432495, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5181043028831482, | |
| "reward_std": 0.09545421227812767, | |
| "rewards/Table2LatexAcc/mean": 0.638035798072815, | |
| "rewards/Table2LatexAcc/std": 0.20127029120922088, | |
| "rewards/Table2Latexform/mean": 0.8800684928894043, | |
| "rewards/Table2Latexform/std": 0.18569674119353294, | |
| "step": 180, | |
| "train_speed(iter/s)": 0.00323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2459.1, | |
| "completions/mean_length": 1533.0296875, | |
| "completions/min_length": 935.5, | |
| "epoch": 0.9973045822102425, | |
| "grad_norm": 0.07964238807994012, | |
| "kl": 0.00167999267578125, | |
| "learning_rate": 9.402735413316011e-07, | |
| "loss": -0.00023833760060369967, | |
| "memory(GiB)": 74.0, | |
| "reward": 1.5326952815055848, | |
| "reward_std": 0.08919371329247952, | |
| "rewards/Table2LatexAcc/mean": 0.6511692225933075, | |
| "rewards/Table2LatexAcc/std": 0.1804724305868149, | |
| "rewards/Table2Latexform/mean": 0.8815260589122772, | |
| "rewards/Table2Latexform/std": 0.19189485386013985, | |
| "step": 185, | |
| "train_speed(iter/s)": 0.003237 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 925, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |